first usable version

This commit is contained in:
Sebastian Hugentobler 2019-05-28 11:28:20 +02:00
parent d525e7c013
commit 2a88e5c900
6 changed files with 48 additions and 46 deletions

10
.gitignore vendored
View File

@ -3,11 +3,5 @@
*.swp *.swp
**/.idea/workspace.xml **/.idea/workspace.xml
**/.idea/tasks.xml **/.idea/tasks.xml
bin *.pdf
*.exe bin/
*.dll
*.so
*.dylib
*.test
*.out
.glide/

View File

@ -6,11 +6,11 @@ clean:
rm -r bin/ rm -r bin/
go clean ./cmd/ecload/ ./pkg/ecload/ go clean ./cmd/ecload/ ./pkg/ecload/
bin/ecload: bin/ecload: cmd/ecload/*.go pkg/ecload/*.go
GOOS=linux GOARCH=amd64 go build -ldflags '-s' -v -o $@ cmd/ecload/main.go GOOS=linux GOARCH=amd64 go build -ldflags '-s' -v -o $@ cmd/ecload/main.go
bin/ecload.exe: bin/ecload.exe: cmd/ecload/*.go pkg/ecload/*.go
GOOS=windows GOARCH=amd64 go build -ldflags '-s' -v -o $@ cmd/ecload/main.go GOOS=windows GOARCH=amd64 go build -ldflags '-s' -v -o $@ cmd/ecload/main.go
bin/ecload-mac: bin/ecload-mac: cmd/ecload/*.go pkg/ecload/*.go
GOOS=darwin GOARCH=amd64 go build -ldflags '-s' -v -o $@ cmd/ecload/main.go GOOS=darwin GOARCH=amd64 go build -ldflags '-s' -v -o $@ cmd/ecload/main.go

View File

@ -26,7 +26,7 @@ func initLogger(
Trace: log.New(traceHandle, "TRACE: ", log.Ldate|log.Ltime), Trace: log.New(traceHandle, "TRACE: ", log.Ldate|log.Ltime),
Info: log.New(infoHandle, "INFO: ", log.Ldate|log.Ltime), Info: log.New(infoHandle, "INFO: ", log.Ldate|log.Ltime),
Warning: log.New(warningHandle, "WARNING: ", log.Ldate|log.Ltime), Warning: log.New(warningHandle, "WARNING: ", log.Ldate|log.Ltime),
Error: log.New(errorHandle, "ERROR: ", log.Ldate|log.Ltime), Error: log.New(errorHandle, "ERROR: ", log.Ldate|log.Ltime|log.Lshortfile),
} }
} }

View File

@ -14,6 +14,8 @@ import (
"sync" "sync"
) )
const MAX_DOWNLOADS = 10
const BASEURL = "https://www.e-codices.unifr.ch" const BASEURL = "https://www.e-codices.unifr.ch"
const THUMBNAILURL = "%s/en/thumbs/%s" const THUMBNAILURL = "%s/en/thumbs/%s"
@ -36,16 +38,29 @@ func DownloadBook(outDir string, size string, id string, logger Logger) error {
defer os.RemoveAll(dir) defer os.RemoveAll(dir)
var wg sync.WaitGroup var wg sync.WaitGroup
semaphore := make(chan struct{}, MAX_DOWNLOADS)
logger.Info.Println("downloading pages...") logger.Info.Println("downloading pages...")
for index, pageUrl := range pageUrls { for index, pageUrl := range pageUrls {
wg.Add(1)
go func(index int, pageUrl string) {
defer wg.Done()
semaphore <- struct{}{}
defer func() {
<-semaphore
}()
downloadUrl, err:= getSizeLink(pageUrl, size) downloadUrl, err:= getSizeLink(pageUrl, size)
if err != nil { if err != nil {
return err //return err
} }
filename := fmt.Sprintf("%06d.jpg", index) filename := fmt.Sprintf("%06d.jpg", index)
go downloadToFile(filename, dir, downloadUrl, wg) downloadToFile(filename, dir, downloadUrl)
}(index, pageUrl)
} }
wg.Wait() wg.Wait()
@ -58,7 +73,7 @@ func DownloadBook(outDir string, size string, id string, logger Logger) error {
pdfPath := path.Join(outDir, fmt.Sprintf("%s.pdf", strings.ReplaceAll(id, "/", "_"))) pdfPath := path.Join(outDir, fmt.Sprintf("%s.pdf", strings.ReplaceAll(id, "/", "_")))
logger.Info.Printf("Saving pdf to %s...", pdfPath) logger.Info.Printf("Saving pdf to %s...", pdfPath)
return ImgsToPdf(dir, pdfPath) return ImgDirToPdf(dir, pdfPath)
} }
// Find the download link for a page of a specific size. // Find the download link for a page of a specific size.

View File

@ -11,7 +11,6 @@ import (
"net/http" "net/http"
"os" "os"
"path" "path"
"sync"
) )
// Download a html page from an url (must be UTF-8) and convert it to a goquery document. // Download a html page from an url (must be UTF-8) and convert it to a goquery document.
@ -35,10 +34,7 @@ func fetchDocument(url string) (*goquery.Document, error) {
} }
// Download a file. // Download a file.
func downloadToFile(filename string, dir string, pageUrl string, wg sync.WaitGroup) error { func downloadToFile(filename string, dir string, pageUrl string) error {
wg.Add(1)
defer wg.Done()
fullpath := path.Join(dir, filename) fullpath := path.Join(dir, filename)
out, err := os.Create(fullpath) out, err := os.Create(fullpath)

View File

@ -5,25 +5,26 @@
package ecload package ecload
import ( import (
"image"
_ "image/jpeg"
"io/ioutil" "io/ioutil"
"os" "os"
"path" "path"
"strings"
"github.com/jung-kurt/gofpdf" "github.com/jung-kurt/gofpdf"
) )
// Concatenate all jpg files in a directory to a single pdf. // Concatenate all jpg files in a directory to a single pdf.
func ImgsToPdf(dir string, output string) error { func ImgDirToPdf(dir string, output string) error {
pdf := gofpdf.New("P", "mm", "", "")
files, err := ioutil.ReadDir(dir) files, err := ioutil.ReadDir(dir)
if err != nil { if err != nil {
return err return err
} }
pdf := gofpdf.New("P", "mm", "", "")
opt := gofpdf.ImageOptions{ImageType: "jpg", ReadDpi: true}
for _, f := range files { for _, f := range files {
if strings.HasSuffix(f.Name(), ".jpg") {
filepath := path.Join(dir, f.Name()) filepath := path.Join(dir, f.Name())
reader, err := os.Open(filepath) reader, err := os.Open(filepath)
@ -31,19 +32,15 @@ func ImgsToPdf(dir string, output string) error {
return err return err
} }
img, _, err := image.DecodeConfig(reader)
if err != nil {
return err
}
pdf.AddPageFormat("P", gofpdf.SizeType{Wd: float64(img.Width), Ht: float64(img.Height)})
opt := gofpdf.ImageOptions{ImageType: "jpg", ReadDpi: true}
pdf.RegisterImageOptionsReader(f.Name(), opt, reader) pdf.RegisterImageOptionsReader(f.Name(), opt, reader)
pdf.ImageOptions(f.Name(), 0, 0, 0, 0, false, opt, 0, "") info := pdf.RegisterImageOptions(f.Name(), opt)
pdf.AddPageFormat("P", gofpdf.SizeType{ Wd: info.Width(), Ht: info.Height() })
pdf.ImageOptions(f.Name(), 0, 0, info.Width(), info.Height(), false, opt, 0, "")
reader.Close() reader.Close()
} }
}
return pdf.OutputFileAndClose(output) return pdf.OutputFileAndClose(output)
} }