first usable version

This commit is contained in:
Sebastian Hugentobler 2019-05-28 11:28:20 +02:00
parent d525e7c013
commit 2a88e5c900
6 changed files with 48 additions and 46 deletions

10
.gitignore vendored
View File

@ -3,11 +3,5 @@
*.swp
**/.idea/workspace.xml
**/.idea/tasks.xml
bin
*.exe
*.dll
*.so
*.dylib
*.test
*.out
.glide/
*.pdf
bin/

View File

@ -6,11 +6,11 @@ clean:
rm -r bin/
go clean ./cmd/ecload/ ./pkg/ecload/
bin/ecload:
bin/ecload: cmd/ecload/*.go pkg/ecload/*.go
GOOS=linux GOARCH=amd64 go build -ldflags '-s' -v -o $@ cmd/ecload/main.go
bin/ecload.exe:
bin/ecload.exe: cmd/ecload/*.go pkg/ecload/*.go
GOOS=windows GOARCH=amd64 go build -ldflags '-s' -v -o $@ cmd/ecload/main.go
bin/ecload-mac:
bin/ecload-mac: cmd/ecload/*.go pkg/ecload/*.go
GOOS=darwin GOARCH=amd64 go build -ldflags '-s' -v -o $@ cmd/ecload/main.go

View File

@ -26,7 +26,7 @@ func initLogger(
Trace: log.New(traceHandle, "TRACE: ", log.Ldate|log.Ltime),
Info: log.New(infoHandle, "INFO: ", log.Ldate|log.Ltime),
Warning: log.New(warningHandle, "WARNING: ", log.Ldate|log.Ltime),
Error: log.New(errorHandle, "ERROR: ", log.Ldate|log.Ltime),
Error: log.New(errorHandle, "ERROR: ", log.Ldate|log.Ltime|log.Lshortfile),
}
}

View File

@ -14,6 +14,8 @@ import (
"sync"
)
const MAX_DOWNLOADS = 10
const BASEURL = "https://www.e-codices.unifr.ch"
const THUMBNAILURL = "%s/en/thumbs/%s"
@ -36,16 +38,29 @@ func DownloadBook(outDir string, size string, id string, logger Logger) error {
defer os.RemoveAll(dir)
var wg sync.WaitGroup
semaphore := make(chan struct{}, MAX_DOWNLOADS)
logger.Info.Println("downloading pages...")
for index, pageUrl := range pageUrls {
downloadUrl, err:= getSizeLink(pageUrl, size)
if err != nil {
return err
}
wg.Add(1)
filename := fmt.Sprintf("%06d.jpg", index)
go downloadToFile(filename, dir, downloadUrl, wg)
go func(index int, pageUrl string) {
defer wg.Done()
semaphore <- struct{}{}
defer func() {
<-semaphore
}()
downloadUrl, err:= getSizeLink(pageUrl, size)
if err != nil {
//return err
}
filename := fmt.Sprintf("%06d.jpg", index)
downloadToFile(filename, dir, downloadUrl)
}(index, pageUrl)
}
wg.Wait()
@ -58,7 +73,7 @@ func DownloadBook(outDir string, size string, id string, logger Logger) error {
pdfPath := path.Join(outDir, fmt.Sprintf("%s.pdf", strings.ReplaceAll(id, "/", "_")))
logger.Info.Printf("Saving pdf to %s...", pdfPath)
return ImgsToPdf(dir, pdfPath)
return ImgDirToPdf(dir, pdfPath)
}
// Find the download link for a page of a specific size.

View File

@ -11,7 +11,6 @@ import (
"net/http"
"os"
"path"
"sync"
)
// Download an HTML page from a URL (must be UTF-8) and convert it to a goquery document.
@ -35,10 +34,7 @@ func fetchDocument(url string) (*goquery.Document, error) {
}
// Download a file.
func downloadToFile(filename string, dir string, pageUrl string, wg sync.WaitGroup) error {
wg.Add(1)
defer wg.Done()
func downloadToFile(filename string, dir string, pageUrl string) error {
fullpath := path.Join(dir, filename)
out, err := os.Create(fullpath)

View File

@ -5,44 +5,41 @@
package ecload
import (
"image"
_ "image/jpeg"
"io/ioutil"
"os"
"path"
"strings"
"github.com/jung-kurt/gofpdf"
)
// Concatenate all jpg files in a directory to a single pdf.
func ImgsToPdf(dir string, output string) error {
pdf := gofpdf.New("P", "mm", "", "")
func ImgDirToPdf(dir string, output string) error {
files, err := ioutil.ReadDir(dir)
if err != nil {
return err
}
pdf := gofpdf.New("P", "mm", "", "")
opt := gofpdf.ImageOptions{ImageType: "jpg", ReadDpi: true}
for _, f := range files {
filepath := path.Join(dir, f.Name())
if strings.HasSuffix(f.Name(), ".jpg") {
filepath := path.Join(dir, f.Name())
reader, err := os.Open(filepath)
if err != nil {
return err
reader, err := os.Open(filepath)
if err != nil {
return err
}
pdf.RegisterImageOptionsReader(f.Name(), opt, reader)
info := pdf.RegisterImageOptions(f.Name(), opt)
pdf.AddPageFormat("P", gofpdf.SizeType{ Wd: info.Width(), Ht: info.Height() })
pdf.ImageOptions(f.Name(), 0, 0, info.Width(), info.Height(), false, opt, 0, "")
reader.Close()
}
img, _, err := image.DecodeConfig(reader)
if err != nil {
return err
}
pdf.AddPageFormat("P", gofpdf.SizeType{Wd: float64(img.Width), Ht: float64(img.Height)})
opt := gofpdf.ImageOptions{ImageType: "jpg", ReadDpi: true}
pdf.RegisterImageOptionsReader(f.Name(), opt, reader)
pdf.ImageOptions(f.Name(), 0, 0, 0, 0, false, opt, 0, "")
reader.Close()
}
return pdf.OutputFileAndClose(output)