first usable version
This commit is contained in:
parent
d525e7c013
commit
2a88e5c900
10
.gitignore
vendored
10
.gitignore
vendored
@ -3,11 +3,5 @@
|
||||
*.swp
|
||||
**/.idea/workspace.xml
|
||||
**/.idea/tasks.xml
|
||||
bin
|
||||
*.exe
|
||||
*.dll
|
||||
*.so
|
||||
*.dylib
|
||||
*.test
|
||||
*.out
|
||||
.glide/
|
||||
*.pdf
|
||||
bin/
|
||||
|
6
Makefile
6
Makefile
@ -6,11 +6,11 @@ clean:
|
||||
rm -r bin/
|
||||
go clean ./cmd/ecload/ ./pkg/ecload/
|
||||
|
||||
bin/ecload:
|
||||
bin/ecload: cmd/ecload/*.go pkg/ecload/*.go
|
||||
GOOS=linux GOARCH=amd64 go build -ldflags '-s' -v -o $@ cmd/ecload/main.go
|
||||
|
||||
bin/ecload.exe:
|
||||
bin/ecload.exe: cmd/ecload/*.go pkg/ecload/*.go
|
||||
GOOS=windows GOARCH=amd64 go build -ldflags '-s' -v -o $@ cmd/ecload/main.go
|
||||
|
||||
bin/ecload-mac:
|
||||
bin/ecload-mac: cmd/ecload/*.go pkg/ecload/*.go
|
||||
GOOS=darwin GOARCH=amd64 go build -ldflags '-s' -v -o $@ cmd/ecload/main.go
|
||||
|
@ -26,7 +26,7 @@ func initLogger(
|
||||
Trace: log.New(traceHandle, "TRACE: ", log.Ldate|log.Ltime),
|
||||
Info: log.New(infoHandle, "INFO: ", log.Ldate|log.Ltime),
|
||||
Warning: log.New(warningHandle, "WARNING: ", log.Ldate|log.Ltime),
|
||||
Error: log.New(errorHandle, "ERROR: ", log.Ldate|log.Ltime),
|
||||
Error: log.New(errorHandle, "ERROR: ", log.Ldate|log.Ltime|log.Lshortfile),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -14,6 +14,8 @@ import (
|
||||
"sync"
|
||||
)
|
||||
|
||||
const MAX_DOWNLOADS = 10
|
||||
|
||||
const BASEURL = "https://www.e-codices.unifr.ch"
|
||||
const THUMBNAILURL = "%s/en/thumbs/%s"
|
||||
|
||||
@ -36,16 +38,29 @@ func DownloadBook(outDir string, size string, id string, logger Logger) error {
|
||||
defer os.RemoveAll(dir)
|
||||
|
||||
var wg sync.WaitGroup
|
||||
semaphore := make(chan struct{}, MAX_DOWNLOADS)
|
||||
|
||||
logger.Info.Println("downloading pages...")
|
||||
for index, pageUrl := range pageUrls {
|
||||
downloadUrl, err:= getSizeLink(pageUrl, size)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
wg.Add(1)
|
||||
|
||||
filename := fmt.Sprintf("%06d.jpg", index)
|
||||
go downloadToFile(filename, dir, downloadUrl, wg)
|
||||
go func(index int, pageUrl string) {
|
||||
defer wg.Done()
|
||||
semaphore <- struct{}{}
|
||||
|
||||
defer func() {
|
||||
<-semaphore
|
||||
}()
|
||||
|
||||
downloadUrl, err:= getSizeLink(pageUrl, size)
|
||||
if err != nil {
|
||||
//return err
|
||||
}
|
||||
|
||||
filename := fmt.Sprintf("%06d.jpg", index)
|
||||
downloadToFile(filename, dir, downloadUrl)
|
||||
|
||||
}(index, pageUrl)
|
||||
}
|
||||
|
||||
wg.Wait()
|
||||
@ -58,7 +73,7 @@ func DownloadBook(outDir string, size string, id string, logger Logger) error {
|
||||
pdfPath := path.Join(outDir, fmt.Sprintf("%s.pdf", strings.ReplaceAll(id, "/", "_")))
|
||||
logger.Info.Printf("Saving pdf to %s...", pdfPath)
|
||||
|
||||
return ImgsToPdf(dir, pdfPath)
|
||||
return ImgDirToPdf(dir, pdfPath)
|
||||
}
|
||||
|
||||
// Find the download link for a page of a specific size.
|
||||
|
@ -11,7 +11,6 @@ import (
|
||||
"net/http"
|
||||
"os"
|
||||
"path"
|
||||
"sync"
|
||||
)
|
||||
|
||||
// Download a html page from an url (must be UTF-8) and convert it to a goquery document.
|
||||
@ -35,10 +34,7 @@ func fetchDocument(url string) (*goquery.Document, error) {
|
||||
}
|
||||
|
||||
// Download a file.
|
||||
func downloadToFile(filename string, dir string, pageUrl string, wg sync.WaitGroup) error {
|
||||
wg.Add(1)
|
||||
defer wg.Done()
|
||||
|
||||
func downloadToFile(filename string, dir string, pageUrl string) error {
|
||||
fullpath := path.Join(dir, filename)
|
||||
|
||||
out, err := os.Create(fullpath)
|
||||
|
@ -5,44 +5,41 @@
|
||||
package ecload
|
||||
|
||||
import (
|
||||
"image"
|
||||
_ "image/jpeg"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path"
|
||||
"strings"
|
||||
|
||||
"github.com/jung-kurt/gofpdf"
|
||||
)
|
||||
|
||||
// Concatenate all jpg files in a directory to a single pdf.
|
||||
func ImgsToPdf(dir string, output string) error {
|
||||
pdf := gofpdf.New("P", "mm", "", "")
|
||||
|
||||
func ImgDirToPdf(dir string, output string) error {
|
||||
files, err := ioutil.ReadDir(dir)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
pdf := gofpdf.New("P", "mm", "", "")
|
||||
opt := gofpdf.ImageOptions{ImageType: "jpg", ReadDpi: true}
|
||||
|
||||
for _, f := range files {
|
||||
filepath := path.Join(dir, f.Name())
|
||||
if strings.HasSuffix(f.Name(), ".jpg") {
|
||||
filepath := path.Join(dir, f.Name())
|
||||
|
||||
reader, err := os.Open(filepath)
|
||||
if err != nil {
|
||||
return err
|
||||
reader, err := os.Open(filepath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
pdf.RegisterImageOptionsReader(f.Name(), opt, reader)
|
||||
info := pdf.RegisterImageOptions(f.Name(), opt)
|
||||
pdf.AddPageFormat("P", gofpdf.SizeType{ Wd: info.Width(), Ht: info.Height() })
|
||||
|
||||
pdf.ImageOptions(f.Name(), 0, 0, info.Width(), info.Height(), false, opt, 0, "")
|
||||
|
||||
reader.Close()
|
||||
}
|
||||
|
||||
img, _, err := image.DecodeConfig(reader)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
pdf.AddPageFormat("P", gofpdf.SizeType{Wd: float64(img.Width), Ht: float64(img.Height)})
|
||||
|
||||
opt := gofpdf.ImageOptions{ImageType: "jpg", ReadDpi: true}
|
||||
pdf.RegisterImageOptionsReader(f.Name(), opt, reader)
|
||||
pdf.ImageOptions(f.Name(), 0, 0, 0, 0, false, opt, 0, "")
|
||||
|
||||
reader.Close()
|
||||
}
|
||||
|
||||
return pdf.OutputFileAndClose(output)
|
||||
|
Loading…
Reference in New Issue
Block a user