first usable version
This commit is contained in:
parent
d525e7c013
commit
2a88e5c900
10
.gitignore
vendored
10
.gitignore
vendored
@ -3,11 +3,5 @@
|
|||||||
*.swp
|
*.swp
|
||||||
**/.idea/workspace.xml
|
**/.idea/workspace.xml
|
||||||
**/.idea/tasks.xml
|
**/.idea/tasks.xml
|
||||||
bin
|
*.pdf
|
||||||
*.exe
|
bin/
|
||||||
*.dll
|
|
||||||
*.so
|
|
||||||
*.dylib
|
|
||||||
*.test
|
|
||||||
*.out
|
|
||||||
.glide/
|
|
||||||
|
6
Makefile
6
Makefile
@ -6,11 +6,11 @@ clean:
|
|||||||
rm -r bin/
|
rm -r bin/
|
||||||
go clean ./cmd/ecload/ ./pkg/ecload/
|
go clean ./cmd/ecload/ ./pkg/ecload/
|
||||||
|
|
||||||
bin/ecload:
|
bin/ecload: cmd/ecload/*.go pkg/ecload/*.go
|
||||||
GOOS=linux GOARCH=amd64 go build -ldflags '-s' -v -o $@ cmd/ecload/main.go
|
GOOS=linux GOARCH=amd64 go build -ldflags '-s' -v -o $@ cmd/ecload/main.go
|
||||||
|
|
||||||
bin/ecload.exe:
|
bin/ecload.exe: cmd/ecload/*.go pkg/ecload/*.go
|
||||||
GOOS=windows GOARCH=amd64 go build -ldflags '-s' -v -o $@ cmd/ecload/main.go
|
GOOS=windows GOARCH=amd64 go build -ldflags '-s' -v -o $@ cmd/ecload/main.go
|
||||||
|
|
||||||
bin/ecload-mac:
|
bin/ecload-mac: cmd/ecload/*.go pkg/ecload/*.go
|
||||||
GOOS=darwin GOARCH=amd64 go build -ldflags '-s' -v -o $@ cmd/ecload/main.go
|
GOOS=darwin GOARCH=amd64 go build -ldflags '-s' -v -o $@ cmd/ecload/main.go
|
||||||
|
@ -26,7 +26,7 @@ func initLogger(
|
|||||||
Trace: log.New(traceHandle, "TRACE: ", log.Ldate|log.Ltime),
|
Trace: log.New(traceHandle, "TRACE: ", log.Ldate|log.Ltime),
|
||||||
Info: log.New(infoHandle, "INFO: ", log.Ldate|log.Ltime),
|
Info: log.New(infoHandle, "INFO: ", log.Ldate|log.Ltime),
|
||||||
Warning: log.New(warningHandle, "WARNING: ", log.Ldate|log.Ltime),
|
Warning: log.New(warningHandle, "WARNING: ", log.Ldate|log.Ltime),
|
||||||
Error: log.New(errorHandle, "ERROR: ", log.Ldate|log.Ltime),
|
Error: log.New(errorHandle, "ERROR: ", log.Ldate|log.Ltime|log.Lshortfile),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -14,6 +14,8 @@ import (
|
|||||||
"sync"
|
"sync"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
const MAX_DOWNLOADS = 10
|
||||||
|
|
||||||
const BASEURL = "https://www.e-codices.unifr.ch"
|
const BASEURL = "https://www.e-codices.unifr.ch"
|
||||||
const THUMBNAILURL = "%s/en/thumbs/%s"
|
const THUMBNAILURL = "%s/en/thumbs/%s"
|
||||||
|
|
||||||
@ -36,16 +38,29 @@ func DownloadBook(outDir string, size string, id string, logger Logger) error {
|
|||||||
defer os.RemoveAll(dir)
|
defer os.RemoveAll(dir)
|
||||||
|
|
||||||
var wg sync.WaitGroup
|
var wg sync.WaitGroup
|
||||||
|
semaphore := make(chan struct{}, MAX_DOWNLOADS)
|
||||||
|
|
||||||
logger.Info.Println("downloading pages...")
|
logger.Info.Println("downloading pages...")
|
||||||
for index, pageUrl := range pageUrls {
|
for index, pageUrl := range pageUrls {
|
||||||
downloadUrl, err:= getSizeLink(pageUrl, size)
|
wg.Add(1)
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
filename := fmt.Sprintf("%06d.jpg", index)
|
go func(index int, pageUrl string) {
|
||||||
go downloadToFile(filename, dir, downloadUrl, wg)
|
defer wg.Done()
|
||||||
|
semaphore <- struct{}{}
|
||||||
|
|
||||||
|
defer func() {
|
||||||
|
<-semaphore
|
||||||
|
}()
|
||||||
|
|
||||||
|
downloadUrl, err:= getSizeLink(pageUrl, size)
|
||||||
|
if err != nil {
|
||||||
|
//return err
|
||||||
|
}
|
||||||
|
|
||||||
|
filename := fmt.Sprintf("%06d.jpg", index)
|
||||||
|
downloadToFile(filename, dir, downloadUrl)
|
||||||
|
|
||||||
|
}(index, pageUrl)
|
||||||
}
|
}
|
||||||
|
|
||||||
wg.Wait()
|
wg.Wait()
|
||||||
@ -58,7 +73,7 @@ func DownloadBook(outDir string, size string, id string, logger Logger) error {
|
|||||||
pdfPath := path.Join(outDir, fmt.Sprintf("%s.pdf", strings.ReplaceAll(id, "/", "_")))
|
pdfPath := path.Join(outDir, fmt.Sprintf("%s.pdf", strings.ReplaceAll(id, "/", "_")))
|
||||||
logger.Info.Printf("Saving pdf to %s...", pdfPath)
|
logger.Info.Printf("Saving pdf to %s...", pdfPath)
|
||||||
|
|
||||||
return ImgsToPdf(dir, pdfPath)
|
return ImgDirToPdf(dir, pdfPath)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Find the download link for a page of a specific size.
|
// Find the download link for a page of a specific size.
|
||||||
|
@ -11,7 +11,6 @@ import (
|
|||||||
"net/http"
|
"net/http"
|
||||||
"os"
|
"os"
|
||||||
"path"
|
"path"
|
||||||
"sync"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
// Download a html page from an url (must be UTF-8) and convert it to a goquery document.
|
// Download a html page from an url (must be UTF-8) and convert it to a goquery document.
|
||||||
@ -35,10 +34,7 @@ func fetchDocument(url string) (*goquery.Document, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Download a file.
|
// Download a file.
|
||||||
func downloadToFile(filename string, dir string, pageUrl string, wg sync.WaitGroup) error {
|
func downloadToFile(filename string, dir string, pageUrl string) error {
|
||||||
wg.Add(1)
|
|
||||||
defer wg.Done()
|
|
||||||
|
|
||||||
fullpath := path.Join(dir, filename)
|
fullpath := path.Join(dir, filename)
|
||||||
|
|
||||||
out, err := os.Create(fullpath)
|
out, err := os.Create(fullpath)
|
||||||
|
@ -5,44 +5,41 @@
|
|||||||
package ecload
|
package ecload
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"image"
|
|
||||||
_ "image/jpeg"
|
|
||||||
"io/ioutil"
|
"io/ioutil"
|
||||||
"os"
|
"os"
|
||||||
"path"
|
"path"
|
||||||
|
"strings"
|
||||||
|
|
||||||
"github.com/jung-kurt/gofpdf"
|
"github.com/jung-kurt/gofpdf"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Concatenate all jpg files in a directory to a single pdf.
|
// Concatenate all jpg files in a directory to a single pdf.
|
||||||
func ImgsToPdf(dir string, output string) error {
|
func ImgDirToPdf(dir string, output string) error {
|
||||||
pdf := gofpdf.New("P", "mm", "", "")
|
|
||||||
|
|
||||||
files, err := ioutil.ReadDir(dir)
|
files, err := ioutil.ReadDir(dir)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pdf := gofpdf.New("P", "mm", "", "")
|
||||||
|
opt := gofpdf.ImageOptions{ImageType: "jpg", ReadDpi: true}
|
||||||
|
|
||||||
for _, f := range files {
|
for _, f := range files {
|
||||||
filepath := path.Join(dir, f.Name())
|
if strings.HasSuffix(f.Name(), ".jpg") {
|
||||||
|
filepath := path.Join(dir, f.Name())
|
||||||
|
|
||||||
reader, err := os.Open(filepath)
|
reader, err := os.Open(filepath)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
pdf.RegisterImageOptionsReader(f.Name(), opt, reader)
|
||||||
|
info := pdf.RegisterImageOptions(f.Name(), opt)
|
||||||
|
pdf.AddPageFormat("P", gofpdf.SizeType{ Wd: info.Width(), Ht: info.Height() })
|
||||||
|
|
||||||
|
pdf.ImageOptions(f.Name(), 0, 0, info.Width(), info.Height(), false, opt, 0, "")
|
||||||
|
|
||||||
|
reader.Close()
|
||||||
}
|
}
|
||||||
|
|
||||||
img, _, err := image.DecodeConfig(reader)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
pdf.AddPageFormat("P", gofpdf.SizeType{Wd: float64(img.Width), Ht: float64(img.Height)})
|
|
||||||
|
|
||||||
opt := gofpdf.ImageOptions{ImageType: "jpg", ReadDpi: true}
|
|
||||||
pdf.RegisterImageOptionsReader(f.Name(), opt, reader)
|
|
||||||
pdf.ImageOptions(f.Name(), 0, 0, 0, 0, false, opt, 0, "")
|
|
||||||
|
|
||||||
reader.Close()
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return pdf.OutputFileAndClose(output)
|
return pdf.OutputFileAndClose(output)
|
||||||
|
Loading…
Reference in New Issue
Block a user