105 lines
2.3 KiB
Go
105 lines
2.3 KiB
Go
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
|
|
|
|
package ecload
|
|
|
|
import (
|
|
"fmt"
|
|
"github.com/PuerkitoBio/goquery"
|
|
"io/ioutil"
|
|
"os"
|
|
"path"
|
|
"strings"
|
|
"sync"
|
|
)
|
|
|
|
// URL building blocks for the e-codices website.
const (
	// BASEURL is the scheme and host every e-codices URL is built on.
	BASEURL = "https://www.e-codices.unifr.ch"

	// THUMBNAILURL is a format string for the thumbnail overview page.
	// getPageLinks fills the first verb with BASEURL and the second with
	// the e-codice id.
	THUMBNAILURL = "%s/en/thumbs/%s"
)
|
|
|
|
// Save a whole e-codice to the specified directory.
|
|
// If the directory does not exist, it will be created.
|
|
// size must be one of small, medium, large, max.
|
|
func DownloadBook(outDir string, size string, id string, logger Logger) error {
|
|
logger.Info.Println("finding pages...")
|
|
|
|
pageUrls, err := getPageLinks(id)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
dir, err := ioutil.TempDir("", "ecload")
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
defer os.RemoveAll(dir)
|
|
|
|
var wg sync.WaitGroup
|
|
|
|
logger.Info.Println("downloading pages...")
|
|
for index, pageUrl := range pageUrls {
|
|
downloadUrl, err:= getSizeLink(pageUrl, size)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
filename := fmt.Sprintf("%06d.jpg", index)
|
|
go downloadToFile(filename, dir, downloadUrl, wg)
|
|
}
|
|
|
|
wg.Wait()
|
|
|
|
err = os.MkdirAll(outDir, os.ModePerm)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
pdfPath := path.Join(outDir, fmt.Sprintf("%s.pdf", strings.ReplaceAll(id, "/", "_")))
|
|
logger.Info.Printf("Saving pdf to %s...", pdfPath)
|
|
|
|
return ImgsToPdf(dir, pdfPath)
|
|
}
|
|
|
|
// Find the download link for a page of a specific size.
|
|
func getSizeLink(pageUrl string , size string) (string, error) {
|
|
doc, err := fetchDocument(pageUrl)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
|
|
downloadSizeUrl := ""
|
|
|
|
doc.Find("ul.download-page-list > li > a").Each(func(i int, s *goquery.Selection) {
|
|
downloadUrl, ok := s.Attr("href")
|
|
|
|
if ok && strings.HasSuffix(downloadUrl, fmt.Sprintf("/%s", size)) {
|
|
downloadSizeUrl = downloadUrl
|
|
}
|
|
})
|
|
|
|
return downloadSizeUrl, nil
|
|
}
|
|
|
|
// Find page urls for an e-codice.
|
|
// It loads the thumbnail page and parses the html.
|
|
func getPageLinks(id string) ([]string, error) {
|
|
thumbnailsUrl := fmt.Sprintf(THUMBNAILURL, BASEURL, id)
|
|
|
|
doc, err := fetchDocument(thumbnailsUrl)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
pageUrls := make([]string, 0)
|
|
doc.Find("div.thumbnail-image > a").Each(func(i int, s *goquery.Selection) {
|
|
pageUrl, ok := s.Attr("href")
|
|
|
|
if ok {
|
|
pageUrls = append(pageUrls, pageUrl)
|
|
}
|
|
})
|
|
|
|
return pageUrls, nil
|
|
}
|