rewrite so webdriver is only used for the login
This commit is contained in:
parent
3edb6a7671
commit
cf8fbe0965
26 changed files with 5385 additions and 1296 deletions
52
download/src/download.rs
Normal file
52
download/src/download.rs
Normal file
|
@ -0,0 +1,52 @@
|
|||
//! Handle downloads of newspaper issues.
|
||||
|
||||
use std::{
|
||||
fs::{self},
|
||||
io::{Cursor, Read},
|
||||
path::Path,
|
||||
};
|
||||
|
||||
use anyhow::Result;
|
||||
use tracing::{debug, info};
|
||||
|
||||
use crate::{nzz::Issue, pdf};
|
||||
|
||||
/// Download all pages of the provided `issues` and save them merged to the directory `output_dir`.
|
||||
///
|
||||
/// Create `output_dir` if it does not exist.
|
||||
pub async fn fetch(issues: Vec<Issue>, output_dir: &Path) -> Result<()> {
|
||||
debug!("ensuring {output_dir:?} exists");
|
||||
fs::create_dir_all(output_dir)?;
|
||||
|
||||
for issue in issues {
|
||||
info!("saving issue {}", issue.publication_date);
|
||||
|
||||
let tmp_dir = tempfile::tempdir()?;
|
||||
let mut pages = Vec::new();
|
||||
for (i, page) in issue.pages.into_iter().enumerate() {
|
||||
debug!(
|
||||
"fetching issue {}, page {}: {page}",
|
||||
issue.publication_date,
|
||||
i + 1
|
||||
);
|
||||
|
||||
let response = reqwest::Client::new().get(page).send().await?;
|
||||
let mut content = Cursor::new(response.bytes().await?);
|
||||
let mut page_data = Vec::new();
|
||||
content.read_to_end(&mut page_data)?;
|
||||
|
||||
let tmp_page = tmp_dir.path().join(i.to_string());
|
||||
fs::write(&tmp_page, page_data)?;
|
||||
pages.push(tmp_page);
|
||||
}
|
||||
|
||||
let issue_name = format!("nzz_{}.pdf", issue.publication_date);
|
||||
let issue_path = output_dir.join(issue_name);
|
||||
let issue_title = format!("NZZ {}", issue.publication_date);
|
||||
|
||||
pdf::merge(pages, &issue_path, &issue_title)?;
|
||||
debug!("issue {} saved", issue.publication_date);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue