87 lines
2.4 KiB
Rust
87 lines
2.4 KiB
Rust
//! Handle downloads of newspaper issues.
|
|
|
|
use std::{
|
|
fs,
|
|
io::{Cursor, Read},
|
|
path::{Path, PathBuf},
|
|
sync::Arc,
|
|
};
|
|
|
|
use anyhow::Result;
|
|
use tokio::{spawn, sync::Semaphore, task::JoinSet};
|
|
use tracing::{debug, info};
|
|
|
|
use crate::{nzz::Issue, pdf};
|
|
|
|
/// Maximum number of issues that are fetched concurrently; used as the permit count of the
/// semaphore in [`fetch`].
const MAX_DOWNLOADS: usize = 4;
|
|
|
|
/// Fetch a single newspaper issue and save the merged pdf to `output_dir`.
|
|
async fn fetch_issue(issue: &Issue, output_dir: PathBuf) -> Result<()> {
|
|
info!(
|
|
"saving issue {} ({})",
|
|
issue.publication_date, issue.edition
|
|
);
|
|
|
|
let client = reqwest::Client::new();
|
|
let tmp_dir = tempfile::tempdir()?;
|
|
let mut pages = Vec::new();
|
|
for (i, page) in issue.pages.iter().enumerate() {
|
|
debug!(
|
|
"fetching issue {}, page {}: {page}",
|
|
issue.publication_date,
|
|
i + 1
|
|
);
|
|
|
|
let response = client.get(page).send().await?;
|
|
let mut content = Cursor::new(response.bytes().await?);
|
|
let mut page_data = Vec::new();
|
|
content.read_to_end(&mut page_data)?;
|
|
|
|
let tmp_page = tmp_dir.path().join(i.to_string());
|
|
fs::write(&tmp_page, page_data)?;
|
|
pages.push(tmp_page);
|
|
}
|
|
|
|
let issue_name = format!(
|
|
"nzz_{}_{}.pdf",
|
|
issue.publication_date,
|
|
issue.edition.to_lowercase()
|
|
);
|
|
let issue_path = output_dir.join(issue_name);
|
|
let issue_title = format!("NZZ {} ({})", issue.publication_date, issue.edition);
|
|
|
|
pdf::merge(pages, &issue_path, &issue_title)?;
|
|
debug!("issue {} saved", issue.publication_date);
|
|
|
|
Ok(())
|
|
}
|
|
|
|
/// Download all pages of the provided `issues` and save them merged to the directory `output_dir`.
|
|
///
|
|
/// Create `output_dir` if it does not exist.
|
|
pub async fn fetch(issues: Vec<Issue>, output_dir: &Path) -> Result<()> {
|
|
debug!("ensuring {output_dir:?} exists");
|
|
fs::create_dir_all(output_dir)?;
|
|
|
|
let permits = Arc::new(Semaphore::new(MAX_DOWNLOADS));
|
|
|
|
let mut jobs = JoinSet::new();
|
|
for issue in issues {
|
|
let permits = permits.clone();
|
|
let output_dir = output_dir.to_path_buf().clone();
|
|
|
|
let job: tokio::task::JoinHandle<Result<()>> = spawn(async move {
|
|
let _permit = permits.acquire().await.unwrap();
|
|
fetch_issue(&issue, output_dir).await?;
|
|
Ok(())
|
|
});
|
|
jobs.spawn(job);
|
|
}
|
|
|
|
while let Some(res) = jobs.join_next().await {
|
|
res???;
|
|
}
|
|
|
|
Ok(())
|
|
}
|