//! Handle downloads of newspaper issues.

use std::{
    fs,
    io::{Cursor, Read},
    path::{Path, PathBuf},
    sync::Arc,
};

use anyhow::Result;
use tokio::{sync::Semaphore, task::JoinSet};
use tracing::{debug, info};

use crate::{nzz::Issue, pdf};

/// Maximum number of issues downloaded concurrently.
const MAX_DOWNLOADS: usize = 4;

/// Fetch a single newspaper issue and save the merged PDF to `output_dir`.
async fn fetch_issue(issue: &Issue, output_dir: PathBuf) -> Result<()> {
    info!("saving issue {}", issue.publication_date);

    let client = reqwest::Client::new();
    let tmp_dir = tempfile::tempdir()?;

    let mut pages = Vec::new();
    for (i, page) in issue.pages.iter().enumerate() {
        debug!(
            "fetching issue {}, page {}: {page}",
            issue.publication_date,
            i + 1
        );
        let response = client.get(page).send().await?;
        let mut content = Cursor::new(response.bytes().await?);
        let mut page_data = Vec::new();
        content.read_to_end(&mut page_data)?;

        // Keep each downloaded page in the temporary directory until all
        // pages of the issue are merged.
        let tmp_page = tmp_dir.path().join(i.to_string());
        fs::write(&tmp_page, page_data)?;
        pages.push(tmp_page);
    }

    let issue_name = format!("nzz_{}.pdf", issue.publication_date);
    let issue_path = output_dir.join(issue_name);
    let issue_title = format!("NZZ {}", issue.publication_date);
    pdf::merge(pages, &issue_path, &issue_title)?;

    debug!("issue {} saved", issue.publication_date);
    Ok(())
}

/// Download all pages of the provided `issues` and save them merged to the directory `output_dir`.
///
/// Create `output_dir` if it does not exist.
pub async fn fetch(issues: Vec<Issue>, output_dir: &Path) -> Result<()> {
    debug!("ensuring {output_dir:?} exists");
    fs::create_dir_all(output_dir)?;

    let permits = Arc::new(Semaphore::new(MAX_DOWNLOADS));
    let mut jobs: JoinSet<Result<()>> = JoinSet::new();
    for issue in issues {
        let permits = permits.clone();
        let output_dir = output_dir.to_path_buf();
        // Spawn each issue directly onto the JoinSet; the semaphore caps how
        // many downloads run at the same time. The semaphore is never closed,
        // so acquiring a permit cannot fail.
        jobs.spawn(async move {
            let _permit = permits.acquire().await.unwrap();
            fetch_issue(&issue, output_dir).await?;
            Ok(())
        });
    }

    while let Some(res) = jobs.join_next().await {
        // The outer `?` propagates task panics or cancellation (`JoinError`),
        // the inner one propagates download failures.
        res??;
    }

    Ok(())
}
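
// Usage sketch (hypothetical caller; how `Issue` values are obtained, e.g. a
// `crate::nzz::search` helper and its arguments, is an assumption and not part
// of this module):
//
//     let issues: Vec<Issue> = nzz::search(start_date, end_date).await?;
//     fetch(issues, Path::new("downloads")).await?;
//
// `fetch` creates the `downloads/` directory if needed and writes one merged
// PDF per issue, downloading at most `MAX_DOWNLOADS` issues concurrently.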