rewrite so webdriver is only used for the login

This commit is contained in:
Sebastian Hugentobler 2024-07-03 12:32:47 +02:00
parent 3edb6a7671
commit cf8fbe0965
Signed by: shu
GPG key ID: BB32CF3CA052C2F0
26 changed files with 5385 additions and 1296 deletions

15
cookie/Cargo.toml Normal file
View file

@ -0,0 +1,15 @@
# Manifest of the `nzz-cookie` crate; most metadata and dependency versions are
# inherited from the workspace.
[package]
name = "nzz-cookie"
version = "0.1.0"
edition = "2021"
license = { workspace = true }
authors = { workspace = true }
repository = { workspace = true }
[dependencies]
anyhow = { workspace = true }
clap = { workspace = true }
# WebDriver client; the only dependency pinned here instead of taken from the workspace.
fantoccini = "0.19.3"
serde_json = { workspace = true }
tokio = { workspace = true }

12
cookie/src/cli.rs Normal file
View file

@ -0,0 +1,12 @@
//! Cli interface.
use clap::Parser;
/// Fetch the authentication cookie from an nzz login.
// NOTE(review): clap's derive reads the `///` comments in this block to build the
// `--help` output, so rewording them presumably changes user-visible text — confirm
// before editing them.
#[derive(Parser)]
#[command(version, about, long_about = None, after_help = "Provide the password from stdin.")]
pub struct Config {
/// Username.
// Accepted as `-u`/`--username`; `env` additionally allows an environment variable
// (presumably `USERNAME`, derived from the field name — verify against clap's docs).
#[arg(short, long, env)]
pub username: String,
}

33
cookie/src/geckodriver.rs Normal file
View file

@ -0,0 +1,33 @@
//! Start geckodriver or connect to an existing one.
use std::{net::TcpStream, process::Stdio, time::Duration};
use anyhow::Result;
use tokio::{process::Command, spawn, sync::oneshot, task::JoinHandle};
/// Standard address for geckodriver to listen on.
pub const GECKODRIVER_HOST: &str = "127.0.0.1:4444";
const GECKODRIVER_BINARY: &str = "geckodriver";
/// Check if ageckodriver is already running and if not start one.
pub async fn run(stop_rx: oneshot::Receiver<()>) -> Result<Option<JoinHandle<()>>> {
if TcpStream::connect_timeout(&GECKODRIVER_HOST.parse()?, Duration::from_secs(2)).is_err() {
let handle = spawn(async move {
let mut child = Command::new(GECKODRIVER_BINARY)
.stdout(Stdio::null())
.spawn()
.expect("failed to run binary");
tokio::select! {
_ = stop_rx => {
child.kill().await.expect("Failed to kill process");
}
result = child.wait() => {
result.expect("Child process wasn't running");
}
}
});
Ok(Some(handle))
} else {
Ok(None)
}
}

79
cookie/src/lib.rs Normal file
View file

@ -0,0 +1,79 @@
//! Login to the NZZ archive and print the authentication cookie for further usage.
//!
//! Leveraging webdriver + geckodriver.
use std::time::Duration;
use anyhow::Result;
use cli::Config;
use fantoccini::{elements::Element, Client, ClientBuilder, Locator};
use serde_json::json;
use tokio::{sync::oneshot, time::sleep};
use crate::geckodriver::GECKODRIVER_HOST;
pub mod cli;
pub mod geckodriver;
const LOGIN_URL: &str = "https://zeitungsarchiv.nzz.ch/";
/// Entrypoint to login to the NZZ archive.
pub async fn run(args: Config, pw: &str) -> Result<()> {
let (stop_tx, stop_rx) = oneshot::channel();
let driver_handle = geckodriver::run(stop_rx).await?;
let driver_args = json!({ "moz:firefoxOptions": {"args": ["-headless"]} });
let client = ClientBuilder::native()
.capabilities(driver_args.as_object().unwrap().clone())
.connect(&format!("http://{GECKODRIVER_HOST}"))
.await?;
client.goto(LOGIN_URL).await?;
let login_button: Element = element_from_css(&client, ".fup-menu-login-container").await?;
sleep(Duration::from_millis(500)).await;
login_button.click().await?;
let login_iframe = element_from_css(&client, r#"iframe[id^="piano""#).await?;
login_iframe.enter_frame().await?;
let email_input = element_from_css(&client, r#"input[name="email"]"#).await?;
email_input.send_keys(&args.username).await?;
let pw_input: Element = element_from_css(&client, r#"input[type="password"]"#).await?;
pw_input.send_keys(pw).await?;
let submit = element_from_css(&client, r#"button[class="btn prime"]"#).await?;
submit.click().await?;
let main_frame = client.window().await?;
client.switch_to_window(main_frame).await?;
element_from_css(&client, ".fup-login-open.fup-button.fup-s-menu-login-open").await?;
let cookies = client.get_all_cookies().await?;
let cobbled_cookies = cookies
.into_iter()
.map(|cookie| format!("{}={}", cookie.name(), cookie.value()))
.fold(String::new(), |mut acc, word| {
if !acc.is_empty() {
acc.push(';');
}
acc.push_str(&word);
acc
});
println!("{cobbled_cookies}");
client.close().await?;
if let Some(driver_handle) = driver_handle {
let _ = stop_tx.send(());
driver_handle.abort();
}
Ok(())
}
/// Wait until an element matching the CSS `selector` exists and return it.
///
/// Uses the client's wait settings; a failure while waiting is returned as an error.
async fn element_from_css(client: &Client, selector: &str) -> Result<Element> {
    let locator = Locator::Css(selector);
    let element = client.wait().for_element(locator).await?;
    Ok(element)
}

24
cookie/src/main.rs Normal file
View file

@ -0,0 +1,24 @@
use std::io::{self, Read};
use anyhow::Result;
use clap::Parser;
use nzz_cookie::cli::Config;
#[tokio::main]
async fn main() -> Result<()> {
let args = Config::parse();
let pw = read_pw().unwrap_or_else(|_| panic!("Provide the password via stdin"));
nzz_cookie::run(args, &pw).await?;
Ok(())
}
/// Read all of stdin and return it as the password.
///
/// Leading and trailing whitespace (such as the newline added by `echo`) is removed.
fn read_pw() -> Result<String> {
    let mut input = String::new();
    io::stdin().lock().read_to_string(&mut input)?;
    Ok(input.trim().to_owned())
}