initial commit

commit 2a9f427bc7
21 changed files with 3692 additions and 0 deletions

src/app.rs (new file)
@@ -0,0 +1,21 @@
use std::collections::HashMap;
use std::sync::Arc;

use axum::Router;
use tower_http::trace::TraceLayer;

use crate::{feeds::Feed, routes};

#[derive(Debug)]
pub struct AppState {
    pub feeds: HashMap<String, Feed>,
}

pub fn create(feeds: HashMap<String, Feed>) -> Router {
    let app_state = Arc::new(AppState { feeds });

    Router::new()
        .nest("/", routes::all())
        .with_state(app_state)
        .layer(TraceLayer::new_for_http())
}

src/config.rs (new file)
@@ -0,0 +1,79 @@
use std::path::PathBuf;

use config::{Config, ConfigError, Environment};
use directories::ProjectDirs;
use serde::Deserialize;
use thiserror::Error;
use tracing::debug;

#[derive(Error, Debug)]
pub enum AppConfigError {
    #[error("No valid home directory found.")]
    HomePathError,
    #[error("Configuration error.")]
    ConfigError(#[from] ConfigError),
}

fn default_server_address() -> String {
    String::from("[::1]:3000")
}

fn default_feeds() -> Vec<String> {
    Vec::new()
}

#[derive(Clone, Debug, Default, Deserialize)]
pub struct PenguinConfig {
    #[serde(default = "default_server_address")]
    pub server_address: String,
    #[serde(default = "default_feeds")]
    pub feeds: Vec<String>,
}

impl PenguinConfig {
    /// Find the correct project directory for the respective operating system.
    fn get_project_dirs() -> Result<ProjectDirs, AppConfigError> {
        let project_dirs = ProjectDirs::from("ch", "vanwa", "findpenguins-feed")
            .ok_or(AppConfigError::HomePathError)?;

        Ok(project_dirs)
    }

    /// Find the correct configuration file for the respective operating system.
    /// - linux: `$XDG_CONFIG_HOME/findpenguins-feed/config.toml` or `$HOME/.config/findpenguins-feed/config.toml`
    /// - macOS: `$HOME/Library/Preferences/ch.vanwa.findpenguins-feed/config.toml`
    /// - windows: `{FOLDERID_RoamingAppData}/vanwa/findpenguins-feed/config/config.toml`
    fn config_path() -> Result<PathBuf, AppConfigError> {
        let project_dirs = Self::get_project_dirs()?;
        let config_path = project_dirs.config_dir();
        let config_file = config_path.join("config.toml");

        Ok(config_file)
    }

    /// Read the server configuration.
    /// See [config_path](struct.PenguinConfig.html#method.config_path) for the different configuration file paths.
    /// Everything in the config file can be overridden at runtime by setting the respective ENV variables.
    /// Use `__` (double underscore) as a separator between hierarchies.
    /// For example `FINDPENGUINS_FEED__SERVER_ADDRESS="[::1]:8080"`.
    /// The double underscore is necessary because configuration keys themselves contain single underscores.
    pub fn read() -> Result<Self, AppConfigError> {
        let mut config_builder = Config::builder();
        if let Ok(config_path) = Self::config_path() {
            debug!("trying config file at {:#?}...", config_path);
            config_builder =
                config_builder.add_source(config::File::from(config_path).required(false));
        }

        debug!("merging settings from environment variables...");
        config_builder = config_builder.add_source(
            Environment::with_prefix("findpenguins_feed")
                .separator("__")
                .list_separator(",")
                .with_list_parse_key("feeds")
                .try_parsing(true),
        );

        Ok(config_builder.build()?.try_deserialize()?)
    }
}
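
A minimal sketch of the override mechanism described in the doc comment above (the values and the test itself are invented for illustration; the `FINDPENGUINS_FEED` prefix, `__` separator, and comma list parsing come from the builder in `read`):

#[cfg(test)]
mod tests {
    use super::PenguinConfig;

    #[test]
    fn environment_overrides() {
        // `FINDPENGUINS_FEED` is the prefix; `__` separates it from the key
        // `server_address`, which itself contains a single underscore.
        std::env::set_var("FINDPENGUINS_FEED__SERVER_ADDRESS", "[::1]:8080");
        // `feeds` is list-parsed, so a comma-separated value becomes a Vec.
        std::env::set_var(
            "FINDPENGUINS_FEED__FEEDS",
            "https://findpenguins.com/example/a,https://findpenguins.com/example/b",
        );

        let config = PenguinConfig::read().expect("configuration should parse");
        assert_eq!(config.server_address, "[::1]:8080");
        assert_eq!(config.feeds.len(), 2);
    }
}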

src/feeds/mod.rs (new file)
@@ -0,0 +1,74 @@
use std::collections::HashMap;
use std::fmt;

use futures::future::join_all;
use rss::{GuidBuilder, Item, ItemBuilder};
use serde::Deserialize;
use time::format_description::well_known::iso8601::FormattedComponents;
use time::format_description::well_known::{iso8601, Iso8601};
use time::Date;

use crate::scrapers::page_url;
use crate::{hash, scrapers};

pub mod route;
pub mod template;

#[derive(Clone, Debug, Deserialize)]
pub struct Feed {
    pub title: String,
    pub url: String,
    pub id: String,
}

impl fmt::Display for Feed {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "{} ({})", self.title, self.url)
    }
}

#[derive(Clone, Debug, Deserialize)]
pub struct Footprint {
    pub title: String,
    pub text: String,
    pub url: String,
    pub date: Date,
    pub page: u8,
}

const ISO8601_DATE: u128 = iso8601::Config::DEFAULT
    .set_formatted_components(FormattedComponents::Date)
    .encode();

impl Footprint {
    pub fn into_rss_item(self, root_url: &str) -> Item {
        let desc = format!(
            "{} <br /><br /> --- <br /><br /> from {}",
            self.text,
            page_url(root_url, self.page)
        );
        ItemBuilder::default()
            .title(Some(self.title))
            .pub_date(self.date.format(&Iso8601::<ISO8601_DATE>).ok())
            .link(Some(self.url.clone()))
            .description(Some(desc))
            .guid(Some(GuidBuilder::default().value(self.url).build()))
            .build()
    }
}

pub async fn enrich_feeds(feed_urls: Vec<String>) -> HashMap<String, Feed> {
    let feeds = join_all(feed_urls.iter().map(|feed_url| async {
        let title = scrapers::feed_title(feed_url).await.unwrap();
        let id = hash::fnv_str(feed_url);

        Feed {
            title,
            url: feed_url.clone(),
            id,
        }
    }))
    .await;

    feeds.iter().map(|x| (x.id.clone(), x.clone())).collect()
}
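
To make `into_rss_item` concrete, here is a hypothetical sketch (the titles, URLs, and date are invented): a scraped `Footprint` becomes an `rss::Item` whose GUID is the footprint URL and whose description links back to the exact feed page it was scraped from.

use time::{Date, Month};

// Illustrative only, not part of this commit.
fn example_item() -> rss::Item {
    let footprint = Footprint {
        title: String::from("Day 3: Reykjavik"),
        text: String::from("A short description of the day."),
        url: String::from("https://findpenguins.com/example/footprint-3"),
        date: Date::from_calendar_date(2023, Month::January, 3).unwrap(),
        page: 1,
    };
    // pub_date renders date-only thanks to the ISO8601_DATE config above.
    footprint.into_rss_item("https://findpenguins.com/example/trip")
}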

src/feeds/route.rs (new file)
@@ -0,0 +1,46 @@
use std::sync::Arc;

use axum::extract::Path;
use axum::http::StatusCode;
use axum::{extract::State, response::IntoResponse};
use rss::{ChannelBuilder, Item};

use crate::scrapers::fetch_footprints;
use crate::{app::AppState, templates::HtmlTemplate};

use super::template::FeedsTemplate;

pub async fn feeds(State(state): State<Arc<AppState>>) -> impl IntoResponse {
    let template = FeedsTemplate {
        feeds: state.feeds.values().cloned().collect(),
    };
    HtmlTemplate(template)
}

pub async fn feed(
    State(state): State<Arc<AppState>>,
    Path(feed_id): Path<String>,
) -> Result<impl IntoResponse, StatusCode> {
    let feed = state.feeds.get(&feed_id).ok_or(StatusCode::NOT_FOUND)?;

    let footprints = fetch_footprints(&feed.url)
        .await
        .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
    let footprints: Vec<Item> = footprints
        .iter()
        .map(|x| x.clone().into_rss_item(&feed.url))
        .collect();

    let channel = ChannelBuilder::default()
        .title(&feed.title)
        .link(&feed.url)
        .description(format!("FindPenguins: {}", &feed.title))
        .items(footprints)
        .build();

    Ok((
        StatusCode::OK,
        [("Content-Type", "application/rss+xml")],
        channel.to_string(),
    ))
}

src/feeds/template.rs (new file)
@@ -0,0 +1,9 @@
use askama::Template;

use super::Feed;

#[derive(Template)]
#[template(path = "feeds.html")]
pub struct FeedsTemplate {
    pub feeds: Vec<Feed>,
}

src/hash.rs (new file)
@@ -0,0 +1,12 @@
use std::hash::Hasher;

use fnv::FnvHasher;

pub fn fnv_str(input: &str) -> String {
    let mut hasher = FnvHasher::default();
    hasher.write(input.as_bytes());
    let hash = hasher.finish();
    let short_hash = format!("{:x}", hash);

    short_hash
}
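
The hex digest produced here is what `enrich_feeds` uses as the key a feed is served under at `/feeds/:id`, so all that matters is that it is short, stable, and URL-safe. A minimal usage sketch (the URL is hypothetical, the assertion illustrative):

fn main() {
    // Hash a feed URL into the hex id used in the /feeds/:id route.
    let id = crate::hash::fnv_str("https://findpenguins.com/example/trip");
    // A u64 rendered with {:x} is at most 16 lowercase hex digits.
    assert!(id.len() <= 16);
    println!("feed id: {}", id);
}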

src/logging.rs (new file)
@@ -0,0 +1,14 @@
use tracing::debug;
use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt};

pub(crate) fn setup(bin_name: &str) {
    let default_config = format!("{}=debug,tower_http=debug,axum::rejection=trace", bin_name);
    tracing_subscriber::registry()
        .with(
            tracing_subscriber::EnvFilter::try_from_default_env()
                .unwrap_or_else(|_| default_config.into()),
        )
        .with(tracing_subscriber::fmt::layer().with_target(true))
        .init();
    debug!("tracing/logging is set up");
}

src/main.rs (new file)
@@ -0,0 +1,45 @@
use std::net::SocketAddr;

use crate::{config::PenguinConfig, feeds::enrich_feeds};

mod app;
mod config;
mod feeds;
mod hash;
mod logging;
mod routes;
mod scrapers;
mod signals;
mod templates;

#[tokio::main]
async fn main() {
    logging::setup("findpenguins_feed");
    let config = PenguinConfig::read().expect("Failed to read application configuration");
    let addr = get_addr(&config);
    let feeds = enrich_feeds(config.feeds).await;
    let app = app::create(feeds);

    tracing::debug!("Listening on {}", addr);
    axum::Server::bind(&addr)
        .serve(app.into_make_service())
        .with_graceful_shutdown(signals::shutdown())
        .await
        .expect("Failed to start server");
}

/// Retrieve the address to listen on.
///
/// # Returns
///
/// A `SocketAddr` object that represents the address to listen on.
///
/// # Panics
///
/// If the `server_address` string cannot be parsed into a `SocketAddr` object.
fn get_addr(config: &PenguinConfig) -> SocketAddr {
    config
        .server_address
        .parse()
        .expect("No proper address to listen on")
}

src/routes.rs (new file)
@@ -0,0 +1,11 @@
use std::sync::Arc;

use axum::{body::Body, routing::get, Router};

use crate::{app::AppState, feeds};

pub fn all() -> Router<Arc<AppState>, Body> {
    Router::new()
        .route("/", get(feeds::route::feeds))
        .route("/feeds/:id", get(feeds::route::feed))
}

src/scrapers.rs (new file)
@@ -0,0 +1,105 @@
use crate::feeds::Footprint;
use scraper::{ElementRef, Html, Selector};
use thiserror::Error;
use time::{format_description, Date};

#[derive(Error, Debug)]
pub enum ScrapeError {
    #[error("An error occurred fetching a document.")]
    FetchError(#[from] reqwest::Error),
    #[error("An error occurred constructing a selector.")]
    SelectorError(#[from] scraper::error::SelectorErrorKind<'static>),
}

pub async fn feed_title(feed_url: &str) -> Result<String, ScrapeError> {
    let resp = reqwest::get(feed_url).await?.text().await?;
    let doc = Html::parse_document(&resp);
    let title_selector = Selector::parse("span.placeholder")?;
    let title = doc
        .select(&title_selector)
        .find_map(|x| x.text().next())
        .unwrap_or("no title");

    Ok(title.to_string())
}

pub async fn fetch_footprints(feed_url: &str) -> Result<Vec<Footprint>, ScrapeError> {
    let footprint_selector = Selector::parse("li.footprint div.footprint-container")?;
    let footprint_title_selector = Selector::parse("div.title > h2.headline > a")?;
    let footprint_date_selector = Selector::parse("div.title > span.date > span.desc")?;
    let more_selector = Selector::parse("a#footprintListLoadMore")?;
    let text_selector = Selector::parse("div.text > p")?;
    let text_rest_selector = Selector::parse("div.text > p > span.rest")?;

    let mut footprints: Vec<Footprint> = Vec::new();
    let mut has_more_pages = true;
    let mut page = 1u8;
    while has_more_pages {
        let feed_url = page_url(feed_url, page);
        let resp = reqwest::get(feed_url).await?.text().await?;
        let doc = Html::parse_document(&resp);

        footprints.extend(doc.select(&footprint_selector).flat_map(|x| {
            parse_footprint(
                &x,
                &footprint_title_selector,
                &footprint_date_selector,
                &text_selector,
                &text_rest_selector,
                page,
            )
        }));

        has_more_pages = doc.select(&more_selector).next().is_some();
        page += 1;
    }

    Ok(footprints)
}

fn parse_footprint(
    footprint_el: &ElementRef,
    footprint_title_selector: &Selector,
    footprint_date_selector: &Selector,
    text_selector: &Selector,
    text_rest_selector: &Selector,
    page: u8,
) -> Option<Footprint> {
    let title_el = footprint_el.select(footprint_title_selector).next()?;
    let title = title_el.text().next()?.to_string();
    let url = title_el.value().attr("href")?.to_string();
    let date = footprint_el
        .select(footprint_date_selector)
        .next()?
        .value()
        .attr("content")?;
    let format = format_description::parse("[year]-[month]-[day]").ok()?;
    let date = Date::parse(date, &format).ok()?;
    let text = footprint_el
        .select(text_selector)
        .next()?
        .text()
        .next()?
        .to_string();

    let text = if let Some(text_rest) = footprint_el.select(text_rest_selector).next() {
        format!("{}{}", text, text_rest.text().next()?)
    } else {
        text
    }
    .trim()
    .to_string();

    Some(Footprint {
        title,
        url,
        date,
        text,
        page,
    })
}

pub fn page_url(feed_url: &str, page: u8) -> String {
    let connector = if feed_url.contains('?') { "&" } else { "?" };
    format!("{}{}page={}&sort=ASC", feed_url, connector, page)
}
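
Since `page_url` drives the pagination loop in `fetch_footprints`, a quick illustrative check of its connector logic, with hypothetical URLs (the behavior follows directly from the function above):

#[cfg(test)]
mod tests {
    use super::page_url;

    #[test]
    fn page_url_appends_page_and_sort() {
        assert_eq!(
            page_url("https://findpenguins.com/example/trip", 2),
            "https://findpenguins.com/example/trip?page=2&sort=ASC"
        );
        // An existing query string flips the connector from `?` to `&`.
        assert_eq!(
            page_url("https://findpenguins.com/example/trip?lang=en", 2),
            "https://findpenguins.com/example/trip?lang=en&page=2&sort=ASC"
        );
    }
}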

src/signals.rs (new file)
@@ -0,0 +1,32 @@
use tokio::signal;
use tracing::debug;

/// Handle SIGTERM and SIGINT on our own. This is needed for the process to behave properly when
/// running as PID 1 (as is the case when it runs as the sole process in a container).
///
/// Includes a fallback for non-unix systems, but that path is untested and not expected to be
/// used.
pub(crate) async fn shutdown() {
    let ctrl_c = async {
        signal::ctrl_c()
            .await
            .expect("failed to install Ctrl+C handler");
    };

    #[cfg(unix)]
    let terminate = async {
        signal::unix::signal(signal::unix::SignalKind::terminate())
            .expect("failed to install signal handler")
            .recv()
            .await;
    };

    #[cfg(not(unix))]
    let terminate = std::future::pending::<()>();

    tokio::select! {
        _ = ctrl_c => {},
        _ = terminate => {},
    }
    debug!("signal received, shutting down...");
}

src/templates.rs (new file)
@@ -0,0 +1,23 @@
use askama::Template;
use axum::{
    http::StatusCode,
    response::{Html, IntoResponse, Response},
};

pub struct HtmlTemplate<T>(pub T);

impl<T> IntoResponse for HtmlTemplate<T>
where
    T: Template,
{
    fn into_response(self) -> Response {
        match self.0.render() {
            Ok(html) => Html(html).into_response(),
            Err(err) => (
                StatusCode::INTERNAL_SERVER_ERROR,
                format!("Failed to render template. Error: {}", err),
            )
                .into_response(),
        }
    }
}
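
`HtmlTemplate` works for any askama template, not just `FeedsTemplate`. A minimal sketch with a hypothetical inline template (askama's `source`/`ext` attributes substitute for a template file here; the handler name and markup are invented):

use askama::Template;
use axum::response::IntoResponse;

// Hypothetical template, for illustration only.
#[derive(Template)]
#[template(source = "<h1>Hello, {{ name }}!</h1>", ext = "html")]
struct HelloTemplate {
    name: String,
}

async fn hello() -> impl IntoResponse {
    // Wrapping in HtmlTemplate renders the template and sets an HTML response,
    // or returns a 500 with the render error, as implemented above.
    HtmlTemplate(HelloTemplate {
        name: String::from("penguin"),
    })
}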