From 55d3364b0e503fe014511e6e1433a8bea0c814f1 Mon Sep 17 00:00:00 2001 From: Sebastian Hugentobler Date: Wed, 26 Jun 2024 13:53:00 +0200 Subject: [PATCH] implement html & opds search --- Cargo.lock | 2 +- README.md | 8 ++- calibre-db/src/calibre.rs | 16 ++++- calibre-db/src/lib.rs | 2 + calibre-db/src/search.rs | 71 +++++++++++++++++++ little-hesinde/Cargo.toml | 2 +- little-hesinde/src/handlers/html/recent.rs | 2 +- little-hesinde/src/handlers/html/search.rs | 20 ++++++ little-hesinde/src/handlers/opds/books.rs | 4 ++ little-hesinde/src/handlers/opds/search.rs | 12 ++++ .../src/handlers/opds/search_info.rs | 27 +++++++ little-hesinde/src/handlers/search.rs | 38 ++++++++++ little-hesinde/src/lib.rs | 10 ++- little-hesinde/src/opds/feed.rs | 7 ++ little-hesinde/src/opds/media_type.rs | 2 + little-hesinde/src/opds/relation.rs | 3 + little-hesinde/src/opds/search.rs | 65 +++++++++++++++++ little-hesinde/static/style.css | 4 ++ little-hesinde/templates/base.html | 15 ++++ 19 files changed, 302 insertions(+), 8 deletions(-) create mode 100644 calibre-db/src/search.rs create mode 100644 little-hesinde/src/handlers/html/search.rs create mode 100644 little-hesinde/src/handlers/opds/search.rs create mode 100644 little-hesinde/src/handlers/opds/search_info.rs create mode 100644 little-hesinde/src/handlers/search.rs create mode 100644 little-hesinde/src/opds/search.rs diff --git a/Cargo.lock b/Cargo.lock index 59e23ff..18f80c5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -889,7 +889,7 @@ dependencies = [ [[package]] name = "little-hesinde" -version = "0.1.5" +version = "0.2.0" dependencies = [ "calibre-db", "clap", diff --git a/README.md b/README.md index 73b837b..26ba9f5 100644 --- a/README.md +++ b/README.md @@ -59,9 +59,13 @@ http://localhost:3000/opds is the entry point for the OPDS feed. Not planned, put a reverse proxy in front of it that handles access. -## No search? +## How do I search? -On my todo list once I feel like I need it. 
+Enter your search text and you are done. Searching is done on title, tags, +author, series title, identifiers and comments. + +For more sophisticated queries take a look at the +[fts5 documentation](https://www.sqlite.org/fts5.html#full_text_query_syntax). ## Why are the OPDS entries not paginated? diff --git a/calibre-db/src/calibre.rs b/calibre-db/src/calibre.rs index fc6bc53..94bfd11 100644 --- a/calibre-db/src/calibre.rs +++ b/calibre-db/src/calibre.rs @@ -5,8 +5,11 @@ use std::path::Path; use r2d2::Pool; use r2d2_sqlite::SqliteConnectionManager; -use crate::data::{ - author::Author, book::Book, error::DataStoreError, pagination::SortOrder, series::Series, +use crate::{ + data::{ + author::Author, book::Book, error::DataStoreError, pagination::SortOrder, series::Series, + }, + search::{self, search}, }; /// Top level calibre functions, bundling all sub functions in one place and providing secure access to @@ -24,9 +27,18 @@ impl Calibre { let manager = SqliteConnectionManager::file(path); let pool = r2d2::Pool::new(manager)?; + search::attach(&pool)?; + Ok(Self { pool }) } + /// Full text search with a query. + /// + /// See https://www.sqlite.org/fts5.html#full_text_query_syntax for syntax. + pub fn search(&self, query: &str) -> Result, DataStoreError> { + search(query, &self.pool) + } + /// Fetch book data from calibre, starting at `cursor`, fetching up to an amount of `limit` and /// ordering by `sort_order`. pub fn books( diff --git a/calibre-db/src/lib.rs b/calibre-db/src/lib.rs index bae8fbc..6b93b72 100644 --- a/calibre-db/src/lib.rs +++ b/calibre-db/src/lib.rs @@ -1,6 +1,8 @@ //! Read data from a calibre library, leveraging its SQLite metadata database. pub mod calibre; +pub mod search; + /// Data structs for the calibre database. pub mod data { pub mod author; diff --git a/calibre-db/src/search.rs b/calibre-db/src/search.rs new file mode 100644 index 0000000..bcfb68c --- /dev/null +++ b/calibre-db/src/search.rs @@ -0,0 +1,71 @@ +//! 
Provide search functionality for calibre. +//! +//! Because the calibre database can not be disturbed (it is treated as read-only) +//! it attaches an in-memory database and inserts the relevant data into a +//! virtual table leveraging fts5 (https://www.sqlite.org/fts5.html). Full-text search is run on +//! that virtual table. + +use r2d2::{Pool, PooledConnection}; +use r2d2_sqlite::SqliteConnectionManager; +use rusqlite::named_params; + +use crate::data::{book::Book, error::DataStoreError}; + +/// A lot of joins but only run once at startup. +const SEARCH_INIT_QUERY: &str = "INSERT INTO search.fts(book_id, data) + SELECT b.id as book_id, + b.title || ' ' || + a.name || ' ' || + c.text || ' ' || + GROUP_CONCAT(DISTINCT t.name) || ' ' || + GROUP_CONCAT(DISTINCT i.val) || ' ' || + GROUP_CONCAT(DISTINCT s.name) as data + FROM main.books as b + JOIN main.books_authors_link AS b2a ON b.id = b2a.book + JOIN main.authors AS a ON b2a.author = a.id + JOIN main.comments AS c ON c.book = b.id + JOIN main.books_tags_link AS b2t ON b.id = b2t.book + JOIN main.tags AS t ON b2t.tag = t.id + JOIN main.identifiers AS i ON i.book = b.id + JOIN main.books_series_link AS b2s ON b.id = b2s.book + JOIN main.series AS s ON b2s.series = s.id"; + +/// Attach the fts in-memory database to the read-only calibre database. +pub(crate) fn attach(pool: &Pool) -> Result<(), DataStoreError> { + let conn = pool.get()?; + + conn.execute("ATTACH DATABASE ':memory:' AS search", [])?; + init(&conn)?; + + Ok(()) +} + +/// Initialise the fts virtual table. +fn init(conn: &PooledConnection) -> Result<(), DataStoreError> { + conn.execute( + "CREATE VIRTUAL TABLE search.fts USING fts5(book_id, data)", + [], + )?; + conn.execute(SEARCH_INIT_QUERY, [])?; + + Ok(()) +} + +/// Run a full-text search with the parameter `query`. 
+pub(crate) fn search( + query: &str, + pool: &Pool, +) -> Result, DataStoreError> { + let conn = pool.get()?; + + let mut stmt = + conn.prepare("SELECT book_id FROM search.fts WHERE data MATCH (:query) ORDER BY rank")?; + let params = named_params! { ":query": query }; + let books = stmt + .query_map(params, |r| -> Result { r.get(0) })? + .filter_map(Result::ok) + .filter_map(|id| Book::scalar_book(&conn, id).ok()) + .collect(); + + Ok(books) +} diff --git a/little-hesinde/Cargo.toml b/little-hesinde/Cargo.toml index 80c72c8..71589d7 100644 --- a/little-hesinde/Cargo.toml +++ b/little-hesinde/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "little-hesinde" -version = "0.1.5" +version = "0.2.0" edition = "2021" license = { workspace = true } authors = { workspace = true } diff --git a/little-hesinde/src/handlers/html/recent.rs b/little-hesinde/src/handlers/html/recent.rs index 05c142f..d9ab06f 100644 --- a/little-hesinde/src/handlers/html/recent.rs +++ b/little-hesinde/src/handlers/html/recent.rs @@ -8,7 +8,7 @@ use crate::{data::book::Book, templates::TEMPLATES}; /// Render recent books as html. pub async fn handler(recent_books: Vec) -> Result { let mut context = Context::new(); - context.insert("title", "Recent Books"); + context.insert("title", ""); context.insert("nav", "recent"); context.insert("books", &recent_books); diff --git a/little-hesinde/src/handlers/html/search.rs b/little-hesinde/src/handlers/html/search.rs new file mode 100644 index 0000000..fc1b547 --- /dev/null +++ b/little-hesinde/src/handlers/html/search.rs @@ -0,0 +1,20 @@ +//! Handle search results in html. + +use poem::{error::InternalServerError, web::Html, IntoResponse, Response}; +use tera::Context; + +use crate::{data::book::Book, templates::TEMPLATES}; + +/// Render all search results as html. 
+pub async fn handler(books: Vec) -> Result { + let mut context = Context::new(); + context.insert("title", "Search Results"); + context.insert("nav", "search"); + context.insert("books", &books); + + Ok(TEMPLATES + .render("book_list", &context) + .map_err(InternalServerError) + .map(Html)? + .into_response()) +} diff --git a/little-hesinde/src/handlers/opds/books.rs b/little-hesinde/src/handlers/opds/books.rs index 22de7dd..90f2a60 100644 --- a/little-hesinde/src/handlers/opds/books.rs +++ b/little-hesinde/src/handlers/opds/books.rs @@ -23,7 +23,11 @@ pub async fn handler( .books(u32::MAX.into(), None, &SortOrder::ASC) .map(|x| x.iter().filter_map(|y| Book::full_book(y, state)).collect()) .map_err(HandlerError::DataError)?; + render_books(books).await +} +/// Render a list of books as OPDS entries in a feed. +pub(crate) async fn render_books(books: Vec) -> Result { let entries: Vec = books.into_iter().map(Entry::from).collect(); let now = OffsetDateTime::now_utc(); diff --git a/little-hesinde/src/handlers/opds/search.rs b/little-hesinde/src/handlers/opds/search.rs new file mode 100644 index 0000000..efbf3f0 --- /dev/null +++ b/little-hesinde/src/handlers/opds/search.rs @@ -0,0 +1,12 @@ +//! Handle search results in opds. + +use poem::Response; + +use crate::data::book::Book; + +use super::books::render_books; + +/// Render search results as OPDS entries in a feed. +pub async fn handler(books: Vec) -> Result { + render_books(books).await +} diff --git a/little-hesinde/src/handlers/opds/search_info.rs b/little-hesinde/src/handlers/opds/search_info.rs new file mode 100644 index 0000000..92cee0d --- /dev/null +++ b/little-hesinde/src/handlers/opds/search_info.rs @@ -0,0 +1,27 @@ +//! Handle open search description. + +use crate::{ + handlers::error::HandlerError, + opds::search::{OpenSearchDescription, Url}, + APP_NAME, +}; +use poem::{handler, IntoResponse, Response}; + +/// Render search information as open search description. 
+#[handler] +pub async fn handler() -> Result { + let search = OpenSearchDescription { + short_name: APP_NAME.to_string(), + description: "Search for ebooks".to_string(), + input_encoding: "UTF-8".to_string(), + output_encoding: "UTF-8".to_string(), + url: Url { + type_name: "application/atom+xml".to_string(), + template: "/opds/search?query={searchTerms}".to_string(), + }, + }; + let xml = search.as_xml().map_err(HandlerError::OpdsError)?; + Ok(xml + .with_content_type("application/atom+xml") + .into_response()) +} diff --git a/little-hesinde/src/handlers/search.rs b/little-hesinde/src/handlers/search.rs new file mode 100644 index 0000000..416043d --- /dev/null +++ b/little-hesinde/src/handlers/search.rs @@ -0,0 +1,38 @@ +//! Handle search requests. + +use std::sync::Arc; + +use poem::{ + handler, + web::{Data, Query}, + Response, +}; +use serde::Deserialize; + +use crate::{app_state::AppState, data::book::Book, handlers::error::HandlerError, Accept}; + +#[derive(Deserialize)] +struct Params { + /// Query for a search request. + query: String, +} +/// Handle a search request with query parameter `query`. +#[handler] +pub async fn handler( + accept: Data<&Accept>, + state: Data<&Arc>, + Query(params): Query, +) -> Result { + let books = state + .calibre + .search(&params.query) + .map_err(HandlerError::DataError)? 
+ .iter() + .filter_map(|book| Book::full_book(book, *state)) + .collect(); + + match *accept { + Accept::Html => crate::handlers::html::search::handler(books).await, + Accept::Opds => crate::handlers::opds::search::handler(books).await, + } +} diff --git a/little-hesinde/src/lib.rs b/little-hesinde/src/lib.rs index 87a7fe4..8cf2d2f 100644 --- a/little-hesinde/src/lib.rs +++ b/little-hesinde/src/lib.rs @@ -32,6 +32,7 @@ pub mod handlers { pub mod authors; pub mod books; pub mod recent; + pub mod search; pub mod series; pub mod series_single; } @@ -42,6 +43,8 @@ pub mod handlers { pub mod books; pub mod feed; pub mod recent; + pub mod search; + pub mod search_info; pub mod series; pub mod series_single; } @@ -53,6 +56,7 @@ pub mod handlers { pub mod error; pub mod paginated; pub mod recent; + pub mod search; pub mod series; pub mod series_single; pub mod source_archive; @@ -67,11 +71,12 @@ pub mod opds { pub mod link; pub mod media_type; pub mod relation; + pub mod search; } pub mod templates; pub const APP_NAME: &str = "little-hesinde"; -pub const VERSION: &str = "0.1.5"; +pub const VERSION: &str = "0.2.0"; /// Internal marker data in lieu of a proper `Accept` header. 
#[derive(Debug, Clone, Copy)] @@ -114,6 +119,7 @@ pub async fn run(config: Config) -> Result<(), std::io::Error> { .at("/cover/:id", get(handlers::cover::handler)) .at("/book/:id/:format", get(handlers::books::handler_download)) .at("/archive", get(handlers::source_archive::handler)) + .at("/search", get(handlers::search::handler)) .nest("/static", EmbeddedFilesEndpoint::::new()) .data(Accept::Html); @@ -125,6 +131,8 @@ pub async fn run(config: Config) -> Result<(), std::io::Error> { .at("/authors/:id", get(handlers::author::handler)) .at("/series", get(handlers::series::handler_init)) .at("/series/:id", get(handlers::series_single::handler)) + .at("/search/info", get(handlers::opds::search_info::handler)) + .at("/search", get(handlers::search::handler)) .data(Accept::Opds); let app = Route::new() diff --git a/little-hesinde/src/opds/feed.rs b/little-hesinde/src/opds/feed.rs index e6ce331..02f8ecf 100644 --- a/little-hesinde/src/opds/feed.rs +++ b/little-hesinde/src/opds/feed.rs @@ -61,6 +61,13 @@ impl Feed { title: Some("Home".to_string()), count: None, }, + Link { + href: "/opds/search/info".to_string(), + media_type: MediaType::Search, + rel: Relation::Search, + title: Some("Search".to_string()), + count: None, + }, self_link, ]; links.append(&mut additional_links); diff --git a/little-hesinde/src/opds/media_type.rs b/little-hesinde/src/opds/media_type.rs index 7a98869..dc2632c 100644 --- a/little-hesinde/src/opds/media_type.rs +++ b/little-hesinde/src/opds/media_type.rs @@ -16,6 +16,7 @@ pub enum MediaType { Navigation, Pdf, Text, + Search, } /// Convert `epub` and `pdf` formats to their respective media type. Everything else is `Text`. 
@@ -46,6 +47,7 @@ impl std::fmt::Display for MediaType { ), MediaType::Pdf => write!(f, "application/pdf"), MediaType::Text => write!(f, "text"), + MediaType::Search => write!(f, "application/opensearchdescription+xml"), } } } diff --git a/little-hesinde/src/opds/relation.rs b/little-hesinde/src/opds/relation.rs index 8a9a9d3..f1ee94c 100644 --- a/little-hesinde/src/opds/relation.rs +++ b/little-hesinde/src/opds/relation.rs @@ -14,6 +14,7 @@ pub enum Relation { Subsection, Thumbnail, Acquisition, + Search, } /// Convert a media type int a relation. @@ -29,6 +30,7 @@ impl From for Relation { MediaType::Navigation => Relation::Myself, MediaType::Pdf => Relation::Acquisition, MediaType::Text => Relation::Acquisition, + MediaType::Search => Relation::Search, } } } @@ -43,6 +45,7 @@ impl std::fmt::Display for Relation { Relation::Subsection => write!(f, "subsection"), Relation::Thumbnail => write!(f, "http://opds-spec.org/image/thumbnail"), Relation::Acquisition => write!(f, "http://opds-spec.org/acquisition"), + Relation::Search => write!(f, "application/opensearchdescription+xml"), } } } diff --git a/little-hesinde/src/opds/search.rs b/little-hesinde/src/opds/search.rs new file mode 100644 index 0000000..42b1cb0 --- /dev/null +++ b/little-hesinde/src/opds/search.rs @@ -0,0 +1,65 @@ +//! Search data. + +use std::io::Cursor; + +use quick_xml::{ + events::{BytesDecl, BytesStart, Event}, + se::to_string, + Reader, Writer, +}; +use serde::Serialize; + +use super::error::OpdsError; + +/// Url pointing to a location. +#[derive(Debug, Serialize)] +pub struct Url { + #[serde(rename = "@type")] + pub type_name: String, + #[serde(rename = "@template")] + pub template: String, +} + +/// Search information. 
+#[derive(Debug, Serialize)] +pub struct OpenSearchDescription { + #[serde(rename = "ShortName")] + pub short_name: String, + #[serde(rename = "Description")] + pub description: String, + #[serde(rename = "InputEncoding")] + pub input_encoding: String, + #[serde(rename = "OutputEncoding")] + pub output_encoding: String, + #[serde(rename = "Url")] + pub url: Url, +} + +impl OpenSearchDescription { + /// Serialize to an OpenSearch description XML document: XML declaration plus a root element carrying the OpenSearch 1.1 namespace. + pub fn as_xml(&self) -> Result { + let xml = to_string(&self)?; + let mut reader = Reader::from_str(&xml); + reader.config_mut().trim_text(true); + + let declaration = BytesDecl::new("1.0", Some("UTF-8"), None); + let mut writer = Writer::new(Cursor::new(Vec::new())); + writer.write_event(Event::Decl(declaration))?; + + let mut search_start = BytesStart::new("OpenSearchDescription"); + search_start.push_attribute(("xmlns", "http://a9.com/-/spec/opensearch/1.1/")); + + loop { + match reader.read_event() { + Ok(Event::Start(e)) if e.name().as_ref() == b"OpenSearchDescription" => { + writer.write_event(Event::Start(search_start.clone()))? + } + Ok(Event::Eof) => break, + Ok(e) => writer.write_event(e)?, + Err(e) => return Err(e)?, + } + } + let result = writer.into_inner().into_inner(); + Ok(String::from_utf8(result)?) + } +} diff --git a/little-hesinde/static/style.css b/little-hesinde/static/style.css index bcb9324..1ae602b 100644 --- a/little-hesinde/static/style.css +++ b/little-hesinde/static/style.css @@ -15,6 +15,10 @@ nav ul li { padding-bottom: 0.25rem; } +.nav-input { + margin-bottom: 0; +} + .nav-active { border-bottom: solid var(--pico-primary-underline); } diff --git a/little-hesinde/templates/base.html b/little-hesinde/templates/base.html index 105d379..e854e15 100644 --- a/little-hesinde/templates/base.html +++ b/little-hesinde/templates/base.html @@ -11,6 +11,21 @@