more documentation

This commit is contained in:
Sebastian Hugentobler 2025-07-01 21:15:09 +02:00
parent 6a5b309391
commit a62b0aacf5
Signed by: shu
SSH key fingerprint: SHA256:ppcx6MlixdNZd5EUM1nkHOKoyQYoJwzuQKXM6J/t66M
8 changed files with 26 additions and 1 deletion

View file

@@ -24,6 +24,7 @@ pub mod query;
pub mod routes;
pub mod state;

/// OpenAPI tag for all endpoints.
const TAG: &str = "little-librarian";

/// OpenAPI documentation configuration.

View file

@@ -8,8 +8,10 @@ use uuid::Uuid;
use crate::storage;
use crate::storage::jobs::AddError;

/// Manage background job execution and storage.
#[derive(Clone, Debug)]
pub struct JobManager {
    /// Database connection for job storage.
    db: Arc<storage::jobs::Jobs>,
}
@@ -21,10 +23,12 @@ pub enum ExecuteError {
}

impl JobManager {
    /// Create a new job manager with database connection.
    pub fn new(db: Arc<storage::jobs::Jobs>) -> Self {
        Self { db }
    }

    /// Execute a task in the background and return job ID.
    pub async fn execute<F, Fut, T, E>(&self, task: F) -> Result<Uuid, ExecuteError>
    where
        F: FnOnce() -> Fut + Send + 'static,
@@ -49,6 +53,7 @@ impl JobManager {
    }
}

/// Mark job as successful and store result.
async fn success<T>(job_id: &Uuid, result: &T, db: &storage::jobs::Jobs)
where
    T: serde::Serialize + Send + Sync + 'static,
@@ -66,6 +71,7 @@ where
    }
}

/// Mark job as failed and log error.
async fn failure<E>(job_id: &Uuid, error: E, db: &storage::jobs::Jobs)
where
    E: snafu::Error + Send + Sync + 'static,

View file

@@ -18,6 +18,7 @@ use crate::{
    storage::{self, queries::DocumentMatch},
};

/// Maximum allowed limit for query results.
const MAX_LIMIT: usize = 10;

/// Errors that occur during query processing.
@@ -83,7 +84,7 @@ pub struct QueryStartResponse {
    pub id: String,
}

-/// Execute a semantic search query against the document database.
+/// Execute a semantic search query against the document database and return the job id.
#[utoipa::path(
    post,
    path = "/query",
@@ -151,6 +152,7 @@ impl HttpStatus for RetrieveError {
http_error!(RetrieveError);

/// Get results for a completed query job.
#[utoipa::path(
    get,
    path = "/query/{id}",

View file

@@ -6,8 +6,10 @@ use zip::result::ZipError;
use super::extractor::{EpubSnafu, ExtractionError, Extractor, extension};

/// EPUB text extractor.
pub struct Epub;

/// Errors that occur during EPUB text extraction.
#[derive(Debug, Snafu)]
pub enum EpubExtractionError {
    #[snafu(display("Failed to open epub as zip archive."))]

View file

@@ -7,6 +7,7 @@ use super::{
    pdf::{Pdf, PdfExtractionError},
};

/// Errors that occur during text extraction from documents.
#[derive(Debug, Snafu)]
pub enum ExtractionError {
    #[snafu(display("Failed to read input for extraction."))]
@@ -17,10 +18,13 @@ pub enum ExtractionError {
    Epub { source: EpubExtractionError },
}

/// Extract text content from document bytes.
pub trait Extractor {
    /// Extract text from document bytes.
    fn extract(input: &[u8]) -> Result<String, ExtractionError>;
}

/// Get file extension from path as lowercase string.
pub fn extension<P: AsRef<Path>>(path: P) -> String {
    let path = path.as_ref();
    path.extension()
@@ -29,6 +33,7 @@ pub fn extension<P: AsRef<Path>>(path: P) -> String {
        .into_owned()
}

/// Extract text from input stream based on file extension.
pub fn extract<R: Read>(mut input: R, extension: &str) -> Result<Option<String>, ExtractionError> {
    let mut buffer = Vec::new();
    input.read_to_end(&mut buffer).context(ReadSnafu)?;

View file

@@ -3,8 +3,10 @@ use snafu::{ResultExt, Snafu};
use super::extractor::{Extractor, PdfSnafu};

/// PDF text extractor.
pub struct Pdf;

/// Extract text from all pages of a PDF document.
fn get_pdf_text(doc: &Document) -> Result<String, lopdf::Error> {
    let pages: Vec<u32> = doc.get_pages().keys().cloned().collect();
    let text = doc.extract_text(&pages)?;
@@ -12,6 +14,7 @@ fn get_pdf_text(doc: &Document) -> Result<String, lopdf::Error> {
    Ok(text)
}

/// Errors that occur during PDF text extraction.
#[derive(Debug, Snafu)]
pub enum PdfExtractionError {
    #[snafu(display("Failed to open pdf."))]

View file

@@ -7,12 +7,14 @@ use uuid::Uuid;
use super::queries::DocumentMatch;

/// Database operations for background job management.
#[derive(Debug, Clone)]
pub struct Jobs {
    /// Connection pool for database operations.
    pool: Arc<PgPool>,
}

/// Status of a background job.
#[derive(Debug, Clone, Serialize, Deserialize, Type)]
#[sqlx(type_name = "job_status", rename_all = "lowercase")]
pub enum JobStatus {
@@ -75,6 +77,7 @@ pub enum RetrieveError {
}

impl Jobs {
    /// Create a new jobs handler with database pool.
    pub fn new(pool: Arc<PgPool>) -> Self {
        Self { pool }
    }
@@ -114,6 +117,7 @@ impl Jobs {
        Ok(())
    }

    /// Retrieve job status and results by ID.
    pub async fn retrieve(
        &self,
        id: &Uuid,

View file

@@ -7,6 +7,7 @@ use sqlx::{FromRow, PgPool};

use crate::{hash::SHA256_LENGTH, text_encoder::Embeddings};

/// Database operations for document queries and embeddings.
#[derive(Debug, Clone)]
pub struct Queries {
    /// Connection pool for database operations.
@@ -67,6 +68,7 @@ pub struct DocumentMatch {
}

impl Queries {
    /// Create a new queries handler with database pool.
    pub fn new(pool: Arc<PgPool>) -> Self {
        Self { pool }
    }