diff --git a/src/api.rs b/src/api.rs index 47c7683..80d6202 100644 --- a/src/api.rs +++ b/src/api.rs @@ -24,6 +24,7 @@ pub mod query; pub mod routes; pub mod state; +/// OpenAPI tag for all endpoints. const TAG: &str = "little-librarian"; /// OpenAPI documentation configuration. diff --git a/src/api/jobs.rs b/src/api/jobs.rs index ae9bdb5..933fddc 100644 --- a/src/api/jobs.rs +++ b/src/api/jobs.rs @@ -8,8 +8,10 @@ use uuid::Uuid; use crate::storage; use crate::storage::jobs::AddError; +/// Manage background job execution and storage. #[derive(Clone, Debug)] pub struct JobManager { + /// Database connection for job storage. db: Arc, } @@ -21,10 +23,12 @@ pub enum ExecuteError { } impl JobManager { + /// Create a new job manager with a database connection. pub fn new(db: Arc) -> Self { Self { db } } + /// Execute a task in the background and return the job ID. pub async fn execute(&self, task: F) -> Result where F: FnOnce() -> Fut + Send + 'static, @@ -49,6 +53,7 @@ impl JobManager { } } +/// Mark the job as successful and store the result. async fn success(job_id: &Uuid, result: &T, db: &storage::jobs::Jobs) where T: serde::Serialize + Send + Sync + 'static, @@ -66,6 +71,7 @@ where } } +/// Mark the job as failed and log the error. async fn failure(job_id: &Uuid, error: E, db: &storage::jobs::Jobs) where E: snafu::Error + Send + Sync + 'static, diff --git a/src/api/query.rs b/src/api/query.rs index 2661a27..5e6c672 100644 --- a/src/api/query.rs +++ b/src/api/query.rs @@ -18,6 +18,7 @@ use crate::{ storage::{self, queries::DocumentMatch}, }; +/// Maximum allowed limit for query results. const MAX_LIMIT: usize = 10; /// Errors that occur during query processing. @@ -83,7 +84,7 @@ pub struct QueryStartResponse { pub id: String, } -/// Execute a semantic search query against the document database. +/// Execute a semantic search query against the document database and return the job ID.
#[utoipa::path( post, path = "/query", @@ -151,6 +152,7 @@ impl HttpStatus for RetrieveError { http_error!(RetrieveError); +/// Get results for a completed query job. #[utoipa::path( get, path = "/query/{id}", diff --git a/src/extractors/epub.rs b/src/extractors/epub.rs index 5e7061a..7235c0c 100644 --- a/src/extractors/epub.rs +++ b/src/extractors/epub.rs @@ -6,8 +6,10 @@ use zip::result::ZipError; use super::extractor::{EpubSnafu, ExtractionError, Extractor, extension}; +/// EPUB text extractor. pub struct Epub; +/// Errors that occur during EPUB text extraction. #[derive(Debug, Snafu)] pub enum EpubExtractionError { #[snafu(display("Failed to open epub as zip archive."))] diff --git a/src/extractors/extractor.rs b/src/extractors/extractor.rs index 3a582b3..511751f 100644 --- a/src/extractors/extractor.rs +++ b/src/extractors/extractor.rs @@ -7,6 +7,7 @@ use super::{ pdf::{Pdf, PdfExtractionError}, }; +/// Errors that occur during text extraction from documents. #[derive(Debug, Snafu)] pub enum ExtractionError { #[snafu(display("Failed to read input for extraction."))] @@ -17,10 +18,13 @@ pub enum ExtractionError { Epub { source: EpubExtractionError }, } +/// Extract text content from document bytes. pub trait Extractor { + /// Extract text from document bytes. fn extract(input: &[u8]) -> Result; } +/// Get the file extension from a path as a lowercase string. pub fn extension>(path: P) -> String { let path = path.as_ref(); path.extension() @@ -29,6 +33,7 @@ pub fn extension>(path: P) -> String { .into_owned() } +/// Extract text from an input stream based on the file extension.
pub fn extract(mut input: R, extension: &str) -> Result, ExtractionError> { let mut buffer = Vec::new(); input.read_to_end(&mut buffer).context(ReadSnafu)?; diff --git a/src/extractors/pdf.rs b/src/extractors/pdf.rs index a3b36b3..48b8695 100644 --- a/src/extractors/pdf.rs +++ b/src/extractors/pdf.rs @@ -3,8 +3,10 @@ use snafu::{ResultExt, Snafu}; use super::extractor::{Extractor, PdfSnafu}; +/// PDF text extractor. pub struct Pdf; +/// Extract text from all pages of a PDF document. fn get_pdf_text(doc: &Document) -> Result { let pages: Vec = doc.get_pages().keys().cloned().collect(); let text = doc.extract_text(&pages)?; @@ -12,6 +14,7 @@ fn get_pdf_text(doc: &Document) -> Result { Ok(text) } +/// Errors that occur during PDF text extraction. #[derive(Debug, Snafu)] pub enum PdfExtractionError { #[snafu(display("Failed to open pdf."))] diff --git a/src/storage/jobs.rs b/src/storage/jobs.rs index 94942d8..5b8971c 100644 --- a/src/storage/jobs.rs +++ b/src/storage/jobs.rs @@ -7,12 +7,14 @@ use uuid::Uuid; use super::queries::DocumentMatch; +/// Database operations for background job management. #[derive(Debug, Clone)] pub struct Jobs { /// Connection pool for database operations. pool: Arc, } +/// Status of a background job. #[derive(Debug, Clone, Serialize, Deserialize, Type)] #[sqlx(type_name = "job_status", rename_all = "lowercase")] pub enum JobStatus { @@ -75,6 +77,7 @@ pub enum RetrieveError { } impl Jobs { + /// Create a new jobs handler with a database pool. pub fn new(pool: Arc) -> Self { Self { pool } } @@ -114,6 +117,7 @@ impl Jobs { Ok(()) } + /// Retrieve job status and results by ID. pub async fn retrieve( &self, id: &Uuid, diff --git a/src/storage/queries.rs b/src/storage/queries.rs index 65c8bc8..be988d2 100644 --- a/src/storage/queries.rs +++ b/src/storage/queries.rs @@ -7,6 +7,7 @@ use sqlx::{FromRow, PgPool}; use crate::{hash::SHA256_LENGTH, text_encoder::Embeddings}; +/// Database operations for document queries and embeddings.
#[derive(Debug, Clone)] pub struct Queries { /// Connection pool for database operations. pool: Arc, @@ -67,6 +68,7 @@ pub struct DocumentMatch { } impl Queries { + /// Create a new queries handler with a database pool. pub fn new(pool: Arc) -> Self { Self { pool } }