more documentation

Sebastian Hugentobler 2025-07-01 21:15:09 +02:00
parent 6a5b309391
commit a62b0aacf5
Signed by: shu
SSH key fingerprint: SHA256:ppcx6MlixdNZd5EUM1nkHOKoyQYoJwzuQKXM6J/t66M
8 changed files with 26 additions and 1 deletion

View file

@@ -24,6 +24,7 @@ pub mod query;
pub mod routes;
pub mod state;

/// OpenAPI tag for all endpoints.
const TAG: &str = "little-librarian";

/// OpenAPI documentation configuration.

View file

@@ -8,8 +8,10 @@ use uuid::Uuid;
use crate::storage;
use crate::storage::jobs::AddError;

/// Manage background job execution and storage.
#[derive(Clone, Debug)]
pub struct JobManager {
    /// Database connection for job storage.
    db: Arc<storage::jobs::Jobs>,
}
@@ -21,10 +23,12 @@ pub enum ExecuteError {
}

impl JobManager {
    /// Create a new job manager with database connection.
    pub fn new(db: Arc<storage::jobs::Jobs>) -> Self {
        Self { db }
    }

    /// Execute a task in the background and return job ID.
    pub async fn execute<F, Fut, T, E>(&self, task: F) -> Result<Uuid, ExecuteError>
    where
        F: FnOnce() -> Fut + Send + 'static,
@@ -49,6 +53,7 @@ impl JobManager {
    }
}

/// Mark job as successful and store result.
async fn success<T>(job_id: &Uuid, result: &T, db: &storage::jobs::Jobs)
where
    T: serde::Serialize + Send + Sync + 'static,
@@ -66,6 +71,7 @@ where
    }
}

/// Mark job as failed and log error.
async fn failure<E>(job_id: &Uuid, error: E, db: &storage::jobs::Jobs)
where
    E: snafu::Error + Send + Sync + 'static,
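
As a usage sketch, not part of this commit: given the bounds above (and assuming `Fut` resolves to `Result<T, E>`, which the truncated hunk does not show), a caller hands `execute` a closure that builds the future and gets the job id back right away:

// Hypothetical call site; the task body and error type are placeholders.
use std::sync::Arc;
use uuid::Uuid;

async fn start_job(db: Arc<storage::jobs::Jobs>) -> Result<Uuid, ExecuteError> {
    let manager = JobManager::new(db);
    let job_id = manager
        .execute(|| async {
            // Long-running work goes here; an Ok value is serialized and
            // stored by `success`, an Err is logged and recorded by `failure`.
            Ok::<String, std::io::Error>("done".to_string())
        })
        .await?;
    Ok(job_id)
}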

View file

@@ -18,6 +18,7 @@ use crate::{
    storage::{self, queries::DocumentMatch},
};

/// Maximum allowed limit for query results.
const MAX_LIMIT: usize = 10;

/// Errors that occur during query processing.
@@ -83,7 +84,7 @@ pub struct QueryStartResponse {
    pub id: String,
}

-/// Execute a semantic search query against the document database.
+/// Execute a semantic search query against the document database and return the job id.
#[utoipa::path(
    post,
    path = "/query",
@@ -151,6 +152,7 @@ impl HttpStatus for RetrieveError {
http_error!(RetrieveError);

/// Get results for a completed query job.
#[utoipa::path(
    get,
    path = "/query/{id}",
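
A hedged client-side sketch of the flow across these two endpoints; only `QueryStartResponse { id }` is confirmed by the diff, so the request body and the result payload shapes are assumptions:

// Hypothetical reqwest client; base URL and body shape are placeholders.
async fn query_roundtrip(base: &str) -> Result<(), reqwest::Error> {
    let client = reqwest::Client::new();

    // POST /query starts a background job and returns its id.
    let start: serde_json::Value = client
        .post(format!("{base}/query"))
        .json(&serde_json::json!({ "query": "solar sails", "limit": 5 }))
        .send()
        .await?
        .json()
        .await?;
    let id = start["id"].as_str().unwrap_or_default();

    // GET /query/{id} retrieves the stored results once the job finishes.
    let body = client
        .get(format!("{base}/query/{id}"))
        .send()
        .await?
        .text()
        .await?;
    println!("{body}");
    Ok(())
}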

View file

@@ -6,8 +6,10 @@ use zip::result::ZipError;
use super::extractor::{EpubSnafu, ExtractionError, Extractor, extension};

/// EPUB text extractor.
pub struct Epub;

/// Errors that occur during EPUB text extraction.
#[derive(Debug, Snafu)]
pub enum EpubExtractionError {
    #[snafu(display("Failed to open epub as zip archive."))]

View file

@@ -7,6 +7,7 @@ use super::{
    pdf::{Pdf, PdfExtractionError},
};

/// Errors that occur during text extraction from documents.
#[derive(Debug, Snafu)]
pub enum ExtractionError {
    #[snafu(display("Failed to read input for extraction."))]
@@ -17,10 +18,13 @@ pub enum ExtractionError {
    Epub { source: EpubExtractionError },
}

/// Extract text content from document bytes.
pub trait Extractor {
    /// Extract text from document bytes.
    fn extract(input: &[u8]) -> Result<String, ExtractionError>;
}

/// Get file extension from path as lowercase string.
pub fn extension<P: AsRef<Path>>(path: P) -> String {
    let path = path.as_ref();
    path.extension()
@@ -29,6 +33,7 @@ pub fn extension<P: AsRef<Path>>(path: P) -> String {
        .into_owned()
}

/// Extract text from input stream based on file extension.
pub fn extract<R: Read>(mut input: R, extension: &str) -> Result<Option<String>, ExtractionError> {
    let mut buffer = Vec::new();
    input.read_to_end(&mut buffer).context(ReadSnafu)?;
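
Putting the two helpers together, a plausible call site (reading `Ok(None)` as "no extractor for this extension", which the hunk implies but does not show):

// Hypothetical caller; the path is a placeholder.
use std::fs::File;

fn text_of(path: &str) -> Result<(), Box<dyn std::error::Error>> {
    let ext = extension(path); // lowercased, e.g. "epub" or "pdf"
    let file = File::open(path)?;
    match extract(file, &ext)? {
        Some(text) => println!("extracted {} characters", text.len()),
        None => eprintln!("no extractor for .{ext} files"),
    }
    Ok(())
}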

View file

@@ -3,8 +3,10 @@ use snafu::{ResultExt, Snafu};
use super::extractor::{Extractor, PdfSnafu};

/// PDF text extractor.
pub struct Pdf;

/// Extract text from all pages of a PDF document.
fn get_pdf_text(doc: &Document) -> Result<String, lopdf::Error> {
    let pages: Vec<u32> = doc.get_pages().keys().cloned().collect();
    let text = doc.extract_text(&pages)?;
@@ -12,6 +14,7 @@ fn get_pdf_text(doc: &Document) -> Result<String, lopdf::Error> {
    Ok(text)
}

/// Errors that occur during PDF text extraction.
#[derive(Debug, Snafu)]
pub enum PdfExtractionError {
    #[snafu(display("Failed to open pdf."))]
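
The `Extractor` impl for `Pdf` is not in this diff; a sketch of the missing glue, assuming the bytes are parsed with lopdf's in-memory loader before the page walk runs:

// Sketch only: the same page walk as get_pdf_text, starting from bytes.
use lopdf::Document;

fn pdf_text_from_bytes(input: &[u8]) -> Result<String, lopdf::Error> {
    let doc = Document::load_mem(input)?; // parse the PDF from memory
    let pages: Vec<u32> = doc.get_pages().keys().cloned().collect();
    doc.extract_text(&pages)
}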

View file

@@ -7,12 +7,14 @@ use uuid::Uuid;
use super::queries::DocumentMatch;

/// Database operations for background job management.
#[derive(Debug, Clone)]
pub struct Jobs {
    /// Connection pool for database operations.
    pool: Arc<PgPool>,
}

/// Status of a background job.
#[derive(Debug, Clone, Serialize, Deserialize, Type)]
#[sqlx(type_name = "job_status", rename_all = "lowercase")]
pub enum JobStatus {
@@ -75,6 +77,7 @@ pub enum RetrieveError {
}

impl Jobs {
    /// Create a new jobs handler with database pool.
    pub fn new(pool: Arc<PgPool>) -> Self {
        Self { pool }
    }
@@ -114,6 +117,7 @@ impl Jobs {
        Ok(())
    }

    /// Retrieve job status and results by ID.
    pub async fn retrieve(
        &self,
        id: &Uuid,
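
Wiring sketch for the jobs handle; the connection string is a placeholder and the exact return type of `retrieve` is cut off by the hunk:

// Hypothetical setup; not part of this commit.
use std::sync::Arc;
use sqlx::postgres::PgPoolOptions;

async fn job_status(id: uuid::Uuid) -> Result<(), Box<dyn std::error::Error>> {
    let pool = PgPoolOptions::new()
        .connect("postgres://localhost/little_librarian") // assumed DSN
        .await?;
    let jobs = Jobs::new(Arc::new(pool));
    // Presumably yields the JobStatus plus any stored result payload.
    let _job = jobs.retrieve(&id).await?;
    Ok(())
}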

View file

@@ -7,6 +7,7 @@ use sqlx::{FromRow, PgPool};
use crate::{hash::SHA256_LENGTH, text_encoder::Embeddings};

/// Database operations for document queries and embeddings.
#[derive(Debug, Clone)]
pub struct Queries {
    /// Connection pool for database operations.
@@ -67,6 +68,7 @@ pub struct DocumentMatch {
}

impl Queries {
    /// Create a new queries handler with database pool.
    pub fn new(pool: Arc<PgPool>) -> Self {
        Self { pool }
    }