more documentation
This commit is contained in:
parent
6a5b309391
commit
a62b0aacf5
8 changed files with 26 additions and 1 deletions
|
@ -24,6 +24,7 @@ pub mod query;
|
|||
pub mod routes;
|
||||
pub mod state;
|
||||
|
||||
/// OpenAPI tag for all endpoints.
|
||||
const TAG: &str = "little-librarian";
|
||||
|
||||
/// OpenAPI documentation configuration.
|
||||
|
|
|
@ -8,8 +8,10 @@ use uuid::Uuid;
|
|||
use crate::storage;
|
||||
use crate::storage::jobs::AddError;
|
||||
|
||||
/// Manage background job execution and storage.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct JobManager {
|
||||
/// Database connection for job storage.
|
||||
db: Arc<storage::jobs::Jobs>,
|
||||
}
|
||||
|
||||
|
@ -21,10 +23,12 @@ pub enum ExecuteError {
|
|||
}
|
||||
|
||||
impl JobManager {
|
||||
/// Create a new job manager with database connection.
|
||||
pub fn new(db: Arc<storage::jobs::Jobs>) -> Self {
|
||||
Self { db }
|
||||
}
|
||||
|
||||
/// Execute a task in the background and return job ID.
|
||||
pub async fn execute<F, Fut, T, E>(&self, task: F) -> Result<Uuid, ExecuteError>
|
||||
where
|
||||
F: FnOnce() -> Fut + Send + 'static,
|
||||
|
@ -49,6 +53,7 @@ impl JobManager {
|
|||
}
|
||||
}
|
||||
|
||||
/// Mark job as successful and store result.
|
||||
async fn success<T>(job_id: &Uuid, result: &T, db: &storage::jobs::Jobs)
|
||||
where
|
||||
T: serde::Serialize + Send + Sync + 'static,
|
||||
|
@ -66,6 +71,7 @@ where
|
|||
}
|
||||
}
|
||||
|
||||
/// Mark job as failed and log error.
|
||||
async fn failure<E>(job_id: &Uuid, error: E, db: &storage::jobs::Jobs)
|
||||
where
|
||||
E: snafu::Error + Send + Sync + 'static,
|
||||
|
|
|
@ -18,6 +18,7 @@ use crate::{
|
|||
storage::{self, queries::DocumentMatch},
|
||||
};
|
||||
|
||||
/// Maximum allowed limit for query results.
|
||||
const MAX_LIMIT: usize = 10;
|
||||
|
||||
/// Errors that occur during query processing.
|
||||
|
@ -83,7 +84,7 @@ pub struct QueryStartResponse {
|
|||
pub id: String,
|
||||
}
|
||||
|
||||
/// Execute a semantic search query against the document database.
|
||||
/// Execute a semantic search query against the document database and return the job id.
|
||||
#[utoipa::path(
|
||||
post,
|
||||
path = "/query",
|
||||
|
@ -151,6 +152,7 @@ impl HttpStatus for RetrieveError {
|
|||
|
||||
http_error!(RetrieveError);
|
||||
|
||||
/// Get results for a completed query job.
|
||||
#[utoipa::path(
|
||||
get,
|
||||
path = "/query/{id}",
|
||||
|
|
|
@ -6,8 +6,10 @@ use zip::result::ZipError;
|
|||
|
||||
use super::extractor::{EpubSnafu, ExtractionError, Extractor, extension};
|
||||
|
||||
/// EPUB text extractor.
|
||||
pub struct Epub;
|
||||
|
||||
/// Errors that occur during EPUB text extraction.
|
||||
#[derive(Debug, Snafu)]
|
||||
pub enum EpubExtractionError {
|
||||
#[snafu(display("Failed to open epub as zip archive."))]
|
||||
|
|
|
@ -7,6 +7,7 @@ use super::{
|
|||
pdf::{Pdf, PdfExtractionError},
|
||||
};
|
||||
|
||||
/// Errors that occur during text extraction from documents.
|
||||
#[derive(Debug, Snafu)]
|
||||
pub enum ExtractionError {
|
||||
#[snafu(display("Failed to read input for extraction."))]
|
||||
|
@ -17,10 +18,13 @@ pub enum ExtractionError {
|
|||
Epub { source: EpubExtractionError },
|
||||
}
|
||||
|
||||
/// Extract text content from document bytes.
|
||||
pub trait Extractor {
|
||||
/// Extract text from document bytes.
|
||||
fn extract(input: &[u8]) -> Result<String, ExtractionError>;
|
||||
}
|
||||
|
||||
/// Get file extension from path as lowercase string.
|
||||
pub fn extension<P: AsRef<Path>>(path: P) -> String {
|
||||
let path = path.as_ref();
|
||||
path.extension()
|
||||
|
@ -29,6 +33,7 @@ pub fn extension<P: AsRef<Path>>(path: P) -> String {
|
|||
.into_owned()
|
||||
}
|
||||
|
||||
/// Extract text from input stream based on file extension.
|
||||
pub fn extract<R: Read>(mut input: R, extension: &str) -> Result<Option<String>, ExtractionError> {
|
||||
let mut buffer = Vec::new();
|
||||
input.read_to_end(&mut buffer).context(ReadSnafu)?;
|
||||
|
|
|
@ -3,8 +3,10 @@ use snafu::{ResultExt, Snafu};
|
|||
|
||||
use super::extractor::{Extractor, PdfSnafu};
|
||||
|
||||
/// PDF text extractor.
|
||||
pub struct Pdf;
|
||||
|
||||
/// Extract text from all pages of a PDF document.
|
||||
fn get_pdf_text(doc: &Document) -> Result<String, lopdf::Error> {
|
||||
let pages: Vec<u32> = doc.get_pages().keys().cloned().collect();
|
||||
let text = doc.extract_text(&pages)?;
|
||||
|
@ -12,6 +14,7 @@ fn get_pdf_text(doc: &Document) -> Result<String, lopdf::Error> {
|
|||
Ok(text)
|
||||
}
|
||||
|
||||
/// Errors that occur during PDF text extraction.
|
||||
#[derive(Debug, Snafu)]
|
||||
pub enum PdfExtractionError {
|
||||
#[snafu(display("Failed to open pdf."))]
|
||||
|
|
|
@ -7,12 +7,14 @@ use uuid::Uuid;
|
|||
|
||||
use super::queries::DocumentMatch;
|
||||
|
||||
/// Database operations for background job management.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Jobs {
|
||||
/// Connection pool for database operations.
|
||||
pool: Arc<PgPool>,
|
||||
}
|
||||
|
||||
/// Status of a background job.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, Type)]
|
||||
#[sqlx(type_name = "job_status", rename_all = "lowercase")]
|
||||
pub enum JobStatus {
|
||||
|
@ -75,6 +77,7 @@ pub enum RetrieveError {
|
|||
}
|
||||
|
||||
impl Jobs {
|
||||
/// Create a new jobs handler with database pool.
|
||||
pub fn new(pool: Arc<PgPool>) -> Self {
|
||||
Self { pool }
|
||||
}
|
||||
|
@ -114,6 +117,7 @@ impl Jobs {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
/// Retrieve job status and results by ID.
|
||||
pub async fn retrieve(
|
||||
&self,
|
||||
id: &Uuid,
|
||||
|
|
|
@ -7,6 +7,7 @@ use sqlx::{FromRow, PgPool};
|
|||
|
||||
use crate::{hash::SHA256_LENGTH, text_encoder::Embeddings};
|
||||
|
||||
/// Database operations for document queries and embeddings.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Queries {
|
||||
/// Connection pool for database operations.
|
||||
|
@ -67,6 +68,7 @@ pub struct DocumentMatch {
|
|||
}
|
||||
|
||||
impl Queries {
|
||||
/// Create a new queries handler with database pool.
|
||||
pub fn new(pool: Arc<PgPool>) -> Self {
|
||||
Self { pool }
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue