2025-07-01 08:55:08 +02:00
|
|
|
//! Query processing and document retrieval.
|
|
|
|
|
|
|
|
use snafu::{ResultExt, Snafu};
|
|
|
|
|
|
|
|
use crate::{
|
2025-07-01 21:10:35 +02:00
|
|
|
storage::{
|
|
|
|
self,
|
|
|
|
queries::{DocumentMatch, Queries},
|
|
|
|
},
|
2025-07-01 08:55:08 +02:00
|
|
|
text_encoder::{self, TextEncoder},
|
|
|
|
tokenize::{self, Tokenizer},
|
|
|
|
};
|
|
|
|
|
|
|
|
/// Errors that occur during query processing.
|
|
|
|
#[derive(Debug, Snafu)]
|
|
|
|
pub enum AskError {
|
|
|
|
#[snafu(display("Failed to encode query."))]
|
|
|
|
Encode { source: tokenize::EncodeError },
|
|
|
|
#[snafu(display("Failed to embed query."))]
|
|
|
|
Embed { source: text_encoder::EmbedError },
|
|
|
|
#[snafu(display("Failed to retrieve similar documents."))]
|
2025-07-01 21:10:35 +02:00
|
|
|
Query {
|
|
|
|
source: storage::queries::QueryError,
|
|
|
|
},
|
2025-07-01 08:55:08 +02:00
|
|
|
#[snafu(display("Failed to rerank documents."))]
|
|
|
|
Rerank { source: text_encoder::RerankError },
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Process a user query and return ranked document matches.
|
|
|
|
pub async fn ask(
|
|
|
|
query: &str,
|
2025-07-01 21:10:35 +02:00
|
|
|
db: &Queries,
|
2025-07-01 08:55:08 +02:00
|
|
|
tokenizer: &Tokenizer,
|
2025-07-01 14:02:18 +02:00
|
|
|
embedder: &TextEncoder,
|
|
|
|
reranker: &TextEncoder,
|
2025-07-01 08:55:08 +02:00
|
|
|
chunk_size: usize,
|
|
|
|
limit: usize,
|
2025-07-01 14:02:18 +02:00
|
|
|
) -> Result<Vec<DocumentMatch>, AskError> {
|
2025-07-01 08:55:08 +02:00
|
|
|
let encodings = tokenizer.encode(query, chunk_size).context(EncodeSnafu)?;
|
2025-07-01 14:02:18 +02:00
|
|
|
let embeddings = embedder.embed(encodings[0].clone()).context(EmbedSnafu)?;
|
2025-07-01 08:55:08 +02:00
|
|
|
let documents = db
|
|
|
|
.query(embeddings, (limit * 10) as i32)
|
|
|
|
.await
|
|
|
|
.context(QuerySnafu)?;
|
|
|
|
|
|
|
|
let reranked_docs = reranker
|
|
|
|
.rerank(query, documents, tokenizer, limit)
|
|
|
|
.context(RerankSnafu)?;
|
|
|
|
|
2025-07-01 14:02:18 +02:00
|
|
|
Ok(reranked_docs)
|
2025-07-01 08:55:08 +02:00
|
|
|
}
|