I can index documents without meilisearch

This commit is contained in:
Irevoire
2022-09-21 12:01:46 +02:00
committed by Clément Renault
parent edd8344dc9
commit 8770e07397
13 changed files with 357 additions and 222 deletions

View File

@@ -1,7 +1,9 @@
use std::error::Error;
use std::fmt;
use meilisearch_types::error::{Code, ErrorCode};
use meilisearch_types::internal_error;
use milli::UserError;
use serde_json::Value;
/// Shorthand for a `std::result::Result` whose error type is [`IndexError`].
pub type Result<T> = std::result::Result<T, IndexError>;
@@ -27,6 +29,17 @@ internal_error!(
milli::documents::Error
);
impl ErrorCode for IndexError {
    /// Returns the [`Code`] associated with this index error.
    ///
    /// Facet errors report their own code, while milli errors are translated
    /// by wrapping them in a [`MilliError`].
    fn error_code(&self) -> Code {
        match self {
            Self::Internal(_) => Code::Internal,
            Self::DocumentNotFound(_) => Code::DocumentNotFound,
            Self::Facet(facet_error) => facet_error.error_code(),
            Self::Milli(milli_error) => MilliError(milli_error).error_code(),
        }
    }
}
impl From<milli::UserError> for IndexError {
fn from(error: milli::UserError) -> IndexError {
IndexError::Milli(error.into())
@@ -46,3 +59,53 @@ impl ErrorCode for FacetError {
}
}
}
/// Borrowing wrapper around a [`milli::Error`].
///
/// `Display` and `ErrorCode` are implemented for this wrapper further down in
/// this file.
// NOTE(review): presumably the wrapper exists because `ErrorCode` and
// `milli::Error` are both imported from other crates — confirm.
#[derive(Debug)]
pub struct MilliError<'a>(pub &'a milli::Error);
// Empty impl: every `Error` method keeps its default implementation.
impl Error for MilliError<'_> {}
impl fmt::Display for MilliError<'_> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
self.0.fmt(f)
}
}
impl ErrorCode for MilliError<'_> {
fn error_code(&self) -> Code {
match self.0 {
milli::Error::InternalError(_) => Code::Internal,
milli::Error::IoError(_) => Code::Internal,
milli::Error::UserError(ref error) => {
match error {
// TODO: wait for spec for new error codes.
UserError::SerdeJson(_)
| UserError::InvalidLmdbOpenOptions
| UserError::DocumentLimitReached
| UserError::AccessingSoftDeletedDocument { .. }
| UserError::UnknownInternalDocumentId { .. } => Code::Internal,
UserError::InvalidStoreFile => Code::InvalidStore,
UserError::NoSpaceLeftOnDevice => Code::NoSpaceLeftOnDevice,
UserError::MaxDatabaseSizeReached => Code::DatabaseSizeLimitReached,
UserError::AttributeLimitReached => Code::MaxFieldsLimitExceeded,
UserError::InvalidFilter(_) => Code::Filter,
UserError::MissingDocumentId { .. } => Code::MissingDocumentId,
UserError::InvalidDocumentId { .. } | UserError::TooManyDocumentIds { .. } => {
Code::InvalidDocumentId
}
UserError::MissingPrimaryKey => Code::MissingPrimaryKey,
UserError::PrimaryKeyCannotBeChanged(_) => Code::PrimaryKeyAlreadyPresent,
UserError::SortRankingRuleMissing => Code::Sort,
UserError::InvalidFacetsDistribution { .. } => Code::BadRequest,
UserError::InvalidSortableAttribute { .. } => Code::Sort,
UserError::CriterionError(_) => Code::InvalidRankingRule,
UserError::InvalidGeoField { .. } => Code::InvalidGeoField,
UserError::SortError(_) => Code::Sort,
UserError::InvalidMinTypoWordLenSetting(_, _) => {
Code::InvalidMinWordLengthForTypo
}
}
}
}
}
}

View File

@@ -248,26 +248,20 @@ impl Index {
limit: usize,
attributes_to_retrieve: Option<Vec<S>>,
) -> Result<(u64, Vec<Document>)> {
let txn = self.read_txn()?;
let fields_ids_map = self.fields_ids_map(&txn)?;
let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
let rtxn = self.read_txn()?;
let mut documents = Vec::new();
for entry in self.all_documents(&txn)?.skip(offset).take(limit) {
let (_id, obkv) = entry?;
let document = obkv_to_json(&all_fields, &fields_ids_map, obkv)?;
for document in self.all_documents(&rtxn)?.skip(offset).take(limit) {
let document = match &attributes_to_retrieve {
Some(attributes_to_retrieve) => permissive_json_pointer::select_values(
&document,
&document?,
attributes_to_retrieve.iter().map(|s| s.as_ref()),
),
None => document,
None => document?,
};
documents.push(document);
}
let number_of_documents = self.number_of_documents(&txn)?;
let number_of_documents = self.number_of_documents(&rtxn)?;
Ok((number_of_documents, documents))
}
@@ -306,6 +300,21 @@ impl Index {
Ok(document)
}
/// Returns an iterator over the documents of the index, each converted to
/// JSON with all the fields of the fields-ids map.
///
/// The iterator borrows `rtxn` for `'a`. Each item is a `Result`, so a
/// failure to read or convert a document is yielded as an `Err` item.
///
/// # Errors
///
/// Returns an error up front if the fields-ids map cannot be read or if the
/// inner index fails to create its document iterator.
pub fn all_documents<'a>(
    &self,
    rtxn: &'a RoTxn,
) -> Result<impl Iterator<Item = Result<Document>> + 'a> {
    let fields_ids_map = self.fields_ids_map(rtxn)?;
    let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
    let raw_documents = self.inner.all_documents(rtxn)?;

    // The closure owns the fields map and the id list so the returned
    // iterator does not borrow any local of this function.
    Ok(raw_documents.map(move |entry| -> Result<Document> {
        let (_key, obkv) = entry?;
        let document = obkv_to_json(&all_fields, &fields_ids_map, obkv)?;
        Ok(document)
    }))
}
/// Returns the value reported by the inner index's `on_disk_size`.
///
/// # Errors
///
/// Propagates the error returned by `on_disk_size`, converted into the
/// error type of this module's `Result`.
pub fn size(&self) -> Result<u64> {
    let on_disk_size = self.inner.on_disk_size()?;
    Ok(on_disk_size)
}