mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-07-27 16:51:01 +00:00
Merge remote-tracking branch 'origin/main' into facet-levels-refactor
This commit is contained in:
@ -56,6 +56,8 @@ pub enum InternalError {
|
||||
Store(#[from] MdbError),
|
||||
#[error(transparent)]
|
||||
Utf8(#[from] str::Utf8Error),
|
||||
#[error("An indexation process was explicitly aborted.")]
|
||||
AbortedIndexation,
|
||||
}
|
||||
|
||||
#[derive(Error, Debug)]
|
||||
|
@ -1188,6 +1188,7 @@ pub(crate) mod tests {
|
||||
use tempfile::TempDir;
|
||||
|
||||
use crate::documents::DocumentsBatchReader;
|
||||
use crate::error::{Error, InternalError};
|
||||
use crate::index::{DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS};
|
||||
use crate::update::{self, IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings};
|
||||
use crate::{db_snap, Index};
|
||||
@ -1237,6 +1238,7 @@ pub(crate) mod tests {
|
||||
&self.indexer_config,
|
||||
self.index_documents_config.clone(),
|
||||
|_| (),
|
||||
|| false,
|
||||
)
|
||||
.unwrap();
|
||||
let (builder, user_error) = builder.add_documents(documents).unwrap();
|
||||
@ -1273,11 +1275,45 @@ pub(crate) mod tests {
|
||||
) -> Result<(), crate::error::Error> {
|
||||
let mut builder = update::Settings::new(wtxn, &self.inner, &self.indexer_config);
|
||||
update(&mut builder);
|
||||
builder.execute(drop)?;
|
||||
builder.execute(drop, || false)?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn aborting_indexation() {
|
||||
use std::sync::atomic::AtomicBool;
|
||||
use std::sync::atomic::Ordering::Relaxed;
|
||||
|
||||
let index = TempIndex::new();
|
||||
let mut wtxn = index.inner.write_txn().unwrap();
|
||||
|
||||
let should_abort = AtomicBool::new(false);
|
||||
let builder = IndexDocuments::new(
|
||||
&mut wtxn,
|
||||
&index.inner,
|
||||
&index.indexer_config,
|
||||
index.index_documents_config.clone(),
|
||||
|_| (),
|
||||
|| should_abort.load(Relaxed),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let (builder, user_error) = builder
|
||||
.add_documents(documents!([
|
||||
{ "id": 1, "name": "kevin" },
|
||||
{ "id": 2, "name": "bob", "age": 20 },
|
||||
{ "id": 2, "name": "bob", "age": 20 },
|
||||
]))
|
||||
.unwrap();
|
||||
user_error.unwrap();
|
||||
|
||||
should_abort.store(true, Relaxed);
|
||||
let err = builder.execute().unwrap_err();
|
||||
|
||||
assert!(matches!(err, Error::InternalError(InternalError::AbortedIndexation)));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn initial_field_distribution() {
|
||||
let index = TempIndex::new();
|
||||
|
@ -89,7 +89,7 @@ mod test {
|
||||
let config = IndexerConfig::default();
|
||||
let mut update = Settings::new(&mut txn, &index, &config);
|
||||
update.set_distinct_field(distinct.to_string());
|
||||
update.execute(|_| ()).unwrap();
|
||||
update.execute(|_| (), || false).unwrap();
|
||||
|
||||
// add documents to the index
|
||||
let config = IndexerConfig::default();
|
||||
@ -98,7 +98,8 @@ mod test {
|
||||
..Default::default()
|
||||
};
|
||||
let addition =
|
||||
IndexDocuments::new(&mut txn, &index, &config, indexing_config, |_| ()).unwrap();
|
||||
IndexDocuments::new(&mut txn, &index, &config, indexing_config, |_| (), || false)
|
||||
.unwrap();
|
||||
|
||||
let reader =
|
||||
crate::documents::DocumentsBatchReader::from_reader(Cursor::new(JSON.as_slice()))
|
||||
|
@ -32,7 +32,7 @@ pub use self::helpers::{
|
||||
use self::helpers::{grenad_obkv_into_chunks, GrenadParameters};
|
||||
pub use self::transform::{Transform, TransformOutput};
|
||||
use crate::documents::{obkv_to_object, DocumentsBatchReader};
|
||||
use crate::error::UserError;
|
||||
use crate::error::{Error, InternalError, UserError};
|
||||
pub use crate::update::index_documents::helpers::CursorClonableMmap;
|
||||
use crate::update::{
|
||||
self, IndexerConfig, PrefixWordPairsProximityDocids, UpdateIndexingStep, WordPrefixDocids,
|
||||
@ -70,13 +70,14 @@ impl Default for IndexDocumentsMethod {
|
||||
}
|
||||
}
|
||||
|
||||
pub struct IndexDocuments<'t, 'u, 'i, 'a, F> {
|
||||
pub struct IndexDocuments<'t, 'u, 'i, 'a, FP, FA> {
|
||||
wtxn: &'t mut heed::RwTxn<'i, 'u>,
|
||||
index: &'i Index,
|
||||
config: IndexDocumentsConfig,
|
||||
indexer_config: &'a IndexerConfig,
|
||||
transform: Option<Transform<'a, 'i>>,
|
||||
progress: F,
|
||||
progress: FP,
|
||||
should_abort: FA,
|
||||
added_documents: u64,
|
||||
}
|
||||
|
||||
@ -90,17 +91,19 @@ pub struct IndexDocumentsConfig {
|
||||
pub autogenerate_docids: bool,
|
||||
}
|
||||
|
||||
impl<'t, 'u, 'i, 'a, F> IndexDocuments<'t, 'u, 'i, 'a, F>
|
||||
impl<'t, 'u, 'i, 'a, FP, FA> IndexDocuments<'t, 'u, 'i, 'a, FP, FA>
|
||||
where
|
||||
F: Fn(UpdateIndexingStep) + Sync,
|
||||
FP: Fn(UpdateIndexingStep) + Sync,
|
||||
FA: Fn() -> bool + Sync,
|
||||
{
|
||||
pub fn new(
|
||||
wtxn: &'t mut heed::RwTxn<'i, 'u>,
|
||||
index: &'i Index,
|
||||
indexer_config: &'a IndexerConfig,
|
||||
config: IndexDocumentsConfig,
|
||||
progress: F,
|
||||
) -> Result<IndexDocuments<'t, 'u, 'i, 'a, F>> {
|
||||
progress: FP,
|
||||
should_abort: FA,
|
||||
) -> Result<IndexDocuments<'t, 'u, 'i, 'a, FP, FA>> {
|
||||
let transform = Some(Transform::new(
|
||||
wtxn,
|
||||
index,
|
||||
@ -114,6 +117,7 @@ where
|
||||
config,
|
||||
indexer_config,
|
||||
progress,
|
||||
should_abort,
|
||||
wtxn,
|
||||
index,
|
||||
added_documents: 0,
|
||||
@ -148,12 +152,13 @@ where
|
||||
Err(user_error) => return Ok((self, Err(user_error))),
|
||||
};
|
||||
|
||||
let indexed_documents = self
|
||||
.transform
|
||||
.as_mut()
|
||||
.expect("Invalid document addition state")
|
||||
.read_documents(enriched_documents_reader, self.wtxn, &self.progress)?
|
||||
as u64;
|
||||
let indexed_documents =
|
||||
self.transform.as_mut().expect("Invalid document addition state").read_documents(
|
||||
enriched_documents_reader,
|
||||
self.wtxn,
|
||||
&self.progress,
|
||||
&self.should_abort,
|
||||
)? as u64;
|
||||
|
||||
self.added_documents += indexed_documents;
|
||||
|
||||
@ -197,7 +202,8 @@ where
|
||||
#[logging_timer::time("IndexDocuments::{}")]
|
||||
pub fn execute_raw(self, output: TransformOutput) -> Result<u64>
|
||||
where
|
||||
F: Fn(UpdateIndexingStep) + Sync,
|
||||
FP: Fn(UpdateIndexingStep) + Sync,
|
||||
FA: Fn() -> bool + Sync,
|
||||
{
|
||||
let TransformOutput {
|
||||
primary_key,
|
||||
@ -346,6 +352,10 @@ where
|
||||
});
|
||||
|
||||
for result in lmdb_writer_rx {
|
||||
if (self.should_abort)() {
|
||||
return Err(Error::InternalError(InternalError::AbortedIndexation));
|
||||
}
|
||||
|
||||
let typed_chunk = match result? {
|
||||
TypedChunk::WordDocids { word_docids_reader, exact_word_docids_reader } => {
|
||||
let cloneable_chunk = unsafe { as_cloneable_grenad(&word_docids_reader)? };
|
||||
@ -422,17 +432,26 @@ where
|
||||
word_position_docids: Option<grenad::Reader<CursorClonableMmap>>,
|
||||
) -> Result<()>
|
||||
where
|
||||
F: Fn(UpdateIndexingStep) + Sync,
|
||||
FP: Fn(UpdateIndexingStep) + Sync,
|
||||
FA: Fn() -> bool + Sync,
|
||||
{
|
||||
// Merged databases are already been indexed, we start from this count;
|
||||
let mut databases_seen = MERGED_DATABASE_COUNT;
|
||||
|
||||
if (self.should_abort)() {
|
||||
return Err(Error::InternalError(InternalError::AbortedIndexation));
|
||||
}
|
||||
|
||||
databases_seen += 1;
|
||||
(self.progress)(UpdateIndexingStep::MergeDataIntoFinalDatabase {
|
||||
databases_seen,
|
||||
total_databases: TOTAL_POSTING_DATABASE_COUNT,
|
||||
});
|
||||
|
||||
if (self.should_abort)() {
|
||||
return Err(Error::InternalError(InternalError::AbortedIndexation));
|
||||
}
|
||||
|
||||
let previous_words_prefixes_fst =
|
||||
self.index.words_prefixes_fst(self.wtxn)?.map_data(|cow| cow.into_owned())?;
|
||||
|
||||
@ -446,6 +465,10 @@ where
|
||||
}
|
||||
builder.execute()?;
|
||||
|
||||
if (self.should_abort)() {
|
||||
return Err(Error::InternalError(InternalError::AbortedIndexation));
|
||||
}
|
||||
|
||||
let current_prefix_fst = self.index.words_prefixes_fst(self.wtxn)?;
|
||||
|
||||
// We retrieve the common words between the previous and new prefix word fst.
|
||||
@ -473,6 +496,10 @@ where
|
||||
total_databases: TOTAL_POSTING_DATABASE_COUNT,
|
||||
});
|
||||
|
||||
if (self.should_abort)() {
|
||||
return Err(Error::InternalError(InternalError::AbortedIndexation));
|
||||
}
|
||||
|
||||
if let Some(word_docids) = word_docids {
|
||||
execute_word_prefix_docids(
|
||||
self.wtxn,
|
||||
@ -499,6 +526,10 @@ where
|
||||
)?;
|
||||
}
|
||||
|
||||
if (self.should_abort)() {
|
||||
return Err(Error::InternalError(InternalError::AbortedIndexation));
|
||||
}
|
||||
|
||||
databases_seen += 1;
|
||||
(self.progress)(UpdateIndexingStep::MergeDataIntoFinalDatabase {
|
||||
databases_seen,
|
||||
@ -521,6 +552,10 @@ where
|
||||
)?;
|
||||
}
|
||||
|
||||
if (self.should_abort)() {
|
||||
return Err(Error::InternalError(InternalError::AbortedIndexation));
|
||||
}
|
||||
|
||||
databases_seen += 1;
|
||||
(self.progress)(UpdateIndexingStep::MergeDataIntoFinalDatabase {
|
||||
databases_seen,
|
||||
@ -548,6 +583,10 @@ where
|
||||
)?;
|
||||
}
|
||||
|
||||
if (self.should_abort)() {
|
||||
return Err(Error::InternalError(InternalError::AbortedIndexation));
|
||||
}
|
||||
|
||||
databases_seen += 1;
|
||||
(self.progress)(UpdateIndexingStep::MergeDataIntoFinalDatabase {
|
||||
databases_seen,
|
||||
|
@ -138,15 +138,17 @@ impl<'a, 'i> Transform<'a, 'i> {
|
||||
})
|
||||
}
|
||||
|
||||
pub fn read_documents<R, F>(
|
||||
pub fn read_documents<R, FP, FA>(
|
||||
&mut self,
|
||||
reader: EnrichedDocumentsBatchReader<R>,
|
||||
wtxn: &mut heed::RwTxn,
|
||||
progress_callback: F,
|
||||
progress_callback: FP,
|
||||
should_abort: FA,
|
||||
) -> Result<usize>
|
||||
where
|
||||
R: Read + Seek,
|
||||
F: Fn(UpdateIndexingStep) + Sync,
|
||||
FP: Fn(UpdateIndexingStep) + Sync,
|
||||
FA: Fn() -> bool + Sync,
|
||||
{
|
||||
let (mut cursor, fields_index) = reader.into_cursor_and_fields_index();
|
||||
|
||||
@ -165,6 +167,10 @@ impl<'a, 'i> Transform<'a, 'i> {
|
||||
while let Some(enriched_document) = cursor.next_enriched_document()? {
|
||||
let EnrichedDocument { document, document_id } = enriched_document;
|
||||
|
||||
if should_abort() {
|
||||
return Err(Error::InternalError(InternalError::AbortedIndexation));
|
||||
}
|
||||
|
||||
// drop_and_reuse is called instead of .clear() to communicate to the compiler that field_buffer
|
||||
// does not keep references from the cursor between loop iterations
|
||||
let mut field_buffer_cache = drop_and_reuse(field_buffer);
|
||||
|
@ -266,9 +266,15 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
||||
self.pagination_max_total_hits = Setting::Reset;
|
||||
}
|
||||
|
||||
fn reindex<F>(&mut self, cb: &F, old_fields_ids_map: FieldsIdsMap) -> Result<()>
|
||||
fn reindex<FP, FA>(
|
||||
&mut self,
|
||||
progress_callback: &FP,
|
||||
should_abort: &FA,
|
||||
old_fields_ids_map: FieldsIdsMap,
|
||||
) -> Result<()>
|
||||
where
|
||||
F: Fn(UpdateIndexingStep) + Sync,
|
||||
FP: Fn(UpdateIndexingStep) + Sync,
|
||||
FA: Fn() -> bool + Sync,
|
||||
{
|
||||
let fields_ids_map = self.index.fields_ids_map(self.wtxn)?;
|
||||
// if the settings are set before any document update, we don't need to do anything, and
|
||||
@ -302,7 +308,8 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
||||
self.index,
|
||||
self.indexer_config,
|
||||
IndexDocumentsConfig::default(),
|
||||
&cb,
|
||||
&progress_callback,
|
||||
&should_abort,
|
||||
)?;
|
||||
indexing_builder.execute_raw(output)?;
|
||||
|
||||
@ -657,9 +664,10 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn execute<F>(mut self, progress_callback: F) -> Result<()>
|
||||
pub fn execute<FP, FA>(mut self, progress_callback: FP, should_abort: FA) -> Result<()>
|
||||
where
|
||||
F: Fn(UpdateIndexingStep) + Sync,
|
||||
FP: Fn(UpdateIndexingStep) + Sync,
|
||||
FA: Fn() -> bool + Sync,
|
||||
{
|
||||
self.index.set_updated_at(self.wtxn, &OffsetDateTime::now_utc())?;
|
||||
|
||||
@ -695,7 +703,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
||||
|| searchable_updated
|
||||
|| exact_attributes_updated
|
||||
{
|
||||
self.reindex(&progress_callback, old_fields_ids_map)?;
|
||||
self.reindex(&progress_callback, &should_abort, old_fields_ids_map)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
|
Reference in New Issue
Block a user