implement placeholder search

This commit is contained in:
mpostma
2020-05-28 19:35:34 +02:00
parent 37ee0f36c1
commit bbe3a10107
12 changed files with 361 additions and 106 deletions

View File

@ -217,7 +217,7 @@ pub fn apply_addition<'a, 'b>(
let mut indexer = RawIndexer::new(stop_words);
// For each document in this update
for (document_id, document) in documents_additions {
for (document_id, document) in &documents_additions {
// For each key-value pair in the document.
for (attribute, value) in document {
let field_id = schema.insert_and_index(&attribute)?;
@ -229,7 +229,7 @@ pub fn apply_addition<'a, 'b>(
&mut indexer,
&schema,
field_id,
document_id,
*document_id,
&value,
)?;
}
@ -257,6 +257,10 @@ pub fn apply_addition<'a, 'b>(
index.facets.add(writer, facet_map)?;
}
// update is finished; update sorted document id cache with new state
let mut document_ids = index.main.internal_docids(writer)?.to_vec();
super::cache_document_ids_sorted(writer, &ranked_map, index, &mut document_ids)?;
Ok(())
}
@ -313,8 +317,8 @@ pub fn reindex_all_documents(writer: &mut heed::RwTxn<MainT>, index: &store::Ind
index.facets.add(writer, facet_map)?;
}
// ^-- https://github.com/meilisearch/MeiliSearch/pull/631#issuecomment-626624470 --v
for document_id in documents_ids_to_reindex {
for result in index.documents_fields.document_fields(writer, document_id)? {
for document_id in &documents_ids_to_reindex {
for result in index.documents_fields.document_fields(writer, *document_id)? {
let (field_id, bytes) = result?;
let value: Value = serde_json::from_slice(bytes)?;
ram_store.insert((document_id, field_id), value);
@ -330,7 +334,7 @@ pub fn reindex_all_documents(writer: &mut heed::RwTxn<MainT>, index: &store::Ind
&mut indexer,
&schema,
field_id,
document_id,
*document_id,
&value,
)?;
}
@ -354,6 +358,10 @@ pub fn reindex_all_documents(writer: &mut heed::RwTxn<MainT>, index: &store::Ind
index.facets.add(writer, facet_map)?;
}
// update is finished; update sorted document id cache with new state
let mut document_ids = index.main.internal_docids(writer)?.to_vec();
super::cache_document_ids_sorted(writer, &ranked_map, index, &mut document_ids)?;
Ok(())
}

View File

@ -8,7 +8,7 @@ use crate::database::{UpdateEvent, UpdateEventsEmitter};
use crate::facets;
use crate::store;
use crate::update::{next_update_id, compute_short_prefixes, Update};
use crate::{DocumentId, Error, MResult, RankedMap};
use crate::{DocumentId, Error, MResult, RankedMap, MainWriter, Index};
pub struct DocumentsDeletion {
updates_store: store::Updates,
@ -153,8 +153,8 @@ pub fn apply_documents_deletion(
}
let deleted_documents_len = deleted_documents.len() as u64;
for id in deleted_documents {
index.docs_words.del_doc_words(writer, id)?;
for id in &deleted_documents {
index.docs_words.del_doc_words(writer, *id)?;
}
let removed_words = fst::Set::from_iter(removed_words).unwrap();
@ -180,5 +180,28 @@ pub fn apply_documents_deletion(
compute_short_prefixes(writer, &words, index)?;
// update is finished; update sorted document id cache with new state
document_cache_remove_deleted(writer, index, &ranked_map, &deleted_documents)?;
Ok(())
}
/// Refreshes the sorted document id cache after documents have been deleted.
///
/// If a cache is already stored, the deleted ids are filtered out of it and the
/// result is written back. Otherwise a new cache is generated from the internal
/// document ids currently in the store.
///
/// # Errors
///
/// Propagates any error returned by the underlying store reads and writes, or
/// by `cache_document_ids_sorted` when the cache has to be regenerated.
fn document_cache_remove_deleted(
    writer: &mut MainWriter,
    index: &Index,
    ranked_map: &RankedMap,
    documents_to_delete: &HashSet<DocumentId>,
) -> MResult<()> {
    let retained: Vec<DocumentId> = match index.main.sorted_document_ids_cache(writer)? {
        // Only keep the documents that were not deleted. Filtering preserves the
        // relative order of the remaining ids, so there is no need to sort again.
        Some(old_cache) => old_cache
            .iter()
            .filter(|docid| !documents_to_delete.contains(*docid))
            .copied()
            .collect(),
        // No cache stored yet: build one from the documents in the store.
        // `cache_document_ids_sorted` sorts the ids and persists them itself, so
        // return right away instead of writing the same cache a second time.
        None => {
            let mut document_ids = index.main.internal_docids(writer)?.to_vec();
            return super::cache_document_ids_sorted(writer, ranked_map, index, &mut document_ids);
        }
    };

    index.main.put_sorted_document_ids_cache(writer, &retained)
}

View File

@ -25,8 +25,9 @@ use serde::{Deserialize, Serialize};
use serde_json::Value;
use meilisearch_error::ErrorCode;
use meilisearch_types::DocumentId;
use crate::{store, MResult};
use crate::{store, MResult, RankedMap};
use crate::database::{MainT, UpdateT};
use crate::settings::SettingsUpdate;
@ -371,3 +372,13 @@ where A: AsRef<[u8]>,
Ok(())
}
/// Sorts `document_ids` in place with the placeholder document sort and stores
/// the resulting order as the sorted document id cache of `index`.
///
/// # Errors
///
/// Returns an error if the sort fails or if the cache cannot be written.
fn cache_document_ids_sorted(
    writer: &mut heed::RwTxn<MainT>,
    ranked_map: &RankedMap,
    index: &store::Index,
    document_ids: &mut [DocumentId],
) -> MResult<()> {
    // Step 1: reorder the ids in place.
    crate::bucket_sort::placeholder_document_sort(document_ids, index, writer, ranked_map)?;

    // Step 2: persist the freshly sorted ids; only a shared view is needed here.
    let sorted_ids: &[DocumentId] = document_ids;
    index.main.put_sorted_document_ids_cache(writer, sorted_ids)
}