mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-09-21 04:06:26 +00:00
Use fst 0.4.4 in the project
This commit is contained in:
@ -1,3 +1,4 @@
|
||||
use std::borrow::Cow;
|
||||
use std::collections::{HashMap, BTreeMap};
|
||||
|
||||
use fst::{set::OpBuilder, SetBuilder};
|
||||
@ -108,17 +109,18 @@ pub fn push_documents_addition<D: serde::Serialize>(
|
||||
Ok(last_update_id)
|
||||
}
|
||||
|
||||
fn index_document(
|
||||
fn index_document<A>(
|
||||
writer: &mut heed::RwTxn<MainT>,
|
||||
documents_fields: DocumentsFields,
|
||||
documents_fields_counts: DocumentsFieldsCounts,
|
||||
ranked_map: &mut RankedMap,
|
||||
indexer: &mut RawIndexer,
|
||||
indexer: &mut RawIndexer<A>,
|
||||
schema: &Schema,
|
||||
field_id: FieldId,
|
||||
document_id: DocumentId,
|
||||
value: &Value,
|
||||
) -> MResult<()>
|
||||
where A: AsRef<[u8]>,
|
||||
{
|
||||
let serialized = serde_json::to_vec(value)?;
|
||||
documents_fields.put_document_field(writer, document_id, field_id, &serialized)?;
|
||||
@ -208,10 +210,7 @@ pub fn apply_addition<'a, 'b>(
|
||||
None => RankedMap::default(),
|
||||
};
|
||||
|
||||
let stop_words = match index.main.stop_words_fst(writer)? {
|
||||
Some(stop_words) => stop_words,
|
||||
None => fst::Set::default(),
|
||||
};
|
||||
let stop_words = index.main.stop_words_fst(writer)?.map_data(Cow::into_owned)?;
|
||||
|
||||
// 3. index the documents fields in the stores
|
||||
if let Some(attributes_for_facetting) = index.main.attributes_for_faceting(writer)? {
|
||||
@ -297,10 +296,10 @@ pub fn reindex_all_documents(writer: &mut heed::RwTxn<MainT>, index: &store::Ind
|
||||
index.postings_lists.clear(writer)?;
|
||||
index.docs_words.clear(writer)?;
|
||||
|
||||
let stop_words = match index.main.stop_words_fst(writer)? {
|
||||
Some(stop_words) => stop_words,
|
||||
None => fst::Set::default(),
|
||||
};
|
||||
let stop_words = index.main
|
||||
.stop_words_fst(writer)?
|
||||
.map_data(Cow::into_owned)
|
||||
.unwrap();
|
||||
|
||||
let number_of_inserted_documents = documents_ids_to_reindex.len();
|
||||
let mut indexer = RawIndexer::new(stop_words);
|
||||
@ -348,13 +347,15 @@ pub fn reindex_all_documents(writer: &mut heed::RwTxn<MainT>, index: &store::Ind
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn write_documents_addition_index(
|
||||
pub fn write_documents_addition_index<A>(
|
||||
writer: &mut heed::RwTxn<MainT>,
|
||||
index: &store::Index,
|
||||
ranked_map: &RankedMap,
|
||||
number_of_inserted_documents: usize,
|
||||
indexer: RawIndexer,
|
||||
) -> MResult<()> {
|
||||
indexer: RawIndexer<A>,
|
||||
) -> MResult<()>
|
||||
where A: AsRef<[u8]>,
|
||||
{
|
||||
let indexed = indexer.build();
|
||||
let mut delta_words_builder = SetBuilder::memory();
|
||||
|
||||
@ -373,33 +374,27 @@ pub fn write_documents_addition_index(
|
||||
index.docs_words.put_doc_words(writer, id, &words)?;
|
||||
}
|
||||
|
||||
let delta_words = delta_words_builder
|
||||
.into_inner()
|
||||
.and_then(fst::Set::from_bytes)
|
||||
.unwrap();
|
||||
let delta_words = delta_words_builder.into_set();
|
||||
|
||||
let words = match index.main.words_fst(writer)? {
|
||||
Some(words) => {
|
||||
let op = OpBuilder::new()
|
||||
.add(words.stream())
|
||||
.add(delta_words.stream())
|
||||
.r#union();
|
||||
let words_fst = index.main.words_fst(writer)?;
|
||||
let words = if !words_fst.is_empty() {
|
||||
let op = OpBuilder::new()
|
||||
.add(words_fst.stream())
|
||||
.add(delta_words.stream())
|
||||
.r#union();
|
||||
|
||||
let mut words_builder = SetBuilder::memory();
|
||||
words_builder.extend_stream(op).unwrap();
|
||||
words_builder
|
||||
.into_inner()
|
||||
.and_then(fst::Set::from_bytes)
|
||||
.unwrap()
|
||||
}
|
||||
None => delta_words,
|
||||
let mut words_builder = SetBuilder::memory();
|
||||
words_builder.extend_stream(op).unwrap();
|
||||
words_builder.into_set()
|
||||
} else {
|
||||
delta_words
|
||||
};
|
||||
|
||||
index.main.put_words_fst(writer, &words)?;
|
||||
index.main.put_ranked_map(writer, ranked_map)?;
|
||||
index.main.put_number_of_documents(writer, |old| old + number_of_inserted_documents as u64)?;
|
||||
|
||||
compute_short_prefixes(writer, index)?;
|
||||
compute_short_prefixes(writer, &words, index)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
@ -114,7 +114,8 @@ pub fn apply_documents_deletion(
|
||||
ranked_map.remove(id, *ranked_attr);
|
||||
}
|
||||
|
||||
if let Some(words) = index.docs_words.doc_words(writer, id)? {
|
||||
let words = index.docs_words.doc_words(writer, id)?;
|
||||
if !words.is_empty() {
|
||||
let mut stream = words.stream();
|
||||
while let Some(word) = stream.next() {
|
||||
let word = word.to_vec();
|
||||
@ -157,21 +158,16 @@ pub fn apply_documents_deletion(
|
||||
}
|
||||
|
||||
let removed_words = fst::Set::from_iter(removed_words).unwrap();
|
||||
let words = match index.main.words_fst(writer)? {
|
||||
Some(words_set) => {
|
||||
let op = fst::set::OpBuilder::new()
|
||||
.add(words_set.stream())
|
||||
.add(removed_words.stream())
|
||||
.difference();
|
||||
let words = {
|
||||
let words_set = index.main.words_fst(writer)?;
|
||||
let op = fst::set::OpBuilder::new()
|
||||
.add(words_set.stream())
|
||||
.add(removed_words.stream())
|
||||
.difference();
|
||||
|
||||
let mut words_builder = SetBuilder::memory();
|
||||
words_builder.extend_stream(op).unwrap();
|
||||
words_builder
|
||||
.into_inner()
|
||||
.and_then(fst::Set::from_bytes)
|
||||
.unwrap()
|
||||
}
|
||||
None => fst::Set::default(),
|
||||
let mut words_builder = SetBuilder::memory();
|
||||
words_builder.extend_stream(op).unwrap();
|
||||
words_builder.into_set()
|
||||
};
|
||||
|
||||
index.main.put_words_fst(writer, &words)?;
|
||||
@ -182,7 +178,7 @@ pub fn apply_documents_deletion(
|
||||
index.main.remove_external_docids(writer, &external_docids)?;
|
||||
index.main.remove_internal_docids(writer, &internal_docids)?;
|
||||
|
||||
compute_short_prefixes(writer, index)?;
|
||||
compute_short_prefixes(writer, &words, index)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
@ -6,18 +6,19 @@ use meilisearch_types::DocumentId;
|
||||
use ordered_float::OrderedFloat;
|
||||
use serde_json::Value;
|
||||
|
||||
use crate::Number;
|
||||
use crate::{Number, FstMapCow};
|
||||
use crate::raw_indexer::RawIndexer;
|
||||
use crate::serde::SerializerError;
|
||||
use crate::store::DiscoverIds;
|
||||
|
||||
/// Returns the number of words indexed or `None` if the type is unindexable.
|
||||
pub fn index_value(
|
||||
indexer: &mut RawIndexer,
|
||||
pub fn index_value<A>(
|
||||
indexer: &mut RawIndexer<A>,
|
||||
document_id: DocumentId,
|
||||
indexed_pos: IndexedPos,
|
||||
value: &Value,
|
||||
) -> Option<usize>
|
||||
where A: AsRef<[u8]>,
|
||||
{
|
||||
match value {
|
||||
Value::Null => None,
|
||||
@ -99,7 +100,7 @@ pub fn value_to_number(value: &Value) -> Option<Number> {
|
||||
/// the corresponding id or generate a new one, this is the way we produce documents ids.
|
||||
pub fn discover_document_id(
|
||||
docid: &str,
|
||||
external_docids: &fst::Map,
|
||||
external_docids: &FstMapCow,
|
||||
available_docids: &mut DiscoverIds<'_>,
|
||||
) -> Result<DocumentId, SerializerError>
|
||||
{
|
||||
@ -120,7 +121,7 @@ pub fn discover_document_id(
|
||||
pub fn extract_document_id(
|
||||
primary_key: &str,
|
||||
document: &IndexMap<String, Value>,
|
||||
external_docids: &fst::Map,
|
||||
external_docids: &FstMapCow,
|
||||
available_docids: &mut DiscoverIds<'_>,
|
||||
) -> Result<(DocumentId, String), SerializerError>
|
||||
{
|
||||
|
@ -297,13 +297,13 @@ pub fn update_task<'a, 'b>(
|
||||
Ok(status)
|
||||
}
|
||||
|
||||
fn compute_short_prefixes(writer: &mut heed::RwTxn<MainT>, index: &store::Index) -> MResult<()> {
|
||||
// retrieve the words fst to compute all those prefixes
|
||||
let words_fst = match index.main.words_fst(writer)? {
|
||||
Some(fst) => fst,
|
||||
None => return Ok(()),
|
||||
};
|
||||
|
||||
fn compute_short_prefixes<A>(
|
||||
writer: &mut heed::RwTxn<MainT>,
|
||||
words_fst: &fst::Set<A>,
|
||||
index: &store::Index,
|
||||
) -> MResult<()>
|
||||
where A: AsRef<[u8]>,
|
||||
{
|
||||
// clear the prefixes
|
||||
let pplc_store = index.prefix_postings_lists_cache;
|
||||
pplc_store.clear(writer)?;
|
||||
|
@ -168,7 +168,6 @@ pub fn apply_stop_words_update(
|
||||
|
||||
let old_stop_words: BTreeSet<String> = index.main
|
||||
.stop_words_fst(writer)?
|
||||
.unwrap_or_default()
|
||||
.stream()
|
||||
.into_strs()?
|
||||
.into_iter()
|
||||
@ -186,7 +185,8 @@ pub fn apply_stop_words_update(
|
||||
apply_stop_words_deletion(writer, index, deletion)?;
|
||||
}
|
||||
|
||||
if let Some(words_fst) = index.main.words_fst(writer)? {
|
||||
let words_fst = index.main.words_fst(writer)?;
|
||||
if !words_fst.is_empty() {
|
||||
let stop_words = fst::Set::from_iter(stop_words)?;
|
||||
let op = OpBuilder::new()
|
||||
.add(&words_fst)
|
||||
@ -195,7 +195,7 @@ pub fn apply_stop_words_update(
|
||||
|
||||
let mut builder = fst::SetBuilder::memory();
|
||||
builder.extend_stream(op)?;
|
||||
let words_fst = builder.into_inner().and_then(fst::Set::from_bytes)?;
|
||||
let words_fst = builder.into_set();
|
||||
|
||||
index.main.put_words_fst(writer, &words_fst)?;
|
||||
index.main.put_stop_words_fst(writer, &stop_words)?;
|
||||
@ -222,28 +222,25 @@ fn apply_stop_words_addition(
|
||||
}
|
||||
|
||||
// create the new delta stop words fst
|
||||
let delta_stop_words = stop_words_builder
|
||||
.into_inner()
|
||||
.and_then(fst::Set::from_bytes)?;
|
||||
let delta_stop_words = stop_words_builder.into_set();
|
||||
|
||||
// we also need to remove all the stop words from the main fst
|
||||
if let Some(word_fst) = main_store.words_fst(writer)? {
|
||||
let words_fst = main_store.words_fst(writer)?;
|
||||
if !words_fst.is_empty() {
|
||||
let op = OpBuilder::new()
|
||||
.add(&word_fst)
|
||||
.add(&words_fst)
|
||||
.add(&delta_stop_words)
|
||||
.difference();
|
||||
|
||||
let mut word_fst_builder = SetBuilder::memory();
|
||||
word_fst_builder.extend_stream(op)?;
|
||||
let word_fst = word_fst_builder
|
||||
.into_inner()
|
||||
.and_then(fst::Set::from_bytes)?;
|
||||
let word_fst = word_fst_builder.into_set();
|
||||
|
||||
main_store.put_words_fst(writer, &word_fst)?;
|
||||
}
|
||||
|
||||
// now we add all of these stop words to the main store
|
||||
let stop_words_fst = main_store.stop_words_fst(writer)?.unwrap_or_default();
|
||||
let stop_words_fst = main_store.stop_words_fst(writer)?;
|
||||
|
||||
let op = OpBuilder::new()
|
||||
.add(&stop_words_fst)
|
||||
@ -252,9 +249,7 @@ fn apply_stop_words_addition(
|
||||
|
||||
let mut stop_words_builder = SetBuilder::memory();
|
||||
stop_words_builder.extend_stream(op)?;
|
||||
let stop_words_fst = stop_words_builder
|
||||
.into_inner()
|
||||
.and_then(fst::Set::from_bytes)?;
|
||||
let stop_words_fst = stop_words_builder.into_set();
|
||||
|
||||
main_store.put_stop_words_fst(writer, &stop_words_fst)?;
|
||||
|
||||
@ -274,12 +269,10 @@ fn apply_stop_words_deletion(
|
||||
}
|
||||
|
||||
// create the new delta stop words fst
|
||||
let delta_stop_words = stop_words_builder
|
||||
.into_inner()
|
||||
.and_then(fst::Set::from_bytes)?;
|
||||
let delta_stop_words = stop_words_builder.into_set();
|
||||
|
||||
// now we delete all of these stop words from the main store
|
||||
let stop_words_fst = index.main.stop_words_fst(writer)?.unwrap_or_default();
|
||||
let stop_words_fst = index.main.stop_words_fst(writer)?;
|
||||
|
||||
let op = OpBuilder::new()
|
||||
.add(&stop_words_fst)
|
||||
@ -288,7 +281,7 @@ fn apply_stop_words_deletion(
|
||||
|
||||
let mut stop_words_builder = SetBuilder::memory();
|
||||
stop_words_builder.extend_stream(op)?;
|
||||
let stop_words_fst = stop_words_builder.into_inner().and_then(fst::Set::from_bytes)?;
|
||||
let stop_words_fst = stop_words_builder.into_set();
|
||||
|
||||
Ok(index.main.put_stop_words_fst(writer, &stop_words_fst)?)
|
||||
}
|
||||
@ -311,16 +304,13 @@ pub fn apply_synonyms_update(
|
||||
let alternatives = SetBuf::from_dirty(alternatives);
|
||||
let mut alternatives_builder = SetBuilder::memory();
|
||||
alternatives_builder.extend_iter(alternatives)?;
|
||||
let bytes = alternatives_builder.into_inner()?;
|
||||
fst::Set::from_bytes(bytes)?
|
||||
alternatives_builder.into_set()
|
||||
};
|
||||
|
||||
synonyms_store.put_synonyms(writer, word.as_bytes(), &alternatives)?;
|
||||
}
|
||||
|
||||
let synonyms_set = synonyms_builder
|
||||
.into_inner()
|
||||
.and_then(fst::Set::from_bytes)?;
|
||||
let synonyms_set = synonyms_builder.into_set();
|
||||
|
||||
main_store.put_synonyms_fst(writer, &synonyms_set)?;
|
||||
|
||||
|
Reference in New Issue
Block a user