Merge remote-tracking branch 'origin/main' into facet-levels-refactor

Loïc Lecrenier
2022-10-26 15:13:34 +02:00
35 changed files with 132 additions and 149 deletions

View File

@@ -182,12 +182,11 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
         // and we can reset the soft deleted bitmap
         self.index.put_soft_deleted_documents_ids(self.wtxn, &RoaringBitmap::new())?;
-        let primary_key = self.index.primary_key(self.wtxn)?.ok_or_else(|| {
-            InternalError::DatabaseMissingEntry {
+        let primary_key =
+            self.index.primary_key(self.wtxn)?.ok_or(InternalError::DatabaseMissingEntry {
                 db_name: db_name::MAIN,
                 key: Some(main_key::PRIMARY_KEY_KEY),
-            }
-        })?;
+            })?;
         // Since we already checked if the DB was empty, if we can't find the primary key, then
         // something is wrong, and we must return an error.
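
The closure passed to `ok_or_else` only built a constant struct literal, so the eager `ok_or` form is equivalent and shorter (clippy's `unnecessary_lazy_evaluations` lint). A minimal sketch of the rule of thumb, using a hypothetical `MissingKey` error type rather than milli's own:

    use std::collections::HashMap;

    #[derive(Debug)]
    struct MissingKey;

    fn lookup(map: &HashMap<&str, u32>, key: &str) -> Result<u32, MissingKey> {
        // `MissingKey` is trivially cheap to build, so construct it eagerly with
        // `ok_or`; reserve `ok_or_else` for errors that allocate or do real work.
        map.get(key).copied().ok_or(MissingKey)
    }
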
@@ -457,7 +456,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
                 .map(|point| (point, point.data.0))
                 .unzip();
             points_to_remove.iter().for_each(|point| {
-                rtree.remove(&point);
+                rtree.remove(point);
             });
             geo_faceted_doc_ids -= docids_to_remove;
@@ -546,7 +545,7 @@ fn remove_from_word_docids(
     // We create an iterator to be able to get the content and delete the word docids.
     // It's faster to acquire a cursor to get and delete or put, as we avoid traversing
     // the LMDB B-Tree two times but only once.
-    let mut iter = db.prefix_iter_mut(txn, &word)?;
+    let mut iter = db.prefix_iter_mut(txn, word)?;
     if let Some((key, mut docids)) = iter.next().transpose()? {
         if key == word {
             let previous_len = docids.len();
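
Both fixes in this file drop a needless extra borrow: `point` and `word` are already references, so `&point` and `&word` created double references that only compiled through auto-deref (clippy's `needless_borrow` lint). A standalone illustration, not code from this repository:

    fn byte_len(s: &str) -> usize {
        s.len()
    }

    fn main() {
        let word: &str = "house";
        // `word` already has type `&str`; writing `byte_len(&word)` would pass
        // a `&&str` and rely on auto-deref (clippy: needless_borrow).
        assert_eq!(byte_len(word), 5);
    }
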

View File

@@ -0,0 +1 @@
+
View File

@@ -140,7 +140,7 @@ fn fetch_or_generate_document_id(
         }
         None => Ok(Err(UserError::MissingDocumentId {
             primary_key: primary_key.to_string(),
-            document: obkv_to_object(&document, &documents_batch_index)?,
+            document: obkv_to_object(document, documents_batch_index)?,
         })),
     }
 }
@@ -156,7 +156,7 @@ fn fetch_or_generate_document_id(
             if matching_documents_ids.len() >= 2 {
                 return Ok(Err(UserError::TooManyDocumentIds {
                     primary_key: nested.name().to_string(),
-                    document: obkv_to_object(&document, &documents_batch_index)?,
+                    document: obkv_to_object(document, documents_batch_index)?,
                 }));
             }
         }
@@ -170,7 +170,7 @@ fn fetch_or_generate_document_id(
         },
         None => Ok(Err(UserError::MissingDocumentId {
             primary_key: nested.name().to_string(),
-            document: obkv_to_object(&document, &documents_batch_index)?,
+            document: obkv_to_object(document, documents_batch_index)?,
         })),
     }
 }
@@ -313,7 +313,7 @@ pub fn validate_document_id_value(document_id: Value) -> Result<StdResult<String
             None => Ok(Err(UserError::InvalidDocumentId { document_id: Value::String(string) })),
         },
         Value::Number(number) if number.is_i64() => Ok(Ok(number.to_string())),
-        content => Ok(Err(UserError::InvalidDocumentId { document_id: content.clone() })),
+        content => Ok(Err(UserError::InvalidDocumentId { document_id: content })),
     }
 }
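
In the fallthrough arm, `content` owns the `Value` (the function takes `document_id` by value), so cloning it right before its final use was a wasted allocation (clippy's `redundant_clone` lint). A sketch of the same shape on a hypothetical enum:

    #[derive(Debug, Clone)]
    enum Doc {
        Id(i64),
        Other(String),
    }

    fn describe(doc: Doc) -> String {
        match doc {
            Doc::Id(n) => n.to_string(),
            // `other` is owned by this arm and used exactly once, so
            // `other.clone()` would copy a value that is about to be dropped
            // (clippy: redundant_clone).
            other => format!("{other:?}"),
        }
    }

    fn main() {
        println!("{}", describe(Doc::Other("x".into())));
    }
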

View File

@@ -132,7 +132,7 @@ fn json_to_string<'a>(value: &'a Value, buffer: &'a mut String) -> Option<&'a st
     }
 
     if let Value::String(string) = value {
-        Some(&string)
+        Some(string)
     } else if inner(value, buffer) {
         Some(buffer)
     } else {

View File

@@ -67,7 +67,7 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
                 facet_exists_docids.entry(field_id).or_default().insert(document);
                 // For the other extraction tasks, prefix the key with the field_id and the document_id
-                key_buffer.extend_from_slice(&docid_bytes);
+                key_buffer.extend_from_slice(docid_bytes);
                 let value =
                     serde_json::from_slice(field_bytes).map_err(InternalError::SerdeJson)?;
@@ -107,8 +107,8 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
     let facet_exists_docids_reader = writer_into_reader(facet_exists_docids_writer)?;
 
     Ok((
-        sorter_into_reader(fid_docid_facet_numbers_sorter, indexer.clone())?,
-        sorter_into_reader(fid_docid_facet_strings_sorter, indexer.clone())?,
+        sorter_into_reader(fid_docid_facet_numbers_sorter, indexer)?,
+        sorter_into_reader(fid_docid_facet_strings_sorter, indexer)?,
         facet_exists_docids_reader,
     ))
 }
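
Dropping `.clone()` on `indexer` in two consecutive calls only compiles if the type is `Copy`, which is presumably why clippy's `clone_on_copy` lint fired here. A sketch with a hypothetical `Params` struct:

    #[derive(Debug, Clone, Copy)]
    struct Params {
        chunk_size: usize,
    }

    fn consume(p: Params) -> usize {
        p.chunk_size
    }

    fn main() {
        let params = Params { chunk_size: 4096 };
        // For `Copy` types, `.clone()` is just an implicit copy spelled out;
        // passing the value directly is equivalent (clippy: clone_on_copy).
        let a = consume(params);
        let b = consume(params); // still usable: `params` was copied, not moved
        println!("{a} {b}");
    }
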

View File

@@ -150,7 +150,7 @@ pub(crate) fn data_from_obkv_documents(
     spawn_extraction_task::<_, _, Vec<grenad::Reader<File>>>(
         docid_fid_facet_numbers_chunks,
         indexer,
-        lmdb_writer_sx.clone(),
+        lmdb_writer_sx,
         extract_facet_number_docids,
         merge_cbo_roaring_bitmaps,
         TypedChunk::FieldIdFacetNumberDocids,
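
This is the last use of `lmdb_writer_sx` in the function, so the sender can be moved instead of cloned; dropping the extra clone also lets the channel disconnect as soon as all remaining senders finish. A minimal sketch with `std::sync::mpsc` standing in for whatever channel crate the codebase actually uses:

    use std::sync::mpsc;
    use std::thread;

    fn main() {
        let (tx, rx) = mpsc::channel::<u32>();
        // A clone is required while the sender still has later uses…
        let tx2 = tx.clone();
        thread::spawn(move || tx2.send(1).unwrap());
        // …but at the final use the original can be moved in directly; cloning
        // here would leave a sender alive in this scope and keep `rx` open.
        thread::spawn(move || tx.send(2).unwrap());
        assert_eq!(rx.iter().take(2).sum::<u32>(), 3);
    }
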

View File

@@ -30,9 +30,8 @@ pub fn index_prefix_word_database(
     debug!("Computing and writing the word prefix pair proximity docids into LMDB on disk...");
     let common_prefixes: Vec<_> = common_prefix_fst_words
-        .into_iter()
-        .map(|s| s.into_iter())
-        .flatten()
+        .iter()
+        .flat_map(|s| s.iter())
         .map(|s| s.as_str())
         .filter(|s| s.len() <= max_prefix_length)
         .collect();
@@ -73,7 +72,7 @@ pub fn index_prefix_word_database(
     // Now we do the same thing with the new prefixes and all word pairs in the DB
     let new_prefixes: Vec<_> = new_prefix_fst_words
-        .into_iter()
+        .iter()
         .map(|s| s.as_str())
         .filter(|s| s.len() <= max_prefix_length)
         .collect();
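
Both hunks in this file make the same two changes: `.into_iter()` on a reference becomes the clearer `.iter()` (clippy's `into_iter_on_ref`: calling `into_iter` through a reference iterates by reference anyway), and `.map(..).flatten()` is fused into `.flat_map(..)` (clippy's `map_flatten`). Behavior is unchanged; a runnable sketch of the fused chain on made-up data:

    fn main() {
        let groups: Vec<Vec<String>> = vec![
            vec!["ho".to_string(), "hou".to_string()],
            vec!["ca".to_string()],
        ];
        // `iter` borrows the outer Vec, and `flat_map` fuses the former
        // `map(|s| s.iter()).flatten()` pair into one adapter.
        let prefixes: Vec<&str> = groups
            .iter()
            .flat_map(|group| group.iter())
            .map(|s| s.as_str())
            .filter(|s| s.len() <= 3)
            .collect();
        assert_eq!(prefixes, ["ho", "hou", "ca"]);
    }
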

View File

@@ -195,9 +195,8 @@ pub fn index_word_prefix_database(
     // Make a prefix trie from the common prefixes that are shorter than self.max_prefix_length
     let prefixes = PrefixTrieNode::from_sorted_prefixes(
         common_prefix_fst_words
-            .into_iter()
-            .map(|s| s.into_iter())
-            .flatten()
+            .iter()
+            .flat_map(|s| s.iter())
             .map(|s| s.as_str())
             .filter(|s| s.len() <= max_prefix_length),
     );
@@ -237,10 +236,7 @@ pub fn index_word_prefix_database(
     // Now we do the same thing with the new prefixes and all word pairs in the DB
     let prefixes = PrefixTrieNode::from_sorted_prefixes(
-        new_prefix_fst_words
-            .into_iter()
-            .map(|s| s.as_str())
-            .filter(|s| s.len() <= max_prefix_length),
+        new_prefix_fst_words.iter().map(|s| s.as_str()).filter(|s| s.len() <= max_prefix_length),
     );
 
     if !prefixes.is_empty() {
@@ -366,7 +362,7 @@ fn execute_on_word_pairs_and_prefixes<I>(
                 &mut prefix_buffer,
                 &prefix_search_start,
                 |prefix_buffer| {
-                    batch.insert(&prefix_buffer, data.to_vec());
+                    batch.insert(prefix_buffer, data.to_vec());
                 },
             );
         }
@@ -484,7 +480,7 @@ impl PrefixTrieNode {
     fn set_search_start(&self, word: &[u8], search_start: &mut PrefixTrieNodeSearchStart) -> bool {
         let byte = word[0];
         if self.children[search_start.0].1 == byte {
-            return true;
+            true
         } else {
             match self.children[search_start.0..].binary_search_by_key(&byte, |x| x.1) {
                 Ok(position) => {
@@ -502,7 +498,7 @@ impl PrefixTrieNode {
     fn from_sorted_prefixes<'a>(prefixes: impl Iterator<Item = &'a str>) -> Self {
         let mut node = PrefixTrieNode::default();
         for prefix in prefixes {
-            node.insert_sorted_prefix(prefix.as_bytes().into_iter());
+            node.insert_sorted_prefix(prefix.as_bytes().iter());
        }
        node
    }
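
The final two fixes are mechanical as well: `return true;` becomes the tail expression `true` because the `if/else` is the last expression of `set_search_start` (clippy's `needless_return`), and `as_bytes().into_iter()` becomes the equivalent, more explicit `as_bytes().iter()` on a byte slice. A hypothetical function with the same tail-expression shape:

    fn matches_first_two(word: &[u8], byte: u8) -> bool {
        if word.first() == Some(&byte) {
            // Tail expression of the `if/else`, so no explicit `return` is
            // needed (clippy: needless_return).
            true
        } else {
            // Hypothetical fallback, just to keep both branches meaningful.
            word.len() > 1 && word[1] == byte
        }
    }

    fn main() {
        assert!(matches_first_two(b"house", b'h'));
        assert!(matches_first_two(b"ahouse", b'h'));
    }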