mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-09-12 07:36:29 +00:00
Remove docid_word_positions_db + fix deletion bug
That would happen when a word was deleted from all exact attributes but not all regular attributes.
This commit is contained in:
@ -325,8 +325,6 @@ fn send_and_extract_flattened_documents_data(
|
||||
// send docid_word_positions_chunk to DB writer
|
||||
let docid_word_positions_chunk =
|
||||
unsafe { as_cloneable_grenad(&docid_word_positions_chunk)? };
|
||||
let _ = lmdb_writer_sx
|
||||
.send(Ok(TypedChunk::DocidWordPositions(docid_word_positions_chunk.clone())));
|
||||
|
||||
let _ =
|
||||
lmdb_writer_sx.send(Ok(TypedChunk::ScriptLanguageDocids(script_language_pair)));
|
||||
|
@ -4,7 +4,6 @@ use std::result::Result as StdResult;
|
||||
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use super::read_u32_ne_bytes;
|
||||
use crate::heed_codec::CboRoaringBitmapCodec;
|
||||
use crate::update::index_documents::transform::Operation;
|
||||
use crate::Result;
|
||||
@ -22,10 +21,6 @@ pub fn concat_u32s_array<'a>(_key: &[u8], values: &[Cow<'a, [u8]>]) -> Result<Co
|
||||
}
|
||||
}
|
||||
|
||||
pub fn roaring_bitmap_from_u32s_array(slice: &[u8]) -> RoaringBitmap {
|
||||
read_u32_ne_bytes(slice).collect()
|
||||
}
|
||||
|
||||
pub fn serialize_roaring_bitmap(bitmap: &RoaringBitmap, buffer: &mut Vec<u8>) -> io::Result<()> {
|
||||
buffer.clear();
|
||||
buffer.reserve(bitmap.serialized_size());
|
||||
|
@ -14,8 +14,8 @@ pub use grenad_helpers::{
|
||||
};
|
||||
pub use merge_functions::{
|
||||
concat_u32s_array, keep_first, keep_latest_obkv, merge_cbo_roaring_bitmaps,
|
||||
merge_obkvs_and_operations, merge_roaring_bitmaps, merge_two_obkvs,
|
||||
roaring_bitmap_from_u32s_array, serialize_roaring_bitmap, MergeFn,
|
||||
merge_obkvs_and_operations, merge_roaring_bitmaps, merge_two_obkvs, serialize_roaring_bitmap,
|
||||
MergeFn,
|
||||
};
|
||||
|
||||
use crate::MAX_WORD_LENGTH;
|
||||
|
@ -2471,11 +2471,11 @@ mod tests {
|
||||
{
|
||||
"id": 3,
|
||||
"text": "a a a a a a a a a a a a a a a a a
|
||||
a a a a a a a a a a a a a a a a a a a a a a a a a a
|
||||
a a a a a a a a a a a a a a a a a a a a a a a a a a
|
||||
a a a a a a a a a a a a a a a a a a a a a a a a a a
|
||||
a a a a a a a a a a a a a a a a a a a a a a a a a a
|
||||
a a a a a a a a a a a a a a a a a a a a a a a a a a
|
||||
a a a a a a a a a a a a a a a a a a a a a a a a a a
|
||||
a a a a a a a a a a a a a a a a a a a a a a a a a a
|
||||
a a a a a a a a a a a a a a a a a a a a a a a a a a
|
||||
a a a a a a a a a a a a a a a a a a a a a a a a a a
|
||||
a a a a a a a a a a a a a a a a a a a a a a a a a a
|
||||
a a a a a a a a a a a a a a a a a a a a a "
|
||||
}
|
||||
]))
|
||||
@ -2513,6 +2513,5 @@ mod tests {
|
||||
|
||||
db_snap!(index, word_fid_docids, 3, @"4c2e2a1832e5802796edc1638136d933");
|
||||
db_snap!(index, word_position_docids, 3, @"74f556b91d161d997a89468b4da1cb8f");
|
||||
db_snap!(index, docid_word_positions, 3, @"5287245332627675740b28bd46e1cde1");
|
||||
}
|
||||
}
|
||||
|
@ -7,24 +7,19 @@ use std::io;
|
||||
use charabia::{Language, Script};
|
||||
use grenad::MergerBuilder;
|
||||
use heed::types::ByteSlice;
|
||||
use heed::{BytesDecode, RwTxn};
|
||||
use heed::RwTxn;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use super::helpers::{
|
||||
self, merge_ignore_values, roaring_bitmap_from_u32s_array, serialize_roaring_bitmap,
|
||||
valid_lmdb_key, CursorClonableMmap,
|
||||
self, merge_ignore_values, serialize_roaring_bitmap, valid_lmdb_key, CursorClonableMmap,
|
||||
};
|
||||
use super::{ClonableMmap, MergeFn};
|
||||
use crate::facet::FacetType;
|
||||
use crate::update::facet::FacetsUpdate;
|
||||
use crate::update::index_documents::helpers::as_cloneable_grenad;
|
||||
use crate::{
|
||||
lat_lng_to_xyz, BoRoaringBitmapCodec, CboRoaringBitmapCodec, DocumentId, GeoPoint, Index,
|
||||
Result,
|
||||
};
|
||||
use crate::{lat_lng_to_xyz, CboRoaringBitmapCodec, DocumentId, GeoPoint, Index, Result};
|
||||
|
||||
pub(crate) enum TypedChunk {
|
||||
DocidWordPositions(grenad::Reader<CursorClonableMmap>),
|
||||
FieldIdDocidFacetStrings(grenad::Reader<CursorClonableMmap>),
|
||||
FieldIdDocidFacetNumbers(grenad::Reader<CursorClonableMmap>),
|
||||
Documents(grenad::Reader<CursorClonableMmap>),
|
||||
@ -56,29 +51,6 @@ pub(crate) fn write_typed_chunk_into_index(
|
||||
) -> Result<(RoaringBitmap, bool)> {
|
||||
let mut is_merged_database = false;
|
||||
match typed_chunk {
|
||||
TypedChunk::DocidWordPositions(docid_word_positions_iter) => {
|
||||
write_entries_into_database(
|
||||
docid_word_positions_iter,
|
||||
&index.docid_word_positions,
|
||||
wtxn,
|
||||
index_is_empty,
|
||||
|value, buffer| {
|
||||
// ensure that values are unique and ordered
|
||||
let positions = roaring_bitmap_from_u32s_array(value);
|
||||
BoRoaringBitmapCodec::serialize_into(&positions, buffer);
|
||||
Ok(buffer)
|
||||
},
|
||||
|new_values, db_values, buffer| {
|
||||
let new_values = roaring_bitmap_from_u32s_array(new_values);
|
||||
let positions = match BoRoaringBitmapCodec::bytes_decode(db_values) {
|
||||
Some(db_values) => new_values | db_values,
|
||||
None => new_values, // should not happen
|
||||
};
|
||||
BoRoaringBitmapCodec::serialize_into(&positions, buffer);
|
||||
Ok(())
|
||||
},
|
||||
)?;
|
||||
}
|
||||
TypedChunk::Documents(obkv_documents_iter) => {
|
||||
let mut cursor = obkv_documents_iter.into_cursor()?;
|
||||
while let Some((key, value)) = cursor.move_on_next()? {
|
||||
|
Reference in New Issue
Block a user