mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-11-04 01:46:28 +00:00 
			
		
		
		
	Fix some facet indexing bugs
This commit is contained in:
		
				
					committed by
					
						
						Loïc Lecrenier
					
				
			
			
				
	
			
			
			
						parent
						
							68cbcdf08b
						
					
				
				
					commit
					61252248fb
				
			@@ -38,12 +38,13 @@ pub fn extract_facet_string_docids<R: io::Read + io::Seek>(
 | 
			
		||||
 | 
			
		||||
        let (document_id_bytes, normalized_value_bytes) =
 | 
			
		||||
            try_split_array_at::<_, 4>(bytes).unwrap();
 | 
			
		||||
        let document_id = u32::from_be_bytes(document_id_bytes);
 | 
			
		||||
 | 
			
		||||
        let normalised_value = std::str::from_utf8(normalized_value_bytes)?;
 | 
			
		||||
        let key = FacetKey { field_id, level: 0, left_bound: normalised_value };
 | 
			
		||||
        let key_bytes = FacetKeyCodec::<StrRefCodec>::bytes_encode(&key).unwrap();
 | 
			
		||||
 | 
			
		||||
        facet_string_docids_sorter.insert(&key_bytes, &document_id_bytes)?;
 | 
			
		||||
        facet_string_docids_sorter.insert(&key_bytes, &document_id.to_ne_bytes())?;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    sorter_into_reader(facet_string_docids_sorter, indexer)
 | 
			
		||||
 
 | 
			
		||||
@@ -592,7 +592,7 @@ mod tests {
 | 
			
		||||
    use crate::index::tests::TempIndex;
 | 
			
		||||
    use crate::search::TermsMatchingStrategy;
 | 
			
		||||
    use crate::update::DeleteDocuments;
 | 
			
		||||
    use crate::BEU16;
 | 
			
		||||
    use crate::{db_snap, BEU16};
 | 
			
		||||
 | 
			
		||||
    #[test]
 | 
			
		||||
    fn simple_document_replacement() {
 | 
			
		||||
@@ -1379,6 +1379,25 @@ mod tests {
 | 
			
		||||
            })
 | 
			
		||||
            .unwrap();
 | 
			
		||||
 | 
			
		||||
        db_snap!(index, facet_id_string_docids, @r###"
 | 
			
		||||
        3   0  first        1  [1, ]
 | 
			
		||||
        3   0  second       1  [2, ]
 | 
			
		||||
        3   0  third        1  [3, ]
 | 
			
		||||
        3   0  zeroth       1  [0, ]
 | 
			
		||||
        "###);
 | 
			
		||||
        db_snap!(index, field_id_docid_facet_strings, @r###"
 | 
			
		||||
        3   0    zeroth       zeroth
 | 
			
		||||
        3   1    first        first
 | 
			
		||||
        3   2    second       second
 | 
			
		||||
        3   3    third        third
 | 
			
		||||
        "###);
 | 
			
		||||
        db_snap!(index, string_faceted_documents_ids, @r###"
 | 
			
		||||
        0   []
 | 
			
		||||
        1   []
 | 
			
		||||
        2   []
 | 
			
		||||
        3   [0, 1, 2, 3, ]
 | 
			
		||||
        "###);
 | 
			
		||||
 | 
			
		||||
        let rtxn = index.read_txn().unwrap();
 | 
			
		||||
 | 
			
		||||
        let hidden = index.faceted_fields(&rtxn).unwrap();
 | 
			
		||||
@@ -1399,6 +1418,15 @@ mod tests {
 | 
			
		||||
            })
 | 
			
		||||
            .unwrap();
 | 
			
		||||
 | 
			
		||||
        db_snap!(index, facet_id_string_docids, @"");
 | 
			
		||||
        db_snap!(index, field_id_docid_facet_strings, @"");
 | 
			
		||||
        db_snap!(index, string_faceted_documents_ids, @r###"
 | 
			
		||||
        0   []
 | 
			
		||||
        1   []
 | 
			
		||||
        2   []
 | 
			
		||||
        3   [0, 1, 2, 3, ]
 | 
			
		||||
        "###);
 | 
			
		||||
 | 
			
		||||
        let rtxn = index.read_txn().unwrap();
 | 
			
		||||
 | 
			
		||||
        let facets = index.faceted_fields(&rtxn).unwrap();
 | 
			
		||||
@@ -1412,6 +1440,25 @@ mod tests {
 | 
			
		||||
            })
 | 
			
		||||
            .unwrap();
 | 
			
		||||
 | 
			
		||||
        db_snap!(index, facet_id_string_docids, @r###"
 | 
			
		||||
        3   0  first        1  [1, ]
 | 
			
		||||
        3   0  second       1  [2, ]
 | 
			
		||||
        3   0  third        1  [3, ]
 | 
			
		||||
        3   0  zeroth       1  [0, ]
 | 
			
		||||
        "###);
 | 
			
		||||
        db_snap!(index, field_id_docid_facet_strings, @r###"
 | 
			
		||||
        3   0    zeroth       zeroth
 | 
			
		||||
        3   1    first        first
 | 
			
		||||
        3   2    second       second
 | 
			
		||||
        3   3    third        third
 | 
			
		||||
        "###);
 | 
			
		||||
        db_snap!(index, string_faceted_documents_ids, @r###"
 | 
			
		||||
        0   []
 | 
			
		||||
        1   []
 | 
			
		||||
        2   []
 | 
			
		||||
        3   [0, 1, 2, 3, ]
 | 
			
		||||
        "###);
 | 
			
		||||
 | 
			
		||||
        let rtxn = index.read_txn().unwrap();
 | 
			
		||||
 | 
			
		||||
        let facets = index.faceted_fields(&rtxn).unwrap();
 | 
			
		||||
 
 | 
			
		||||
@@ -1,4 +1,5 @@
 | 
			
		||||
use std::borrow::Cow;
 | 
			
		||||
use std::collections::HashMap;
 | 
			
		||||
use std::convert::TryInto;
 | 
			
		||||
use std::fs::File;
 | 
			
		||||
use std::io;
 | 
			
		||||
@@ -17,8 +18,8 @@ use crate::heed_codec::facet::new::{FacetKeyCodec, MyByteSlice};
 | 
			
		||||
use crate::update::index_documents::helpers::as_cloneable_grenad;
 | 
			
		||||
use crate::update::FacetsUpdateIncremental;
 | 
			
		||||
use crate::{
 | 
			
		||||
    lat_lng_to_xyz, BoRoaringBitmapCodec, CboRoaringBitmapCodec, DocumentId, GeoPoint, Index,
 | 
			
		||||
    Result,
 | 
			
		||||
    lat_lng_to_xyz, BoRoaringBitmapCodec, CboRoaringBitmapCodec, DocumentId, FieldId, GeoPoint,
 | 
			
		||||
    Index, Result,
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
pub(crate) enum TypedChunk {
 | 
			
		||||
@@ -138,14 +139,41 @@ pub(crate) fn write_typed_chunk_into_index(
 | 
			
		||||
            is_merged_database = true;
 | 
			
		||||
        }
 | 
			
		||||
        TypedChunk::FieldIdFacetNumberDocids(facet_id_f64_docids_iter) => {
 | 
			
		||||
            append_entries_into_database(
 | 
			
		||||
                facet_id_f64_docids_iter,
 | 
			
		||||
                &index.facet_id_f64_docids,
 | 
			
		||||
                wtxn,
 | 
			
		||||
                index_is_empty,
 | 
			
		||||
                |value, _buffer| Ok(value),
 | 
			
		||||
                merge_cbo_roaring_bitmaps,
 | 
			
		||||
            )?;
 | 
			
		||||
            // merge cbo roaring bitmaps is not the correct merger because the data in the DB
 | 
			
		||||
            // is FacetGroupValue and not RoaringBitmap
 | 
			
		||||
            // so I need to create my own merging function
 | 
			
		||||
 | 
			
		||||
            // facet_id_string_docids is encoded as:
 | 
			
		||||
            // key: FacetKeyCodec<StrRefCodec>
 | 
			
		||||
            // value: CboRoaringBitmapCodec
 | 
			
		||||
            // basically
 | 
			
		||||
 | 
			
		||||
            // TODO: a condition saying "if I have more than 1/50th of the DB to add,
 | 
			
		||||
            // then I do it in bulk, otherwise I do it incrementally". But instead of 1/50,
 | 
			
		||||
            // it is a ratio I determine empirically
 | 
			
		||||
 | 
			
		||||
            // for now I only do it incrementally, to see if things work
 | 
			
		||||
            let indexer = FacetsUpdateIncremental::new(
 | 
			
		||||
                index.facet_id_f64_docids.remap_key_type::<FacetKeyCodec<MyByteSlice>>(),
 | 
			
		||||
            );
 | 
			
		||||
 | 
			
		||||
            let mut new_faceted_docids = HashMap::<FieldId, RoaringBitmap>::default();
 | 
			
		||||
 | 
			
		||||
            let mut cursor = facet_id_f64_docids_iter.into_cursor()?;
 | 
			
		||||
            while let Some((key, value)) = cursor.move_on_next()? {
 | 
			
		||||
                let key =
 | 
			
		||||
                    FacetKeyCodec::<MyByteSlice>::bytes_decode(key).ok_or(heed::Error::Encoding)?;
 | 
			
		||||
                let docids =
 | 
			
		||||
                    CboRoaringBitmapCodec::bytes_decode(value).ok_or(heed::Error::Encoding)?;
 | 
			
		||||
                indexer.insert(wtxn, key.field_id, key.left_bound, &docids)?;
 | 
			
		||||
                *new_faceted_docids.entry(key.field_id).or_default() |= docids;
 | 
			
		||||
            }
 | 
			
		||||
            for (field_id, new_docids) in new_faceted_docids {
 | 
			
		||||
                let mut docids = index.number_faceted_documents_ids(wtxn, field_id)?;
 | 
			
		||||
                docids |= new_docids;
 | 
			
		||||
                index.put_number_faceted_documents_ids(wtxn, field_id, &docids)?;
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            is_merged_database = true;
 | 
			
		||||
        }
 | 
			
		||||
        TypedChunk::FieldIdFacetStringDocids(facet_id_string_docids) => {
 | 
			
		||||
@@ -163,16 +191,24 @@ pub(crate) fn write_typed_chunk_into_index(
 | 
			
		||||
            // it is a ratio I determine empirically
 | 
			
		||||
 | 
			
		||||
            // for now I only do it incrementally, to see if things work
 | 
			
		||||
            let builder = FacetsUpdateIncremental::new(
 | 
			
		||||
            let indexer = FacetsUpdateIncremental::new(
 | 
			
		||||
                index.facet_id_string_docids.remap_key_type::<FacetKeyCodec<MyByteSlice>>(),
 | 
			
		||||
            );
 | 
			
		||||
            let mut new_faceted_docids = HashMap::<FieldId, RoaringBitmap>::default();
 | 
			
		||||
 | 
			
		||||
            let mut cursor = facet_id_string_docids.into_cursor()?;
 | 
			
		||||
            while let Some((key, value)) = cursor.move_on_next()? {
 | 
			
		||||
                let key =
 | 
			
		||||
                    FacetKeyCodec::<MyByteSlice>::bytes_decode(key).ok_or(heed::Error::Encoding)?;
 | 
			
		||||
                let value =
 | 
			
		||||
                let docids =
 | 
			
		||||
                    CboRoaringBitmapCodec::bytes_decode(value).ok_or(heed::Error::Encoding)?;
 | 
			
		||||
                builder.insert(wtxn, key.field_id, key.left_bound, &value)?;
 | 
			
		||||
                indexer.insert(wtxn, key.field_id, key.left_bound, &docids)?;
 | 
			
		||||
                *new_faceted_docids.entry(key.field_id).or_default() |= docids;
 | 
			
		||||
            }
 | 
			
		||||
            for (field_id, new_docids) in new_faceted_docids {
 | 
			
		||||
                let mut docids = index.string_faceted_documents_ids(wtxn, field_id)?;
 | 
			
		||||
                docids |= new_docids;
 | 
			
		||||
                index.put_string_faceted_documents_ids(wtxn, field_id, &docids)?;
 | 
			
		||||
            }
 | 
			
		||||
            is_merged_database = true;
 | 
			
		||||
        }
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user