mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-26 05:26:27 +00:00 
			
		
		
		
	Merge branch 'filter/field-exist'
This commit is contained in:
		| @@ -6,6 +6,8 @@ mod facet_string_zero_bounds_value_codec; | ||||
| mod field_doc_id_facet_f64_codec; | ||||
| mod field_doc_id_facet_string_codec; | ||||
|  | ||||
| use heed::types::OwnedType; | ||||
|  | ||||
| pub use self::facet_level_value_f64_codec::FacetLevelValueF64Codec; | ||||
| pub use self::facet_level_value_u32_codec::FacetLevelValueU32Codec; | ||||
| pub use self::facet_string_level_zero_codec::FacetStringLevelZeroCodec; | ||||
| @@ -15,6 +17,9 @@ pub use self::facet_string_level_zero_value_codec::{ | ||||
| pub use self::facet_string_zero_bounds_value_codec::FacetStringZeroBoundsValueCodec; | ||||
| pub use self::field_doc_id_facet_f64_codec::FieldDocIdFacetF64Codec; | ||||
| pub use self::field_doc_id_facet_string_codec::FieldDocIdFacetStringCodec; | ||||
| use crate::BEU16; | ||||
|  | ||||
| pub type FieldIdCodec = OwnedType<BEU16>; | ||||
|  | ||||
| /// Tries to split a slice in half at the given middle point, | ||||
| /// `None` if the slice is too short. | ||||
|   | ||||
| @@ -15,13 +15,13 @@ use crate::error::{InternalError, UserError}; | ||||
| use crate::fields_ids_map::FieldsIdsMap; | ||||
| use crate::heed_codec::facet::{ | ||||
|     FacetLevelValueF64Codec, FacetStringLevelZeroCodec, FacetStringLevelZeroValueCodec, | ||||
|     FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec, | ||||
|     FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec, FieldIdCodec, | ||||
| }; | ||||
| use crate::{ | ||||
|     default_criteria, BEU32StrCodec, BoRoaringBitmapCodec, CboRoaringBitmapCodec, Criterion, | ||||
|     DocumentId, ExternalDocumentsIds, FacetDistribution, FieldDistribution, FieldId, | ||||
|     FieldIdWordCountCodec, GeoPoint, ObkvCodec, Result, RoaringBitmapCodec, RoaringBitmapLenCodec, | ||||
|     Search, StrBEU32Codec, StrStrU8Codec, BEU32, | ||||
|     Search, StrBEU32Codec, StrStrU8Codec, BEU16, BEU32, | ||||
| }; | ||||
|  | ||||
| pub const DEFAULT_MIN_WORD_LEN_ONE_TYPO: u8 = 5; | ||||
| @@ -75,6 +75,7 @@ pub mod db_name { | ||||
|     pub const WORD_PREFIX_POSITION_DOCIDS: &str = "word-prefix-position-docids"; | ||||
|     pub const FIELD_ID_WORD_COUNT_DOCIDS: &str = "field-id-word-count-docids"; | ||||
|     pub const FACET_ID_F64_DOCIDS: &str = "facet-id-f64-docids"; | ||||
|     pub const FACET_ID_EXISTS_DOCIDS: &str = "facet-id-exists-docids"; | ||||
|     pub const FACET_ID_STRING_DOCIDS: &str = "facet-id-string-docids"; | ||||
|     pub const FIELD_ID_DOCID_FACET_F64S: &str = "field-id-docid-facet-f64s"; | ||||
|     pub const FIELD_ID_DOCID_FACET_STRINGS: &str = "field-id-docid-facet-strings"; | ||||
| @@ -116,6 +117,9 @@ pub struct Index { | ||||
|     /// Maps the position of a word prefix with all the docids where this prefix appears. | ||||
|     pub word_prefix_position_docids: Database<StrBEU32Codec, CboRoaringBitmapCodec>, | ||||
|  | ||||
|     /// Maps the facet field id to the docids of the documents in which this field exists. | ||||
|     pub facet_id_exists_docids: Database<FieldIdCodec, CboRoaringBitmapCodec>, | ||||
|  | ||||
|     /// Maps the facet field id, level and the number with the docids that corresponds to it. | ||||
|     pub facet_id_f64_docids: Database<FacetLevelValueF64Codec, CboRoaringBitmapCodec>, | ||||
|     /// Maps the facet field id and the string with the original string and docids that corresponds to it. | ||||
| @@ -134,7 +138,7 @@ impl Index { | ||||
|     pub fn new<P: AsRef<Path>>(mut options: heed::EnvOpenOptions, path: P) -> Result<Index> { | ||||
|         use db_name::*; | ||||
|  | ||||
|         options.max_dbs(16); | ||||
|         options.max_dbs(17); | ||||
|         unsafe { options.flag(Flags::MdbAlwaysFreePages) }; | ||||
|  | ||||
|         let env = options.open(path)?; | ||||
| @@ -152,6 +156,8 @@ impl Index { | ||||
|         let word_prefix_position_docids = env.create_database(Some(WORD_PREFIX_POSITION_DOCIDS))?; | ||||
|         let facet_id_f64_docids = env.create_database(Some(FACET_ID_F64_DOCIDS))?; | ||||
|         let facet_id_string_docids = env.create_database(Some(FACET_ID_STRING_DOCIDS))?; | ||||
|         let facet_id_exists_docids = env.create_database(Some(FACET_ID_EXISTS_DOCIDS))?; | ||||
|  | ||||
|         let field_id_docid_facet_f64s = env.create_database(Some(FIELD_ID_DOCID_FACET_F64S))?; | ||||
|         let field_id_docid_facet_strings = | ||||
|             env.create_database(Some(FIELD_ID_DOCID_FACET_STRINGS))?; | ||||
| @@ -174,6 +180,7 @@ impl Index { | ||||
|             field_id_word_count_docids, | ||||
|             facet_id_f64_docids, | ||||
|             facet_id_string_docids, | ||||
|             facet_id_exists_docids, | ||||
|             field_id_docid_facet_f64s, | ||||
|             field_id_docid_facet_strings, | ||||
|             documents, | ||||
| @@ -806,6 +813,18 @@ impl Index { | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     /// Retrieve the ids of all the documents which contain this field id (an empty bitmap when the database has no entry for it) | ||||
|     pub fn exists_faceted_documents_ids( | ||||
|         &self, | ||||
|         rtxn: &RoTxn, | ||||
|         field_id: FieldId, | ||||
|     ) -> heed::Result<RoaringBitmap> { | ||||
|         match self.facet_id_exists_docids.get(rtxn, &BEU16::new(field_id))? { | ||||
|             Some(docids) => Ok(docids), | ||||
|             None => Ok(RoaringBitmap::new()), | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     /* distinct field */ | ||||
|  | ||||
|     pub(crate) fn put_distinct_field( | ||||
|   | ||||
| @@ -44,6 +44,7 @@ pub use self::search::{ | ||||
| pub type Result<T> = std::result::Result<T, error::Error>; | ||||
|  | ||||
| pub type Attribute = u32; | ||||
| pub type BEU16 = heed::zerocopy::U16<heed::byteorder::BE>; | ||||
| pub type BEU32 = heed::zerocopy::U32<heed::byteorder::BE>; | ||||
| pub type BEU64 = heed::zerocopy::U64<heed::byteorder::BE>; | ||||
| pub type DocumentId = u32; | ||||
|   | ||||
| @@ -280,6 +280,18 @@ impl<'a> Filter<'a> { | ||||
|             Condition::LowerThan(val) => (Included(f64::MIN), Excluded(val.parse()?)), | ||||
|             Condition::LowerThanOrEqual(val) => (Included(f64::MIN), Included(val.parse()?)), | ||||
|             Condition::Between { from, to } => (Included(from.parse()?), Included(to.parse()?)), | ||||
|             Condition::Exists => { | ||||
|                 let exist = index.exists_faceted_documents_ids(rtxn, field_id)?; | ||||
|                 return Ok(exist); | ||||
|             } | ||||
|             Condition::NotExists => { | ||||
|                 let all_ids = index.documents_ids(rtxn)?; | ||||
|  | ||||
|                 let exist = Self::evaluate_operator(rtxn, index, field_id, &Condition::Exists)?; | ||||
|  | ||||
|                 let notexist = all_ids - exist; | ||||
|                 return Ok(notexist); | ||||
|             } | ||||
|             Condition::Equal(val) => { | ||||
|                 let (_original_value, string_docids) = strings_db | ||||
|                     .get(rtxn, &(field_id, &val.value().to_lowercase()))? | ||||
|   | ||||
| @@ -30,6 +30,7 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> { | ||||
|             word_prefix_position_docids, | ||||
|             facet_id_f64_docids, | ||||
|             facet_id_string_docids, | ||||
|             facet_id_exists_docids, | ||||
|             field_id_docid_facet_f64s, | ||||
|             field_id_docid_facet_strings, | ||||
|             documents, | ||||
| @@ -69,6 +70,7 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> { | ||||
|         field_id_word_count_docids.clear(self.wtxn)?; | ||||
|         word_prefix_position_docids.clear(self.wtxn)?; | ||||
|         facet_id_f64_docids.clear(self.wtxn)?; | ||||
|         facet_id_exists_docids.clear(self.wtxn)?; | ||||
|         facet_id_string_docids.clear(self.wtxn)?; | ||||
|         field_id_docid_facet_f64s.clear(self.wtxn)?; | ||||
|         field_id_docid_facet_strings.clear(self.wtxn)?; | ||||
|   | ||||
| @@ -170,6 +170,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { | ||||
|             word_position_docids, | ||||
|             word_prefix_position_docids, | ||||
|             facet_id_f64_docids, | ||||
|             facet_id_exists_docids, | ||||
|             facet_id_string_docids, | ||||
|             field_id_docid_facet_f64s, | ||||
|             field_id_docid_facet_strings, | ||||
| @@ -424,11 +425,17 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { | ||||
|         } | ||||
|  | ||||
|         // We delete the documents ids that are under the facet field id values. | ||||
|         remove_docids_from_facet_field_id_number_docids( | ||||
|         remove_docids_from_facet_field_id_docids( | ||||
|             self.wtxn, | ||||
|             facet_id_f64_docids, | ||||
|             &self.to_delete_docids, | ||||
|         )?; | ||||
|         // We delete the documents ids that are listed in the facet field id "exists" database. | ||||
|         remove_docids_from_facet_field_id_docids( | ||||
|             self.wtxn, | ||||
|             facet_id_exists_docids, | ||||
|             &self.to_delete_docids, | ||||
|         )?; | ||||
|  | ||||
|         remove_docids_from_facet_field_id_string_docids( | ||||
|             self.wtxn, | ||||
| @@ -618,7 +625,7 @@ fn remove_docids_from_facet_field_id_string_docids<'a, C, D>( | ||||
|     Ok(()) | ||||
| } | ||||
|  | ||||
| fn remove_docids_from_facet_field_id_number_docids<'a, C>( | ||||
| fn remove_docids_from_facet_field_id_docids<'a, C>( | ||||
|     wtxn: &'a mut heed::RwTxn, | ||||
|     db: &heed::Database<C, CboRoaringBitmapCodec>, | ||||
|     to_remove: &RoaringBitmap, | ||||
|   | ||||
| @@ -1,15 +1,19 @@ | ||||
| use std::collections::HashSet; | ||||
| use std::collections::{BTreeMap, HashSet}; | ||||
| use std::convert::TryInto; | ||||
| use std::fs::File; | ||||
| use std::io; | ||||
| use std::mem::size_of; | ||||
|  | ||||
| use heed::zerocopy::AsBytes; | ||||
| use heed::BytesEncode; | ||||
| use roaring::RoaringBitmap; | ||||
| use serde_json::Value; | ||||
|  | ||||
| use super::helpers::{create_sorter, keep_first, sorter_into_reader, GrenadParameters}; | ||||
| use crate::error::InternalError; | ||||
| use crate::facet::value_encoding::f64_into_bytes; | ||||
| use crate::{DocumentId, FieldId, Result}; | ||||
| use crate::update::index_documents::{create_writer, writer_into_reader}; | ||||
| use crate::{CboRoaringBitmapCodec, DocumentId, FieldId, Result, BEU32}; | ||||
|  | ||||
| /// Extracts the facet values of each faceted field of each document. | ||||
| /// | ||||
| @@ -20,7 +24,7 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>( | ||||
|     obkv_documents: grenad::Reader<R>, | ||||
|     indexer: GrenadParameters, | ||||
|     faceted_fields: &HashSet<FieldId>, | ||||
| ) -> Result<(grenad::Reader<File>, grenad::Reader<File>)> { | ||||
| ) -> Result<(grenad::Reader<File>, grenad::Reader<File>, grenad::Reader<File>)> { | ||||
|     let max_memory = indexer.max_memory_by_thread(); | ||||
|  | ||||
|     let mut fid_docid_facet_numbers_sorter = create_sorter( | ||||
| @@ -39,6 +43,8 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>( | ||||
|         max_memory.map(|m| m / 2), | ||||
|     ); | ||||
|  | ||||
|     let mut facet_exists_docids = BTreeMap::<FieldId, RoaringBitmap>::new(); | ||||
|  | ||||
|     let mut key_buffer = Vec::new(); | ||||
|     let mut cursor = obkv_documents.into_cursor()?; | ||||
|     while let Some((docid_bytes, value)) = cursor.move_on_next()? { | ||||
| @@ -46,16 +52,26 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>( | ||||
|  | ||||
|         for (field_id, field_bytes) in obkv.iter() { | ||||
|             if faceted_fields.contains(&field_id) { | ||||
|                 let value = | ||||
|                     serde_json::from_slice(field_bytes).map_err(InternalError::SerdeJson)?; | ||||
|                 let (numbers, strings) = extract_facet_values(&value); | ||||
|  | ||||
|                 key_buffer.clear(); | ||||
|  | ||||
|                 // prefix key with the field_id and the document_id | ||||
|                 // Set key to the field_id | ||||
|                 // Note: this encoding is consistent with FieldIdCodec | ||||
|                 key_buffer.extend_from_slice(&field_id.to_be_bytes()); | ||||
|  | ||||
|                 // Here, we know already that the document must be added to the “field id exists” database | ||||
|                 let document: [u8; 4] = docid_bytes[..4].try_into().ok().unwrap(); | ||||
|                 let document = BEU32::from(document).get(); | ||||
|  | ||||
|                 facet_exists_docids.entry(field_id).or_default().insert(document); | ||||
|  | ||||
|                 // For the other extraction tasks, prefix the key with the field_id and the document_id | ||||
|                 key_buffer.extend_from_slice(&docid_bytes); | ||||
|  | ||||
|                 let value = | ||||
|                     serde_json::from_slice(field_bytes).map_err(InternalError::SerdeJson)?; | ||||
|  | ||||
|                 let (numbers, strings) = extract_facet_values(&value); | ||||
|  | ||||
|                 // insert facet numbers in sorter | ||||
|                 for number in numbers { | ||||
|                     key_buffer.truncate(size_of::<FieldId>() + size_of::<DocumentId>()); | ||||
| @@ -77,9 +93,21 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>( | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     let mut facet_exists_docids_writer = create_writer( | ||||
|         indexer.chunk_compression_type, | ||||
|         indexer.chunk_compression_level, | ||||
|         tempfile::tempfile()?, | ||||
|     ); | ||||
|     for (fid, bitmap) in facet_exists_docids.into_iter() { | ||||
|         let bitmap_bytes = CboRoaringBitmapCodec::bytes_encode(&bitmap).unwrap(); | ||||
|         facet_exists_docids_writer.insert(fid.to_be_bytes(), &bitmap_bytes)?; | ||||
|     } | ||||
|     let facet_exists_docids_reader = writer_into_reader(facet_exists_docids_writer)?; | ||||
|  | ||||
|     Ok(( | ||||
|         sorter_into_reader(fid_docid_facet_numbers_sorter, indexer.clone())?, | ||||
|         sorter_into_reader(fid_docid_facet_strings_sorter, indexer)?, | ||||
|         sorter_into_reader(fid_docid_facet_strings_sorter, indexer.clone())?, | ||||
|         facet_exists_docids_reader, | ||||
|     )) | ||||
| } | ||||
|  | ||||
|   | ||||
| @@ -53,7 +53,7 @@ pub(crate) fn data_from_obkv_documents( | ||||
|         }) | ||||
|         .collect::<Result<()>>()?; | ||||
|  | ||||
|     let result: Result<(Vec<_>, (Vec<_>, Vec<_>))> = flattened_obkv_chunks | ||||
|     let result: Result<(Vec<_>, (Vec<_>, (Vec<_>, Vec<_>)))> = flattened_obkv_chunks | ||||
|         .par_bridge() | ||||
|         .map(|flattened_obkv_chunks| { | ||||
|             send_and_extract_flattened_documents_data( | ||||
| @@ -72,9 +72,28 @@ pub(crate) fn data_from_obkv_documents( | ||||
|  | ||||
|     let ( | ||||
|         docid_word_positions_chunks, | ||||
|         (docid_fid_facet_numbers_chunks, docid_fid_facet_strings_chunks), | ||||
|         ( | ||||
|             docid_fid_facet_numbers_chunks, | ||||
|             (docid_fid_facet_strings_chunks, facet_exists_docids_chunks), | ||||
|         ), | ||||
|     ) = result?; | ||||
|  | ||||
|     // merge facet_exists_docids and send them as a typed chunk | ||||
|     { | ||||
|         let lmdb_writer_sx = lmdb_writer_sx.clone(); | ||||
|         rayon::spawn(move || { | ||||
|             debug!("merge {} database", "facet-id-exists-docids"); | ||||
|             match facet_exists_docids_chunks.merge(merge_cbo_roaring_bitmaps, &indexer) { | ||||
|                 Ok(reader) => { | ||||
|                     let _ = lmdb_writer_sx.send(Ok(TypedChunk::FieldIdFacetExistsDocids(reader))); | ||||
|                 } | ||||
|                 Err(e) => { | ||||
|                     let _ = lmdb_writer_sx.send(Err(e)); | ||||
|                 } | ||||
|             } | ||||
|         }); | ||||
|     } | ||||
|  | ||||
|     spawn_extraction_task::<_, _, Vec<grenad::Reader<File>>>( | ||||
|         docid_word_positions_chunks.clone(), | ||||
|         indexer.clone(), | ||||
| @@ -197,6 +216,7 @@ fn send_original_documents_data( | ||||
| /// - docid_word_positions | ||||
| /// - docid_fid_facet_numbers | ||||
| /// - docid_fid_facet_strings | ||||
| /// - fid_facet_exists_docids | ||||
| fn send_and_extract_flattened_documents_data( | ||||
|     flattened_documents_chunk: Result<grenad::Reader<File>>, | ||||
|     indexer: GrenadParameters, | ||||
| @@ -209,7 +229,10 @@ fn send_and_extract_flattened_documents_data( | ||||
|     max_positions_per_attributes: Option<u32>, | ||||
| ) -> Result<( | ||||
|     grenad::Reader<CursorClonableMmap>, | ||||
|     (grenad::Reader<CursorClonableMmap>, grenad::Reader<CursorClonableMmap>), | ||||
|     ( | ||||
|         grenad::Reader<CursorClonableMmap>, | ||||
|         (grenad::Reader<CursorClonableMmap>, grenad::Reader<File>), | ||||
|     ), | ||||
| )> { | ||||
|     let flattened_documents_chunk = | ||||
|         flattened_documents_chunk.and_then(|c| unsafe { as_cloneable_grenad(&c) })?; | ||||
| @@ -250,12 +273,15 @@ fn send_and_extract_flattened_documents_data( | ||||
|                 Ok(docid_word_positions_chunk) | ||||
|             }, | ||||
|             || { | ||||
|                 let (docid_fid_facet_numbers_chunk, docid_fid_facet_strings_chunk) = | ||||
|                     extract_fid_docid_facet_values( | ||||
|                         flattened_documents_chunk.clone(), | ||||
|                         indexer.clone(), | ||||
|                         faceted_fields, | ||||
|                     )?; | ||||
|                 let ( | ||||
|                     docid_fid_facet_numbers_chunk, | ||||
|                     docid_fid_facet_strings_chunk, | ||||
|                     fid_facet_exists_docids_chunk, | ||||
|                 ) = extract_fid_docid_facet_values( | ||||
|                     flattened_documents_chunk.clone(), | ||||
|                     indexer.clone(), | ||||
|                     faceted_fields, | ||||
|                 )?; | ||||
|  | ||||
|                 // send docid_fid_facet_numbers_chunk to DB writer | ||||
|                 let docid_fid_facet_numbers_chunk = | ||||
| @@ -273,7 +299,10 @@ fn send_and_extract_flattened_documents_data( | ||||
|                     docid_fid_facet_strings_chunk.clone(), | ||||
|                 ))); | ||||
|  | ||||
|                 Ok((docid_fid_facet_numbers_chunk, docid_fid_facet_strings_chunk)) | ||||
|                 Ok(( | ||||
|                     docid_fid_facet_numbers_chunk, | ||||
|                     (docid_fid_facet_strings_chunk, fid_facet_exists_docids_chunk), | ||||
|                 )) | ||||
|             }, | ||||
|         ); | ||||
|  | ||||
|   | ||||
| @@ -613,6 +613,7 @@ mod tests { | ||||
|     use super::*; | ||||
|     use crate::documents::DocumentsBatchBuilder; | ||||
|     use crate::update::DeleteDocuments; | ||||
|     use crate::BEU16; | ||||
|  | ||||
|     #[test] | ||||
|     fn simple_document_replacement() { | ||||
| @@ -2040,6 +2041,109 @@ mod tests { | ||||
|         assert_eq!(ids.len(), map.len()); | ||||
|     } | ||||
|  | ||||
|     #[test] | ||||
|     fn index_documents_check_exists_database() { | ||||
|         let config = IndexerConfig::default(); | ||||
|         let indexing_config = IndexDocumentsConfig::default(); | ||||
|  | ||||
|         let faceted_fields = hashset!(S("colour")); | ||||
|         let content = || { | ||||
|             documents!([ | ||||
|                 { | ||||
|                     "id": 0, | ||||
|                     "colour": 0, | ||||
|                 }, | ||||
|                 { | ||||
|                     "id": 1, | ||||
|                     "colour": [] | ||||
|                 }, | ||||
|                 { | ||||
|                     "id": 2, | ||||
|                     "colour": {} | ||||
|                 }, | ||||
|                 { | ||||
|                     "id": 3, | ||||
|                     "colour": null | ||||
|                 }, | ||||
|                 { | ||||
|                     "id": 4, | ||||
|                     "colour": [1] | ||||
|                 }, | ||||
|                 { | ||||
|                     "id": 5 | ||||
|                 }, | ||||
|                 { | ||||
|                     "id": 6, | ||||
|                     "colour": { | ||||
|                         "green": 1 | ||||
|                     } | ||||
|                 }, | ||||
|                 { | ||||
|                     "id": 7, | ||||
|                     "colour": { | ||||
|                         "green": { | ||||
|                             "blue": [] | ||||
|                         } | ||||
|                     } | ||||
|                 } | ||||
|             ]) | ||||
|         }; | ||||
|         let make_index = || { | ||||
|             let path = tempfile::tempdir().unwrap(); | ||||
|             let mut options = EnvOpenOptions::new(); | ||||
|             options.map_size(10 * 1024 * 1024); // 10 MiB map size; the fixture above is only eight small documents | ||||
|             Index::new(options, &path).unwrap() | ||||
|         }; | ||||
|  | ||||
|         let set_filterable_fields = |index: &Index| { | ||||
|             let mut wtxn = index.write_txn().unwrap(); | ||||
|             let mut builder = update::Settings::new(&mut wtxn, &index, &config); | ||||
|             builder.set_filterable_fields(faceted_fields.clone()); | ||||
|             builder.execute(|_| ()).unwrap(); | ||||
|             wtxn.commit().unwrap(); | ||||
|         }; | ||||
|         let add_documents = |index: &Index| { | ||||
|             let mut wtxn = index.write_txn().unwrap(); | ||||
|             let builder = | ||||
|                 IndexDocuments::new(&mut wtxn, index, &config, indexing_config.clone(), |_| ()) | ||||
|                     .unwrap(); | ||||
|             let (builder, user_error) = builder.add_documents(content()).unwrap(); | ||||
|             user_error.unwrap(); | ||||
|             builder.execute().unwrap(); | ||||
|             wtxn.commit().unwrap(); | ||||
|         }; | ||||
|  | ||||
|         let check_ok = |index: &Index| { | ||||
|             let rtxn = index.read_txn().unwrap(); | ||||
|             let facets = index.faceted_fields(&rtxn).unwrap(); | ||||
|             assert_eq!(facets, hashset!(S("colour"), S("colour.green"), S("colour.green.blue"))); | ||||
|  | ||||
|             let colour_id = index.fields_ids_map(&rtxn).unwrap().id("colour").unwrap(); | ||||
|             let colour_green_id = index.fields_ids_map(&rtxn).unwrap().id("colour.green").unwrap(); | ||||
|  | ||||
|             let bitmap_colour = | ||||
|                 index.facet_id_exists_docids.get(&rtxn, &BEU16::new(colour_id)).unwrap().unwrap(); | ||||
|             assert_eq!(bitmap_colour.into_iter().collect::<Vec<_>>(), vec![0, 1, 2, 3, 4, 6, 7]); | ||||
|  | ||||
|             let bitmap_colour_green = index | ||||
|                 .facet_id_exists_docids | ||||
|                 .get(&rtxn, &BEU16::new(colour_green_id)) | ||||
|                 .unwrap() | ||||
|                 .unwrap(); | ||||
|             assert_eq!(bitmap_colour_green.into_iter().collect::<Vec<_>>(), vec![6, 7]); | ||||
|         }; | ||||
|  | ||||
|         let index = make_index(); | ||||
|         add_documents(&index); | ||||
|         set_filterable_fields(&index); | ||||
|         check_ok(&index); | ||||
|  | ||||
|         let index = make_index(); | ||||
|         set_filterable_fields(&index); | ||||
|         add_documents(&index); | ||||
|         check_ok(&index); | ||||
|     } | ||||
|  | ||||
|     #[test] | ||||
|     fn primary_key_must_not_contain_floats() { | ||||
|         let tmp = tempfile::tempdir().unwrap(); | ||||
|   | ||||
| @@ -35,6 +35,7 @@ pub(crate) enum TypedChunk { | ||||
|     WordPairProximityDocids(grenad::Reader<File>), | ||||
|     FieldIdFacetStringDocids(grenad::Reader<File>), | ||||
|     FieldIdFacetNumberDocids(grenad::Reader<File>), | ||||
|     FieldIdFacetExistsDocids(grenad::Reader<File>), | ||||
|     GeoPoints(grenad::Reader<File>), | ||||
| } | ||||
|  | ||||
| @@ -146,6 +147,17 @@ pub(crate) fn write_typed_chunk_into_index( | ||||
|             )?; | ||||
|             is_merged_database = true; | ||||
|         } | ||||
|         TypedChunk::FieldIdFacetExistsDocids(facet_id_exists_docids) => { | ||||
|             append_entries_into_database( | ||||
|                 facet_id_exists_docids, | ||||
|                 &index.facet_id_exists_docids, | ||||
|                 wtxn, | ||||
|                 index_is_empty, | ||||
|                 |value, _buffer| Ok(value), | ||||
|                 merge_cbo_roaring_bitmaps, | ||||
|             )?; | ||||
|             is_merged_database = true; | ||||
|         } | ||||
|         TypedChunk::WordPairProximityDocids(word_pair_proximity_docids_iter) => { | ||||
|             append_entries_into_database( | ||||
|                 word_pair_proximity_docids_iter, | ||||
|   | ||||
		Reference in New Issue
	
	Block a user