mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-30 23:46:28 +00:00 
			
		
		
		
	Reintroduce filter range search and facet extractors
This commit is contained in:
		
				
					committed by
					
						Loïc Lecrenier
					
				
			
			
				
	
			
			
			
						parent
						
							22d80eeaf9
						
					
				
				
					commit
					39a4a0a362
				
			| @@ -15,7 +15,7 @@ use super::get_last_facet_value; | ||||
|  | ||||
| pub fn find_docids_of_facet_within_bounds<'t, BoundCodec>( | ||||
|     rtxn: &'t heed::RoTxn<'t>, | ||||
|     db: &'t heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>, | ||||
|     db: &'t heed::Database<FacetKeyCodec<BoundCodec>, FacetGroupValueCodec>, | ||||
|     field_id: u16, | ||||
|     left: &'t Bound<<BoundCodec as BytesEncode<'t>>::EItem>, | ||||
|     right: &'t Bound<<BoundCodec as BytesEncode<'t>>::EItem>, | ||||
| @@ -48,13 +48,13 @@ where | ||||
|         } | ||||
|         Bound::Unbounded => Bound::Unbounded, | ||||
|     }; | ||||
|  | ||||
|     let db = db.remap_key_type::<FacetKeyCodec<MyByteSlice>>(); | ||||
|     let mut docids = RoaringBitmap::new(); | ||||
|     let mut f = FacetRangeSearch { rtxn, db, field_id, left, right, docids: &mut docids }; | ||||
|     let highest_level = get_highest_level(rtxn, db, field_id)?; | ||||
|     let mut f = FacetRangeSearch { rtxn, db: &db, field_id, left, right, docids: &mut docids }; | ||||
|     let highest_level = get_highest_level(rtxn, &db, field_id)?; | ||||
|  | ||||
|     if let Some(first_bound) = get_first_facet_value::<MyByteSlice>(rtxn, db, field_id)? { | ||||
|         let last_bound = get_last_facet_value::<MyByteSlice>(rtxn, db, field_id)?.unwrap(); | ||||
|     if let Some(first_bound) = get_first_facet_value::<MyByteSlice>(rtxn, &db, field_id)? { | ||||
|         let last_bound = get_last_facet_value::<MyByteSlice>(rtxn, &db, field_id)?.unwrap(); | ||||
|         f.run(highest_level, first_bound, Bound::Included(last_bound), usize::MAX)?; | ||||
|         Ok(docids) | ||||
|     } else { | ||||
|   | ||||
| @@ -1,22 +1,17 @@ | ||||
| use std::collections::HashSet; | ||||
| use std::fmt::{Debug, Display}; | ||||
| use std::ops::Bound::{self, Excluded, Included}; | ||||
| use std::ops::RangeBounds; | ||||
|  | ||||
| use either::Either; | ||||
| pub use filter_parser::{Condition, Error as FPError, FilterCondition, Span, Token}; | ||||
| use heed::types::DecodeIgnore; | ||||
| use heed::LazyDecode; | ||||
| use roaring::RoaringBitmap; | ||||
| use std::collections::HashSet; | ||||
| use std::fmt::{Debug, Display}; | ||||
| use std::ops::Bound::{self, Excluded, Included}; | ||||
|  | ||||
| // use super::FacetNumberRange; | ||||
| use crate::error::{Error, UserError}; | ||||
| use crate::heed_codec::facet::new::ordered_f64_codec::OrderedF64Codec; | ||||
| use crate::heed_codec::facet::new::{FacetGroupValueCodec, FacetKey, FacetKeyCodec}; | ||||
| // use crate::heed_codec::facet::FacetLevelValueF64Codec; | ||||
| use crate::{ | ||||
|     distance_between_two_points, lat_lng_to_xyz, CboRoaringBitmapCodec, FieldId, Index, Result, | ||||
| }; | ||||
| use crate::{distance_between_two_points, lat_lng_to_xyz, FieldId, Index, Result}; | ||||
|  | ||||
| use super::facet_range_search; | ||||
|  | ||||
| /// The maximum number of filters the filter AST can process. | ||||
| const MAX_FILTER_DEPTH: usize = 2000; | ||||
| @@ -147,158 +142,15 @@ impl<'a> Filter<'a> { | ||||
|     } | ||||
| } | ||||
|  | ||||
| fn explore_facet_number_levels( | ||||
|     rtxn: &heed::RoTxn, | ||||
|     db: heed::Database<FacetKeyCodec<OrderedF64Codec>, FacetGroupValueCodec>, | ||||
|     field_id: FieldId, | ||||
| ) { | ||||
| } | ||||
|  | ||||
| impl<'a> Filter<'a> { | ||||
|     /// Aggregates the documents ids that are part of the specified range automatically | ||||
|     /// going deeper through the levels. | ||||
|     fn explore_facet_number_levels( | ||||
|         rtxn: &heed::RoTxn, | ||||
|         db: heed::Database<FacetKeyCodec<OrderedF64Codec>, CboRoaringBitmapCodec>, | ||||
|         field_id: FieldId, | ||||
|         level: u8, | ||||
|         left: Bound<f64>, | ||||
|         right: Bound<f64>, | ||||
|         output: &mut RoaringBitmap, | ||||
|     ) -> Result<()> { | ||||
|         // level must be > 0, I'll create a separate function for level 0 | ||||
|         // if level == 0 { | ||||
|         //      call that function | ||||
|         //} | ||||
|         match (left, right) { | ||||
|             // If the request is an exact value we must go directly to the deepest level. | ||||
|             (Included(l), Included(r)) if l == r && level > 0 => { | ||||
|                 return Self::explore_facet_number_levels( | ||||
|                     rtxn, db, field_id, 0, left, right, output, | ||||
|                 ); | ||||
|             } | ||||
|             // lower TO upper when lower > upper must return no result | ||||
|             (Included(l), Included(r)) if l > r => return Ok(()), | ||||
|             (Included(l), Excluded(r)) if l >= r => return Ok(()), | ||||
|             (Excluded(l), Excluded(r)) if l >= r => return Ok(()), | ||||
|             (Excluded(l), Included(r)) if l >= r => return Ok(()), | ||||
|             (_, _) => (), | ||||
|         } | ||||
|         let range_start_key = FacetKey { | ||||
|             field_id, | ||||
|             level, | ||||
|             left_bound: match left { | ||||
|                 Included(l) => l, | ||||
|                 Excluded(l) => l, | ||||
|                 Bound::Unbounded => f64::MIN, | ||||
|             }, | ||||
|         }; | ||||
|         let mut range_iter = db | ||||
|             .remap_data_type::<LazyDecode<FacetGroupValueCodec>>() | ||||
|             .range(rtxn, &(range_start_key..))?; | ||||
|     pub fn evaluate(&self, rtxn: &heed::RoTxn, index: &Index) -> Result<RoaringBitmap> { | ||||
|         // to avoid doing this for each recursive call we're going to do it ONCE ahead of time | ||||
|         let soft_deleted_documents = index.soft_deleted_documents_ids(rtxn)?; | ||||
|         let filterable_fields = index.filterable_fields(rtxn)?; | ||||
|  | ||||
|         let (mut previous_facet_key, mut previous_value) = range_iter.next().unwrap()?; | ||||
|         while let Some(el) = range_iter.next() { | ||||
|             let (facet_key, value) = el?; | ||||
|             let range = (Included(previous_facet_key.left_bound), Excluded(facet_key.left_bound)); | ||||
|             // if the current range intersects with the query range, then go deeper | ||||
|             // what does it mean for two ranges to intersect? | ||||
|             let gte_left = match left { | ||||
|                 Included(l) => previous_facet_key.left_bound >= l, | ||||
|                 Excluded(l) => previous_facet_key.left_bound > l, // TODO: not true? | ||||
|                 Bound::Unbounded => true, | ||||
|             }; | ||||
|             let lte_right = match right { | ||||
|                 Included(r) => facet_key.left_bound <= r, | ||||
|                 Excluded(r) => facet_key.left_bound < r, | ||||
|                 Bound::Unbounded => true, | ||||
|             }; | ||||
|         } | ||||
|         // at this point, previous_facet_key and previous_value are the last groups in the level | ||||
|         // we must also check whether we should visit this group | ||||
|  | ||||
|         todo!(); | ||||
|  | ||||
|         // let mut left_found = None; | ||||
|         // let mut right_found = None; | ||||
|  | ||||
|         // // We must create a custom iterator to be able to iterate over the | ||||
|         // // requested range as the range iterator cannot express some conditions. | ||||
|         // let iter = FacetNumberRange::new(rtxn, db, field_id, level, left, right)?; | ||||
|  | ||||
|         // debug!("Iterating between {:?} and {:?} (level {})", left, right, level); | ||||
|  | ||||
|         // for (i, result) in iter.enumerate() { | ||||
|         //     let ((_fid, level, l, r), docids) = result?; | ||||
|         //     debug!("{:?} to {:?} (level {}) found {} documents", l, r, level, docids.len()); | ||||
|         //     *output |= docids; | ||||
|         //     // We save the leftest and rightest bounds we actually found at this level. | ||||
|         //     if i == 0 { | ||||
|         //         left_found = Some(l); | ||||
|         //     } | ||||
|         //     right_found = Some(r); | ||||
|         // } | ||||
|  | ||||
|         // // Can we go deeper? | ||||
|         // let deeper_level = match level.checked_sub(1) { | ||||
|         //     Some(level) => level, | ||||
|         //     None => return Ok(()), | ||||
|         // }; | ||||
|  | ||||
|         // // We must refine the left and right bounds of this range by retrieving the | ||||
|         // // missing part in a deeper level. | ||||
|         // match left_found.zip(right_found) { | ||||
|         //     Some((left_found, right_found)) => { | ||||
|         //         // If the bound is satisfied we avoid calling this function again. | ||||
|         //         if !matches!(left, Included(l) if l == left_found) { | ||||
|         //             let sub_right = Excluded(left_found); | ||||
|         //             debug!( | ||||
|         //                 "calling left with {:?} to {:?} (level {})", | ||||
|         //                 left, sub_right, deeper_level | ||||
|         //             ); | ||||
|         //             Self::explore_facet_number_levels( | ||||
|         //                 rtxn, | ||||
|         //                 db, | ||||
|         //                 field_id, | ||||
|         //                 deeper_level, | ||||
|         //                 left, | ||||
|         //                 sub_right, | ||||
|         //                 output, | ||||
|         //             )?; | ||||
|         //         } | ||||
|         //         if !matches!(right, Included(r) if r == right_found) { | ||||
|         //             let sub_left = Excluded(right_found); | ||||
|         //             debug!( | ||||
|         //                 "calling right with {:?} to {:?} (level {})", | ||||
|         //                 sub_left, right, deeper_level | ||||
|         //             ); | ||||
|         //             Self::explore_facet_number_levels( | ||||
|         //                 rtxn, | ||||
|         //                 db, | ||||
|         //                 field_id, | ||||
|         //                 deeper_level, | ||||
|         //                 sub_left, | ||||
|         //                 right, | ||||
|         //                 output, | ||||
|         //             )?; | ||||
|         //         } | ||||
|         //     } | ||||
|         //     None => { | ||||
|         //         // If we found nothing at this level it means that we must find | ||||
|         //         // the same bounds but at a deeper, more precise level. | ||||
|         //         Self::explore_facet_number_levels( | ||||
|         //             rtxn, | ||||
|         //             db, | ||||
|         //             field_id, | ||||
|         //             deeper_level, | ||||
|         //             left, | ||||
|         //             right, | ||||
|         //             output, | ||||
|         //         )?; | ||||
|         //     } | ||||
|         // } | ||||
|  | ||||
|         // Ok(()) | ||||
|         // and finally we delete all the soft_deleted_documents, again, only once at the very end | ||||
|         self.inner_evaluate(rtxn, index, &filterable_fields) | ||||
|             .map(|result| result - soft_deleted_documents) | ||||
|     } | ||||
|  | ||||
|     fn evaluate_operator( | ||||
| @@ -337,15 +189,15 @@ impl<'a> Filter<'a> { | ||||
|                     Some(n) => { | ||||
|                         let n = Included(n); | ||||
|                         let mut output = RoaringBitmap::new(); | ||||
|                         // Self::explore_facet_number_levels( | ||||
|                         //     rtxn, | ||||
|                         //     numbers_db, | ||||
|                         //     field_id, | ||||
|                         //     0, | ||||
|                         //     n, | ||||
|                         //     n, | ||||
|                         //     &mut output, | ||||
|                         // )?; | ||||
|                         Self::explore_facet_number_levels( | ||||
|                             rtxn, | ||||
|                             numbers_db, | ||||
|                             field_id, | ||||
|                             0, | ||||
|                             n, | ||||
|                             n, | ||||
|                             &mut output, | ||||
|                         )?; | ||||
|                         output | ||||
|                     } | ||||
|                     None => RoaringBitmap::new(), | ||||
| @@ -381,29 +233,53 @@ impl<'a> Filter<'a> { | ||||
|         match biggest_level { | ||||
|             Some(level) => { | ||||
|                 let mut output = RoaringBitmap::new(); | ||||
|                 // Self::explore_facet_number_levels( | ||||
|                 //     rtxn, | ||||
|                 //     numbers_db, | ||||
|                 //     field_id, | ||||
|                 //     level, | ||||
|                 //     left, | ||||
|                 //     right, | ||||
|                 //     &mut output, | ||||
|                 // )?; | ||||
|                 Self::explore_facet_number_levels( | ||||
|                     rtxn, | ||||
|                     numbers_db, | ||||
|                     field_id, | ||||
|                     level, | ||||
|                     left, | ||||
|                     right, | ||||
|                     &mut output, | ||||
|                 )?; | ||||
|                 Ok(output) | ||||
|             } | ||||
|             None => Ok(RoaringBitmap::new()), | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     pub fn evaluate(&self, rtxn: &heed::RoTxn, index: &Index) -> Result<RoaringBitmap> { | ||||
|         // to avoid doing this for each recursive call we're going to do it ONCE ahead of time | ||||
|         let soft_deleted_documents = index.soft_deleted_documents_ids(rtxn)?; | ||||
|         let filterable_fields = index.filterable_fields(rtxn)?; | ||||
|     /// Aggregates the documents ids that are part of the specified range automatically | ||||
|     /// going deeper through the levels. | ||||
|     fn explore_facet_number_levels( | ||||
|         rtxn: &heed::RoTxn, | ||||
|         db: heed::Database<FacetKeyCodec<OrderedF64Codec>, FacetGroupValueCodec>, | ||||
|         field_id: FieldId, | ||||
|         level: u8, | ||||
|         left: Bound<f64>, | ||||
|         right: Bound<f64>, | ||||
|         output: &mut RoaringBitmap, | ||||
|     ) -> Result<()> { | ||||
|         match (left, right) { | ||||
|             // If the request is an exact value we must go directly to the deepest level. | ||||
|             (Included(l), Included(r)) if l == r && level > 0 => { | ||||
|                 return Self::explore_facet_number_levels( | ||||
|                     rtxn, db, field_id, 0, left, right, output, | ||||
|                 ); | ||||
|             } | ||||
|             // lower TO upper when lower > upper must return no result | ||||
|             (Included(l), Included(r)) if l > r => return Ok(()), | ||||
|             (Included(l), Excluded(r)) if l >= r => return Ok(()), | ||||
|             (Excluded(l), Excluded(r)) if l >= r => return Ok(()), | ||||
|             (Excluded(l), Included(r)) if l >= r => return Ok(()), | ||||
|             (_, _) => (), | ||||
|         } | ||||
|         let x = facet_range_search::find_docids_of_facet_within_bounds::<OrderedF64Codec>( | ||||
|             rtxn, &db, field_id, &left, &right, | ||||
|         )?; | ||||
|         // TODO: the facet range search should take a mutable roaring bitmap as argument | ||||
|         *output = x; | ||||
|  | ||||
|         // and finally we delete all the soft_deleted_documents, again, only once at the very end | ||||
|         self.inner_evaluate(rtxn, index, &filterable_fields) | ||||
|             .map(|result| result - soft_deleted_documents) | ||||
|         Ok(()) | ||||
|     } | ||||
|  | ||||
|     fn inner_evaluate( | ||||
|   | ||||
| @@ -2,22 +2,20 @@ use std::collections::btree_map::Entry; | ||||
|  | ||||
| use fst::IntoStreamer; | ||||
| use heed::types::{ByteSlice, Str}; | ||||
| use heed::{BytesDecode, BytesEncode, Database}; | ||||
| use obkv::Key; | ||||
| use heed::Database; | ||||
| use roaring::RoaringBitmap; | ||||
| use serde::{Deserialize, Serialize}; | ||||
| use serde_json::Value; | ||||
| use time::OffsetDateTime; | ||||
|  | ||||
| use super::{ClearDocuments, Facets}; | ||||
| use crate::error::{InternalError, SerializationError, UserError}; | ||||
| // use crate::heed_codec::facet::FacetStringZeroBoundsValueCodec; | ||||
| use crate::error::{InternalError, UserError}; | ||||
| use crate::heed_codec::facet::new::{FacetGroupValueCodec, FacetKeyCodec, MyByteSlice}; | ||||
| use crate::heed_codec::CboRoaringBitmapCodec; | ||||
| use crate::index::{db_name, main_key}; | ||||
| use crate::{ | ||||
|     fields_ids_map, DocumentId, ExternalDocumentsIds, FieldId, FieldIdMapMissingEntry, | ||||
|     FieldsIdsMap, Index, Result, RoaringBitmapCodec, SmallString32, BEU32, | ||||
|     DocumentId, ExternalDocumentsIds, FieldId, FieldIdMapMissingEntry, FieldsIdsMap, Index, Result, | ||||
|     RoaringBitmapCodec, SmallString32, BEU32, | ||||
| }; | ||||
|  | ||||
| pub struct DeleteDocuments<'t, 'u, 'i> { | ||||
|   | ||||
| @@ -6,6 +6,8 @@ use heed::{BytesDecode, BytesEncode}; | ||||
| use super::helpers::{ | ||||
|     create_sorter, merge_cbo_roaring_bitmaps, sorter_into_reader, GrenadParameters, | ||||
| }; | ||||
| use crate::heed_codec::facet::new::ordered_f64_codec::OrderedF64Codec; | ||||
| use crate::heed_codec::facet::new::{FacetKey, FacetKeyCodec}; | ||||
| use crate::heed_codec::facet::FieldDocIdFacetF64Codec; | ||||
| use crate::Result; | ||||
|  | ||||
| @@ -31,14 +33,13 @@ pub fn extract_facet_number_docids<R: io::Read + io::Seek>( | ||||
|  | ||||
|     let mut cursor = docid_fid_facet_number.into_cursor()?; | ||||
|     while let Some((key_bytes, _)) = cursor.move_on_next()? { | ||||
|         todo!() | ||||
|         // let (field_id, document_id, number) = | ||||
|         //     FieldDocIdFacetF64Codec::bytes_decode(key_bytes).unwrap(); | ||||
|         let (field_id, document_id, number) = | ||||
|             FieldDocIdFacetF64Codec::bytes_decode(key_bytes).unwrap(); | ||||
|  | ||||
|         // let key = (field_id, 0, number, number); | ||||
|         // // let key_bytes = FacetLevelValueF64Codec::bytes_encode(&key).unwrap(); | ||||
|         let key = FacetKey { field_id, level: 0, left_bound: number }; | ||||
|         let key_bytes = FacetKeyCodec::<OrderedF64Codec>::bytes_encode(&key).unwrap(); | ||||
|  | ||||
|         // facet_number_docids_sorter.insert(key_bytes, document_id.to_ne_bytes())?; | ||||
|         facet_number_docids_sorter.insert(key_bytes, document_id.to_ne_bytes())?; | ||||
|     } | ||||
|  | ||||
|     sorter_into_reader(facet_number_docids_sorter, indexer) | ||||
|   | ||||
| @@ -1,13 +1,11 @@ | ||||
| use std::fs::File; | ||||
| use std::iter::FromIterator; | ||||
| use std::{io, str}; | ||||
|  | ||||
| use roaring::RoaringBitmap; | ||||
|  | ||||
| use super::helpers::{create_sorter, sorter_into_reader, try_split_array_at, GrenadParameters}; | ||||
| use crate::heed_codec::facet::new::str_ref::StrRefCodec; | ||||
| use crate::heed_codec::facet::new::{FacetKey, FacetKeyCodec}; | ||||
| use crate::update::index_documents::merge_cbo_roaring_bitmaps; | ||||
| // use crate::heed_codec::facet::{encode_prefix_string, FacetStringLevelZeroCodec}; | ||||
| use crate::{FieldId, Result}; | ||||
| use heed::BytesEncode; | ||||
| use std::fs::File; | ||||
| use std::io; | ||||
|  | ||||
| /// Extracts the facet string and the documents ids where this facet string appear. | ||||
| /// | ||||
| @@ -22,38 +20,26 @@ pub fn extract_facet_string_docids<R: io::Read + io::Seek>( | ||||
|  | ||||
|     let mut facet_string_docids_sorter = create_sorter( | ||||
|         grenad::SortAlgorithm::Stable, | ||||
|         merge_cbo_roaring_bitmaps, // TODO: check | ||||
|         merge_cbo_roaring_bitmaps, // TODO: check that it is correct | ||||
|         indexer.chunk_compression_type, | ||||
|         indexer.chunk_compression_level, | ||||
|         indexer.max_nb_chunks, | ||||
|         max_memory, | ||||
|     ); | ||||
|  | ||||
|     let mut key_buffer = Vec::new(); | ||||
|     let mut value_buffer = Vec::new(); | ||||
|     let mut cursor = docid_fid_facet_string.into_cursor()?; | ||||
|     while let Some((key, original_value_bytes)) = cursor.move_on_next()? { | ||||
|     while let Some((key, _original_value_bytes)) = cursor.move_on_next()? { | ||||
|         let (field_id_bytes, bytes) = try_split_array_at(key).unwrap(); | ||||
|         let field_id = FieldId::from_be_bytes(field_id_bytes); | ||||
|         let (document_id_bytes, normalized_value_bytes) = try_split_array_at(bytes).unwrap(); | ||||
|         let document_id = u32::from_be_bytes(document_id_bytes); | ||||
|         let original_value = str::from_utf8(original_value_bytes)?; | ||||
|  | ||||
|         key_buffer.clear(); | ||||
|         // TODO | ||||
|         // FacetStringLevelZeroCodec::serialize_into( | ||||
|         //     field_id, | ||||
|         //     str::from_utf8(normalized_value_bytes)?, | ||||
|         //     &mut key_buffer, | ||||
|         // ); | ||||
|         let (document_id_bytes, normalized_value_bytes) = | ||||
|             try_split_array_at::<_, 4>(bytes).unwrap(); | ||||
|  | ||||
|         value_buffer.clear(); | ||||
|         // TODO | ||||
|         // encode_prefix_string(original_value, &mut value_buffer)?; | ||||
|         let bitmap = RoaringBitmap::from_iter(Some(document_id)); | ||||
|         bitmap.serialize_into(&mut value_buffer)?; | ||||
|         let normalised_value = std::str::from_utf8(normalized_value_bytes)?; | ||||
|         let key = FacetKey { field_id, level: 0, left_bound: normalised_value }; | ||||
|         let key_bytes = FacetKeyCodec::<StrRefCodec>::bytes_encode(&key).unwrap(); | ||||
|  | ||||
|         facet_string_docids_sorter.insert(&key_buffer, &value_buffer)?; | ||||
|         facet_string_docids_sorter.insert(&key_bytes, &document_id_bytes)?; | ||||
|     } | ||||
|  | ||||
|     sorter_into_reader(facet_string_docids_sorter, indexer) | ||||
|   | ||||
		Reference in New Issue
	
	Block a user