mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-25 13:06:27 +00:00 
			
		
		
		
	Merge #4804
4804: Implements the experimental contains filter operator r=irevoire a=irevoire # Pull Request Related PRD: (private link) https://www.notion.so/meilisearch/Contains-Like-Filter-Operator-0d8ad53c6761466f913432eb1d843f1e Public usage page: https://meilisearch.notion.site/Contains-filter-operator-usage-3e7421b0aacf45f48ab09abe259a1de6 ## Related issue Fixes https://github.com/meilisearch/meilisearch/issues/3613 ## What does this PR do? - Extract the contains operator from this PR: https://github.com/meilisearch/meilisearch/pull/3751 - Gate it behind a feature flag - Add tests Co-authored-by: Tamo <tamo@meilisearch.com>
This commit is contained in:
		| @@ -38,6 +38,7 @@ heed = { version = "0.20.3", default-features = false, features = [ | ||||
| indexmap = { version = "2.2.6", features = ["serde"] } | ||||
| json-depth-checker = { path = "../json-depth-checker" } | ||||
| levenshtein_automata = { version = "0.2.1", features = ["fst_automaton"] } | ||||
| memchr = "2.5.0" | ||||
| memmap2 = "0.9.4" | ||||
| obkv = "0.2.2" | ||||
| once_cell = "1.19.0" | ||||
|   | ||||
| @@ -2140,6 +2140,47 @@ pub(crate) mod tests { | ||||
|         ); | ||||
|     } | ||||
|  | ||||
|     /// Checks the `CONTAINS` and `NOT CONTAINS` filter operators against the | ||||
|     /// filterable `doggo` field of a small set of documents. | ||||
|     #[test] | ||||
|     fn test_contains() { | ||||
|         let index = TempIndex::new(); | ||||
|  | ||||
|         // Declare `doggo` as a filterable field. | ||||
|         index | ||||
|             .update_settings(|cfg| { | ||||
|                 cfg.set_filterable_fields(hashset! { S("doggo") }); | ||||
|             }) | ||||
|             .unwrap(); | ||||
|         // Document 5 deliberately carries no `doggo` value at all. | ||||
|         index | ||||
|             .add_documents(documents!([ | ||||
|                 { "id": 0, "doggo": "kefir" }, | ||||
|                 { "id": 1, "doggo": "kefirounet" }, | ||||
|                 { "id": 2, "doggo": "kefkef" }, | ||||
|                 { "id": 3, "doggo": "fifir" }, | ||||
|                 { "id": 4, "doggo": "boubou" }, | ||||
|                 { "id": 5 }, | ||||
|             ])) | ||||
|             .unwrap(); | ||||
|  | ||||
|         let rtxn = index.read_txn().unwrap(); | ||||
|         // Builds a fresh search restricted by `expression` and returns its result. | ||||
|         let run_filter = |expression: &'static str| { | ||||
|             let mut search = index.search(&rtxn); | ||||
|             let parsed = Filter::from_str(expression).unwrap().unwrap(); | ||||
|             search.filter(parsed); | ||||
|             search.execute().unwrap() | ||||
|         }; | ||||
|  | ||||
|         // Only the values embedding the whole word "kefir" are expected. | ||||
|         insta::assert_debug_snapshot!(run_filter("doggo CONTAINS kefir").candidates, @"RoaringBitmap<[0, 1]>"); | ||||
|         // The uppercase needle is expected to match the lowercase stored values. | ||||
|         insta::assert_debug_snapshot!(run_filter("doggo CONTAINS KEF").candidates, @"RoaringBitmap<[0, 1, 2]>"); | ||||
|         // The negation is expected to also return the document without a `doggo` field. | ||||
|         insta::assert_debug_snapshot!(run_filter("doggo NOT CONTAINS fir").candidates, @"RoaringBitmap<[2, 4, 5]>"); | ||||
|     } | ||||
|  | ||||
|     #[test] | ||||
|     fn replace_documents_external_ids_and_soft_deletion_check() { | ||||
|         use big_s::S; | ||||
|   | ||||
| @@ -4,6 +4,8 @@ use std::ops::Bound::{self, Excluded, Included}; | ||||
|  | ||||
| use either::Either; | ||||
| pub use filter_parser::{Condition, Error as FPError, FilterCondition, Token}; | ||||
| use heed::types::LazyDecode; | ||||
| use memchr::memmem::Finder; | ||||
| use roaring::{MultiOps, RoaringBitmap}; | ||||
| use serde_json::Value; | ||||
|  | ||||
| @@ -12,7 +14,11 @@ use crate::error::{Error, UserError}; | ||||
| use crate::heed_codec::facet::{ | ||||
|     FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec, OrderedF64Codec, | ||||
| }; | ||||
| use crate::{distance_between_two_points, lat_lng_to_xyz, FieldId, Index, Result}; | ||||
| use crate::index::db_name::FACET_ID_STRING_DOCIDS; | ||||
| use crate::{ | ||||
|     distance_between_two_points, lat_lng_to_xyz, FieldId, Index, InternalError, Result, | ||||
|     SerializationError, | ||||
| }; | ||||
|  | ||||
| /// The maximum number of filters the filter AST can process. | ||||
| const MAX_FILTER_DEPTH: usize = 2000; | ||||
| @@ -218,6 +224,10 @@ impl<'a> Filter<'a> { | ||||
|  | ||||
|         Ok(Some(Self { condition })) | ||||
|     } | ||||
|  | ||||
|     /// Forwards to the inner condition's `use_contains_operator` and returns | ||||
|     /// its result unchanged. | ||||
|     /// | ||||
|     /// NOTE(review): presumably the returned token points at a `CONTAINS` | ||||
|     /// operator present in the filter, letting callers gate the experimental | ||||
|     /// feature; confirm against the `FilterCondition` implementation. | ||||
|     pub fn use_contains_operator(&self) -> Option<&Token> { | ||||
|         let Self { condition } = self; | ||||
|         condition.use_contains_operator() | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl<'a> Filter<'a> { | ||||
| @@ -295,6 +305,41 @@ impl<'a> Filter<'a> { | ||||
|                 let all_ids = index.documents_ids(rtxn)?; | ||||
|                 return Ok(all_ids - docids); | ||||
|             } | ||||
|             Condition::Contains { keyword: _, word } => { | ||||
|                 let value = crate::normalize_facet(word.value()); | ||||
|                 let finder = Finder::new(&value); | ||||
|                 let base = FacetGroupKey { field_id, level: 0, left_bound: "" }; | ||||
|                 let docids = strings_db | ||||
|                     .prefix_iter(rtxn, &base)? | ||||
|                     .remap_data_type::<LazyDecode<FacetGroupValueCodec>>() | ||||
|                     .filter_map(|result| -> Option<Result<RoaringBitmap>> { | ||||
|                         match result { | ||||
|                             Ok((FacetGroupKey { left_bound, .. }, lazy_group_value)) => { | ||||
|                                 if finder.find(left_bound.as_bytes()).is_some() { | ||||
|                                     Some(lazy_group_value.decode().map(|gv| gv.bitmap).map_err( | ||||
|                                         |_| { | ||||
|                                             InternalError::from(SerializationError::Decoding { | ||||
|                                                 db_name: Some(FACET_ID_STRING_DOCIDS), | ||||
|                                             }) | ||||
|                                             .into() | ||||
|                                         }, | ||||
|                                     )) | ||||
|                                 } else { | ||||
|                                     None | ||||
|                                 } | ||||
|                             } | ||||
|                             Err(_e) => { | ||||
|                                 Some(Err(InternalError::from(SerializationError::Decoding { | ||||
|                                     db_name: Some(FACET_ID_STRING_DOCIDS), | ||||
|                                 }) | ||||
|                                 .into())) | ||||
|                             } | ||||
|                         } | ||||
|                     }) | ||||
|                     .union()?; | ||||
|  | ||||
|                 return Ok(docids); | ||||
|             } | ||||
|         }; | ||||
|  | ||||
|         let mut output = RoaringBitmap::new(); | ||||
|   | ||||
		Reference in New Issue
	
	Block a user