Mirror of https://github.com/meilisearch/meilisearch.git
(synced 2025-10-31 07:56:28 +00:00)

Commit: "Make the search and the indexing work"
This commit is contained in:
		| @@ -491,6 +491,20 @@ pub fn perform_search( | ||||
|         tokenizer_builder.allow_list(&script_lang_map); | ||||
|     } | ||||
|  | ||||
|     let separators = index.allowed_separators(&rtxn)?; | ||||
|     let separators: Option<Vec<_>> = | ||||
|         separators.as_ref().map(|x| x.iter().map(String::as_str).collect()); | ||||
|     if let Some(ref separators) = separators { | ||||
|         tokenizer_builder.separators(separators); | ||||
|     } | ||||
|  | ||||
|     let dictionary = index.dictionary(&rtxn)?; | ||||
|     let dictionary: Option<Vec<_>> = | ||||
|         dictionary.as_ref().map(|x| x.iter().map(String::as_str).collect()); | ||||
|     if let Some(ref dictionary) = dictionary { | ||||
|         tokenizer_builder.words_dict(dictionary); | ||||
|     } | ||||
|  | ||||
|     let mut formatter_builder = MatcherBuilder::new(matching_words, tokenizer_builder.build()); | ||||
|     formatter_builder.crop_marker(query.crop_marker); | ||||
|     formatter_builder.highlight_prefix(query.highlight_pre_tag); | ||||
|   | ||||
| @@ -52,3 +52,145 @@ async fn set_and_reset() { | ||||
|     snapshot!(json_string!(response["separatorTokens"]), @"[]"); | ||||
|     snapshot!(json_string!(response["dictionary"]), @"[]"); | ||||
| } | ||||
|  | ||||
| #[actix_rt::test] | ||||
| async fn set_and_search() { | ||||
|     let documents = json!([ | ||||
|         { | ||||
|             "id": 1, | ||||
|             "content": "Mac & cheese", | ||||
|         }, | ||||
|         { | ||||
|             "id": 2, | ||||
|             "content": "G#D#G#D#G#C#D#G#C#", | ||||
|         }, | ||||
|         { | ||||
|             "id": 3, | ||||
|             "content": "Mac&sep&&sepcheese", | ||||
|         }, | ||||
|     ]); | ||||
|  | ||||
|     let server = Server::new().await; | ||||
|     let index = server.index("test"); | ||||
|  | ||||
|     index.add_documents(documents, None).await; | ||||
|     index.wait_task(0).await; | ||||
|  | ||||
|     let (_response, _code) = index | ||||
|         .update_settings(json!({ | ||||
|             "nonSeparatorTokens": ["#", "&"], | ||||
|             "separatorTokens": ["<br/>", "&sep"], | ||||
|             "dictionary": ["#", "A#", "B#", "C#", "D#", "E#", "F#", "G#"], | ||||
|         })) | ||||
|         .await; | ||||
|     index.wait_task(1).await; | ||||
|  | ||||
|     index | ||||
|         .search(json!({"q": "&", "attributesToHighlight": ["content"]}), |response, code| { | ||||
|             snapshot!(code, @"200 OK"); | ||||
|             snapshot!(json_string!(response["hits"]), @r###" | ||||
|             [ | ||||
|               { | ||||
|                 "id": 1, | ||||
|                 "content": "Mac & cheese", | ||||
|                 "_formatted": { | ||||
|                   "id": "1", | ||||
|                   "content": "Mac <em>&</em> cheese" | ||||
|                 } | ||||
|               }, | ||||
|               { | ||||
|                 "id": 3, | ||||
|                 "content": "Mac&sep&&sepcheese", | ||||
|                 "_formatted": { | ||||
|                   "id": "3", | ||||
|                   "content": "Mac&sep<em>&</em>&sepcheese" | ||||
|                 } | ||||
|               } | ||||
|             ] | ||||
|             "###); | ||||
|         }) | ||||
|         .await; | ||||
|  | ||||
|     index | ||||
|         .search( | ||||
|             json!({"q": "Mac & cheese", "attributesToHighlight": ["content"]}), | ||||
|             |response, code| { | ||||
|                 snapshot!(code, @"200 OK"); | ||||
|                 snapshot!(json_string!(response["hits"]), @r###" | ||||
|                 [ | ||||
|                   { | ||||
|                     "id": 1, | ||||
|                     "content": "Mac & cheese", | ||||
|                     "_formatted": { | ||||
|                       "id": "1", | ||||
|                       "content": "<em>Mac</em> <em>&</em> <em>cheese</em>" | ||||
|                     } | ||||
|                   }, | ||||
|                   { | ||||
|                     "id": 3, | ||||
|                     "content": "Mac&sep&&sepcheese", | ||||
|                     "_formatted": { | ||||
|                       "id": "3", | ||||
|                       "content": "<em>Mac</em>&sep<em>&</em>&sep<em>cheese</em>" | ||||
|                     } | ||||
|                   } | ||||
|                 ] | ||||
|                 "###); | ||||
|             }, | ||||
|         ) | ||||
|         .await; | ||||
|  | ||||
|     index | ||||
|         .search( | ||||
|             json!({"q": "Mac&sep&&sepcheese", "attributesToHighlight": ["content"]}), | ||||
|             |response, code| { | ||||
|                 snapshot!(code, @"200 OK"); | ||||
|                 snapshot!(json_string!(response["hits"]), @r###" | ||||
|                 [ | ||||
|                   { | ||||
|                     "id": 1, | ||||
|                     "content": "Mac & cheese", | ||||
|                     "_formatted": { | ||||
|                       "id": "1", | ||||
|                       "content": "<em>Mac</em> <em>&</em> <em>cheese</em>" | ||||
|                     } | ||||
|                   }, | ||||
|                   { | ||||
|                     "id": 3, | ||||
|                     "content": "Mac&sep&&sepcheese", | ||||
|                     "_formatted": { | ||||
|                       "id": "3", | ||||
|                       "content": "<em>Mac</em>&sep<em>&</em>&sep<em>cheese</em>" | ||||
|                     } | ||||
|                   } | ||||
|                 ] | ||||
|                 "###); | ||||
|             }, | ||||
|         ) | ||||
|         .await; | ||||
|  | ||||
|     index | ||||
|         .search(json!({"q": "C#D#G", "attributesToHighlight": ["content"]}), |response, code| { | ||||
|             snapshot!(code, @"200 OK"); | ||||
|             snapshot!(json_string!(response["hits"]), @r###" | ||||
|             [ | ||||
|               { | ||||
|                 "id": 2, | ||||
|                 "content": "G#D#G#D#G#C#D#G#C#", | ||||
|                 "_formatted": { | ||||
|                   "id": "2", | ||||
|                   "content": "<em>G</em>#<em>D#</em><em>G</em>#<em>D#</em><em>G</em>#<em>C#</em><em>D#</em><em>G</em>#<em>C#</em>" | ||||
|                 } | ||||
|               } | ||||
|             ] | ||||
|             "###); | ||||
|         }) | ||||
|         .await; | ||||
|  | ||||
|     index | ||||
|         .search(json!({"q": "#", "attributesToHighlight": ["content"]}), |response, code| { | ||||
|             snapshot!(code, @"200 OK"); | ||||
|             snapshot!(json_string!(response["hits"]), @"[]"); | ||||
|         }) | ||||
|         .await; | ||||
| } | ||||
|   | ||||
| @@ -1094,10 +1094,7 @@ impl Index { | ||||
|  | ||||
|     /* separators easing method */ | ||||
|  | ||||
|     pub(crate) fn allowed_separators<'t>( | ||||
|         &self, | ||||
|         rtxn: &'t RoTxn, | ||||
|     ) -> Result<Option<BTreeSet<String>>> { | ||||
|     pub fn allowed_separators<'t>(&self, rtxn: &'t RoTxn) -> Result<Option<BTreeSet<String>>> { | ||||
|         let default_separators = | ||||
|             charabia::separators::DEFAULT_SEPARATORS.iter().map(|s| s.to_string()); | ||||
|         let mut separators: Option<BTreeSet<_>> = None; | ||||
|   | ||||
| @@ -479,6 +479,20 @@ pub fn execute_search( | ||||
|             tokbuilder.stop_words(stop_words); | ||||
|         } | ||||
|  | ||||
|         let separators = ctx.index.allowed_separators(ctx.txn)?; | ||||
|         let separators: Option<Vec<_>> = | ||||
|             separators.as_ref().map(|x| x.iter().map(String::as_str).collect()); | ||||
|         if let Some(ref separators) = separators { | ||||
|             tokbuilder.separators(separators); | ||||
|         } | ||||
|  | ||||
|         let dictionary = ctx.index.dictionary(ctx.txn)?; | ||||
|         let dictionary: Option<Vec<_>> = | ||||
|             dictionary.as_ref().map(|x| x.iter().map(String::as_str).collect()); | ||||
|         if let Some(ref dictionary) = dictionary { | ||||
|             tokbuilder.words_dict(dictionary); | ||||
|         } | ||||
|  | ||||
|         let script_lang_map = ctx.index.script_language(ctx.txn)?; | ||||
|         if !script_lang_map.is_empty() { | ||||
|             tokbuilder.allow_list(&script_lang_map); | ||||
|   | ||||
| @@ -28,6 +28,8 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>( | ||||
|     indexer: GrenadParameters, | ||||
|     searchable_fields: &Option<HashSet<FieldId>>, | ||||
|     stop_words: Option<&fst::Set<&[u8]>>, | ||||
|     allowed_separators: Option<&Vec<&str>>, | ||||
|     dictionary: Option<&Vec<&str>>, | ||||
|     max_positions_per_attributes: Option<u32>, | ||||
| ) -> Result<(RoaringBitmap, grenad::Reader<File>, ScriptLanguageDocidsMap)> { | ||||
|     puffin::profile_function!(); | ||||
| @@ -52,6 +54,14 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>( | ||||
|     if let Some(stop_words) = stop_words { | ||||
|         tokenizer_builder.stop_words(stop_words); | ||||
|     } | ||||
|     if let Some(dictionary) = dictionary { | ||||
|         // let dictionary: Vec<_> = dictionary.iter().map(String::as_str).collect(); | ||||
|         tokenizer_builder.words_dict(dictionary.as_slice()); | ||||
|     } | ||||
|     if let Some(separators) = allowed_separators { | ||||
|         // let separators: Vec<_> = separators.iter().map(String::as_str).collect(); | ||||
|         tokenizer_builder.separators(separators.as_slice()); | ||||
|     } | ||||
|     let tokenizer = tokenizer_builder.build(); | ||||
|  | ||||
|     let mut cursor = obkv_documents.into_cursor()?; | ||||
|   | ||||
| @@ -49,6 +49,8 @@ pub(crate) fn data_from_obkv_documents( | ||||
|     geo_fields_ids: Option<(FieldId, FieldId)>, | ||||
|     vectors_field_id: Option<FieldId>, | ||||
|     stop_words: Option<fst::Set<&[u8]>>, | ||||
|     allowed_separators: Option<Vec<&str>>, | ||||
|     dictionary: Option<Vec<&str>>, | ||||
|     max_positions_per_attributes: Option<u32>, | ||||
|     exact_attributes: HashSet<FieldId>, | ||||
| ) -> Result<()> { | ||||
| @@ -76,6 +78,8 @@ pub(crate) fn data_from_obkv_documents( | ||||
|                     geo_fields_ids, | ||||
|                     vectors_field_id, | ||||
|                     &stop_words, | ||||
|                     &allowed_separators, | ||||
|                     &dictionary, | ||||
|                     max_positions_per_attributes, | ||||
|                 ) | ||||
|             }) | ||||
| @@ -289,6 +293,8 @@ fn send_and_extract_flattened_documents_data( | ||||
|     geo_fields_ids: Option<(FieldId, FieldId)>, | ||||
|     vectors_field_id: Option<FieldId>, | ||||
|     stop_words: &Option<fst::Set<&[u8]>>, | ||||
|     allowed_separators: &Option<Vec<&str>>, | ||||
|     dictionary: &Option<Vec<&str>>, | ||||
|     max_positions_per_attributes: Option<u32>, | ||||
| ) -> Result<( | ||||
|     grenad::Reader<CursorClonableMmap>, | ||||
| @@ -344,6 +350,8 @@ fn send_and_extract_flattened_documents_data( | ||||
|                         indexer, | ||||
|                         searchable_fields, | ||||
|                         stop_words.as_ref(), | ||||
|                         allowed_separators.as_ref(), | ||||
|                         dictionary.as_ref(), | ||||
|                         max_positions_per_attributes, | ||||
|                     )?; | ||||
|  | ||||
|   | ||||
| @@ -316,6 +316,12 @@ where | ||||
|         let vectors_field_id = self.index.fields_ids_map(self.wtxn)?.id("_vectors"); | ||||
|  | ||||
|         let stop_words = self.index.stop_words(self.wtxn)?; | ||||
|         let separators = self.index.allowed_separators(self.wtxn)?; | ||||
|         let separators: Option<Vec<_>> = | ||||
|             separators.as_ref().map(|x| x.iter().map(String::as_str).collect()); | ||||
|         let dictionary = self.index.dictionary(self.wtxn)?; | ||||
|         let dictionary: Option<Vec<_>> = | ||||
|             dictionary.as_ref().map(|x| x.iter().map(String::as_str).collect()); | ||||
|         let exact_attributes = self.index.exact_attributes_ids(self.wtxn)?; | ||||
|  | ||||
|         let pool_params = GrenadParameters { | ||||
| @@ -353,6 +359,8 @@ where | ||||
|                     geo_fields_ids, | ||||
|                     vectors_field_id, | ||||
|                     stop_words, | ||||
|                     separators, | ||||
|                     dictionary, | ||||
|                     max_positions_per_attributes, | ||||
|                     exact_attributes, | ||||
|                 ) | ||||
|   | ||||
		Reference in New Issue
	
	Block a user