mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-25 13:06:27 +00:00 
			
		
		
		
	Merge #1045
1045: Revert "Merge #1037" r=MarinPostma a=MarinPostma This reverts commit 257f9fb2b2, reversing changes made to 9bae7a35bf. The reason for this is that de-unicoding is not always desirable (for example, in the case of CJK documents). This cannot be handled correctly for now, and will necessitate work on the tokenizer. Co-authored-by: mpostma <postma.marin@protonmail.com>
This commit is contained in:
		| @@ -12,14 +12,18 @@ pub struct Synonyms { | |||||||
| } | } | ||||||
|  |  | ||||||
| impl Synonyms { | impl Synonyms { | ||||||
|     pub(crate) fn put_synonyms<A>(self, writer: &mut heed::RwTxn<MainT>, word: &[u8], synonyms: &fst::Set<A>) -> ZResult<()> |     pub fn put_synonyms<A>(self, writer: &mut heed::RwTxn<MainT>, word: &[u8], synonyms: &fst::Set<A>) -> ZResult<()> | ||||||
|     where A: AsRef<[u8]>, |     where A: AsRef<[u8]>, | ||||||
|     { |     { | ||||||
|         let bytes = synonyms.as_fst().as_bytes(); |         let bytes = synonyms.as_fst().as_bytes(); | ||||||
|         self.synonyms.put(writer, word, bytes) |         self.synonyms.put(writer, word, bytes) | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     pub(crate) fn clear(self, writer: &mut heed::RwTxn<MainT>) -> ZResult<()> { |     pub fn del_synonyms(self, writer: &mut heed::RwTxn<MainT>, word: &[u8]) -> ZResult<bool> { | ||||||
|  |         self.synonyms.delete(writer, word) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     pub fn clear(self, writer: &mut heed::RwTxn<MainT>) -> ZResult<()> { | ||||||
|         self.synonyms.clear(writer) |         self.synonyms.clear(writer) | ||||||
|     } |     } | ||||||
|  |  | ||||||
|   | |||||||
| @@ -126,7 +126,7 @@ pub fn apply_settings_update( | |||||||
|     } |     } | ||||||
|  |  | ||||||
|     match settings.synonyms { |     match settings.synonyms { | ||||||
|         UpdateState::Update(synonyms) => apply_synonyms_update(writer, index, canonicalize_synonyms(synonyms))? , |         UpdateState::Update(synonyms) => apply_synonyms_update(writer, index, synonyms)?, | ||||||
|         UpdateState::Clear => apply_synonyms_update(writer, index, BTreeMap::new())?, |         UpdateState::Clear => apply_synonyms_update(writer, index, BTreeMap::new())?, | ||||||
|         UpdateState::Nothing => (), |         UpdateState::Nothing => (), | ||||||
|     } |     } | ||||||
| @@ -138,18 +138,6 @@ pub fn apply_settings_update( | |||||||
|     Ok(()) |     Ok(()) | ||||||
| } | } | ||||||
|  |  | ||||||
| fn canonicalize_synonyms(synonyms: BTreeMap<String, Vec<String>>) -> BTreeMap<String, Vec<String>> { |  | ||||||
|     let mut canonicalized = BTreeMap::new(); |  | ||||||
|     for (key, values) in synonyms { |  | ||||||
|         let deunicoded = deunicode::deunicode(&key); |  | ||||||
|         canonicalized |  | ||||||
|             .entry(deunicoded) |  | ||||||
|             .or_insert_with(Vec::new) |  | ||||||
|             .extend_from_slice(&values); |  | ||||||
|     } |  | ||||||
|     canonicalized |  | ||||||
| } |  | ||||||
|  |  | ||||||
| fn apply_attributes_for_faceting_update( | fn apply_attributes_for_faceting_update( | ||||||
|     writer: &mut heed::RwTxn<MainT>, |     writer: &mut heed::RwTxn<MainT>, | ||||||
|     index: &store::Index, |     index: &store::Index, | ||||||
|   | |||||||
| @@ -1829,38 +1829,3 @@ async fn update_documents_with_facet_distribution() { | |||||||
|     let (response2, _) = server.search_post(search).await; |     let (response2, _) = server.search_post(search).await; | ||||||
|     assert_json_eq!(expected_facet_distribution, response2["facetsDistribution"].clone()); |     assert_json_eq!(expected_facet_distribution, response2["facetsDistribution"].clone()); | ||||||
| } | } | ||||||
|  |  | ||||||
| #[actix_rt::test] |  | ||||||
| async fn test_search_synonyms_unicased() { |  | ||||||
|     let mut server = common::Server::with_uid("test"); |  | ||||||
|     let body = json!({ "uid": "test" }); |  | ||||||
|     server.create_index(body).await; |  | ||||||
|     let settings = json!({ |  | ||||||
|         "synonyms": { |  | ||||||
|             "cáse": ["truc"], |  | ||||||
|             "case": ["machin"] |  | ||||||
|         } |  | ||||||
|     }); |  | ||||||
|     server.update_all_settings(settings).await; |  | ||||||
|  |  | ||||||
|     let (response, _) = server.get_synonyms().await; |  | ||||||
|     assert_json_eq!(response, json!({"case":["machin", "truc"]})); |  | ||||||
|  |  | ||||||
|     let update = json!([ |  | ||||||
|         { |  | ||||||
|             "id": "1", |  | ||||||
|             "title": "truc" |  | ||||||
|         }, |  | ||||||
|     ]); |  | ||||||
|     server.add_or_update_multiple_documents(update).await; |  | ||||||
|  |  | ||||||
|     let search = json!({ |  | ||||||
|         "q": "case", |  | ||||||
|     }); |  | ||||||
|     let (response, _) = server.search_post(search).await; |  | ||||||
|     assert_eq!(response["hits"].as_array().unwrap().len(), 1); |  | ||||||
|  |  | ||||||
|     server.delete_synonyms().await; |  | ||||||
|     let (response, _) = server.get_synonyms().await; |  | ||||||
|     assert_json_eq!(response, json!({})); |  | ||||||
| } |  | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user