mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-25 13:06:27 +00:00 
			
		
		
		
	fix synonyms normalization
Synonyms need to be indexed in ascending order, and the new normalization step for synonyms can change this order, which breaks the indexing process: for example, "Harry Potter" > "HP" but "harry potter" < "hp".
This commit is contained in:
		| @@ -1,7 +1,7 @@ | ||||
| use std::{borrow::Cow, collections::{BTreeMap, BTreeSet}}; | ||||
|  | ||||
| use heed::Result as ZResult; | ||||
| use fst::{set::OpBuilder, SetBuilder}; | ||||
| use fst::{SetBuilder, set::OpBuilder}; | ||||
| use sdset::SetBuf; | ||||
| use meilisearch_schema::Schema; | ||||
| use meilisearch_tokenizer::analyzer::{Analyzer, AnalyzerConfig}; | ||||
| @@ -299,15 +299,22 @@ pub fn apply_synonyms_update( | ||||
|             .fold(String::new(), |s, t| s + t.text()) | ||||
|     } | ||||
|      | ||||
|     // normalize synonyms and reorder them creating a BTreeMap | ||||
|     let synonyms: BTreeMap<String, Vec<String>> = synonyms.into_iter().map( |(word, alternatives)| { | ||||
|         let word = normalize(&analyzer, &word); | ||||
|         let alternatives = alternatives.into_iter().map(|text| normalize(&analyzer, &text)).collect(); | ||||
|  | ||||
|         (word, alternatives) | ||||
|     }).collect(); | ||||
|  | ||||
|     // index synonyms, | ||||
|     // synonyms have to be ordered by key before indexation | ||||
|     let mut synonyms_builder = SetBuilder::memory(); | ||||
|     synonyms_store.clear(writer)?; | ||||
|     for (word, alternatives) in synonyms { | ||||
|         let word = normalize(&analyzer, &word); | ||||
|  | ||||
|         synonyms_builder.insert(&word)?; | ||||
|  | ||||
|         let alternatives = { | ||||
|             let alternatives = alternatives.iter().map(|text| normalize(&analyzer, &text)).collect(); | ||||
|             let alternatives = SetBuf::from_dirty(alternatives); | ||||
|             let mut alternatives_builder = SetBuilder::memory(); | ||||
|             alternatives_builder.extend_iter(alternatives)?; | ||||
|   | ||||
| @@ -171,6 +171,8 @@ async fn write_all_and_update() { | ||||
|         "synonyms": { | ||||
|             "road": ["street", "avenue"], | ||||
|             "street": ["avenue"], | ||||
|             "HP": ["Harry Potter"], | ||||
|             "Harry Potter": ["HP"] | ||||
|         }, | ||||
|         "attributesForFaceting": ["title"], | ||||
|     }); | ||||
| @@ -208,6 +210,8 @@ async fn write_all_and_update() { | ||||
|         "synonyms": { | ||||
|             "road": ["street", "avenue"], | ||||
|             "street": ["avenue"], | ||||
|             "hp": ["harry potter"], | ||||
|             "harry potter": ["hp"] | ||||
|         }, | ||||
|         "attributesForFaceting": ["title"], | ||||
|     }); | ||||
|   | ||||
		Reference in New Issue
	
	Block a user