mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-25 13:06:27 +00:00 
			
		
		
		
	Merge #1184
1184: normalize synonyms during indexation r=MarinPostma a=LegendreM fix #1135 #964 Normalizes the synonyms before indexing them, so they are not case sensitive anymore. The normalization also involves deunicoding in some cases, such as accents, so `été` and `ete` are considered equivalent in a search for synonyms. Co-authored-by: many <maxime@meilisearch.com> Co-authored-by: Many <legendre.maxime.isn@gmail.com>
This commit is contained in:
		| @@ -1,9 +1,10 @@ | |||||||
| use std::collections::{BTreeMap, BTreeSet}; | use std::{borrow::Cow, collections::{BTreeMap, BTreeSet}}; | ||||||
|  |  | ||||||
| use heed::Result as ZResult; | use heed::Result as ZResult; | ||||||
| use fst::{set::OpBuilder, SetBuilder}; | use fst::{set::OpBuilder, SetBuilder}; | ||||||
| use sdset::SetBuf; | use sdset::SetBuf; | ||||||
| use meilisearch_schema::Schema; | use meilisearch_schema::Schema; | ||||||
|  | use meilisearch_tokenizer::analyzer::{Analyzer, AnalyzerConfig}; | ||||||
|  |  | ||||||
| use crate::database::{MainT, UpdateT}; | use crate::database::{MainT, UpdateT}; | ||||||
| use crate::settings::{UpdateState, SettingsUpdate, RankingRule}; | use crate::settings::{UpdateState, SettingsUpdate, RankingRule}; | ||||||
| @@ -289,13 +290,24 @@ pub fn apply_synonyms_update( | |||||||
|  |  | ||||||
|     let main_store = index.main; |     let main_store = index.main; | ||||||
|     let synonyms_store = index.synonyms; |     let synonyms_store = index.synonyms; | ||||||
|  |     let stop_words = index.main.stop_words_fst(writer)?.map_data(Cow::into_owned)?; | ||||||
|  |     let analyzer = Analyzer::new(AnalyzerConfig::default_with_stopwords(&stop_words)); | ||||||
|  |  | ||||||
|  |     fn normalize<T: AsRef<[u8]>>(analyzer: &Analyzer<T>, text: &str) -> String { | ||||||
|  |         analyzer.analyze(&text) | ||||||
|  |             .tokens() | ||||||
|  |             .fold(String::new(), |s, t| s + t.text()) | ||||||
|  |     } | ||||||
|  |  | ||||||
|     let mut synonyms_builder = SetBuilder::memory(); |     let mut synonyms_builder = SetBuilder::memory(); | ||||||
|     synonyms_store.clear(writer)?; |     synonyms_store.clear(writer)?; | ||||||
|     for (word, alternatives) in synonyms.clone() { |     for (word, alternatives) in synonyms { | ||||||
|  |         let word = normalize(&analyzer, &word); | ||||||
|  |  | ||||||
|         synonyms_builder.insert(&word)?; |         synonyms_builder.insert(&word)?; | ||||||
|  |  | ||||||
|         let alternatives = { |         let alternatives = { | ||||||
|  |             let alternatives = alternatives.iter().map(|text| normalize(&analyzer, &text)).collect(); | ||||||
|             let alternatives = SetBuf::from_dirty(alternatives); |             let alternatives = SetBuf::from_dirty(alternatives); | ||||||
|             let mut alternatives_builder = SetBuilder::memory(); |             let mut alternatives_builder = SetBuilder::memory(); | ||||||
|             alternatives_builder.extend_iter(alternatives)?; |             alternatives_builder.extend_iter(alternatives)?; | ||||||
|   | |||||||
| @@ -167,6 +167,89 @@ async fn search_with_settings_stop_words() { | |||||||
| async fn search_with_settings_synonyms() { | async fn search_with_settings_synonyms() { | ||||||
|     let mut server = common::Server::test_server().await; |     let mut server = common::Server::test_server().await; | ||||||
|  |  | ||||||
|  |     let config = json!({ | ||||||
|  |       "rankingRules": [ | ||||||
|  |         "typo", | ||||||
|  |         "words", | ||||||
|  |         "proximity", | ||||||
|  |         "attribute", | ||||||
|  |         "wordsPosition", | ||||||
|  |         "desc(age)", | ||||||
|  |         "exactness", | ||||||
|  |         "desc(balance)" | ||||||
|  |       ], | ||||||
|  |       "distinctAttribute": null, | ||||||
|  |       "searchableAttributes": [ | ||||||
|  |         "name", | ||||||
|  |         "age", | ||||||
|  |         "color", | ||||||
|  |         "gender", | ||||||
|  |         "email", | ||||||
|  |         "address", | ||||||
|  |         "about" | ||||||
|  |       ], | ||||||
|  |       "displayedAttributes": [ | ||||||
|  |         "name", | ||||||
|  |         "age", | ||||||
|  |         "gender", | ||||||
|  |         "color", | ||||||
|  |         "email", | ||||||
|  |         "phone", | ||||||
|  |         "address", | ||||||
|  |         "balance" | ||||||
|  |       ], | ||||||
|  |       "stopWords": null, | ||||||
|  |       "synonyms": { | ||||||
|  |           "Application": [ | ||||||
|  |               "Exercitation" | ||||||
|  |           ] | ||||||
|  |       }, | ||||||
|  |     }); | ||||||
|  |  | ||||||
|  |     server.update_all_settings(config).await; | ||||||
|  |  | ||||||
|  |     let query = "q=application&limit=3"; | ||||||
|  |     let expect = json!([ | ||||||
|  |       { | ||||||
|  |         "balance": "$1,921.58", | ||||||
|  |         "age": 31, | ||||||
|  |         "color": "Green", | ||||||
|  |         "name": "Harper Carson", | ||||||
|  |         "gender": "male", | ||||||
|  |         "email": "harpercarson@chorizon.com", | ||||||
|  |         "phone": "+1 (912) 430-3243", | ||||||
|  |         "address": "883 Dennett Place, Knowlton, New Mexico, 9219" | ||||||
|  |       }, | ||||||
|  |       { | ||||||
|  |         "balance": "$1,706.13", | ||||||
|  |         "age": 27, | ||||||
|  |         "color": "Green", | ||||||
|  |         "name": "Cherry Orr", | ||||||
|  |         "gender": "female", | ||||||
|  |         "email": "cherryorr@chorizon.com", | ||||||
|  |         "phone": "+1 (995) 479-3174", | ||||||
|  |         "address": "442 Beverly Road, Ventress, New Mexico, 3361" | ||||||
|  |       }, | ||||||
|  |       { | ||||||
|  |         "balance": "$1,476.39", | ||||||
|  |         "age": 28, | ||||||
|  |         "color": "brown", | ||||||
|  |         "name": "Maureen Dale", | ||||||
|  |         "gender": "female", | ||||||
|  |         "email": "maureendale@chorizon.com", | ||||||
|  |         "phone": "+1 (984) 538-3684", | ||||||
|  |         "address": "817 Newton Street, Bannock, Wyoming, 1468" | ||||||
|  |       } | ||||||
|  |     ]); | ||||||
|  |  | ||||||
|  |     let (response, _status_code) = server.search_get(query).await; | ||||||
|  |     assert_json_eq!(expect, response["hits"].clone(), ordered: false); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | #[actix_rt::test] | ||||||
|  | async fn search_with_settings_normalized_synonyms() { | ||||||
|  |     let mut server = common::Server::test_server().await; | ||||||
|  |  | ||||||
|     let config = json!({ |     let config = json!({ | ||||||
|       "rankingRules": [ |       "rankingRules": [ | ||||||
|         "typo", |         "typo", | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user