1224: fix synonyms normalization r=MarinPostma a=LegendreM

Synonyms need to be indexed in ascending order,
and the new normalization step for synonyms can change this order,
which breaks the indexing process,
because "Harry Potter" > "HP" but "harry potter" < "hp".

Co-authored-by: many <maxime@meilisearch.com>
This commit is contained in:
bors[bot]
2021-02-04 15:37:33 +00:00
committed by GitHub
2 changed files with 15 additions and 4 deletions

View File

@@ -1,7 +1,7 @@
use std::{borrow::Cow, collections::{BTreeMap, BTreeSet}};
use heed::Result as ZResult;
use fst::{set::OpBuilder, SetBuilder};
use fst::{SetBuilder, set::OpBuilder};
use sdset::SetBuf;
use meilisearch_schema::Schema;
use meilisearch_tokenizer::analyzer::{Analyzer, AnalyzerConfig};
@@ -299,15 +299,22 @@ pub fn apply_synonyms_update(
.fold(String::new(), |s, t| s + t.text())
}
// normalize synonyms and reorder them creating a BTreeMap
let synonyms: BTreeMap<String, Vec<String>> = synonyms.into_iter().map( |(word, alternatives)| {
let word = normalize(&analyzer, &word);
let alternatives = alternatives.into_iter().map(|text| normalize(&analyzer, &text)).collect();
(word, alternatives)
}).collect();
// index synonyms,
// synonyms have to be ordered by key before indexation
let mut synonyms_builder = SetBuilder::memory();
synonyms_store.clear(writer)?;
for (word, alternatives) in synonyms {
let word = normalize(&analyzer, &word);
synonyms_builder.insert(&word)?;
let alternatives = {
let alternatives = alternatives.iter().map(|text| normalize(&analyzer, &text)).collect();
let alternatives = SetBuf::from_dirty(alternatives);
let mut alternatives_builder = SetBuilder::memory();
alternatives_builder.extend_iter(alternatives)?;

View File

@@ -171,6 +171,8 @@ async fn write_all_and_update() {
"synonyms": {
"road": ["street", "avenue"],
"street": ["avenue"],
"HP": ["Harry Potter"],
"Harry Potter": ["HP"]
},
"attributesForFaceting": ["title"],
});
@@ -208,6 +210,8 @@ async fn write_all_and_update() {
"synonyms": {
"road": ["street", "avenue"],
"street": ["avenue"],
"hp": ["harry potter"],
"harry potter": ["hp"]
},
"attributesForFaceting": ["title"],
});