Merge pull request #5930 from meilisearch/synonym-performance-fix

Synonym performance fix
This commit is contained in:
Many the fish
2025-10-07 15:17:34 +00:00
committed by GitHub
3 changed files with 22 additions and 3 deletions

View File

@@ -21,7 +21,7 @@ mod vector_sort;
#[cfg(test)] #[cfg(test)]
mod tests; mod tests;
use std::collections::HashSet; use std::collections::{HashMap, HashSet};
use std::ops::AddAssign; use std::ops::AddAssign;
use std::time::Duration; use std::time::Duration;
@@ -64,6 +64,12 @@ use crate::{
UserError, Weight, UserError, Weight,
}; };
/// Per-search cache of the index's synonym table.
///
/// Reading the synonyms requires a database access, so the table is loaded
/// once and then kept here to avoid repeated database access during the
/// same search.
///
/// The `Default` value holds no table (`cache` is `None`).
#[derive(Debug, Default)]
pub struct SynonymCache {
    /// The loaded synonym table, or `None` while nothing has been loaded yet.
    ///
    /// Keys are word sequences; each key maps to a list of alternative word
    /// sequences.
    pub cache: Option<HashMap<Vec<String>, Vec<Vec<String>>>>,
}
/// A structure used throughout the execution of a search query. /// A structure used throughout the execution of a search query.
pub struct SearchContext<'ctx> { pub struct SearchContext<'ctx> {
pub index: &'ctx Index, pub index: &'ctx Index,
@@ -73,6 +79,7 @@ pub struct SearchContext<'ctx> {
pub phrase_interner: DedupInterner<Phrase>, pub phrase_interner: DedupInterner<Phrase>,
pub term_interner: Interner<QueryTerm>, pub term_interner: Interner<QueryTerm>,
pub phrase_docids: PhraseDocIdsCache, pub phrase_docids: PhraseDocIdsCache,
pub synonym_cache: SynonymCache,
pub restricted_fids: Option<RestrictedFids>, pub restricted_fids: Option<RestrictedFids>,
pub prefix_search: PrefixSearch, pub prefix_search: PrefixSearch,
pub vector_store_stats: Option<VectorStoreStats>, pub vector_store_stats: Option<VectorStoreStats>,
@@ -103,6 +110,7 @@ impl<'ctx> SearchContext<'ctx> {
phrase_interner: <_>::default(), phrase_interner: <_>::default(),
term_interner: <_>::default(), term_interner: <_>::default(),
phrase_docids: <_>::default(), phrase_docids: <_>::default(),
synonym_cache: <_>::default(),
restricted_fids: None, restricted_fids: None,
prefix_search, prefix_search,
vector_store_stats: None, vector_store_stats: None,
@@ -113,6 +121,17 @@ impl<'ctx> SearchContext<'ctx> {
self.prefix_search != PrefixSearch::Disabled self.prefix_search != PrefixSearch::Disabled
} }
/// Returns the synonym table of the index, reading it from the database at
/// most once per successful load.
///
/// The first call fetches the table with `self.index.synonyms(self.txn)` and
/// stores it in `self.synonym_cache`; later calls hand back a reference to
/// the stored table without touching the database again.
///
/// # Errors
///
/// Propagates any error returned by `Index::synonyms`. Nothing is stored in
/// that case, so the next call attempts the load again.
pub fn get_synonyms(&mut self) -> Result<&HashMap<Vec<String>, Vec<Vec<String>>>> {
    // Fast path: the table was already loaded by a previous call.
    if let Some(ref cached) = self.synonym_cache.cache {
        return Ok(cached);
    }

    // Slow path: load from the index, then hand back a reference to the
    // freshly stored value.
    let loaded = self.index.synonyms(self.txn)?;
    Ok(self.synonym_cache.cache.insert(loaded))
}
pub fn attributes_to_search_on( pub fn attributes_to_search_on(
&mut self, &mut self,
attributes_to_search_on: &'ctx [String], attributes_to_search_on: &'ctx [String],

View File

@@ -214,7 +214,7 @@ pub fn partially_initialized_term_from_word(
if is_prefix && use_prefix_db.is_none() { if is_prefix && use_prefix_db.is_none() {
find_zero_typo_prefix_derivations(ctx, word_interned, &mut prefix_of)?; find_zero_typo_prefix_derivations(ctx, word_interned, &mut prefix_of)?;
} }
let synonyms = ctx.index.synonyms(ctx.txn)?; let synonyms = ctx.get_synonyms()?;
let mut synonym_word_count = 0; let mut synonym_word_count = 0;
let synonyms = synonyms let synonyms = synonyms
.get(&vec![word.to_owned()]) .get(&vec![word.to_owned()])

View File

@@ -258,7 +258,7 @@ pub fn make_ngram(
partially_initialized_term_from_word(ctx, &ngram_str, max_nbr_typos, is_prefix, true)?; partially_initialized_term_from_word(ctx, &ngram_str, max_nbr_typos, is_prefix, true)?;
// Now add the synonyms // Now add the synonyms
let index_synonyms = ctx.index.synonyms(ctx.txn)?; let index_synonyms = ctx.get_synonyms()?;
term.zero_typo.synonyms.extend( term.zero_typo.synonyms.extend(
index_synonyms.get(&words).cloned().unwrap_or_default().into_iter().map(|words| { index_synonyms.get(&words).cloned().unwrap_or_default().into_iter().map(|words| {