Mirror of https://github.com/meilisearch/meilisearch.git (synced 2025-10-25 21:16:28 +00:00)
			
		
		
		
	feat: Introduce the synonyms concept to the Store trait
This commit is contained in:
		| @@ -349,7 +349,7 @@ mod tests { | |||||||
|     use std::iter::FromIterator; |     use std::iter::FromIterator; | ||||||
|  |  | ||||||
|     use sdset::SetBuf; |     use sdset::SetBuf; | ||||||
|     use fst::Set; |     use fst::{Set, IntoStreamer}; | ||||||
|  |  | ||||||
|     use crate::DocIndex; |     use crate::DocIndex; | ||||||
|     use crate::store::Store; |     use crate::store::Store; | ||||||
| @@ -357,18 +357,46 @@ mod tests { | |||||||
|     #[derive(Default)] |     #[derive(Default)] | ||||||
|     struct InMemorySetStore { |     struct InMemorySetStore { | ||||||
|         set: Set, |         set: Set, | ||||||
|  |         synonyms: Set, | ||||||
|         indexes: HashMap<Vec<u8>, SetBuf<DocIndex>>, |         indexes: HashMap<Vec<u8>, SetBuf<DocIndex>>, | ||||||
|  |         alternatives: HashMap<Vec<u8>, Set>, | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     impl Store for InMemorySetStore { |     fn set_from_stream<'f, I, S>(stream: I) -> Set | ||||||
|         type Error = std::io::Error; |     where | ||||||
|  |         I: for<'a> fst::IntoStreamer<'a, Into=S, Item=&'a [u8]>, | ||||||
|         fn words(&self) -> Result<&Set, Self::Error> { |         S: 'f + for<'a> fst::Streamer<'a, Item=&'a [u8]>, | ||||||
|             Ok(&self.set) |     { | ||||||
|  |         let mut builder = fst::SetBuilder::memory(); | ||||||
|  |         builder.extend_stream(stream); | ||||||
|  |         builder.into_inner().and_then(Set::from_bytes).unwrap() | ||||||
|     } |     } | ||||||
|  |  | ||||||
|         fn word_indexes(&self, word: &[u8]) -> Result<Option<SetBuf<DocIndex>>, Self::Error> { |     fn insert_key(set: &Set, key: &[u8]) -> Set { | ||||||
|             Ok(self.indexes.get(word).cloned()) |         let unique_key = { | ||||||
|  |             let mut builder = fst::SetBuilder::memory(); | ||||||
|  |             builder.insert(key); | ||||||
|  |             builder.into_inner().and_then(Set::from_bytes).unwrap() | ||||||
|  |         }; | ||||||
|  |  | ||||||
|  |         let union_ = set.op().add(unique_key.into_stream()).r#union(); | ||||||
|  |  | ||||||
|  |         set_from_stream(union_) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     fn sdset_into_fstset(set: &sdset::Set<&str>) -> Set { | ||||||
|  |         let mut builder = fst::SetBuilder::memory(); | ||||||
|  |         builder.extend_iter(set.into_iter()); | ||||||
|  |         builder.into_inner().and_then(Set::from_bytes).unwrap() | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     impl InMemorySetStore { | ||||||
|  |         pub fn add_synonym(&mut self, word: &str, new: SetBuf<&str>) { | ||||||
|  |             let alternatives = self.alternatives.entry(word.as_bytes().to_vec()).or_default(); | ||||||
|  |             let new = sdset_into_fstset(&new); | ||||||
|  |             *alternatives = set_from_stream(alternatives.op().add(new.into_stream()).r#union()); | ||||||
|  |  | ||||||
|  |             self.synonyms = insert_key(&self.synonyms, word.as_bytes()); | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
| @@ -384,11 +412,33 @@ mod tests { | |||||||
|  |  | ||||||
|             InMemorySetStore { |             InMemorySetStore { | ||||||
|                 set: Set::from_iter(tree).unwrap(), |                 set: Set::from_iter(tree).unwrap(), | ||||||
|  |                 synonyms: Set::default(), | ||||||
|                 indexes: map, |                 indexes: map, | ||||||
|  |                 alternatives: HashMap::new(), | ||||||
|             } |             } | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     impl Store for InMemorySetStore { | ||||||
|  |         type Error = std::io::Error; | ||||||
|  |  | ||||||
|  |         fn words(&self) -> Result<&Set, Self::Error> { | ||||||
|  |             Ok(&self.set) | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         fn word_indexes(&self, word: &[u8]) -> Result<Option<SetBuf<DocIndex>>, Self::Error> { | ||||||
|  |             Ok(self.indexes.get(word).cloned()) | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         fn synonyms(&self) -> Result<&Set, Self::Error> { | ||||||
|  |             Ok(&self.synonyms) | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         fn alternatives_to(&self, word: &[u8]) -> Result<Option<Set>, Self::Error> { | ||||||
|  |             Ok(self.alternatives.get(word).map(|s| Set::from_bytes(s.as_fst().to_vec()).unwrap())) | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|     const fn doc_index(document_id: u64, word_index: u16) -> DocIndex { |     const fn doc_index(document_id: u64, word_index: u16) -> DocIndex { | ||||||
|         DocIndex { |         DocIndex { | ||||||
|             document_id: DocumentId(document_id), |             document_id: DocumentId(document_id), | ||||||
|   | |||||||
| @@ -8,6 +8,9 @@ pub trait Store { | |||||||
|  |  | ||||||
|     fn words(&self) -> Result<&Set, Self::Error>; |     fn words(&self) -> Result<&Set, Self::Error>; | ||||||
|     fn word_indexes(&self, word: &[u8]) -> Result<Option<SetBuf<DocIndex>>, Self::Error>; |     fn word_indexes(&self, word: &[u8]) -> Result<Option<SetBuf<DocIndex>>, Self::Error>; | ||||||
|  |  | ||||||
|  |     fn synonyms(&self) -> Result<&Set, Self::Error>; | ||||||
|  |     fn alternatives_to(&self, word: &[u8]) -> Result<Option<Set>, Self::Error>; | ||||||
| } | } | ||||||
|  |  | ||||||
| impl<T> Store for &'_ T where T: Store { | impl<T> Store for &'_ T where T: Store { | ||||||
| @@ -20,4 +23,12 @@ impl<T> Store for &'_ T where T: Store { | |||||||
|     fn word_indexes(&self, word: &[u8]) -> Result<Option<SetBuf<DocIndex>>, Self::Error> { |     fn word_indexes(&self, word: &[u8]) -> Result<Option<SetBuf<DocIndex>>, Self::Error> { | ||||||
|         (*self).word_indexes(word) |         (*self).word_indexes(word) | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     fn synonyms(&self) -> Result<&Set, Self::Error> { | ||||||
|  |         (*self).synonyms() | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     fn alternatives_to(&self, word: &[u8]) -> Result<Option<Set>, Self::Error> { | ||||||
|  |         (*self).alternatives_to(word) | ||||||
|  |     } | ||||||
| } | } | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user