mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-11-04 09:56:28 +00:00 
			
		
		
		
	Merge #4033
4033: Fix thai synonyms r=Kerollmops a=Kerollmops Fixes #4031 Co-authored-by: Kerollmops <clement@meilisearch.com> Co-authored-by: ManyTheFish <many@meilisearch.com>
This commit is contained in:
		@@ -226,9 +226,9 @@ fn process_tokens<'a>(
 | 
			
		||||
) -> impl Iterator<Item = (usize, Token<'a>)> {
 | 
			
		||||
    tokens
 | 
			
		||||
        .skip_while(|token| token.is_separator())
 | 
			
		||||
        .scan((0, None), |(offset, prev_kind), token| {
 | 
			
		||||
        .scan((0, None), |(offset, prev_kind), mut token| {
 | 
			
		||||
            match token.kind {
 | 
			
		||||
                TokenKind::Word | TokenKind::StopWord | TokenKind::Unknown => {
 | 
			
		||||
                TokenKind::Word | TokenKind::StopWord if !token.lemma().is_empty() => {
 | 
			
		||||
                    *offset += match *prev_kind {
 | 
			
		||||
                        Some(TokenKind::Separator(SeparatorKind::Hard)) => 8,
 | 
			
		||||
                        Some(_) => 1,
 | 
			
		||||
@@ -244,7 +244,7 @@ fn process_tokens<'a>(
 | 
			
		||||
                {
 | 
			
		||||
                    *prev_kind = Some(token.kind);
 | 
			
		||||
                }
 | 
			
		||||
                _ => (),
 | 
			
		||||
                _ => token.kind = TokenKind::Unknown,
 | 
			
		||||
            }
 | 
			
		||||
            Some((*offset, token))
 | 
			
		||||
        })
 | 
			
		||||
 
 | 
			
		||||
@@ -573,7 +573,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
 | 
			
		||||
                    tokenizer
 | 
			
		||||
                        .tokenize(text)
 | 
			
		||||
                        .filter_map(|token| {
 | 
			
		||||
                            if token.is_word() {
 | 
			
		||||
                            if token.is_word() && !token.lemma().is_empty() {
 | 
			
		||||
                                Some(token.lemma().to_string())
 | 
			
		||||
                            } else {
 | 
			
		||||
                                None
 | 
			
		||||
@@ -1422,6 +1422,43 @@ mod tests {
 | 
			
		||||
        assert!(result.documents_ids.is_empty());
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    #[test]
 | 
			
		||||
    fn thai_synonyms() {
 | 
			
		||||
        let mut index = TempIndex::new();
 | 
			
		||||
        index.index_documents_config.autogenerate_docids = true;
 | 
			
		||||
 | 
			
		||||
        let mut wtxn = index.write_txn().unwrap();
 | 
			
		||||
        // Send 2 documents; their ids are autogenerated.
 | 
			
		||||
        index
 | 
			
		||||
            .add_documents_using_wtxn(
 | 
			
		||||
                &mut wtxn,
 | 
			
		||||
                documents!([
 | 
			
		||||
                    { "name": "ยี่ปุ่น" },
 | 
			
		||||
                    { "name": "ญี่ปุ่น" },
 | 
			
		||||
                ]),
 | 
			
		||||
            )
 | 
			
		||||
            .unwrap();
 | 
			
		||||
 | 
			
		||||
        // In the same transaction provide some synonyms
 | 
			
		||||
        index
 | 
			
		||||
            .update_settings_using_wtxn(&mut wtxn, |settings| {
 | 
			
		||||
                settings.set_synonyms(btreemap! {
 | 
			
		||||
                    "japanese".to_string() => vec![S("ญี่ปุ่น"), S("ยี่ปุ่น")],
 | 
			
		||||
                });
 | 
			
		||||
            })
 | 
			
		||||
            .unwrap();
 | 
			
		||||
        wtxn.commit().unwrap();
 | 
			
		||||
 | 
			
		||||
        // Ensure synonyms are effectively stored
 | 
			
		||||
        let rtxn = index.read_txn().unwrap();
 | 
			
		||||
        let synonyms = index.synonyms(&rtxn).unwrap();
 | 
			
		||||
        assert!(!synonyms.is_empty()); // at this point the index should return something
 | 
			
		||||
 | 
			
		||||
        // Check that we can use synonyms
 | 
			
		||||
        let result = index.search(&rtxn).query("japanese").execute().unwrap();
 | 
			
		||||
        assert_eq!(result.documents_ids.len(), 2);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    #[test]
 | 
			
		||||
    fn setting_searchable_recomputes_other_settings() {
 | 
			
		||||
        let index = TempIndex::new();
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user