mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-11-04 01:46:28 +00:00 
			
		
		
		
	Fix bug in prefix DB indexing
Fixes a case where the batch's information was not properly updated when only the proximity changed between two consecutive word pair proximities. Closes https://github.com/meilisearch/meilisearch/issues/3043
This commit is contained in:
		@@ -238,4 +238,51 @@ mod tests {
 | 
			
		||||
        db_snap!(index, word_prefix_pair_proximity_docids, "update");
 | 
			
		||||
        db_snap!(index, prefix_word_pair_proximity_docids, "update");
 | 
			
		||||
    }
 | 
			
		||||
    #[test]
 | 
			
		||||
    fn test_batch_bug_3034() {
 | 
			
		||||
        // https://github.com/meilisearch/meilisearch/issues/3043
 | 
			
		||||
        let mut index = TempIndex::new();
 | 
			
		||||
        index.index_documents_config.words_prefix_threshold = Some(50);
 | 
			
		||||
        index.index_documents_config.autogenerate_docids = true;
 | 
			
		||||
 | 
			
		||||
        index
 | 
			
		||||
            .update_settings(|settings| {
 | 
			
		||||
                settings.set_searchable_fields(vec!["text".to_owned()]);
 | 
			
		||||
            })
 | 
			
		||||
            .unwrap();
 | 
			
		||||
 | 
			
		||||
        let batch_reader_from_documents = |documents| {
 | 
			
		||||
            let mut builder = DocumentsBatchBuilder::new(Vec::new());
 | 
			
		||||
            for object in documents {
 | 
			
		||||
                builder.append_json_object(&object).unwrap();
 | 
			
		||||
            }
 | 
			
		||||
            DocumentsBatchReader::from_reader(Cursor::new(builder.into_inner().unwrap())).unwrap()
 | 
			
		||||
        };
 | 
			
		||||
 | 
			
		||||
        let mut documents = documents_with_enough_different_words_for_prefixes(&["y"]);
 | 
			
		||||
        // Now we add two documents in which the pair (x, y) appears at two different distances; their text should populate the word_prefix_pair_proximity_docids database
 | 
			
		||||
        documents.push(
 | 
			
		||||
            serde_json::json!({
 | 
			
		||||
                "text": "x y"
 | 
			
		||||
            })
 | 
			
		||||
            .as_object()
 | 
			
		||||
            .unwrap()
 | 
			
		||||
            .clone(),
 | 
			
		||||
        );
 | 
			
		||||
        documents.push(
 | 
			
		||||
            serde_json::json!({
 | 
			
		||||
                "text": "x a y"
 | 
			
		||||
            })
 | 
			
		||||
            .as_object()
 | 
			
		||||
            .unwrap()
 | 
			
		||||
            .clone(),
 | 
			
		||||
        );
 | 
			
		||||
 | 
			
		||||
        let documents = batch_reader_from_documents(documents);
 | 
			
		||||
        index.add_documents(documents).unwrap();
 | 
			
		||||
 | 
			
		||||
        db_snap!(index, word_pair_proximity_docids);
 | 
			
		||||
        db_snap!(index, word_prefix_pair_proximity_docids);
 | 
			
		||||
        db_snap!(index, prefix_word_pair_proximity_docids);
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
@@ -44,7 +44,7 @@ word2    : doggo
 | 
			
		||||
2. **Inner loop:** Then, we iterate over all the prefixes of `word2` that are
 | 
			
		||||
in the list of sorted prefixes. And we insert the key `prefix`
 | 
			
		||||
and the value (`docids`) into a sorted map which we call the “batch”. For example,
 | 
			
		||||
at the end of the first inner loop, we may have:
 | 
			
		||||
at the end of the first outer loop, we may have:
 | 
			
		||||
```text
 | 
			
		||||
Outer loop 1:
 | 
			
		||||
------------------------------
 | 
			
		||||
@@ -85,7 +85,7 @@ end of the batch.
 | 
			
		||||
 | 
			
		||||
4. On the third iteration of the outer loop, we have:
 | 
			
		||||
```text
 | 
			
		||||
Outer loop 4:
 | 
			
		||||
Outer loop 3:
 | 
			
		||||
------------------------------
 | 
			
		||||
proximity: 1
 | 
			
		||||
word1    : good
 | 
			
		||||
@@ -340,17 +340,16 @@ fn execute_on_word_pairs_and_prefixes<I>(
 | 
			
		||||
        if prox_different_than_prev || word1_different_than_prev || word2_start_different_than_prev
 | 
			
		||||
        {
 | 
			
		||||
            batch.flush(&mut merge_buffer, &mut insert)?;
 | 
			
		||||
            batch.proximity = proximity;
 | 
			
		||||
            // don't forget to reset the value of batch.word1 and prev_word2_start
 | 
			
		||||
            if word1_different_than_prev {
 | 
			
		||||
                prefix_search_start.0 = 0;
 | 
			
		||||
                batch.word1.clear();
 | 
			
		||||
                batch.word1.extend_from_slice(word1);
 | 
			
		||||
                batch.proximity = proximity;
 | 
			
		||||
            }
 | 
			
		||||
            if word2_start_different_than_prev {
 | 
			
		||||
                // word2_start_different_than_prev == true
 | 
			
		||||
                prev_word2_start = word2[0];
 | 
			
		||||
            }
 | 
			
		||||
            prefix_search_start.0 = 0;
 | 
			
		||||
            // Optimisation: find the search start in the prefix trie to iterate over the prefixes of word2
 | 
			
		||||
            empty_prefixes = !prefixes.set_search_start(word2, &mut prefix_search_start);
 | 
			
		||||
        }
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user