Mirror of https://github.com/meilisearch/meilisearch.git, synced 2025-11-04 01:46:28 +00:00

	Apply suggestions from code review
@@ -2,6 +2,7 @@ use std::borrow::Cow;
 use std::collections::HashSet;
 use std::io::BufReader;
 
+use grenad::CompressionType;
 use heed::types::ByteSlice;
 
 use super::index_documents::{merge_cbo_roaring_bitmaps, CursorClonableMmap};
@@ -18,10 +19,24 @@ pub struct PrefixWordPairsProximityDocids<'t, 'u, 'i> {
     index: &'i Index,
     max_proximity: u8,
     max_prefix_length: usize,
+    chunk_compression_type: CompressionType,
+    chunk_compression_level: Option<u32>,
 }
 impl<'t, 'u, 'i> PrefixWordPairsProximityDocids<'t, 'u, 'i> {
-    pub fn new(wtxn: &'t mut heed::RwTxn<'i, 'u>, index: &'i Index) -> Self {
-        Self { wtxn, index, max_proximity: 4, max_prefix_length: 2 }
+    pub fn new(
+        wtxn: &'t mut heed::RwTxn<'i, 'u>,
+        index: &'i Index,
+        chunk_compression_type: CompressionType,
+        chunk_compression_level: Option<u32>,
+    ) -> Self {
+        Self {
+            wtxn,
+            index,
+            max_proximity: 4,
+            max_prefix_length: 2,
+            chunk_compression_type,
+            chunk_compression_level,
+        }
     }
     /// Set the maximum proximity required to make a prefix be part of the words prefixes
     /// database. If two words are too far from the threshold the associated documents will
@@ -42,6 +57,7 @@ impl<'t, 'u, 'i> PrefixWordPairsProximityDocids<'t, 'u, 'i> {
         self.max_prefix_length = value;
         self
     }
 
+    #[logging_timer::time("WordPrefixPairProximityDocids::{}")]
     pub fn execute<'a>(
         self,
@@ -60,6 +76,8 @@ impl<'t, 'u, 'i> PrefixWordPairsProximityDocids<'t, 'u, 'i> {
             new_prefix_fst_words,
             common_prefix_fst_words,
             del_prefix_fst_words,
+            self.chunk_compression_type,
+            self.chunk_compression_level,
         )?;
 
         index_prefix_word_database(
@@ -72,6 +90,8 @@ impl<'t, 'u, 'i> PrefixWordPairsProximityDocids<'t, 'u, 'i> {
             new_prefix_fst_words,
             common_prefix_fst_words,
             del_prefix_fst_words,
+            self.chunk_compression_type,
+            self.chunk_compression_level,
         )?;
 
         Ok(())
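
Note: the builder in the file above now receives the grenad compression settings at construction time instead of hard-coding them internally. A minimal call-site sketch under the new signature (the surrounding write transaction, index handle, and chosen values are assumptions for illustration, not part of this commit):

    let builder = PrefixWordPairsProximityDocids::new(
        &mut wtxn,             // heed::RwTxn
        &index,                // milli Index
        CompressionType::None, // e.g. taken from the indexer configuration
        None,                  // optional compression level
    );
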
@@ -23,6 +23,8 @@ pub fn index_prefix_word_database(
     new_prefix_fst_words: &[String],
     common_prefix_fst_words: &[&[String]],
     del_prefix_fst_words: &HashSet<Vec<u8>>,
+    chunk_compression_type: CompressionType,
+    chunk_compression_level: Option<u32>,
 ) -> Result<()> {
     let max_proximity = max_proximity - 1;
     debug!("Computing and writing the word prefix pair proximity docids into LMDB on disk...");
@@ -35,7 +37,7 @@ pub fn index_prefix_word_database(
         .filter(|s| s.len() <= max_prefix_length)
         .collect();
 
-    for proximity in 1..=max_proximity - 1 {
+    for proximity in 1..max_proximity {
         for prefix in common_prefixes.iter() {
             let mut prefix_key = vec![];
             prefix_key.push(proximity);
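
Note: the range rewrite in the hunk above (`1..=max_proximity - 1` to `1..max_proximity`) is purely cosmetic; both ranges visit the same proximities. A small self-contained check, assuming max_proximity = 4:

    let max_proximity: u8 = 4;
    let old: Vec<u8> = (1..=max_proximity - 1).collect();
    let new: Vec<u8> = (1..max_proximity).collect();
    assert_eq!(old, new); // both are [1, 2, 3]
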
@@ -78,7 +80,8 @@ pub fn index_prefix_word_database(
 
     // Since we read the DB, we can't write to it directly, so we add each new (word1, prefix, proximity)
     // element in an intermediary grenad
-    let mut writer = create_writer(CompressionType::None, None, tempfile::tempfile()?);
+    let mut writer =
+        create_writer(chunk_compression_type, chunk_compression_level, tempfile::tempfile()?);
 
     for proximity in 1..=max_proximity - 1 {
         for prefix in new_prefixes.iter() {
@@ -144,7 +147,7 @@ fn execute_on_word_pairs_and_prefixes<I>(
     mut next_word2_and_docids: impl for<'a> FnMut(&'a mut I) -> Result<Option<(&'a [u8], &'a [u8])>>,
     mut insert: impl for<'a> FnMut(&'a [u8], &'a [u8]) -> Result<()>,
 ) -> Result<()> {
-    let mut batch: BTreeMap<Vec<u8>, Vec<Cow<'static, [u8]>>> = <_>::default();
+    let mut batch: BTreeMap<Vec<u8>, Vec<Cow<'static, [u8]>>> = BTreeMap::default();
 
     // Memory usage check:
     // The content of the loop will be called for each `word2` that follows a word beginning
@@ -187,6 +187,8 @@ pub fn index_word_prefix_database(
     new_prefix_fst_words: &[String],
     common_prefix_fst_words: &[&[String]],
     del_prefix_fst_words: &HashSet<Vec<u8>>,
+    chunk_compression_type: CompressionType,
+    chunk_compression_level: Option<u32>,
 ) -> Result<()> {
     debug!("Computing and writing the word prefix pair proximity docids into LMDB on disk...");
 
@@ -249,7 +251,8 @@ pub fn index_word_prefix_database(
 
         // Since we read the DB, we can't write to it directly, so we add each new (proximity, word1, prefix)
        // element in an intermediary grenad
-        let mut writer = create_writer(CompressionType::None, None, tempfile::tempfile()?);
+        let mut writer =
+            create_writer(chunk_compression_type, chunk_compression_level, tempfile::tempfile()?);
 
         execute_on_word_pairs_and_prefixes(
             &mut db_iter,
@@ -325,7 +328,7 @@ fn execute_on_word_pairs_and_prefixes<I>(
         };
         let word2_start_different_than_prev = word2[0] != prev_word2_start;
         // if there were no potential prefixes for the previous word2 based on its first letter,
-        // and if the current word2 starts with the s`ame letter, then there is also no potential
+        // and if the current word2 starts with the same letter, then there is also no potential
         // prefixes for the current word2, and we can skip to the next iteration
         if empty_prefixes && !word2_start_different_than_prev {
             continue;
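
Note: the two writer hunks above buffer their new entries in an intermediary grenad writer because the LMDB database cannot be written to while it is being read. A rough sketch of that pattern, reusing the create_writer call shown in the diff (the keys and values here are made up for illustration):

    let mut writer =
        create_writer(chunk_compression_type, chunk_compression_level, tempfile::tempfile()?);
    // grenad requires keys to be inserted in ascending lexicographic order.
    writer.insert(b"key-a", b"serialized docids a")?;
    writer.insert(b"key-b", b"serialized docids b")?;
    // once the read pass is finished, the resulting file is merged back into LMDB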