mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-26 05:26:27 +00:00 
			
		
		
		
	Fix word pair proximity
This commit is contained in:
		
				
					committed by
					
						 Louis Dureuil
						Louis Dureuil
					
				
			
			
				
	
			
			
			
						parent
						
							96be85396d
						
					
				
				
					commit
					28a8d0ccda
				
			| @@ -1,4 +1,3 @@ | ||||
| use std::cmp::Ordering; | ||||
| use std::collections::{HashMap, VecDeque}; | ||||
| use std::fs::File; | ||||
| use std::io::BufReader; | ||||
| @@ -12,7 +11,7 @@ use super::helpers::{ | ||||
| }; | ||||
| use crate::error::SerializationError; | ||||
| use crate::index::db_name::DOCID_WORD_POSITIONS; | ||||
| use crate::proximity::{positions_proximity, MAX_DISTANCE}; | ||||
| use crate::proximity::{index_proximity, MAX_DISTANCE}; | ||||
| use crate::{DocumentId, Result}; | ||||
|  | ||||
| /// Extracts the best proximity between pairs of words and the document ids where each pair appears. | ||||
| @@ -71,7 +70,7 @@ pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>( | ||||
|         for (position, word) in KvReaderU16::new(&value).iter() { | ||||
|             // drain the proximity window until the head word is considered close to the word we are inserting. | ||||
|             while word_positions.get(0).map_or(false, |(_w, p)| { | ||||
|                 positions_proximity(*p as u32, position as u32) > MAX_DISTANCE | ||||
|                 index_proximity(*p as u32, position as u32) >= MAX_DISTANCE | ||||
|             }) { | ||||
|                 word_positions_into_word_pair_proximity( | ||||
|                     &mut word_positions, | ||||
| @@ -109,6 +108,7 @@ fn document_word_positions_into_sorter( | ||||
|     word_pair_proximity: &HashMap<(String, String), u8>, | ||||
|     word_pair_proximity_docids_sorter: &mut grenad::Sorter<MergeFn>, | ||||
| ) -> Result<()> { | ||||
|     puffin::profile_function!(); | ||||
|     let mut key_buffer = Vec::new(); | ||||
|     for ((w1, w2), prox) in word_pair_proximity { | ||||
|         key_buffer.clear(); | ||||
| @@ -127,9 +127,10 @@ fn word_positions_into_word_pair_proximity( | ||||
|     word_positions: &mut VecDeque<(String, u16)>, | ||||
|     word_pair_proximity: &mut HashMap<(String, String), u8>, | ||||
| ) -> Result<()> { | ||||
|     puffin::profile_function!(); | ||||
|     let (head_word, head_position) = word_positions.pop_front().unwrap(); | ||||
|     for (word, position) in word_positions.iter() { | ||||
|         let prox = positions_proximity(head_position as u32, *position as u32) as u8; | ||||
|         let prox = index_proximity(head_position as u32, *position as u32) as u8; | ||||
|         word_pair_proximity | ||||
|             .entry((head_word.clone(), word.clone())) | ||||
|             .and_modify(|p| { | ||||
|   | ||||
		Reference in New Issue
	
	Block a user