mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-26 05:26:27 +00:00 
			
		
		
		
	Fix word pair proximity
This commit is contained in:
		
				
					committed by
					
						 Louis Dureuil
						Louis Dureuil
					
				
			
			
				
	
			
			
			
						parent
						
							96be85396d
						
					
				
				
					commit
					28a8d0ccda
				
			| @@ -1,4 +1,3 @@ | |||||||
| use std::cmp::Ordering; |  | ||||||
| use std::collections::{HashMap, VecDeque}; | use std::collections::{HashMap, VecDeque}; | ||||||
| use std::fs::File; | use std::fs::File; | ||||||
| use std::io::BufReader; | use std::io::BufReader; | ||||||
| @@ -12,7 +11,7 @@ use super::helpers::{ | |||||||
| }; | }; | ||||||
| use crate::error::SerializationError; | use crate::error::SerializationError; | ||||||
| use crate::index::db_name::DOCID_WORD_POSITIONS; | use crate::index::db_name::DOCID_WORD_POSITIONS; | ||||||
| use crate::proximity::{positions_proximity, MAX_DISTANCE}; | use crate::proximity::{index_proximity, MAX_DISTANCE}; | ||||||
| use crate::{DocumentId, Result}; | use crate::{DocumentId, Result}; | ||||||
|  |  | ||||||
| /// Extracts the best proximity between pairs of words and the documents ids where this pair appear. | /// Extracts the best proximity between pairs of words and the documents ids where this pair appear. | ||||||
| @@ -71,7 +70,7 @@ pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>( | |||||||
|         for (position, word) in KvReaderU16::new(&value).iter() { |         for (position, word) in KvReaderU16::new(&value).iter() { | ||||||
|             // drain the proximity window until the head word is considered close to the word we are inserting. |             // drain the proximity window until the head word is considered close to the word we are inserting. | ||||||
|             while word_positions.get(0).map_or(false, |(_w, p)| { |             while word_positions.get(0).map_or(false, |(_w, p)| { | ||||||
|                 positions_proximity(*p as u32, position as u32) > MAX_DISTANCE |                 index_proximity(*p as u32, position as u32) >= MAX_DISTANCE | ||||||
|             }) { |             }) { | ||||||
|                 word_positions_into_word_pair_proximity( |                 word_positions_into_word_pair_proximity( | ||||||
|                     &mut word_positions, |                     &mut word_positions, | ||||||
| @@ -109,6 +108,7 @@ fn document_word_positions_into_sorter( | |||||||
|     word_pair_proximity: &HashMap<(String, String), u8>, |     word_pair_proximity: &HashMap<(String, String), u8>, | ||||||
|     word_pair_proximity_docids_sorter: &mut grenad::Sorter<MergeFn>, |     word_pair_proximity_docids_sorter: &mut grenad::Sorter<MergeFn>, | ||||||
| ) -> Result<()> { | ) -> Result<()> { | ||||||
|  |     puffin::profile_function!(); | ||||||
|     let mut key_buffer = Vec::new(); |     let mut key_buffer = Vec::new(); | ||||||
|     for ((w1, w2), prox) in word_pair_proximity { |     for ((w1, w2), prox) in word_pair_proximity { | ||||||
|         key_buffer.clear(); |         key_buffer.clear(); | ||||||
| @@ -127,9 +127,10 @@ fn word_positions_into_word_pair_proximity( | |||||||
|     word_positions: &mut VecDeque<(String, u16)>, |     word_positions: &mut VecDeque<(String, u16)>, | ||||||
|     word_pair_proximity: &mut HashMap<(String, String), u8>, |     word_pair_proximity: &mut HashMap<(String, String), u8>, | ||||||
| ) -> Result<()> { | ) -> Result<()> { | ||||||
|  |     puffin::profile_function!(); | ||||||
|     let (head_word, head_position) = word_positions.pop_front().unwrap(); |     let (head_word, head_position) = word_positions.pop_front().unwrap(); | ||||||
|     for (word, position) in word_positions.iter() { |     for (word, position) in word_positions.iter() { | ||||||
|         let prox = positions_proximity(head_position as u32, *position as u32) as u8; |         let prox = index_proximity(head_position as u32, *position as u32) as u8; | ||||||
|         word_pair_proximity |         word_pair_proximity | ||||||
|             .entry((head_word.clone(), word.clone())) |             .entry((head_word.clone(), word.clone())) | ||||||
|             .and_modify(|p| { |             .and_modify(|p| { | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user