Mirror of https://github.com/meilisearch/meilisearch.git (synced 2025-10-25 13:06:27 +00:00)
			
		
		
		
	Take the words-prefixes FST and the word_prefix_docids database into account while computing the biggest values
This commit is contained in:
		| @@ -321,6 +321,7 @@ fn biggest_value_sizes(index: &Index, rtxn: &heed::RoTxn, limit: usize) -> anyho | |||||||
|  |  | ||||||
|     let main_name = "main"; |     let main_name = "main"; | ||||||
|     let word_docids_name = "word_docids"; |     let word_docids_name = "word_docids"; | ||||||
|  |     let word_prefix_docids_name = "word_prefix_docids"; | ||||||
|     let docid_word_positions_name = "docid_word_positions"; |     let docid_word_positions_name = "docid_word_positions"; | ||||||
|     let word_pair_proximity_docids_name = "word_pair_proximity_docids"; |     let word_pair_proximity_docids_name = "word_pair_proximity_docids"; | ||||||
|     let facet_field_id_value_docids_name = "facet_field_id_value_docids"; |     let facet_field_id_value_docids_name = "facet_field_id_value_docids"; | ||||||
| @@ -329,8 +330,16 @@ fn biggest_value_sizes(index: &Index, rtxn: &heed::RoTxn, limit: usize) -> anyho | |||||||
|     let mut heap = BinaryHeap::with_capacity(limit + 1); |     let mut heap = BinaryHeap::with_capacity(limit + 1); | ||||||
|  |  | ||||||
|     if limit > 0 { |     if limit > 0 { | ||||||
|  |         // Fetch the words FST | ||||||
|         let words_fst = index.words_fst(rtxn)?; |         let words_fst = index.words_fst(rtxn)?; | ||||||
|         heap.push(Reverse((words_fst.as_fst().as_bytes().len(), format!("words-fst"), main_name))); |         let length = words_fst.as_fst().as_bytes().len(); | ||||||
|  |         heap.push(Reverse((length, format!("words-fst"), main_name))); | ||||||
|  |         if heap.len() > limit { heap.pop(); } | ||||||
|  |  | ||||||
|  |         // Fetch the word prefix FST | ||||||
|  |         let words_prefixes_fst = index.words_prefixes_fst(rtxn)?; | ||||||
|  |         let length = words_prefixes_fst.as_fst().as_bytes().len(); | ||||||
|  |         heap.push(Reverse((length, format!("words-prefixes-fst"), main_name))); | ||||||
|         if heap.len() > limit { heap.pop(); } |         if heap.len() > limit { heap.pop(); } | ||||||
|  |  | ||||||
|         if let Some(documents_ids) = main.get::<_, Str, ByteSlice>(rtxn, "documents-ids")? { |         if let Some(documents_ids) = main.get::<_, Str, ByteSlice>(rtxn, "documents-ids")? { | ||||||
| @@ -344,6 +353,12 @@ fn biggest_value_sizes(index: &Index, rtxn: &heed::RoTxn, limit: usize) -> anyho | |||||||
|             if heap.len() > limit { heap.pop(); } |             if heap.len() > limit { heap.pop(); } | ||||||
|         } |         } | ||||||
|  |  | ||||||
|  |         for result in word_prefix_docids.remap_data_type::<ByteSlice>().iter(rtxn)? { | ||||||
|  |             let (word, value) = result?; | ||||||
|  |             heap.push(Reverse((value.len(), word.to_string(), word_prefix_docids_name))); | ||||||
|  |             if heap.len() > limit { heap.pop(); } | ||||||
|  |         } | ||||||
|  |  | ||||||
|         for result in docid_word_positions.remap_data_type::<ByteSlice>().iter(rtxn)? { |         for result in docid_word_positions.remap_data_type::<ByteSlice>().iter(rtxn)? { | ||||||
|             let ((docid, word), value) = result?; |             let ((docid, word), value) = result?; | ||||||
|             let key = format!("{} {}", docid, word); |             let key = format!("{} {}", docid, word); | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user