Mirror of https://github.com/meilisearch/meilisearch.git (synced 2025-10-30 23:46:28 +00:00)
			
		
		
		
	clean PR warnings
This commit is contained in:
		
				
					Committed by Louis Dureuil
					
				
			
			
				
	
			
			
			
						parent
						
							66c2c82a18
						
					
				
				
					commit
					1c5705c164
				
			| @@ -11,9 +11,7 @@ use super::interner::Interned; | |||||||
| use super::Word; | use super::Word; | ||||||
| use crate::heed_codec::{BytesDecodeOwned, StrBEU16Codec}; | use crate::heed_codec::{BytesDecodeOwned, StrBEU16Codec}; | ||||||
| use crate::update::{merge_cbo_roaring_bitmaps, MergeFn}; | use crate::update::{merge_cbo_roaring_bitmaps, MergeFn}; | ||||||
| use crate::{ | use crate::{CboRoaringBitmapCodec, CboRoaringBitmapLenCodec, Result, SearchContext}; | ||||||
|     CboRoaringBitmapCodec, CboRoaringBitmapLenCodec, Result, RoaringBitmapCodec, SearchContext, |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| /// A cache storing pointers to values in the LMDB databases. | /// A cache storing pointers to values in the LMDB databases. | ||||||
| /// | /// | ||||||
|   | |||||||
| @@ -16,9 +16,7 @@ use crate::facet::FacetType; | |||||||
| use crate::heed_codec::facet::FieldDocIdFacetCodec; | use crate::heed_codec::facet::FieldDocIdFacetCodec; | ||||||
| use crate::heed_codec::CboRoaringBitmapCodec; | use crate::heed_codec::CboRoaringBitmapCodec; | ||||||
| use crate::index::Hnsw; | use crate::index::Hnsw; | ||||||
| use crate::{ | use crate::{ExternalDocumentsIds, FieldId, FieldIdMapMissingEntry, Index, Result, BEU32}; | ||||||
|     ExternalDocumentsIds, FieldId, FieldIdMapMissingEntry, Index, Result, RoaringBitmapCodec, BEU32, |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| pub struct DeleteDocuments<'t, 'u, 'i> { | pub struct DeleteDocuments<'t, 'u, 'i> { | ||||||
|     wtxn: &'t mut heed::RwTxn<'i, 'u>, |     wtxn: &'t mut heed::RwTxn<'i, 'u>, | ||||||
|   | |||||||
| @@ -12,9 +12,7 @@ use serde_json::Value; | |||||||
| use super::helpers::{create_sorter, keep_latest_obkv, sorter_into_reader, GrenadParameters}; | use super::helpers::{create_sorter, keep_latest_obkv, sorter_into_reader, GrenadParameters}; | ||||||
| use crate::error::{InternalError, SerializationError}; | use crate::error::{InternalError, SerializationError}; | ||||||
| use crate::update::index_documents::MergeFn; | use crate::update::index_documents::MergeFn; | ||||||
| use crate::{ | use crate::{FieldId, Result, MAX_POSITION_PER_ATTRIBUTE, MAX_WORD_LENGTH}; | ||||||
|     absolute_from_relative_position, FieldId, Result, MAX_POSITION_PER_ATTRIBUTE, MAX_WORD_LENGTH, |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| pub type ScriptLanguageDocidsMap = HashMap<(Script, Language), RoaringBitmap>; | pub type ScriptLanguageDocidsMap = HashMap<(Script, Language), RoaringBitmap>; | ||||||
|  |  | ||||||
|   | |||||||
| @@ -5,11 +5,10 @@ use std::iter::FromIterator; | |||||||
|  |  | ||||||
| use heed::BytesDecode; | use heed::BytesDecode; | ||||||
| use obkv::KvReaderU16; | use obkv::KvReaderU16; | ||||||
| use roaring::RoaringBitmap; |  | ||||||
|  |  | ||||||
| use super::helpers::{ | use super::helpers::{ | ||||||
|     create_sorter, create_writer, merge_cbo_roaring_bitmaps, serialize_roaring_bitmap, |     create_sorter, create_writer, merge_cbo_roaring_bitmaps, sorter_into_reader, | ||||||
|     sorter_into_reader, try_split_array_at, writer_into_reader, GrenadParameters, |     try_split_array_at, writer_into_reader, GrenadParameters, | ||||||
| }; | }; | ||||||
| use crate::error::SerializationError; | use crate::error::SerializationError; | ||||||
| use crate::heed_codec::StrBEU16Codec; | use crate::heed_codec::StrBEU16Codec; | ||||||
| @@ -47,7 +46,6 @@ pub fn extract_word_docids<R: io::Read + io::Seek>( | |||||||
|         max_memory.map(|x| x / 3), |         max_memory.map(|x| x / 3), | ||||||
|     ); |     ); | ||||||
|     let mut key_buffer = Vec::new(); |     let mut key_buffer = Vec::new(); | ||||||
|     let mut value_buffer = Vec::new(); |  | ||||||
|     let mut words = BTreeSet::new(); |     let mut words = BTreeSet::new(); | ||||||
|     let mut cursor = docid_word_positions.into_cursor()?; |     let mut cursor = docid_word_positions.into_cursor()?; | ||||||
|     while let Some((key, value)) = cursor.move_on_next()? { |     while let Some((key, value)) = cursor.move_on_next()? { | ||||||
| @@ -66,7 +64,6 @@ pub fn extract_word_docids<R: io::Read + io::Seek>( | |||||||
|             document_id, |             document_id, | ||||||
|             fid, |             fid, | ||||||
|             &mut key_buffer, |             &mut key_buffer, | ||||||
|             &mut value_buffer, |  | ||||||
|             &mut words, |             &mut words, | ||||||
|             &mut word_fid_docids_sorter, |             &mut word_fid_docids_sorter, | ||||||
|         )?; |         )?; | ||||||
| @@ -124,7 +121,6 @@ fn words_into_sorter( | |||||||
|     document_id: DocumentId, |     document_id: DocumentId, | ||||||
|     fid: FieldId, |     fid: FieldId, | ||||||
|     key_buffer: &mut Vec<u8>, |     key_buffer: &mut Vec<u8>, | ||||||
|     value_buffer: &mut Vec<u8>, |  | ||||||
|     words: &mut BTreeSet<Vec<u8>>, |     words: &mut BTreeSet<Vec<u8>>, | ||||||
|     word_fid_docids_sorter: &mut grenad::Sorter<MergeFn>, |     word_fid_docids_sorter: &mut grenad::Sorter<MergeFn>, | ||||||
| ) -> Result<()> { | ) -> Result<()> { | ||||||
|   | |||||||
| @@ -1,53 +0,0 @@ | |||||||
| use std::fs::File; |  | ||||||
| use std::io::{self, BufReader}; |  | ||||||
|  |  | ||||||
| use super::helpers::{ |  | ||||||
|     create_sorter, merge_cbo_roaring_bitmaps, read_u32_ne_bytes, sorter_into_reader, |  | ||||||
|     try_split_array_at, GrenadParameters, |  | ||||||
| }; |  | ||||||
| use crate::error::SerializationError; |  | ||||||
| use crate::index::db_name::DOCID_WORD_POSITIONS; |  | ||||||
| use crate::{relative_from_absolute_position, DocumentId, Result}; |  | ||||||
|  |  | ||||||
| /// Extracts the word, field id, and the documents ids where this word appear at this field id. |  | ||||||
| #[logging_timer::time] |  | ||||||
| pub fn extract_word_fid_docids<R: io::Read + io::Seek>( |  | ||||||
|     docid_word_positions: grenad::Reader<R>, |  | ||||||
|     indexer: GrenadParameters, |  | ||||||
| ) -> Result<grenad::Reader<BufReader<File>>> { |  | ||||||
|     puffin::profile_function!(); |  | ||||||
|  |  | ||||||
|     todo!("remove me"); |  | ||||||
|  |  | ||||||
|     let max_memory = indexer.max_memory_by_thread(); |  | ||||||
|  |  | ||||||
|     let mut word_fid_docids_sorter = create_sorter( |  | ||||||
|         grenad::SortAlgorithm::Unstable, |  | ||||||
|         merge_cbo_roaring_bitmaps, |  | ||||||
|         indexer.chunk_compression_type, |  | ||||||
|         indexer.chunk_compression_level, |  | ||||||
|         indexer.max_nb_chunks, |  | ||||||
|         max_memory, |  | ||||||
|     ); |  | ||||||
|  |  | ||||||
|     let mut key_buffer = Vec::new(); |  | ||||||
|     let mut cursor = docid_word_positions.into_cursor()?; |  | ||||||
|     while let Some((key, value)) = cursor.move_on_next()? { |  | ||||||
|         let (document_id_bytes, word_bytes) = try_split_array_at(key) |  | ||||||
|             .ok_or(SerializationError::Decoding { db_name: Some(DOCID_WORD_POSITIONS) })?; |  | ||||||
|         let document_id = DocumentId::from_be_bytes(document_id_bytes); |  | ||||||
|  |  | ||||||
|         for position in read_u32_ne_bytes(value) { |  | ||||||
|             key_buffer.clear(); |  | ||||||
|             key_buffer.extend_from_slice(word_bytes); |  | ||||||
|             key_buffer.push(0); |  | ||||||
|             let (fid, _) = relative_from_absolute_position(position); |  | ||||||
|             key_buffer.extend_from_slice(&fid.to_be_bytes()); |  | ||||||
|             word_fid_docids_sorter.insert(&key_buffer, document_id.to_ne_bytes())?; |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     let word_fid_docids_reader = sorter_into_reader(word_fid_docids_sorter, indexer)?; |  | ||||||
|  |  | ||||||
|     Ok(word_fid_docids_reader) |  | ||||||
| } |  | ||||||
| @@ -6,7 +6,6 @@ mod extract_fid_word_count_docids; | |||||||
| mod extract_geo_points; | mod extract_geo_points; | ||||||
| mod extract_vector_points; | mod extract_vector_points; | ||||||
| mod extract_word_docids; | mod extract_word_docids; | ||||||
| mod extract_word_fid_docids; |  | ||||||
| mod extract_word_pair_proximity_docids; | mod extract_word_pair_proximity_docids; | ||||||
| mod extract_word_position_docids; | mod extract_word_position_docids; | ||||||
|  |  | ||||||
| @@ -26,12 +25,11 @@ use self::extract_fid_word_count_docids::extract_fid_word_count_docids; | |||||||
| use self::extract_geo_points::extract_geo_points; | use self::extract_geo_points::extract_geo_points; | ||||||
| use self::extract_vector_points::extract_vector_points; | use self::extract_vector_points::extract_vector_points; | ||||||
| use self::extract_word_docids::extract_word_docids; | use self::extract_word_docids::extract_word_docids; | ||||||
| use self::extract_word_fid_docids::extract_word_fid_docids; |  | ||||||
| use self::extract_word_pair_proximity_docids::extract_word_pair_proximity_docids; | use self::extract_word_pair_proximity_docids::extract_word_pair_proximity_docids; | ||||||
| use self::extract_word_position_docids::extract_word_position_docids; | use self::extract_word_position_docids::extract_word_position_docids; | ||||||
| use super::helpers::{ | use super::helpers::{ | ||||||
|     as_cloneable_grenad, merge_cbo_roaring_bitmaps, merge_roaring_bitmaps, CursorClonableMmap, |     as_cloneable_grenad, merge_cbo_roaring_bitmaps, CursorClonableMmap, GrenadParameters, MergeFn, | ||||||
|     GrenadParameters, MergeFn, MergeableReader, |     MergeableReader, | ||||||
| }; | }; | ||||||
| use super::{helpers, TypedChunk}; | use super::{helpers, TypedChunk}; | ||||||
| use crate::{FieldId, Result}; | use crate::{FieldId, Result}; | ||||||
| @@ -206,15 +204,6 @@ pub(crate) fn data_from_obkv_documents( | |||||||
|         TypedChunk::WordPositionDocids, |         TypedChunk::WordPositionDocids, | ||||||
|         "word-position-docids", |         "word-position-docids", | ||||||
|     ); |     ); | ||||||
|     // spawn_extraction_task::<_, _, Vec<grenad::Reader<BufReader<File>>>>( |  | ||||||
|     //     docid_word_positions_chunks, |  | ||||||
|     //     indexer, |  | ||||||
|     //     lmdb_writer_sx.clone(), |  | ||||||
|     //     extract_word_fid_docids, |  | ||||||
|     //     merge_cbo_roaring_bitmaps, |  | ||||||
|     //     TypedChunk::WordFidDocids, |  | ||||||
|     //     "word-fid-docids", |  | ||||||
|     // ); |  | ||||||
|  |  | ||||||
|     spawn_extraction_task::<_, _, Vec<grenad::Reader<BufReader<File>>>>( |     spawn_extraction_task::<_, _, Vec<grenad::Reader<BufReader<File>>>>( | ||||||
|         docid_fid_facet_strings_chunks, |         docid_fid_facet_strings_chunks, | ||||||
|   | |||||||
| @@ -11,6 +11,7 @@ use crate::Result; | |||||||
|  |  | ||||||
| pub type MergeFn = for<'a> fn(&[u8], &[Cow<'a, [u8]>]) -> Result<Cow<'a, [u8]>>; | pub type MergeFn = for<'a> fn(&[u8], &[Cow<'a, [u8]>]) -> Result<Cow<'a, [u8]>>; | ||||||
|  |  | ||||||
|  | #[allow(unused)] | ||||||
| pub fn concat_u32s_array<'a>(_key: &[u8], values: &[Cow<'a, [u8]>]) -> Result<Cow<'a, [u8]>> { | pub fn concat_u32s_array<'a>(_key: &[u8], values: &[Cow<'a, [u8]>]) -> Result<Cow<'a, [u8]>> { | ||||||
|     if values.len() == 1 { |     if values.len() == 1 { | ||||||
|         Ok(values[0].clone()) |         Ok(values[0].clone()) | ||||||
|   | |||||||
| @@ -44,6 +44,7 @@ where | |||||||
|     Some((head, tail)) |     Some((head, tail)) | ||||||
| } | } | ||||||
|  |  | ||||||
|  | #[allow(unused)] | ||||||
| pub fn read_u32_ne_bytes(bytes: &[u8]) -> impl Iterator<Item = u32> + '_ { | pub fn read_u32_ne_bytes(bytes: &[u8]) -> impl Iterator<Item = u32> + '_ { | ||||||
|     bytes.chunks_exact(4).flat_map(TryInto::try_into).map(u32::from_ne_bytes) |     bytes.chunks_exact(4).flat_map(TryInto::try_into).map(u32::from_ne_bytes) | ||||||
| } | } | ||||||
|   | |||||||
| @@ -38,7 +38,7 @@ use crate::update::{ | |||||||
|     self, DeletionStrategy, IndexerConfig, PrefixWordPairsProximityDocids, UpdateIndexingStep, |     self, DeletionStrategy, IndexerConfig, PrefixWordPairsProximityDocids, UpdateIndexingStep, | ||||||
|     WordPrefixDocids, WordPrefixIntegerDocids, WordsPrefixesFst, |     WordPrefixDocids, WordPrefixIntegerDocids, WordsPrefixesFst, | ||||||
| }; | }; | ||||||
| use crate::{CboRoaringBitmapCodec, Index, Result, RoaringBitmapCodec}; | use crate::{CboRoaringBitmapCodec, Index, Result}; | ||||||
|  |  | ||||||
| static MERGED_DATABASE_COUNT: usize = 7; | static MERGED_DATABASE_COUNT: usize = 7; | ||||||
| static PREFIX_DATABASE_COUNT: usize = 5; | static PREFIX_DATABASE_COUNT: usize = 5; | ||||||
| @@ -434,11 +434,6 @@ where | |||||||
|                     word_position_docids = Some(cloneable_chunk); |                     word_position_docids = Some(cloneable_chunk); | ||||||
|                     TypedChunk::WordPositionDocids(chunk) |                     TypedChunk::WordPositionDocids(chunk) | ||||||
|                 } |                 } | ||||||
|                 TypedChunk::WordFidDocids(chunk) => { |  | ||||||
|                     let cloneable_chunk = unsafe { as_cloneable_grenad(&chunk)? }; |  | ||||||
|                     word_fid_docids = Some(cloneable_chunk); |  | ||||||
|                     TypedChunk::WordFidDocids(chunk) |  | ||||||
|                 } |  | ||||||
|                 otherwise => otherwise, |                 otherwise => otherwise, | ||||||
|             }; |             }; | ||||||
|  |  | ||||||
|   | |||||||
| @@ -35,7 +35,6 @@ pub(crate) enum TypedChunk { | |||||||
|         word_fid_docids_reader: grenad::Reader<BufReader<File>>, |         word_fid_docids_reader: grenad::Reader<BufReader<File>>, | ||||||
|     }, |     }, | ||||||
|     WordPositionDocids(grenad::Reader<BufReader<File>>), |     WordPositionDocids(grenad::Reader<BufReader<File>>), | ||||||
|     WordFidDocids(grenad::Reader<BufReader<File>>), |  | ||||||
|     WordPairProximityDocids(grenad::Reader<BufReader<File>>), |     WordPairProximityDocids(grenad::Reader<BufReader<File>>), | ||||||
|     FieldIdFacetStringDocids(grenad::Reader<BufReader<File>>), |     FieldIdFacetStringDocids(grenad::Reader<BufReader<File>>), | ||||||
|     FieldIdFacetNumberDocids(grenad::Reader<BufReader<File>>), |     FieldIdFacetNumberDocids(grenad::Reader<BufReader<File>>), | ||||||
| @@ -78,9 +77,6 @@ impl TypedChunk { | |||||||
|             TypedChunk::WordPositionDocids(grenad) => { |             TypedChunk::WordPositionDocids(grenad) => { | ||||||
|                 format!("WordPositionDocids {{ number_of_entries: {} }}", grenad.len()) |                 format!("WordPositionDocids {{ number_of_entries: {} }}", grenad.len()) | ||||||
|             } |             } | ||||||
|             TypedChunk::WordFidDocids(grenad) => { |  | ||||||
|                 format!("WordFidDocids {{ number_of_entries: {} }}", grenad.len()) |  | ||||||
|             } |  | ||||||
|             TypedChunk::WordPairProximityDocids(grenad) => { |             TypedChunk::WordPairProximityDocids(grenad) => { | ||||||
|                 format!("WordPairProximityDocids {{ number_of_entries: {} }}", grenad.len()) |                 format!("WordPairProximityDocids {{ number_of_entries: {} }}", grenad.len()) | ||||||
|             } |             } | ||||||
| @@ -202,17 +198,6 @@ pub(crate) fn write_typed_chunk_into_index( | |||||||
|             )?; |             )?; | ||||||
|             is_merged_database = true; |             is_merged_database = true; | ||||||
|         } |         } | ||||||
|         TypedChunk::WordFidDocids(word_fid_docids_iter) => { |  | ||||||
|             append_entries_into_database( |  | ||||||
|                 word_fid_docids_iter, |  | ||||||
|                 &index.word_fid_docids, |  | ||||||
|                 wtxn, |  | ||||||
|                 index_is_empty, |  | ||||||
|                 |value, _buffer| Ok(value), |  | ||||||
|                 merge_cbo_roaring_bitmaps, |  | ||||||
|             )?; |  | ||||||
|             is_merged_database = true; |  | ||||||
|         } |  | ||||||
|         TypedChunk::FieldIdFacetNumberDocids(facet_id_number_docids_iter) => { |         TypedChunk::FieldIdFacetNumberDocids(facet_id_number_docids_iter) => { | ||||||
|             let indexer = FacetsUpdate::new(index, FacetType::Number, facet_id_number_docids_iter); |             let indexer = FacetsUpdate::new(index, FacetType::Number, facet_id_number_docids_iter); | ||||||
|             indexer.execute(wtxn)?; |             indexer.execute(wtxn)?; | ||||||
|   | |||||||
| @@ -8,7 +8,7 @@ use crate::update::index_documents::{ | |||||||
|     create_sorter, merge_cbo_roaring_bitmaps, sorter_into_lmdb_database, valid_lmdb_key, |     create_sorter, merge_cbo_roaring_bitmaps, sorter_into_lmdb_database, valid_lmdb_key, | ||||||
|     CursorClonableMmap, MergeFn, |     CursorClonableMmap, MergeFn, | ||||||
| }; | }; | ||||||
| use crate::{CboRoaringBitmapCodec, Result, RoaringBitmapCodec}; | use crate::{CboRoaringBitmapCodec, Result}; | ||||||
|  |  | ||||||
| pub struct WordPrefixDocids<'t, 'u, 'i> { | pub struct WordPrefixDocids<'t, 'u, 'i> { | ||||||
|     wtxn: &'t mut heed::RwTxn<'i, 'u>, |     wtxn: &'t mut heed::RwTxn<'i, 'u>, | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user