mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-31 07:56:28 +00:00 
			
		
		
		
	Prefer using a stable hash builder rather than a random one
This commit is contained in:
		| @@ -2,6 +2,7 @@ use std::collections::{BTreeMap, BTreeSet}; | ||||
|  | ||||
| use bumparaw_collections::RawMap; | ||||
| use heed::RoTxn; | ||||
| use rustc_hash::FxBuildHasher; | ||||
| use serde_json::value::RawValue; | ||||
|  | ||||
| use super::vector_document::VectorDocument; | ||||
| @@ -385,12 +386,12 @@ pub type Entry<'doc> = (&'doc str, &'doc RawValue); | ||||
|  | ||||
| #[derive(Debug)] | ||||
| pub struct Versions<'doc> { | ||||
|     data: RawMap<'doc>, | ||||
|     data: RawMap<'doc, FxBuildHasher>, | ||||
| } | ||||
|  | ||||
| impl<'doc> Versions<'doc> { | ||||
|     pub fn multiple( | ||||
|         mut versions: impl Iterator<Item = Result<RawMap<'doc>>>, | ||||
|         mut versions: impl Iterator<Item = Result<RawMap<'doc, FxBuildHasher>>>, | ||||
|     ) -> Result<Option<Self>> { | ||||
|         let Some(data) = versions.next() else { return Ok(None) }; | ||||
|         let mut data = data?; | ||||
| @@ -403,7 +404,7 @@ impl<'doc> Versions<'doc> { | ||||
|         Ok(Some(Self::single(data))) | ||||
|     } | ||||
|  | ||||
|     pub fn single(version: RawMap<'doc>) -> Self { | ||||
|     pub fn single(version: RawMap<'doc, FxBuildHasher>) -> Self { | ||||
|         Self { data: version } | ||||
|     } | ||||
|  | ||||
|   | ||||
| @@ -179,6 +179,7 @@ mod test { | ||||
|     use bumparaw_collections::RawMap; | ||||
|     use charabia::TokenizerBuilder; | ||||
|     use meili_snap::snapshot; | ||||
|     use rustc_hash::FxBuildHasher; | ||||
|     use serde_json::json; | ||||
|     use serde_json::value::RawValue; | ||||
|  | ||||
| @@ -234,7 +235,7 @@ mod test { | ||||
|  | ||||
|         let bump = Bump::new(); | ||||
|         let document: &RawValue = serde_json::from_str(&document).unwrap(); | ||||
|         let document = RawMap::from_raw_value(document, &bump).unwrap(); | ||||
|         let document = RawMap::from_raw_value_and_hasher(document, FxBuildHasher, &bump).unwrap(); | ||||
|  | ||||
|         let document = Versions::single(document); | ||||
|         let document = DocumentFromVersions::new(&document); | ||||
|   | ||||
| @@ -2,6 +2,7 @@ use std::ops::ControlFlow; | ||||
|  | ||||
| use bumpalo::Bump; | ||||
| use bumparaw_collections::RawVec; | ||||
| use rustc_hash::FxBuildHasher; | ||||
| use serde::de::{DeserializeSeed, Deserializer as _, Visitor}; | ||||
| use serde_json::value::RawValue; | ||||
|  | ||||
| @@ -394,7 +395,7 @@ impl<'a> Iterator for DeserrRawVecIter<'a> { | ||||
| } | ||||
|  | ||||
| pub struct DeserrRawMap<'a> { | ||||
|     map: bumparaw_collections::RawMap<'a>, | ||||
|     map: bumparaw_collections::RawMap<'a, FxBuildHasher>, | ||||
|     alloc: &'a Bump, | ||||
| } | ||||
|  | ||||
|   | ||||
| @@ -5,6 +5,7 @@ use hashbrown::hash_map::Entry; | ||||
| use heed::RoTxn; | ||||
| use memmap2::Mmap; | ||||
| use rayon::slice::ParallelSlice; | ||||
| use rustc_hash::FxBuildHasher; | ||||
| use serde_json::value::RawValue; | ||||
| use serde_json::Deserializer; | ||||
|  | ||||
| @@ -166,8 +167,9 @@ fn extract_addition_payload_changes<'r, 'pl: 'r>( | ||||
|  | ||||
|         // Only guess the primary key if it is the first document | ||||
|         let retrieved_primary_key = if previous_offset == 0 { | ||||
|             let doc = | ||||
|                 RawMap::from_raw_value(doc, indexer).map(Some).map_err(UserError::SerdeJson)?; | ||||
|             let doc = RawMap::from_raw_value_and_hasher(doc, FxBuildHasher, indexer) | ||||
|                 .map(Some) | ||||
|                 .map_err(UserError::SerdeJson)?; | ||||
|  | ||||
|             let result = retrieve_or_guess_primary_key( | ||||
|                 rtxn, | ||||
| @@ -546,7 +548,8 @@ impl MergeChanges for MergeDocumentForReplacement { | ||||
|             Some(InnerDocOp::Addition(DocumentOffset { content })) => { | ||||
|                 let document = serde_json::from_slice(content).unwrap(); | ||||
|                 let document = | ||||
|                     RawMap::from_raw_value(document, doc_alloc).map_err(UserError::SerdeJson)?; | ||||
|                     RawMap::from_raw_value_and_hasher(document, FxBuildHasher, doc_alloc) | ||||
|                         .map_err(UserError::SerdeJson)?; | ||||
|  | ||||
|                 if is_new { | ||||
|                     Ok(Some(DocumentChange::Insertion(Insertion::create( | ||||
| @@ -633,7 +636,8 @@ impl MergeChanges for MergeDocumentForUpdates { | ||||
|                 }; | ||||
|                 let document = serde_json::from_slice(content).unwrap(); | ||||
|                 let document = | ||||
|                     RawMap::from_raw_value(document, doc_alloc).map_err(UserError::SerdeJson)?; | ||||
|                     RawMap::from_raw_value_and_hasher(document, FxBuildHasher, doc_alloc) | ||||
|                         .map_err(UserError::SerdeJson)?; | ||||
|  | ||||
|                 Some(Versions::single(document)) | ||||
|             } | ||||
| @@ -647,8 +651,9 @@ impl MergeChanges for MergeDocumentForUpdates { | ||||
|                     }; | ||||
|  | ||||
|                     let document = serde_json::from_slice(content).unwrap(); | ||||
|                     let document = RawMap::from_raw_value(document, doc_alloc) | ||||
|                         .map_err(UserError::SerdeJson)?; | ||||
|                     let document = | ||||
|                         RawMap::from_raw_value_and_hasher(document, FxBuildHasher, doc_alloc) | ||||
|                             .map_err(UserError::SerdeJson)?; | ||||
|                     Ok(document) | ||||
|                 }); | ||||
|                 Versions::multiple(versions)? | ||||
|   | ||||
| @@ -14,6 +14,7 @@ use heed::{RoTxn, RwTxn}; | ||||
| use itertools::{merge_join_by, EitherOrBoth}; | ||||
| pub use partial_dump::PartialDump; | ||||
| use rand::SeedableRng as _; | ||||
| use rustc_hash::FxBuildHasher; | ||||
| use time::OffsetDateTime; | ||||
| pub use update_by_function::UpdateByFunction; | ||||
|  | ||||
| @@ -776,7 +777,7 @@ pub fn retrieve_or_guess_primary_key<'a>( | ||||
|     index: &Index, | ||||
|     new_fields_ids_map: &mut FieldsIdsMap, | ||||
|     primary_key_from_op: Option<&'a str>, | ||||
|     first_document: Option<RawMap<'a>>, | ||||
|     first_document: Option<RawMap<'a, FxBuildHasher>>, | ||||
| ) -> Result<StdResult<(PrimaryKey<'a>, bool), UserError>> { | ||||
|     // make sure that we have a declared primary key, either fetching it from the index or attempting to guess it. | ||||
|  | ||||
|   | ||||
| @@ -2,6 +2,7 @@ use std::ops::DerefMut; | ||||
|  | ||||
| use bumparaw_collections::RawMap; | ||||
| use rayon::iter::IndexedParallelIterator; | ||||
| use rustc_hash::FxBuildHasher; | ||||
| use serde_json::value::RawValue; | ||||
|  | ||||
| use super::document_changes::{DocumentChangeContext, DocumentChanges}; | ||||
| @@ -76,8 +77,8 @@ where | ||||
|             self.primary_key.extract_fields_and_docid(document, fields_ids_map, doc_alloc)?; | ||||
|         let external_document_id = external_document_id.to_de(); | ||||
|  | ||||
|         let document = | ||||
|             RawMap::from_raw_value(document, doc_alloc).map_err(InternalError::SerdeJson)?; | ||||
|         let document = RawMap::from_raw_value_and_hasher(document, FxBuildHasher, doc_alloc) | ||||
|             .map_err(InternalError::SerdeJson)?; | ||||
|  | ||||
|         let insertion = Insertion::create(docid, external_document_id, Versions::single(document)); | ||||
|         Ok(Some(DocumentChange::Insertion(insertion))) | ||||
|   | ||||
| @@ -3,6 +3,7 @@ use rayon::iter::IndexedParallelIterator; | ||||
| use rayon::slice::ParallelSlice as _; | ||||
| use rhai::{Dynamic, Engine, OptimizationLevel, Scope, AST}; | ||||
| use roaring::RoaringBitmap; | ||||
| use rustc_hash::FxBuildHasher; | ||||
|  | ||||
| use super::document_changes::DocumentChangeContext; | ||||
| use super::DocumentChanges; | ||||
| @@ -160,8 +161,12 @@ impl<'index> DocumentChanges<'index> for UpdateByFunctionChanges<'index> { | ||||
|                         if document_id != new_document_id { | ||||
|                             Err(Error::UserError(UserError::DocumentEditionCannotModifyPrimaryKey)) | ||||
|                         } else { | ||||
|                             let raw_new_doc = RawMap::from_raw_value(raw_new_doc, doc_alloc) | ||||
|                                 .map_err(InternalError::SerdeJson)?; | ||||
|                             let raw_new_doc = RawMap::from_raw_value_and_hasher( | ||||
|                                 raw_new_doc, | ||||
|                                 FxBuildHasher, | ||||
|                                 doc_alloc, | ||||
|                             ) | ||||
|                             .map_err(InternalError::SerdeJson)?; | ||||
|  | ||||
|                             Ok(Some(DocumentChange::Update(Update::create( | ||||
|                                 docid, | ||||
|   | ||||
| @@ -4,6 +4,7 @@ use bumpalo::Bump; | ||||
| use bumparaw_collections::RawMap; | ||||
| use deserr::{Deserr, IntoValue}; | ||||
| use heed::RoTxn; | ||||
| use rustc_hash::FxBuildHasher; | ||||
| use serde::Serialize; | ||||
| use serde_json::value::RawValue; | ||||
|  | ||||
| @@ -84,7 +85,7 @@ pub struct VectorDocumentFromDb<'t> { | ||||
|     docid: DocumentId, | ||||
|     embedding_config: Vec<IndexEmbeddingConfig>, | ||||
|     index: &'t Index, | ||||
|     vectors_field: Option<RawMap<'t>>, | ||||
|     vectors_field: Option<RawMap<'t, FxBuildHasher>>, | ||||
|     rtxn: &'t RoTxn<'t>, | ||||
|     doc_alloc: &'t Bump, | ||||
| } | ||||
| @@ -102,9 +103,10 @@ impl<'t> VectorDocumentFromDb<'t> { | ||||
|         }; | ||||
|         let vectors = document.vectors_field()?; | ||||
|         let vectors_field = match vectors { | ||||
|             Some(vectors) => { | ||||
|                 Some(RawMap::from_raw_value(vectors, doc_alloc).map_err(InternalError::SerdeJson)?) | ||||
|             } | ||||
|             Some(vectors) => Some( | ||||
|                 RawMap::from_raw_value_and_hasher(vectors, FxBuildHasher, doc_alloc) | ||||
|                     .map_err(InternalError::SerdeJson)?, | ||||
|             ), | ||||
|             None => None, | ||||
|         }; | ||||
|  | ||||
| @@ -220,7 +222,7 @@ fn entry_from_raw_value( | ||||
|  | ||||
| pub struct VectorDocumentFromVersions<'doc> { | ||||
|     external_document_id: &'doc str, | ||||
|     vectors: RawMap<'doc>, | ||||
|     vectors: RawMap<'doc, FxBuildHasher>, | ||||
|     embedders: &'doc EmbeddingConfigs, | ||||
| } | ||||
|  | ||||
| @@ -233,8 +235,8 @@ impl<'doc> VectorDocumentFromVersions<'doc> { | ||||
|     ) -> Result<Option<Self>> { | ||||
|         let document = DocumentFromVersions::new(versions); | ||||
|         if let Some(vectors_field) = document.vectors_field()? { | ||||
|             let vectors = | ||||
|                 RawMap::from_raw_value(vectors_field, bump).map_err(UserError::SerdeJson)?; | ||||
|             let vectors = RawMap::from_raw_value_and_hasher(vectors_field, FxBuildHasher, bump) | ||||
|                 .map_err(UserError::SerdeJson)?; | ||||
|             Ok(Some(Self { external_document_id, vectors, embedders })) | ||||
|         } else { | ||||
|             Ok(None) | ||||
|   | ||||
		Reference in New Issue
	
	Block a user