Mirror of https://github.com/meilisearch/meilisearch.git (synced 2025-10-25 21:16:28 +00:00)
			
		
		
		
	fix: Transform the identifier value into a string before hashing it
This commit is contained in:
		| @@ -15,7 +15,8 @@ meilidb-tokenizer = { path = "../meilidb-tokenizer", version = "0.1.0" } | |||||||
| ordered-float = { version = "1.0.2", features = ["serde"] } | ordered-float = { version = "1.0.2", features = ["serde"] } | ||||||
| rocksdb = { version = "0.12.2", default-features = false } | rocksdb = { version = "0.12.2", default-features = false } | ||||||
| sdset = "0.3.2" | sdset = "0.3.2" | ||||||
| serde = { version = "1.0.91", features = ["derive"] } | serde = { version = "1.0.99", features = ["derive"] } | ||||||
|  | serde_json = "1.0.40" | ||||||
| siphasher = "0.3.0" | siphasher = "0.3.0" | ||||||
| zerocopy = "0.2.2" | zerocopy = "0.2.2" | ||||||
|  |  | ||||||
| @@ -29,4 +30,3 @@ branch = "arc-byte-slice" | |||||||
|  |  | ||||||
| [dev-dependencies] | [dev-dependencies] | ||||||
| tempfile = "3.0.7" | tempfile = "3.0.7" | ||||||
| serde_json = "1.0.39" |  | ||||||
|   | |||||||
| @@ -2,6 +2,7 @@ use std::hash::{Hash, Hasher}; | |||||||
|  |  | ||||||
| use meilidb_core::DocumentId; | use meilidb_core::DocumentId; | ||||||
| use serde::{ser, Serialize}; | use serde::{ser, Serialize}; | ||||||
|  | use serde_json::Value; | ||||||
| use siphasher::sip::SipHasher; | use siphasher::sip::SipHasher; | ||||||
|  |  | ||||||
| use super::{SerializerError, ConvertToString}; | use super::{SerializerError, ConvertToString}; | ||||||
| @@ -16,7 +17,18 @@ where D: serde::Serialize, | |||||||
|     document.serialize(serializer) |     document.serialize(serializer) | ||||||
| } | } | ||||||
|  |  | ||||||
| pub fn compute_document_id<T: Hash>(t: &T) -> DocumentId { | pub fn value_to_string(value: &Value) -> Option<String> { | ||||||
|  |     match value { | ||||||
|  |         Value::Null => None, | ||||||
|  |         Value::Bool(_) => None, | ||||||
|  |         Value::Number(value) => Some(value.to_string()), | ||||||
|  |         Value::String(value) => Some(value.to_string()), | ||||||
|  |         Value::Array(_) => None, | ||||||
|  |         Value::Object(_) => None, | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | pub fn compute_document_id<H: Hash>(t: H) -> DocumentId { | ||||||
|     let mut s = SipHasher::new(); |     let mut s = SipHasher::new(); | ||||||
|     t.hash(&mut s); |     t.hash(&mut s); | ||||||
|     let hash = s.finish(); |     let hash = s.finish(); | ||||||
| @@ -213,10 +225,11 @@ impl<'a> ser::SerializeMap for ExtractDocumentIdMapSerializer<'a> { | |||||||
|         let key = key.serialize(ConvertToString)?; |         let key = key.serialize(ConvertToString)?; | ||||||
|  |  | ||||||
|         if self.identifier == key { |         if self.identifier == key { | ||||||
|             // TODO is it possible to have multiple ids? |             let value = serde_json::to_string(value).and_then(|s| serde_json::from_str(&s))?; | ||||||
|             let id = bincode::serialize(value).unwrap(); |             match value_to_string(&value).map(|s| compute_document_id(&s)) { | ||||||
|             let document_id = compute_document_id(&id); |                 Some(document_id) => self.document_id = Some(document_id), | ||||||
|             self.document_id = Some(document_id); |                 None => return Err(SerializerError::InvalidDocumentIdType), | ||||||
|  |             } | ||||||
|         } |         } | ||||||
|  |  | ||||||
|         Ok(()) |         Ok(()) | ||||||
| @@ -244,10 +257,11 @@ impl<'a> ser::SerializeStruct for ExtractDocumentIdStructSerializer<'a> { | |||||||
|     where T: Serialize, |     where T: Serialize, | ||||||
|     { |     { | ||||||
|         if self.identifier == key { |         if self.identifier == key { | ||||||
|             // TODO can it be possible to have multiple ids? |             let value = serde_json::to_string(value).and_then(|s| serde_json::from_str(&s))?; | ||||||
|             let id = bincode::serialize(value).unwrap(); |             match value_to_string(&value).map(compute_document_id) { | ||||||
|             let document_id = compute_document_id(&id); |                 Some(document_id) => self.document_id = Some(document_id), | ||||||
|             self.document_id = Some(document_id); |                 None => return Err(SerializerError::InvalidDocumentIdType), | ||||||
|  |             } | ||||||
|         } |         } | ||||||
|  |  | ||||||
|         Ok(()) |         Ok(()) | ||||||
|   | |||||||
| @@ -28,6 +28,7 @@ use std::{fmt, error::Error}; | |||||||
| use meilidb_core::DocumentId; | use meilidb_core::DocumentId; | ||||||
| use meilidb_schema::SchemaAttr; | use meilidb_schema::SchemaAttr; | ||||||
| use rmp_serde::encode::Error as RmpError; | use rmp_serde::encode::Error as RmpError; | ||||||
|  | use serde_json::Error as SerdeJsonError; | ||||||
| use serde::ser; | use serde::ser; | ||||||
|  |  | ||||||
| use crate::number::ParseNumberError; | use crate::number::ParseNumberError; | ||||||
| @@ -35,7 +36,9 @@ use crate::number::ParseNumberError; | |||||||
| #[derive(Debug)] | #[derive(Debug)] | ||||||
| pub enum SerializerError { | pub enum SerializerError { | ||||||
|     DocumentIdNotFound, |     DocumentIdNotFound, | ||||||
|  |     InvalidDocumentIdType, | ||||||
|     RmpError(RmpError), |     RmpError(RmpError), | ||||||
|  |     SerdeJsonError(SerdeJsonError), | ||||||
|     RocksdbError(rocksdb::Error), |     RocksdbError(rocksdb::Error), | ||||||
|     ParseNumberError(ParseNumberError), |     ParseNumberError(ParseNumberError), | ||||||
|     UnserializableType { type_name: &'static str }, |     UnserializableType { type_name: &'static str }, | ||||||
| @@ -55,8 +58,12 @@ impl fmt::Display for SerializerError { | |||||||
|         match self { |         match self { | ||||||
|             SerializerError::DocumentIdNotFound => { |             SerializerError::DocumentIdNotFound => { | ||||||
|                 write!(f, "serialized document does not have an id according to the schema") |                 write!(f, "serialized document does not have an id according to the schema") | ||||||
|             } |             }, | ||||||
|  |             SerializerError::InvalidDocumentIdType => { | ||||||
|  |                 write!(f, "document identifier can only be of type string or number") | ||||||
|  |             }, | ||||||
|             SerializerError::RmpError(e) => write!(f, "rmp serde related error: {}", e), |             SerializerError::RmpError(e) => write!(f, "rmp serde related error: {}", e), | ||||||
|  |             SerializerError::SerdeJsonError(e) => write!(f, "serde json error: {}", e), | ||||||
|             SerializerError::RocksdbError(e) => write!(f, "RocksDB related error: {}", e), |             SerializerError::RocksdbError(e) => write!(f, "RocksDB related error: {}", e), | ||||||
|             SerializerError::ParseNumberError(e) => { |             SerializerError::ParseNumberError(e) => { | ||||||
|                 write!(f, "error while trying to parse a number: {}", e) |                 write!(f, "error while trying to parse a number: {}", e) | ||||||
| @@ -89,6 +96,12 @@ impl From<RmpError> for SerializerError { | |||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
|  | impl From<SerdeJsonError> for SerializerError { | ||||||
|  |     fn from(error: SerdeJsonError) -> SerializerError { | ||||||
|  |         SerializerError::SerdeJsonError(error) | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
| impl From<rocksdb::Error> for SerializerError { | impl From<rocksdb::Error> for SerializerError { | ||||||
|     fn from(error: rocksdb::Error) -> SerializerError { |     fn from(error: rocksdb::Error) -> SerializerError { | ||||||
|         SerializerError::RocksdbError(error) |         SerializerError::RocksdbError(error) | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user