mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-26 13:36:27 +00:00 
			
		
		
		
	feat: Create a strong DocumentId type
Forcing it to be a dedicated internal type helps avoid mistaken comparisons with values of other types.
This commit is contained in:
		| @@ -10,6 +10,7 @@ use meilidb::database::schema::{Schema, SchemaBuilder, STORED, INDEXED}; | ||||
| use meilidb::database::update::PositiveUpdateBuilder; | ||||
| use meilidb::tokenizer::DefaultBuilder; | ||||
| use meilidb::database::Database; | ||||
| use meilidb::DocumentId; | ||||
|  | ||||
| #[derive(Debug, StructOpt)] | ||||
| pub struct Opt { | ||||
| @@ -67,7 +68,7 @@ fn index(schema: Schema, database_path: &Path, csv_data_path: &Path) -> Result<D | ||||
|             } | ||||
|         }; | ||||
|  | ||||
|         let document_id = calculate_hash(&document.id); | ||||
|         let document_id = DocumentId(calculate_hash(&document.id)); | ||||
|         update.update(document_id, &document).unwrap(); | ||||
|     } | ||||
|  | ||||
|   | ||||
| @@ -156,13 +156,16 @@ unsafe fn into_u8_slice<T>(slice: &[T]) -> &[u8] { | ||||
| #[cfg(test)] | ||||
| mod tests { | ||||
|     use super::*; | ||||
|  | ||||
|     use std::error::Error; | ||||
|  | ||||
|     use crate::DocumentId; | ||||
|  | ||||
|     #[test] | ||||
|     fn builder_serialize_deserialize() -> Result<(), Box<Error>> { | ||||
|         let a = DocIndex { document_id: 0, attribute: 3, attribute_index: 11 }; | ||||
|         let b = DocIndex { document_id: 1, attribute: 4, attribute_index: 21 }; | ||||
|         let c = DocIndex { document_id: 2, attribute: 8, attribute_index: 2 }; | ||||
|         let a = DocIndex { document_id: DocumentId(0), attribute: 3, attribute_index: 11 }; | ||||
|         let b = DocIndex { document_id: DocumentId(1), attribute: 4, attribute_index: 21 }; | ||||
|         let c = DocIndex { document_id: DocumentId(2), attribute: 8, attribute_index: 2 }; | ||||
|  | ||||
|         let mut builder = DocIndexesBuilder::memory(); | ||||
|  | ||||
| @@ -183,9 +186,9 @@ mod tests { | ||||
|  | ||||
|     #[test] | ||||
|     fn serialize_deserialize() -> Result<(), Box<Error>> { | ||||
|         let a = DocIndex { document_id: 0, attribute: 3, attribute_index: 11 }; | ||||
|         let b = DocIndex { document_id: 1, attribute: 4, attribute_index: 21 }; | ||||
|         let c = DocIndex { document_id: 2, attribute: 8, attribute_index: 2 }; | ||||
|         let a = DocIndex { document_id: DocumentId(0), attribute: 3, attribute_index: 11 }; | ||||
|         let b = DocIndex { document_id: DocumentId(1), attribute: 4, attribute_index: 21 }; | ||||
|         let c = DocIndex { document_id: DocumentId(2), attribute: 8, attribute_index: 2 }; | ||||
|  | ||||
|         let mut builder = DocIndexesBuilder::memory(); | ||||
|  | ||||
|   | ||||
| @@ -201,13 +201,16 @@ impl<W: Write, X: Write> PositiveBlobBuilder<W, X> { | ||||
| #[cfg(test)] | ||||
| mod tests { | ||||
|     use super::*; | ||||
|  | ||||
|     use std::error::Error; | ||||
|  | ||||
|     use crate::DocumentId; | ||||
|  | ||||
|     #[test] | ||||
|     fn serialize_deserialize() -> Result<(), Box<Error>> { | ||||
|         let a = DocIndex { document_id: 0, attribute: 3, attribute_index: 11 }; | ||||
|         let b = DocIndex { document_id: 1, attribute: 4, attribute_index: 21 }; | ||||
|         let c = DocIndex { document_id: 2, attribute: 8, attribute_index: 2 }; | ||||
|         let a = DocIndex { document_id: DocumentId(0), attribute: 3, attribute_index: 11 }; | ||||
|         let b = DocIndex { document_id: DocumentId(1), attribute: 4, attribute_index: 21 }; | ||||
|         let c = DocIndex { document_id: DocumentId(2), attribute: 8, attribute_index: 2 }; | ||||
|  | ||||
|         let mut builder = PositiveBlobBuilder::memory(); | ||||
|  | ||||
| @@ -228,9 +231,9 @@ mod tests { | ||||
|  | ||||
|     #[test] | ||||
|     fn serde_serialize_deserialize() -> Result<(), Box<Error>> { | ||||
|         let a = DocIndex { document_id: 0, attribute: 3, attribute_index: 11 }; | ||||
|         let b = DocIndex { document_id: 1, attribute: 4, attribute_index: 21 }; | ||||
|         let c = DocIndex { document_id: 2, attribute: 8, attribute_index: 2 }; | ||||
|         let a = DocIndex { document_id: DocumentId(0), attribute: 3, attribute_index: 11 }; | ||||
|         let b = DocIndex { document_id: DocumentId(1), attribute: 4, attribute_index: 21 }; | ||||
|         let c = DocIndex { document_id: DocumentId(2), attribute: 8, attribute_index: 2 }; | ||||
|  | ||||
|         let mut builder = PositiveBlobBuilder::memory(); | ||||
|  | ||||
|   | ||||
| @@ -100,7 +100,7 @@ where D: Deref<Target=DB> | ||||
| { | ||||
|     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | ||||
|         let mut options = ReadOptions::new(); | ||||
|         let lower = DocumentKey::new(0); | ||||
|         let lower = DocumentKey::new(DocumentId(0)); | ||||
|         options.set_iterate_lower_bound(lower.as_ref()); | ||||
|  | ||||
|         let mut iter = self.snapshot.iter_opt(options); | ||||
|   | ||||
| @@ -19,7 +19,7 @@ impl DocumentKey { | ||||
|  | ||||
|         let mut wtr = Cursor::new(&mut buffer[..]); | ||||
|         wtr.write_all(b"doc-").unwrap(); | ||||
|         wtr.write_u64::<NativeEndian>(id).unwrap(); | ||||
|         wtr.write_u64::<NativeEndian>(id.0).unwrap(); | ||||
|  | ||||
|         DocumentKey(buffer) | ||||
|     } | ||||
| @@ -43,7 +43,8 @@ impl DocumentKey { | ||||
|     } | ||||
|  | ||||
|     pub fn document_id(&self) -> DocumentId { | ||||
|         (&self.0[4..]).read_u64::<NativeEndian>().unwrap() | ||||
|         let id = (&self.0[4..]).read_u64::<NativeEndian>().unwrap(); | ||||
|         DocumentId(id) | ||||
|     } | ||||
| } | ||||
|  | ||||
| @@ -88,7 +89,8 @@ impl DocumentKeyAttr { | ||||
|     } | ||||
|  | ||||
|     pub fn document_id(&self) -> DocumentId { | ||||
|         (&self.0[4..]).read_u64::<NativeEndian>().unwrap() | ||||
|         let id = (&self.0[4..]).read_u64::<NativeEndian>().unwrap(); | ||||
|         DocumentId(id) | ||||
|     } | ||||
|  | ||||
|     pub fn attribute(&self) -> SchemaAttr { | ||||
|   | ||||
| @@ -194,6 +194,7 @@ mod tests { | ||||
|     use serde_derive::{Serialize, Deserialize}; | ||||
|     use tempfile::tempdir; | ||||
|  | ||||
|     use crate::DocumentId; | ||||
|     use crate::tokenizer::DefaultBuilder; | ||||
|     use crate::database::update::PositiveUpdateBuilder; | ||||
|     use crate::database::schema::{SchemaBuilder, STORED, INDEXED}; | ||||
| @@ -238,8 +239,8 @@ mod tests { | ||||
|         let mut update = { | ||||
|             let mut builder = PositiveUpdateBuilder::new(update_path, schema, tokenizer_builder); | ||||
|  | ||||
|             builder.update(0, &doc0).unwrap(); | ||||
|             builder.update(1, &doc1).unwrap(); | ||||
|             builder.update(DocumentId(0), &doc0).unwrap(); | ||||
|             builder.update(DocumentId(1), &doc1).unwrap(); | ||||
|  | ||||
|             builder.build()? | ||||
|         }; | ||||
| @@ -248,8 +249,8 @@ mod tests { | ||||
|         database.ingest_update_file(update)?; | ||||
|         let view = database.view(); | ||||
|  | ||||
|         let de_doc0: SimpleDoc = view.retrieve_document(0)?; | ||||
|         let de_doc1: SimpleDoc = view.retrieve_document(1)?; | ||||
|         let de_doc0: SimpleDoc = view.retrieve_document(DocumentId(0))?; | ||||
|         let de_doc1: SimpleDoc = view.retrieve_document(DocumentId(1))?; | ||||
|  | ||||
|         assert_eq!(doc0, de_doc0); | ||||
|         assert_eq!(doc1, de_doc1); | ||||
|   | ||||
| @@ -30,7 +30,7 @@ impl<W: io::Write> UnorderedNegativeBlobBuilder<W> { | ||||
|  | ||||
|     pub fn into_inner(mut self) -> io::Result<W> { | ||||
|         for id in self.doc_ids { | ||||
|             self.wrt.write_u64::<NativeEndian>(id)?; | ||||
|             self.wrt.write_u64::<NativeEndian>(id.0)?; | ||||
|         } | ||||
|         Ok(self.wrt) | ||||
|     } | ||||
|   | ||||
| @@ -11,7 +11,12 @@ pub use rocksdb; | ||||
| pub use self::tokenizer::Tokenizer; | ||||
| pub use self::common_words::CommonWords; | ||||
|  | ||||
| pub type DocumentId = u64; | ||||
| /// Represents an internally generated unique document identifier. | ||||
| /// | ||||
| /// It is used to tell the database which document you want to deserialize. | ||||
| /// Helpful for custom ranking. | ||||
| #[derive(Debug, Copy, Clone, Eq, PartialEq, PartialOrd, Ord, Hash)] | ||||
| pub struct DocumentId(pub u64); | ||||
|  | ||||
| /// This structure represents the position of a word | ||||
| /// in a document and its attributes. | ||||
|   | ||||
| @@ -44,6 +44,8 @@ where D: Deref<Target=DB> | ||||
| mod tests { | ||||
|     use super::*; | ||||
|  | ||||
|     use crate::DocumentId; | ||||
|  | ||||
|     // typing: "Geox CEO" | ||||
|     // | ||||
|     // doc0: "Geox SpA: CEO and Executive" | ||||
| @@ -56,7 +58,7 @@ mod tests { | ||||
|                 Match { query_index: 1, distance: 0, attribute: 0, attribute_index: 2, is_exact: false }, | ||||
|             ]; | ||||
|             Document { | ||||
|                 id: 0, | ||||
|                 id: DocumentId(0), | ||||
|                 matches: matches, | ||||
|             } | ||||
|         }; | ||||
| @@ -67,7 +69,7 @@ mod tests { | ||||
|                 Match { query_index: 1, distance: 0, attribute: 0, attribute_index: 2, is_exact: false }, | ||||
|             ]; | ||||
|             Document { | ||||
|                 id: 1, | ||||
|                 id: DocumentId(1), | ||||
|                 matches: matches, | ||||
|             } | ||||
|         }; | ||||
| @@ -89,7 +91,7 @@ mod tests { | ||||
|                 Match { query_index: 1, distance: 0, attribute: 0, attribute_index: 1, is_exact: false }, | ||||
|             ]; | ||||
|             Document { | ||||
|                 id: 0, | ||||
|                 id: DocumentId(0), | ||||
|                 matches: matches, | ||||
|             } | ||||
|         }; | ||||
| @@ -99,7 +101,7 @@ mod tests { | ||||
|                 Match { query_index: 0, distance: 0, attribute: 0, attribute_index: 0, is_exact: false }, | ||||
|             ]; | ||||
|             Document { | ||||
|                 id: 1, | ||||
|                 id: DocumentId(1), | ||||
|                 matches: matches, | ||||
|             } | ||||
|         }; | ||||
| @@ -121,7 +123,7 @@ mod tests { | ||||
|                 Match { query_index: 1, distance: 1, attribute: 0, attribute_index: 1, is_exact: false }, | ||||
|             ]; | ||||
|             Document { | ||||
|                 id: 0, | ||||
|                 id: DocumentId(0), | ||||
|                 matches: matches, | ||||
|             } | ||||
|         }; | ||||
| @@ -131,7 +133,7 @@ mod tests { | ||||
|                 Match { query_index: 0, distance: 0, attribute: 0, attribute_index: 0, is_exact: false }, | ||||
|             ]; | ||||
|             Document { | ||||
|                 id: 1, | ||||
|                 id: DocumentId(1), | ||||
|                 matches: matches, | ||||
|             } | ||||
|         }; | ||||
|   | ||||
		Reference in New Issue
	
	Block a user