mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-25 21:16:28 +00:00 
			
		
		
		
	Reduce the DocumentId size from 64 to 32 bits
This commit is contained in:
		| @@ -191,6 +191,6 @@ mod tests { | |||||||
|  |  | ||||||
|     #[test] |     #[test] | ||||||
|     fn docindex_mem_size() { |     fn docindex_mem_size() { | ||||||
|         assert_eq!(mem::size_of::<DocIndex>(), 16); |         assert_eq!(mem::size_of::<DocIndex>(), 12); | ||||||
|     } |     } | ||||||
| } | } | ||||||
|   | |||||||
| @@ -228,7 +228,7 @@ mod tests { | |||||||
|         builder.into_inner().and_then(Set::from_bytes).unwrap() |         builder.into_inner().and_then(Set::from_bytes).unwrap() | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     const fn doc_index(document_id: u64, word_index: u16) -> DocIndex { |     const fn doc_index(document_id: u32, word_index: u16) -> DocIndex { | ||||||
|         DocIndex { |         DocIndex { | ||||||
|             document_id: DocumentId(document_id), |             document_id: DocumentId(document_id), | ||||||
|             attribute: 0, |             attribute: 0, | ||||||
| @@ -238,7 +238,7 @@ mod tests { | |||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     const fn doc_char_index(document_id: u64, word_index: u16, char_index: u16) -> DocIndex { |     const fn doc_char_index(document_id: u32, word_index: u16, char_index: u16) -> DocIndex { | ||||||
|         DocIndex { |         DocIndex { | ||||||
|             document_id: DocumentId(document_id), |             document_id: DocumentId(document_id), | ||||||
|             attribute: 0, |             attribute: 0, | ||||||
|   | |||||||
| @@ -1,4 +1,4 @@ | |||||||
| use super::BEU64; | use super::BEU32; | ||||||
| use crate::database::MainT; | use crate::database::MainT; | ||||||
| use crate::DocumentId; | use crate::DocumentId; | ||||||
| use heed::types::{ByteSlice, OwnedType}; | use heed::types::{ByteSlice, OwnedType}; | ||||||
| @@ -7,7 +7,7 @@ use std::sync::Arc; | |||||||
|  |  | ||||||
| #[derive(Copy, Clone)] | #[derive(Copy, Clone)] | ||||||
| pub struct DocsWords { | pub struct DocsWords { | ||||||
|     pub(crate) docs_words: heed::Database<OwnedType<BEU64>, ByteSlice>, |     pub(crate) docs_words: heed::Database<OwnedType<BEU32>, ByteSlice>, | ||||||
| } | } | ||||||
|  |  | ||||||
| impl DocsWords { | impl DocsWords { | ||||||
| @@ -17,13 +17,13 @@ impl DocsWords { | |||||||
|         document_id: DocumentId, |         document_id: DocumentId, | ||||||
|         words: &fst::Set, |         words: &fst::Set, | ||||||
|     ) -> ZResult<()> { |     ) -> ZResult<()> { | ||||||
|         let document_id = BEU64::new(document_id.0); |         let document_id = BEU32::new(document_id.0); | ||||||
|         let bytes = words.as_fst().as_bytes(); |         let bytes = words.as_fst().as_bytes(); | ||||||
|         self.docs_words.put(writer, &document_id, bytes) |         self.docs_words.put(writer, &document_id, bytes) | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     pub fn del_doc_words(self, writer: &mut heed::RwTxn<MainT>, document_id: DocumentId) -> ZResult<bool> { |     pub fn del_doc_words(self, writer: &mut heed::RwTxn<MainT>, document_id: DocumentId) -> ZResult<bool> { | ||||||
|         let document_id = BEU64::new(document_id.0); |         let document_id = BEU32::new(document_id.0); | ||||||
|         self.docs_words.delete(writer, &document_id) |         self.docs_words.delete(writer, &document_id) | ||||||
|     } |     } | ||||||
|  |  | ||||||
| @@ -36,7 +36,7 @@ impl DocsWords { | |||||||
|         reader: &heed::RoTxn<MainT>, |         reader: &heed::RoTxn<MainT>, | ||||||
|         document_id: DocumentId, |         document_id: DocumentId, | ||||||
|     ) -> ZResult<Option<fst::Set>> { |     ) -> ZResult<Option<fst::Set>> { | ||||||
|         let document_id = BEU64::new(document_id.0); |         let document_id = BEU32::new(document_id.0); | ||||||
|         match self.docs_words.get(reader, &document_id)? { |         match self.docs_words.get(reader, &document_id)? { | ||||||
|             Some(bytes) => { |             Some(bytes) => { | ||||||
|                 let len = bytes.len(); |                 let len = bytes.len(); | ||||||
|   | |||||||
| @@ -26,16 +26,16 @@ impl<'a> BytesDecode<'a> for DocumentsIds { | |||||||
|  |  | ||||||
| pub struct DiscoverIds<'a> { | pub struct DiscoverIds<'a> { | ||||||
|     ids_iter: std::slice::Iter<'a, DocumentId>, |     ids_iter: std::slice::Iter<'a, DocumentId>, | ||||||
|     left_id: Option<u64>, |     left_id: Option<u32>, | ||||||
|     right_id: Option<u64>, |     right_id: Option<u32>, | ||||||
|     available_range: std::ops::Range<u64>, |     available_range: std::ops::Range<u32>, | ||||||
| } | } | ||||||
|  |  | ||||||
| impl DiscoverIds<'_> { | impl DiscoverIds<'_> { | ||||||
|     pub fn new(ids: &Set<DocumentId>) -> DiscoverIds { |     pub fn new(ids: &Set<DocumentId>) -> DiscoverIds { | ||||||
|         let mut ids_iter = ids.iter(); |         let mut ids_iter = ids.iter(); | ||||||
|         let right_id = ids_iter.next().map(|id| id.0); |         let right_id = ids_iter.next().map(|id| id.0); | ||||||
|         let available_range = 0..right_id.unwrap_or(u64::max_value()); |         let available_range = 0..right_id.unwrap_or(u32::max_value()); | ||||||
|         DiscoverIds { ids_iter, left_id: None, right_id, available_range } |         DiscoverIds { ids_iter, left_id: None, right_id, available_range } | ||||||
|     } |     } | ||||||
| } | } | ||||||
| @@ -49,7 +49,7 @@ impl Iterator for DiscoverIds<'_> { | |||||||
|                 // The available range gives us a new id, we return it. |                 // The available range gives us a new id, we return it. | ||||||
|                 Some(id) => return Some(DocumentId(id)), |                 Some(id) => return Some(DocumentId(id)), | ||||||
|                 // The available range is exhausted, we need to find the next one. |                 // The available range is exhausted, we need to find the next one. | ||||||
|                 None if self.available_range.end == u64::max_value() => return None, |                 None if self.available_range.end == u32::max_value() => return None, | ||||||
|                 None => loop { |                 None => loop { | ||||||
|                     self.left_id = self.right_id.take(); |                     self.left_id = self.right_id.take(); | ||||||
|                     self.right_id = self.ids_iter.next().map(|id| id.0); |                     self.right_id = self.ids_iter.next().map(|id| id.0); | ||||||
| @@ -61,9 +61,9 @@ impl Iterator for DiscoverIds<'_> { | |||||||
|                             break; |                             break; | ||||||
|                         }, |                         }, | ||||||
|                         // The last used id has been reached, we can use all ids |                         // The last used id has been reached, we can use all ids | ||||||
|                         // until u64 MAX |                         // until u32 MAX | ||||||
|                         (Some(l), None) => { |                         (Some(l), None) => { | ||||||
|                             self.available_range = l.saturating_add(1)..u64::max_value(); |                             self.available_range = l.saturating_add(1)..u32::max_value(); | ||||||
|                             break; |                             break; | ||||||
|                         }, |                         }, | ||||||
|                         _ => (), |                         _ => (), | ||||||
|   | |||||||
| @@ -153,7 +153,7 @@ impl Main { | |||||||
|  |  | ||||||
|     pub fn user_to_internal_id(self, reader: &heed::RoTxn<MainT>, userid: &str) -> ZResult<Option<DocumentId>> { |     pub fn user_to_internal_id(self, reader: &heed::RoTxn<MainT>, userid: &str) -> ZResult<Option<DocumentId>> { | ||||||
|         let user_ids = self.user_ids(reader)?; |         let user_ids = self.user_ids(reader)?; | ||||||
|         Ok(user_ids.get(userid).map(DocumentId)) |         Ok(user_ids.get(userid).map(|id| DocumentId(id as u32))) | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     pub fn put_words_fst(self, writer: &mut heed::RwTxn<MainT>, fst: &fst::Set) -> ZResult<()> { |     pub fn put_words_fst(self, writer: &mut heed::RwTxn<MainT>, fst: &fst::Set) -> ZResult<()> { | ||||||
|   | |||||||
| @@ -45,20 +45,21 @@ use crate::serde::Deserializer; | |||||||
| use crate::settings::SettingsUpdate; | use crate::settings::SettingsUpdate; | ||||||
| use crate::{query_builder::QueryBuilder, update, DocIndex, DocumentId, Error, MResult}; | use crate::{query_builder::QueryBuilder, update, DocIndex, DocumentId, Error, MResult}; | ||||||
|  |  | ||||||
|  | type BEU32 = zerocopy::U32<byteorder::BigEndian>; | ||||||
| type BEU64 = zerocopy::U64<byteorder::BigEndian>; | type BEU64 = zerocopy::U64<byteorder::BigEndian>; | ||||||
| pub type BEU16 = zerocopy::U16<byteorder::BigEndian>; | pub type BEU16 = zerocopy::U16<byteorder::BigEndian>; | ||||||
|  |  | ||||||
| #[derive(Debug, Copy, Clone, AsBytes, FromBytes)] | #[derive(Debug, Copy, Clone, AsBytes, FromBytes)] | ||||||
| #[repr(C)] | #[repr(C)] | ||||||
| pub struct DocumentFieldIndexedKey { | pub struct DocumentFieldIndexedKey { | ||||||
|     docid: BEU64, |     docid: BEU32, | ||||||
|     indexed_pos: BEU16, |     indexed_pos: BEU16, | ||||||
| } | } | ||||||
|  |  | ||||||
| impl DocumentFieldIndexedKey { | impl DocumentFieldIndexedKey { | ||||||
|     fn new(docid: DocumentId, indexed_pos: IndexedPos) -> DocumentFieldIndexedKey { |     fn new(docid: DocumentId, indexed_pos: IndexedPos) -> DocumentFieldIndexedKey { | ||||||
|         DocumentFieldIndexedKey { |         DocumentFieldIndexedKey { | ||||||
|             docid: BEU64::new(docid.0), |             docid: BEU32::new(docid.0), | ||||||
|             indexed_pos: BEU16::new(indexed_pos.0), |             indexed_pos: BEU16::new(indexed_pos.0), | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| @@ -67,14 +68,14 @@ impl DocumentFieldIndexedKey { | |||||||
| #[derive(Debug, Copy, Clone, AsBytes, FromBytes)] | #[derive(Debug, Copy, Clone, AsBytes, FromBytes)] | ||||||
| #[repr(C)] | #[repr(C)] | ||||||
| pub struct DocumentFieldStoredKey { | pub struct DocumentFieldStoredKey { | ||||||
|     docid: BEU64, |     docid: BEU32, | ||||||
|     field_id: BEU16, |     field_id: BEU16, | ||||||
| } | } | ||||||
|  |  | ||||||
| impl DocumentFieldStoredKey { | impl DocumentFieldStoredKey { | ||||||
|     fn new(docid: DocumentId, field_id: FieldId) -> DocumentFieldStoredKey { |     fn new(docid: DocumentId, field_id: FieldId) -> DocumentFieldStoredKey { | ||||||
|         DocumentFieldStoredKey { |         DocumentFieldStoredKey { | ||||||
|             docid: BEU64::new(docid.0), |             docid: BEU32::new(docid.0), | ||||||
|             field_id: BEU16::new(field_id.0), |             field_id: BEU16::new(field_id.0), | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| @@ -98,7 +99,7 @@ impl<'a> BytesEncode<'a> for PostingsCodec { | |||||||
|  |  | ||||||
|         let mut buffer = Vec::with_capacity(u64_size + docids_size + matches_size); |         let mut buffer = Vec::with_capacity(u64_size + docids_size + matches_size); | ||||||
|  |  | ||||||
|         let docids_len = item.docids.len(); |         let docids_len = item.docids.len() as u64; | ||||||
|         buffer.extend_from_slice(&docids_len.to_be_bytes()); |         buffer.extend_from_slice(&docids_len.to_be_bytes()); | ||||||
|         buffer.extend_from_slice(item.docids.as_bytes()); |         buffer.extend_from_slice(item.docids.as_bytes()); | ||||||
|         buffer.extend_from_slice(item.matches.as_bytes()); |         buffer.extend_from_slice(item.matches.as_bytes()); | ||||||
|   | |||||||
| @@ -4,7 +4,7 @@ use heed::types::{OwnedType, CowSlice}; | |||||||
| use heed::Result as ZResult; | use heed::Result as ZResult; | ||||||
| use zerocopy::{AsBytes, FromBytes}; | use zerocopy::{AsBytes, FromBytes}; | ||||||
|  |  | ||||||
| use super::BEU64; | use super::{BEU64, BEU32}; | ||||||
| use crate::{DocumentId, Highlight}; | use crate::{DocumentId, Highlight}; | ||||||
| use crate::database::MainT; | use crate::database::MainT; | ||||||
|  |  | ||||||
| @@ -13,15 +13,15 @@ use crate::database::MainT; | |||||||
| pub struct PrefixKey { | pub struct PrefixKey { | ||||||
|     prefix: [u8; 4], |     prefix: [u8; 4], | ||||||
|     index: BEU64, |     index: BEU64, | ||||||
|     docid: BEU64, |     docid: BEU32, | ||||||
| } | } | ||||||
|  |  | ||||||
| impl PrefixKey { | impl PrefixKey { | ||||||
|     pub fn new(prefix: [u8; 4], index: u64, docid: u64) -> PrefixKey { |     pub fn new(prefix: [u8; 4], index: u64, docid: u32) -> PrefixKey { | ||||||
|         PrefixKey { |         PrefixKey { | ||||||
|             prefix, |             prefix, | ||||||
|             index: BEU64::new(index), |             index: BEU64::new(index), | ||||||
|             docid: BEU64::new(docid), |             docid: BEU32::new(docid), | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| } | } | ||||||
| @@ -54,7 +54,7 @@ impl PrefixDocumentsCache { | |||||||
|         prefix: [u8; 4], |         prefix: [u8; 4], | ||||||
|     ) -> ZResult<PrefixDocumentsIter<'txn>> { |     ) -> ZResult<PrefixDocumentsIter<'txn>> { | ||||||
|         let start = PrefixKey::new(prefix, 0, 0); |         let start = PrefixKey::new(prefix, 0, 0); | ||||||
|         let end = PrefixKey::new(prefix, u64::max_value(), u64::max_value()); |         let end = PrefixKey::new(prefix, u64::max_value(), u32::max_value()); | ||||||
|         let iter = self.prefix_documents_cache.range(reader, &(start..=end))?; |         let iter = self.prefix_documents_cache.range(reader, &(start..=end))?; | ||||||
|         Ok(PrefixDocumentsIter { iter }) |         Ok(PrefixDocumentsIter { iter }) | ||||||
|     } |     } | ||||||
|   | |||||||
| @@ -242,7 +242,7 @@ pub fn apply_addition<'a, 'b>( | |||||||
|  |  | ||||||
|     index.main.put_schema(writer, &schema)?; |     index.main.put_schema(writer, &schema)?; | ||||||
|  |  | ||||||
|     let new_user_ids = fst::Map::from_iter(new_user_ids)?; |     let new_user_ids = fst::Map::from_iter(new_user_ids.iter().map(|(u, i)| (u, *i as u64)))?; | ||||||
|     let new_internal_ids = sdset::SetBuf::from_dirty(new_internal_ids); |     let new_internal_ids = sdset::SetBuf::from_dirty(new_internal_ids); | ||||||
|     index.main.merge_user_ids(writer, &new_user_ids)?; |     index.main.merge_user_ids(writer, &new_user_ids)?; | ||||||
|     index.main.merge_internal_ids(writer, &new_internal_ids)?; |     index.main.merge_internal_ids(writer, &new_internal_ids)?; | ||||||
|   | |||||||
| @@ -80,7 +80,7 @@ pub fn apply_documents_deletion( | |||||||
|         let user_ids = index.main.user_ids(writer)?; |         let user_ids = index.main.user_ids(writer)?; | ||||||
|         for userid in new_user_ids.as_slice() { |         for userid in new_user_ids.as_slice() { | ||||||
|             if let Some(id) = user_ids.get(userid) { |             if let Some(id) = user_ids.get(userid) { | ||||||
|                 internal_ids.push(DocumentId(id)); |                 internal_ids.push(DocumentId(id as u32)); | ||||||
|             } |             } | ||||||
|         } |         } | ||||||
|  |  | ||||||
|   | |||||||
| @@ -105,7 +105,7 @@ pub fn discover_document_id( | |||||||
| { | { | ||||||
|     if userid.chars().all(|x| x.is_ascii_alphanumeric() || x == '-' || x == '_') { |     if userid.chars().all(|x| x.is_ascii_alphanumeric() || x == '-' || x == '_') { | ||||||
|         match user_ids.get(userid) { |         match user_ids.get(userid) { | ||||||
|             Some(internal_id) => Ok(DocumentId(internal_id)), |             Some(id) => Ok(DocumentId(id as u32)), | ||||||
|             None => { |             None => { | ||||||
|                 let internal_id = available_ids.next().expect("no more ids available"); |                 let internal_id = available_ids.next().expect("no more ids available"); | ||||||
|                 Ok(internal_id) |                 Ok(internal_id) | ||||||
|   | |||||||
| @@ -22,7 +22,7 @@ pub enum ResponseError { | |||||||
|     NotFound(String), |     NotFound(String), | ||||||
|     OpenIndex(String), |     OpenIndex(String), | ||||||
|     FilterParsing(String), |     FilterParsing(String), | ||||||
|     RetrieveDocument(u64, String), |     RetrieveDocument(u32, String), | ||||||
|     SearchDocuments(String), |     SearchDocuments(String), | ||||||
|     PayloadTooLarge, |     PayloadTooLarge, | ||||||
|     UnsupportedMediaType, |     UnsupportedMediaType, | ||||||
| @@ -116,7 +116,7 @@ impl ResponseError { | |||||||
|         ResponseError::Maintenance |         ResponseError::Maintenance | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     pub fn retrieve_document(doc_id: u64, err: impl fmt::Display) -> ResponseError { |     pub fn retrieve_document(doc_id: u32, err: impl fmt::Display) -> ResponseError { | ||||||
|         ResponseError::RetrieveDocument(doc_id, err.to_string()) |         ResponseError::RetrieveDocument(doc_id, err.to_string()) | ||||||
|     } |     } | ||||||
|  |  | ||||||
|   | |||||||
| @@ -12,7 +12,7 @@ use serde::{Deserialize, Serialize}; | |||||||
| #[cfg_attr(feature = "zerocopy", derive(AsBytes, FromBytes))] | #[cfg_attr(feature = "zerocopy", derive(AsBytes, FromBytes))] | ||||||
| #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] | #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] | ||||||
| #[repr(C)] | #[repr(C)] | ||||||
| pub struct DocumentId(pub u64); | pub struct DocumentId(pub u32); | ||||||
|  |  | ||||||
| /// This structure represents the position of a word | /// This structure represents the position of a word | ||||||
| /// in a document and its attributes. | /// in a document and its attributes. | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user