mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-25 04:56:28 +00:00 
			
		
		
		
	Create a new database on index and add a specialized codec for it
This commit is contained in:
		| @@ -8,6 +8,7 @@ mod roaring_bitmap_length; | |||||||
| mod str_beu32_codec; | mod str_beu32_codec; | ||||||
| mod str_ref; | mod str_ref; | ||||||
| mod str_str_u8_codec; | mod str_str_u8_codec; | ||||||
|  | mod script_language_codec; | ||||||
|  |  | ||||||
| pub use byte_slice_ref::ByteSliceRefCodec; | pub use byte_slice_ref::ByteSliceRefCodec; | ||||||
| pub use str_ref::StrRefCodec; | pub use str_ref::StrRefCodec; | ||||||
| @@ -21,3 +22,4 @@ pub use self::roaring_bitmap_length::{ | |||||||
| }; | }; | ||||||
| pub use self::str_beu32_codec::StrBEU32Codec; | pub use self::str_beu32_codec::StrBEU32Codec; | ||||||
| pub use self::str_str_u8_codec::{U8StrStrCodec, UncheckedU8StrStrCodec}; | pub use self::str_str_u8_codec::{U8StrStrCodec, UncheckedU8StrStrCodec}; | ||||||
|  | pub use self::script_language_codec::ScriptLanguageCodec; | ||||||
|   | |||||||
							
								
								
									
										43
									
								
								milli/src/heed_codec/script_language_codec.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										43
									
								
								milli/src/heed_codec/script_language_codec.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,43 @@ | |||||||
|  | use std::borrow::Cow; | ||||||
|  |  | ||||||
|  | use std::mem::size_of; | ||||||
|  | use std::str; | ||||||
|  |  | ||||||
|  | use charabia::{Language, Script}; | ||||||
|  |  | ||||||
|  | pub struct ScriptLanguageCodec; | ||||||
|  |  | ||||||
|  | impl<'a> heed::BytesDecode<'a> for ScriptLanguageCodec { | ||||||
|  |     type DItem = (Script, Language); | ||||||
|  |  | ||||||
|  |     fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> { | ||||||
|  |         let footer_len = size_of::<u32>(); | ||||||
|  |  | ||||||
|  |         if bytes.len() < footer_len { | ||||||
|  |             return None; | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         let (script, bytes) = bytes.split_at(bytes.len() - footer_len); | ||||||
|  |         let script = str::from_utf8(script).ok()?; | ||||||
|  |         let script_name = Script::from_name(script); | ||||||
|  |         let lan = str::from_utf8(bytes).ok()?; | ||||||
|  |         let lan_name = Language::from_name(lan); | ||||||
|  |  | ||||||
|  |         Some((script_name, lan_name)) | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | impl<'a> heed::BytesEncode<'a> for ScriptLanguageCodec { | ||||||
|  |     type EItem = (Script, Language); | ||||||
|  |  | ||||||
|  |     fn bytes_encode((script, lan): &Self::EItem) -> Option<Cow<[u8]>> { | ||||||
|  |         let script_name = script.name(); | ||||||
|  |         let lan_name = lan.name(); | ||||||
|  |  | ||||||
|  |         let mut bytes = Vec::with_capacity(script_name.len() + lan_name.len()); | ||||||
|  |         bytes.extend_from_slice(script_name.as_bytes()); | ||||||
|  |         bytes.extend_from_slice(lan_name.as_bytes()); | ||||||
|  |  | ||||||
|  |         Some(Cow::Owned(bytes)) | ||||||
|  |     } | ||||||
|  | } | ||||||
| @@ -14,6 +14,7 @@ use time::OffsetDateTime; | |||||||
| use crate::error::{InternalError, UserError}; | use crate::error::{InternalError, UserError}; | ||||||
| use crate::facet::FacetType; | use crate::facet::FacetType; | ||||||
| use crate::fields_ids_map::FieldsIdsMap; | use crate::fields_ids_map::FieldsIdsMap; | ||||||
|  | use crate::heed_codec::ScriptLanguageCodec; | ||||||
| use crate::heed_codec::facet::{ | use crate::heed_codec::facet::{ | ||||||
|     FacetGroupKeyCodec, FacetGroupValueCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec, |     FacetGroupKeyCodec, FacetGroupValueCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec, | ||||||
|     FieldIdCodec, OrderedF64Codec, |     FieldIdCodec, OrderedF64Codec, | ||||||
| @@ -83,6 +84,7 @@ pub mod db_name { | |||||||
|     pub const FIELD_ID_DOCID_FACET_F64S: &str = "field-id-docid-facet-f64s"; |     pub const FIELD_ID_DOCID_FACET_F64S: &str = "field-id-docid-facet-f64s"; | ||||||
|     pub const FIELD_ID_DOCID_FACET_STRINGS: &str = "field-id-docid-facet-strings"; |     pub const FIELD_ID_DOCID_FACET_STRINGS: &str = "field-id-docid-facet-strings"; | ||||||
|     pub const DOCUMENTS: &str = "documents"; |     pub const DOCUMENTS: &str = "documents"; | ||||||
|  |     pub const SCRIPT_LANGUAGE_DOCIDS: &str = "script_language_docids"; | ||||||
| } | } | ||||||
|  |  | ||||||
| #[derive(Clone)] | #[derive(Clone)] | ||||||
| @@ -122,6 +124,9 @@ pub struct Index { | |||||||
|     /// Maps the position of a word prefix with all the docids where this prefix appears. |     /// Maps the position of a word prefix with all the docids where this prefix appears. | ||||||
|     pub word_prefix_position_docids: Database<StrBEU32Codec, CboRoaringBitmapCodec>, |     pub word_prefix_position_docids: Database<StrBEU32Codec, CboRoaringBitmapCodec>, | ||||||
|  |  | ||||||
|  |     /// Maps each script and language pair to all the docids that correspond to it. | ||||||
|  |     pub script_language_docids: Database<ScriptLanguageCodec, RoaringBitmapCodec>, | ||||||
|  |  | ||||||
|     /// Maps the facet field id and the docids for which this field exists |     /// Maps the facet field id and the docids for which this field exists | ||||||
|     pub facet_id_exists_docids: Database<FieldIdCodec, CboRoaringBitmapCodec>, |     pub facet_id_exists_docids: Database<FieldIdCodec, CboRoaringBitmapCodec>, | ||||||
|  |  | ||||||
| @@ -159,6 +164,7 @@ impl Index { | |||||||
|         let exact_word_prefix_docids = env.create_database(Some(EXACT_WORD_PREFIX_DOCIDS))?; |         let exact_word_prefix_docids = env.create_database(Some(EXACT_WORD_PREFIX_DOCIDS))?; | ||||||
|         let docid_word_positions = env.create_database(Some(DOCID_WORD_POSITIONS))?; |         let docid_word_positions = env.create_database(Some(DOCID_WORD_POSITIONS))?; | ||||||
|         let word_pair_proximity_docids = env.create_database(Some(WORD_PAIR_PROXIMITY_DOCIDS))?; |         let word_pair_proximity_docids = env.create_database(Some(WORD_PAIR_PROXIMITY_DOCIDS))?; | ||||||
|  |         let script_language_docids = env.create_database(Some(SCRIPT_LANGUAGE_DOCIDS))?; | ||||||
|         let word_prefix_pair_proximity_docids = |         let word_prefix_pair_proximity_docids = | ||||||
|             env.create_database(Some(WORD_PREFIX_PAIR_PROXIMITY_DOCIDS))?; |             env.create_database(Some(WORD_PREFIX_PAIR_PROXIMITY_DOCIDS))?; | ||||||
|         let prefix_word_pair_proximity_docids = |         let prefix_word_pair_proximity_docids = | ||||||
| @@ -186,6 +192,7 @@ impl Index { | |||||||
|             exact_word_prefix_docids, |             exact_word_prefix_docids, | ||||||
|             docid_word_positions, |             docid_word_positions, | ||||||
|             word_pair_proximity_docids, |             word_pair_proximity_docids, | ||||||
|  |             script_language_docids, | ||||||
|             word_prefix_pair_proximity_docids, |             word_prefix_pair_proximity_docids, | ||||||
|             prefix_word_pair_proximity_docids, |             prefix_word_pair_proximity_docids, | ||||||
|             word_position_docids, |             word_position_docids, | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user