mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-25 13:06:27 +00:00 
			
		
		
		
	Rename user ids into external docids
This commit is contained in:
		| @@ -18,8 +18,9 @@ const ATTRIBUTES_FOR_FACETING_KEY: &str = "attributes-for-faceting"; | ||||
| const CREATED_AT_KEY: &str = "created-at"; | ||||
| const CUSTOMS_KEY: &str = "customs"; | ||||
| const DISTINCT_ATTRIBUTE_KEY: &str = "distinct-attribute"; | ||||
| const EXTERNAL_DOCIDS_KEY: &str = "external-docids"; | ||||
| const FIELDS_FREQUENCY_KEY: &str = "fields-frequency"; | ||||
| const INTERNAL_IDS_KEY: &str = "internal-ids"; | ||||
| const INTERNAL_DOCIDS_KEY: &str = "internal-docids"; | ||||
| const NAME_KEY: &str = "name"; | ||||
| const NUMBER_OF_DOCUMENTS_KEY: &str = "number-of-documents"; | ||||
| const RANKED_MAP_KEY: &str = "ranked-map"; | ||||
| @@ -28,7 +29,6 @@ const SCHEMA_KEY: &str = "schema"; | ||||
| const STOP_WORDS_KEY: &str = "stop-words"; | ||||
| const SYNONYMS_KEY: &str = "synonyms"; | ||||
| const UPDATED_AT_KEY: &str = "updated-at"; | ||||
| const USER_IDS_KEY: &str = "user-ids"; | ||||
| const WORDS_KEY: &str = "words"; | ||||
|  | ||||
| pub type FreqsMap = HashMap<String, usize>; | ||||
| @@ -74,73 +74,73 @@ impl Main { | ||||
|         self.main.get::<_, Str, SerdeDatetime>(reader, UPDATED_AT_KEY) | ||||
|     } | ||||
|  | ||||
|     pub fn put_internal_ids(self, writer: &mut heed::RwTxn<MainT>, ids: &sdset::Set<DocumentId>) -> ZResult<()> { | ||||
|         self.main.put::<_, Str, DocumentsIds>(writer, INTERNAL_IDS_KEY, ids) | ||||
|     pub fn put_internal_docids(self, writer: &mut heed::RwTxn<MainT>, ids: &sdset::Set<DocumentId>) -> ZResult<()> { | ||||
|         self.main.put::<_, Str, DocumentsIds>(writer, INTERNAL_DOCIDS_KEY, ids) | ||||
|     } | ||||
|  | ||||
|     pub fn internal_ids<'txn>(self, reader: &'txn heed::RoTxn<MainT>) -> ZResult<Cow<'txn, sdset::Set<DocumentId>>> { | ||||
|         match self.main.get::<_, Str, DocumentsIds>(reader, INTERNAL_IDS_KEY)? { | ||||
|     pub fn internal_docids<'txn>(self, reader: &'txn heed::RoTxn<MainT>) -> ZResult<Cow<'txn, sdset::Set<DocumentId>>> { | ||||
|         match self.main.get::<_, Str, DocumentsIds>(reader, INTERNAL_DOCIDS_KEY)? { | ||||
|             Some(ids) => Ok(ids), | ||||
|             None => Ok(Cow::default()), | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     pub fn merge_internal_ids(self, writer: &mut heed::RwTxn<MainT>, new_ids: &sdset::Set<DocumentId>) -> ZResult<()> { | ||||
|     pub fn merge_internal_docids(self, writer: &mut heed::RwTxn<MainT>, new_ids: &sdset::Set<DocumentId>) -> ZResult<()> { | ||||
|         use sdset::SetOperation; | ||||
|  | ||||
|         // We do a union of the old and new internal ids. | ||||
|         let internal_ids = self.internal_ids(writer)?; | ||||
|         let internal_ids = sdset::duo::Union::new(&internal_ids, new_ids).into_set_buf(); | ||||
|         self.put_internal_ids(writer, &internal_ids) | ||||
|         let internal_docids = self.internal_docids(writer)?; | ||||
|         let internal_docids = sdset::duo::Union::new(&internal_docids, new_ids).into_set_buf(); | ||||
|         self.put_internal_docids(writer, &internal_docids) | ||||
|     } | ||||
|  | ||||
|     pub fn remove_internal_ids(self, writer: &mut heed::RwTxn<MainT>, ids: &sdset::Set<DocumentId>) -> ZResult<()> { | ||||
|     pub fn remove_internal_docids(self, writer: &mut heed::RwTxn<MainT>, ids: &sdset::Set<DocumentId>) -> ZResult<()> { | ||||
|         use sdset::SetOperation; | ||||
|  | ||||
|         // We do a difference of the old and new internal ids. | ||||
|         let internal_ids = self.internal_ids(writer)?; | ||||
|         let internal_ids = sdset::duo::Difference::new(&internal_ids, ids).into_set_buf(); | ||||
|         self.put_internal_ids(writer, &internal_ids) | ||||
|         let internal_docids = self.internal_docids(writer)?; | ||||
|         let internal_docids = sdset::duo::Difference::new(&internal_docids, ids).into_set_buf(); | ||||
|         self.put_internal_docids(writer, &internal_docids) | ||||
|     } | ||||
|  | ||||
|     pub fn put_user_ids(self, writer: &mut heed::RwTxn<MainT>, ids: &fst::Map) -> ZResult<()> { | ||||
|         self.main.put::<_, Str, ByteSlice>(writer, USER_IDS_KEY, ids.as_fst().as_bytes()) | ||||
|     pub fn put_external_docids(self, writer: &mut heed::RwTxn<MainT>, ids: &fst::Map) -> ZResult<()> { | ||||
|         self.main.put::<_, Str, ByteSlice>(writer, EXTERNAL_DOCIDS_KEY, ids.as_fst().as_bytes()) | ||||
|     } | ||||
|  | ||||
|     pub fn merge_user_ids(self, writer: &mut heed::RwTxn<MainT>, new_ids: &fst::Map) -> ZResult<()> { | ||||
|     pub fn merge_external_docids(self, writer: &mut heed::RwTxn<MainT>, new_ids: &fst::Map) -> ZResult<()> { | ||||
|         use fst::{Streamer, IntoStreamer}; | ||||
|  | ||||
|         // Do a union of the old and the new set of external docids. | ||||
|         let user_ids = self.user_ids(writer)?; | ||||
|         let mut op = user_ids.op().add(new_ids.into_stream()).r#union(); | ||||
|         let external_docids = self.external_docids(writer)?; | ||||
|         let mut op = external_docids.op().add(new_ids.into_stream()).r#union(); | ||||
|         let mut build = fst::MapBuilder::memory(); | ||||
|         while let Some((userid, values)) = op.next() { | ||||
|             build.insert(userid, values[0].value).unwrap(); | ||||
|         } | ||||
|         let user_ids = build.into_inner().unwrap(); | ||||
|         let external_docids = build.into_inner().unwrap(); | ||||
|  | ||||
|         // TODO prefer using self.put_external_docids | ||||
|         self.main.put::<_, Str, ByteSlice>(writer, USER_IDS_KEY, user_ids.as_slice()) | ||||
|         self.main.put::<_, Str, ByteSlice>(writer, EXTERNAL_DOCIDS_KEY, external_docids.as_slice()) | ||||
|     } | ||||
|  | ||||
|     pub fn remove_user_ids(self, writer: &mut heed::RwTxn<MainT>, ids: &fst::Map) -> ZResult<()> { | ||||
|     pub fn remove_external_docids(self, writer: &mut heed::RwTxn<MainT>, ids: &fst::Map) -> ZResult<()> { | ||||
|         use fst::{Streamer, IntoStreamer}; | ||||
|  | ||||
|         // Do a difference between the stored external docids and the ids to remove. | ||||
|         let user_ids = self.user_ids(writer)?; | ||||
|         let mut op = user_ids.op().add(ids.into_stream()).difference(); | ||||
|         let external_docids = self.external_docids(writer)?; | ||||
|         let mut op = external_docids.op().add(ids.into_stream()).difference(); | ||||
|         let mut build = fst::MapBuilder::memory(); | ||||
|         while let Some((userid, values)) = op.next() { | ||||
|             build.insert(userid, values[0].value).unwrap(); | ||||
|         } | ||||
|         let user_ids = build.into_inner().unwrap(); | ||||
|         let external_docids = build.into_inner().unwrap(); | ||||
|  | ||||
|         // TODO prefer using self.put_user_ids | ||||
|         self.main.put::<_, Str, ByteSlice>(writer, USER_IDS_KEY, user_ids.as_slice()) | ||||
|         // TODO prefer using self.put_external_docids | ||||
|         self.main.put::<_, Str, ByteSlice>(writer, EXTERNAL_DOCIDS_KEY, external_docids.as_slice()) | ||||
|     } | ||||
|  | ||||
|     pub fn user_ids(self, reader: &heed::RoTxn<MainT>) -> ZResult<fst::Map> { | ||||
|         match self.main.get::<_, Str, ByteSlice>(reader, USER_IDS_KEY)? { | ||||
|     pub fn external_docids(self, reader: &heed::RoTxn<MainT>) -> ZResult<fst::Map> { | ||||
|         match self.main.get::<_, Str, ByteSlice>(reader, EXTERNAL_DOCIDS_KEY)? { | ||||
|             Some(bytes) => { | ||||
|                 let len = bytes.len(); | ||||
|                 let bytes = Arc::new(bytes.to_owned()); | ||||
| @@ -151,9 +151,9 @@ impl Main { | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     pub fn user_to_internal_id(self, reader: &heed::RoTxn<MainT>, userid: &str) -> ZResult<Option<DocumentId>> { | ||||
|         let user_ids = self.user_ids(reader)?; | ||||
|         Ok(user_ids.get(userid).map(|id| DocumentId(id as u32))) | ||||
|     pub fn external_to_internal_docid(self, reader: &heed::RoTxn<MainT>, external_docid: &str) -> ZResult<Option<DocumentId>> { | ||||
|         let external_ids = self.external_docids(reader)?; | ||||
|         Ok(external_ids.get(external_docid).map(|id| DocumentId(id as u32))) | ||||
|     } | ||||
|  | ||||
|     pub fn put_words_fst(self, writer: &mut heed::RwTxn<MainT>, fst: &fst::Set) -> ZResult<()> { | ||||
|   | ||||
| @@ -7,8 +7,8 @@ pub fn apply_clear_all( | ||||
|     index: &store::Index, | ||||
| ) -> MResult<()> { | ||||
|     index.main.put_words_fst(writer, &fst::Set::default())?; | ||||
|     index.main.put_user_ids(writer, &fst::Map::default())?; | ||||
|     index.main.put_internal_ids(writer, &sdset::SetBuf::default())?; | ||||
|     index.main.put_external_docids(writer, &fst::Map::default())?; | ||||
|     index.main.put_internal_docids(writer, &sdset::SetBuf::default())?; | ||||
|     index.main.put_ranked_map(writer, &RankedMap::default())?; | ||||
|     index.main.put_number_of_documents(writer, |_| 0)?; | ||||
|     index.documents_fields.clear(writer)?; | ||||
|   | ||||
| @@ -150,8 +150,8 @@ pub fn apply_addition<'a, 'b>( | ||||
|     partial: bool | ||||
| ) -> MResult<()> { | ||||
|     let mut documents_additions = HashMap::new(); | ||||
|     let mut new_user_ids = BTreeMap::new(); | ||||
|     let mut new_internal_ids = Vec::with_capacity(new_documents.len()); | ||||
|     let mut new_external_docids = BTreeMap::new(); | ||||
|     let mut new_internal_docids = Vec::with_capacity(new_documents.len()); | ||||
|  | ||||
|     let mut schema = match index.main.schema(writer)? { | ||||
|         Some(schema) => schema, | ||||
| @@ -159,17 +159,17 @@ pub fn apply_addition<'a, 'b>( | ||||
|     }; | ||||
|  | ||||
|     // Retrieve the documents ids related structures | ||||
|     let user_ids = index.main.user_ids(writer)?; | ||||
|     let internal_ids = index.main.internal_ids(writer)?; | ||||
|     let mut available_ids = DiscoverIds::new(&internal_ids); | ||||
|     let external_docids = index.main.external_docids(writer)?; | ||||
|     let internal_docids = index.main.internal_docids(writer)?; | ||||
|     let mut available_ids = DiscoverIds::new(&internal_docids); | ||||
|  | ||||
|     let primary_key = schema.primary_key().ok_or(Error::MissingPrimaryKey)?; | ||||
|  | ||||
|     // 1. store documents ids for future deletion | ||||
|     for mut document in new_documents { | ||||
|         let (document_id, userid) = extract_document_id(&primary_key, &document, &user_ids, &mut available_ids)?; | ||||
|         new_user_ids.insert(userid, document_id.0); | ||||
|         new_internal_ids.push(document_id); | ||||
|         let (document_id, userid) = extract_document_id(&primary_key, &document, &external_docids, &mut available_ids)?; | ||||
|         new_external_docids.insert(userid, document_id.0); | ||||
|         new_internal_docids.push(document_id); | ||||
|  | ||||
|         if partial { | ||||
|             let mut deserializer = Deserializer { | ||||
| @@ -192,7 +192,7 @@ pub fn apply_addition<'a, 'b>( | ||||
|  | ||||
|     // 2. remove the documents postings lists | ||||
|     let number_of_inserted_documents = documents_additions.len(); | ||||
|     let documents_ids = new_user_ids.iter().map(|(userid, _)| userid.clone()).collect(); | ||||
|     let documents_ids = new_external_docids.iter().map(|(id, _)| id.clone()).collect(); | ||||
|     apply_documents_deletion(writer, index, documents_ids)?; | ||||
|  | ||||
|     let mut ranked_map = match index.main.ranked_map(writer)? { | ||||
| @@ -242,10 +242,10 @@ pub fn apply_addition<'a, 'b>( | ||||
|  | ||||
|     index.main.put_schema(writer, &schema)?; | ||||
|  | ||||
|     let new_user_ids = fst::Map::from_iter(new_user_ids.iter().map(|(u, i)| (u, *i as u64)))?; | ||||
|     let new_internal_ids = sdset::SetBuf::from_dirty(new_internal_ids); | ||||
|     index.main.merge_user_ids(writer, &new_user_ids)?; | ||||
|     index.main.merge_internal_ids(writer, &new_internal_ids)?; | ||||
|     let new_external_docids = fst::Map::from_iter(new_external_docids.iter().map(|(u, i)| (u, *i as u64)))?; | ||||
|     let new_internal_docids = sdset::SetBuf::from_dirty(new_internal_docids); | ||||
|     index.main.merge_external_docids(writer, &new_external_docids)?; | ||||
|     index.main.merge_internal_docids(writer, &new_internal_docids)?; | ||||
|  | ||||
|     Ok(()) | ||||
| } | ||||
|   | ||||
| @@ -31,7 +31,7 @@ impl DocumentsDeletion { | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     pub fn delete_document_by_user_id(&mut self, document_id: String) { | ||||
|     pub fn delete_document_by_external_docid(&mut self, document_id: String) { | ||||
|         self.documents.push(document_id); | ||||
|     } | ||||
|  | ||||
| @@ -73,19 +73,19 @@ pub fn apply_documents_deletion( | ||||
|     deletion: Vec<String>, | ||||
| ) -> MResult<()> | ||||
| { | ||||
|     let (user_ids, internal_ids) = { | ||||
|         let new_user_ids = SetBuf::from_dirty(deletion); | ||||
|         let mut internal_ids = Vec::new(); | ||||
|     let (external_docids, internal_docids) = { | ||||
|         let new_external_docids = SetBuf::from_dirty(deletion); | ||||
|         let mut internal_docids = Vec::new(); | ||||
|  | ||||
|         let user_ids = index.main.user_ids(writer)?; | ||||
|         for userid in new_user_ids.as_slice() { | ||||
|         let user_ids = index.main.external_docids(writer)?; | ||||
|         for userid in new_external_docids.as_slice() { | ||||
|             if let Some(id) = user_ids.get(userid) { | ||||
|                 internal_ids.push(DocumentId(id as u32)); | ||||
|                 internal_docids.push(DocumentId(id as u32)); | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         let new_user_ids = fst::Map::from_iter(new_user_ids.into_iter().map(|k| (k, 0))).unwrap(); | ||||
|         (new_user_ids, SetBuf::from_dirty(internal_ids)) | ||||
|         let new_external_docids = fst::Map::from_iter(new_external_docids.into_iter().map(|k| (k, 0))).unwrap(); | ||||
|         (new_external_docids, SetBuf::from_dirty(internal_docids)) | ||||
|     }; | ||||
|  | ||||
|     let schema = match index.main.schema(writer)? { | ||||
| @@ -100,7 +100,7 @@ pub fn apply_documents_deletion( | ||||
|  | ||||
|     // facet filters deletion | ||||
|     if let Some(attributes_for_facetting) = index.main.attributes_for_faceting(writer)? { | ||||
|         let facet_map = facets::facet_map_from_docids(writer, &index, &internal_ids, &attributes_for_facetting)?; | ||||
|         let facet_map = facets::facet_map_from_docids(writer, &index, &internal_docids, &attributes_for_facetting)?; | ||||
|         index.facets.remove(writer, facet_map)?; | ||||
|     } | ||||
|  | ||||
| @@ -108,7 +108,7 @@ pub fn apply_documents_deletion( | ||||
|     let ranked_fields = schema.ranked(); | ||||
|  | ||||
|     let mut words_document_ids = HashMap::new(); | ||||
|     for id in internal_ids.iter().cloned() { | ||||
|     for id in internal_docids.iter().cloned() { | ||||
|         // remove all the ranked attributes from the ranked_map | ||||
|         for ranked_attr in ranked_fields { | ||||
|             ranked_map.remove(id, *ranked_attr); | ||||
| @@ -179,8 +179,8 @@ pub fn apply_documents_deletion( | ||||
|     index.main.put_number_of_documents(writer, |old| old - deleted_documents_len)?; | ||||
|  | ||||
|     // We apply the changes to the external and internal docids | ||||
|     index.main.remove_user_ids(writer, &user_ids)?; | ||||
|     index.main.remove_internal_ids(writer, &internal_ids)?; | ||||
|     index.main.remove_external_docids(writer, &external_docids)?; | ||||
|     index.main.remove_internal_docids(writer, &internal_docids)?; | ||||
|  | ||||
|     compute_short_prefixes(writer, index)?; | ||||
|  | ||||
|   | ||||
		Reference in New Issue
	
	Block a user