| 
							
							
							
						 |  |  | @@ -1,14 +1,19 @@ | 
		
	
		
			
				|  |  |  |  | use std::cell::RefCell; | 
		
	
		
			
				|  |  |  |  | use std::marker::PhantomData; | 
		
	
		
			
				|  |  |  |  | use std::sync::atomic::{AtomicUsize, Ordering}; | 
		
	
		
			
				|  |  |  |  | use std::num::NonZeroU16; | 
		
	
		
			
				|  |  |  |  | use std::{mem, slice}; | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  | use bbqueue::framed::{FrameGrantR, FrameProducer}; | 
		
	
		
			
				|  |  |  |  | use bytemuck::{NoUninit, CheckedBitPattern}; | 
		
	
		
			
				|  |  |  |  | use crossbeam::sync::{Parker, Unparker}; | 
		
	
		
			
				|  |  |  |  | use crossbeam_channel::{IntoIter, Receiver, SendError, Sender}; | 
		
	
		
			
				|  |  |  |  | use crossbeam_channel::{IntoIter, Receiver, SendError}; | 
		
	
		
			
				|  |  |  |  | use heed::types::Bytes; | 
		
	
		
			
				|  |  |  |  | use heed::BytesDecode; | 
		
	
		
			
				|  |  |  |  | use memmap2::Mmap; | 
		
	
		
			
				|  |  |  |  | use roaring::RoaringBitmap; | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  | use super::extract::FacetKind; | 
		
	
		
			
				|  |  |  |  | use super::ref_cell_ext::RefCellExt; | 
		
	
		
			
				|  |  |  |  | use super::thread_local::{FullySend, ThreadLocal}; | 
		
	
		
			
				|  |  |  |  | use super::StdResult; | 
		
	
		
			
				|  |  |  |  | use crate::heed_codec::facet::{FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec}; | 
		
	
	
		
			
				
					
					|  |  |  | @@ -16,7 +21,7 @@ use crate::index::main_key::{GEO_FACETED_DOCUMENTS_IDS_KEY, GEO_RTREE_KEY}; | 
		
	
		
			
				|  |  |  |  | use crate::index::{db_name, IndexEmbeddingConfig}; | 
		
	
		
			
				|  |  |  |  | use crate::update::new::KvReaderFieldId; | 
		
	
		
			
				|  |  |  |  | use crate::vector::Embedding; | 
		
	
		
			
				|  |  |  |  | use crate::{DocumentId, Index}; | 
		
	
		
			
				|  |  |  |  | use crate::{CboRoaringBitmapCodec, DocumentId, Index}; | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  | /// Creates a tuple of producer/receivers to be used by | 
		
	
		
			
				|  |  |  |  | /// the extractors and the writer loop. | 
		
	
	
		
			
				
					
					|  |  |  | @@ -26,125 +31,97 @@ use crate::{DocumentId, Index}; | 
		
	
		
			
				|  |  |  |  | /// Panics if the number of provided bbqueue is not exactly equal | 
		
	
		
			
				|  |  |  |  | /// to the number of available threads in the rayon threadpool. | 
		
	
		
			
				|  |  |  |  | pub fn extractor_writer_bbqueue( | 
		
	
		
			
				|  |  |  |  |     bbqueue: &[bbqueue::BBBuffer], | 
		
	
		
			
				|  |  |  |  |     bbbuffers: &[bbqueue::BBBuffer], | 
		
	
		
			
				|  |  |  |  | ) -> (ExtractorBbqueueSender, WriterBbqueueReceiver) { | 
		
	
		
			
				|  |  |  |  |     assert_eq!( | 
		
	
		
			
				|  |  |  |  |         bbqueue.len(), | 
		
	
		
			
				|  |  |  |  |         bbbuffers.len(), | 
		
	
		
			
				|  |  |  |  |         rayon::current_num_threads(), | 
		
	
		
			
				|  |  |  |  |         "You must provide as many BBBuffer as the available number of threads to extract" | 
		
	
		
			
				|  |  |  |  |     ); | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |     let capacity = bbbuffers.first().unwrap().capacity(); | 
		
	
		
			
				|  |  |  |  |     let parker = Parker::new(); | 
		
	
		
			
				|  |  |  |  |     let extractors = ThreadLocal::with_capacity(bbqueue.len()); | 
		
	
		
			
				|  |  |  |  |     let extractors = ThreadLocal::with_capacity(bbbuffers.len()); | 
		
	
		
			
				|  |  |  |  |     let producers = rayon::broadcast(|bi| { | 
		
	
		
			
				|  |  |  |  |         let bbqueue = &bbqueue[bi.index()]; | 
		
	
		
			
				|  |  |  |  |         let bbqueue = &bbbuffers[bi.index()]; | 
		
	
		
			
				|  |  |  |  |         let (producer, consumer) = bbqueue.try_split_framed().unwrap(); | 
		
	
		
			
				|  |  |  |  |         extractors.get_or(|| FullySend(producer)); | 
		
	
		
			
				|  |  |  |  |         extractors.get_or(|| FullySend(RefCell::new(producer))); | 
		
	
		
			
				|  |  |  |  |         consumer | 
		
	
		
			
				|  |  |  |  |     }); | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |     ( | 
		
	
		
			
				|  |  |  |  |         ExtractorBbqueueSender { inner: extractors, unparker: parker.unparker().clone() }, | 
		
	
		
			
				|  |  |  |  |         ExtractorBbqueueSender { | 
		
	
		
			
				|  |  |  |  |             inner: extractors, | 
		
	
		
			
				|  |  |  |  |             capacity: capacity.checked_sub(9).unwrap(), | 
		
	
		
			
				|  |  |  |  |             unparker: parker.unparker().clone(), | 
		
	
		
			
				|  |  |  |  |         }, | 
		
	
		
			
				|  |  |  |  |         WriterBbqueueReceiver { inner: producers, parker }, | 
		
	
		
			
				|  |  |  |  |     ) | 
		
	
		
			
				|  |  |  |  | } | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  | pub struct ExtractorBbqueueSender<'a> { | 
		
	
		
			
				|  |  |  |  |     inner: ThreadLocal<FullySend<bbqueue::framed::FrameProducer<'a>>>, | 
		
	
		
			
				|  |  |  |  |     /// Used to wake up the receiver thread, | 
		
	
		
			
				|  |  |  |  |     /// Used every time we write something in the producer. | 
		
	
		
			
				|  |  |  |  |     unparker: Unparker, | 
		
	
		
			
				|  |  |  |  | } | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  | pub struct WriterBbqueueReceiver<'a> { | 
		
	
		
			
				|  |  |  |  |     inner: Vec<bbqueue::framed::FrameConsumer<'a>>, | 
		
	
		
			
				|  |  |  |  |     /// Used to park when no more work is required | 
		
	
		
			
				|  |  |  |  |     parker: Parker, | 
		
	
		
			
				|  |  |  |  | } | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  | /// The capacity of the channel is currently in number of messages. | 
		
	
		
			
				|  |  |  |  | pub fn extractor_writer_channel(cap: usize) -> (ExtractorSender, WriterReceiver) { | 
		
	
		
			
				|  |  |  |  |     let (sender, receiver) = crossbeam_channel::bounded(cap); | 
		
	
		
			
				|  |  |  |  |     ( | 
		
	
		
			
				|  |  |  |  |         ExtractorSender { | 
		
	
		
			
				|  |  |  |  |             sender, | 
		
	
		
			
				|  |  |  |  |             send_count: Default::default(), | 
		
	
		
			
				|  |  |  |  |             writer_contentious_count: Default::default(), | 
		
	
		
			
				|  |  |  |  |             extractor_contentious_count: Default::default(), | 
		
	
		
			
				|  |  |  |  |         }, | 
		
	
		
			
				|  |  |  |  |         WriterReceiver(receiver), | 
		
	
		
			
				|  |  |  |  |     ) | 
		
	
		
			
				|  |  |  |  | } | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  | pub enum KeyValueEntry { | 
		
	
		
			
				|  |  |  |  |     Small { key_length: usize, data: Box<[u8]> }, | 
		
	
		
			
				|  |  |  |  |     Large { key_entry: KeyEntry, data: Mmap }, | 
		
	
		
			
				|  |  |  |  | } | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  | impl KeyValueEntry { | 
		
	
		
			
				|  |  |  |  |     pub fn from_small_key_value(key: &[u8], value: &[u8]) -> Self { | 
		
	
		
			
				|  |  |  |  |         let mut data = Vec::with_capacity(key.len() + value.len()); | 
		
	
		
			
				|  |  |  |  |         data.extend_from_slice(key); | 
		
	
		
			
				|  |  |  |  |         data.extend_from_slice(value); | 
		
	
		
			
				|  |  |  |  |         KeyValueEntry::Small { key_length: key.len(), data: data.into_boxed_slice() } | 
		
	
		
			
				|  |  |  |  |     } | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |     fn from_large_key_value(key: &[u8], value: Mmap) -> Self { | 
		
	
		
			
				|  |  |  |  |         KeyValueEntry::Large { key_entry: KeyEntry::from_key(key), data: value } | 
		
	
		
			
				|  |  |  |  |     } | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |     pub fn key(&self) -> &[u8] { | 
		
	
		
			
				|  |  |  |  |         match self { | 
		
	
		
			
				|  |  |  |  |             KeyValueEntry::Small { key_length, data } => &data[..*key_length], | 
		
	
		
			
				|  |  |  |  |             KeyValueEntry::Large { key_entry, data: _ } => key_entry.entry(), | 
		
	
		
			
				|  |  |  |  |         } | 
		
	
		
			
				|  |  |  |  |     } | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |     pub fn value(&self) -> &[u8] { | 
		
	
		
			
				|  |  |  |  |         match self { | 
		
	
		
			
				|  |  |  |  |             KeyValueEntry::Small { key_length, data } => &data[*key_length..], | 
		
	
		
			
				|  |  |  |  |             KeyValueEntry::Large { key_entry: _, data } => &data[..], | 
		
	
		
			
				|  |  |  |  | impl<'a> WriterBbqueueReceiver<'a> { | 
		
	
		
			
				|  |  |  |  |     pub fn read(&mut self) -> Option<FrameWithHeader<'a>> { | 
		
	
		
			
				|  |  |  |  |         loop { | 
		
	
		
			
				|  |  |  |  |             for consumer in &mut self.inner { | 
		
	
		
			
				|  |  |  |  |                 // mark the frame as auto release | 
		
	
		
			
				|  |  |  |  |                 if let Some() = consumer.read() | 
		
	
		
			
				|  |  |  |  |             } | 
		
	
		
			
				|  |  |  |  |             break None; | 
		
	
		
			
				|  |  |  |  |         } | 
		
	
		
			
				|  |  |  |  |     } | 
		
	
		
			
				|  |  |  |  | } | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  | pub struct KeyEntry { | 
		
	
		
			
				|  |  |  |  |     data: Box<[u8]>, | 
		
	
		
			
				|  |  |  |  | struct FrameWithHeader<'a> { | 
		
	
		
			
				|  |  |  |  |     header: EntryHeader, | 
		
	
		
			
				|  |  |  |  |     frame: FrameGrantR<'a>, | 
		
	
		
			
				|  |  |  |  | } | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  | impl KeyEntry { | 
		
	
		
			
				|  |  |  |  |     pub fn from_key(key: &[u8]) -> Self { | 
		
	
		
			
				|  |  |  |  |         KeyEntry { data: key.to_vec().into_boxed_slice() } | 
		
	
		
			
				|  |  |  |  | #[derive(Debug, Clone, Copy, CheckedBitPattern)] | 
		
	
		
			
				|  |  |  |  | #[repr(u8)] | 
		
	
		
			
				|  |  |  |  | enum EntryHeader { | 
		
	
		
			
				|  |  |  |  |     /// Whether a put of the key/value pair or a delete of the given key. | 
		
	
		
			
				|  |  |  |  |     DbOperation { | 
		
	
		
			
				|  |  |  |  |         /// The database on which to perform the operation. | 
		
	
		
			
				|  |  |  |  |         database: Database, | 
		
	
		
			
				|  |  |  |  |         /// The key length in the buffer. | 
		
	
		
			
				|  |  |  |  |         /// | 
		
	
		
			
				|  |  |  |  |         /// If None it means that the buffer is dedicated | 
		
	
		
			
				|  |  |  |  |         /// to the key and it is therefore a deletion operation. | 
		
	
		
			
				|  |  |  |  |         key_length: Option<NonZeroU16>, | 
		
	
		
			
				|  |  |  |  |     }, | 
		
	
		
			
				|  |  |  |  |     ArroyDeleteVector { | 
		
	
		
			
				|  |  |  |  |         docid: DocumentId, | 
		
	
		
			
				|  |  |  |  |     }, | 
		
	
		
			
				|  |  |  |  |     /// The embedding is the remaining space and represents a non-aligned [f32]. | 
		
	
		
			
				|  |  |  |  |     ArroySetVector { | 
		
	
		
			
				|  |  |  |  |         docid: DocumentId, | 
		
	
		
			
				|  |  |  |  |         embedder_id: u8, | 
		
	
		
			
				|  |  |  |  |     }, | 
		
	
		
			
				|  |  |  |  | } | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  | impl EntryHeader { | 
		
	
		
			
				|  |  |  |  |     fn delete_key_size(key_length: u16) -> usize { | 
		
	
		
			
				|  |  |  |  |         mem::size_of::<Self>() + key_length as usize | 
		
	
		
			
				|  |  |  |  |     } | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |     pub fn entry(&self) -> &[u8] { | 
		
	
		
			
				|  |  |  |  |         self.data.as_ref() | 
		
	
		
			
				|  |  |  |  |     fn put_key_value_size(key_length: u16, value_length: usize) -> usize { | 
		
	
		
			
				|  |  |  |  |         mem::size_of::<Self>() + key_length as usize + value_length | 
		
	
		
			
				|  |  |  |  |     } | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |     fn bytes_of(&self) -> &[u8] { | 
		
	
		
			
				|  |  |  |  |         /// TODO do the variant matching ourselves | 
		
	
		
			
				|  |  |  |  |         todo!() | 
		
	
		
			
				|  |  |  |  |     } | 
		
	
		
			
				|  |  |  |  | } | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  | pub enum EntryOperation { | 
		
	
		
			
				|  |  |  |  |     Delete(KeyEntry), | 
		
	
		
			
				|  |  |  |  |     Write(KeyValueEntry), | 
		
	
		
			
				|  |  |  |  | } | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  | pub enum WriterOperation { | 
		
	
		
			
				|  |  |  |  |     DbOperation(DbOperation), | 
		
	
		
			
				|  |  |  |  |     ArroyOperation(ArroyOperation), | 
		
	
		
			
				|  |  |  |  | } | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  | pub enum ArroyOperation { | 
		
	
		
			
				|  |  |  |  |     DeleteVectors { docid: DocumentId }, | 
		
	
		
			
				|  |  |  |  |     SetVectors { docid: DocumentId, embedder_id: u8, embeddings: Vec<Embedding> }, | 
		
	
		
			
				|  |  |  |  |     SetVector { docid: DocumentId, embedder_id: u8, embedding: Embedding }, | 
		
	
		
			
				|  |  |  |  |     Finish { configs: Vec<IndexEmbeddingConfig> }, | 
		
	
		
			
				|  |  |  |  | } | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  | pub struct DbOperation { | 
		
	
		
			
				|  |  |  |  |     database: Database, | 
		
	
		
			
				|  |  |  |  |     entry: EntryOperation, | 
		
	
		
			
				|  |  |  |  | } | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  | #[derive(Debug)] | 
		
	
		
			
				|  |  |  |  | #[derive(Debug, Clone, Copy, NoUninit, CheckedBitPattern)] | 
		
	
		
			
				|  |  |  |  | #[repr(u32)] | 
		
	
		
			
				|  |  |  |  | pub enum Database { | 
		
	
		
			
				|  |  |  |  |     Main, | 
		
	
		
			
				|  |  |  |  |     Documents, | 
		
	
	
		
			
				
					
					|  |  |  | @@ -220,82 +197,46 @@ impl From<FacetKind> for Database { | 
		
	
		
			
				|  |  |  |  |     } | 
		
	
		
			
				|  |  |  |  | } | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  | impl DbOperation { | 
		
	
		
			
				|  |  |  |  |     pub fn database(&self, index: &Index) -> heed::Database<Bytes, Bytes> { | 
		
	
		
			
				|  |  |  |  |         self.database.database(index) | 
		
	
		
			
				|  |  |  |  |     } | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |     pub fn database_name(&self) -> &'static str { | 
		
	
		
			
				|  |  |  |  |         self.database.database_name() | 
		
	
		
			
				|  |  |  |  |     } | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |     pub fn entry(self) -> EntryOperation { | 
		
	
		
			
				|  |  |  |  |         self.entry | 
		
	
		
			
				|  |  |  |  |     } | 
		
	
		
			
				|  |  |  |  | pub struct ExtractorBbqueueSender<'a> { | 
		
	
		
			
				|  |  |  |  |     inner: ThreadLocal<FullySend<RefCell<FrameProducer<'a>>>>, | 
		
	
		
			
				|  |  |  |  |     /// The capacity of this frame producer, will never be able to store more than that. | 
		
	
		
			
				|  |  |  |  |     /// | 
		
	
		
			
				|  |  |  |  |     /// Note that the FrameProducer requires up to 9 bytes to encode the length, | 
		
	
		
			
				|  |  |  |  |     /// the capacity has been shrunk accordingly. | 
		
	
		
			
				|  |  |  |  |     /// | 
		
	
		
			
				|  |  |  |  |     /// <https://docs.rs/bbqueue/latest/bbqueue/framed/index.html#frame-header> | 
		
	
		
			
				|  |  |  |  |     capacity: usize, | 
		
	
		
			
				|  |  |  |  |     /// Used to wake up the receiver thread, | 
		
	
		
			
				|  |  |  |  |     /// Used every time we write something in the producer. | 
		
	
		
			
				|  |  |  |  |     unparker: Unparker, | 
		
	
		
			
				|  |  |  |  | } | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  | pub struct WriterReceiver(Receiver<WriterOperation>); | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  | impl IntoIterator for WriterReceiver { | 
		
	
		
			
				|  |  |  |  |     type Item = WriterOperation; | 
		
	
		
			
				|  |  |  |  |     type IntoIter = IntoIter<Self::Item>; | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |     fn into_iter(self) -> Self::IntoIter { | 
		
	
		
			
				|  |  |  |  |         self.0.into_iter() | 
		
	
		
			
				|  |  |  |  |     } | 
		
	
		
			
				|  |  |  |  | } | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  | pub struct ExtractorSender { | 
		
	
		
			
				|  |  |  |  |     sender: Sender<WriterOperation>, | 
		
	
		
			
				|  |  |  |  |     /// The number of message we sent in total in the channel. | 
		
	
		
			
				|  |  |  |  |     send_count: AtomicUsize, | 
		
	
		
			
				|  |  |  |  |     /// The number of times we sent something in a channel that was full. | 
		
	
		
			
				|  |  |  |  |     writer_contentious_count: AtomicUsize, | 
		
	
		
			
				|  |  |  |  |     /// The number of times we sent something in a channel that was empty. | 
		
	
		
			
				|  |  |  |  |     extractor_contentious_count: AtomicUsize, | 
		
	
		
			
				|  |  |  |  | } | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  | impl Drop for ExtractorSender { | 
		
	
		
			
				|  |  |  |  |     fn drop(&mut self) { | 
		
	
		
			
				|  |  |  |  |         let send_count = *self.send_count.get_mut(); | 
		
	
		
			
				|  |  |  |  |         let writer_contentious_count = *self.writer_contentious_count.get_mut(); | 
		
	
		
			
				|  |  |  |  |         let extractor_contentious_count = *self.extractor_contentious_count.get_mut(); | 
		
	
		
			
				|  |  |  |  |         tracing::debug!( | 
		
	
		
			
				|  |  |  |  |             "Extractor channel stats: {send_count} sends, \ | 
		
	
		
			
				|  |  |  |  |             {writer_contentious_count} writer contentions ({}%), \ | 
		
	
		
			
				|  |  |  |  |             {extractor_contentious_count} extractor contentions ({}%)", | 
		
	
		
			
				|  |  |  |  |             (writer_contentious_count as f32 / send_count as f32) * 100.0, | 
		
	
		
			
				|  |  |  |  |             (extractor_contentious_count as f32 / send_count as f32) * 100.0 | 
		
	
		
			
				|  |  |  |  |         ) | 
		
	
		
			
				|  |  |  |  |     } | 
		
	
		
			
				|  |  |  |  | } | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  | impl ExtractorSender { | 
		
	
		
			
				|  |  |  |  |     pub fn docids<D: DatabaseType>(&self) -> WordDocidsSender<'_, D> { | 
		
	
		
			
				|  |  |  |  | impl<'b> ExtractorBbqueueSender<'b> { | 
		
	
		
			
				|  |  |  |  |     pub fn docids<'a, D: DatabaseType>(&'a self) -> WordDocidsSender<'a, 'b, D> { | 
		
	
		
			
				|  |  |  |  |         WordDocidsSender { sender: self, _marker: PhantomData } | 
		
	
		
			
				|  |  |  |  |     } | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |     pub fn facet_docids(&self) -> FacetDocidsSender<'_> { | 
		
	
		
			
				|  |  |  |  |     pub fn facet_docids<'a>(&'a self) -> FacetDocidsSender<'a, 'b> { | 
		
	
		
			
				|  |  |  |  |         FacetDocidsSender { sender: self } | 
		
	
		
			
				|  |  |  |  |     } | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |     pub fn field_id_docid_facet_sender(&self) -> FieldIdDocidFacetSender<'_> { | 
		
	
		
			
				|  |  |  |  |         FieldIdDocidFacetSender(self) | 
		
	
		
			
				|  |  |  |  |     pub fn field_id_docid_facet_sender<'a>(&'a self) -> FieldIdDocidFacetSender<'a, 'b> { | 
		
	
		
			
				|  |  |  |  |         FieldIdDocidFacetSender(&self) | 
		
	
		
			
				|  |  |  |  |     } | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |     pub fn documents(&self) -> DocumentsSender<'_> { | 
		
	
		
			
				|  |  |  |  |         DocumentsSender(self) | 
		
	
		
			
				|  |  |  |  |     pub fn documents<'a>(&'a self) -> DocumentsSender<'a, 'b> { | 
		
	
		
			
				|  |  |  |  |         DocumentsSender(&self) | 
		
	
		
			
				|  |  |  |  |     } | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |     pub fn embeddings(&self) -> EmbeddingSender<'_> { | 
		
	
		
			
				|  |  |  |  |         EmbeddingSender(&self.sender) | 
		
	
		
			
				|  |  |  |  |     pub fn embeddings<'a>(&'a self) -> EmbeddingSender<'a, 'b> { | 
		
	
		
			
				|  |  |  |  |         EmbeddingSender(&self) | 
		
	
		
			
				|  |  |  |  |     } | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |     pub fn geo(&self) -> GeoSender<'_> { | 
		
	
		
			
				|  |  |  |  |         GeoSender(&self.sender) | 
		
	
		
			
				|  |  |  |  |     pub fn geo<'a>(&'a self) -> GeoSender<'a, 'b> { | 
		
	
		
			
				|  |  |  |  |         GeoSender(&self) | 
		
	
		
			
				|  |  |  |  |     } | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |     fn send_delete_vector(&self, docid: DocumentId) -> StdResult<(), SendError<()>> { | 
		
	
		
			
				|  |  |  |  |     fn send_delete_vector(&self, docid: DocumentId) -> crate::Result<()> { | 
		
	
		
			
				|  |  |  |  |         match self | 
		
	
		
			
				|  |  |  |  |             .sender | 
		
	
		
			
				|  |  |  |  |             .send(WriterOperation::ArroyOperation(ArroyOperation::DeleteVectors { docid })) | 
		
	
	
		
			
				
					
					|  |  |  | @@ -305,18 +246,69 @@ impl ExtractorSender { | 
		
	
		
			
				|  |  |  |  |         } | 
		
	
		
			
				|  |  |  |  |     } | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |     fn send_db_operation(&self, op: DbOperation) -> StdResult<(), SendError<()>> { | 
		
	
		
			
				|  |  |  |  |         if self.sender.is_full() { | 
		
	
		
			
				|  |  |  |  |             self.writer_contentious_count.fetch_add(1, Ordering::SeqCst); | 
		
	
		
			
				|  |  |  |  |         } | 
		
	
		
			
				|  |  |  |  |         if self.sender.is_empty() { | 
		
	
		
			
				|  |  |  |  |             self.extractor_contentious_count.fetch_add(1, Ordering::SeqCst); | 
		
	
		
			
				|  |  |  |  |     fn write_key_value(&self, database: Database, key: &[u8], value: &[u8]) -> crate::Result<()> { | 
		
	
		
			
				|  |  |  |  |         let capacity = self.capacity; | 
		
	
		
			
				|  |  |  |  |         let refcell = self.inner.get().unwrap(); | 
		
	
		
			
				|  |  |  |  |         let mut producer = refcell.0.borrow_mut_or_yield(); | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |         let key_length = key.len().try_into().unwrap(); | 
		
	
		
			
				|  |  |  |  |         let value_length = value.len(); | 
		
	
		
			
				|  |  |  |  |         let total_length = EntryHeader::put_key_value_size(key_length, value_length); | 
		
	
		
			
				|  |  |  |  |         if total_length > capacity { | 
		
	
		
			
				|  |  |  |  |             unreachable!("entry larger that the bbqueue capacity"); | 
		
	
		
			
				|  |  |  |  |         } | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |         self.send_count.fetch_add(1, Ordering::SeqCst); | 
		
	
		
			
				|  |  |  |  |         match self.sender.send(WriterOperation::DbOperation(op)) { | 
		
	
		
			
				|  |  |  |  |             Ok(()) => Ok(()), | 
		
	
		
			
				|  |  |  |  |             Err(SendError(_)) => Err(SendError(())), | 
		
	
		
			
				|  |  |  |  |         let payload_header = | 
		
	
		
			
				|  |  |  |  |             EntryHeader::DbOperation { database, key_length: NonZeroU16::new(key_length) }; | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |         loop { | 
		
	
		
			
				|  |  |  |  |             let mut grant = match producer.grant(total_length) { | 
		
	
		
			
				|  |  |  |  |                 Ok(grant) => grant, | 
		
	
		
			
				|  |  |  |  |                 Err(bbqueue::Error::InsufficientSize) => continue, | 
		
	
		
			
				|  |  |  |  |                 Err(e) => unreachable!("{e:?}"), | 
		
	
		
			
				|  |  |  |  |             }; | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |             let (header, remaining) = grant.split_at_mut(mem::size_of::<EntryHeader>()); | 
		
	
		
			
				|  |  |  |  |             header.copy_from_slice(payload_header.bytes_of()); | 
		
	
		
			
				|  |  |  |  |             let (key_out, value_out) = remaining.split_at_mut(key.len()); | 
		
	
		
			
				|  |  |  |  |             key_out.copy_from_slice(key); | 
		
	
		
			
				|  |  |  |  |             value_out.copy_from_slice(value); | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |             // We could commit only the used memory. | 
		
	
		
			
				|  |  |  |  |             grant.commit(total_length); | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |             break Ok(()); | 
		
	
		
			
				|  |  |  |  |         } | 
		
	
		
			
				|  |  |  |  |     } | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |     fn delete_entry(&self, database: Database, key: &[u8]) -> crate::Result<()> { | 
		
	
		
			
				|  |  |  |  |         let capacity = self.capacity; | 
		
	
		
			
				|  |  |  |  |         let refcell = self.inner.get().unwrap(); | 
		
	
		
			
				|  |  |  |  |         let mut producer = refcell.0.borrow_mut_or_yield(); | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |         let key_length = key.len().try_into().unwrap(); | 
		
	
		
			
				|  |  |  |  |         let total_length = EntryHeader::delete_key_size(key_length); | 
		
	
		
			
				|  |  |  |  |         if total_length > capacity { | 
		
	
		
			
				|  |  |  |  |             unreachable!("entry larger that the bbqueue capacity"); | 
		
	
		
			
				|  |  |  |  |         } | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |         let payload_header = EntryHeader::DbOperation { database, key_length: None }; | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |         loop { | 
		
	
		
			
				|  |  |  |  |             let mut grant = match producer.grant(total_length) { | 
		
	
		
			
				|  |  |  |  |                 Ok(grant) => grant, | 
		
	
		
			
				|  |  |  |  |                 Err(bbqueue::Error::InsufficientSize) => continue, | 
		
	
		
			
				|  |  |  |  |                 Err(e) => unreachable!("{e:?}"), | 
		
	
		
			
				|  |  |  |  |             }; | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |             let (header, remaining) = grant.split_at_mut(mem::size_of::<EntryHeader>()); | 
		
	
		
			
				|  |  |  |  |             header.copy_from_slice(payload_header.bytes_of()); | 
		
	
		
			
				|  |  |  |  |             remaining.copy_from_slice(key); | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |             // We could commit only the used memory. | 
		
	
		
			
				|  |  |  |  |             grant.commit(total_length); | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |             break Ok(()); | 
		
	
		
			
				|  |  |  |  |         } | 
		
	
		
			
				|  |  |  |  |     } | 
		
	
		
			
				|  |  |  |  | } | 
		
	
	
		
			
				
					
					|  |  |  | @@ -356,159 +348,237 @@ impl DatabaseType for WordPositionDocids { | 
		
	
		
			
				|  |  |  |  |     const DATABASE: Database = Database::WordPositionDocids; | 
		
	
		
			
				|  |  |  |  | } | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  | pub trait DocidsSender { | 
		
	
		
			
				|  |  |  |  |     fn write(&self, key: &[u8], value: &[u8]) -> StdResult<(), SendError<()>>; | 
		
	
		
			
				|  |  |  |  |     fn delete(&self, key: &[u8]) -> StdResult<(), SendError<()>>; | 
		
	
		
			
				|  |  |  |  | } | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  | pub struct WordDocidsSender<'a, D> { | 
		
	
		
			
				|  |  |  |  |     sender: &'a ExtractorSender, | 
		
	
		
			
				|  |  |  |  | pub struct WordDocidsSender<'a, 'b, D> { | 
		
	
		
			
				|  |  |  |  |     sender: &'a ExtractorBbqueueSender<'b>, | 
		
	
		
			
				|  |  |  |  |     _marker: PhantomData<D>, | 
		
	
		
			
				|  |  |  |  | } | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  | impl<D: DatabaseType> DocidsSender for WordDocidsSender<'_, D> { | 
		
	
		
			
				|  |  |  |  |     fn write(&self, key: &[u8], value: &[u8]) -> StdResult<(), SendError<()>> { | 
		
	
		
			
				|  |  |  |  |         let entry = EntryOperation::Write(KeyValueEntry::from_small_key_value(key, value)); | 
		
	
		
			
				|  |  |  |  |         match self.sender.send_db_operation(DbOperation { database: D::DATABASE, entry }) { | 
		
	
		
			
				|  |  |  |  |             Ok(()) => Ok(()), | 
		
	
		
			
				|  |  |  |  |             Err(SendError(_)) => Err(SendError(())), | 
		
	
		
			
				|  |  |  |  | impl<D: DatabaseType> WordDocidsSender<'_, '_, D> { | 
		
	
		
			
				|  |  |  |  |     pub fn write(&self, key: &[u8], bitmap: &RoaringBitmap) -> crate::Result<()> { | 
		
	
		
			
				|  |  |  |  |         let capacity = self.sender.capacity; | 
		
	
		
			
				|  |  |  |  |         let refcell = self.sender.inner.get().unwrap(); | 
		
	
		
			
				|  |  |  |  |         let mut producer = refcell.0.borrow_mut_or_yield(); | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |         let key_length = key.len().try_into().unwrap(); | 
		
	
		
			
				|  |  |  |  |         let value_length = CboRoaringBitmapCodec::serialized_size(bitmap); | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |         let total_length = EntryHeader::put_key_value_size(key_length, value_length); | 
		
	
		
			
				|  |  |  |  |         if total_length > capacity { | 
		
	
		
			
				|  |  |  |  |             unreachable!("entry larger that the bbqueue capacity"); | 
		
	
		
			
				|  |  |  |  |         } | 
		
	
		
			
				|  |  |  |  |     } | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |     fn delete(&self, key: &[u8]) -> StdResult<(), SendError<()>> { | 
		
	
		
			
				|  |  |  |  |         let entry = EntryOperation::Delete(KeyEntry::from_key(key)); | 
		
	
		
			
				|  |  |  |  |         match self.sender.send_db_operation(DbOperation { database: D::DATABASE, entry }) { | 
		
	
		
			
				|  |  |  |  |             Ok(()) => Ok(()), | 
		
	
		
			
				|  |  |  |  |             Err(SendError(_)) => Err(SendError(())), | 
		
	
		
			
				|  |  |  |  |         } | 
		
	
		
			
				|  |  |  |  |     } | 
		
	
		
			
				|  |  |  |  | } | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  | pub struct FacetDocidsSender<'a> { | 
		
	
		
			
				|  |  |  |  |     sender: &'a ExtractorSender, | 
		
	
		
			
				|  |  |  |  | } | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  | impl DocidsSender for FacetDocidsSender<'_> { | 
		
	
		
			
				|  |  |  |  |     fn write(&self, key: &[u8], value: &[u8]) -> StdResult<(), SendError<()>> { | 
		
	
		
			
				|  |  |  |  |         let (facet_kind, key) = FacetKind::extract_from_key(key); | 
		
	
		
			
				|  |  |  |  |         let database = Database::from(facet_kind); | 
		
	
		
			
				|  |  |  |  |         let entry = match facet_kind { | 
		
	
		
			
				|  |  |  |  |             // skip level group size | 
		
	
		
			
				|  |  |  |  |             FacetKind::String | FacetKind::Number => { | 
		
	
		
			
				|  |  |  |  |                 // add facet group size | 
		
	
		
			
				|  |  |  |  |                 let value = [&[1], value].concat(); | 
		
	
		
			
				|  |  |  |  |                 EntryOperation::Write(KeyValueEntry::from_small_key_value(key, &value)) | 
		
	
		
			
				|  |  |  |  |             } | 
		
	
		
			
				|  |  |  |  |             _ => EntryOperation::Write(KeyValueEntry::from_small_key_value(key, value)), | 
		
	
		
			
				|  |  |  |  |         let payload_header = EntryHeader::DbOperation { | 
		
	
		
			
				|  |  |  |  |             database: D::DATABASE, | 
		
	
		
			
				|  |  |  |  |             key_length: NonZeroU16::new(key_length), | 
		
	
		
			
				|  |  |  |  |         }; | 
		
	
		
			
				|  |  |  |  |         match self.sender.send_db_operation(DbOperation { database, entry }) { | 
		
	
		
			
				|  |  |  |  |             Ok(()) => Ok(()), | 
		
	
		
			
				|  |  |  |  |             Err(SendError(_)) => Err(SendError(())), | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |         loop { | 
		
	
		
			
				|  |  |  |  |             let mut grant = match producer.grant(total_length) { | 
		
	
		
			
				|  |  |  |  |                 Ok(grant) => grant, | 
		
	
		
			
				|  |  |  |  |                 Err(bbqueue::Error::InsufficientSize) => continue, | 
		
	
		
			
				|  |  |  |  |                 Err(e) => unreachable!("{e:?}"), | 
		
	
		
			
				|  |  |  |  |             }; | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |             let (header, remaining) = grant.split_at_mut(mem::size_of::<EntryHeader>()); | 
		
	
		
			
				|  |  |  |  |             header.copy_from_slice(payload_header.bytes_of()); | 
		
	
		
			
				|  |  |  |  |             let (key_out, value_out) = remaining.split_at_mut(key.len()); | 
		
	
		
			
				|  |  |  |  |             key_out.copy_from_slice(key); | 
		
	
		
			
				|  |  |  |  |             CboRoaringBitmapCodec::serialize_into_writer(bitmap, value_out)?; | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |             // We could commit only the used memory. | 
		
	
		
			
				|  |  |  |  |             grant.commit(total_length); | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |             break Ok(()); | 
		
	
		
			
				|  |  |  |  |         } | 
		
	
		
			
				|  |  |  |  |     } | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |     fn delete(&self, key: &[u8]) -> StdResult<(), SendError<()>> { | 
		
	
		
			
				|  |  |  |  |     pub fn delete(&self, key: &[u8]) -> crate::Result<()> { | 
		
	
		
			
				|  |  |  |  |         let capacity = self.sender.capacity; | 
		
	
		
			
				|  |  |  |  |         let refcell = self.sender.inner.get().unwrap(); | 
		
	
		
			
				|  |  |  |  |         let mut producer = refcell.0.borrow_mut_or_yield(); | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |         let key_length = key.len().try_into().unwrap(); | 
		
	
		
			
				|  |  |  |  |         let total_length = EntryHeader::delete_key_size(key_length); | 
		
	
		
			
				|  |  |  |  |         if total_length > capacity { | 
		
	
		
			
				|  |  |  |  |             unreachable!("entry larger that the bbqueue capacity"); | 
		
	
		
			
				|  |  |  |  |         } | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |         let payload_header = EntryHeader::DbOperation { database: D::DATABASE, key_length: None }; | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |         loop { | 
		
	
		
			
				|  |  |  |  |             let mut grant = match producer.grant(total_length) { | 
		
	
		
			
				|  |  |  |  |                 Ok(grant) => grant, | 
		
	
		
			
				|  |  |  |  |                 Err(bbqueue::Error::InsufficientSize) => continue, | 
		
	
		
			
				|  |  |  |  |                 Err(e) => unreachable!("{e:?}"), | 
		
	
		
			
				|  |  |  |  |             }; | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |             let (header, remaining) = grant.split_at_mut(mem::size_of::<EntryHeader>()); | 
		
	
		
			
				|  |  |  |  |             header.copy_from_slice(payload_header.bytes_of()); | 
		
	
		
			
				|  |  |  |  |             remaining.copy_from_slice(key); | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |             // We could commit only the used memory. | 
		
	
		
			
				|  |  |  |  |             grant.commit(total_length); | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |             break Ok(()); | 
		
	
		
			
				|  |  |  |  |         } | 
		
	
		
			
				|  |  |  |  |     } | 
		
	
		
			
				|  |  |  |  | } | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  | pub struct FacetDocidsSender<'a, 'b> { | 
		
	
		
			
				|  |  |  |  |     sender: &'a ExtractorBbqueueSender<'b>, | 
		
	
		
			
				|  |  |  |  | } | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  | impl FacetDocidsSender<'_, '_> { | 
		
	
		
			
				|  |  |  |  |     pub fn write(&self, key: &[u8], bitmap: &RoaringBitmap) -> crate::Result<()> { | 
		
	
		
			
				|  |  |  |  |         let capacity = self.sender.capacity; | 
		
	
		
			
				|  |  |  |  |         let refcell = self.sender.inner.get().unwrap(); | 
		
	
		
			
				|  |  |  |  |         let mut producer = refcell.0.borrow_mut_or_yield(); | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |         let (facet_kind, key) = FacetKind::extract_from_key(key); | 
		
	
		
			
				|  |  |  |  |         let database = Database::from(facet_kind); | 
		
	
		
			
				|  |  |  |  |         let entry = EntryOperation::Delete(KeyEntry::from_key(key)); | 
		
	
		
			
				|  |  |  |  |         match self.sender.send_db_operation(DbOperation { database, entry }) { | 
		
	
		
			
				|  |  |  |  |             Ok(()) => Ok(()), | 
		
	
		
			
				|  |  |  |  |             Err(SendError(_)) => Err(SendError(())), | 
		
	
		
			
				|  |  |  |  |         let key_length = key.len().try_into().unwrap(); | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |         let value_length = CboRoaringBitmapCodec::serialized_size(bitmap); | 
		
	
		
			
				|  |  |  |  |         let value_length = match facet_kind { | 
		
	
		
			
				|  |  |  |  |             // We must take the facet group size into account | 
		
	
		
			
				|  |  |  |  |             // when we serialize strings and numbers. | 
		
	
		
			
				|  |  |  |  |             FacetKind::Number | FacetKind::String => value_length + 1, | 
		
	
		
			
				|  |  |  |  |             FacetKind::Null | FacetKind::Empty | FacetKind::Exists => value_length, | 
		
	
		
			
				|  |  |  |  |         }; | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |         let total_length = EntryHeader::put_key_value_size(key_length, value_length); | 
		
	
		
			
				|  |  |  |  |         if total_length > capacity { | 
		
	
		
			
				|  |  |  |  |             unreachable!("entry larger that the bbqueue capacity"); | 
		
	
		
			
				|  |  |  |  |         } | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |         let payload_header = EntryHeader::DbOperation { | 
		
	
		
			
				|  |  |  |  |             database: Database::from(facet_kind), | 
		
	
		
			
				|  |  |  |  |             key_length: NonZeroU16::new(key_length), | 
		
	
		
			
				|  |  |  |  |         }; | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |         loop { | 
		
	
		
			
				|  |  |  |  |             let mut grant = match producer.grant(total_length) { | 
		
	
		
			
				|  |  |  |  |                 Ok(grant) => grant, | 
		
	
		
			
				|  |  |  |  |                 Err(bbqueue::Error::InsufficientSize) => continue, | 
		
	
		
			
				|  |  |  |  |                 Err(e) => unreachable!("{e:?}"), | 
		
	
		
			
				|  |  |  |  |             }; | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |             let (header, remaining) = grant.split_at_mut(mem::size_of::<EntryHeader>()); | 
		
	
		
			
				|  |  |  |  |             header.copy_from_slice(payload_header.bytes_of()); | 
		
	
		
			
				|  |  |  |  |             let (key_out, value_out) = remaining.split_at_mut(key.len()); | 
		
	
		
			
				|  |  |  |  |             key_out.copy_from_slice(key); | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |             let value_out = match facet_kind { | 
		
	
		
			
				|  |  |  |  |                 // We must take the facet group size into account | 
		
	
		
			
				|  |  |  |  |                 // when we serialize strings and numbers. | 
		
	
		
			
				|  |  |  |  |                 FacetKind::String | FacetKind::Number => { | 
		
	
		
			
				|  |  |  |  |                     let (first, remaining) = value_out.split_first_mut().unwrap(); | 
		
	
		
			
				|  |  |  |  |                     *first = 1; | 
		
	
		
			
				|  |  |  |  |                     remaining | 
		
	
		
			
				|  |  |  |  |                 } | 
		
	
		
			
				|  |  |  |  |                 FacetKind::Null | FacetKind::Empty | FacetKind::Exists => value_out, | 
		
	
		
			
				|  |  |  |  |             }; | 
		
	
		
			
				|  |  |  |  |             CboRoaringBitmapCodec::serialize_into_writer(bitmap, value_out)?; | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |             // We could commit only the used memory. | 
		
	
		
			
				|  |  |  |  |             grant.commit(total_length); | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |             break Ok(()); | 
		
	
		
			
				|  |  |  |  |         } | 
		
	
		
			
				|  |  |  |  |     } | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |     pub fn delete(&self, key: &[u8]) -> crate::Result<()> { | 
		
	
		
			
				|  |  |  |  |         let capacity = self.sender.capacity; | 
		
	
		
			
				|  |  |  |  |         let refcell = self.sender.inner.get().unwrap(); | 
		
	
		
			
				|  |  |  |  |         let mut producer = refcell.0.borrow_mut_or_yield(); | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |         let (facet_kind, key) = FacetKind::extract_from_key(key); | 
		
	
		
			
				|  |  |  |  |         let key_length = key.len().try_into().unwrap(); | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |         let total_length = EntryHeader::delete_key_size(key_length); | 
		
	
		
			
				|  |  |  |  |         if total_length > capacity { | 
		
	
		
			
				|  |  |  |  |             unreachable!("entry larger that the bbqueue capacity"); | 
		
	
		
			
				|  |  |  |  |         } | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |         let payload_header = | 
		
	
		
			
				|  |  |  |  |             EntryHeader::DbOperation { database: Database::from(facet_kind), key_length: None }; | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |         loop { | 
		
	
		
			
				|  |  |  |  |             let mut grant = match producer.grant(total_length) { | 
		
	
		
			
				|  |  |  |  |                 Ok(grant) => grant, | 
		
	
		
			
				|  |  |  |  |                 Err(bbqueue::Error::InsufficientSize) => continue, | 
		
	
		
			
				|  |  |  |  |                 Err(e) => unreachable!("{e:?}"), | 
		
	
		
			
				|  |  |  |  |             }; | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |             let (header, remaining) = grant.split_at_mut(mem::size_of::<EntryHeader>()); | 
		
	
		
			
				|  |  |  |  |             header.copy_from_slice(payload_header.bytes_of()); | 
		
	
		
			
				|  |  |  |  |             remaining.copy_from_slice(key); | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |             // We could commit only the used memory. | 
		
	
		
			
				|  |  |  |  |             grant.commit(total_length); | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |             break Ok(()); | 
		
	
		
			
				|  |  |  |  |         } | 
		
	
		
			
				|  |  |  |  |     } | 
		
	
		
			
				|  |  |  |  | } | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  | pub struct FieldIdDocidFacetSender<'a>(&'a ExtractorSender); | 
		
	
		
			
				|  |  |  |  | pub struct FieldIdDocidFacetSender<'a, 'b>(&'a ExtractorBbqueueSender<'b>); | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  | impl FieldIdDocidFacetSender<'_> { | 
		
	
		
			
				|  |  |  |  |     pub fn write_facet_string(&self, key: &[u8], value: &[u8]) -> StdResult<(), SendError<()>> { | 
		
	
		
			
				|  |  |  |  | impl FieldIdDocidFacetSender<'_, '_> { | 
		
	
		
			
				|  |  |  |  |     pub fn write_facet_string(&self, key: &[u8], value: &[u8]) -> crate::Result<()> { | 
		
	
		
			
				|  |  |  |  |         debug_assert!(FieldDocIdFacetStringCodec::bytes_decode(key).is_ok()); | 
		
	
		
			
				|  |  |  |  |         let entry = EntryOperation::Write(KeyValueEntry::from_small_key_value(key, value)); | 
		
	
		
			
				|  |  |  |  |         self.0 | 
		
	
		
			
				|  |  |  |  |             .send_db_operation(DbOperation { database: Database::FieldIdDocidFacetStrings, entry }) | 
		
	
		
			
				|  |  |  |  |         self.0.write_key_value(Database::FieldIdDocidFacetStrings, key, value) | 
		
	
		
			
				|  |  |  |  |     } | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |     pub fn write_facet_f64(&self, key: &[u8]) -> StdResult<(), SendError<()>> { | 
		
	
		
			
				|  |  |  |  |     pub fn write_facet_f64(&self, key: &[u8]) -> crate::Result<()> { | 
		
	
		
			
				|  |  |  |  |         debug_assert!(FieldDocIdFacetF64Codec::bytes_decode(key).is_ok()); | 
		
	
		
			
				|  |  |  |  |         let entry = EntryOperation::Write(KeyValueEntry::from_small_key_value(key, &[])); | 
		
	
		
			
				|  |  |  |  |         self.0.send_db_operation(DbOperation { database: Database::FieldIdDocidFacetF64s, entry }) | 
		
	
		
			
				|  |  |  |  |         self.0.write_key_value(Database::FieldIdDocidFacetF64s, key, &[]) | 
		
	
		
			
				|  |  |  |  |     } | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |     pub fn delete_facet_string(&self, key: &[u8]) -> StdResult<(), SendError<()>> { | 
		
	
		
			
				|  |  |  |  |     pub fn delete_facet_string(&self, key: &[u8]) -> crate::Result<()> { | 
		
	
		
			
				|  |  |  |  |         debug_assert!(FieldDocIdFacetStringCodec::bytes_decode(key).is_ok()); | 
		
	
		
			
				|  |  |  |  |         let entry = EntryOperation::Delete(KeyEntry::from_key(key)); | 
		
	
		
			
				|  |  |  |  |         self.0 | 
		
	
		
			
				|  |  |  |  |             .send_db_operation(DbOperation { database: Database::FieldIdDocidFacetStrings, entry }) | 
		
	
		
			
				|  |  |  |  |         self.0.delete_entry(Database::FieldIdDocidFacetStrings, key) | 
		
	
		
			
				|  |  |  |  |     } | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |     pub fn delete_facet_f64(&self, key: &[u8]) -> StdResult<(), SendError<()>> { | 
		
	
		
			
				|  |  |  |  |     pub fn delete_facet_f64(&self, key: &[u8]) -> crate::Result<()> { | 
		
	
		
			
				|  |  |  |  |         debug_assert!(FieldDocIdFacetF64Codec::bytes_decode(key).is_ok()); | 
		
	
		
			
				|  |  |  |  |         let entry = EntryOperation::Delete(KeyEntry::from_key(key)); | 
		
	
		
			
				|  |  |  |  |         self.0.send_db_operation(DbOperation { database: Database::FieldIdDocidFacetF64s, entry }) | 
		
	
		
			
				|  |  |  |  |         self.0.delete_entry(Database::FieldIdDocidFacetF64s, key) | 
		
	
		
			
				|  |  |  |  |     } | 
		
	
		
			
				|  |  |  |  | } | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  | pub struct DocumentsSender<'a>(&'a ExtractorSender); | 
		
	
		
			
				|  |  |  |  | pub struct DocumentsSender<'a, 'b>(&'a ExtractorBbqueueSender<'b>); | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  | impl DocumentsSender<'_> { | 
		
	
		
			
				|  |  |  |  | impl DocumentsSender<'_, '_> { | 
		
	
		
			
				|  |  |  |  |     /// TODO do that efficiently | 
		
	
		
			
				|  |  |  |  |     pub fn uncompressed( | 
		
	
		
			
				|  |  |  |  |         &self, | 
		
	
		
			
				|  |  |  |  |         docid: DocumentId, | 
		
	
		
			
				|  |  |  |  |         external_id: String, | 
		
	
		
			
				|  |  |  |  |         document: &KvReaderFieldId, | 
		
	
		
			
				|  |  |  |  |     ) -> StdResult<(), SendError<()>> { | 
		
	
		
			
				|  |  |  |  |         let entry = EntryOperation::Write(KeyValueEntry::from_small_key_value( | 
		
	
		
			
				|  |  |  |  |             &docid.to_be_bytes(), | 
		
	
		
			
				|  |  |  |  |             document.as_bytes(), | 
		
	
		
			
				|  |  |  |  |         )); | 
		
	
		
			
				|  |  |  |  |         match self.0.send_db_operation(DbOperation { database: Database::Documents, entry }) { | 
		
	
		
			
				|  |  |  |  |             Ok(()) => Ok(()), | 
		
	
		
			
				|  |  |  |  |             Err(SendError(_)) => Err(SendError(())), | 
		
	
		
			
				|  |  |  |  |         }?; | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |         let entry = EntryOperation::Write(KeyValueEntry::from_small_key_value( | 
		
	
		
			
				|  |  |  |  |     ) -> crate::Result<()> { | 
		
	
		
			
				|  |  |  |  |         self.0.write_key_value(Database::Documents, &docid.to_be_bytes(), document.as_bytes())?; | 
		
	
		
			
				|  |  |  |  |         self.0.write_key_value( | 
		
	
		
			
				|  |  |  |  |             Database::ExternalDocumentsIds, | 
		
	
		
			
				|  |  |  |  |             external_id.as_bytes(), | 
		
	
		
			
				|  |  |  |  |             &docid.to_be_bytes(), | 
		
	
		
			
				|  |  |  |  |         )); | 
		
	
		
			
				|  |  |  |  |         match self | 
		
	
		
			
				|  |  |  |  |             .0 | 
		
	
		
			
				|  |  |  |  |             .send_db_operation(DbOperation { database: Database::ExternalDocumentsIds, entry }) | 
		
	
		
			
				|  |  |  |  |         { | 
		
	
		
			
				|  |  |  |  |             Ok(()) => Ok(()), | 
		
	
		
			
				|  |  |  |  |             Err(SendError(_)) => Err(SendError(())), | 
		
	
		
			
				|  |  |  |  |         } | 
		
	
		
			
				|  |  |  |  |         ) | 
		
	
		
			
				|  |  |  |  |     } | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |     pub fn delete(&self, docid: DocumentId, external_id: String) -> StdResult<(), SendError<()>> { | 
		
	
		
			
				|  |  |  |  |         let entry = EntryOperation::Delete(KeyEntry::from_key(&docid.to_be_bytes())); | 
		
	
		
			
				|  |  |  |  |         match self.0.send_db_operation(DbOperation { database: Database::Documents, entry }) { | 
		
	
		
			
				|  |  |  |  |             Ok(()) => Ok(()), | 
		
	
		
			
				|  |  |  |  |             Err(SendError(_)) => Err(SendError(())), | 
		
	
		
			
				|  |  |  |  |         }?; | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |     pub fn delete(&self, docid: DocumentId, external_id: String) -> crate::Result<()> { | 
		
	
		
			
				|  |  |  |  |         self.0.delete_entry(Database::Documents, &docid.to_be_bytes())?; | 
		
	
		
			
				|  |  |  |  |         self.0.send_delete_vector(docid)?; | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |         let entry = EntryOperation::Delete(KeyEntry::from_key(external_id.as_bytes())); | 
		
	
		
			
				|  |  |  |  |         match self | 
		
	
		
			
				|  |  |  |  |             .0 | 
		
	
		
			
				|  |  |  |  |             .send_db_operation(DbOperation { database: Database::ExternalDocumentsIds, entry }) | 
		
	
		
			
				|  |  |  |  |         { | 
		
	
		
			
				|  |  |  |  |             Ok(()) => Ok(()), | 
		
	
		
			
				|  |  |  |  |             Err(SendError(_)) => Err(SendError(())), | 
		
	
		
			
				|  |  |  |  |         } | 
		
	
		
			
				|  |  |  |  |         self.0.delete_entry(Database::ExternalDocumentsIds, external_id.as_bytes()) | 
		
	
		
			
				|  |  |  |  |     } | 
		
	
		
			
				|  |  |  |  | } | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  | pub struct EmbeddingSender<'a>(&'a Sender<WriterOperation>); | 
		
	
		
			
				|  |  |  |  | pub struct EmbeddingSender<'a, 'b>(&'a ExtractorBbqueueSender<'b>); | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  | impl EmbeddingSender<'_> { | 
		
	
		
			
				|  |  |  |  | impl EmbeddingSender<'_, '_> { | 
		
	
		
			
				|  |  |  |  |     pub fn set_vectors( | 
		
	
		
			
				|  |  |  |  |         &self, | 
		
	
		
			
				|  |  |  |  |         docid: DocumentId, | 
		
	
		
			
				|  |  |  |  |         embedder_id: u8, | 
		
	
		
			
				|  |  |  |  |         embeddings: Vec<Embedding>, | 
		
	
		
			
				|  |  |  |  |     ) -> StdResult<(), SendError<()>> { | 
		
	
		
			
				|  |  |  |  |     ) -> crate::Result<()> { | 
		
	
		
			
				|  |  |  |  |         self.0 | 
		
	
		
			
				|  |  |  |  |             .send(WriterOperation::ArroyOperation(ArroyOperation::SetVectors { | 
		
	
		
			
				|  |  |  |  |                 docid, | 
		
	
	
		
			
				
					
					|  |  |  | @@ -541,33 +611,36 @@ impl EmbeddingSender<'_> { | 
		
	
		
			
				|  |  |  |  |     } | 
		
	
		
			
				|  |  |  |  | } | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  | pub struct GeoSender<'a>(&'a Sender<WriterOperation>); | 
		
	
		
			
				|  |  |  |  | pub struct GeoSender<'a, 'b>(&'a ExtractorBbqueueSender<'b>); | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  | impl GeoSender<'_> { | 
		
	
		
			
				|  |  |  |  | impl GeoSender<'_, '_> { | 
		
	
		
			
				|  |  |  |  |     pub fn set_rtree(&self, value: Mmap) -> StdResult<(), SendError<()>> { | 
		
	
		
			
				|  |  |  |  |         self.0 | 
		
	
		
			
				|  |  |  |  |             .send(WriterOperation::DbOperation(DbOperation { | 
		
	
		
			
				|  |  |  |  |                 database: Database::Main, | 
		
	
		
			
				|  |  |  |  |                 entry: EntryOperation::Write(KeyValueEntry::from_large_key_value( | 
		
	
		
			
				|  |  |  |  |                     GEO_RTREE_KEY.as_bytes(), | 
		
	
		
			
				|  |  |  |  |                     value, | 
		
	
		
			
				|  |  |  |  |                 )), | 
		
	
		
			
				|  |  |  |  |             })) | 
		
	
		
			
				|  |  |  |  |             .map_err(|_| SendError(())) | 
		
	
		
			
				|  |  |  |  |         todo!("set rtree from file") | 
		
	
		
			
				|  |  |  |  |         // self.0 | 
		
	
		
			
				|  |  |  |  |         //     .send(WriterOperation::DbOperation(DbOperation { | 
		
	
		
			
				|  |  |  |  |         //         database: Database::Main, | 
		
	
		
			
				|  |  |  |  |         //         entry: EntryOperation::Write(KeyValueEntry::from_large_key_value( | 
		
	
		
			
				|  |  |  |  |         //             GEO_RTREE_KEY.as_bytes(), | 
		
	
		
			
				|  |  |  |  |         //             value, | 
		
	
		
			
				|  |  |  |  |         //         )), | 
		
	
		
			
				|  |  |  |  |         //     })) | 
		
	
		
			
				|  |  |  |  |         //     .map_err(|_| SendError(())) | 
		
	
		
			
				|  |  |  |  |     } | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |     pub fn set_geo_faceted(&self, bitmap: &RoaringBitmap) -> StdResult<(), SendError<()>> { | 
		
	
		
			
				|  |  |  |  |         let mut buffer = Vec::new(); | 
		
	
		
			
				|  |  |  |  |         bitmap.serialize_into(&mut buffer).unwrap(); | 
		
	
		
			
				|  |  |  |  |         todo!("serialize directly into bbqueue (as a real roaringbitmap not a cbo)") | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |         self.0 | 
		
	
		
			
				|  |  |  |  |             .send(WriterOperation::DbOperation(DbOperation { | 
		
	
		
			
				|  |  |  |  |                 database: Database::Main, | 
		
	
		
			
				|  |  |  |  |                 entry: EntryOperation::Write(KeyValueEntry::from_small_key_value( | 
		
	
		
			
				|  |  |  |  |                     GEO_FACETED_DOCUMENTS_IDS_KEY.as_bytes(), | 
		
	
		
			
				|  |  |  |  |                     &buffer, | 
		
	
		
			
				|  |  |  |  |                 )), | 
		
	
		
			
				|  |  |  |  |             })) | 
		
	
		
			
				|  |  |  |  |             .map_err(|_| SendError(())) | 
		
	
		
			
				|  |  |  |  |         // let mut buffer = Vec::new(); | 
		
	
		
			
				|  |  |  |  |         // bitmap.serialize_into(&mut buffer).unwrap(); | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |         // self.0 | 
		
	
		
			
				|  |  |  |  |         //     .send(WriterOperation::DbOperation(DbOperation { | 
		
	
		
			
				|  |  |  |  |         //         database: Database::Main, | 
		
	
		
			
				|  |  |  |  |         //         entry: EntryOperation::Write(KeyValueEntry::from_small_key_value( | 
		
	
		
			
				|  |  |  |  |         //             GEO_FACETED_DOCUMENTS_IDS_KEY.as_bytes(), | 
		
	
		
			
				|  |  |  |  |         //             &buffer, | 
		
	
		
			
				|  |  |  |  |         //         )), | 
		
	
		
			
				|  |  |  |  |         //     })) | 
		
	
		
			
				|  |  |  |  |         //     .map_err(|_| SendError(())) | 
		
	
		
			
				|  |  |  |  |     } | 
		
	
		
			
				|  |  |  |  | } | 
		
	
	
		
			
				
					
					| 
							
							
							
						 |  |  |   |