diff --git a/crates/meilisearch/db.snapshot b/crates/meilisearch/db.snapshot
new file mode 100644
index 000000000..29377ce42
Binary files /dev/null and b/crates/meilisearch/db.snapshot differ
diff --git a/crates/meilisearch/src/analytics/mock_analytics.rs b/crates/meilisearch/src/analytics/mock_analytics.rs
index 54b8d4f1b..062240018 100644
--- a/crates/meilisearch/src/analytics/mock_analytics.rs
+++ b/crates/meilisearch/src/analytics/mock_analytics.rs
@@ -104,6 +104,4 @@ impl Analytics for MockAnalytics {
         _request: &HttpRequest,
     ) {
     }
-    fn get_fetch_documents(&self, _documents_query: &DocumentFetchKind, _request: &HttpRequest) {}
-    fn post_fetch_documents(&self, _documents_query: &DocumentFetchKind, _request: &HttpRequest) {}
 }
diff --git a/crates/meilisearch/src/analytics/mod.rs b/crates/meilisearch/src/analytics/mod.rs
index bd14b0bfa..0d1a860e1 100644
--- a/crates/meilisearch/src/analytics/mod.rs
+++ b/crates/meilisearch/src/analytics/mod.rs
@@ -73,12 +73,6 @@ pub enum DocumentDeletionKind {
     PerFilter,
 }
 
-#[derive(Copy, Clone, Debug, PartialEq, Eq)]
-pub enum DocumentFetchKind {
-    PerDocumentId { retrieve_vectors: bool },
-    Normal { with_filter: bool, limit: usize, offset: usize, retrieve_vectors: bool },
-}
-
 /// To send an event to segment, your event must be able to aggregate itself with another event of the same type.
 pub trait Aggregate: 'static + mopa::Any + Send {
     /// The name of the event that will be sent to segment.
diff --git a/crates/meilisearch/src/routes/indexes/documents.rs b/crates/meilisearch/src/routes/indexes/documents.rs
index 9c8d28e04..89af06ade 100644
--- a/crates/meilisearch/src/routes/indexes/documents.rs
+++ b/crates/meilisearch/src/routes/indexes/documents.rs
@@ -156,52 +156,6 @@ pub struct DocumentsFetchAggregator<Method: AggregateMethod> {
     marker: std::marker::PhantomData<Method>,
 }
 
-#[derive(Copy, Clone, Debug, PartialEq, Eq)]
-pub enum DocumentFetchKind {
-    PerDocumentId {
-        retrieve_vectors: bool,
-        sort: bool,
-    },
-    Normal {
-        with_filter: bool,
-        limit: usize,
-        offset: usize,
-        retrieve_vectors: bool,
-        sort: bool,
-        ids: usize,
-    },
-}
-
-impl<Method: AggregateMethod> DocumentsFetchAggregator<Method> {
-    pub fn from_query(query: &DocumentFetchKind) -> Self {
-        let (limit, offset, retrieve_vectors, sort) = match query {
-            DocumentFetchKind::PerDocumentId { retrieve_vectors, sort } => {
-                (1, 0, *retrieve_vectors, *sort)
-            }
-            DocumentFetchKind::Normal { limit, offset, retrieve_vectors, sort, .. } => {
-                (*limit, *offset, *retrieve_vectors, *sort)
-            }
-        };
-
-        let ids = match query {
-            DocumentFetchKind::Normal { ids, .. } => *ids,
-            DocumentFetchKind::PerDocumentId { .. } => 0,
-        };
-
-        Self {
-            per_document_id: matches!(query, DocumentFetchKind::PerDocumentId { .. }),
-            per_filter: matches!(query, DocumentFetchKind::Normal { with_filter, .. } if *with_filter),
-            max_limit: limit,
-            max_offset: offset,
-            sort,
-            retrieve_vectors,
-            max_document_ids: ids,
-
-            marker: PhantomData,
-        }
-    }
-}
-
 impl<Method: AggregateMethod> Aggregate for DocumentsFetchAggregator<Method> {
     fn event_name(&self) -> &'static str {
         Method::event_name()
@@ -1573,16 +1527,19 @@ fn retrieve_documents<S: AsRef<str>>(
         })?
     }
 
-    let facet_sort;
     let (it, number_of_documents) = if let Some(sort) = sort_criteria {
         let number_of_documents = candidates.len();
-        facet_sort = recursive_sort(index, &rtxn, sort, &candidates)?;
+        let facet_sort = recursive_sort(index, &rtxn, sort, &candidates)?;
         let iter = facet_sort.iter()?;
+        let mut documents = Vec::with_capacity(limit);
+        for result in iter.skip(offset).take(limit) {
+            documents.push(result?);
+        }
         (
             itertools::Either::Left(some_documents(
                 index,
                 &rtxn,
-                iter.map(|d| d.unwrap()).skip(offset).take(limit),
+                documents.into_iter(),
                 retrieve_vectors,
             )?),
             number_of_documents,
diff --git a/crates/milli/src/documents/sort.rs b/crates/milli/src/documents/sort.rs
index 59858caad..3866d9e27 100644
--- a/crates/milli/src/documents/sort.rs
+++ b/crates/milli/src/documents/sort.rs
@@ -72,6 +72,10 @@ impl Iterator for SortedDocumentsIterator<'_> {
     /// The default implementation of `nth` would iterate over all children, which is inefficient for large datasets.
     /// This implementation will jump over whole chunks of children until it gets close.
     fn nth(&mut self, n: usize) -> Option<Self::Item> {
+        if n == 0 {
+            return self.next();
+        }
+
         // If it's at the leaf level, just forward the call to the values iterator
         let (current_child, next_children, next_children_size) = match self {
             SortedDocumentsIterator::Leaf { values, size } => {
@@ -189,41 +193,54 @@ impl<'ctx> SortedDocumentsIteratorBuilder<'ctx> {
     fn build(self) -> crate::Result<SortedDocumentsIterator<'ctx>> {
         let size = self.candidates.len() as usize;
 
-        // There is no point sorting a 1-element array
-        if size <= 1 {
-            return Ok(SortedDocumentsIterator::Leaf {
-                size,
-                values: Box::new(self.candidates.into_iter()),
-            });
-        }
-
-        match self.fields.first().copied() {
-            Some(AscDescId::Facet { field_id, ascending }) => self.build_facet(field_id, ascending),
-            Some(AscDescId::Geo { field_ids, target_point, ascending }) => {
-                self.build_geo(field_ids, target_point, ascending)
-            }
-            None => Ok(SortedDocumentsIterator::Leaf {
+        match self.fields {
+            [] => Ok(SortedDocumentsIterator::Leaf {
                 size,
                 values: Box::new(self.candidates.into_iter()),
             }),
+            [AscDescId::Facet { field_id, ascending }, next_fields @ ..] => {
+                SortedDocumentsIteratorBuilder::build_facet(
+                    self.index,
+                    self.rtxn,
+                    self.number_db,
+                    self.string_db,
+                    next_fields,
+                    self.candidates,
+                    self.geo_candidates,
+                    *field_id,
+                    *ascending,
+                )
+            }
+            [AscDescId::Geo { field_ids, target_point, ascending }, next_fields @ ..] => {
+                SortedDocumentsIteratorBuilder::build_geo(
+                    self.index,
+                    self.rtxn,
+                    self.number_db,
+                    self.string_db,
+                    next_fields,
+                    self.candidates,
+                    self.geo_candidates,
+                    *field_ids,
+                    *target_point,
+                    *ascending,
+                )
+            }
         }
     }
 
     /// Builds a [`SortedDocumentsIterator`] based on the results of a facet sort.
+    #[allow(clippy::too_many_arguments)]
     fn build_facet(
-        self,
+        index: &'ctx crate::Index,
+        rtxn: &'ctx heed::RoTxn<'ctx>,
+        number_db: Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
+        string_db: Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
+        next_fields: &'ctx [AscDescId],
+        candidates: RoaringBitmap,
+        geo_candidates: &'ctx RoaringBitmap,
         field_id: u16,
         ascending: bool,
     ) -> crate::Result<SortedDocumentsIterator<'ctx>> {
-        let SortedDocumentsIteratorBuilder {
-            index,
-            rtxn,
-            number_db,
-            string_db,
-            fields,
-            candidates,
-            geo_candidates,
-        } = self;
         let size = candidates.len() as usize;
 
         // Perform the sort on the first field
@@ -248,7 +265,7 @@ impl<'ctx> SortedDocumentsIteratorBuilder<'ctx> {
                     rtxn,
                     number_db,
                     string_db,
-                    fields: &fields[1..],
+                    fields: next_fields,
                     candidates: r?,
                     geo_candidates,
                 })
@@ -262,22 +279,19 @@ impl<'ctx> SortedDocumentsIteratorBuilder<'ctx> {
     }
 
     /// Builds a [`SortedDocumentsIterator`] based on the (lazy) results of a geo sort.
+    #[allow(clippy::too_many_arguments)]
     fn build_geo(
-        self,
+        index: &'ctx crate::Index,
+        rtxn: &'ctx heed::RoTxn<'ctx>,
+        number_db: Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
+        string_db: Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
+        next_fields: &'ctx [AscDescId],
+        candidates: RoaringBitmap,
+        geo_candidates: &'ctx RoaringBitmap,
         field_ids: [u16; 2],
         target_point: [f64; 2],
         ascending: bool,
     ) -> crate::Result<SortedDocumentsIterator<'ctx>> {
-        let SortedDocumentsIteratorBuilder {
-            index,
-            rtxn,
-            number_db,
-            string_db,
-            fields,
-            candidates,
-            geo_candidates,
-        } = self;
-
         let mut cache = VecDeque::new();
         let mut rtree = None;
         let size = candidates.len() as usize;
@@ -307,7 +321,7 @@ impl<'ctx> SortedDocumentsIteratorBuilder<'ctx> {
                         rtxn,
                         number_db,
                         string_db,
-                        fields: &fields[1..],
+                        fields: next_fields,
                         candidates: docids,
                         geo_candidates,
                     }));
@@ -322,7 +336,7 @@ impl<'ctx> SortedDocumentsIteratorBuilder<'ctx> {
                         rtxn,
                         number_db,
                         string_db,
-                        fields: &fields[1..],
+                        fields: next_fields,
                         candidates: not_geo_candidates,
                         geo_candidates,
                     }));
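
A note on the `retrieve_documents` hunk: the old code mapped `|d| d.unwrap()` over the lazily sorted iterator, so one corrupt entry would panic the route, while the rewrite drains the requested page into a `Vec` first and lets `?` turn the failure into a proper error response. A minimal, self-contained sketch of that collect-with-`?` pattern, assuming only the standard library (the `page` helper and its toy inputs are illustrative, not part of the patch):

```rust
/// Collects up to `limit` items from a fallible iterator after skipping
/// `offset`, propagating the first error instead of panicking on it.
fn page<T, E>(
    iter: impl Iterator<Item = Result<T, E>>,
    offset: usize,
    limit: usize,
) -> Result<Vec<T>, E> {
    let mut out = Vec::with_capacity(limit);
    for item in iter.skip(offset).take(limit) {
        out.push(item?); // `?` propagates the error; `.unwrap()` would panic here
    }
    Ok(out)
}

fn main() {
    let items = vec![Ok::<_, String>(1), Ok(2), Err("broken entry".to_string()), Ok(4)];
    assert_eq!(page(items.clone().into_iter(), 0, 2), Ok(vec![1, 2]));
    assert_eq!(page(items.into_iter(), 1, 3), Err("broken entry".to_string()));
}
```

One behavioral detail worth noting: in the patched code `skip(offset)` runs before any `?` is applied, so an error hiding in the skipped prefix is passed over silently, whereas the old `unwrap` would have panicked even on those.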
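
On the new guard in `nth`: `Iterator::nth(0)` is specified to consume and return the same element as `next()`, and forwarding that case up front means the chunk-skipping arithmetic below only ever runs with `n >= 1`. A reduced sketch of the same shape, a two-level iterator whose `nth` jumps over whole chunks by size (the `Chunked` type is invented for the example; the real iterator descends over facet children):

```rust
use std::vec::IntoIter;

/// A two-level iterator, shaped like `SortedDocumentsIterator`: an outer
/// sequence of chunks, each holding a run of already-sorted values.
struct Chunked {
    chunks: IntoIter<Vec<u32>>,
    current: IntoIter<u32>,
}

impl Iterator for Chunked {
    type Item = u32;

    fn next(&mut self) -> Option<u32> {
        loop {
            if let Some(v) = self.current.next() {
                return Some(v);
            }
            self.current = self.chunks.next()?.into_iter();
        }
    }

    fn nth(&mut self, mut n: usize) -> Option<u32> {
        // `nth(0)` must behave exactly like `next()`; returning early, as the
        // patch does, lets the skip logic below assume it has something to jump.
        if n == 0 {
            return self.next();
        }
        // Skip the tail of the current chunk in one step.
        let remaining = self.current.len();
        if n < remaining {
            return self.current.nth(n);
        }
        n -= remaining;
        self.current = Vec::new().into_iter();
        // Then skip whole chunks by their size alone, never visiting their values.
        while let Some(chunk) = self.chunks.next() {
            if n < chunk.len() {
                self.current = chunk.into_iter();
                return self.current.nth(n);
            }
            n -= chunk.len();
        }
        None
    }
}

fn main() {
    let mut it = Chunked {
        chunks: vec![vec![3, 4], vec![5, 6, 7]].into_iter(),
        current: vec![1, 2].into_iter(),
    };
    assert_eq!(it.nth(3), Some(4)); // skips 1, 2, 3; chunk [5, 6, 7] is never opened
    assert_eq!(it.next(), Some(5));
    assert_eq!(it.nth(0), Some(6)); // exactly equivalent to `next()`
}
```

This matters for the pagination fix above as well: `skip(offset)` leans on `nth` internally, so the `GET /documents` sort path exercises exactly this code.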
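
Finally, the builder refactor replaces `self.fields.first().copied()` plus `fields: &fields[1..]` with slice patterns, so each match arm binds the head field and the `next_fields @ ..` tail directly. A standalone sketch of that head/tail recursion shape, with an invented `Field` enum standing in for `AscDescId`:

```rust
/// Stand-in for `AscDescId`: the head field sorts at this level,
/// the tail breaks ties at the next level down.
enum Field {
    Facet { field_id: u16, ascending: bool },
    Geo { ascending: bool },
}

/// Recurses on the field list the way `build` now does: match the slice,
/// bind the head, and hand `rest @ ..` to the next level instead of
/// re-slicing with `&fields[1..]`.
fn describe(fields: &[Field]) -> String {
    match fields {
        // No fields left: candidates keep their document-id order.
        [] => "leaf".to_string(),
        [Field::Facet { field_id, ascending }, rest @ ..] => {
            format!("facet({field_id}, asc={ascending}) -> {}", describe(rest))
        }
        [Field::Geo { ascending }, rest @ ..] => {
            format!("geo(asc={ascending}) -> {}", describe(rest))
        }
    }
}

fn main() {
    let plan = [
        Field::Facet { field_id: 0, ascending: true },
        Field::Geo { ascending: false },
    ];
    assert_eq!(describe(&plan), "facet(0, asc=true) -> geo(asc=false) -> leaf");
}
```

The associated-function form in the patch (`SortedDocumentsIteratorBuilder::build_facet(...)` with explicit arguments) avoids borrowing `self.fields` across a call that would consume `self`, at the cost of the argument count that the new `#[allow(clippy::too_many_arguments)]` attributes acknowledge.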