Apply review suggestions

Add preconditions Fix underflow Remove unwrap Turn methods to associated functions Apply review suggestions
2025-07-26 00:01:00 +00:00 · 2025-07-15 17:04:08 +02:00
parent f60814b319
commit 77138a42d6
5 changed files with 58 additions and 95 deletions
--- a/crates/meilisearch/db.snapshot
+++ b/crates/meilisearch/db.snapshot
--- a/crates/meilisearch/src/analytics/mock_analytics.rs
+++ b/crates/meilisearch/src/analytics/mock_analytics.rs
@ -104,6 +104,4 @@ impl Analytics for MockAnalytics {
        _request: &HttpRequest,
    ) {
    }
-    fn get_fetch_documents(&self, _documents_query: &DocumentFetchKind, _request: &HttpRequest) {}
-    fn post_fetch_documents(&self, _documents_query: &DocumentFetchKind, _request: &HttpRequest) {}
 }
--- a/crates/meilisearch/src/analytics/mod.rs
+++ b/crates/meilisearch/src/analytics/mod.rs
@ -73,12 +73,6 @@ pub enum DocumentDeletionKind {
    PerFilter,
 }

-#[derive(Copy, Clone, Debug, PartialEq, Eq)]
-pub enum DocumentFetchKind {
-    PerDocumentId { retrieve_vectors: bool },
-    Normal { with_filter: bool, limit: usize, offset: usize, retrieve_vectors: bool },
-}
-
 /// To send an event to segment, your event must be able to aggregate itself with another event of the same type.
 pub trait Aggregate: 'static + mopa::Any + Send {
    /// The name of the event that will be sent to segment.
--- a/crates/meilisearch/src/routes/indexes/documents.rs
+++ b/crates/meilisearch/src/routes/indexes/documents.rs
@ -156,52 +156,6 @@ pub struct DocumentsFetchAggregator<Method: AggregateMethod> {
    marker: std::marker::PhantomData<Method>,
 }

-#[derive(Copy, Clone, Debug, PartialEq, Eq)]
-pub enum DocumentFetchKind {
-    PerDocumentId {
-        retrieve_vectors: bool,
-        sort: bool,
-    },
-    Normal {
-        with_filter: bool,
-        limit: usize,
-        offset: usize,
-        retrieve_vectors: bool,
-        sort: bool,
-        ids: usize,
-    },
-}
-
-impl<Method: AggregateMethod> DocumentsFetchAggregator<Method> {
-    pub fn from_query(query: &DocumentFetchKind) -> Self {
-        let (limit, offset, retrieve_vectors, sort) = match query {
-            DocumentFetchKind::PerDocumentId { retrieve_vectors, sort } => {
-                (1, 0, *retrieve_vectors, *sort)
-            }
-            DocumentFetchKind::Normal { limit, offset, retrieve_vectors, sort, .. } => {
-                (*limit, *offset, *retrieve_vectors, *sort)
-            }
-        };
-
-        let ids = match query {
-            DocumentFetchKind::Normal { ids, .. } => *ids,
-            DocumentFetchKind::PerDocumentId { .. } => 0,
-        };
-
-        Self {
-            per_document_id: matches!(query, DocumentFetchKind::PerDocumentId { .. }),
-            per_filter: matches!(query, DocumentFetchKind::Normal { with_filter, .. } if *with_filter),
-            max_limit: limit,
-            max_offset: offset,
-            sort,
-            retrieve_vectors,
-            max_document_ids: ids,
-
-            marker: PhantomData,
-        }
-    }
-}
-
 impl<Method: AggregateMethod> Aggregate for DocumentsFetchAggregator<Method> {
    fn event_name(&self) -> &'static str {
        Method::event_name()
@ -1573,16 +1527,19 @@ fn retrieve_documents<S: AsRef<str>>(
        })?
    }

-    let facet_sort;
    let (it, number_of_documents) = if let Some(sort) = sort_criteria {
        let number_of_documents = candidates.len();
-        facet_sort = recursive_sort(index, &rtxn, sort, &candidates)?;
+        let facet_sort = recursive_sort(index, &rtxn, sort, &candidates)?;
        let iter = facet_sort.iter()?;
+        let mut documents = Vec::with_capacity(limit);
+        for result in iter.skip(offset).take(limit) {
+            documents.push(result?);
+        }
        (
            itertools::Either::Left(some_documents(
                index,
                &rtxn,
-                iter.map(|d| d.unwrap()).skip(offset).take(limit),
+                documents.into_iter(),
                retrieve_vectors,
            )?),
            number_of_documents,
--- a/crates/milli/src/documents/sort.rs
+++ b/crates/milli/src/documents/sort.rs
@ -72,6 +72,10 @@ impl Iterator for SortedDocumentsIterator<'_> {
    /// The default implementation of `nth` would iterate over all children, which is inefficient for large datasets.
    /// This implementation will jump over whole chunks of children until it gets close.
    fn nth(&mut self, n: usize) -> Option<Self::Item> {
+        if n == 0 {
+            return self.next();
+        }
+
        // If it's at the leaf level, just forward the call to the values iterator
        let (current_child, next_children, next_children_size) = match self {
            SortedDocumentsIterator::Leaf { values, size } => {
@ -189,41 +193,54 @@ impl<'ctx> SortedDocumentsIteratorBuilder<'ctx> {
    fn build(self) -> crate::Result<SortedDocumentsIterator<'ctx>> {
        let size = self.candidates.len() as usize;

-        // There is no point sorting a 1-element array
-        if size <= 1 {
-            return Ok(SortedDocumentsIterator::Leaf {
-                size,
-                values: Box::new(self.candidates.into_iter()),
-            });
-        }
-
-        match self.fields.first().copied() {
-            Some(AscDescId::Facet { field_id, ascending }) => self.build_facet(field_id, ascending),
-            Some(AscDescId::Geo { field_ids, target_point, ascending }) => {
-                self.build_geo(field_ids, target_point, ascending)
-            }
-            None => Ok(SortedDocumentsIterator::Leaf {
+        match self.fields {
+            [] => Ok(SortedDocumentsIterator::Leaf {
                size,
                values: Box::new(self.candidates.into_iter()),
            }),
+            [AscDescId::Facet { field_id, ascending }, next_fields @ ..] => {
+                SortedDocumentsIteratorBuilder::build_facet(
+                    self.index,
+                    self.rtxn,
+                    self.number_db,
+                    self.string_db,
+                    next_fields,
+                    self.candidates,
+                    self.geo_candidates,
+                    *field_id,
+                    *ascending,
+                )
+            }
+            [AscDescId::Geo { field_ids, target_point, ascending }, next_fields @ ..] => {
+                SortedDocumentsIteratorBuilder::build_geo(
+                    self.index,
+                    self.rtxn,
+                    self.number_db,
+                    self.string_db,
+                    next_fields,
+                    self.candidates,
+                    self.geo_candidates,
+                    *field_ids,
+                    *target_point,
+                    *ascending,
+                )
+            }
        }
    }

    /// Builds a [`SortedDocumentsIterator`] based on the results of a facet sort.
+    #[allow(clippy::too_many_arguments)]
    fn build_facet(
-        self,
+        index: &'ctx crate::Index,
+        rtxn: &'ctx heed::RoTxn<'ctx>,
+        number_db: Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
+        string_db: Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
+        next_fields: &'ctx [AscDescId],
+        candidates: RoaringBitmap,
+        geo_candidates: &'ctx RoaringBitmap,
        field_id: u16,
        ascending: bool,
    ) -> crate::Result<SortedDocumentsIterator<'ctx>> {
-        let SortedDocumentsIteratorBuilder {
-            index,
-            rtxn,
-            number_db,
-            string_db,
-            fields,
-            candidates,
-            geo_candidates,
-        } = self;
        let size = candidates.len() as usize;

        // Perform the sort on the first field
@ -248,7 +265,7 @@ impl<'ctx> SortedDocumentsIteratorBuilder<'ctx> {
                rtxn,
                number_db,
                string_db,
-                fields: &fields[1..],
+                fields: next_fields,
                candidates: r?,
                geo_candidates,
            })
@ -262,22 +279,19 @@ impl<'ctx> SortedDocumentsIteratorBuilder<'ctx> {
    }

    /// Builds a [`SortedDocumentsIterator`] based on the (lazy) results of a geo sort.
+    #[allow(clippy::too_many_arguments)]
    fn build_geo(
-        self,
+        index: &'ctx crate::Index,
+        rtxn: &'ctx heed::RoTxn<'ctx>,
+        number_db: Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
+        string_db: Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
+        next_fields: &'ctx [AscDescId],
+        candidates: RoaringBitmap,
+        geo_candidates: &'ctx RoaringBitmap,
        field_ids: [u16; 2],
        target_point: [f64; 2],
        ascending: bool,
    ) -> crate::Result<SortedDocumentsIterator<'ctx>> {
-        let SortedDocumentsIteratorBuilder {
-            index,
-            rtxn,
-            number_db,
-            string_db,
-            fields,
-            candidates,
-            geo_candidates,
-        } = self;
-
        let mut cache = VecDeque::new();
        let mut rtree = None;
        let size = candidates.len() as usize;
@ -307,7 +321,7 @@ impl<'ctx> SortedDocumentsIteratorBuilder<'ctx> {
                        rtxn,
                        number_db,
                        string_db,
-                        fields: &fields[1..],
+                        fields: next_fields,
                        candidates: docids,
                        geo_candidates,
                    }));
@ -322,7 +336,7 @@ impl<'ctx> SortedDocumentsIteratorBuilder<'ctx> {
                        rtxn,
                        number_db,
                        string_db,
-                        fields: &fields[1..],
+                        fields: next_fields,
                        candidates: not_geo_candidates,
                        geo_candidates,
                    }));