Compare commits


21 Commits

Author SHA1 Message Date
ManyTheFish
9874efc352 WIP 2024-07-04 11:18:45 +02:00
meili-bors[bot]
a838f39fce Merge #4682
4682: Speed Up Filter ANDs operations r=Kerollmops a=Kerollmops

This PR fixes #4659 and improves the way we do AND operations by using the latest [RoaringBitmap feature to do intersections with serialized bitmaps](https://github.com/RoaringBitmap/roaring-rs/pull/281). Doing so drastically reduces the time spent reading and copying bytes in memory, since we only materialize the subset of the bitmap's containers we actually need.
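
As a minimal, self-contained sketch of that roaring-rs feature (assuming roaring v0.10.5 and the `intersection_with_serialized_unchecked` method used in the diff below; the bitmap contents are made up for illustration):

```rust
use std::io::Cursor;

use roaring::RoaringBitmap;

fn main() -> std::io::Result<()> {
    // An in-memory subset of candidate documents (the "universe").
    let universe: RoaringBitmap = (0u32..100_000).collect();

    // A bitmap as it would sit on disk: fully serialized.
    let stored: RoaringBitmap = (50_000u32..200_000).collect();
    let mut bytes = Vec::new();
    stored.serialize_into(&mut bytes)?;

    // Intersect directly against the serialized bytes: containers that share
    // nothing with `universe` are skipped instead of being deserialized.
    let result = universe.intersection_with_serialized_unchecked(Cursor::new(&bytes[..]))?;

    assert_eq!(result, &universe & &stored);
    Ok(())
}
```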

### Some Example Results

On a 45M-document dataset running on a good NVMe drive, the example filter below took 77ms before this PR and only 13ms with it (a 6x speedup):

```sql
artist = 'The Beatles' AND (duration 150 TO 500 OR duration NOT EXISTS) AND genres IN [Rock, 'Rock and Roll'] AND rating > 4 AND released_year 1960 TO 1990
```

By reordering the AND clauses of the filter we can reach a constant 8ms execution time; note, however, that this reordering is a manual operation. By contrast, the previous filter pipeline remains at a constant 45ms execution time with this reordered filter (again a 6x speedup):

```sql
artist = 'The Beatles' AND genres IN [Rock, 'Rock and Roll'] AND released_year 1960 TO 1990 AND (duration 150 TO 500 OR duration NOT EXISTS)
```
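
The gain comes from each AND clause receiving the bitmap accumulated so far as its universe, so the most selective clauses shrink the work done by the later ones. Here is a sketch of that folding strategy, with a hypothetical `eval` closure standing in for the real clause evaluation:

```rust
use roaring::RoaringBitmap;

/// Sketch of evaluating `clauses[0] AND clauses[1] AND ...`: the bitmap
/// accumulated so far becomes the universe of the next clause, which is why
/// putting the most selective clauses first pays off.
fn and_all<C>(
    clauses: &[C],
    eval: impl Fn(&C, Option<&RoaringBitmap>) -> RoaringBitmap,
) -> RoaringBitmap {
    let mut iter = clauses.iter();
    let mut acc = match iter.next() {
        Some(first) => eval(first, None),
        None => return RoaringBitmap::new(),
    };
    for clause in iter {
        // Nothing can be found once the universe is empty.
        if acc.is_empty() {
            return acc;
        }
        acc &= eval(clause, Some(&acc));
    }
    acc
}
```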

### To Do
- [x] Rebase on `release-v1.9.0`.
- [ ] ~Skip branches of the facet/filter tree when nothing is in common with the universe~ (it turned out to be slower this way).
- [x] When the universe is required, use the universe given as a parameter if possible (see the sketch below).
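
For that last point, here is a sketch of the NOT rule with an optional universe (a hypothetical `negate` helper, not the actual Meilisearch function; it mirrors the `FilterCondition::Not` change in the diff below):

```rust
use roaring::RoaringBitmap;

/// Sketch of NOT with an optional universe: complement within the universe
/// when one is given, and within all document ids otherwise.
fn negate(
    selected: &RoaringBitmap,
    universe: Option<&RoaringBitmap>,
    all_ids: &RoaringBitmap,
) -> RoaringBitmap {
    match universe {
        Some(universe) => universe - selected,
        None => all_ids - selected,
    }
}
```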

Co-authored-by: Clément Renault <clement@meilisearch.com>
2024-06-11 02:51:17 +00:00
meili-bors[bot]
7add7d053c Merge #4689
4689: Bring back changes from v1.8.2 into v1.9.0 r=curquiza a=dureuill



Co-authored-by: dureuill <dureuill@users.noreply.github.com>
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
Co-authored-by: meili-bors[bot] <89034592+meili-bors[bot]@users.noreply.github.com>
2024-06-10 14:03:55 +00:00
Louis Dureuil
7559dfc814 Merge tag 'v1.8.2' into release-v1.9.0 2024-06-10 15:07:34 +02:00
meili-bors[bot]
6c6c4732a1 Merge #4681
4681: Fix concurrency issue r=irevoire a=dureuill

# Pull Request

## Related issue
Fixes #4654 

## What does this PR do?
- Asynchronously drop permits
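
A minimal sketch of the non-blocking drop pattern, assuming a Tokio runtime is available at drop time (the `Permit` shape follows the diff below):

```rust
use tokio::sync::mpsc;

/// A permit that returns its slot to the scheduler through a channel when
/// dropped.
struct Permit {
    sender: mpsc::Sender<()>,
}

impl Drop for Permit {
    fn drop(&mut self) {
        // Blocking on `send` inside `drop` from within the runtime can
        // deadlock; spawning the send makes dropping the permit non-blocking.
        let sender = self.sender.clone();
        drop(tokio::spawn(async move {
            // If the channel is closed, the whole instance is shutting down,
            // so the error can safely be ignored.
            let _ = sender.send(()).await;
        }));
    }
}
```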


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-06-10 09:36:08 +00:00
meili-bors[bot]
3976fe660e Merge #4688
4688: Update version for the next release (v1.8.2) in Cargo.toml r=dureuill a=meili-bot

⚠️ This PR is automatically generated. Check that the new version is the expected one and that Cargo.lock has been updated before merging.

Co-authored-by: dureuill <dureuill@users.noreply.github.com>
2024-06-10 08:28:34 +00:00
Louis Dureuil
50f8218a5d Asynchronously drop permits 2024-06-10 10:19:57 +02:00
dureuill
19585f1a4f Update version for the next release (v1.8.2) in Cargo.toml 2024-06-10 07:59:36 +00:00
Clément Renault
8ec6e175e5 Replace roaring patch to the v0.10.5 2024-06-07 22:11:26 -04:00
Clément Renault
75b2e02cd2 Log more stuff around filtering 2024-06-06 11:00:07 -04:00
Clément Renault
40f05fe156 Bump roaring to the latest commit 2024-06-06 10:59:55 -04:00
Clément Renault
52d0d35b39 Revert "Reduce the universe while exploring the facet tree" because it's slower this way
This reverts commit 14026115f21409535772ede0ee4273f37848dd61.
2024-06-06 09:17:51 -04:00
Clément Renault
5432776132 Reduce the universe while exploring the facet tree 2024-06-06 09:17:51 -04:00
Clément Renault
66470b27e6 Use the MultiOps trait for IN operations 2024-06-06 09:17:51 -04:00
Clément Renault
0a9bd398c7 Improve the NOT operator to use the universe when possible 2024-06-06 09:17:51 -04:00
Clément Renault
7967e93c16 Skip evaluating when a universe is empty, nothing can be found 2024-06-06 09:17:51 -04:00
Clément Renault
a6f3a01c6a Expose the universe to do efficient intersections on deserialization 2024-06-06 09:17:51 -04:00
Clément Renault
4ca4a3f954 Make the CboRoaringBitmapCodec support intersection on deserialization 2024-06-06 09:17:51 -04:00
Clément Renault
e4a69c5ac3 Introduce the FacetGroupLazyValue type 2024-06-06 09:17:50 -04:00
Clément Renault
ff2e498267 Patch roaring to use the version supporting intersection on deserialization 2024-06-06 09:17:50 -04:00
Clément Renault
531e3d7d6a MultiOps trait for OR operations 2024-06-06 09:17:50 -04:00
16 changed files with 693 additions and 59 deletions

Cargo.lock (generated)

@@ -4377,12 +4377,6 @@ dependencies = [
"winreg",
]
[[package]]
name = "retain_mut"
version = "0.1.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8c31b5c4033f8fdde8700e4657be2c497e7288f01515be52168c631e2e4d4086"
[[package]]
name = "ring"
version = "0.17.8"
@@ -4400,13 +4394,12 @@ dependencies = [
[[package]]
name = "roaring"
version = "0.10.2"
version = "0.10.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6106b5cf8587f5834158895e9715a3c6c9716c8aefab57f1f7680917191c7873"
checksum = "7699249cc2c7d71939f30868f47e9d7add0bdc030d90ee10bfd16887ff8bb1c8"
dependencies = [
"bytemuck",
"byteorder",
"retain_mut",
"serde",
]


@@ -40,8 +40,9 @@ pub struct Permit {
impl Drop for Permit {
fn drop(&mut self) {
let sender = self.sender.clone();
// if the channel is closed then the whole instance is down
let _ = futures::executor::block_on(self.sender.send(()));
std::mem::drop(tokio::spawn(async move { sender.send(()).await }));
}
}


@@ -47,6 +47,12 @@ pub struct FacetGroupValue {
pub bitmap: RoaringBitmap,
}
#[derive(Debug)]
pub struct FacetGroupLazyValue<'b> {
pub size: u8,
pub bitmap_bytes: &'b [u8],
}
pub struct FacetGroupKeyCodec<T> {
_phantom: PhantomData<T>,
}
@@ -69,6 +75,7 @@ where
Ok(Cow::Owned(v))
}
}
impl<'a, T> heed::BytesDecode<'a> for FacetGroupKeyCodec<T>
where
T: BytesDecode<'a>,
@@ -84,6 +91,7 @@ where
}
pub struct FacetGroupValueCodec;
impl<'a> heed::BytesEncode<'a> for FacetGroupValueCodec {
type EItem = FacetGroupValue;
@@ -93,11 +101,23 @@ impl<'a> heed::BytesEncode<'a> for FacetGroupValueCodec {
Ok(Cow::Owned(v))
}
}
impl<'a> heed::BytesDecode<'a> for FacetGroupValueCodec {
type DItem = FacetGroupValue;
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
let size = bytes[0];
let bitmap = CboRoaringBitmapCodec::deserialize_from(&bytes[1..])?;
Ok(FacetGroupValue { size, bitmap })
}
}
pub struct FacetGroupLazyValueCodec;
impl<'a> heed::BytesDecode<'a> for FacetGroupLazyValueCodec {
type DItem = FacetGroupLazyValue<'a>;
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
Ok(FacetGroupLazyValue { size: bytes[0], bitmap_bytes: &bytes[1..] })
}
}


@@ -1,5 +1,5 @@
use std::borrow::Cow;
use std::io;
use std::io::{self, Cursor};
use std::mem::size_of;
use byteorder::{NativeEndian, ReadBytesExt, WriteBytesExt};
@@ -57,6 +57,24 @@ impl CboRoaringBitmapCodec {
}
}
pub fn intersection_with_serialized(
mut bytes: &[u8],
other: &RoaringBitmap,
) -> io::Result<RoaringBitmap> {
// See above `deserialize_from` method for implementation details.
if bytes.len() <= THRESHOLD * size_of::<u32>() {
let mut bitmap = RoaringBitmap::new();
while let Ok(integer) = bytes.read_u32::<NativeEndian>() {
if other.contains(integer) {
bitmap.insert(integer);
}
}
Ok(bitmap)
} else {
other.intersection_with_serialized_unchecked(Cursor::new(bytes))
}
}
/// Merge serialized CboRoaringBitmaps in a buffer.
///
/// if the merged values length is under the threshold, values are directly


@@ -38,7 +38,7 @@ where
field_id,
)?;
if let Some(first_bound) = get_first_facet_value::<BytesRefCodec>(rtxn, db, field_id)? {
if let Some(first_bound) = get_first_facet_value::<BytesRefCodec, _>(rtxn, db, field_id)? {
fd.iterate(candidates, highest_level, first_bound, usize::MAX)?;
Ok(())
} else {
@@ -81,7 +81,7 @@ where
field_id,
)?;
if let Some(first_bound) = get_first_facet_value::<BytesRefCodec>(rtxn, db, field_id)? {
if let Some(first_bound) = get_first_facet_value::<BytesRefCodec, _>(rtxn, db, field_id)? {
// We first fill the heap with values from the highest level
let starting_key =
FacetGroupKey { field_id, level: highest_level, left_bound: first_bound };


@@ -4,9 +4,11 @@ use heed::BytesEncode;
use roaring::RoaringBitmap;
use super::{get_first_facet_value, get_highest_level, get_last_facet_value};
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec};
use crate::heed_codec::facet::{
FacetGroupKey, FacetGroupKeyCodec, FacetGroupLazyValueCodec, FacetGroupValueCodec,
};
use crate::heed_codec::BytesRefCodec;
use crate::Result;
use crate::{CboRoaringBitmapCodec, Result};
/// Find all the document ids for which the given field contains a value contained within
/// the two bounds.
@@ -16,6 +18,7 @@ pub fn find_docids_of_facet_within_bounds<'t, BoundCodec>(
field_id: u16,
left: &'t Bound<<BoundCodec as BytesEncode<'t>>::EItem>,
right: &'t Bound<<BoundCodec as BytesEncode<'t>>::EItem>,
universe: Option<&RoaringBitmap>,
docids: &mut RoaringBitmap,
) -> Result<()>
where
@@ -46,13 +49,15 @@ where
}
Bound::Unbounded => Bound::Unbounded,
};
let db = db.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>();
let mut f = FacetRangeSearch { rtxn, db, field_id, left, right, docids };
let db = db.remap_types::<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupLazyValueCodec>();
let mut f = FacetRangeSearch { rtxn, db, field_id, left, right, universe, docids };
let highest_level = get_highest_level(rtxn, db, field_id)?;
if let Some(starting_left_bound) = get_first_facet_value::<BytesRefCodec>(rtxn, db, field_id)? {
if let Some(starting_left_bound) =
get_first_facet_value::<BytesRefCodec, _>(rtxn, db, field_id)?
{
let rightmost_bound =
Bound::Included(get_last_facet_value::<BytesRefCodec>(rtxn, db, field_id)?.unwrap()); // will not fail because get_first_facet_value succeeded
Bound::Included(get_last_facet_value::<BytesRefCodec, _>(rtxn, db, field_id)?.unwrap()); // will not fail because get_first_facet_value succeeded
let group_size = usize::MAX;
f.run(highest_level, starting_left_bound, rightmost_bound, group_size)?;
Ok(())
@@ -64,12 +69,16 @@ where
/// Fetch the document ids that have a facet with a value between the two given bounds
struct FacetRangeSearch<'t, 'b, 'bitmap> {
rtxn: &'t heed::RoTxn<'t>,
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupLazyValueCodec>,
field_id: u16,
left: Bound<&'b [u8]>,
right: Bound<&'b [u8]>,
/// The subset of documents ids that are useful for this search.
/// Great performance optimizations can be achieved by only fetching values matching this subset.
universe: Option<&'bitmap RoaringBitmap>,
docids: &'bitmap mut RoaringBitmap,
}
impl<'t, 'b, 'bitmap> FacetRangeSearch<'t, 'b, 'bitmap> {
fn run_level_0(&mut self, starting_left_bound: &'t [u8], group_size: usize) -> Result<()> {
let left_key =
@@ -104,7 +113,13 @@ impl<'t, 'b, 'bitmap> FacetRangeSearch<'t, 'b, 'bitmap> {
}
if RangeBounds::<&[u8]>::contains(&(self.left, self.right), &key.left_bound) {
*self.docids |= value.bitmap;
*self.docids |= match self.universe {
Some(universe) => CboRoaringBitmapCodec::intersection_with_serialized(
value.bitmap_bytes,
universe,
)?,
None => CboRoaringBitmapCodec::deserialize_from(value.bitmap_bytes)?,
};
}
}
Ok(())
@@ -195,7 +210,13 @@ impl<'t, 'b, 'bitmap> FacetRangeSearch<'t, 'b, 'bitmap> {
left_condition && right_condition
};
if should_take_whole_group {
*self.docids |= &previous_value.bitmap;
*self.docids |= match self.universe {
Some(universe) => CboRoaringBitmapCodec::intersection_with_serialized(
previous_value.bitmap_bytes,
universe,
)?,
None => CboRoaringBitmapCodec::deserialize_from(previous_value.bitmap_bytes)?,
};
previous_key = next_key;
previous_value = next_value;
continue;
@@ -291,7 +312,13 @@ impl<'t, 'b, 'bitmap> FacetRangeSearch<'t, 'b, 'bitmap> {
left_condition && right_condition
};
if should_take_whole_group {
*self.docids |= &previous_value.bitmap;
*self.docids |= match self.universe {
Some(universe) => CboRoaringBitmapCodec::intersection_with_serialized(
previous_value.bitmap_bytes,
universe,
)?,
None => CboRoaringBitmapCodec::deserialize_from(previous_value.bitmap_bytes)?,
};
} else {
let level = level - 1;
let starting_left_bound = previous_key.left_bound;
@@ -365,6 +392,7 @@ mod tests {
0,
&start,
&end,
None,
&mut docids,
)
.unwrap();
@@ -384,6 +412,7 @@ mod tests {
0,
&start,
&end,
None,
&mut docids,
)
.unwrap();
@@ -418,6 +447,7 @@ mod tests {
0,
&start,
&end,
None,
&mut docids,
)
.unwrap();
@@ -439,6 +469,7 @@ mod tests {
0,
&start,
&end,
None,
&mut docids,
)
.unwrap();
@@ -474,6 +505,7 @@ mod tests {
0,
&start,
&end,
None,
&mut docids,
)
.unwrap();
@@ -499,6 +531,7 @@ mod tests {
0,
&start,
&end,
None,
&mut docids,
)
.unwrap();
@@ -537,6 +570,7 @@ mod tests {
0,
&start,
&end,
None,
&mut docids,
)
.unwrap();
@@ -556,6 +590,7 @@ mod tests {
0,
&start,
&end,
None,
&mut docids,
)
.unwrap();
@@ -571,6 +606,7 @@ mod tests {
0,
&Bound::Unbounded,
&Bound::Unbounded,
None,
&mut docids,
)
.unwrap();
@@ -586,6 +622,7 @@ mod tests {
1,
&Bound::Unbounded,
&Bound::Unbounded,
None,
&mut docids,
)
.unwrap();
@@ -621,6 +658,7 @@ mod tests {
0,
&start,
&end,
None,
&mut docids,
)
.unwrap();
@@ -634,6 +672,7 @@ mod tests {
1,
&start,
&end,
None,
&mut docids,
)
.unwrap();


@@ -36,7 +36,7 @@ pub fn ascending_facet_sort<'t>(
candidates: RoaringBitmap,
) -> Result<impl Iterator<Item = Result<(RoaringBitmap, &'t [u8])>> + 't> {
let highest_level = get_highest_level(rtxn, db, field_id)?;
if let Some(first_bound) = get_first_facet_value::<BytesRefCodec>(rtxn, db, field_id)? {
if let Some(first_bound) = get_first_facet_value::<BytesRefCodec, _>(rtxn, db, field_id)? {
let first_key = FacetGroupKey { field_id, level: highest_level, left_bound: first_bound };
let iter = db.range(rtxn, &(first_key..)).unwrap().take(usize::MAX);


@@ -19,9 +19,9 @@ pub fn descending_facet_sort<'t>(
candidates: RoaringBitmap,
) -> Result<impl Iterator<Item = Result<(RoaringBitmap, &'t [u8])>> + 't> {
let highest_level = get_highest_level(rtxn, db, field_id)?;
if let Some(first_bound) = get_first_facet_value::<BytesRefCodec>(rtxn, db, field_id)? {
if let Some(first_bound) = get_first_facet_value::<BytesRefCodec, _>(rtxn, db, field_id)? {
let first_key = FacetGroupKey { field_id, level: highest_level, left_bound: first_bound };
let last_bound = get_last_facet_value::<BytesRefCodec>(rtxn, db, field_id)?.unwrap();
let last_bound = get_last_facet_value::<BytesRefCodec, _>(rtxn, db, field_id)?.unwrap();
let last_key = FacetGroupKey { field_id, level: highest_level, left_bound: last_bound };
let iter = db.rev_range(rtxn, &(first_key..=last_key))?.take(usize::MAX);
Ok(itertools::Either::Left(DescendingFacetSort {


@@ -4,7 +4,7 @@ use std::ops::Bound::{self, Excluded, Included};
use either::Either;
pub use filter_parser::{Condition, Error as FPError, FilterCondition, Token};
use roaring::RoaringBitmap;
use roaring::{MultiOps, RoaringBitmap};
use serde_json::Value;
use super::facet_range_search;
@@ -224,14 +224,14 @@ impl<'a> Filter<'a> {
pub fn evaluate(&self, rtxn: &heed::RoTxn, index: &Index) -> Result<RoaringBitmap> {
// to avoid doing this for each recursive call we're going to do it ONCE ahead of time
let filterable_fields = index.filterable_fields(rtxn)?;
self.inner_evaluate(rtxn, index, &filterable_fields)
self.inner_evaluate(rtxn, index, &filterable_fields, None)
}
fn evaluate_operator(
rtxn: &heed::RoTxn,
index: &Index,
field_id: FieldId,
universe: Option<&RoaringBitmap>,
operator: &Condition<'a>,
) -> Result<RoaringBitmap> {
let numbers_db = index.facet_id_f64_docids;
@@ -291,14 +291,22 @@ impl<'a> Filter<'a> {
}
Condition::NotEqual(val) => {
let operator = Condition::Equal(val.clone());
let docids = Self::evaluate_operator(rtxn, index, field_id, &operator)?;
let docids = Self::evaluate_operator(rtxn, index, field_id, None, &operator)?;
let all_ids = index.documents_ids(rtxn)?;
return Ok(all_ids - docids);
}
};
let mut output = RoaringBitmap::new();
Self::explore_facet_number_levels(rtxn, numbers_db, field_id, left, right, &mut output)?;
Self::explore_facet_number_levels(
rtxn,
numbers_db,
field_id,
left,
right,
universe,
&mut output,
)?;
Ok(output)
}
@@ -310,6 +318,7 @@ impl<'a> Filter<'a> {
field_id: FieldId,
left: Bound<f64>,
right: Bound<f64>,
universe: Option<&RoaringBitmap>,
output: &mut RoaringBitmap,
) -> Result<()> {
match (left, right) {
@@ -321,7 +330,7 @@ impl<'a> Filter<'a> {
(_, _) => (),
}
facet_range_search::find_docids_of_facet_within_bounds::<OrderedF64Codec>(
rtxn, db, field_id, &left, &right, output,
rtxn, db, field_id, &left, &right, universe, output,
)?;
Ok(())
@@ -332,31 +341,37 @@ impl<'a> Filter<'a> {
rtxn: &heed::RoTxn,
index: &Index,
filterable_fields: &HashSet<String>,
universe: Option<&RoaringBitmap>,
) -> Result<RoaringBitmap> {
if universe.map_or(false, |u| u.is_empty()) {
return Ok(RoaringBitmap::new());
}
match &self.condition {
FilterCondition::Not(f) => {
let all_ids = index.documents_ids(rtxn)?;
let selected = Self::inner_evaluate(
&(f.as_ref().clone()).into(),
rtxn,
index,
filterable_fields,
universe,
)?;
Ok(all_ids - selected)
match universe {
Some(universe) => Ok(universe - selected),
None => {
let all_ids = index.documents_ids(rtxn)?;
Ok(all_ids - selected)
}
}
}
FilterCondition::In { fid, els } => {
if crate::is_faceted(fid.value(), filterable_fields) {
let field_ids_map = index.fields_ids_map(rtxn)?;
if let Some(fid) = field_ids_map.id(fid.value()) {
let mut bitmap = RoaringBitmap::new();
for el in els {
let op = Condition::Equal(el.clone());
let el_bitmap = Self::evaluate_operator(rtxn, index, fid, &op)?;
bitmap |= el_bitmap;
}
Ok(bitmap)
els.iter()
.map(|el| Condition::Equal(el.clone()))
.map(|op| Self::evaluate_operator(rtxn, index, fid, universe, &op))
.union()
} else {
Ok(RoaringBitmap::new())
}
@@ -371,7 +386,7 @@ impl<'a> Filter<'a> {
if crate::is_faceted(fid.value(), filterable_fields) {
let field_ids_map = index.fields_ids_map(rtxn)?;
if let Some(fid) = field_ids_map.id(fid.value()) {
Self::evaluate_operator(rtxn, index, fid, op)
Self::evaluate_operator(rtxn, index, fid, universe, op)
} else {
Ok(RoaringBitmap::new())
}
@@ -382,14 +397,11 @@ impl<'a> Filter<'a> {
}))?
}
}
FilterCondition::Or(subfilters) => {
let mut bitmap = RoaringBitmap::new();
for f in subfilters {
bitmap |=
Self::inner_evaluate(&(f.clone()).into(), rtxn, index, filterable_fields)?;
}
Ok(bitmap)
}
FilterCondition::Or(subfilters) => subfilters
.iter()
.cloned()
.map(|f| Self::inner_evaluate(&f.into(), rtxn, index, filterable_fields, universe))
.union(),
FilterCondition::And(subfilters) => {
let mut subfilters_iter = subfilters.iter();
if let Some(first_subfilter) = subfilters_iter.next() {
@@ -398,16 +410,21 @@ impl<'a> Filter<'a> {
rtxn,
index,
filterable_fields,
universe,
)?;
for f in subfilters_iter {
if bitmap.is_empty() {
return Ok(bitmap);
}
// TODO We are doing the intersections two times,
// it could be more efficient
// Can't I just replace this `&=` by an `=`?
bitmap &= Self::inner_evaluate(
&(f.clone()).into(),
rtxn,
index,
filterable_fields,
Some(&bitmap),
)?;
}
Ok(bitmap)
@@ -507,6 +524,7 @@ impl<'a> Filter<'a> {
rtxn,
index,
filterable_fields,
universe,
)?;
let geo_lng_token = Token::new(
@@ -539,6 +557,7 @@ impl<'a> Filter<'a> {
rtxn,
index,
filterable_fields,
universe,
)?;
let condition_right = FilterCondition::Condition {
@@ -552,6 +571,7 @@ impl<'a> Filter<'a> {
rtxn,
index,
filterable_fields,
universe,
)?;
left | right
@@ -567,6 +587,7 @@ impl<'a> Filter<'a> {
rtxn,
index,
filterable_fields,
universe,
)?
};


@@ -7,7 +7,7 @@ use roaring::RoaringBitmap;
pub use self::facet_distribution::{FacetDistribution, OrderBy, DEFAULT_VALUES_PER_FACET};
pub use self::filter::{BadGeoError, Filter};
pub use self::search::{FacetValueHit, SearchForFacetValues};
use crate::heed_codec::facet::{FacetGroupKeyCodec, FacetGroupValueCodec, OrderedF64Codec};
use crate::heed_codec::facet::{FacetGroupKeyCodec, OrderedF64Codec};
use crate::heed_codec::BytesRefCodec;
use crate::{Index, Result};
@@ -54,9 +54,9 @@ pub fn facet_max_value<'t>(
}
/// Get the first facet value in the facet database
pub(crate) fn get_first_facet_value<'t, BoundCodec>(
pub(crate) fn get_first_facet_value<'t, BoundCodec, DC>(
txn: &'t RoTxn,
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, DC>,
field_id: u16,
) -> heed::Result<Option<BoundCodec::DItem>>
where
@@ -78,9 +78,9 @@ where
}
/// Get the last facet value in the facet database
pub(crate) fn get_last_facet_value<'t, BoundCodec>(
pub(crate) fn get_last_facet_value<'t, BoundCodec, DC>(
txn: &'t RoTxn,
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, DC>,
field_id: u16,
) -> heed::Result<Option<BoundCodec::DItem>>
where
@@ -102,9 +102,9 @@ where
}
/// Get the height of the highest level in the facet database
pub(crate) fn get_highest_level<'t>(
pub(crate) fn get_highest_level<'t, DC>(
txn: &'t RoTxn<'t>,
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, DC>,
field_id: u16,
) -> heed::Result<u8> {
let field_id_prefix = &field_id.to_be_bytes();


@@ -548,6 +548,7 @@ fn resolve_sort_criteria<'ctx, Query: RankingRuleQueryTrait>(
Ok(())
}
#[tracing::instrument(level = "trace", skip_all, target = "search")]
pub fn filtered_universe(
index: &Index,
txn: &RoTxn<'_>,


@@ -8,6 +8,7 @@ mod extract_vector_points;
mod extract_word_docids;
mod extract_word_pair_proximity_docids;
mod extract_word_position_docids;
// mod searchable;
use std::fs::File;
use std::io::BufReader;


@@ -0,0 +1,211 @@
use std::collections::HashMap;
use charabia::normalizer::NormalizedTokenIter;
use charabia::{Language, Script, SeparatorKind, Token, TokenKind, Tokenizer, TokenizerBuilder};
use roaring::RoaringBitmap;
use serde_json::Value;
use crate::update::settings::InnerIndexSettings;
use crate::{InternalError, Result, MAX_POSITION_PER_ATTRIBUTE, MAX_WORD_LENGTH};
pub type ScriptLanguageDocidsMap = HashMap<(Script, Language), (RoaringBitmap, RoaringBitmap)>;
pub struct FieldWordPositionExtractorBuilder<'a> {
max_positions_per_attributes: u16,
stop_words: Option<&'a fst::Set<Vec<u8>>>,
separators: Option<Vec<&'a str>>,
dictionary: Option<Vec<&'a str>>,
}
impl<'a> FieldWordPositionExtractorBuilder<'a> {
pub fn new(
max_positions_per_attributes: Option<u32>,
settings: &'a InnerIndexSettings,
) -> Result<Self> {
let stop_words = settings.stop_words.as_ref();
let separators: Option<Vec<_>> =
settings.allowed_separators.as_ref().map(|s| s.iter().map(String::as_str).collect());
let dictionary: Option<Vec<_>> =
settings.dictionary.as_ref().map(|s| s.iter().map(String::as_str).collect());
Ok(Self {
max_positions_per_attributes: max_positions_per_attributes
.map_or(MAX_POSITION_PER_ATTRIBUTE as u16, |max| {
max.min(MAX_POSITION_PER_ATTRIBUTE) as u16
}),
stop_words,
separators,
dictionary,
})
}
pub fn build(&'a self) -> FieldWordPositionExtractor<'a> {
let builder = tokenizer_builder(
self.stop_words,
self.separators.as_deref(),
self.dictionary.as_deref(),
None,
);
FieldWordPositionExtractor {
tokenizer: builder.into_tokenizer(),
max_positions_per_attributes: self.max_positions_per_attributes,
}
}
}
pub struct FieldWordPositionExtractor<'a> {
tokenizer: Tokenizer<'a>,
max_positions_per_attributes: u16,
}
impl<'a> FieldWordPositionExtractor<'a> {
pub fn extract<'b>(
&'a self,
field_bytes: &[u8],
buffer: &'b mut String,
) -> Result<ExtractedFieldWordPosition<'a, 'b>> {
let field_value = serde_json::from_slice(field_bytes).map_err(InternalError::SerdeJson)?;
Ok(ExtractedFieldWordPosition {
tokenizer: &self.tokenizer,
max_positions_per_attributes: self.max_positions_per_attributes,
field_value,
buffer: buffer,
})
}
}
pub struct ExtractedFieldWordPosition<'a, 'b> {
tokenizer: &'a Tokenizer<'a>,
max_positions_per_attributes: u16,
field_value: Value,
buffer: &'b mut String,
}
impl<'a> ExtractedFieldWordPosition<'a, '_> {
pub fn iter<'o>(&'o mut self) -> FieldWordPositionIter<'o> {
self.buffer.clear();
let inner = match json_to_string(&self.field_value, &mut self.buffer) {
Some(field) => Some(self.tokenizer.tokenize(field)),
None => None,
};
// create an iterator of token with their positions.
FieldWordPositionIter {
inner,
max_positions_per_attributes: self.max_positions_per_attributes,
position: 0,
prev_kind: None,
}
}
}
pub struct FieldWordPositionIter<'a> {
inner: Option<NormalizedTokenIter<'a, 'a>>,
max_positions_per_attributes: u16,
position: u16,
prev_kind: Option<TokenKind>,
}
impl<'a> Iterator for FieldWordPositionIter<'a> {
type Item = (u16, Token<'a>);
fn next(&mut self) -> Option<Self::Item> {
if self.position >= self.max_positions_per_attributes {
return None;
}
let token = self.inner.as_mut().map(|i| i.next()).flatten()?;
match token.kind {
TokenKind::Word | TokenKind::StopWord if !token.lemma().is_empty() => {
self.position += match self.prev_kind {
Some(TokenKind::Separator(SeparatorKind::Hard)) => 8,
Some(_) => 1,
None => 0,
};
self.prev_kind = Some(token.kind)
}
TokenKind::Separator(_) if self.position == 0 => {
return self.next();
}
TokenKind::Separator(SeparatorKind::Hard) => {
self.prev_kind = Some(token.kind);
}
TokenKind::Separator(SeparatorKind::Soft)
if self.prev_kind != Some(TokenKind::Separator(SeparatorKind::Hard)) =>
{
self.prev_kind = Some(token.kind);
}
_ => return self.next(),
}
if !token.is_word() {
return self.next();
}
// keep a word only if it is not empty and fit in a LMDB key.
let lemma = token.lemma().trim();
if !lemma.is_empty() && lemma.len() <= MAX_WORD_LENGTH {
Some((self.position, token))
} else {
self.next()
}
}
}
/// Factorize tokenizer building.
pub fn tokenizer_builder<'a>(
stop_words: Option<&'a fst::Set<Vec<u8>>>,
allowed_separators: Option<&'a [&str]>,
dictionary: Option<&'a [&str]>,
script_language: Option<&'a HashMap<Script, Vec<Language>>>,
) -> TokenizerBuilder<'a, Vec<u8>> {
let mut tokenizer_builder = TokenizerBuilder::new();
if let Some(stop_words) = stop_words {
tokenizer_builder.stop_words(stop_words);
}
if let Some(dictionary) = dictionary {
tokenizer_builder.words_dict(dictionary);
}
if let Some(separators) = allowed_separators {
tokenizer_builder.separators(separators);
}
if let Some(script_language) = script_language {
tokenizer_builder.allow_list(script_language);
}
tokenizer_builder
}
/// Transform a JSON value into a string that can be indexed.
fn json_to_string<'a>(value: &'a Value, buffer: &'a mut String) -> Option<&'a str> {
fn inner(value: &Value, output: &mut String) -> bool {
use std::fmt::Write;
match value {
Value::Null | Value::Object(_) => false,
Value::Bool(boolean) => write!(output, "{}", boolean).is_ok(),
Value::Number(number) => write!(output, "{}", number).is_ok(),
Value::String(string) => write!(output, "{}", string).is_ok(),
Value::Array(array) => {
let mut count = 0;
for value in array {
if inner(value, output) {
output.push_str(". ");
count += 1;
}
}
// check that at least one value was written
count != 0
}
}
}
if let Value::String(string) = value {
Some(string)
} else if inner(value, buffer) {
Some(buffer)
} else {
None
}
}


@@ -0,0 +1,114 @@
use std::collections::{BTreeMap, BTreeSet};
use std::convert::TryInto;
use std::fs::File;
use std::io;
use std::io::BufReader;
use field_word_position::FieldWordPositionExtractorBuilder;
use obkv::KvReader;
use roaring::RoaringBitmap;
use word_docids::{WordDocidsDump, WordDocidsExtractor};
use crate::update::del_add::{DelAdd, KvReaderDelAdd};
use crate::update::index_documents::extract::extract_docid_word_positions::ScriptLanguageDocidsMap;
use crate::update::index_documents::GrenadParameters;
use crate::update::settings::InnerIndexSettingsDiff;
use crate::{FieldId, Result, SerializationError};
mod field_word_position;
mod word_docids;
#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")]
pub fn extract_searchable_data<R: io::Read + io::Seek>(
obkv_documents: grenad::Reader<R>,
indexer: GrenadParameters,
settings_diff: &InnerIndexSettingsDiff,
max_positions_per_attributes: Option<u32>,
) -> Result<(grenad::Reader<BufReader<File>>, ScriptLanguageDocidsMap)> {
let searchable_fields_to_index = settings_diff.searchable_fields_to_index();
let mut documents_ids = RoaringBitmap::new();
let add_builder =
FieldWordPositionExtractorBuilder::new(max_positions_per_attributes, &settings_diff.new)?;
let add_token_positions_extractor = add_builder.build();
let del_builder;
let del_token_positions_extractor = if settings_diff.settings_update_only {
del_builder = FieldWordPositionExtractorBuilder::new(
max_positions_per_attributes,
&settings_diff.old,
)?;
del_builder.build()
} else {
add_builder.build()
};
let token_positions_extractor = &[del_token_positions_extractor, add_token_positions_extractor];
let mut word_map = BTreeMap::new();
let mut word_docids_extractor = WordDocidsExtractor::new(settings_diff);
let mut cursor = obkv_documents.into_cursor()?;
// loop over documents
while let Some((key, value)) = cursor.move_on_next()? {
let document_id = key
.try_into()
.map(u32::from_be_bytes)
.map_err(|_| SerializationError::InvalidNumberSerialization)?;
let obkv = KvReader::<FieldId>::new(value);
// if the searchable fields didn't change, skip the searchable indexing for this document.
if !settings_diff.reindex_searchable()
&& !searchable_fields_changed(&obkv, &searchable_fields_to_index)
{
continue;
}
documents_ids.push(document_id);
let mut buffer = String::new();
for field_id in searchable_fields_to_index.iter() {
let Some(field_obkv) = obkv.get(*field_id).map(KvReaderDelAdd::new) else { continue };
for (deladd, field_bytes) in field_obkv {
let mut extracted_positions =
token_positions_extractor[deladd as usize].extract(field_bytes, &mut buffer)?;
for (position, token) in extracted_positions.iter() {
let word = token.lemma().trim();
if !word_map.contains_key(word) {
word_map.insert(word.to_string(), word_map.len() as u32);
}
let word_id = word_map.get(word).unwrap();
word_docids_extractor.insert(*word_id, *field_id, document_id, deladd);
}
}
}
if word_docids_extractor.rough_size_estimate()
> indexer.max_memory.map_or(512 * 1024 * 1024, |s| s.min(512 * 1024 * 1024))
{
let WordDocidsDump { .. } =
word_docids_extractor.dump(&word_map, &searchable_fields_to_index, indexer)?;
}
}
todo!()
}
/// Check if any searchable fields of a document changed.
fn searchable_fields_changed(
obkv: &KvReader<FieldId>,
searchable_fields: &BTreeSet<FieldId>,
) -> bool {
for field_id in searchable_fields {
let Some(field_obkv) = obkv.get(*field_id).map(KvReaderDelAdd::new) else { continue };
match (field_obkv.get(DelAdd::Deletion), field_obkv.get(DelAdd::Addition)) {
// if both fields are None, check the next field.
(None, None) => (),
// if both contains a value and values are the same, check the next field.
(Some(del), Some(add)) if del == add => (),
// otherwise the fields are different, return true.
_otherwise => return true,
}
}
false
}


@@ -0,0 +1,203 @@
use std::collections::hash_map::Entry::{Occupied, Vacant};
use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
use std::fs::File;
use std::hash::Hash;
use std::io::BufReader;
use std::mem::size_of;
use roaring::RoaringBitmap;
use crate::update::del_add::KvWriterDelAdd;
use crate::update::index_documents::extract::searchable::DelAdd;
use crate::update::index_documents::{create_writer, writer_into_reader, GrenadParameters};
use crate::update::settings::InnerIndexSettingsDiff;
use crate::{CboRoaringBitmapCodec, DocumentId, FieldId, Result};
pub struct WordDocidsExtractor<'a> {
word_fid_docids: RevertedIndex<(u32, FieldId)>,
settings_diff: &'a InnerIndexSettingsDiff,
}
impl<'a> WordDocidsExtractor<'a> {
pub fn new(settings_diff: &'a InnerIndexSettingsDiff) -> Self {
Self { word_fid_docids: RevertedIndex::new(), settings_diff }
}
pub fn insert(&mut self, wordid: u32, fieldid: FieldId, docid: DocumentId, del_add: DelAdd) {
self.word_fid_docids.insert((wordid, fieldid), docid, del_add);
}
pub fn rough_size_estimate(&self) -> usize {
self.word_fid_docids.rough_size_estimate()
}
pub fn dump(
&mut self,
word_map: &BTreeMap<String, u32>,
fields: &BTreeSet<FieldId>,
indexer: GrenadParameters,
) -> Result<WordDocidsDump> {
let mut word_fid_docids_writer = create_writer(
indexer.chunk_compression_type,
indexer.chunk_compression_level,
tempfile::tempfile()?,
);
let mut word_docids_writer = create_writer(
indexer.chunk_compression_type,
indexer.chunk_compression_level,
tempfile::tempfile()?,
);
let mut exact_word_docids_writer = create_writer(
indexer.chunk_compression_type,
indexer.chunk_compression_level,
tempfile::tempfile()?,
);
let mut exact_word_deletion = RoaringBitmap::new();
let mut exact_word_addition = RoaringBitmap::new();
let mut word_deletion = RoaringBitmap::new();
let mut word_addition = RoaringBitmap::new();
let mut key_buffer = Vec::new();
let mut bitmap_buffer = Vec::new();
let mut obkv_buffer = Vec::new();
for (word, wid) in word_map {
exact_word_deletion.clear();
exact_word_addition.clear();
word_deletion.clear();
word_addition.clear();
for fid in fields {
if let Some((deletion, addition)) = self.word_fid_docids.inner.get(&(*wid, *fid)) {
if self.settings_diff.old.exact_attributes.contains(&fid) {
exact_word_deletion |= deletion;
} else {
word_deletion |= deletion;
}
if self.settings_diff.new.exact_attributes.contains(&fid) {
exact_word_addition |= addition;
} else {
word_addition |= addition;
}
if deletion != addition {
key_buffer.clear();
key_buffer.extend_from_slice(word.as_bytes());
key_buffer.push(0);
key_buffer.extend_from_slice(&fid.to_be_bytes());
let value = bitmaps_into_deladd_obkv(
deletion,
addition,
&mut obkv_buffer,
&mut bitmap_buffer,
)?;
word_fid_docids_writer.insert(&key_buffer, value)?;
}
}
}
key_buffer.clear();
key_buffer.extend_from_slice(word.as_bytes());
if exact_word_deletion != exact_word_addition {
let value = bitmaps_into_deladd_obkv(
&exact_word_deletion,
&exact_word_addition,
&mut obkv_buffer,
&mut bitmap_buffer,
)?;
exact_word_docids_writer.insert(&key_buffer, value)?;
}
if word_deletion != word_addition {
let value = bitmaps_into_deladd_obkv(
&word_deletion,
&word_addition,
&mut obkv_buffer,
&mut bitmap_buffer,
)?;
word_docids_writer.insert(&key_buffer, value)?;
}
}
self.word_fid_docids.clear();
Ok(WordDocidsDump {
word_fid_docids: writer_into_reader(word_fid_docids_writer)?,
word_docids: writer_into_reader(word_docids_writer)?,
exact_word_docids: writer_into_reader(exact_word_docids_writer)?,
})
}
}
fn bitmaps_into_deladd_obkv<'a>(
deletion: &RoaringBitmap,
addition: &RoaringBitmap,
obkv_buffer: &'a mut Vec<u8>,
bitmap_buffer: &mut Vec<u8>,
) -> Result<&'a mut Vec<u8>> {
obkv_buffer.clear();
let mut value_writer = KvWriterDelAdd::new(obkv_buffer);
if !deletion.is_empty() {
bitmap_buffer.clear();
CboRoaringBitmapCodec::serialize_into(deletion, bitmap_buffer);
value_writer.insert(DelAdd::Deletion, &*bitmap_buffer)?;
}
if !addition.is_empty() {
bitmap_buffer.clear();
CboRoaringBitmapCodec::serialize_into(addition, bitmap_buffer);
value_writer.insert(DelAdd::Addition, &*bitmap_buffer)?;
}
Ok(value_writer.into_inner()?)
}
#[derive(Debug)]
struct RevertedIndex<K> {
inner: HashMap<K, (RoaringBitmap, RoaringBitmap)>,
max_value_size: usize,
}
impl<K: PartialEq + Eq + Hash> RevertedIndex<K> {
pub fn insert(&mut self, key: K, docid: DocumentId, del_add: DelAdd) {
let size = match self.inner.entry(key) {
Occupied(mut entry) => {
let (ref mut del, ref mut add) = entry.get_mut();
match del_add {
DelAdd::Deletion => del.insert(docid),
DelAdd::Addition => add.insert(docid),
};
del.serialized_size() + add.serialized_size()
}
Vacant(entry) => {
let mut bitmap = RoaringBitmap::new();
bitmap.insert(docid);
let size = bitmap.serialized_size();
match del_add {
DelAdd::Deletion => entry.insert((bitmap, RoaringBitmap::new())),
DelAdd::Addition => entry.insert((RoaringBitmap::new(), bitmap)),
};
size * 2
}
};
self.max_value_size = self.max_value_size.max(size);
}
pub fn new() -> Self {
Self { inner: HashMap::new(), max_value_size: 0 }
}
pub fn rough_size_estimate(&self) -> usize {
self.inner.len() * size_of::<K>() + self.inner.len() * self.max_value_size
}
fn clear(&mut self) {
self.max_value_size = 0;
self.inner.clear();
}
}
pub struct WordDocidsDump {
pub word_fid_docids: grenad::Reader<BufReader<File>>,
pub word_docids: grenad::Reader<BufReader<File>>,
pub exact_word_docids: grenad::Reader<BufReader<File>>,
}


@@ -1162,6 +1162,18 @@ impl InnerIndexSettingsDiff {
}
}
pub fn searchable_fields_to_index(&self) -> BTreeSet<FieldId> {
if self.settings_update_only {
self.new
.fields_ids_map
.ids()
.filter(|id| self.reindex_searchable_id(*id).is_some())
.collect()
} else {
self.new.searchable_fields_ids.iter().copied().collect()
}
}
pub fn any_reindexing_needed(&self) -> bool {
self.reindex_searchable() || self.reindex_facets() || self.reindex_vectors()
}