Compare commits

...

9 Commits

Author SHA1 Message Date
Louis Dureuil
7ac1eafceb add facet test 2023-02-09 13:36:21 +01:00
Louis Dureuil
82ca61f366 Remove some clippy type complexity warns by deboxing iters 2023-02-09 13:36:21 +01:00
Louis Dureuil
e8b8319006 Add min and max facet stats 2023-02-09 13:36:21 +01:00
Louis Dureuil
6df9177b3b Update usage of iterators 2023-02-09 13:36:21 +01:00
Louis Dureuil
8656f83a81 facet sort ascending/descending now also return the values 2023-02-09 13:36:21 +01:00
Louis Dureuil
f2d0672453 Add prototype to analytics if any 2023-02-09 13:35:06 +01:00
Louis Dureuil
59cf58e773 If using a prototype, display its name at Meilisearch startup 2023-02-09 13:35:05 +01:00
bors[bot]
9882029fa4 Merge #3456
3456: Bump tokio from 1.24.1 to 1.24.2 r=curquiza a=dependabot[bot]

Bumps [tokio](https://github.com/tokio-rs/tokio) from 1.24.1 to 1.24.2.
Commits
- See full diff in [compare view](https://github.com/tokio-rs/tokio/commits)


[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=tokio&package-manager=cargo&previous-version=1.24.1&new-version=1.24.2)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`.


---

Dependabot commands and options

You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
You can disable automated security fix PRs for this repo from the [Security Alerts page](https://github.com/meilisearch/meilisearch/network/alerts).


Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2023-02-07 13:28:42 +00:00
dependabot[bot]
5f56e6dd58 Bump tokio from 1.24.1 to 1.24.2
Bumps [tokio](https://github.com/tokio-rs/tokio) from 1.24.1 to 1.24.2.
- [Release notes](https://github.com/tokio-rs/tokio/releases)
- [Commits](https://github.com/tokio-rs/tokio/commits)

---
updated-dependencies:
- dependency-name: tokio
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
2023-02-07 12:14:05 +00:00
13 changed files with 424 additions and 50 deletions

Cargo.lock generated
View File

@@ -3867,9 +3867,9 @@ checksum = "cda74da7e1a664f795bb1f8a87ec406fb89a02522cf6e50620d016add6dbbf5c"
[[package]]
name = "tokio"
version = "1.24.1"
version = "1.24.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1d9f76183f91ecfb55e1d7d5602bd1d979e38a3a522fe900241cf195624d67ae"
checksum = "597a12a59981d9e3c38d216785b0c37399f6e415e8d0712047620f189371b0bb"
dependencies = [
"autocfg",
"bytes",

View File

@@ -25,7 +25,7 @@ tar = "0.4.38"
tempfile = "3.3.0"
thiserror = "1.0.30"
time = { version = "0.3.7", features = ["serde-well-known", "formatting", "parsing", "macros"] }
tokio = "1.0"
tokio = "1.24"
uuid = { version = "1.1.2", features = ["serde", "v4"] }
[dev-dependencies]

View File

@@ -65,7 +65,7 @@ tar = "0.4.38"
tempfile = "3.3.0"
thiserror = "1.0.37"
time = { version = "0.3.15", features = ["serde-well-known", "formatting", "parsing", "macros"] }
tokio = { version = "1.21.2", features = ["full"] }
tokio = { version = "1.24.2", features = ["full"] }
tokio-stream = "0.1.10"
toml = "0.5.9"
uuid = { version = "1.1.2", features = ["serde", "v4"] }

View File

@@ -1,7 +1,13 @@
-use vergen::{vergen, Config};
+use vergen::{vergen, Config, SemverKind};
fn main() {
-if let Err(e) = vergen(Config::default()) {
+let mut config = Config::default();
+// allow using non-annotated tags
+*config.git_mut().semver_kind_mut() = SemverKind::Lightweight;
+// add -dirty suffix when we're not right on the tag
+*config.git_mut().semver_dirty_mut() = Some("-dirty");
+if let Err(e) = vergen(config) {
println!("cargo:warning=vergen: {}", e);
}
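The build script above only exports extra metadata; nothing in it reads the value back. A minimal sketch of the consuming side, using the `VERGEN_GIT_SEMVER_LIGHTWEIGHT` variable that vergen sets and that the `prototype_name` helper further down relies on (the function name here is made up for illustration):

```rust
// Minimal sketch: reading a vergen-provided variable at compile time.
// `option_env!` evaluates to None when the variable was not set during the
// build, e.g. when compiling outside of a git checkout.
fn git_semver_lightweight() -> Option<&'static str> {
    option_env!("VERGEN_GIT_SEMVER_LIGHTWEIGHT")
}

fn main() {
    match git_semver_lightweight() {
        Some(version) => println!("built from {version}"),
        None => println!("no lightweight semver recorded at build time"),
    }
}
```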

View File

@@ -401,12 +401,19 @@ impl Segment {
if let Ok(stats) =
create_all_stats(index_scheduler.into(), auth_controller, &SearchRules::default())
{
+// Replace the version number with the prototype name if any.
+let version = if let Some(prototype) = crate::prototype_name() {
+prototype
+} else {
+env!("CARGO_PKG_VERSION")
+};
let _ = self
.batcher
.push(Identify {
context: Some(json!({
"app": {
"version": env!("CARGO_PKG_VERSION").to_string(),
"version": version.to_string(),
},
})),
user: self.user.clone(),

View File

@@ -427,3 +427,35 @@ pub fn configure_metrics_route(config: &mut web::ServiceConfig, enable_metrics_r
);
}
}
/// Parses the output of
/// [`VERGEN_GIT_SEMVER_LIGHTWEIGHT`](https://docs.rs/vergen/latest/vergen/struct.Git.html#instructions)
/// as a prototype name.
///
/// Returns `Some(prototype_name)` if the following conditions are met on this value:
///
/// 1. starts with `prototype-`,
/// 2. does not end with `-dirty`,
/// 3. ends with `-<some_number>`,
/// 4. does not end with `<some_number>-<some_number>`.
///
/// Otherwise, returns `None`.
pub fn prototype_name() -> Option<&'static str> {
let prototype: &'static str = option_env!("VERGEN_GIT_SEMVER_LIGHTWEIGHT")?;
if prototype.ends_with("-dirty") {
return None;
}
if !prototype.starts_with("prototype-") {
return None;
}
let mut rsplit_prototype = prototype.rsplit('-');
// last component MUST be a number
rsplit_prototype.next()?.parse::<u64>().ok()?;
// the second-to-last component MUST NOT be a number
rsplit_prototype.next()?.parse::<u64>().err()?;
Some(prototype)
}
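To make the doc comment above concrete, here is a small, self-contained sketch of the same acceptance rules; the tag names are hypothetical and the helper is not part of the diff:

```rust
// Hypothetical re-statement of the documented rules, for illustration only.
fn is_prototype_name(tag: &str) -> bool {
    if tag.ends_with("-dirty") || !tag.starts_with("prototype-") {
        return false;
    }
    let mut parts = tag.rsplit('-');
    // the last component must be a number...
    let last_is_number = parts.next().map_or(false, |p| p.parse::<u64>().is_ok());
    // ...and the second-to-last component must not be one
    let prev_is_number = parts.next().map_or(false, |p| p.parse::<u64>().is_ok());
    last_is_number && !prev_is_number
}

fn main() {
    assert!(is_prototype_name("prototype-facet-stats-0")); // single trailing number
    assert!(!is_prototype_name("prototype-facet-stats-0-0")); // two trailing numbers
    assert!(!is_prototype_name("prototype-facet-stats-0-dirty")); // dirty worktree
    assert!(!is_prototype_name("v1.0.0")); // not a prototype tag
}
```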

View File

@@ -8,7 +8,7 @@ use actix_web::web::Data;
use actix_web::HttpServer;
use index_scheduler::IndexScheduler;
use meilisearch::analytics::Analytics;
-use meilisearch::{analytics, create_app, setup_meilisearch, Opt};
+use meilisearch::{analytics, create_app, prototype_name, setup_meilisearch, Opt};
use meilisearch_auth::{generate_master_key, AuthController, MASTER_KEY_MIN_SIZE};
use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor};
@@ -137,6 +137,9 @@ pub fn print_launch_resume(
eprintln!("Commit SHA:\t\t{:?}", commit_sha.to_string());
eprintln!("Commit date:\t\t{:?}", commit_date.to_string());
eprintln!("Package version:\t{:?}", env!("CARGO_PKG_VERSION").to_string());
+if let Some(prototype) = prototype_name() {
+eprintln!("Prototype:\t\t{:?}", prototype);
+}
#[cfg(all(not(debug_assertions), feature = "analytics"))]
{

View File

@@ -108,7 +108,7 @@ pub struct SearchHit {
pub matches_position: Option<MatchesPosition>,
}
-#[derive(Serialize, Debug, Clone, PartialEq, Eq)]
+#[derive(Serialize, Debug, Clone, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct SearchResult {
pub hits: Vec<SearchHit>,
@@ -118,6 +118,8 @@ pub struct SearchResult {
pub hits_info: HitsInfo,
#[serde(skip_serializing_if = "Option::is_none")]
pub facet_distribution: Option<BTreeMap<String, BTreeMap<String, u64>>>,
+#[serde(skip_serializing_if = "Option::is_none")]
+pub facet_stats: Option<BTreeMap<String, FacetStats>>,
}
#[derive(Serialize, Debug, Clone, PartialEq, Eq)]
@@ -129,6 +131,12 @@ pub enum HitsInfo {
OffsetLimit { limit: usize, offset: usize, estimated_total_hits: usize },
}
+#[derive(Serialize, Debug, Clone, PartialEq)]
+pub struct FacetStats {
+pub min: f64,
+pub max: f64,
+}
pub fn perform_search(
index: &Index,
query: SearchQuery,
@@ -300,7 +308,7 @@ pub fn perform_search(
HitsInfo::OffsetLimit { limit: query.limit, offset, estimated_total_hits: number_of_hits }
};
-let facet_distribution = match query.facets {
+let (facet_distribution, facet_stats) = match query.facets {
Some(ref fields) => {
let mut facet_distribution = index.facets_distribution(&rtxn);
@@ -314,18 +322,23 @@ pub fn perform_search(
facet_distribution.facets(fields);
}
let distribution = facet_distribution.candidates(candidates).execute()?;
-Some(distribution)
+let stats = facet_distribution.compute_stats()?;
+(Some(distribution), Some(stats))
}
-None => None,
+None => (None, None),
};
+let facet_stats = facet_stats.map(|stats| {
+stats.into_iter().map(|(k, (min, max))| (k, FacetStats { min, max })).collect()
+});
let result = SearchResult {
hits: documents,
hits_info,
query: query.q.clone().unwrap_or_default(),
processing_time_ms: before_search.elapsed().as_millis(),
facet_distribution,
+facet_stats,
};
Ok(result)
}
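Given the `camelCase` rename and the `skip_serializing_if` attribute above, `facet_stats` should show up in the search response as a `facetStats` object keyed by facet name, and disappear entirely when no facets were requested. A minimal serde sketch of just that piece (the `price` facet and the numbers are illustrative; `serde` with the derive feature and `serde_json` are assumed as dependencies):

```rust
use std::collections::BTreeMap;

use serde::Serialize;

#[derive(Serialize)]
struct FacetStats {
    min: f64,
    max: f64,
}

#[derive(Serialize)]
#[serde(rename_all = "camelCase")]
struct Response {
    #[serde(skip_serializing_if = "Option::is_none")]
    facet_stats: Option<BTreeMap<String, FacetStats>>,
}

fn main() {
    let mut stats = BTreeMap::new();
    stats.insert("price".to_string(), FacetStats { min: 2.0, max: 499.0 });

    // {"facetStats":{"price":{"min":2.0,"max":499.0}}}
    println!("{}", serde_json::to_string(&Response { facet_stats: Some(stats) }).unwrap());
    // {} -- the field is skipped when it is None
    println!("{}", serde_json::to_string(&Response { facet_stats: None }).unwrap());
}
```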

View File

@@ -1,5 +1,6 @@
use std::mem::take;
use heed::BytesDecode;
use itertools::Itertools;
use log::debug;
use ordered_float::OrderedFloat;
@@ -7,7 +8,7 @@ use roaring::RoaringBitmap;
use super::{Criterion, CriterionParameters, CriterionResult};
use crate::facet::FacetType;
-use crate::heed_codec::facet::FacetGroupKeyCodec;
+use crate::heed_codec::facet::{FacetGroupKeyCodec, OrderedF64Codec};
use crate::heed_codec::ByteSliceRefCodec;
use crate::search::criteria::{resolve_query_tree, CriteriaBuilder, InitialCandidates};
use crate::search::facet::{ascending_facet_sort, descending_facet_sort};
@@ -196,6 +197,38 @@ fn facet_ordered_iterative<'t>(
Ok(Box::new(number_iter.chain(string_iter).map(Ok)) as Box<dyn Iterator<Item = _>>)
}
fn facet_extreme_value<'t>(
mut extreme_it: impl Iterator<Item = heed::Result<(RoaringBitmap, &'t [u8])>> + 't,
) -> Result<Option<f64>> {
let extreme_value =
if let Some(extreme_value) = extreme_it.next() { extreme_value } else { return Ok(None) };
let (_, extreme_value) = extreme_value?;
Ok(OrderedF64Codec::bytes_decode(extreme_value))
}
pub fn facet_min_value<'t>(
index: &'t Index,
rtxn: &'t heed::RoTxn,
field_id: FieldId,
candidates: RoaringBitmap,
) -> Result<Option<f64>> {
let db = index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>();
let it = ascending_facet_sort(rtxn, db, field_id, candidates)?;
facet_extreme_value(it)
}
pub fn facet_max_value<'t>(
index: &'t Index,
rtxn: &'t heed::RoTxn,
field_id: FieldId,
candidates: RoaringBitmap,
) -> Result<Option<f64>> {
let db = index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>();
let it = descending_facet_sort(rtxn, db, field_id, candidates)?;
facet_extreme_value(it)
}
fn facet_ordered_set_based<'t>(
index: &'t Index,
rtxn: &'t heed::RoTxn,
@@ -203,23 +236,24 @@ fn facet_ordered_set_based<'t>(
is_ascending: bool,
candidates: RoaringBitmap,
) -> Result<Box<dyn Iterator<Item = heed::Result<RoaringBitmap>> + 't>> {
-let make_iter = if is_ascending { ascending_facet_sort } else { descending_facet_sort };
+let number_db =
+index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>();
+let string_db =
+index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>();
-let number_iter = make_iter(
-rtxn,
-index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(),
-field_id,
-candidates.clone(),
-)?;
+let (number_iter, string_iter) = if is_ascending {
+let number_iter = ascending_facet_sort(rtxn, number_db, field_id, candidates.clone())?;
+let string_iter = ascending_facet_sort(rtxn, string_db, field_id, candidates)?;
-let string_iter = make_iter(
-rtxn,
-index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(),
-field_id,
-candidates,
-)?;
+(itertools::Either::Left(number_iter), itertools::Either::Left(string_iter))
+} else {
+let number_iter = descending_facet_sort(rtxn, number_db, field_id, candidates.clone())?;
+let string_iter = descending_facet_sort(rtxn, string_db, field_id, candidates)?;
-Ok(Box::new(number_iter.chain(string_iter)))
+(itertools::Either::Right(number_iter), itertools::Either::Right(string_iter))
+};
+Ok(Box::new(number_iter.chain(string_iter).map(|res| res.map(|(doc_ids, _)| doc_ids))))
}
/// Returns an iterator over groups of the given candidates in ascending or descending order.
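The min/max helpers above lean on one property of the facet sort iterators: at level 0 the first item of the ascending walk carries the smallest facet value among the candidates, and the first item of the descending walk carries the largest, so `facet_extreme_value` only has to decode that first item. The same idea on plain in-memory data, as a sketch:

```rust
// Sketch of "extreme value = first element of a sorted walk", using a Vec
// instead of the LMDB-backed facet databases.
fn extreme(mut it: impl Iterator<Item = f64>) -> Option<f64> {
    it.next()
}

fn main() {
    let mut values = vec![3.5, 0.25, 12.0, 7.0];

    values.sort_by(|a, b| a.partial_cmp(b).unwrap());
    let min = extreme(values.iter().copied()); // ascending walk -> minimum
    values.reverse();
    let max = extreme(values.iter().copied()); // descending walk -> maximum

    assert_eq!(min, Some(0.25));
    assert_eq!(max, Some(12.0));
}
```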

View File

@@ -21,6 +21,7 @@ use crate::update::{MAX_LENGTH_FOR_PREFIX_PROXIMITY_DB, MAX_PROXIMITY_FOR_PREFIX
use crate::{AscDesc as AscDescName, DocumentId, FieldId, Index, Member, Result};
mod asc_desc;
+pub use asc_desc::{facet_max_value, facet_min_value};
mod attribute;
mod exactness;
pub mod r#final;

View File

@@ -278,6 +278,65 @@ impl<'a> FacetDistribution<'a> {
}
}
pub fn compute_stats(&self) -> Result<BTreeMap<String, (f64, f64)>> {
let fields_ids_map = self.index.fields_ids_map(self.rtxn)?;
let filterable_fields = self.index.filterable_fields(self.rtxn)?;
let candidates = if let Some(candidates) = self.candidates.clone() {
candidates
} else {
return Ok(Default::default());
};
let fields = match &self.facets {
Some(facets) => {
let invalid_fields: HashSet<_> = facets
.iter()
.filter(|facet| !crate::is_faceted(facet, &filterable_fields))
.collect();
if !invalid_fields.is_empty() {
return Err(UserError::InvalidFacetsDistribution {
invalid_facets_name: invalid_fields.into_iter().cloned().collect(),
valid_facets_name: filterable_fields.into_iter().collect(),
}
.into());
} else {
facets.clone()
}
}
None => filterable_fields,
};
let mut distribution = BTreeMap::new();
for (fid, name) in fields_ids_map.iter() {
if crate::is_faceted(name, &fields) {
let min_value = if let Some(min_value) = crate::search::criteria::facet_min_value(
self.index,
self.rtxn,
fid,
candidates.clone(),
)? {
min_value
} else {
continue;
};
let max_value = if let Some(max_value) = crate::search::criteria::facet_max_value(
self.index,
self.rtxn,
fid,
candidates.clone(),
)? {
max_value
} else {
continue;
};
distribution.insert(name.to_string(), (min_value, max_value));
}
}
Ok(distribution)
}
pub fn execute(&self) -> Result<BTreeMap<String, BTreeMap<String, u64>>> {
let fields_ids_map = self.index.fields_ids_map(self.rtxn)?;
let filterable_fields = self.index.filterable_fields(self.rtxn)?;
@@ -537,4 +596,216 @@ mod tests {
milli_snap!(format!("{map:?}"), "candidates_0_5_000", @"825f23a4090d05756f46176987b7d992");
}
#[test]
fn facet_stats() {
let mut index = TempIndex::new_with_map_size(4096 * 10_000);
index.index_documents_config.autogenerate_docids = true;
index
.update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") }))
.unwrap();
let facet_values = (0..1000).into_iter().collect::<Vec<_>>();
let mut documents = vec![];
for i in 0..1000 {
let document = serde_json::json!({
"colour": facet_values[i % 1000],
})
.as_object()
.unwrap()
.clone();
documents.push(document);
}
let documents = documents_batch_reader_from_objects(documents);
index.add_documents(documents).unwrap();
let txn = index.read_txn().unwrap();
let map = FacetDistribution::new(&txn, &index)
.facets(std::iter::once("colour"))
.compute_stats()
.unwrap();
milli_snap!(format!("{map:?}"), "no_candidates", @"{}");
let map = FacetDistribution::new(&txn, &index)
.facets(std::iter::once("colour"))
.candidates((0..1000).into_iter().collect())
.compute_stats()
.unwrap();
milli_snap!(format!("{map:?}"), "candidates_0_1000", @r###"{"colour": (0.0, 999.0)}"###);
let map = FacetDistribution::new(&txn, &index)
.facets(std::iter::once("colour"))
.candidates((217..777).into_iter().collect())
.compute_stats()
.unwrap();
milli_snap!(format!("{map:?}"), "candidates_217_777", @r###"{"colour": (217.0, 776.0)}"###);
}
#[test]
fn facet_stats_array() {
let mut index = TempIndex::new_with_map_size(4096 * 10_000);
index.index_documents_config.autogenerate_docids = true;
index
.update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") }))
.unwrap();
let facet_values = (0..1000).into_iter().collect::<Vec<_>>();
let mut documents = vec![];
for i in 0..1000 {
let document = serde_json::json!({
"colour": [facet_values[i % 1000], facet_values[i % 1000] + 1000],
})
.as_object()
.unwrap()
.clone();
documents.push(document);
}
let documents = documents_batch_reader_from_objects(documents);
index.add_documents(documents).unwrap();
let txn = index.read_txn().unwrap();
let map = FacetDistribution::new(&txn, &index)
.facets(std::iter::once("colour"))
.compute_stats()
.unwrap();
milli_snap!(format!("{map:?}"), "no_candidates", @"{}");
let map = FacetDistribution::new(&txn, &index)
.facets(std::iter::once("colour"))
.candidates((0..1000).into_iter().collect())
.compute_stats()
.unwrap();
milli_snap!(format!("{map:?}"), "candidates_0_1000", @r###"{"colour": (0.0, 1999.0)}"###);
let map = FacetDistribution::new(&txn, &index)
.facets(std::iter::once("colour"))
.candidates((217..777).into_iter().collect())
.compute_stats()
.unwrap();
milli_snap!(format!("{map:?}"), "candidates_217_777", @r###"{"colour": (217.0, 1776.0)}"###);
}
#[test]
fn facet_stats_mixed_array() {
let mut index = TempIndex::new_with_map_size(4096 * 10_000);
index.index_documents_config.autogenerate_docids = true;
index
.update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") }))
.unwrap();
let facet_values = (0..1000).into_iter().collect::<Vec<_>>();
let mut documents = vec![];
for i in 0..1000 {
let document = serde_json::json!({
"colour": [facet_values[i % 1000], format!("{}", facet_values[i % 1000] + 1000)],
})
.as_object()
.unwrap()
.clone();
documents.push(document);
}
let documents = documents_batch_reader_from_objects(documents);
index.add_documents(documents).unwrap();
let txn = index.read_txn().unwrap();
let map = FacetDistribution::new(&txn, &index)
.facets(std::iter::once("colour"))
.compute_stats()
.unwrap();
milli_snap!(format!("{map:?}"), "no_candidates", @"{}");
let map = FacetDistribution::new(&txn, &index)
.facets(std::iter::once("colour"))
.candidates((0..1000).into_iter().collect())
.compute_stats()
.unwrap();
milli_snap!(format!("{map:?}"), "candidates_0_1000", @r###"{"colour": (0.0, 999.0)}"###);
let map = FacetDistribution::new(&txn, &index)
.facets(std::iter::once("colour"))
.candidates((217..777).into_iter().collect())
.compute_stats()
.unwrap();
milli_snap!(format!("{map:?}"), "candidates_217_777", @r###"{"colour": (217.0, 776.0)}"###);
}
#[test]
fn facet_mixed_values() {
let mut index = TempIndex::new_with_map_size(4096 * 10_000);
index.index_documents_config.autogenerate_docids = true;
index
.update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") }))
.unwrap();
let facet_values = (0..1000).into_iter().collect::<Vec<_>>();
let mut documents = vec![];
for i in 0..1000 {
let document = if i % 2 == 0 {
serde_json::json!({
"colour": [facet_values[i % 1000], facet_values[i % 1000] + 1000],
})
} else {
serde_json::json!({
"colour": format!("{}", facet_values[i % 1000] + 10000),
})
};
let document = document.as_object().unwrap().clone();
documents.push(document);
}
let documents = documents_batch_reader_from_objects(documents);
index.add_documents(documents).unwrap();
let txn = index.read_txn().unwrap();
let map = FacetDistribution::new(&txn, &index)
.facets(std::iter::once("colour"))
.compute_stats()
.unwrap();
milli_snap!(format!("{map:?}"), "no_candidates", @"{}");
let map = FacetDistribution::new(&txn, &index)
.facets(std::iter::once("colour"))
.candidates((0..1000).into_iter().collect())
.compute_stats()
.unwrap();
milli_snap!(format!("{map:?}"), "candidates_0_1000", @r###"{"colour": (0.0, 1998.0)}"###);
let map = FacetDistribution::new(&txn, &index)
.facets(std::iter::once("colour"))
.candidates((217..777).into_iter().collect())
.compute_stats()
.unwrap();
milli_snap!(format!("{map:?}"), "candidates_217_777", @r###"{"colour": (218.0, 1776.0)}"###);
}
}

View File

@@ -34,15 +34,20 @@ pub fn ascending_facet_sort<'t>(
db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
field_id: u16,
candidates: RoaringBitmap,
-) -> Result<Box<dyn Iterator<Item = Result<RoaringBitmap>> + 't>> {
+) -> Result<impl Iterator<Item = Result<(RoaringBitmap, &'t [u8])>> + 't> {
let highest_level = get_highest_level(rtxn, db, field_id)?;
if let Some(first_bound) = get_first_facet_value::<ByteSliceRefCodec>(rtxn, db, field_id)? {
let first_key = FacetGroupKey { field_id, level: highest_level, left_bound: first_bound };
let iter = db.range(rtxn, &(first_key..)).unwrap().take(usize::MAX);
-Ok(Box::new(AscendingFacetSort { rtxn, db, field_id, stack: vec![(candidates, iter)] }))
+Ok(itertools::Either::Left(AscendingFacetSort {
+rtxn,
+db,
+field_id,
+stack: vec![(candidates, iter)],
+}))
} else {
-Ok(Box::new(std::iter::empty()))
+Ok(itertools::Either::Right(std::iter::empty()))
}
}
@@ -60,7 +65,7 @@ struct AscendingFacetSort<'t, 'e> {
}
impl<'t, 'e> Iterator for AscendingFacetSort<'t, 'e> {
-type Item = Result<RoaringBitmap>;
+type Item = Result<(RoaringBitmap, &'t [u8])>;
fn next(&mut self) -> Option<Self::Item> {
'outer: loop {
@@ -90,7 +95,8 @@ impl<'t, 'e> Iterator for AscendingFacetSort<'t, 'e> {
*documents_ids -= &bitmap;
if level == 0 {
-return Some(Ok(bitmap));
+// Since the level is 0, the left_bound is the exact value.
+return Some(Ok((bitmap, left_bound)));
}
let starting_key_below =
FacetGroupKey { field_id: self.field_id, level: level - 1, left_bound };
@@ -130,7 +136,7 @@ mod tests {
let mut results = String::new();
let iter = ascending_facet_sort(&txn, index.content, 0, candidates).unwrap();
for el in iter {
-let docids = el.unwrap();
+let (docids, _) = el.unwrap();
results.push_str(&display_bitmap(&docids));
results.push('\n');
}
@@ -152,7 +158,7 @@ mod tests {
let mut results = String::new();
let iter = ascending_facet_sort(&txn, index.content, 0, candidates.clone()).unwrap();
for el in iter {
-let docids = el.unwrap();
+let (docids, _) = el.unwrap();
results.push_str(&display_bitmap(&docids));
results.push('\n');
}
@@ -161,7 +167,7 @@ mod tests {
let mut results = String::new();
let iter = ascending_facet_sort(&txn, index.content, 1, candidates).unwrap();
for el in iter {
-let docids = el.unwrap();
+let (docids, _) = el.unwrap();
results.push_str(&display_bitmap(&docids));
results.push('\n');
}
@@ -183,7 +189,7 @@ mod tests {
let mut results = String::new();
let iter = ascending_facet_sort(&txn, index.content, 0, candidates.clone()).unwrap();
for el in iter {
-let docids = el.unwrap();
+let (docids, _) = el.unwrap();
results.push_str(&display_bitmap(&docids));
results.push('\n');
}
@@ -192,7 +198,7 @@ mod tests {
let mut results = String::new();
let iter = ascending_facet_sort(&txn, index.content, 1, candidates).unwrap();
for el in iter {
-let docids = el.unwrap();
+let (docids, _) = el.unwrap();
results.push_str(&display_bitmap(&docids));
results.push('\n');
}
@@ -214,7 +220,7 @@ mod tests {
let mut results = String::new();
let iter = ascending_facet_sort(&txn, index.content, 3, candidates.clone()).unwrap();
for el in iter {
-let docids = el.unwrap();
+let (docids, _) = el.unwrap();
results.push_str(&display_bitmap(&docids));
results.push('\n');
}
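This file shows the deboxing from commit 82ca61f366: instead of `Box<dyn Iterator>`, `ascending_facet_sort` now returns `impl Iterator` and reconciles its two possible concrete iterator types with `itertools::Either`, which implements `Iterator` whenever both sides do. A generic sketch of that pattern (the example function is invented; `itertools` is already a dependency of the crate being patched):

```rust
use itertools::Either;

// Either<L, R> is itself an Iterator when both L and R are, so a function can
// return one of two different iterator types behind a single `impl Iterator`
// without allocating a Box<dyn Iterator>.
fn evens_or_all(only_evens: bool) -> impl Iterator<Item = u32> {
    if only_evens {
        Either::Left((0..10).filter(|n| n % 2 == 0))
    } else {
        Either::Right(0..10)
    }
}

fn main() {
    let evens: Vec<u32> = evens_or_all(true).collect();
    assert_eq!(evens, vec![0, 2, 4, 6, 8]);
    assert_eq!(evens_or_all(false).count(), 10);
}
```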

View File

@@ -17,21 +17,21 @@ pub fn descending_facet_sort<'t>(
db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
field_id: u16,
candidates: RoaringBitmap,
-) -> Result<Box<dyn Iterator<Item = Result<RoaringBitmap>> + 't>> {
+) -> Result<impl Iterator<Item = Result<(RoaringBitmap, &'t [u8])>> + 't> {
let highest_level = get_highest_level(rtxn, db, field_id)?;
if let Some(first_bound) = get_first_facet_value::<ByteSliceRefCodec>(rtxn, db, field_id)? {
let first_key = FacetGroupKey { field_id, level: highest_level, left_bound: first_bound };
let last_bound = get_last_facet_value::<ByteSliceRefCodec>(rtxn, db, field_id)?.unwrap();
let last_key = FacetGroupKey { field_id, level: highest_level, left_bound: last_bound };
let iter = db.rev_range(rtxn, &(first_key..=last_key))?.take(usize::MAX);
-Ok(Box::new(DescendingFacetSort {
+Ok(itertools::Either::Left(DescendingFacetSort {
rtxn,
db,
field_id,
stack: vec![(candidates, iter, Bound::Included(last_bound))],
}))
} else {
-Ok(Box::new(std::iter::empty()))
+Ok(itertools::Either::Right(std::iter::empty()))
}
}
@@ -50,7 +50,7 @@ struct DescendingFacetSort<'t> {
}
impl<'t> Iterator for DescendingFacetSort<'t> {
-type Item = Result<RoaringBitmap>;
+type Item = Result<(RoaringBitmap, &'t [u8])>;
fn next(&mut self) -> Option<Self::Item> {
'outer: loop {
@@ -77,7 +77,8 @@ impl<'t> Iterator for DescendingFacetSort<'t> {
*documents_ids -= &bitmap;
if level == 0 {
-return Some(Ok(bitmap));
+// Since we're at level 0, the left_bound is the exact value.
+return Some(Ok((bitmap, left_bound)));
}
let starting_key_below =
FacetGroupKey { field_id, level: level - 1, left_bound };
@@ -146,7 +147,7 @@ mod tests {
let db = index.content.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>();
let iter = descending_facet_sort(&txn, db, 0, candidates).unwrap();
for el in iter {
-let docids = el.unwrap();
+let (docids, _) = el.unwrap();
results.push_str(&display_bitmap(&docids));
results.push('\n');
}
@@ -169,7 +170,7 @@ mod tests {
let db = index.content.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>();
let iter = descending_facet_sort(&txn, db, 0, candidates.clone()).unwrap();
for el in iter {
-let docids = el.unwrap();
+let (docids, _) = el.unwrap();
results.push_str(&display_bitmap(&docids));
results.push('\n');
}
@@ -179,7 +180,7 @@ mod tests {
let iter = descending_facet_sort(&txn, db, 1, candidates).unwrap();
for el in iter {
-let docids = el.unwrap();
+let (docids, _) = el.unwrap();
results.push_str(&display_bitmap(&docids));
results.push('\n');
}
@@ -200,7 +201,7 @@ mod tests {
let mut results = String::new();
let iter = descending_facet_sort(&txn, index.content, 0, candidates.clone()).unwrap();
for el in iter {
-let docids = el.unwrap();
+let (docids, _) = el.unwrap();
results.push_str(&display_bitmap(&docids));
results.push('\n');
}
@@ -209,7 +210,7 @@ mod tests {
let mut results = String::new();
let iter = descending_facet_sort(&txn, index.content, 1, candidates).unwrap();
for el in iter {
-let docids = el.unwrap();
+let (docids, _) = el.unwrap();
results.push_str(&display_bitmap(&docids));
results.push('\n');
}
@@ -231,7 +232,7 @@ mod tests {
let mut results = String::new();
let iter = descending_facet_sort(&txn, index.content, 3, candidates.clone()).unwrap();
for el in iter {
-let docids = el.unwrap();
+let (docids, _) = el.unwrap();
results.push_str(&display_bitmap(&docids));
results.push('\n');
}