Compare commits

1 Commit

Author       SHA1        Message  Date
ManyTheFish  ac76f243c7  WIP      2025-12-22 14:08:42 +01:00
110 changed files with 1470 additions and 2138 deletions

View File

@@ -15,7 +15,7 @@ env:
jobs:
test-linux:
name: Tests on ${{ matrix.runner }} ${{ matrix.features }}
name: Tests on Ubuntu
runs-on: ${{ matrix.runner }}
strategy:
matrix:

Cargo.lock (generated): 606 changed lines

File diff suppressed because it is too large.

View File

@@ -23,7 +23,7 @@ members = [
]
[workspace.package]
version = "1.30.1"
version = "1.30.0"
authors = [
"Quentin de Quelen <quentin@dequelen.me>",
"Clément Renault <clement@meilisearch.com>",

View File

@@ -172,7 +172,8 @@ pub fn run_benches(c: &mut criterion::Criterion, confs: &[Conf]) {
|b, &query| {
b.iter(|| {
let rtxn = index.read_txn().unwrap();
let mut search = index.search(&rtxn);
let progress = Progress::default();
let mut search = index.search(&rtxn, &progress);
search
.query(query)
.terms_matching_strategy(TermsMatchingStrategy::default());
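
Throughout this commit, `Index::search` gains a `&Progress` argument, so every caller now builds a `Progress` handle before opening a search, as in the benchmark hunk above. A minimal sketch of the updated call pattern, with signatures inferred from the hunks rather than from published milli docs:

use milli::progress::Progress;
use milli::{Index, TermsMatchingStrategy};

// Illustrative helper, not part of the diff.
fn run_query(index: &Index, query: &str) -> milli::Result<Vec<u32>> {
    let rtxn = index.read_txn()?;
    // A fresh progress tracker; the search threads it through its steps.
    let progress = Progress::default();
    let mut search = index.search(&rtxn, &progress);
    search.query(query).terms_matching_strategy(TermsMatchingStrategy::default());
    // `documents_ids` is the same field the fuzzer hunk below reads.
    Ok(search.execute()?.documents_ids)
}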

View File

@@ -153,7 +153,8 @@ fn main() {
.unwrap();
// after executing a batch we check if the database is corrupted
let res = index.search(&wtxn).execute().unwrap();
let progress = Progress::default();
let res = index.search(&wtxn, &progress).execute().unwrap();
index.documents(&wtxn, res.documents_ids).unwrap();
progression.fetch_add(1, Ordering::Relaxed);
}

View File

@@ -4,7 +4,7 @@ use std::fmt::Write;
use meilisearch_types::batches::{Batch, BatchEnqueuedAt, BatchStats};
use meilisearch_types::heed::types::{SerdeBincode, SerdeJson, Str};
use meilisearch_types::heed::{Database, RoTxn};
use meilisearch_types::milli::{DeCboRoaringBitmapCodec, RoaringBitmapCodec, BEU32};
use meilisearch_types::milli::{CboRoaringBitmapCodec, RoaringBitmapCodec, BEU32};
use meilisearch_types::tasks::{Details, Kind, Status, Task};
use meilisearch_types::versioning::{self, VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH};
use roaring::RoaringBitmap;
@@ -188,7 +188,7 @@ pub fn snapshot_all_batches(rtxn: &RoTxn, db: Database<BEU32, SerdeJson<Batch>>)
pub fn snapshot_batches_to_tasks_mappings(
rtxn: &RoTxn,
db: Database<BEU32, DeCboRoaringBitmapCodec>,
db: Database<BEU32, CboRoaringBitmapCodec>,
) -> String {
let mut snap = String::new();
let iter = db.iter(rtxn).unwrap();
@@ -199,7 +199,7 @@ pub fn snapshot_batches_to_tasks_mappings(
snap
}
pub fn snapshot_date_db(rtxn: &RoTxn, db: Database<BEI128, DeCboRoaringBitmapCodec>) -> String {
pub fn snapshot_date_db(rtxn: &RoTxn, db: Database<BEI128, CboRoaringBitmapCodec>) -> String {
let mut snap = String::new();
let iter = db.iter(rtxn).unwrap();
for next in iter {

View File

@@ -4,7 +4,7 @@ use std::ops::{Bound, RangeBounds};
use meilisearch_types::batches::{Batch, BatchId};
use meilisearch_types::heed::types::{DecodeIgnore, SerdeBincode, SerdeJson, Str};
use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn, WithoutTls};
use meilisearch_types::milli::{DeCboRoaringBitmapCodec, RoaringBitmapCodec, BEU32};
use meilisearch_types::milli::{CboRoaringBitmapCodec, RoaringBitmapCodec, BEU32};
use meilisearch_types::tasks::{Kind, Status};
use roaring::{MultiOps, RoaringBitmap};
use time::OffsetDateTime;
@@ -42,11 +42,11 @@ pub struct BatchQueue {
/// Store the batches associated to an index.
pub(crate) index_tasks: Database<Str, RoaringBitmapCodec>,
/// Store the batches containing tasks which were enqueued at a specific date
pub(crate) enqueued_at: Database<BEI128, DeCboRoaringBitmapCodec>,
pub(crate) enqueued_at: Database<BEI128, CboRoaringBitmapCodec>,
/// Store the batches containing finished tasks started at a specific date
pub(crate) started_at: Database<BEI128, DeCboRoaringBitmapCodec>,
pub(crate) started_at: Database<BEI128, CboRoaringBitmapCodec>,
/// Store the batches containing tasks finished at a specific date
pub(crate) finished_at: Database<BEI128, DeCboRoaringBitmapCodec>,
pub(crate) finished_at: Database<BEI128, CboRoaringBitmapCodec>,
}
impl BatchQueue {

View File

@@ -14,7 +14,7 @@ use std::time::Duration;
use file_store::FileStore;
use meilisearch_types::batches::BatchId;
use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn, WithoutTls};
use meilisearch_types::milli::{DeCboRoaringBitmapCodec, BEU32};
use meilisearch_types::milli::{CboRoaringBitmapCodec, BEU32};
use meilisearch_types::tasks::network::DbTaskNetwork;
use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task};
use roaring::RoaringBitmap;
@@ -131,7 +131,7 @@ pub struct Queue {
pub(crate) batches: batches::BatchQueue,
/// Matches a batch id with the associated task ids.
pub(crate) batch_to_tasks_mapping: Database<BEU32, DeCboRoaringBitmapCodec>,
pub(crate) batch_to_tasks_mapping: Database<BEU32, CboRoaringBitmapCodec>,
/// The list of files referenced by the tasks.
pub(crate) file_store: FileStore,

View File

@@ -2,7 +2,7 @@ use std::ops::{Bound, RangeBounds};
use meilisearch_types::heed::types::{DecodeIgnore, SerdeBincode, SerdeJson, Str};
use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn, WithoutTls};
use meilisearch_types::milli::{DeCboRoaringBitmapCodec, RoaringBitmapCodec, BEU32};
use meilisearch_types::milli::{CboRoaringBitmapCodec, RoaringBitmapCodec, BEU32};
use meilisearch_types::tasks::network::DbTaskNetwork;
use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task};
use roaring::{MultiOps, RoaringBitmap};
@@ -44,11 +44,11 @@ pub struct TaskQueue {
/// Store the tasks that were canceled by a task uid
pub(crate) canceled_by: Database<BEU32, RoaringBitmapCodec>,
/// Store the task ids of tasks which were enqueued at a specific date
pub(crate) enqueued_at: Database<BEI128, DeCboRoaringBitmapCodec>,
pub(crate) enqueued_at: Database<BEI128, CboRoaringBitmapCodec>,
/// Store the task ids of finished tasks which started being processed at a specific date
pub(crate) started_at: Database<BEI128, DeCboRoaringBitmapCodec>,
pub(crate) started_at: Database<BEI128, CboRoaringBitmapCodec>,
/// Store the task ids of tasks which finished at a specific date
pub(crate) finished_at: Database<BEI128, DeCboRoaringBitmapCodec>,
pub(crate) finished_at: Database<BEI128, CboRoaringBitmapCodec>,
}
impl TaskQueue {

View File

@@ -662,8 +662,13 @@ impl IndexScheduler {
// 2. Get the task set for index = name that appeared before the index swap task
let mut index_lhs_task_ids = self.queue.tasks.index_tasks(wtxn, lhs)?;
index_lhs_task_ids.remove_range(task_id..);
let mut index_rhs_task_ids = self.queue.tasks.index_tasks(wtxn, rhs)?;
index_rhs_task_ids.remove_range(task_id..);
let index_rhs_task_ids = if rename {
let mut index_rhs_task_ids = self.queue.tasks.index_tasks(wtxn, rhs)?;
index_rhs_task_ids.remove_range(task_id..);
index_rhs_task_ids
} else {
RoaringBitmap::new()
};
// 3. before_name -> new_name in the task's KindWithContent
progress.update_progress(InnerSwappingTwoIndexes::UpdateTheTasks);

View File

@@ -7,9 +7,9 @@ source: crates/index-scheduler/src/scheduler/test.rs
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: succeeded, details: { primary_key: Some("id"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "b", primary_key: Some("id") }}
1 {uid: 1, batch_uid: 1, status: succeeded, details: { primary_key: Some("id"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "a", primary_key: Some("id") }}
1 {uid: 1, batch_uid: 1, status: succeeded, details: { primary_key: Some("id"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "b", primary_key: Some("id") }}
2 {uid: 2, batch_uid: 2, status: succeeded, details: { primary_key: Some("id"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "d", primary_key: Some("id") }}
3 {uid: 3, batch_uid: 3, status: succeeded, details: { primary_key: Some("id"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "c", primary_key: Some("id") }}
3 {uid: 3, batch_uid: 3, status: succeeded, details: { primary_key: Some("id"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "d", primary_key: Some("id") }}
4 {uid: 4, batch_uid: 4, status: succeeded, details: { swaps: [IndexSwap { indexes: ("a", "b"), rename: false }, IndexSwap { indexes: ("c", "d"), rename: false }] }, kind: IndexSwap { swaps: [IndexSwap { indexes: ("a", "b"), rename: false }, IndexSwap { indexes: ("c", "d"), rename: false }] }}
5 {uid: 5, status: enqueued, details: { swaps: [IndexSwap { indexes: ("a", "c"), rename: false }] }, kind: IndexSwap { swaps: [IndexSwap { indexes: ("a", "c"), rename: false }] }}
----------------------------------------------------------------------
@@ -22,10 +22,10 @@ succeeded [0,1,2,3,4,]
"indexSwap" [4,5,]
----------------------------------------------------------------------
### Index Tasks:
a [1,4,5,]
b [0,4,]
c [3,4,5,]
d [2,4,]
a [4,5,]
b [0,1,4,]
c [4,5,]
d [2,3,4,]
----------------------------------------------------------------------
### Index Mapper:
a: { number_of_documents: 0, field_distribution: {} }

View File

@@ -7,9 +7,9 @@ source: crates/index-scheduler/src/scheduler/test.rs
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: succeeded, details: { primary_key: Some("id"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "b", primary_key: Some("id") }}
1 {uid: 1, batch_uid: 1, status: succeeded, details: { primary_key: Some("id"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "c", primary_key: Some("id") }}
1 {uid: 1, batch_uid: 1, status: succeeded, details: { primary_key: Some("id"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "b", primary_key: Some("id") }}
2 {uid: 2, batch_uid: 2, status: succeeded, details: { primary_key: Some("id"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "d", primary_key: Some("id") }}
3 {uid: 3, batch_uid: 3, status: succeeded, details: { primary_key: Some("id"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "a", primary_key: Some("id") }}
3 {uid: 3, batch_uid: 3, status: succeeded, details: { primary_key: Some("id"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "d", primary_key: Some("id") }}
4 {uid: 4, batch_uid: 4, status: succeeded, details: { swaps: [IndexSwap { indexes: ("c", "b"), rename: false }, IndexSwap { indexes: ("a", "d"), rename: false }] }, kind: IndexSwap { swaps: [IndexSwap { indexes: ("c", "b"), rename: false }, IndexSwap { indexes: ("a", "d"), rename: false }] }}
5 {uid: 5, batch_uid: 5, status: succeeded, details: { swaps: [IndexSwap { indexes: ("a", "c"), rename: false }] }, kind: IndexSwap { swaps: [IndexSwap { indexes: ("a", "c"), rename: false }] }}
----------------------------------------------------------------------
@@ -22,10 +22,10 @@ succeeded [0,1,2,3,4,5,]
"indexSwap" [4,5,]
----------------------------------------------------------------------
### Index Tasks:
a [3,4,5,]
b [0,4,]
c [1,4,5,]
d [2,4,]
a [5,]
b [0,1,4,]
c [4,5,]
d [2,3,4,]
----------------------------------------------------------------------
### Index Mapper:
a: { number_of_documents: 0, field_distribution: {} }

View File

@@ -7,9 +7,9 @@ source: crates/index-scheduler/src/scheduler/test.rs
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: succeeded, details: { primary_key: Some("id"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "b", primary_key: Some("id") }}
1 {uid: 1, batch_uid: 1, status: succeeded, details: { primary_key: Some("id"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "c", primary_key: Some("id") }}
1 {uid: 1, batch_uid: 1, status: succeeded, details: { primary_key: Some("id"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "b", primary_key: Some("id") }}
2 {uid: 2, batch_uid: 2, status: succeeded, details: { primary_key: Some("id"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "d", primary_key: Some("id") }}
3 {uid: 3, batch_uid: 3, status: succeeded, details: { primary_key: Some("id"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "a", primary_key: Some("id") }}
3 {uid: 3, batch_uid: 3, status: succeeded, details: { primary_key: Some("id"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "d", primary_key: Some("id") }}
4 {uid: 4, batch_uid: 4, status: succeeded, details: { swaps: [IndexSwap { indexes: ("c", "b"), rename: false }, IndexSwap { indexes: ("a", "d"), rename: false }] }, kind: IndexSwap { swaps: [IndexSwap { indexes: ("c", "b"), rename: false }, IndexSwap { indexes: ("a", "d"), rename: false }] }}
5 {uid: 5, batch_uid: 5, status: succeeded, details: { swaps: [IndexSwap { indexes: ("a", "c"), rename: false }] }, kind: IndexSwap { swaps: [IndexSwap { indexes: ("a", "c"), rename: false }] }}
6 {uid: 6, batch_uid: 6, status: succeeded, details: { swaps: [] }, kind: IndexSwap { swaps: [] }}
@@ -23,10 +23,10 @@ succeeded [0,1,2,3,4,5,6,]
"indexSwap" [4,5,6,]
----------------------------------------------------------------------
### Index Tasks:
a [3,4,5,]
b [0,4,]
c [1,4,5,]
d [2,4,]
a [5,]
b [0,1,4,]
c [4,5,]
d [2,3,4,]
----------------------------------------------------------------------
### Index Mapper:
a: { number_of_documents: 0, field_distribution: {} }

View File

@@ -5,7 +5,7 @@ use crate::test_utils::Breakpoint::*;
use crate::test_utils::{
index_creation_task, read_json, replace_document_import_task, sample_documents,
};
use crate::{IndexScheduler, Query};
use crate::IndexScheduler;
use big_s::S;
use meili_snap::{json_string, snapshot};
use meilisearch_auth::AuthFilter;
@@ -404,103 +404,6 @@ fn swap_indexes() {
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "third_empty_swap_processed");
}
#[test]
fn swap_indexes_with_correct_task_allocations() {
let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]);
let to_enqueue = [index_creation_task("a", "id"), index_creation_task("b", "id")];
for task in to_enqueue {
let _ = index_scheduler.register(task, None, false).unwrap();
index_scheduler.assert_internally_consistent();
}
handle.advance_n_successful_batches(2);
let (file0, count0) = sample_documents(&index_scheduler, 1, 1);
let (file1, count1) = sample_documents(&index_scheduler, 2, 2);
let (file2, count2) = sample_documents(&index_scheduler, 3, 3);
let (file3, count3) = sample_documents(&index_scheduler, 4, 4);
file0.persist().unwrap();
file1.persist().unwrap();
file2.persist().unwrap();
file3.persist().unwrap();
index_scheduler
.register(replace_document_import_task("a", Some("id"), 1, count0), None, false)
.unwrap();
index_scheduler
.register(replace_document_import_task("a", Some("id"), 2, count1), None, false)
.unwrap();
index_scheduler
.register(replace_document_import_task("b", Some("id"), 3, count2), None, false)
.unwrap();
index_scheduler
.register(replace_document_import_task("b", Some("id"), 4, count3), None, false)
.unwrap();
handle.advance_n_successful_batches(2);
let (a_tasks, _) = index_scheduler
.get_tasks_from_authorized_indexes(
&Query { index_uids: Some(vec!["a".to_string()]), ..Default::default() },
&AuthFilter::default(),
)
.unwrap();
assert_eq!(a_tasks.len(), 3);
let (b_tasks, _) = index_scheduler
.get_tasks_from_authorized_indexes(
&Query { index_uids: Some(vec!["b".to_string()]), ..Default::default() },
&AuthFilter::default(),
)
.unwrap();
assert_eq!(b_tasks.len(), 3);
index_scheduler
.register(
KindWithContent::IndexSwap {
swaps: vec![IndexSwap { indexes: ("a".to_owned(), "b".to_owned()), rename: false }],
},
None,
false,
)
.unwrap();
handle.advance_one_successful_batch();
let (a_after_tasks, _) = index_scheduler
.get_tasks_from_authorized_indexes(
&Query { index_uids: Some(vec!["a".to_string()]), ..Default::default() },
&AuthFilter::default(),
)
.unwrap();
let (b_after_tasks, _) = index_scheduler
.get_tasks_from_authorized_indexes(
&Query { index_uids: Some(vec!["b".to_string()]), ..Default::default() },
&AuthFilter::default(),
)
.unwrap();
assert_eq!(a_after_tasks.len(), 3);
assert_eq!(a_after_tasks.len(), b_after_tasks.len());
for (a, b) in a_tasks.iter().zip(b_after_tasks.iter()) {
assert_eq!(a.uid, b.uid);
}
for (b, a) in b_tasks.iter().zip(a_after_tasks.iter()) {
assert_eq!(b.uid, a.uid);
}
}
#[test]
fn swap_indexes_errors() {
let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]);

View File

@@ -8,7 +8,7 @@ use convert_case::{Case, Casing as _};
use meilisearch_types::batches::{Batch, BatchEnqueuedAt, BatchId, BatchStats};
use meilisearch_types::heed::{Database, RoTxn, RwTxn};
use meilisearch_types::milli::progress::Progress;
use meilisearch_types::milli::{ChannelCongestion, DeCboRoaringBitmapCodec};
use meilisearch_types::milli::{CboRoaringBitmapCodec, ChannelCongestion};
use meilisearch_types::task_view::DetailsView;
use meilisearch_types::tasks::{
BatchStopReason, Details, IndexSwap, Kind, KindWithContent, Status,
@@ -211,7 +211,7 @@ impl ProcessingBatch {
pub(crate) fn insert_task_datetime(
wtxn: &mut RwTxn,
database: Database<BEI128, DeCboRoaringBitmapCodec>,
database: Database<BEI128, CboRoaringBitmapCodec>,
time: OffsetDateTime,
task_id: TaskId,
) -> Result<()> {
@@ -224,7 +224,7 @@ pub(crate) fn insert_task_datetime(
pub(crate) fn remove_task_datetime(
wtxn: &mut RwTxn,
database: Database<BEI128, DeCboRoaringBitmapCodec>,
database: Database<BEI128, CboRoaringBitmapCodec>,
time: OffsetDateTime,
task_id: TaskId,
) -> Result<()> {
@@ -243,7 +243,7 @@ pub(crate) fn remove_task_datetime(
pub(crate) fn remove_n_tasks_datetime_earlier_than(
wtxn: &mut RwTxn,
database: Database<BEI128, DeCboRoaringBitmapCodec>,
database: Database<BEI128, CboRoaringBitmapCodec>,
earlier_than: OffsetDateTime,
mut count: usize,
task_id: TaskId,
@@ -271,7 +271,7 @@ pub(crate) fn remove_n_tasks_datetime_earlier_than(
pub(crate) fn keep_ids_within_datetimes(
rtxn: &RoTxn,
ids: &mut RoaringBitmap,
database: Database<BEI128, DeCboRoaringBitmapCodec>,
database: Database<BEI128, CboRoaringBitmapCodec>,
after: Option<OffsetDateTime>,
before: Option<OffsetDateTime>,
) -> Result<()> {

View File

@@ -2,7 +2,7 @@ use std::collections::BTreeMap;
use base64::Engine as _;
use itertools::{EitherOrBoth, Itertools as _};
use milli::{DeCboRoaringBitmapCodec, DocumentId};
use milli::{CboRoaringBitmapCodec, DocumentId};
use roaring::RoaringBitmap;
use serde::{Deserialize, Serialize};
use utoipa::ToSchema;
@@ -461,8 +461,7 @@ impl Serialize for TaskKeys {
{
let TaskKeys(task_keys) = self;
let mut bytes = Vec::new();
// TODO correctly handle this io::Error
DeCboRoaringBitmapCodec::serialize_into(task_keys, &mut bytes).unwrap();
CboRoaringBitmapCodec::serialize_into_vec(task_keys, &mut bytes);
let encoded = base64::prelude::BASE64_STANDARD.encode(&bytes);
serializer.serialize_str(&encoded)
}
@@ -499,7 +498,7 @@ impl<'de> serde::de::Visitor<'de> for TaskKeysVisitor {
where
E: serde::de::Error,
{
let task_keys = DeCboRoaringBitmapCodec::deserialize_from(decoded).map_err(|_err| {
let task_keys = CboRoaringBitmapCodec::deserialize_from(decoded).map_err(|_err| {
E::invalid_value(serde::de::Unexpected::Bytes(decoded), &"a cbo roaring bitmap")
})?;
Ok(TaskKeys(task_keys))
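
`TaskKeys` above round-trips a `RoaringBitmap` through the Cbo codec and base64. A minimal sketch of the same idea using plain `roaring` serialization in place of the milli-internal `CboRoaringBitmapCodec` (a deliberate simplification; the real codec stores very small sets as raw `u32`s instead):

use base64::Engine as _;
use roaring::RoaringBitmap;

fn encode_task_keys(task_keys: &RoaringBitmap) -> String {
    let mut bytes = Vec::new();
    // Writing into a Vec cannot fail, mirroring the now-infallible
    // `serialize_into_vec` call in the hunk above.
    task_keys.serialize_into(&mut bytes).expect("Vec writes are infallible");
    base64::prelude::BASE64_STANDARD.encode(&bytes)
}

fn decode_task_keys(encoded: &str) -> Option<RoaringBitmap> {
    let bytes = base64::prelude::BASE64_STANDARD.decode(encoded).ok()?;
    RoaringBitmap::deserialize_from(&bytes[..]).ok()
}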

View File

@@ -300,7 +300,6 @@ impl Infos {
max_indexing_memory,
max_indexing_threads,
skip_index_budget: _,
experimental_disable_delta_encoding: _,
experimental_no_edition_2024_for_settings,
experimental_no_edition_2024_for_dumps,
experimental_no_edition_2024_for_prefix_post_processing,

View File

@@ -21,7 +21,6 @@ use meilisearch::{
LogStderrType, Opt, ServicesData, SubscriberForSecondLayer,
};
use meilisearch_auth::{generate_master_key, AuthController, MASTER_KEY_MIN_SIZE};
use meilisearch_types::milli::heed_codec::DELTA_ENCODING_STATUS;
use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor};
use tracing::level_filters::LevelFilter;
use tracing_subscriber::layer::SubscriberExt as _;
@@ -96,14 +95,6 @@ async fn main() -> anyhow::Result<()> {
async fn try_main(runtime: tokio::runtime::Handle) -> anyhow::Result<()> {
let (opt, config_read_from) = Opt::try_build()?;
// Disables the delta encoding of bitmaps as soon as possible
if opt.indexer_options.experimental_disable_delta_encoding {
DELTA_ENCODING_STATUS.set_to_disabled()
} else {
DELTA_ENCODING_STATUS.set_to_enabled()
}
.expect("the delta-encoding status to be set only once");
std::panic::set_hook(Box::new(on_panic));
anyhow::ensure!(

View File

@@ -60,7 +60,6 @@ const MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_FACET_POST_PROCESSING: &str =
"MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_FACET_POST_PROCESSING";
const MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_PREFIX_POST_PROCESSING: &str =
"MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_PREFIX_POST_PROCESSING";
const MEILI_EXPERIMENTAL_DISABLE_DELTA_ENCODING: &str = "MEILI_EXPERIMENTAL_DISABLE_DELTA_ENCODING";
const MEILI_EXPERIMENTAL_ENABLE_METRICS: &str = "MEILI_EXPERIMENTAL_ENABLE_METRICS";
const MEILI_EXPERIMENTAL_SEARCH_QUEUE_SIZE: &str = "MEILI_EXPERIMENTAL_SEARCH_QUEUE_SIZE";
const MEILI_EXPERIMENTAL_DROP_SEARCH_AFTER: &str = "MEILI_EXPERIMENTAL_DROP_SEARCH_AFTER";
@@ -849,14 +848,6 @@ pub struct IndexerOpts {
#[clap(long, env = MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_FACET_POST_PROCESSING)]
#[serde(default)]
pub experimental_no_edition_2024_for_facet_post_processing: bool,
/// Experimental disable delta-encoding for bitmaps. For more information,
/// see: <https://github.com/orgs/meilisearch/discussions/875>
///
/// Enables the experimental disable delta-encoding for bitmaps feature.
#[clap(long, env = MEILI_EXPERIMENTAL_DISABLE_DELTA_ENCODING)]
#[serde(default)]
pub experimental_disable_delta_encoding: bool,
}
impl IndexerOpts {
@@ -870,7 +861,6 @@ impl IndexerOpts {
experimental_no_edition_2024_for_dumps,
experimental_no_edition_2024_for_prefix_post_processing,
experimental_no_edition_2024_for_facet_post_processing,
experimental_disable_delta_encoding,
} = self;
if let Some(max_indexing_memory) = max_indexing_memory.0 {
export_to_env_if_not_present(
@@ -908,12 +898,6 @@ impl IndexerOpts {
experimental_no_edition_2024_for_facet_post_processing.to_string(),
);
}
if experimental_disable_delta_encoding {
export_to_env_if_not_present(
MEILI_EXPERIMENTAL_DISABLE_DELTA_ENCODING,
experimental_disable_delta_encoding.to_string(),
);
}
}
}
@@ -929,7 +913,6 @@ impl TryFrom<&IndexerOpts> for IndexerConfig {
experimental_no_edition_2024_for_dumps,
experimental_no_edition_2024_for_prefix_post_processing,
experimental_no_edition_2024_for_facet_post_processing,
experimental_disable_delta_encoding: _, // managed in try_main
} = other;
let thread_pool = ThreadPoolNoAbortBuilder::new_for_indexing()
@@ -1309,7 +1292,7 @@ where
T: AsRef<OsStr>,
{
if let Err(VarError::NotPresent) = std::env::var(key) {
unsafe { std::env::set_var(key, value) }
std::env::set_var(key, value);
}
}

View File

@@ -1,7 +1,8 @@
use std::time::Duration;
use meilisearch_types::error::{Code, ErrorCode, ResponseError};
use meilisearch_types::milli::TimeBudget;
use meilisearch_types::milli::progress::Progress;
use meilisearch_types::milli::{SearchStep, TimeBudget};
use rand::Rng;
use reqwest::Client;
use serde::{Deserialize, Serialize};
@@ -346,9 +347,11 @@ impl PersonalizationService {
personalize: &Personalize,
query: Option<&str>,
time_budget: TimeBudget,
progress: &Progress,
) -> Result<SearchResult, ResponseError> {
match self {
Self::Cohere(cohere_service) => {
let _ = progress.update_progress_scoped(SearchStep::ApplyingPersonalization);
cohere_service
.rerank_search_results(search_result, personalize, query, time_budget)
.await

View File

@@ -30,7 +30,11 @@ use meilisearch_types::features::{
use meilisearch_types::heed::RoTxn;
use meilisearch_types::keys::actions;
use meilisearch_types::milli::index::ChatConfig;
use meilisearch_types::milli::{all_obkv_to_json, obkv_to_json, OrderBy, PatternMatch, TimeBudget};
use meilisearch_types::milli::progress::Progress;
use meilisearch_types::milli::{
all_obkv_to_json, obkv_to_json, OrderBy, PatternMatch, TimeBudget,
TotalProcessingTimeStep,
};
use meilisearch_types::{Document, Index};
use serde::Deserialize;
use serde_json::json;
@@ -262,6 +266,7 @@ async fn process_search_request(
filter: Option<String>,
) -> Result<(Index, Vec<Document>, String), ResponseError> {
let index = index_scheduler.index(&index_uid)?;
let progress = Progress::default();
let rtxn = index.static_read_txn()?;
let ChatConfig { description: _, prompt: _, search_parameters } = index.chat_config(&rtxn)?;
let mut query = SearchQuery {
@@ -285,7 +290,9 @@ async fn process_search_request(
let search_kind =
search_kind(&query, index_scheduler.get_ref(), index_uid.to_string(), &index)?;
progress.update_progress(TotalProcessingTimeStep::WaitingForPermit);
let permit = search_queue.try_get_search_permit().await?;
progress.update_progress(TotalProcessingTimeStep::Searching);
let features = index_scheduler.features();
let index_cloned = index.clone();
let output = tokio::task::spawn_blocking(move || -> Result<_, ResponseError> {
@@ -297,8 +304,15 @@ async fn process_search_request(
None => TimeBudget::default(),
};
let (search, _is_finite_pagination, _max_total_hits, _offset) =
prepare_search(&index_cloned, &rtxn, &query, &search_kind, time_budget, features)?;
let (search, _is_finite_pagination, _max_total_hits, _offset) = prepare_search(
&index_cloned,
&rtxn,
&query,
&search_kind,
time_budget,
features,
&progress,
)?;
match search_from_kind(index_uid, search_kind, search) {
Ok((search_results, _)) => Ok((rtxn, Ok(search_results))),

View File

@@ -8,7 +8,8 @@ use meilisearch_types::error::deserr_codes::*;
use meilisearch_types::error::ResponseError;
use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::locales::Locale;
use meilisearch_types::milli;
use meilisearch_types::milli::progress::Progress;
use meilisearch_types::milli::{self, TotalProcessingTimeStep};
use meilisearch_types::serde_cs::vec::CS;
use serde_json::Value;
use tracing::debug;
@@ -336,6 +337,10 @@ pub async fn search_with_url_query(
) -> Result<HttpResponse, ResponseError> {
let request_uid = Uuid::now_v7();
debug!(request_uid = ?request_uid, parameters = ?params, "Search get");
let progress = Progress::default();
progress.update_progress(TotalProcessingTimeStep::WaitingForPermit);
let permit = search_queue.try_get_search_permit().await?;
progress.update_progress(TotalProcessingTimeStep::Searching);
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let mut query: SearchQuery = params.into_inner().try_into()?;
@@ -359,9 +364,9 @@ pub async fn search_with_url_query(
// Save the query string for personalization if requested
let personalize_query = personalize.is_some().then(|| query.q.clone()).flatten();
let permit = search_queue.try_get_search_permit().await?;
let include_metadata = parse_include_metadata_header(&req);
let progress_clone = progress.clone();
let search_result = tokio::task::spawn_blocking(move || {
perform_search(
SearchParams {
@@ -374,15 +379,21 @@ pub async fn search_with_url_query(
include_metadata,
},
&index,
&progress_clone,
)
})
.await;
permit.drop().await;
let search_result = search_result?;
let analytics_step =
progress.update_progress_scoped(TotalProcessingTimeStep::PublishingAnalytics);
if let Ok((search_result, _)) = search_result.as_ref() {
aggregate.succeed(search_result);
}
analytics.publish(aggregate, &req);
// early finish progress step
drop(analytics_step);
let (mut search_result, time_budget) = search_result?;
@@ -394,11 +405,12 @@ pub async fn search_with_url_query(
personalize,
personalize_query.as_deref(),
time_budget,
&progress,
)
.await?;
}
debug!(request_uid = ?request_uid, returns = ?search_result, "Search get");
debug!(request_uid = ?request_uid, returns = ?search_result, progress = ?progress.accumulated_durations(), "Search get");
Ok(HttpResponse::Ok().json(search_result))
}
@@ -470,6 +482,11 @@ pub async fn search_with_post(
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let request_uid = Uuid::now_v7();
let progress = Progress::default();
progress.update_progress(TotalProcessingTimeStep::WaitingForPermit);
let permit = search_queue.try_get_search_permit().await?;
progress.update_progress(TotalProcessingTimeStep::Searching);
let mut query = params.into_inner();
debug!(request_uid = ?request_uid, parameters = ?query, "Search post");
@@ -494,7 +511,7 @@ pub async fn search_with_post(
// Save the query string for personalization if requested
let personalize_query = personalize.is_some().then(|| query.q.clone()).flatten();
let permit = search_queue.try_get_search_permit().await?;
let progress_clone = progress.clone();
let search_result = tokio::task::spawn_blocking(move || {
perform_search(
SearchParams {
@@ -507,11 +524,14 @@ pub async fn search_with_post(
include_metadata,
},
&index,
&progress_clone,
)
})
.await;
permit.drop().await;
let search_result = search_result?;
let analytics_step =
progress.update_progress_scoped(TotalProcessingTimeStep::PublishingAnalytics);
if let Ok((ref search_result, _)) = search_result {
aggregate.succeed(search_result);
if search_result.degraded {
@@ -519,6 +539,8 @@ pub async fn search_with_post(
}
}
analytics.publish(aggregate, &req);
// early finish progress step
drop(analytics_step);
let (mut search_result, time_budget) = search_result?;
@@ -530,11 +552,12 @@ pub async fn search_with_post(
personalize,
personalize_query.as_deref(),
time_budget,
&progress,
)
.await?;
}
debug!(request_uid = ?request_uid, returns = ?search_result, "Search post");
debug!(request_uid = ?request_uid, returns = ?search_result, progress = ?progress.accumulated_durations(), "Search post");
Ok(HttpResponse::Ok().json(search_result))
}
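
The `analytics_step` guard above comes from `update_progress_scoped`, which by all appearances returns an RAII value that closes the step when dropped (hence the explicit early `drop(analytics_step)` once analytics are published). A minimal sketch of that pattern under that assumption; `ScopedStep` is illustrative, not the real milli type:

use std::sync::{Arc, Mutex};
use std::time::{Duration, Instant};

/// Records how long a named step lasted; the duration is accumulated
/// when the guard is dropped, possibly early as in the routes above.
struct ScopedStep {
    name: &'static str,
    started: Instant,
    sink: Arc<Mutex<Vec<(&'static str, Duration)>>>,
}

impl Drop for ScopedStep {
    fn drop(&mut self) {
        self.sink.lock().unwrap().push((self.name, self.started.elapsed()));
    }
}

fn update_progress_scoped(
    name: &'static str,
    sink: &Arc<Mutex<Vec<(&'static str, Duration)>>>,
) -> ScopedStep {
    ScopedStep { name, started: Instant::now(), sink: Arc::clone(sink) }
}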

View File

@@ -8,6 +8,8 @@ use meilisearch_types::error::deserr_codes::*;
use meilisearch_types::error::ResponseError;
use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::keys::actions;
use meilisearch_types::milli::progress::Progress;
use meilisearch_types::milli::TotalProcessingTimeStep;
use meilisearch_types::serde_cs::vec::CS;
use serde_json::Value;
use tracing::debug;
@@ -217,7 +219,7 @@ async fn similar(
mut query: SimilarQuery,
) -> Result<SimilarResult, ResponseError> {
let retrieve_vectors = RetrieveVectors::new(query.retrieve_vectors);
let progress = Progress::default();
// Tenant token search_rules.
if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid) {
add_search_rules(&mut query.filter, search_rules);
@@ -234,7 +236,10 @@ async fn similar(
Route::Similar,
)?;
tokio::task::spawn_blocking(move || {
let progress_clone = progress.clone();
let result = tokio::task::spawn_blocking(move || {
let _step = progress_clone.update_progress_scoped(TotalProcessingTimeStep::Searching);
perform_similar(
&index,
query,
@@ -243,9 +248,14 @@ async fn similar(
quantized,
retrieve_vectors,
index_scheduler.features(),
&progress_clone,
)
})
.await?
.await;
debug!(progress = ?progress.accumulated_durations(), "Similar");
result?
}
#[derive(Debug, deserr::Deserr, IntoParams)]

View File

@@ -6,6 +6,8 @@ use index_scheduler::IndexScheduler;
use meilisearch_types::deserr::DeserrJsonError;
use meilisearch_types::error::ResponseError;
use meilisearch_types::keys::actions;
use meilisearch_types::milli::progress::Progress;
use meilisearch_types::milli::TotalProcessingTimeStep;
use serde::Serialize;
use tracing::debug;
use utoipa::{OpenApi, ToSchema};
@@ -153,7 +155,10 @@ pub async fn multi_search_with_post(
) -> Result<HttpResponse, ResponseError> {
// Since we don't want to process half of the search requests and then get a permit refused
// we're going to get one permit for the whole duration of the multi-search request.
let progress = Progress::default();
progress.update_progress(TotalProcessingTimeStep::WaitingForPermit);
let permit = search_queue.try_get_search_permit().await?;
progress.update_progress(TotalProcessingTimeStep::Searching);
let request_uid = Uuid::now_v7();
let federated_search = params.into_inner();
@@ -213,6 +218,7 @@ pub async fn multi_search_with_post(
is_proxy,
request_uid,
include_metadata,
&progress,
)
.await;
permit.drop().await;
@@ -288,6 +294,7 @@ pub async fn multi_search_with_post(
.with_index(query_index)?;
let retrieve_vector = RetrieveVectors::new(query.retrieve_vectors);
let progress_clone = progress.clone();
let (mut search_result, time_budget) = tokio::task::spawn_blocking(move || {
perform_search(
SearchParams {
@@ -300,6 +307,7 @@ pub async fn multi_search_with_post(
include_metadata,
},
&index,
&progress_clone,
)
})
.await
@@ -314,6 +322,7 @@ pub async fn multi_search_with_post(
personalize,
personalize_query.as_deref(),
time_budget,
&progress,
)
.await
.with_index(query_index)?;
@@ -324,15 +333,19 @@ pub async fn multi_search_with_post(
result: search_result,
});
}
Ok(search_results)
}
.await;
permit.drop().await;
let analytics_step =
progress.update_progress_scoped(TotalProcessingTimeStep::PublishingAnalytics);
if search_results.is_ok() {
multi_aggregate.succeed();
}
analytics.publish(multi_aggregate, &req);
drop(analytics_step);
let search_results = search_results.map_err(|(mut err, query_index)| {
// Add the query index that failed as context for the error message.
@@ -345,6 +358,7 @@ pub async fn multi_search_with_post(
debug!(
request_uid = ?request_uid,
returns = ?search_results,
progress = ?progress.accumulated_durations(),
"Multi-search"
);

View File

@@ -11,9 +11,13 @@ use index_scheduler::{IndexScheduler, RoFeatures};
use itertools::Itertools;
use meilisearch_types::error::ResponseError;
use meilisearch_types::milli::order_by_map::OrderByMap;
use meilisearch_types::milli::progress::Progress;
use meilisearch_types::milli::score_details::{ScoreDetails, WeightedScoreValue};
use meilisearch_types::milli::vector::Embedding;
use meilisearch_types::milli::{self, DocumentId, OrderBy, TimeBudget, DEFAULT_VALUES_PER_FACET};
use meilisearch_types::milli::{
self, DocumentId, FederatingResultsStep, OrderBy, SearchStep, TimeBudget,
DEFAULT_VALUES_PER_FACET,
};
use meilisearch_types::network::{Network, Remote};
use roaring::RoaringBitmap;
use tokio::task::JoinHandle;
@@ -43,6 +47,7 @@ pub async fn perform_federated_search(
is_proxy: bool,
request_uid: Uuid,
include_metadata: bool,
progress: &Progress,
) -> Result<FederatedSearchResult, ResponseError> {
if is_proxy {
features.check_network("Performing a remote federated search")?;
@@ -111,7 +116,7 @@ pub async fn perform_federated_search(
for (index_uid, queries) in partitioned_queries.local_queries_by_index {
// note: this is the only place we open `index_uid`
search_by_index.execute(index_uid, queries, &params)?;
search_by_index.execute(index_uid, queries, &params, progress)?;
}
// bonus step, make sure to return an error if an index wants a non-faceted field, even if no query actually uses that index.
@@ -126,6 +131,8 @@ pub async fn perform_federated_search(
facet_order,
} = search_by_index;
progress.update_progress(SearchStep::FederatingResults);
progress.update_progress(FederatingResultsStep::WaitingForRemoteResults);
let before_waiting_remote_results = std::time::Instant::now();
// 2.3. Wait for proxy search requests to complete
@@ -134,7 +141,7 @@ pub async fn perform_federated_search(
let after_waiting_remote_results = std::time::Instant::now();
// 3. merge hits and metadata across indexes and hosts
progress.update_progress(FederatingResultsStep::MergingResults);
// 3.1. Build metadata in the same order as the original queries
let query_metadata = precomputed_query_metadata.map(|precomputed_query_metadata| {
// If a remote is present, set the local remote name
@@ -187,6 +194,7 @@ pub async fn perform_federated_search(
};
// 3.5. merge facets
progress.update_progress(FederatingResultsStep::MergingFacets);
let (facet_distribution, facet_stats, facets_by_index) =
facet_order.merge(federation.merge_facets, remote_results, facets);
@@ -831,6 +839,7 @@ impl SearchByIndex {
index_uid: String,
queries: Vec<QueryByIndex>,
params: &SearchByIndexParams<'_>,
progress: &Progress,
) -> Result<(), ResponseError> {
let first_query_index = queries.first().map(|query| query.query_index);
let index = match params.index_scheduler.index(&index_uid) {
@@ -957,6 +966,7 @@ impl SearchByIndex {
// clones of `TimeBudget` share the budget rather than restart it
time_budget.clone(),
params.features,
progress,
)?;
search.scoring_strategy(milli::score_details::ScoringStrategy::Detailed);
@@ -1044,7 +1054,7 @@ impl SearchByIndex {
hit_maker,
query_index,
}| {
let mut hit = hit_maker.make_hit(docid, &score)?;
let mut hit = hit_maker.make_hit(docid, &score, progress)?;
let weighted_score = ScoreDetails::global_score(score.iter()) * (*weight);
let mut _federation = serde_json::json!(

View File

@@ -17,11 +17,13 @@ use meilisearch_types::heed::RoTxn;
use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::locales::Locale;
use meilisearch_types::milli::index::{self, EmbeddingsWithMetadata, SearchParameters};
use meilisearch_types::milli::progress::Progress;
use meilisearch_types::milli::score_details::{ScoreDetails, ScoringStrategy};
use meilisearch_types::milli::vector::parsed_vectors::ExplicitVectors;
use meilisearch_types::milli::vector::Embedder;
use meilisearch_types::milli::{
FacetValueHit, InternalError, OrderBy, PatternMatch, SearchForFacetValues, TimeBudget,
FacetValueHit, InternalError, OrderBy, PatternMatch, SearchForFacetValues, SearchStep,
TimeBudget,
};
use meilisearch_types::settings::DEFAULT_PAGINATION_MAX_TOTAL_HITS;
use meilisearch_types::{milli, Document};
@@ -1024,11 +1026,13 @@ pub fn prepare_search<'t>(
search_kind: &SearchKind,
time_budget: TimeBudget,
features: RoFeatures,
progress: &'t Progress,
) -> Result<(milli::Search<'t>, bool, usize, usize), ResponseError> {
let _step = progress.update_progress_scoped(SearchStep::PreparingSearch);
if query.media.is_some() {
features.check_multimodal("passing `media` in a search query")?;
}
let mut search = index.search(rtxn);
let mut search = index.search(rtxn, progress);
search.time_budget(time_budget);
if let Some(ranking_score_threshold) = query.ranking_score_threshold {
search.ranking_score_threshold(ranking_score_threshold.0);
@@ -1048,6 +1052,7 @@ pub fn prepare_search<'t>(
let vector = match query.vector.clone() {
Some(vector) => vector,
None => {
let _ = progress.update_progress_scoped(SearchStep::EmbeddingQuery);
let span = tracing::trace_span!(target: "search::vector", "embed_one");
let _entered = span.enter();
@@ -1061,6 +1066,7 @@ pub fn prepare_search<'t>(
(q, media) => milli::vector::SearchQuery::Media { q, media },
};
embedder
.embed_search(search_query, Some(deadline))
.map_err(milli::vector::Error::from)
@@ -1173,6 +1179,7 @@ pub struct SearchParams {
pub fn perform_search(
params: SearchParams,
index: &Index,
progress: &Progress,
) -> Result<(SearchResult, TimeBudget), ResponseError> {
let SearchParams {
index_uid,
@@ -1191,8 +1198,15 @@ pub fn perform_search(
None => TimeBudget::default(),
};
let (search, is_finite_pagination, max_total_hits, offset) =
prepare_search(index, &rtxn, &query, &search_kind, time_budget.clone(), features)?;
let (search, is_finite_pagination, max_total_hits, offset) = prepare_search(
index,
&rtxn,
&query,
&search_kind,
time_budget.clone(),
features,
progress,
)?;
let (
milli::SearchResult {
@@ -1253,6 +1267,7 @@ pub fn perform_search(
personalize: _,
} = query;
progress.update_progress(SearchStep::FormattingResults);
let format = AttributesFormat {
attributes_to_retrieve,
retrieve_vectors,
@@ -1275,6 +1290,7 @@ pub fn perform_search(
format,
matching_words,
documents_ids.iter().copied().zip(document_scores.iter()),
progress,
)?;
let number_of_hits = min(candidates.len() as usize, max_total_hits);
@@ -1297,11 +1313,13 @@ pub fn perform_search(
let (facet_distribution, facet_stats) = facets
.map(move |facets| {
let _ = progress.update_progress_scoped(SearchStep::ComputingFacetDistribution);
compute_facet_distribution_stats(&facets, index, &rtxn, candidates, Route::Search)
})
.transpose()?
.map(|ComputedFacets { distribution, stats }| (distribution, stats))
.unzip();
let result = SearchResult {
hits: documents,
hits_info,
@@ -1316,6 +1334,7 @@ pub fn perform_search(
request_uid: Some(request_uid),
metadata,
};
Ok((result, time_budget))
}
@@ -1580,7 +1599,13 @@ impl<'a> HitMaker<'a> {
})
}
pub fn make_hit(&self, id: u32, score: &[ScoreDetails]) -> milli::Result<SearchHit> {
pub fn make_hit(
&self,
id: u32,
score: &[ScoreDetails],
progress: &Progress,
) -> milli::Result<SearchHit> {
let _step = progress.update_progress_scoped(SearchStep::FormattingResults);
let (_, obkv) =
self.index.iter_documents(self.rtxn, std::iter::once(id))?.next().unwrap()?;
@@ -1669,6 +1694,7 @@ fn make_hits<'a>(
format: AttributesFormat,
matching_words: milli::MatchingWords,
documents_ids_scores: impl Iterator<Item = (u32, &'a Vec<ScoreDetails>)> + 'a,
progress: &Progress,
) -> milli::Result<Vec<SearchHit>> {
let mut documents = Vec::new();
@@ -1686,7 +1712,7 @@ fn make_hits<'a>(
let hit_maker = HitMaker::new(index, rtxn, format, formatter_builder)?;
for (id, score) in documents_ids_scores {
documents.push(hit_maker.make_hit(id, score)?);
documents.push(hit_maker.make_hit(id, score, progress)?);
}
Ok(documents)
}
@@ -1701,6 +1727,7 @@ pub fn perform_facet_search(
locales: Option<Vec<Language>>,
) -> Result<FacetSearchResult, ResponseError> {
let before_search = Instant::now();
let progress = Progress::default();
let rtxn = index.read_txn()?;
let time_budget = match index.search_cutoff(&rtxn)? {
Some(cutoff) => TimeBudget::new(Duration::from_millis(cutoff)),
@@ -1729,8 +1756,15 @@ pub fn perform_facet_search(
.collect()
});
let (search, _, _, _) =
prepare_search(index, &rtxn, &search_query, &search_kind, time_budget, features)?;
let (search, _, _, _) = prepare_search(
index,
&rtxn,
&search_query,
&search_kind,
time_budget,
features,
&progress,
)?;
let mut facet_search = SearchForFacetValues::new(
facet_name,
search,
@@ -1762,6 +1796,7 @@ pub fn perform_similar(
quantized: bool,
retrieve_vectors: RetrieveVectors,
features: RoFeatures,
progress: &Progress,
) -> Result<SimilarResult, ResponseError> {
let before_search = Instant::now();
let rtxn = index.read_txn()?;
@@ -1851,6 +1886,7 @@ pub fn perform_similar(
format,
Default::default(),
documents_ids.iter().copied().zip(document_scores.iter()),
progress,
)?;
let max_total_hits = index

View File

@@ -43,9 +43,9 @@ impl Server<Owned> {
let dir = TempDir::new().unwrap();
if cfg!(windows) {
unsafe { std::env::set_var("TMP", TEST_TEMP_DIR.path()) }
std::env::set_var("TMP", TEST_TEMP_DIR.path());
} else {
unsafe { std::env::set_var("TMPDIR", TEST_TEMP_DIR.path()) }
std::env::set_var("TMPDIR", TEST_TEMP_DIR.path());
}
let options = default_settings(dir.path());
@@ -58,9 +58,9 @@ impl Server<Owned> {
pub async fn new_auth_with_options(mut options: Opt, dir: TempDir) -> Self {
if cfg!(windows) {
unsafe { std::env::set_var("TMP", TEST_TEMP_DIR.path()) }
std::env::set_var("TMP", TEST_TEMP_DIR.path());
} else {
unsafe { std::env::set_var("TMPDIR", TEST_TEMP_DIR.path()) }
std::env::set_var("TMPDIR", TEST_TEMP_DIR.path());
}
options.master_key = Some("MASTER_KEY".to_string());
@@ -215,9 +215,9 @@ impl Server<Shared> {
let dir = TempDir::new().unwrap();
if cfg!(windows) {
unsafe { std::env::set_var("TMP", TEST_TEMP_DIR.path()) }
std::env::set_var("TMP", TEST_TEMP_DIR.path());
} else {
unsafe { std::env::set_var("TMPDIR", TEST_TEMP_DIR.path()) }
std::env::set_var("TMPDIR", TEST_TEMP_DIR.path());
}
let options = default_settings(dir.path());
@@ -508,8 +508,6 @@ pub fn default_settings(dir: impl AsRef<Path>) -> Opt {
experimental_no_edition_2024_for_dumps: false,
experimental_no_edition_2024_for_prefix_post_processing: false,
experimental_no_edition_2024_for_facet_post_processing: false,
// It has no effect to set the delta encoding here as the toggle is done in try_main
experimental_disable_delta_encoding: false,
},
experimental_enable_metrics: false,
..Parser::parse_from(None as Option<&str>)

View File

@@ -452,7 +452,6 @@ async fn limit_offset() {
}
#[actix_rt::test]
#[cfg(not(windows))]
async fn simple_search_hf() {
let server = Server::new_shared();
let index = index_with_documents_hf(server, &SIMPLE_SEARCH_DOCUMENTS).await;

View File

@@ -103,7 +103,7 @@ async fn swap_indexes() {
{
"uid": 1,
"batchUid": 1,
"indexUid": "a",
"indexUid": "b",
"status": "succeeded",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
@@ -266,7 +266,7 @@ async fn swap_indexes() {
{
"uid": 4,
"batchUid": 4,
"indexUid": "c",
"indexUid": "d",
"status": "succeeded",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
@@ -341,7 +341,7 @@ async fn swap_indexes() {
{
"uid": 0,
"batchUid": 0,
"indexUid": "a",
"indexUid": "b",
"status": "succeeded",
"type": "documentAdditionOrUpdate",
"canceledBy": null,

View File

@@ -274,19 +274,19 @@ async fn test_both_apis() {
"birthyear": 2011,
"breed": "Beagle"
},
{
"id": 2,
"name": "Vénus",
"gender": "F",
"birthyear": 2003,
"breed": "Jack Russel Terrier"
},
{
"id": 3,
"name": "Max",
"gender": "M",
"birthyear": 1995,
"breed": "Labrador Retriever"
},
{
"id": 2,
"name": "Vénus",
"gender": "F",
"birthyear": 2003,
"breed": "Jack Russel Terrier"
}
]
"###);
@@ -314,19 +314,19 @@ async fn test_both_apis() {
"birthyear": 2011,
"breed": "Beagle"
},
{
"id": 2,
"name": "Vénus",
"gender": "F",
"birthyear": 2003,
"breed": "Jack Russel Terrier"
},
{
"id": 3,
"name": "Max",
"gender": "M",
"birthyear": 1995,
"breed": "Labrador Retriever"
},
{
"id": 2,
"name": "Vénus",
"gender": "F",
"birthyear": 2003,
"breed": "Jack Russel Terrier"
}
]
"###);
@@ -354,19 +354,19 @@ async fn test_both_apis() {
"birthyear": 2011,
"breed": "Beagle"
},
{
"id": 2,
"name": "Vénus",
"gender": "F",
"birthyear": 2003,
"breed": "Jack Russel Terrier"
},
{
"id": 0,
"name": "kefir",
"gender": "M",
"birthyear": 2023,
"breed": "Patou"
},
{
"id": 2,
"name": "Vénus",
"gender": "F",
"birthyear": 2003,
"breed": "Jack Russel Terrier"
}
]
"###);
@@ -394,19 +394,19 @@ async fn test_both_apis() {
"birthyear": 2011,
"breed": "Beagle"
},
{
"id": 2,
"name": "Vénus",
"gender": "F",
"birthyear": 2003,
"breed": "Jack Russel Terrier"
},
{
"id": 0,
"name": "kefir",
"gender": "M",
"birthyear": 2023,
"breed": "Patou"
},
{
"id": 2,
"name": "Vénus",
"gender": "F",
"birthyear": 2003,
"breed": "Jack Russel Terrier"
}
]
"###);
@@ -420,13 +420,6 @@ async fn test_both_apis() {
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @r###"
[
{
"id": 1,
"name": "Intel",
"gender": "M",
"birthyear": 2011,
"breed": "Beagle"
},
{
"id": 0,
"name": "kefir",
@@ -435,11 +428,11 @@ async fn test_both_apis() {
"breed": "Patou"
},
{
"id": 2,
"name": "Vénus",
"gender": "F",
"birthyear": 2003,
"breed": "Jack Russel Terrier"
"id": 1,
"name": "Intel",
"gender": "M",
"birthyear": 2011,
"breed": "Beagle"
},
{
"id": 3,
@@ -447,6 +440,13 @@ async fn test_both_apis() {
"gender": "M",
"birthyear": 1995,
"breed": "Labrador Retriever"
},
{
"id": 2,
"name": "Vénus",
"gender": "F",
"birthyear": 2003,
"breed": "Jack Russel Terrier"
}
]
"###);
@@ -460,13 +460,6 @@ async fn test_both_apis() {
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @r###"
[
{
"id": 1,
"name": "Intel",
"gender": "M",
"birthyear": 2011,
"breed": "Beagle"
},
{
"id": 0,
"name": "kefir",
@@ -475,11 +468,11 @@ async fn test_both_apis() {
"breed": "Patou"
},
{
"id": 2,
"name": "Vénus",
"gender": "F",
"birthyear": 2003,
"breed": "Jack Russel Terrier"
"id": 1,
"name": "Intel",
"gender": "M",
"birthyear": 2011,
"breed": "Beagle"
},
{
"id": 3,
@@ -487,6 +480,13 @@ async fn test_both_apis() {
"gender": "M",
"birthyear": 1995,
"breed": "Labrador Retriever"
},
{
"id": 2,
"name": "Vénus",
"gender": "F",
"birthyear": 2003,
"breed": "Jack Russel Terrier"
}
]
"###);

View File

@@ -10,7 +10,6 @@ license.workspace = true
[dependencies]
anyhow = "1.0.100"
bstr = "1.12.1"
clap = { version = "4.5.52", features = ["derive"] }
dump = { path = "../dump" }
file-store = { path = "../file-store" }

View File

@@ -19,7 +19,7 @@ use meilisearch_types::milli::constants::RESERVED_VECTORS_FIELD_NAME;
use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader};
use meilisearch_types::milli::index::EmbeddingsWithMetadata;
use meilisearch_types::milli::vector::parsed_vectors::{ExplicitVectors, VectorOrArrayOfVectors};
use meilisearch_types::milli::{obkv_to_json, DeCboRoaringBitmapCodec, BEU32};
use meilisearch_types::milli::{obkv_to_json, BEU32};
use meilisearch_types::tasks::{Status, Task};
use meilisearch_types::versioning::{get_version, parse_version};
use meilisearch_types::Index;
@@ -140,14 +140,6 @@ enum Command {
#[arg(long, value_delimiter = ',')]
index_part: Vec<IndexPart>,
},
/// Outputs all entries of the index in a formatted way.
///
/// This command is useful for debugging purposes.
OutputFormattedEntries {
#[arg(long)]
index_name: String,
},
}
#[derive(Clone, ValueEnum)]
@@ -177,148 +169,9 @@ fn main() -> anyhow::Result<()> {
Command::HairDryer { index_name, index_part } => {
hair_dryer(db_path, &index_name, &index_part)
}
Command::OutputFormattedEntries { index_name } => {
output_formatted_entries(db_path, &index_name)
}
}
}
fn output_formatted_entries(db_path: PathBuf, index_name: &str) -> anyhow::Result<()> {
let index_scheduler_path = db_path.join("tasks");
let env = unsafe {
EnvOpenOptions::new().read_txn_without_tls().max_dbs(100).open(&index_scheduler_path)
}
.with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?;
let index_mapper_rtxn = env.read_txn()?;
let index_mapping: Database<Str, UuidCodec> =
try_opening_database(&env, &index_mapper_rtxn, "index-mapping")?;
for result in index_mapping.iter(&index_mapper_rtxn)? {
let (uid, uuid) = result?;
if uid != index_name {
continue;
}
let index_path = db_path.join("indexes").join(uuid.to_string());
let index = Index::new(EnvOpenOptions::new().read_txn_without_tls(), &index_path, false)
.with_context(|| {
format!("While trying to open the index at path {:?}", index_path.display())
})?;
let rtxn = index.read_txn()?;
let Index {
word_docids,
exact_word_docids,
word_prefix_docids,
exact_word_prefix_docids,
word_pair_proximity_docids,
word_position_docids,
word_fid_docids,
field_id_word_count_docids,
word_prefix_position_docids,
word_prefix_fid_docids,
facet_id_exists_docids,
facet_id_is_null_docids,
facet_id_is_empty_docids,
..
} = index;
struct DatabaseInfo {
name: &'static str,
database: Database<Bytes, DeCboRoaringBitmapCodec>,
}
impl DatabaseInfo {
fn new(name: &'static str, database: Database<Bytes, DeCboRoaringBitmapCodec>) -> Self {
DatabaseInfo { name, database }
}
}
let databases = [
DatabaseInfo::new("word_docids", word_docids.remap_key_type()),
DatabaseInfo::new("exact_word_docids", exact_word_docids.remap_key_type()),
DatabaseInfo::new("word_prefix_docids", word_prefix_docids.remap_key_type()),
DatabaseInfo::new(
"exact_word_prefix_docids",
exact_word_prefix_docids.remap_key_type(),
),
DatabaseInfo::new(
"word_pair_proximity_docids",
word_pair_proximity_docids.remap_key_type(),
),
DatabaseInfo::new("word_position_docids", word_position_docids.remap_key_type()),
DatabaseInfo::new("word_fid_docids", word_fid_docids.remap_key_type()),
DatabaseInfo::new(
"field_id_word_count_docids",
field_id_word_count_docids.remap_key_type(),
),
DatabaseInfo::new(
"word_prefix_position_docids",
word_prefix_position_docids.remap_key_type(),
),
DatabaseInfo::new("word_prefix_fid_docids", word_prefix_fid_docids.remap_key_type()),
DatabaseInfo::new("facet_id_exists_docids", facet_id_exists_docids.remap_key_type()),
DatabaseInfo::new("facet_id_is_null_docids", facet_id_is_null_docids.remap_key_type()),
DatabaseInfo::new(
"facet_id_is_empty_docids",
facet_id_is_empty_docids.remap_key_type(),
),
// DatabaseInfo::new("facet_id_f64_docids", facet_id_f64_docids.remap_key_type()),
// DatabaseInfo::new(
// "facet_id_string_docids",
// facet_id_string_docids.remap_key_type(),
// ),
// DatabaseInfo::new(
// "facet_id_normalized_string_strings",
// facet_id_normalized_string_strings.remap_key_type(),
// ),
// DatabaseInfo::new("facet_id_string_fst", facet_id_string_fst.remap_key_type()),
// DatabaseInfo::new(
// "field_id_docid_facet_f64s",
// field_id_docid_facet_f64s.remap_key_type(),
// ),
// DatabaseInfo::new(
// "field_id_docid_facet_strings",
// field_id_docid_facet_strings.remap_key_type(),
// ),
];
use bstr::ByteSlice as _;
let stdout = std::io::stdout();
let mut stdout_lock = BufWriter::new(stdout.lock());
for DatabaseInfo { name: db_name, database } in databases {
for result in database.iter(&rtxn)? {
let (key, bitmap) = result?;
let value: Vec<u32> = bitmap.iter().collect();
writeln!(&mut stdout_lock, "{db_name}: {} -> {:?}", key.as_bstr(), value)?;
}
}
{
let db_name = "main";
let fst = index.words_fst(&rtxn)?;
writeln!(&mut stdout_lock, "{db_name}: words-fst -> {fst:?}")?;
let prefix_fst = index.words_prefixes_fst(&rtxn)?;
writeln!(&mut stdout_lock, "{db_name}: words-prefixes-fst -> {prefix_fst:?}")?;
let documents_ids = index.documents_ids(&rtxn)?;
writeln!(&mut stdout_lock, "{db_name}: documents-ids -> {documents_ids:?}")?;
let exact_words = index.exact_words(&rtxn)?;
writeln!(&mut stdout_lock, "{db_name}: exact-words -> {exact_words:?}")?;
}
break;
}
Ok(())
}
/// Clears the task queue located at `db_path`.
fn clear_task_queue(db_path: PathBuf) -> anyhow::Result<()> {
let path = db_path.join("tasks");

View File

@@ -91,7 +91,7 @@ rhai = { version = "1.23.6", features = [
"sync",
] }
arroy = "0.6.4-nested-rtxns"
hannoy = { version = "0.1.2-nested-rtxns", features = ["arroy"] }
hannoy = { version = "0.1.0-nested-rtxns", features = ["arroy"] }
rand = "0.8.5"
tracing = "0.1.41"
ureq = { version = "2.12.1", features = ["json"] }
@@ -120,16 +120,14 @@ twox-hash = { version = "2.1.2", default-features = false, features = [
] }
geo-types = "0.7.17"
zerometry = "0.3.0"
bitpacking = "0.9.2"
[dev-dependencies]
mimalloc = { version = "0.1.48", default-features = false }
# fixed version due to format breakages in v1.40
insta = "=1.39.0"
mimalloc = { version = "0.1.48", default-features = false }
maplit = "1.0.2"
md5 = "0.8.0"
meili-snap = { path = "../meili-snap" }
quickcheck = "1.0.3"
rand = { version = "0.8.5", features = ["small_rng"] }
[features]

View File

@@ -12,7 +12,7 @@ use roaring::RoaringBitmap;
pub use self::field_doc_id_facet_codec::FieldDocIdFacetCodec;
pub use self::ordered_f64_codec::OrderedF64Codec;
use super::StrRefCodec;
use crate::{DeCboRoaringBitmapCodec, BEU16};
use crate::{CboRoaringBitmapCodec, BEU16};
pub type FieldDocIdFacetF64Codec = FieldDocIdFacetCodec<OrderedF64Codec>;
pub type FieldDocIdFacetStringCodec = FieldDocIdFacetCodec<StrRefCodec>;
@@ -97,7 +97,7 @@ impl<'a> heed::BytesEncode<'a> for FacetGroupValueCodec {
fn bytes_encode(value: &'a Self::EItem) -> Result<Cow<'a, [u8]>, BoxedError> {
let mut v = vec![value.size];
DeCboRoaringBitmapCodec::serialize_into(&value.bitmap, &mut v)?;
CboRoaringBitmapCodec::serialize_into_vec(&value.bitmap, &mut v);
Ok(Cow::Owned(v))
}
}
@@ -107,7 +107,7 @@ impl<'a> heed::BytesDecode<'a> for FacetGroupValueCodec {
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
let size = bytes[0];
let bitmap = DeCboRoaringBitmapCodec::deserialize_from(&bytes[1..])?;
let bitmap = CboRoaringBitmapCodec::deserialize_from(&bytes[1..])?;
Ok(FacetGroupValue { size, bitmap })
}
}
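
`FacetGroupValueCodec` above packs a one-byte group size in front of the serialized bitmap. A minimal sketch of that `[size: u8][bitmap bytes...]` layout, again with plain `roaring` serialization standing in for the Cbo codec:

use roaring::RoaringBitmap;

fn encode_group(size: u8, bitmap: &RoaringBitmap) -> Vec<u8> {
    // First byte is the group size, the rest is the bitmap.
    let mut v = vec![size];
    bitmap.serialize_into(&mut v).expect("Vec writes are infallible");
    v
}

fn decode_group(bytes: &[u8]) -> Option<(u8, RoaringBitmap)> {
    let (&size, rest) = bytes.split_first()?;
    let bitmap = RoaringBitmap::deserialize_from(rest).ok()?;
    Some((size, bitmap))
}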

View File

@@ -22,10 +22,10 @@ pub use self::beu32_str_codec::BEU32StrCodec;
pub use self::field_id_word_count_codec::FieldIdWordCountCodec;
pub use self::fst_set_codec::FstSetCodec;
pub use self::obkv_codec::ObkvCodec;
pub use self::roaring_bitmap::{
DeCboRoaringBitmapCodec, RoaringBitmapCodec, DELTA_ENCODING_STATUS,
pub use self::roaring_bitmap::{BoRoaringBitmapCodec, CboRoaringBitmapCodec, RoaringBitmapCodec};
pub use self::roaring_bitmap_length::{
BoRoaringBitmapLenCodec, CboRoaringBitmapLenCodec, RoaringBitmapLenCodec,
};
pub use self::roaring_bitmap_length::DeCboRoaringBitmapLenCodec;
pub use self::str_beu32_codec::{StrBEU16Codec, StrBEU32Codec};
pub use self::str_str_u8_codec::{U8StrStrCodec, UncheckedU8StrStrCodec};

View File

@@ -0,0 +1,50 @@
use std::borrow::Cow;
use std::convert::TryInto;
use std::mem::size_of;
use heed::{BoxedError, BytesDecode};
use roaring::RoaringBitmap;
use crate::heed_codec::BytesDecodeOwned;
pub struct BoRoaringBitmapCodec;
impl BoRoaringBitmapCodec {
pub fn serialize_into(bitmap: &RoaringBitmap, out: &mut Vec<u8>) {
out.reserve(bitmap.len() as usize * size_of::<u32>());
bitmap.iter().map(u32::to_ne_bytes).for_each(|bytes| out.extend_from_slice(&bytes));
}
}
impl BytesDecode<'_> for BoRoaringBitmapCodec {
type DItem = RoaringBitmap;
fn bytes_decode(bytes: &[u8]) -> Result<Self::DItem, BoxedError> {
let mut bitmap = RoaringBitmap::new();
for chunk in bytes.chunks(size_of::<u32>()) {
let bytes = chunk.try_into()?;
bitmap.push(u32::from_ne_bytes(bytes));
}
Ok(bitmap)
}
}
impl BytesDecodeOwned for BoRoaringBitmapCodec {
type DItem = RoaringBitmap;
fn bytes_decode_owned(bytes: &[u8]) -> Result<Self::DItem, BoxedError> {
Self::bytes_decode(bytes)
}
}
impl heed::BytesEncode<'_> for BoRoaringBitmapCodec {
type EItem = RoaringBitmap;
fn bytes_encode(item: &Self::EItem) -> Result<Cow<'_, [u8]>, BoxedError> {
let mut out = Vec::new();
BoRoaringBitmapCodec::serialize_into(item, &mut out);
Ok(Cow::Owned(out))
}
}

View File

@@ -7,6 +7,7 @@ use heed::BoxedError;
use roaring::RoaringBitmap;
use crate::heed_codec::BytesDecodeOwned;
use crate::update::del_add::{DelAdd, KvReaderDelAdd};
/// This is the limit where using a byteorder becomes less size-efficient
/// than using a direct roaring encoding, it is also the point where we are able
@@ -18,19 +19,8 @@ pub const THRESHOLD: usize = 7;
pub struct CboRoaringBitmapCodec;
impl CboRoaringBitmapCodec {
/// If the number of items (u32s) to encode is less than or equal to the threshold,
/// it means that it would weigh the same or less than the RoaringBitmap
/// header, so we directly encode them using ByteOrder instead.
pub fn bitmap_serialize_as_raw_u32s(roaring: &RoaringBitmap) -> bool {
roaring.len() <= THRESHOLD as u64
}
pub fn bytes_deserialize_as_raw_u32s(bytes: &[u8]) -> bool {
bytes.len() <= THRESHOLD * size_of::<u32>()
}
pub fn serialized_size(roaring: &RoaringBitmap) -> usize {
if Self::bitmap_serialize_as_raw_u32s(roaring) {
if roaring.len() <= THRESHOLD as u64 {
roaring.len() as usize * size_of::<u32>()
} else {
roaring.serialized_size()
@@ -45,7 +35,10 @@ impl CboRoaringBitmapCodec {
roaring: &RoaringBitmap,
mut writer: W,
) -> io::Result<()> {
if Self::bitmap_serialize_as_raw_u32s(roaring) {
if roaring.len() <= THRESHOLD as u64 {
// If the number of items (u32s) to encode is less than or equal to the threshold
// it means that it would weigh the same or less than the RoaringBitmap
// header, so we directly encode them using ByteOrder instead.
for integer in roaring {
writer.write_u32::<NativeEndian>(integer)?;
}
@@ -58,7 +51,7 @@ impl CboRoaringBitmapCodec {
}
pub fn deserialize_from(mut bytes: &[u8]) -> io::Result<RoaringBitmap> {
if Self::bytes_deserialize_as_raw_u32s(bytes) {
if bytes.len() <= THRESHOLD * size_of::<u32>() {
// If threshold or fewer integers fit into this array of bytes,
// it means that the ByteOrder codec serializer was used.
let mut bitmap = RoaringBitmap::new();
@@ -78,7 +71,7 @@ impl CboRoaringBitmapCodec {
other: &RoaringBitmap,
) -> io::Result<RoaringBitmap> {
// See above `deserialize_from` method for implementation details.
if Self::bytes_deserialize_as_raw_u32s(bytes) {
if bytes.len() <= THRESHOLD * size_of::<u32>() {
let mut bitmap = RoaringBitmap::new();
while let Ok(integer) = bytes.read_u32::<NativeEndian>() {
if other.contains(integer) {
@@ -90,6 +83,78 @@ impl CboRoaringBitmapCodec {
other.intersection_with_serialized_unchecked(Cursor::new(bytes))
}
}
/// Merge serialized CboRoaringBitmaps into a buffer.
///
/// If the merged values' length is under the threshold, the values are
/// directly serialized into the buffer; otherwise a RoaringBitmap is created
/// from the values and serialized into the buffer.
pub fn merge_into<I, A>(slices: I, buffer: &mut Vec<u8>) -> io::Result<()>
where
I: IntoIterator<Item = A>,
A: AsRef<[u8]>,
{
let mut roaring = RoaringBitmap::new();
let mut vec = Vec::new();
for bytes in slices {
if bytes.as_ref().len() <= THRESHOLD * size_of::<u32>() {
let mut reader = bytes.as_ref();
while let Ok(integer) = reader.read_u32::<NativeEndian>() {
vec.push(integer);
}
} else {
roaring |= RoaringBitmap::deserialize_unchecked_from(bytes.as_ref())?;
}
}
if roaring.is_empty() {
vec.sort_unstable();
vec.dedup();
if vec.len() <= THRESHOLD {
for integer in vec {
buffer.extend_from_slice(&integer.to_ne_bytes());
}
} else {
// We can unwrap safely because the vector was sorted above.
let roaring = RoaringBitmap::from_sorted_iter(vec).unwrap();
roaring.serialize_into(buffer)?;
}
} else {
roaring.extend(vec);
roaring.serialize_into(buffer)?;
}
Ok(())
}
/// Merges a DelAdd delta into a CboRoaringBitmap.
pub fn merge_deladd_into<'a>(
deladd: &KvReaderDelAdd,
previous: &[u8],
buffer: &'a mut Vec<u8>,
) -> io::Result<Option<&'a [u8]>> {
// Deserialize the bitmap that is already there
let mut previous = Self::deserialize_from(previous)?;
// Remove the integers we no longer want from the previous bitmap
if let Some(value) = deladd.get(DelAdd::Deletion) {
previous -= Self::deserialize_from(value)?;
}
// Insert the new integers we want in the previous bitmap
if let Some(value) = deladd.get(DelAdd::Addition) {
previous |= Self::deserialize_from(value)?;
}
if previous.is_empty() {
return Ok(None);
}
Self::serialize_into_vec(&previous, buffer);
Ok(Some(&buffer[..]))
}
}
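For intuition, the Del/Add merge above boils down to plain set operations on RoaringBitmaps. A minimal sketch (the crate's `KvReaderDelAdd` wrapper is omitted; the two bitmaps stand in for its Deletion and Addition entries):

```rust
use roaring::RoaringBitmap;

fn main() {
    // `previous` is the bitmap already stored in LMDB.
    let mut previous: RoaringBitmap = (0..10).collect();
    // These stand in for the Deletion and Addition sides of the delta.
    let deletions: RoaringBitmap = (0..3).collect();
    let additions: RoaringBitmap = (20..23).collect();

    previous -= deletions; // drop the ids we no longer want
    previous |= additions; // add the new ids

    assert!(!previous.contains(2));
    assert!(previous.contains(21));
}
```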
impl heed::BytesDecode<'_> for CboRoaringBitmapCodec {
@@ -117,3 +182,75 @@ impl heed::BytesEncode<'_> for CboRoaringBitmapCodec {
Ok(Cow::Owned(vec))
}
}
#[cfg(test)]
mod tests {
use std::iter::FromIterator;
use heed::{BytesDecode, BytesEncode};
use super::*;
#[test]
fn verify_encoding_decoding() {
let input = RoaringBitmap::from_iter(0..THRESHOLD as u32);
let bytes = CboRoaringBitmapCodec::bytes_encode(&input).unwrap();
let output = CboRoaringBitmapCodec::bytes_decode(&bytes).unwrap();
assert_eq!(input, output);
}
#[test]
fn verify_threshold() {
let input = RoaringBitmap::from_iter(0..THRESHOLD as u32);
// use roaring bitmap
let mut bytes = Vec::new();
input.serialize_into(&mut bytes).unwrap();
let roaring_size = bytes.len();
// use byteorder directly
let mut bytes = Vec::new();
for integer in input {
bytes.write_u32::<NativeEndian>(integer).unwrap();
}
let bo_size = bytes.len();
assert!(roaring_size > bo_size);
}
#[test]
fn merge_cbo_roaring_bitmaps() {
let mut buffer = Vec::new();
let small_data = [
RoaringBitmap::from_sorted_iter(1..4).unwrap(),
RoaringBitmap::from_sorted_iter(2..5).unwrap(),
RoaringBitmap::from_sorted_iter(4..6).unwrap(),
RoaringBitmap::from_sorted_iter(1..3).unwrap(),
];
let small_data: Vec<_> =
small_data.iter().map(|b| CboRoaringBitmapCodec::bytes_encode(b).unwrap()).collect();
CboRoaringBitmapCodec::merge_into(small_data.as_slice(), &mut buffer).unwrap();
let bitmap = CboRoaringBitmapCodec::deserialize_from(&buffer).unwrap();
let expected = RoaringBitmap::from_sorted_iter(1..6).unwrap();
assert_eq!(bitmap, expected);
let medium_data = [
RoaringBitmap::from_sorted_iter(1..4).unwrap(),
RoaringBitmap::from_sorted_iter(2..5).unwrap(),
RoaringBitmap::from_sorted_iter(4..8).unwrap(),
RoaringBitmap::from_sorted_iter(0..3).unwrap(),
RoaringBitmap::from_sorted_iter(7..23).unwrap(),
];
let medium_data: Vec<_> =
medium_data.iter().map(|b| CboRoaringBitmapCodec::bytes_encode(b).unwrap()).collect();
buffer.clear();
CboRoaringBitmapCodec::merge_into(medium_data.as_slice(), &mut buffer).unwrap();
let bitmap = CboRoaringBitmapCodec::deserialize_from(&buffer).unwrap();
let expected = RoaringBitmap::from_sorted_iter(0..23).unwrap();
assert_eq!(bitmap, expected);
}
}
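To make the THRESHOLD trade-off above concrete, here is a minimal sketch (assuming only the roaring crate) comparing the raw-u32 encoding against roaring's own serialization for a bitmap right at the threshold:

```rust
use std::mem::size_of;
use roaring::RoaringBitmap;

fn main() {
    const THRESHOLD: usize = 7; // mirrors the codec's constant
    let bitmap: RoaringBitmap = (0..THRESHOLD as u32).collect();

    // Raw native-endian u32s: 7 * 4 = 28 bytes.
    let raw_size = bitmap.len() as usize * size_of::<u32>();
    // Roaring's own format pays a fixed header, which dominates for tiny sets.
    let roaring_size = bitmap.serialized_size();

    assert!(raw_size < roaring_size);
    println!("raw: {raw_size} B, roaring: {roaring_size} B");
}
```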

View File

@@ -1,374 +0,0 @@
use std::borrow::Cow;
use std::io::{self, Cursor, ErrorKind};
use std::sync::OnceLock;
use byteorder::{NativeEndian, ReadBytesExt as _};
use heed::BoxedError;
use roaring::RoaringBitmap;
use super::cbo_roaring_bitmap_codec::CboRoaringBitmapCodec;
use super::de_roaring_bitmap_codec::DeRoaringBitmapCodec;
use crate::heed_codec::roaring_bitmap::take_all_blocks;
use crate::heed_codec::BytesDecodeOwned;
use crate::update::del_add::{DelAdd, KvReaderDelAdd};
/// Defines the global status of the delta encoding: whether it is enabled or not.
pub static DELTA_ENCODING_STATUS: DeltaEncodingStatusLock = DeltaEncodingStatusLock::new();
pub struct DeCboRoaringBitmapCodec;
impl DeCboRoaringBitmapCodec {
pub fn serialized_size_with_tmp_buffer(
bitmap: &RoaringBitmap,
tmp_buffer: &mut Vec<u32>,
) -> usize {
// We are stuck with this format because the CboRoaringBitmapCodec decides to write
// raw and unencoded u32s, without a header, when there are at most THRESHOLD elements.
if CboRoaringBitmapCodec::bitmap_serialize_as_raw_u32s(bitmap)
|| DELTA_ENCODING_STATUS.is_disabled()
{
CboRoaringBitmapCodec::serialized_size(bitmap)
} else {
DeRoaringBitmapCodec::serialized_size_with_tmp_buffer(bitmap, tmp_buffer)
}
}
/// Writes the delta-encoded compressed version of
/// the given roaring bitmap into the provided writer.
pub fn serialize_into<W: io::Write>(bitmap: &RoaringBitmap, writer: &mut W) -> io::Result<()> {
let mut tmp_buffer = Vec::new();
Self::serialize_into_with_tmp_buffer(bitmap, writer, &mut tmp_buffer)
}
/// Same as [Self::serialize_into] but accepts a buffer to avoid allocating one.
///
/// Note that the delta-encoded compressed version is only used when the bitmap
/// is large enough and delta encoding is enabled.
pub fn serialize_into_with_tmp_buffer<W: io::Write>(
bitmap: &RoaringBitmap,
writer: &mut W,
tmp_buffer: &mut Vec<u32>,
) -> io::Result<()> {
// We are stuck with this format because the CboRoaringBitmapCodec decides to write
// raw and unencoded u32s, without a header, when there are at most THRESHOLD elements.
if CboRoaringBitmapCodec::bitmap_serialize_as_raw_u32s(bitmap)
|| DELTA_ENCODING_STATUS.is_disabled()
{
CboRoaringBitmapCodec::serialize_into_writer(bitmap, writer)
} else {
DeRoaringBitmapCodec::serialize_into_with_tmp_buffer(bitmap, writer, tmp_buffer)
}
}
/// Returns the delta-decoded roaring bitmap from the compressed bytes.
pub fn deserialize_from(compressed: &[u8]) -> io::Result<RoaringBitmap> {
let mut tmp_buffer = Vec::new();
Self::deserialize_from_with_tmp_buffer(compressed, &mut tmp_buffer)
}
/// Same as [Self::deserialize_from] but accepts a buffer to avoid allocating one.
///
/// It tries to decode the input using the delta decoder and,
/// if that fails, falls back to the CboRoaringBitmap version.
pub fn deserialize_from_with_tmp_buffer(
input: &[u8],
tmp_buffer: &mut Vec<u32>,
) -> io::Result<RoaringBitmap> {
// The input is too short to be a valid delta-decoded bitmap.
// We fall back to the CboRoaringBitmap version with raw u32s.
if CboRoaringBitmapCodec::bytes_deserialize_as_raw_u32s(input) {
return CboRoaringBitmapCodec::deserialize_from(input);
}
match DeRoaringBitmapCodec::deserialize_from_with_tmp_buffer(
input,
take_all_blocks,
tmp_buffer,
) {
Ok(bitmap) => Ok(bitmap),
// If the error kind is Other it means that the delta-decoder found
// an invalid magic header. We fall back to the CboRoaringBitmap version.
Err(e) if e.kind() == ErrorKind::Other => {
CboRoaringBitmapCodec::deserialize_from(input)
}
Err(e) => Err(e),
}
}
/// Merge serialized DeCboRoaringBitmaps into a buffer.
///
/// If the merged values' length is under the threshold, the values are
/// directly serialized into the buffer; otherwise a delta-encoded list of
/// integers is created from the values and serialized into the buffer.
pub fn merge_into<I, A>(slices: I, buffer: &mut Vec<u8>) -> io::Result<()>
where
I: IntoIterator<Item = A>,
A: AsRef<[u8]>,
{
let mut roaring = RoaringBitmap::new();
let mut vec = Vec::new();
let mut tmp_buffer = Vec::new();
for bytes in slices {
if CboRoaringBitmapCodec::bytes_deserialize_as_raw_u32s(bytes.as_ref()) {
let mut reader = bytes.as_ref();
while let Ok(integer) = reader.read_u32::<NativeEndian>() {
vec.push(integer);
}
} else {
roaring |= DeCboRoaringBitmapCodec::deserialize_from_with_tmp_buffer(
bytes.as_ref(),
&mut tmp_buffer,
)?;
}
}
roaring.extend(vec);
DeCboRoaringBitmapCodec::serialize_into_with_tmp_buffer(&roaring, buffer, &mut tmp_buffer)?;
Ok(())
}
/// Do an intersection directly with a serialized delta-encoded bitmap.
///
/// When doing the intersection we only need to deserialize the necessary
/// bitmap containers and avoid a lot of unnecessary allocations. We do
/// that by skipping entire delta-encoded blocks when possible to avoid
/// storing them in the bitmap we use for the final intersection.
pub fn intersection_with_serialized(
bytes: &[u8],
other: &RoaringBitmap,
) -> io::Result<RoaringBitmap> {
if CboRoaringBitmapCodec::bytes_deserialize_as_raw_u32s(bytes) {
return CboRoaringBitmapCodec::intersection_with_serialized(bytes, other);
}
// TODO move this tmp buffer outside
let mut tmp_buffer = Vec::new();
let filter_block = |first, last| other.range_cardinality(first..=last) == 0;
match DeRoaringBitmapCodec::deserialize_from_with_tmp_buffer(
bytes,
filter_block,
&mut tmp_buffer,
) {
Ok(bitmap) => Ok(bitmap & other),
// If the error kind is Other it means that the delta-decoder found
// an invalid magic header. We fall back to the CboRoaringBitmap version.
Err(e) if e.kind() == ErrorKind::Other => {
other.intersection_with_serialized_unchecked(Cursor::new(bytes))
}
Err(e) => Err(e),
}
}
pub fn merge_deladd_into<'a>(
deladd: &KvReaderDelAdd,
previous: &[u8],
buffer: &'a mut Vec<u8>,
tmp_buffer: &mut Vec<u32>,
) -> io::Result<Option<&'a [u8]>> {
// Deserialize the bitmap that is already there
let mut previous = Self::deserialize_from_with_tmp_buffer(previous, tmp_buffer)?;
// Remove the integers we no longer want from the previous bitmap
if let Some(value) = deladd.get(DelAdd::Deletion) {
previous -= Self::deserialize_from_with_tmp_buffer(value, tmp_buffer)?;
}
// Insert the new integers we want in the previous bitmap
if let Some(value) = deladd.get(DelAdd::Addition) {
previous |= Self::deserialize_from_with_tmp_buffer(value, tmp_buffer)?;
}
if previous.is_empty() {
return Ok(None);
}
Self::serialize_into_with_tmp_buffer(&previous, buffer, tmp_buffer)?;
Ok(Some(&buffer[..]))
}
}
impl heed::BytesDecode<'_> for DeCboRoaringBitmapCodec {
type DItem = RoaringBitmap;
fn bytes_decode(bytes: &[u8]) -> Result<Self::DItem, BoxedError> {
Self::deserialize_from(bytes).map_err(Into::into)
}
}
impl BytesDecodeOwned for DeCboRoaringBitmapCodec {
type DItem = RoaringBitmap;
fn bytes_decode_owned(bytes: &[u8]) -> Result<Self::DItem, BoxedError> {
Self::deserialize_from(bytes).map_err(Into::into)
}
}
impl heed::BytesEncode<'_> for DeCboRoaringBitmapCodec {
type EItem = RoaringBitmap;
fn bytes_encode(item: &Self::EItem) -> Result<Cow<'_, [u8]>, BoxedError> {
let mut tmp_buffer = Vec::new();
let capacity = Self::serialized_size_with_tmp_buffer(item, &mut tmp_buffer);
let mut output = Vec::with_capacity(capacity);
Self::serialize_into_with_tmp_buffer(item, &mut output, &mut tmp_buffer)?;
Ok(Cow::Owned(output))
}
}
/// Manages the global status of the delta encoding.
///
/// Whether we must use delta encoding or not when encoding roaring bitmaps.
#[derive(Default)]
pub struct DeltaEncodingStatusLock(OnceLock<DeltaEncodingStatus>);
impl DeltaEncodingStatusLock {
pub const fn new() -> Self {
Self(OnceLock::new())
}
}
#[derive(Default)]
enum DeltaEncodingStatus {
Enabled,
#[default]
Disabled,
}
impl DeltaEncodingStatusLock {
pub fn set_to_enabled(&self) -> Result<(), ()> {
self.0.set(DeltaEncodingStatus::Enabled).map_err(drop)
}
pub fn set_to_disabled(&self) -> Result<(), ()> {
self.0.set(DeltaEncodingStatus::Disabled).map_err(drop)
}
pub fn is_enabled(&self) -> bool {
matches!(self.0.get(), Some(DeltaEncodingStatus::Enabled))
}
pub fn is_disabled(&self) -> bool {
!self.is_enabled()
}
}
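The status lock above is a write-once global: the first caller to set it wins and later writes are rejected. A reduced sketch of that pattern using only std:

```rust
use std::sync::OnceLock;

static DELTA_ENCODING_ENABLED: OnceLock<bool> = OnceLock::new();

fn main() {
    // The first set wins...
    assert!(DELTA_ENCODING_ENABLED.set(true).is_ok());
    // ...and any later attempt reports an error, just like set_to_enabled /
    // set_to_disabled returning Result<(), ()> above.
    assert!(DELTA_ENCODING_ENABLED.set(false).is_err());
    assert_eq!(DELTA_ENCODING_ENABLED.get(), Some(&true));
}
```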
#[cfg(test)]
mod tests {
use std::iter::FromIterator;
use byteorder::WriteBytesExt as _;
use heed::{BytesDecode, BytesEncode};
use quickcheck::quickcheck;
use roaring::RoaringBitmap;
use super::super::super::roaring_bitmap_length::DeCboRoaringBitmapLenCodec;
use super::super::THRESHOLD;
use super::*;
#[test]
fn verify_encoding_decoding() {
let input = RoaringBitmap::from_iter(0..THRESHOLD as u32);
let bytes = DeCboRoaringBitmapCodec::bytes_encode(&input).unwrap();
let output = DeCboRoaringBitmapCodec::bytes_decode(&bytes).unwrap();
assert_eq!(input, output);
}
#[test]
fn verify_threshold() {
let input = RoaringBitmap::from_iter(0..THRESHOLD as u32);
// use roaring bitmap
let mut bytes = Vec::new();
input.serialize_into(&mut bytes).unwrap();
let roaring_size = bytes.len();
// use byteorder directly
let mut bytes = Vec::new();
for integer in input {
bytes.write_u32::<NativeEndian>(integer).unwrap();
}
let bo_size = bytes.len();
assert!(roaring_size > bo_size);
}
#[test]
fn merge_de_cbo_roaring_bitmaps() {
let mut buffer = Vec::new();
let small_data = [
RoaringBitmap::from_sorted_iter(1..4).unwrap(),
RoaringBitmap::from_sorted_iter(2..5).unwrap(),
RoaringBitmap::from_sorted_iter(4..6).unwrap(),
RoaringBitmap::from_sorted_iter(1..3).unwrap(),
];
let small_data: Vec<_> =
small_data.iter().map(|b| DeCboRoaringBitmapCodec::bytes_encode(b).unwrap()).collect();
DeCboRoaringBitmapCodec::merge_into(small_data.as_slice(), &mut buffer).unwrap();
let bitmap = DeCboRoaringBitmapCodec::deserialize_from(&buffer).unwrap();
let expected = RoaringBitmap::from_sorted_iter(1..6).unwrap();
assert_eq!(bitmap, expected);
let medium_data = [
RoaringBitmap::from_sorted_iter(1..4).unwrap(),
RoaringBitmap::from_sorted_iter(2..5).unwrap(),
RoaringBitmap::from_sorted_iter(4..8).unwrap(),
RoaringBitmap::from_sorted_iter(0..3).unwrap(),
RoaringBitmap::from_sorted_iter(7..23).unwrap(),
];
let medium_data: Vec<_> =
medium_data.iter().map(|b| DeCboRoaringBitmapCodec::bytes_encode(b).unwrap()).collect();
buffer.clear();
DeCboRoaringBitmapCodec::merge_into(medium_data.as_slice(), &mut buffer).unwrap();
let bitmap = DeCboRoaringBitmapCodec::deserialize_from(&buffer).unwrap();
let expected = RoaringBitmap::from_sorted_iter(0..23).unwrap();
assert_eq!(bitmap, expected);
}
quickcheck! {
fn qc_random(xs: Vec<u32>) -> bool {
let bitmap = RoaringBitmap::from_iter(xs);
let mut compressed = Vec::new();
let mut tmp_buffer = Vec::new();
DeCboRoaringBitmapCodec::serialize_into_with_tmp_buffer(&bitmap, &mut compressed, &mut tmp_buffer).unwrap();
let length = DeCboRoaringBitmapLenCodec::bytes_decode(&compressed[..]).unwrap();
let decompressed = DeCboRoaringBitmapCodec::deserialize_from_with_tmp_buffer(&compressed[..], &mut tmp_buffer).unwrap();
length == bitmap.len() && decompressed == bitmap
}
}
quickcheck! {
fn qc_random_check_serialized_size(xs: Vec<u32>) -> bool {
let bitmap = RoaringBitmap::from_iter(xs);
let mut compressed = Vec::new();
let mut tmp_buffer = Vec::new();
DeCboRoaringBitmapCodec::serialize_into_with_tmp_buffer(&bitmap, &mut compressed, &mut tmp_buffer).unwrap();
let length = DeCboRoaringBitmapLenCodec::bytes_decode(&compressed).unwrap();
let expected_len = DeCboRoaringBitmapCodec::serialized_size_with_tmp_buffer(&bitmap, &mut tmp_buffer);
length == bitmap.len() && compressed.len() == expected_len
}
}
quickcheck! {
fn qc_random_intersection_with_serialized(lhs: Vec<u32>, rhs: Vec<u32>) -> bool {
let mut compressed = Vec::new();
let mut tmp_buffer = Vec::new();
let lhs = RoaringBitmap::from_iter(lhs);
let rhs = RoaringBitmap::from_iter(rhs);
DeCboRoaringBitmapCodec::serialize_into_with_tmp_buffer(&lhs, &mut compressed, &mut tmp_buffer).unwrap();
let intersection = DeCboRoaringBitmapCodec::intersection_with_serialized(&compressed, &rhs).unwrap();
let expected_intersection = lhs & rhs;
intersection == expected_intersection
}
}
}
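The decode fallback above can be summarized as: try the delta format first and, on an unrecognized magic header (surfaced as `ErrorKind::Other`), retry with the legacy codec. A minimal, hypothetical sketch of that dispatch (the decoders here are stand-ins, not the real codecs):

```rust
use std::io::{self, ErrorKind};

const MAGIC_HEADER: u16 = 36869;

// Hypothetical decoder standing in for DeRoaringBitmapCodec; only the
// error-driven dispatch is of interest here.
fn decode_delta(bytes: &[u8]) -> io::Result<Vec<u32>> {
    let Some((header, _rest)) = bytes.split_at_checked(2) else {
        return Err(io::Error::new(ErrorKind::UnexpectedEof, "expecting a two-bytes header"));
    };
    if u16::from_ne_bytes(header.try_into().unwrap()) != MAGIC_HEADER {
        return Err(io::Error::other("invalid header value"));
    }
    Ok(Vec::new()) // the decoding itself is elided
}

// Hypothetical stand-in for the legacy CboRoaringBitmapCodec path.
fn decode_legacy(_bytes: &[u8]) -> io::Result<Vec<u32>> {
    Ok(Vec::new())
}

fn decode(bytes: &[u8]) -> io::Result<Vec<u32>> {
    match decode_delta(bytes) {
        Ok(ids) => Ok(ids),
        // An invalid magic header means the value predates delta encoding.
        Err(e) if e.kind() == ErrorKind::Other => decode_legacy(bytes),
        Err(e) => Err(e),
    }
}

fn main() -> io::Result<()> {
    let legacy_bytes = 0xFFFF_u16.to_ne_bytes(); // wrong magic -> falls back
    decode(&legacy_bytes)?;
    Ok(())
}
```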

View File

@@ -1,474 +0,0 @@
use std::io::{self, ErrorKind};
use std::mem::{self, size_of, size_of_val};
use bitpacking::{BitPacker, BitPacker1x, BitPacker4x, BitPacker8x};
use roaring::RoaringBitmap;
/// The magic header for our custom encoding format
const MAGIC_HEADER: u16 = 36869;
pub struct DeRoaringBitmapCodec;
// TODO reintroduce:
// - serialized_size?
// - serialize_into_vec
// - intersection_with_serialized
// - merge_into
// - merge_deladd_into
impl DeRoaringBitmapCodec {
/// Returns the serialized size of the given roaring bitmap with the delta encoding format.
pub fn serialized_size_with_tmp_buffer(
bitmap: &RoaringBitmap,
tmp_buffer: &mut Vec<u32>,
) -> usize {
let mut size = 2; // u16 magic header
let bitpacker8x = BitPacker8x::new();
let bitpacker4x = BitPacker4x::new();
let bitpacker1x = BitPacker1x::new();
// This temporary buffer is used to store each chunk of decompressed u32s.
tmp_buffer.resize(BitPacker8x::BLOCK_LEN, 0u32);
let decompressed = &mut tmp_buffer[..];
let mut buffer_index = 0;
let mut initial = None;
// We initially collect all the integers into a flat buffer of the size
// of the largest bitpacker. We encode them with it until we don't have
// enough of them...
for n in bitmap {
decompressed[buffer_index] = n;
buffer_index += 1;
if buffer_index == BitPacker8x::BLOCK_LEN {
let num_bits = bitpacker8x.num_bits_strictly_sorted(initial, decompressed);
let compressed_len = BitPacker8x::compressed_block_size(num_bits);
size += 1; // u8 chunk header
size += compressed_len; // compressed data length
initial = Some(n);
buffer_index = 0;
}
}
// ...We then switch to a smaller bitpacker to encode the remaining chunks...
let decompressed = &decompressed[..buffer_index];
let mut chunks = decompressed.chunks_exact(BitPacker4x::BLOCK_LEN);
for decompressed in chunks.by_ref() {
let num_bits = bitpacker4x.num_bits_strictly_sorted(initial, decompressed);
let compressed_len = BitPacker4x::compressed_block_size(num_bits);
size += 1; // u8 chunk header
size += compressed_len; // compressed data length
initial = decompressed.iter().last().copied();
}
// ...And so on...
let decompressed = chunks.remainder();
let mut chunks = decompressed.chunks_exact(BitPacker1x::BLOCK_LEN);
for decompressed in chunks.by_ref() {
let num_bits = bitpacker1x.num_bits_strictly_sorted(initial, decompressed);
let compressed_len = BitPacker1x::compressed_block_size(num_bits);
size += 1; // u8 chunk header
size += compressed_len; // compressed data length
initial = decompressed.iter().last().copied();
}
// ...Until no bitpacker is small enough. We put the remaining u32s raw
// at the end of our buffer with a header indicating as much.
let decompressed = chunks.remainder();
if !decompressed.is_empty() {
size += 1; // u8 chunk header
size += mem::size_of_val(decompressed); // remaining uncompressed u32s
}
size
}
/// Writes the delta-encoded compressed version of the given roaring bitmap
/// into the provided writer. Accepts a buffer to avoid allocating one.
pub fn serialize_into_with_tmp_buffer<W: io::Write>(
bitmap: &RoaringBitmap,
mut writer: W,
tmp_buffer: &mut Vec<u32>,
) -> io::Result<()> {
// Insert the magic header
writer.write_all(&MAGIC_HEADER.to_ne_bytes())?;
let bitpacker8x = BitPacker8x::new();
let bitpacker4x = BitPacker4x::new();
let bitpacker1x = BitPacker1x::new();
// This temporary buffer stores each chunk of decompressed u32s as well as
// the compressed, delta-encoded output. We need room for the decompressed
// u32s coming from the roaring bitmap, the compressed output that can
// be as large as the decompressed u32s, and the chunk header.
tmp_buffer.resize((BitPacker8x::BLOCK_LEN * 2) + 1, 0u32);
let (decompressed, compressed) = tmp_buffer.split_at_mut(BitPacker8x::BLOCK_LEN);
let compressed = bytemuck::cast_slice_mut(compressed);
let mut buffer_index = 0;
let mut initial = None;
// We initially collect all the integers into a flat buffer of the size
// of the largest bitpacker. We encode them with it until we don't have
// enough of them...
for n in bitmap {
decompressed[buffer_index] = n;
buffer_index += 1;
if buffer_index == BitPacker8x::BLOCK_LEN {
let output = encode_with_packer(&bitpacker8x, decompressed, initial, compressed);
writer.write_all(output)?;
initial = Some(n);
buffer_index = 0;
}
}
// ...We then switch to a smaller bitpacker to encode the remaining chunks...
let decompressed = &decompressed[..buffer_index];
let mut chunks = decompressed.chunks_exact(BitPacker4x::BLOCK_LEN);
for decompressed in chunks.by_ref() {
let output = encode_with_packer(&bitpacker4x, decompressed, initial, compressed);
writer.write_all(output)?;
initial = decompressed.iter().last().copied();
}
// ...And so on...
let decompressed = chunks.remainder();
let mut chunks = decompressed.chunks_exact(BitPacker1x::BLOCK_LEN);
for decompressed in chunks.by_ref() {
let output = encode_with_packer(&bitpacker1x, decompressed, initial, compressed);
writer.write_all(output)?;
initial = decompressed.iter().last().copied();
}
// ...Until no bitpacker is small enough. We put the remaining u32s raw
// at the end of our buffer with a header indicating as much.
let decompressed = chunks.remainder();
if !decompressed.is_empty() {
let header = encode_chunk_header(BitPackerLevel::None, u32::BITS as u8);
// Note: Not convinced about the performance of writing a single
// byte followed by a larger write. However, we will use this
// codec with a BufWriter or directly with a Vec of bytes.
writer.write_all(&[header])?;
writer.write_all(bytemuck::cast_slice(decompressed))?;
}
Ok(())
}
/// Same as [Self::deserialize_from] but accepts a buffer to avoid allocating one.
///
/// The `filter_block` function is used to filter out blocks. It takes the first
/// and last u32 values of a block and returns `true` if the block must be skipped.
pub fn deserialize_from_with_tmp_buffer<F>(
input: &[u8],
filter_block: F,
tmp_buffer: &mut Vec<u32>,
) -> io::Result<RoaringBitmap>
where
F: Fn(u32, u32) -> bool,
{
let Some((header, mut compressed)) = input.split_at_checked(size_of_val(&MAGIC_HEADER))
else {
return Err(io::Error::new(ErrorKind::UnexpectedEof, "expecting a two-bytes header"));
};
// Safety: this unwrap cannot fail as the header buffer has the right size
let header = u16::from_ne_bytes(header.try_into().unwrap());
if header != MAGIC_HEADER {
return Err(io::Error::other("invalid header value"));
}
let bitpacker8x = BitPacker8x::new();
let bitpacker4x = BitPacker4x::new();
let bitpacker1x = BitPacker1x::new();
let mut bitmap = RoaringBitmap::new();
tmp_buffer.resize(BitPacker8x::BLOCK_LEN, 0u32);
let decompressed = &mut tmp_buffer[..];
let mut initial = None;
while let Some((&chunk_header, encoded)) = compressed.split_first() {
let (level, num_bits) = decode_chunk_header(chunk_header);
let (bytes_read, decompressed) = match level {
BitPackerLevel::None => {
if num_bits != u32::BITS as u8 {
return Err(io::Error::new(
ErrorKind::InvalidData,
"invalid number of bits to encode non-compressed u32s",
));
}
let chunks = encoded.chunks_exact(size_of::<u32>());
if !chunks.remainder().is_empty() {
return Err(io::Error::new(
io::ErrorKind::InvalidData,
"expecting last chunk to be a multiple of the size of an u32",
));
}
let integers = chunks
// Safety: this unwrap cannot fail as
// each chunk is exactly size_of::<u32>() bytes.
.map(|b| b.try_into().unwrap())
.map(u32::from_ne_bytes);
if let Some((first, last)) =
integers.clone().next().zip(integers.clone().next_back())
{
if !(filter_block)(first, last) {
bitmap
.append(integers)
.map_err(|e| io::Error::new(ErrorKind::InvalidData, e))?;
}
}
// This is basically always the last chunk that exists in
// this delta-encoded format, as the raw u32s are appended
// when there are not enough of them to fill a bitpacker.
break;
}
BitPackerLevel::BitPacker1x => {
decode_with_packer(&bitpacker1x, decompressed, initial, encoded, num_bits)
}
BitPackerLevel::BitPacker4x => {
decode_with_packer(&bitpacker4x, decompressed, initial, encoded, num_bits)
}
BitPackerLevel::BitPacker8x => {
decode_with_packer(&bitpacker8x, decompressed, initial, encoded, num_bits)
}
};
initial = decompressed.iter().last().copied();
if let Some((first, last)) = decompressed.first().copied().zip(initial) {
if !(filter_block)(first, last) {
// TODO investigate perf
// Safety: Bitpackers cannot output unsorted integers when
// used with the compress_strictly_sorted function.
bitmap.append(decompressed.iter().copied()).unwrap();
}
}
// What the delta-decoding read plus the chunk header size
compressed = &compressed[bytes_read + 1..];
}
Ok(bitmap)
}
/// Returns the length of the serialized DeRoaringBitmap.
pub fn deserialize_length_from(input: &[u8]) -> io::Result<u64> {
let Some((header, mut compressed)) = input.split_at_checked(size_of_val(&MAGIC_HEADER))
else {
return Err(io::Error::new(ErrorKind::UnexpectedEof, "expecting a two-bytes header"));
};
// Safety: this unwrap cannot fail as the header buffer has the right size
let header = u16::from_ne_bytes(header.try_into().unwrap());
if header != MAGIC_HEADER {
return Err(io::Error::other("invalid header value"));
}
let mut length = 0;
while let Some((&chunk_header, encoded)) = compressed.split_first() {
let (level, num_bits) = decode_chunk_header(chunk_header);
let bytes_read = match level {
BitPackerLevel::None => {
if num_bits != u32::BITS as u8 {
return Err(io::Error::new(
ErrorKind::InvalidData,
"invalid number of bits to encode non-compressed u32s",
));
}
let chunks = encoded.chunks_exact(size_of::<u32>());
if !chunks.remainder().is_empty() {
return Err(io::Error::new(
io::ErrorKind::InvalidData,
"expecting last chunk to be a multiple of the size of an u32",
));
}
// This call is optimized for performance
// and will not iterate over the chunks.
length += chunks.count() as u64;
// This is basically always the last chunk that exists in
// this delta-encoded format, as the raw u32s are appended
// when there are not enough of them to fill a bitpacker.
break;
}
BitPackerLevel::BitPacker1x => {
length += BitPacker1x::BLOCK_LEN as u64;
BitPacker1x::compressed_block_size(num_bits)
}
BitPackerLevel::BitPacker4x => {
length += BitPacker4x::BLOCK_LEN as u64;
BitPacker4x::compressed_block_size(num_bits)
}
BitPackerLevel::BitPacker8x => {
length += BitPacker8x::BLOCK_LEN as u64;
BitPacker8x::compressed_block_size(num_bits)
}
};
// What the delta-decoding read plus the chunk header size
compressed = &compressed[bytes_read + 1..];
}
Ok(length)
}
}
/// A utility `filter_block` function that never skips a block, i.e. it takes all blocks.
pub fn take_all_blocks(_first: u32, _last: u32) -> bool {
false
}
/// Takes a strictly sorted list of u32s and outputs delta-encoded
/// bytes with a chunk header. We expect the output buffer to be
/// at least BLOCK_LEN + 1 bytes long.
fn encode_with_packer<'c, B: BitPackerExt>(
bitpacker: &B,
decompressed: &[u32],
initial: Option<u32>,
output: &'c mut [u8],
) -> &'c [u8] {
let num_bits = bitpacker.num_bits_strictly_sorted(initial, decompressed);
let compressed_len = B::compressed_block_size(num_bits);
let chunk_header = encode_chunk_header(B::level(), num_bits);
let buffer = &mut output[..compressed_len + 1];
// Safety: The buffer is at least one byte
let (header_in_buffer, encoded) = buffer.split_first_mut().unwrap();
*header_in_buffer = chunk_header;
bitpacker.compress_strictly_sorted(initial, decompressed, encoded, num_bits);
buffer
}
/// Returns the number of bytes read and the decoded unsigned integers.
fn decode_with_packer<'d, B: BitPacker>(
bitpacker: &B,
decompressed: &'d mut [u32],
initial: Option<u32>,
compressed: &[u8],
num_bits: u8,
) -> (usize, &'d [u32]) {
let decompressed = &mut decompressed[..B::BLOCK_LEN];
let read = bitpacker.decompress_strictly_sorted(initial, compressed, decompressed, num_bits);
(read, decompressed)
}
/// An identifier for the bitpacker, needed to be able
/// to correctly decode the compressed integers.
#[derive(Debug, PartialEq, Eq)]
#[repr(u8)]
enum BitPackerLevel {
/// The remaining bytes are raw native-endian encoded u32s.
None,
/// The remaining bits are encoded using a `BitPacker1x`.
BitPacker1x,
/// The remaining bits are encoded using a `BitPacker4x`.
BitPacker4x,
/// The remaining bits are encoded using a `BitPacker8x`.
BitPacker8x,
}
/// Returns the chunk header based on the bitpacker level
/// and the number of bits to encode the list of integers.
fn encode_chunk_header(level: BitPackerLevel, num_bits: u8) -> u8 {
debug_assert!(num_bits as u32 <= 2_u32.pow(6));
let level = level as u8;
debug_assert!(level <= 3);
num_bits | (level << 6)
}
/// Decodes the chunk header and outputs the bitpacker level
/// and the number of bits to decode the following bytes.
fn decode_chunk_header(data: u8) -> (BitPackerLevel, u8) {
let num_bits = data & 0b00111111;
let level = match data >> 6 {
0 => BitPackerLevel::None,
1 => BitPackerLevel::BitPacker1x,
2 => BitPackerLevel::BitPacker4x,
3 => BitPackerLevel::BitPacker8x,
invalid => panic!("Invalid bitpacker level: {invalid}"),
};
debug_assert!(num_bits as u32 <= 2_u32.pow(6));
(level, num_bits)
}
/// A simple helper trait to get the BitPackerLevel
/// and correctly generate the chunk header.
trait BitPackerExt: BitPacker {
/// Returns the level of the bitpacker: an identifier to be
/// able to decode the numbers with the right bitpacker.
fn level() -> BitPackerLevel;
}
impl BitPackerExt for BitPacker8x {
fn level() -> BitPackerLevel {
BitPackerLevel::BitPacker8x
}
}
impl BitPackerExt for BitPacker4x {
fn level() -> BitPackerLevel {
BitPackerLevel::BitPacker4x
}
}
impl BitPackerExt for BitPacker1x {
fn level() -> BitPackerLevel {
BitPackerLevel::BitPacker1x
}
}
#[cfg(test)]
mod tests {
use quickcheck::quickcheck;
use roaring::RoaringBitmap;
use super::{take_all_blocks, DeRoaringBitmapCodec};
quickcheck! {
fn qc_random(xs: Vec<u32>) -> bool {
let bitmap = RoaringBitmap::from_iter(xs);
let mut compressed = Vec::new();
let mut tmp_buffer = Vec::new();
DeRoaringBitmapCodec::serialize_into_with_tmp_buffer(&bitmap, &mut compressed, &mut tmp_buffer).unwrap();
let length = DeRoaringBitmapCodec::deserialize_length_from(&compressed[..]).unwrap();
let decompressed = DeRoaringBitmapCodec::deserialize_from_with_tmp_buffer(&compressed[..], take_all_blocks, &mut tmp_buffer).unwrap();
length == bitmap.len() && decompressed == bitmap
}
}
quickcheck! {
fn qc_random_check_serialized_size(xs: Vec<u32>) -> bool {
let bitmap = RoaringBitmap::from_iter(xs);
let mut compressed = Vec::new();
let mut tmp_buffer = Vec::new();
DeRoaringBitmapCodec::serialize_into_with_tmp_buffer(&bitmap, &mut compressed, &mut tmp_buffer).unwrap();
let length = DeRoaringBitmapCodec::deserialize_length_from(&compressed).unwrap();
let expected_len = DeRoaringBitmapCodec::serialized_size_with_tmp_buffer(&bitmap, &mut tmp_buffer);
length == bitmap.len() && compressed.len() == expected_len
}
}
quickcheck! {
fn qc_random_intersection_with_serialized(lhs: Vec<u32>, rhs: Vec<u32>) -> bool {
let mut compressed = Vec::new();
let mut tmp_buffer = Vec::new();
let lhs = RoaringBitmap::from_iter(lhs);
let rhs = RoaringBitmap::from_iter(rhs);
DeRoaringBitmapCodec::serialize_into_with_tmp_buffer(&lhs, &mut compressed, &mut tmp_buffer).unwrap();
let sub_lhs = DeRoaringBitmapCodec::deserialize_from_with_tmp_buffer(&compressed, |first, last| {
rhs.range_cardinality(first..=last) == 0
}, &mut tmp_buffer).unwrap();
let intersection = sub_lhs & rhs.clone();
let expected_intersection = lhs & rhs;
intersection == expected_intersection
}
}
}
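As a standalone illustration of the strictly-sorted bitpacking cascade above, here is a minimal round trip through a single BitPacker4x block (128 integers), assuming the bitpacking crate; the last lines show the 2-bit-level / 6-bit-width chunk header scheme:

```rust
use bitpacking::{BitPacker, BitPacker4x};

fn main() {
    // One full, strictly increasing block (BLOCK_LEN is 128 for BitPacker4x).
    let input: Vec<u32> = (0..BitPacker4x::BLOCK_LEN as u32).map(|i| i * 3).collect();

    let packer = BitPacker4x::new();
    let num_bits = packer.num_bits_strictly_sorted(None, &input);
    let mut compressed = vec![0u8; BitPacker4x::compressed_block_size(num_bits)];
    let written = packer.compress_strictly_sorted(None, &input, &mut compressed, num_bits);
    assert_eq!(written, compressed.len());

    let mut decompressed = vec![0u32; BitPacker4x::BLOCK_LEN];
    let read = packer.decompress_strictly_sorted(None, &compressed, &mut decompressed, num_bits);
    assert_eq!(read, written);
    assert_eq!(decompressed, input);

    // Chunk header: level in the top two bits, bit width in the low six.
    let header = num_bits | (2u8 << 6); // 2 = BitPackerLevel::BitPacker4x
    assert_eq!((header >> 6, header & 0b0011_1111), (2, num_bits));
}
```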

View File

@@ -1,9 +1,7 @@
pub(super) mod cbo_roaring_bitmap_codec;
mod de_cbo_roaring_bitmap_codec;
pub(super) mod de_roaring_bitmap_codec;
mod bo_roaring_bitmap_codec;
pub mod cbo_roaring_bitmap_codec;
mod roaring_bitmap_codec;
pub use self::cbo_roaring_bitmap_codec::THRESHOLD;
pub use self::de_cbo_roaring_bitmap_codec::{DeCboRoaringBitmapCodec, DELTA_ENCODING_STATUS};
pub use self::de_roaring_bitmap_codec::take_all_blocks;
pub use self::bo_roaring_bitmap_codec::BoRoaringBitmapCodec;
pub use self::cbo_roaring_bitmap_codec::CboRoaringBitmapCodec;
pub use self::roaring_bitmap_codec::RoaringBitmapCodec;

View File

@@ -3,7 +3,7 @@ use std::mem;
use heed::{BoxedError, BytesDecode};
use super::{BoRoaringBitmapLenCodec, RoaringBitmapLenCodec};
use crate::heed_codec::roaring_bitmap::THRESHOLD;
use crate::heed_codec::roaring_bitmap::cbo_roaring_bitmap_codec::THRESHOLD;
use crate::heed_codec::BytesDecodeOwned;
pub struct CboRoaringBitmapLenCodec;

View File

@@ -1,42 +0,0 @@
use std::io::ErrorKind;
use heed::{BoxedError, BytesDecode};
use super::BoRoaringBitmapLenCodec;
use crate::heed_codec::roaring_bitmap::cbo_roaring_bitmap_codec::CboRoaringBitmapCodec;
use crate::heed_codec::roaring_bitmap::de_roaring_bitmap_codec::DeRoaringBitmapCodec;
use crate::heed_codec::roaring_bitmap_length::CboRoaringBitmapLenCodec;
use crate::heed_codec::BytesDecodeOwned;
pub struct DeCboRoaringBitmapLenCodec;
impl BytesDecode<'_> for DeCboRoaringBitmapLenCodec {
type DItem = u64;
fn bytes_decode(bytes: &[u8]) -> Result<Self::DItem, BoxedError> {
if CboRoaringBitmapCodec::bytes_deserialize_as_raw_u32s(bytes) {
// If threshold or fewer integers fit into this array of bytes,
// it means that the ByteOrder codec serializer was used.
BoRoaringBitmapLenCodec::bytes_decode(bytes)
} else {
match DeRoaringBitmapCodec::deserialize_length_from(bytes) {
Ok(bitmap) => Ok(bitmap),
// If the error kind is Other it means that the delta-decoder found
// an invalid magic header. We fall back to the CboRoaringBitmap version.
Err(e) if e.kind() == ErrorKind::Other => {
CboRoaringBitmapLenCodec::bytes_decode(bytes)
}
Err(e) => Err(e.into()),
}
}
}
}
impl BytesDecodeOwned for DeCboRoaringBitmapLenCodec {
type DItem = u64;
fn bytes_decode_owned(bytes: &[u8]) -> Result<Self::DItem, BoxedError> {
Self::bytes_decode(bytes)
}
}

View File

@@ -1,9 +1,7 @@
mod bo_roaring_bitmap_len_codec;
mod cbo_roaring_bitmap_len_codec;
mod de_cbo_roaring_bitmap_len_codec;
mod roaring_bitmap_len_codec;
use self::bo_roaring_bitmap_len_codec::BoRoaringBitmapLenCodec;
use self::cbo_roaring_bitmap_len_codec::CboRoaringBitmapLenCodec;
pub use self::de_cbo_roaring_bitmap_len_codec::DeCboRoaringBitmapLenCodec;
use self::roaring_bitmap_len_codec::RoaringBitmapLenCodec;
pub use self::bo_roaring_bitmap_len_codec::BoRoaringBitmapLenCodec;
pub use self::cbo_roaring_bitmap_len_codec::CboRoaringBitmapLenCodec;
pub use self::roaring_bitmap_len_codec::RoaringBitmapLenCodec;

View File

@@ -72,15 +72,16 @@ impl BytesDecodeOwned for RoaringBitmapLenCodec {
#[cfg(test)]
mod tests {
use heed::BytesEncode;
use roaring::RoaringBitmap;
use super::*;
use crate::heed_codec::RoaringBitmapCodec;
#[test]
fn deserialize_roaring_bitmap_length() {
let bitmap: RoaringBitmap = (0..500).chain(800..800_000).chain(920_056..930_032).collect();
let mut bytes = Vec::new();
bitmap.serialize_into(&mut bytes).unwrap();
let bytes = RoaringBitmapCodec::bytes_encode(&bitmap).unwrap();
let len = RoaringBitmapLenCodec::deserialize_from_slice(&bytes).unwrap();
assert_eq!(bitmap.len(), len);
}

View File

@@ -28,16 +28,18 @@ use crate::heed_codec::facet::{
use crate::heed_codec::version::VersionCodec;
use crate::heed_codec::{BEU16StrCodec, FstSetCodec, StrBEU16Codec, StrRefCodec};
use crate::order_by_map::OrderByMap;
use crate::progress::Progress;
use crate::prompt::PromptData;
use crate::proximity::ProximityPrecision;
use crate::update::new::StdResult;
use crate::vector::db::IndexEmbeddingConfigs;
use crate::vector::{Embedding, VectorStore, VectorStoreBackend, VectorStoreStats};
use crate::{
default_criteria, Criterion, DeCboRoaringBitmapCodec, DeCboRoaringBitmapLenCodec, DocumentId,
ExternalDocumentsIds, FacetDistribution, FieldDistribution, FieldId, FieldIdMapMissingEntry,
FieldIdWordCountCodec, FieldidsWeightsMap, FilterableAttributesRule, GeoPoint,
LocalizedAttributesRule, ObkvCodec, Result, Search, U8StrStrCodec, Weight, BEU16, BEU32, BEU64,
default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds,
FacetDistribution, FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldIdWordCountCodec,
FieldidsWeightsMap, FilterableAttributesRule, GeoPoint, LocalizedAttributesRule, ObkvCodec,
Result, RoaringBitmapCodec, RoaringBitmapLenCodec, Search, U8StrStrCodec, Weight, BEU16, BEU32,
BEU64,
};
pub const DEFAULT_MIN_WORD_LEN_ONE_TYPO: u8 = 5;
@@ -132,38 +134,38 @@ pub struct Index {
pub external_documents_ids: Database<Str, BEU32>,
/// A word and all the documents ids containing the word.
pub word_docids: Database<Str, DeCboRoaringBitmapCodec>,
pub word_docids: Database<Str, CboRoaringBitmapCodec>,
/// A word and all the documents ids containing the word, from attributes for which typos are not allowed.
pub exact_word_docids: Database<Str, DeCboRoaringBitmapCodec>,
pub exact_word_docids: Database<Str, CboRoaringBitmapCodec>,
/// A prefix of word and all the documents ids containing this prefix.
pub word_prefix_docids: Database<Str, DeCboRoaringBitmapCodec>,
pub word_prefix_docids: Database<Str, CboRoaringBitmapCodec>,
/// A prefix of word and all the documents ids containing this prefix, from attributes for which typos are not allowed.
pub exact_word_prefix_docids: Database<Str, DeCboRoaringBitmapCodec>,
pub exact_word_prefix_docids: Database<Str, CboRoaringBitmapCodec>,
/// Maps the proximity between a pair of words with all the docids where this relation appears.
pub word_pair_proximity_docids: Database<U8StrStrCodec, DeCboRoaringBitmapCodec>,
pub word_pair_proximity_docids: Database<U8StrStrCodec, CboRoaringBitmapCodec>,
/// Maps the word and the position with the docids that corresponds to it.
pub word_position_docids: Database<StrBEU16Codec, DeCboRoaringBitmapCodec>,
pub word_position_docids: Database<StrBEU16Codec, CboRoaringBitmapCodec>,
/// Maps the word and the field id with the docids that corresponds to it.
pub word_fid_docids: Database<StrBEU16Codec, DeCboRoaringBitmapCodec>,
pub word_fid_docids: Database<StrBEU16Codec, CboRoaringBitmapCodec>,
/// Maps the field id and the word count with the docids that corresponds to it.
pub field_id_word_count_docids: Database<FieldIdWordCountCodec, DeCboRoaringBitmapCodec>,
pub field_id_word_count_docids: Database<FieldIdWordCountCodec, CboRoaringBitmapCodec>,
/// Maps the word prefix and a position with all the docids where the prefix appears at the position.
pub word_prefix_position_docids: Database<StrBEU16Codec, DeCboRoaringBitmapCodec>,
pub word_prefix_position_docids: Database<StrBEU16Codec, CboRoaringBitmapCodec>,
/// Maps the word prefix and a field id with all the docids where the prefix appears inside the field
pub word_prefix_fid_docids: Database<StrBEU16Codec, DeCboRoaringBitmapCodec>,
pub word_prefix_fid_docids: Database<StrBEU16Codec, CboRoaringBitmapCodec>,
/// Maps the facet field id and the docids for which this field exists
pub facet_id_exists_docids: Database<FieldIdCodec, DeCboRoaringBitmapCodec>,
pub facet_id_exists_docids: Database<FieldIdCodec, CboRoaringBitmapCodec>,
/// Maps the facet field id and the docids for which this field is set as null
pub facet_id_is_null_docids: Database<FieldIdCodec, DeCboRoaringBitmapCodec>,
pub facet_id_is_null_docids: Database<FieldIdCodec, CboRoaringBitmapCodec>,
/// Maps the facet field id and the docids for which this field is considered empty
pub facet_id_is_empty_docids: Database<FieldIdCodec, DeCboRoaringBitmapCodec>,
pub facet_id_is_empty_docids: Database<FieldIdCodec, CboRoaringBitmapCodec>,
/// Maps the facet field id and ranges of numbers with the docids that corresponds to them.
pub facet_id_f64_docids: Database<FacetGroupKeyCodec<OrderedF64Codec>, FacetGroupValueCodec>,
@@ -504,7 +506,7 @@ impl Index {
wtxn: &mut RwTxn<'_>,
docids: &RoaringBitmap,
) -> heed::Result<()> {
self.main.remap_types::<Str, DeCboRoaringBitmapCodec>().put(
self.main.remap_types::<Str, RoaringBitmapCodec>().put(
wtxn,
main_key::DOCUMENTS_IDS_KEY,
docids,
@@ -515,7 +517,7 @@ impl Index {
pub fn documents_ids(&self, rtxn: &RoTxn<'_>) -> heed::Result<RoaringBitmap> {
Ok(self
.main
.remap_types::<Str, DeCboRoaringBitmapCodec>()
.remap_types::<Str, RoaringBitmapCodec>()
.get(rtxn, main_key::DOCUMENTS_IDS_KEY)?
.unwrap_or_default())
}
@@ -524,7 +526,7 @@ impl Index {
pub fn number_of_documents(&self, rtxn: &RoTxn<'_>) -> Result<u64> {
let count = self
.main
.remap_types::<Str, DeCboRoaringBitmapLenCodec>()
.remap_types::<Str, RoaringBitmapLenCodec>()
.get(rtxn, main_key::DOCUMENTS_IDS_KEY)?;
Ok(count.unwrap_or_default())
}
@@ -598,6 +600,13 @@ impl Index {
/// Returns the fields ids map, which associates the document keys with an internal field id
/// (i.e. `u8`). This field id is used to identify fields in the obkv documents.
pub fn fields_ids_map(&self, rtxn: &RoTxn<'_>) -> heed::Result<FieldsIdsMap> {
let map = self.fields_ids_map_with_metadata(rtxn).unwrap();
eprintln!(
"fields_ids_map: {:?}",
map.iter_id_metadata()
.map(|(id, metadata)| (id, map.name(id).unwrap(), metadata))
.collect::<Vec<_>>()
);
Ok(self
.main
.remap_types::<Str, SerdeJson<FieldsIdsMap>>()
@@ -610,7 +619,10 @@ impl Index {
/// This structure is not yet stored in the index, and is generated on the fly.
pub fn fields_ids_map_with_metadata(&self, rtxn: &RoTxn<'_>) -> Result<FieldIdMapWithMetadata> {
Ok(FieldIdMapWithMetadata::new(
self.fields_ids_map(rtxn)?,
self.main
.remap_types::<Str, SerdeJson<FieldsIdsMap>>()
.get(rtxn, main_key::FIELDS_IDS_MAP_KEY)?
.unwrap_or_default(),
MetadataBuilder::from_index(self, rtxn)?,
))
}
@@ -725,7 +737,7 @@ impl Index {
wtxn: &mut RwTxn<'_>,
docids: &RoaringBitmap,
) -> heed::Result<()> {
self.main.remap_types::<Str, DeCboRoaringBitmapCodec>().put(
self.main.remap_types::<Str, RoaringBitmapCodec>().put(
wtxn,
main_key::GEO_FACETED_DOCUMENTS_IDS_KEY,
docids,
@@ -744,7 +756,7 @@ impl Index {
pub fn geo_faceted_documents_ids(&self, rtxn: &RoTxn<'_>) -> heed::Result<RoaringBitmap> {
match self
.main
.remap_types::<Str, DeCboRoaringBitmapCodec>()
.remap_types::<Str, RoaringBitmapCodec>()
.get(rtxn, main_key::GEO_FACETED_DOCUMENTS_IDS_KEY)?
{
Some(docids) => Ok(docids),
@@ -1397,7 +1409,7 @@ impl Index {
/// Returns the number of documents ids associated with the given word,
/// it is much faster than deserializing the bitmap and getting the length of it.
pub fn word_documents_count(&self, rtxn: &RoTxn<'_>, word: &str) -> heed::Result<Option<u64>> {
self.word_docids.remap_data_type::<DeCboRoaringBitmapLenCodec>().get(rtxn, word)
self.word_docids.remap_data_type::<RoaringBitmapLenCodec>().get(rtxn, word)
}
/* documents */
@@ -1476,8 +1488,8 @@ impl Index {
FacetDistribution::new(rtxn, self)
}
pub fn search<'a>(&'a self, rtxn: &'a RoTxn<'a>) -> Search<'a> {
Search::new(rtxn, self)
pub fn search<'a>(&'a self, rtxn: &'a RoTxn<'a>, progress: &'a Progress) -> Search<'a> {
Search::new(rtxn, self, progress)
}
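With this signature change, call sites thread a `Progress` handle into the search. A hypothetical call-site sketch (an already-opened `Index` and read transaction are assumed to come from the surrounding application):

```rust
use milli::progress::Progress;
use milli::{Index, Result, SearchResult};

// Hypothetical helper showing the new entry point; `index` and `rtxn`
// are provided by the caller.
fn search_with_progress(index: &Index, rtxn: &milli::heed::RoTxn<'_>) -> Result<SearchResult> {
    let progress = Progress::default();
    let mut search = index.search(rtxn, &progress);
    search.query("puffin");
    search.execute()
}
```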
/// Returns the index creation time.

View File

@@ -72,14 +72,16 @@ pub use self::filterable_attributes_rules::{
FilterableAttributesRule,
};
pub use self::heed_codec::{
BEU16StrCodec, BEU32StrCodec, DeCboRoaringBitmapCodec, DeCboRoaringBitmapLenCodec,
FieldIdWordCountCodec, ObkvCodec, RoaringBitmapCodec, StrBEU32Codec, U8StrStrCodec,
BEU16StrCodec, BEU32StrCodec, BoRoaringBitmapCodec, BoRoaringBitmapLenCodec,
CboRoaringBitmapCodec, CboRoaringBitmapLenCodec, FieldIdWordCountCodec, ObkvCodec,
RoaringBitmapCodec, RoaringBitmapLenCodec, StrBEU32Codec, U8StrStrCodec,
UncheckedU8StrStrCodec,
};
pub use self::index::Index;
pub use self::localized_attributes_rules::LocalizedAttributesRule;
pub use self::search::facet::{FacetValueHit, SearchForFacetValues};
pub use self::search::similar::Similar;
pub use self::search::steps::{FederatingResultsStep, SearchStep, TotalProcessingTimeStep};
pub use self::search::{
FacetDistribution, Filter, FormatOptions, MatchBounds, MatcherBuilder, MatchingWords, OrderBy,
Search, SearchResult, SemanticSearch, TermsMatchingStrategy, DEFAULT_VALUES_PER_FACET,

View File

@@ -64,6 +64,30 @@ impl Progress {
steps.push((step_type, Box::new(sub_progress), now));
}
/// End a step that has been started without having to start a new step.
fn end_progress_step<P: Step>(&self, sub_progress: P) {
let mut inner = self.steps.write().unwrap();
let InnerProgress { steps, durations } = &mut *inner;
let now = Instant::now();
let step_type = TypeId::of::<P>();
debug_assert!(
steps.iter().any(|(id, s, _)| *id == step_type && s.name() == sub_progress.name()),
"Step `{}` must have been started",
sub_progress.name()
);
if let Some(idx) = steps.iter().position(|(id, _, _)| *id == step_type) {
push_steps_durations(steps, durations, now, idx);
steps.truncate(idx);
}
}
/// Update the progress and return a scoped progress step that ends the step when dropped.
pub fn update_progress_scoped<P: Step + Copy>(&self, step: P) -> ScopedProgressStep<'_, P> {
self.update_progress(step);
ScopedProgressStep { progress: self, step }
}
// TODO: This code should be in meilisearch_types but cannot because milli can't depend on meilisearch_types
pub fn as_progress_view(&self) -> ProgressView {
let inner = self.steps.read().unwrap();
@@ -95,7 +119,15 @@ impl Progress {
let now = Instant::now();
push_steps_durations(steps, &mut durations, now, 0);
durations.drain(..).map(|(name, duration)| (name, format!("{duration:.2?}"))).collect()
let mut accumulated_durations = IndexMap::new();
for (name, duration) in durations.drain(..) {
accumulated_durations.entry(name).and_modify(|d| *d += duration).or_insert(duration);
}
accumulated_durations
.into_iter()
.map(|(name, duration)| (name, format!("{duration:.2?}")))
.collect()
}
// TODO: ideally we should expose the progress in a way that let arroy use it directly
@@ -343,3 +375,14 @@ impl<T: steppe::Step> Step for Compat<T> {
self.0.total().try_into().unwrap_or(u32::MAX)
}
}
pub struct ScopedProgressStep<'a, P: Step + Copy> {
progress: &'a Progress,
step: P,
}
impl<'a, P: Step + Copy> Drop for ScopedProgressStep<'a, P> {
fn drop(&mut self) {
self.progress.end_progress_step(self.step);
}
}
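The scoped step is a classic RAII guard: the step ends when the guard leaves scope, including on early returns and `?`. A reduced, std-only sketch of the same pattern:

```rust
struct ScopeGuard<F: FnMut()> {
    on_drop: F,
}

impl<F: FnMut()> Drop for ScopeGuard<F> {
    fn drop(&mut self) {
        // Runs whenever the guard leaves scope, however it leaves it.
        (self.on_drop)();
    }
}

fn main() {
    {
        let _step = ScopeGuard { on_drop: || println!("step ended") };
        println!("step running");
    } // `_step` dropped here, printing "step ended"
}
```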

View File

@@ -10,7 +10,7 @@ use crate::heed_codec::facet::{
FacetGroupKey, FacetGroupKeyCodec, FacetGroupLazyValueCodec, FacetGroupValueCodec,
};
use crate::heed_codec::BytesRefCodec;
use crate::{DeCboRoaringBitmapCodec, DocumentId};
use crate::{CboRoaringBitmapCodec, DocumentId};
/// Call the given closure on the facet distribution of the candidate documents.
///
@@ -88,7 +88,7 @@ where
if key.field_id != field_id {
break;
}
let intersection = DeCboRoaringBitmapCodec::intersection_with_serialized(
let intersection = CboRoaringBitmapCodec::intersection_with_serialized(
value.bitmap_bytes,
candidates,
)?;
@@ -120,7 +120,7 @@ where
if key.field_id != field_id {
break;
}
let intersection = DeCboRoaringBitmapCodec::intersection_with_serialized(
let intersection = CboRoaringBitmapCodec::intersection_with_serialized(
value.bitmap_bytes,
candidates,
)?;
@@ -173,7 +173,7 @@ where
if key.field_id != self.field_id {
return Ok(ControlFlow::Break(()));
}
let docids_in_common = DeCboRoaringBitmapCodec::intersection_with_serialized(
let docids_in_common = CboRoaringBitmapCodec::intersection_with_serialized(
value.bitmap_bytes,
candidates,
)?;
@@ -210,7 +210,7 @@ where
if key.field_id != self.field_id {
return Ok(ControlFlow::Break(()));
}
let docids_in_common = DeCboRoaringBitmapCodec::intersection_with_serialized(
let docids_in_common = CboRoaringBitmapCodec::intersection_with_serialized(
value.bitmap_bytes,
candidates,
)?;
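These call sites rely on intersecting the candidates with facet bitmaps that stay serialized, avoiding a full deserialization. A minimal sketch of the underlying roaring primitive (which the codec itself falls back to for large bitmaps):

```rust
use std::io::Cursor;
use roaring::RoaringBitmap;

fn main() -> std::io::Result<()> {
    // The bitmap as it would sit, serialized, in LMDB.
    let stored: RoaringBitmap = (0..1_000).collect();
    let mut bytes = Vec::new();
    stored.serialize_into(&mut bytes)?;

    // Intersect without deserializing `stored` in full.
    let candidates: RoaringBitmap = (500..600).collect();
    let docids_in_common =
        candidates.intersection_with_serialized_unchecked(Cursor::new(&bytes[..]))?;

    let expected: RoaringBitmap = (500..600).collect();
    assert_eq!(docids_in_common, expected);
    Ok(())
}
```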

View File

@@ -8,7 +8,7 @@ use crate::heed_codec::facet::{
FacetGroupKey, FacetGroupKeyCodec, FacetGroupLazyValueCodec, FacetGroupValueCodec,
};
use crate::heed_codec::BytesRefCodec;
use crate::{DeCboRoaringBitmapCodec, Result};
use crate::{CboRoaringBitmapCodec, Result};
/// Find all the document ids for which the given field contains a value contained within
/// the two bounds.
@@ -114,11 +114,11 @@ impl<'t> FacetRangeSearch<'t, '_, '_> {
if RangeBounds::<&[u8]>::contains(&(self.left, self.right), &key.left_bound) {
*self.docids |= match self.universe {
Some(universe) => DeCboRoaringBitmapCodec::intersection_with_serialized(
Some(universe) => CboRoaringBitmapCodec::intersection_with_serialized(
value.bitmap_bytes,
universe,
)?,
None => DeCboRoaringBitmapCodec::deserialize_from(value.bitmap_bytes)?,
None => CboRoaringBitmapCodec::deserialize_from(value.bitmap_bytes)?,
};
}
}
@@ -211,11 +211,11 @@ impl<'t> FacetRangeSearch<'t, '_, '_> {
};
if should_take_whole_group {
*self.docids |= match self.universe {
Some(universe) => DeCboRoaringBitmapCodec::intersection_with_serialized(
Some(universe) => CboRoaringBitmapCodec::intersection_with_serialized(
previous_value.bitmap_bytes,
universe,
)?,
None => DeCboRoaringBitmapCodec::deserialize_from(previous_value.bitmap_bytes)?,
None => CboRoaringBitmapCodec::deserialize_from(previous_value.bitmap_bytes)?,
};
previous_key = next_key;
previous_value = next_value;
@@ -313,11 +313,11 @@ impl<'t> FacetRangeSearch<'t, '_, '_> {
};
if should_take_whole_group {
*self.docids |= match self.universe {
Some(universe) => DeCboRoaringBitmapCodec::intersection_with_serialized(
Some(universe) => CboRoaringBitmapCodec::intersection_with_serialized(
previous_value.bitmap_bytes,
universe,
)?,
None => DeCboRoaringBitmapCodec::deserialize_from(previous_value.bitmap_bytes)?,
None => CboRoaringBitmapCodec::deserialize_from(previous_value.bitmap_bytes)?,
};
} else {
let level = level - 1;

View File

@@ -1165,7 +1165,7 @@ mod tests {
let rtxn = index.read_txn().unwrap();
let mut search = crate::Search::new(&rtxn, &index);
let mut search = index.search(&rtxn);
// this filter is copy-pasted from #2380 with the exact same escape sequence
search.filter(Filter::from_str("monitor_diagonal = '27\" to 30\\''").unwrap().unwrap());
let crate::SearchResult { documents_ids, .. } = search.execute().unwrap();
@@ -1225,7 +1225,7 @@ mod tests {
let rtxn = index.read_txn().unwrap();
let mut search = crate::Search::new(&rtxn, &index);
let mut search = index.search(&rtxn);
search.filter(Filter::from_str("_geoRadius(45.4777599, 9.1967508, 0)").unwrap().unwrap());
let crate::SearchResult { documents_ids, .. } = search.execute().unwrap();

View File

@@ -6,6 +6,7 @@ use roaring::RoaringBitmap;
use crate::score_details::{ScoreDetails, ScoreValue, ScoringStrategy};
use crate::search::new::{distinct_fid, distinct_single_docid};
use crate::search::steps::SearchStep;
use crate::search::SemanticSearch;
use crate::vector::{Embedding, SearchQuery};
use crate::{Index, MatchingWords, Result, Search, SearchResult};
@@ -221,6 +222,7 @@ impl Search<'_> {
time_budget: self.time_budget.clone(),
ranking_score_threshold: self.ranking_score_threshold,
locales: self.locales.clone(),
progress: self.progress,
};
let semantic = search.semantic.take();
@@ -241,6 +243,7 @@ impl Search<'_> {
Some(vector_query) => vector_query,
None => {
// attempt to embed the vector
self.progress.update_progress(SearchStep::EmbeddingQuery);
let span = tracing::trace_span!(target: "search::hybrid", "embed_one");
let _entered = span.enter();

View File

@@ -12,7 +12,9 @@ use self::new::{execute_vector_search, PartialSearchResult, VectorStoreStats};
use crate::documents::GeoSortParameter;
use crate::filterable_attributes_rules::{filtered_matching_patterns, matching_features};
use crate::index::MatchingStrategy;
use crate::progress::Progress;
use crate::score_details::{ScoreDetails, ScoringStrategy};
use crate::search::steps::SearchStep;
use crate::vector::{Embedder, Embedding};
use crate::{
execute_search, filtered_universe, AscDesc, DefaultSearchLogger, DocumentId, Error, Index,
@@ -29,6 +31,7 @@ mod fst_utils;
pub mod hybrid;
pub mod new;
pub mod similar;
pub mod steps;
#[derive(Debug, Clone)]
pub struct SemanticSearch {
@@ -61,10 +64,11 @@ pub struct Search<'a> {
time_budget: TimeBudget,
ranking_score_threshold: Option<f64>,
locales: Option<Vec<Language>>,
progress: &'a Progress,
}
impl<'a> Search<'a> {
pub fn new(rtxn: &'a heed::RoTxn<'a>, index: &'a Index) -> Search<'a> {
pub fn new(rtxn: &'a heed::RoTxn<'a>, index: &'a Index, progress: &'a Progress) -> Search<'a> {
Search {
query: None,
filter: None,
@@ -86,6 +90,7 @@ impl<'a> Search<'a> {
locales: None,
time_budget: TimeBudget::max(),
ranking_score_threshold: None,
progress,
}
}
@@ -198,7 +203,7 @@ impl<'a> Search<'a> {
pub fn execute_for_candidates(&self, has_vector_search: bool) -> Result<RoaringBitmap> {
if has_vector_search {
let ctx = SearchContext::new(self.index, self.rtxn)?;
filtered_universe(ctx.index, ctx.txn, &self.filter)
filtered_universe(ctx.index, ctx.txn, &self.filter, self.progress)
} else {
Ok(self.execute()?.candidates)
}
@@ -239,8 +244,9 @@ impl<'a> Search<'a> {
}
}
let universe = filtered_universe(ctx.index, ctx.txn, &self.filter)?;
let universe = filtered_universe(ctx.index, ctx.txn, &self.filter, self.progress)?;
let mut query_vector = None;
let PartialSearchResult {
located_query_terms,
candidates,
@@ -276,6 +282,7 @@ impl<'a> Search<'a> {
*quantized,
self.time_budget.clone(),
self.ranking_score_threshold,
self.progress,
)?
}
_ => execute_search(
@@ -297,6 +304,7 @@ impl<'a> Search<'a> {
self.time_budget.clone(),
self.ranking_score_threshold,
self.locales.as_ref(),
self.progress,
)?,
};
@@ -306,6 +314,7 @@ impl<'a> Search<'a> {
tracing::debug!("Vector store stats: total_time={total_time:.02?}, total_queries={total_queries}, total_results={total_results}");
}
self.progress.update_progress(SearchStep::FormattingResults);
// consume context and located_query_terms to build MatchingWords.
let matching_words = match located_query_terms {
Some(located_query_terms) => MatchingWords::new(ctx, located_query_terms),
@@ -347,6 +356,7 @@ impl fmt::Debug for Search<'_> {
time_budget,
ranking_score_threshold,
locales,
progress: _,
} = self;
f.debug_struct("Search")
.field("query", query)

View File

@@ -3,10 +3,12 @@ use roaring::RoaringBitmap;
use super::logger::SearchLogger;
use super::ranking_rules::{BoxRankingRule, RankingRuleQueryTrait};
use super::SearchContext;
use crate::progress::Progress;
use crate::score_details::{ScoreDetails, ScoringStrategy};
use crate::search::new::distinct::{
apply_distinct_rule, distinct_fid, distinct_single_docid, DistinctOutput,
};
use crate::search::steps::{ComputingBucketSortStep, SearchStep};
use crate::{Result, TimeBudget};
pub struct BucketSortOutput {
@@ -34,7 +36,9 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
ranking_score_threshold: Option<f64>,
exhaustive_number_hits: bool,
max_total_hits: Option<usize>,
progress: &Progress,
) -> Result<BucketSortOutput> {
let _step = progress.update_progress_scoped(SearchStep::ComputingBucketSort);
logger.initial_query(query);
logger.ranking_rules(&ranking_rules);
logger.initial_universe(universe);
@@ -93,6 +97,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
};
}
let step = progress.update_progress_scoped(ComputingBucketSortStep::Initializing);
let ranking_rules_len = ranking_rules.len();
logger.start_iteration_ranking_rule(0, ranking_rules[0].as_ref(), query, universe);
@@ -105,6 +110,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
vec![RoaringBitmap::default(); ranking_rules_len];
ranking_rule_universes[0].clone_from(universe);
let mut cur_ranking_rule_index = 0;
drop(step);
/// Finish iterating over the current ranking rule, yielding
/// control to the parent (or finishing the search if not possible).
@@ -157,6 +163,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
distinct_fid,
&ranking_rule_scores,
$candidates,
progress,
)?;
};
}
@@ -185,6 +192,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
ctx,
logger,
&ranking_rule_universes[cur_ranking_rule_index],
progress,
)? {
std::task::Poll::Ready(bucket) => bucket,
std::task::Poll::Pending => {
@@ -231,6 +239,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
logger,
&ranking_rule_universes[cur_ranking_rule_index],
&time_budget,
progress,
)?
else {
back!();
@@ -323,9 +332,11 @@ fn maybe_add_to_results<'ctx, Q: RankingRuleQueryTrait>(
distinct_fid: Option<u16>,
ranking_rule_scores: &[ScoreDetails],
candidates: RoaringBitmap,
progress: &Progress,
) -> Result<()> {
// First apply the distinct rule on the candidates, reducing the universes if necessary
let candidates = if let Some(distinct_fid) = distinct_fid {
progress.update_progress(ComputingBucketSortStep::ApplyingDistinctRule);
let DistinctOutput { remaining, excluded } =
apply_distinct_rule(ctx, distinct_fid, &candidates)?;
for universe in ranking_rule_universes.iter_mut() {
@@ -336,6 +347,8 @@ fn maybe_add_to_results<'ctx, Q: RankingRuleQueryTrait>(
} else {
candidates.clone()
};
progress.update_progress(ComputingBucketSortStep::MergingCandidates);
*all_candidates |= &candidates;
// if the candidates are empty, there is nothing to do;
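Two reporting styles coexist in this file: `update_progress` is a plain one-shot update, while `update_progress_scoped` returns a guard bound to `step`, and the explicit `drop(step)` before the sort loop suggests that dropping the guard is what ends the step. A sketch of that assumed contract:

let step = progress.update_progress_scoped(ComputingBucketSortStep::Initializing);
// ... set up ranking rules and per-rule universes ...
drop(step); // assumed: dropping the guard marks the step as finished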

View File

@@ -12,9 +12,9 @@ use super::interner::Interned;
use super::Word;
use crate::heed_codec::{BytesDecodeOwned, StrBEU16Codec};
use crate::proximity::ProximityPrecision;
use crate::update::MergeDeCboRoaringBitmaps;
use crate::update::MergeCboRoaringBitmaps;
use crate::{
DeCboRoaringBitmapCodec, DeCboRoaringBitmapLenCodec, Result, SearchContext, U8StrStrCodec,
CboRoaringBitmapCodec, CboRoaringBitmapLenCodec, Result, SearchContext, U8StrStrCodec,
};
/// A cache storing pointers to values in the LMDB databases.
@@ -72,11 +72,11 @@ impl<'ctx> DatabaseCache<'ctx> {
match (bitmap_bytes, universe) {
(bytes, Some(universe)) => {
DeCboRoaringBitmapCodec::intersection_with_serialized(bytes, universe)
CboRoaringBitmapCodec::intersection_with_serialized(bytes, universe)
.map(Some)
.map_err(Into::into)
}
(bytes, None) => DeCboRoaringBitmapCodec::bytes_decode_owned(bytes)
(bytes, None) => CboRoaringBitmapCodec::bytes_decode_owned(bytes)
.map(Some)
.map_err(heed::Error::Decoding)
.map_err(Into::into),
@@ -105,7 +105,7 @@ impl<'ctx> DatabaseCache<'ctx> {
None => return Ok(None),
};
DeCboRoaringBitmapLenCodec::bytes_decode_owned(bitmap_bytes)
CboRoaringBitmapLenCodec::bytes_decode_owned(bitmap_bytes)
.map(Some)
.map_err(heed::Error::Decoding)
.map_err(Into::into)
@@ -157,11 +157,11 @@ impl<'ctx> DatabaseCache<'ctx> {
match (bitmap_bytes, universe) {
(bytes, Some(universe)) => {
DeCboRoaringBitmapCodec::intersection_with_serialized(bytes, universe)
CboRoaringBitmapCodec::intersection_with_serialized(bytes, universe)
.map(Some)
.map_err(Into::into)
}
(bytes, None) => DeCboRoaringBitmapCodec::bytes_decode_owned(bytes)
(bytes, None) => CboRoaringBitmapCodec::bytes_decode_owned(bytes)
.map(Some)
.map_err(heed::Error::Decoding)
.map_err(Into::into),
@@ -223,7 +223,7 @@ impl<'ctx> SearchContext<'ctx> {
&mut self.db_cache.word_docids,
self.index.word_fid_docids.remap_data_type::<Bytes>(),
universe,
MergeDeCboRoaringBitmaps,
MergeCboRoaringBitmaps,
)
}
None => DatabaseCache::get_value(
@@ -255,7 +255,7 @@ impl<'ctx> SearchContext<'ctx> {
&mut self.db_cache.exact_word_docids,
self.index.word_fid_docids.remap_data_type::<Bytes>(),
universe,
MergeDeCboRoaringBitmaps,
MergeCboRoaringBitmaps,
)
}
None => DatabaseCache::get_value(
@@ -312,7 +312,7 @@ impl<'ctx> SearchContext<'ctx> {
&mut self.db_cache.word_prefix_docids,
self.index.word_prefix_fid_docids.remap_data_type::<Bytes>(),
universe,
MergeDeCboRoaringBitmaps,
MergeCboRoaringBitmaps,
)
}
None => DatabaseCache::get_value(
@@ -344,7 +344,7 @@ impl<'ctx> SearchContext<'ctx> {
&mut self.db_cache.exact_word_prefix_docids,
self.index.word_prefix_fid_docids.remap_data_type::<Bytes>(),
universe,
MergeDeCboRoaringBitmaps,
MergeCboRoaringBitmaps,
)
}
None => DatabaseCache::get_value(
@@ -377,7 +377,7 @@ impl<'ctx> SearchContext<'ctx> {
{
docids
.as_ref()
.map(|d| DeCboRoaringBitmapCodec::bytes_decode_owned(d))
.map(|d| CboRoaringBitmapCodec::bytes_decode_owned(d))
.transpose()
.map_err(heed::Error::Decoding)?
} else {
@@ -395,7 +395,7 @@ impl<'ctx> SearchContext<'ctx> {
docids |= word1_docids & word2_docids;
}
}
let encoded = DeCboRoaringBitmapCodec::bytes_encode(&docids)
let encoded = CboRoaringBitmapCodec::bytes_encode(&docids)
.map(Cow::into_owned)
.map(Cow::Owned)
.map(Some)
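The two decode paths above differ only in whether a universe is available at decode time; intersecting against the serialized bytes avoids materializing the full bitmap first. A condensed sketch of the dispatch, using only the calls that appear in this hunk:

let docids = match (bitmap_bytes, universe) {
    (bytes, Some(universe)) => {
        CboRoaringBitmapCodec::intersection_with_serialized(bytes, universe)?
    }
    (bytes, None) => {
        CboRoaringBitmapCodec::bytes_decode_owned(bytes).map_err(heed::Error::Decoding)?
    }
};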

View File

@@ -3,10 +3,13 @@ use roaring::{MultiOps, RoaringBitmap};
use super::query_graph::QueryGraph;
use super::ranking_rules::{RankingRule, RankingRuleOutput};
use crate::progress::Progress;
use crate::score_details::{self, ScoreDetails};
use crate::search::new::query_graph::QueryNodeData;
use crate::search::new::query_term::ExactTerm;
use crate::{DeCboRoaringBitmapCodec, Result, SearchContext, SearchLogger, TimeBudget};
use crate::search::new::ranking_rules::RankingRuleId;
use crate::search::steps::ComputingBucketSortStep;
use crate::{CboRoaringBitmapCodec, Result, SearchContext, SearchLogger, TimeBudget};
/// A ranking rule that produces 3 disjoint buckets:
///
@@ -24,8 +27,8 @@ impl ExactAttribute {
}
impl<'ctx> RankingRule<'ctx, QueryGraph> for ExactAttribute {
fn id(&self) -> String {
"exact_attribute".to_owned()
fn id(&self) -> RankingRuleId {
RankingRuleId::Exactness
}
#[tracing::instrument(level = "trace", skip_all, target = "search::exact_attribute")]
@@ -48,7 +51,9 @@ impl<'ctx> RankingRule<'ctx, QueryGraph> for ExactAttribute {
_logger: &mut dyn SearchLogger<QueryGraph>,
universe: &roaring::RoaringBitmap,
_time_budget: &TimeBudget,
progress: &Progress,
) -> Result<Option<RankingRuleOutput<QueryGraph>>> {
progress.update_progress(ComputingBucketSortStep::from(self.id()));
let state = std::mem::take(&mut self.state);
let (state, output) = State::next(state, universe);
self.state = state;
@@ -219,7 +224,7 @@ impl State {
match bitmap_bytes {
Some(bytes) => {
DeCboRoaringBitmapCodec::intersection_with_serialized(bytes, universe)?
CboRoaringBitmapCodec::intersection_with_serialized(bytes, universe)?
}
None => RoaringBitmap::default(),
}

View File

@@ -6,7 +6,10 @@ use rstar::RTree;
use super::ranking_rules::{RankingRule, RankingRuleOutput, RankingRuleQueryTrait};
use crate::documents::geo_sort::{fill_cache, next_bucket};
use crate::documents::{GeoSortParameter, GeoSortStrategy};
use crate::progress::Progress;
use crate::score_details::{self, ScoreDetails};
use crate::search::new::ranking_rules::RankingRuleId;
use crate::search::steps::ComputingBucketSortStep;
use crate::{GeoPoint, Result, SearchContext, SearchLogger, TimeBudget};
pub struct GeoSort<Q: RankingRuleQueryTrait> {
@@ -73,8 +76,8 @@ impl<Q: RankingRuleQueryTrait> GeoSort<Q> {
}
impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for GeoSort<Q> {
fn id(&self) -> String {
"geo_sort".to_owned()
fn id(&self) -> RankingRuleId {
RankingRuleId::GeoSort
}
#[tracing::instrument(level = "trace", skip_all, target = "search::geo_sort")]
@@ -112,7 +115,9 @@ impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for GeoSort<Q> {
_logger: &mut dyn SearchLogger<Q>,
universe: &RoaringBitmap,
_time_budget: &TimeBudget,
progress: &Progress,
) -> Result<Option<RankingRuleOutput<Q>>> {
progress.update_progress(ComputingBucketSortStep::from(self.id()));
let query = self.query.as_ref().unwrap().clone();
next_bucket(

View File

@@ -50,51 +50,54 @@ use super::ranking_rule_graph::{
};
use super::small_bitmap::SmallBitmap;
use super::{QueryGraph, RankingRule, RankingRuleOutput, SearchContext};
use crate::progress::Progress;
use crate::score_details::Rank;
use crate::search::new::query_term::LocatedQueryTermSubset;
use crate::search::new::ranking_rule_graph::PathVisitor;
use crate::search::new::ranking_rules::RankingRuleId;
use crate::search::steps::ComputingBucketSortStep;
use crate::{Result, TermsMatchingStrategy, TimeBudget};
pub type Words = GraphBasedRankingRule<WordsGraph>;
impl GraphBasedRankingRule<WordsGraph> {
pub fn new(terms_matching_strategy: TermsMatchingStrategy) -> Self {
Self::new_with_id("words".to_owned(), Some(terms_matching_strategy))
Self::new_with_id(RankingRuleId::Words, Some(terms_matching_strategy))
}
}
pub type Proximity = GraphBasedRankingRule<ProximityGraph>;
impl GraphBasedRankingRule<ProximityGraph> {
pub fn new(terms_matching_strategy: Option<TermsMatchingStrategy>) -> Self {
Self::new_with_id("proximity".to_owned(), terms_matching_strategy)
Self::new_with_id(RankingRuleId::Proximity, terms_matching_strategy)
}
}
pub type Fid = GraphBasedRankingRule<FidGraph>;
impl GraphBasedRankingRule<FidGraph> {
pub fn new(terms_matching_strategy: Option<TermsMatchingStrategy>) -> Self {
Self::new_with_id("fid".to_owned(), terms_matching_strategy)
Self::new_with_id(RankingRuleId::AttributePosition, terms_matching_strategy)
}
}
pub type Position = GraphBasedRankingRule<PositionGraph>;
impl GraphBasedRankingRule<PositionGraph> {
pub fn new(terms_matching_strategy: Option<TermsMatchingStrategy>) -> Self {
Self::new_with_id("position".to_owned(), terms_matching_strategy)
Self::new_with_id(RankingRuleId::WordPosition, terms_matching_strategy)
}
}
pub type Typo = GraphBasedRankingRule<TypoGraph>;
impl GraphBasedRankingRule<TypoGraph> {
pub fn new(terms_matching_strategy: Option<TermsMatchingStrategy>) -> Self {
Self::new_with_id("typo".to_owned(), terms_matching_strategy)
Self::new_with_id(RankingRuleId::Typo, terms_matching_strategy)
}
}
pub type Exactness = GraphBasedRankingRule<ExactnessGraph>;
impl GraphBasedRankingRule<ExactnessGraph> {
pub fn new() -> Self {
Self::new_with_id("exactness".to_owned(), None)
Self::new_with_id(RankingRuleId::Exactness, None)
}
}
/// A generic graph-based ranking rule
pub struct GraphBasedRankingRule<G: RankingRuleGraphTrait> {
id: String,
id: RankingRuleId,
terms_matching_strategy: Option<TermsMatchingStrategy>,
// When the ranking rule is not iterating over its buckets,
// its state is `None`.
@@ -102,7 +105,10 @@ pub struct GraphBasedRankingRule<G: RankingRuleGraphTrait> {
}
impl<G: RankingRuleGraphTrait> GraphBasedRankingRule<G> {
/// Creates the ranking rule with the given identifier
pub fn new_with_id(id: String, terms_matching_strategy: Option<TermsMatchingStrategy>) -> Self {
pub fn new_with_id(
id: RankingRuleId,
terms_matching_strategy: Option<TermsMatchingStrategy>,
) -> Self {
Self { id, terms_matching_strategy, state: None }
}
}
@@ -124,7 +130,7 @@ pub struct GraphBasedRankingRuleState<G: RankingRuleGraphTrait> {
}
impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBasedRankingRule<G> {
fn id(&self) -> String {
fn id(&self) -> RankingRuleId {
self.id.clone()
}
@@ -219,7 +225,9 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
logger: &mut dyn SearchLogger<QueryGraph>,
universe: &RoaringBitmap,
_time_budget: &TimeBudget,
progress: &Progress,
) -> Result<Option<RankingRuleOutput<QueryGraph>>> {
progress.update_progress(ComputingBucketSortStep::from(self.id()));
// Will crash if `next_bucket` is called before `start_iteration` or after `end_iteration`,
// which should never happen
let mut state = self.state.take().unwrap();

View File

@@ -14,7 +14,7 @@ use crate::search::new::ranking_rule_graph::{
ProximityGraph, RankingRuleGraph, RankingRuleGraphTrait, TypoCondition, TypoGraph,
WordsCondition, WordsGraph,
};
use crate::search::new::ranking_rules::BoxRankingRule;
use crate::search::new::ranking_rules::{BoxRankingRule, RankingRuleId};
use crate::search::new::{QueryGraph, QueryNode, RankingRule, SearchContext, SearchLogger};
use crate::Result;
@@ -45,13 +45,26 @@ enum Location {
Other,
}
impl From<RankingRuleId> for Location {
fn from(ranking_rule_id: RankingRuleId) -> Self {
match ranking_rule_id {
RankingRuleId::Words => Self::Words,
RankingRuleId::Typo => Self::Typo,
RankingRuleId::Proximity => Self::Proximity,
RankingRuleId::AttributePosition => Self::Fid,
RankingRuleId::WordPosition => Self::Position,
_ => Self::Other,
}
}
}
#[derive(Default)]
pub struct VisualSearchLogger {
initial_query: Option<QueryGraph>,
initial_query_time: Option<Instant>,
query_for_universe: Option<QueryGraph>,
initial_universe: Option<RoaringBitmap>,
ranking_rules_ids: Option<Vec<String>>,
ranking_rules_ids: Option<Vec<RankingRuleId>>,
events: Vec<SearchEvents>,
location: Vec<Location>,
}
@@ -84,14 +97,7 @@ impl SearchLogger<QueryGraph> for VisualSearchLogger {
ranking_rule_idx,
universe_len: universe.len(),
});
self.location.push(match ranking_rule.id().as_str() {
"words" => Location::Words,
"typo" => Location::Typo,
"proximity" => Location::Proximity,
"fid" => Location::Fid,
"position" => Location::Position,
_ => Location::Other,
});
self.location.push(ranking_rule.id().into());
}
fn next_bucket_ranking_rule(

View File

@@ -498,12 +498,14 @@ mod tests {
use super::*;
use crate::index::tests::TempIndex;
use crate::progress::Progress;
use crate::{execute_search, filtered_universe, SearchContext, TimeBudget};
impl<'a> MatcherBuilder<'a> {
fn new_test(rtxn: &'a heed::RoTxn<'a>, index: &'a TempIndex, query: &str) -> Self {
let progress = Progress::default();
let mut ctx = SearchContext::new(index, rtxn).unwrap();
let universe = filtered_universe(ctx.index, ctx.txn, &None).unwrap();
let universe = filtered_universe(ctx.index, ctx.txn, &None, &progress).unwrap();
let crate::search::PartialSearchResult { located_query_terms, .. } = execute_search(
&mut ctx,
Some(query),
@@ -523,6 +525,7 @@ mod tests {
TimeBudget::max(),
None,
None,
&progress,
)
.unwrap();

View File

@@ -56,8 +56,10 @@ use crate::constants::RESERVED_GEO_FIELD_NAME;
use crate::documents::GeoSortParameter;
use crate::index::PrefixSearch;
use crate::localized_attributes_rules::LocalizedFieldIds;
use crate::progress::Progress;
use crate::score_details::{ScoreDetails, ScoringStrategy};
use crate::search::new::distinct::apply_distinct_rule;
use crate::search::steps::SearchStep;
use crate::vector::Embedder;
use crate::{
AscDesc, DocumentId, FieldId, Filter, Index, Member, Result, TermsMatchingStrategy, TimeBudget,
@@ -294,7 +296,9 @@ fn resolve_universe(
query_graph: &QueryGraph,
matching_strategy: TermsMatchingStrategy,
logger: &mut dyn SearchLogger<QueryGraph>,
progress: &Progress,
) -> Result<RoaringBitmap> {
let _step = progress.update_progress_scoped(SearchStep::ResolvingUniverse);
resolve_maximally_reduced_query_graph(
ctx,
initial_universe,
@@ -632,8 +636,10 @@ pub fn filtered_universe(
index: &Index,
txn: &RoTxn<'_>,
filters: &Option<Filter<'_>>,
progress: &Progress,
) -> Result<RoaringBitmap> {
Ok(if let Some(filters) = filters {
let _step = progress.update_progress_scoped(SearchStep::ComputingFilter);
filters.evaluate(txn, index)?
} else {
index.documents_ids(txn)?
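Note that the `ComputingFilter` step is only recorded on the filtered branch, so a filterless search pays no reporting overhead here. Call-side sketch, mirroring the test helper later in this diff:

// with no filter, only `documents_ids` is read and no step is recorded
let universe = filtered_universe(ctx.index, ctx.txn, &None, progress)?;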
@@ -658,6 +664,7 @@ pub fn execute_vector_search(
quantized: bool,
time_budget: TimeBudget,
ranking_score_threshold: Option<f64>,
progress: &Progress,
) -> Result<PartialSearchResult> {
check_sort_criteria(ctx, sort_criteria.as_ref())?;
@@ -692,6 +699,7 @@ pub fn execute_vector_search(
ranking_score_threshold,
exhaustive_number_hits,
max_total_hits,
progress,
)?;
Ok(PartialSearchResult {
@@ -725,12 +733,14 @@ pub fn execute_search(
time_budget: TimeBudget,
ranking_score_threshold: Option<f64>,
locales: Option<&Vec<Language>>,
progress: &Progress,
) -> Result<PartialSearchResult> {
check_sort_criteria(ctx, sort_criteria.as_ref())?;
let mut used_negative_operator = false;
let mut located_query_terms = None;
let query_terms = if let Some(query) = query {
progress.update_progress(SearchStep::TokenizingQuery);
let span = tracing::trace_span!(target: "search::tokens", "tokenizer_builder");
let entered = span.enter();
@@ -834,8 +844,14 @@ pub fn execute_search(
terms_matching_strategy,
)?;
universe &=
resolve_universe(ctx, &universe, &graph, terms_matching_strategy, query_graph_logger)?;
universe &= resolve_universe(
ctx,
&universe,
&graph,
terms_matching_strategy,
query_graph_logger,
progress,
)?;
bucket_sort(
ctx,
@@ -851,6 +867,7 @@ pub fn execute_search(
ranking_score_threshold,
exhaustive_number_hits,
max_total_hits,
progress,
)?
} else {
let ranking_rules =
@@ -869,6 +886,7 @@ pub fn execute_search(
ranking_score_threshold,
exhaustive_number_hits,
max_total_hits,
progress,
)?
};

View File

@@ -59,19 +59,19 @@ impl RankingRuleGraphTrait for FidGraph {
let mut all_fields = FxHashSet::default();
let mut current_max_weight = 0;
for word in term.term_subset.all_single_words_except_prefix_db(ctx)? {
let fields = ctx.get_db_word_fids(word.interned())?;
all_fields.extend(fields);
}
for phrase in term.term_subset.all_phrases(ctx)? {
for &word in phrase.words(ctx).iter().flatten() {
let fields = ctx.get_db_word_fids(word)?;
all_fields.extend(fields);
}
}
if let Some(word_prefix) = term.term_subset.use_prefix_db(ctx) {
let fields = ctx.get_db_word_prefix_fids(word_prefix.interned())?;
all_fields.extend(fields);
}
@@ -79,27 +79,16 @@ impl RankingRuleGraphTrait for FidGraph {
let mut edges = vec![];
for fid in all_fields.iter().copied() {
match weights_map.weight(fid) {
Some(weight) => {
if weight > current_max_weight {
current_max_weight = weight;
}
edges.push((
weight as u32 * term.term_ids.len() as u32,
conditions_interner
.insert(FidCondition { term: term.clone(), fid: Some(fid) }),
));
}
None => {
// Hotfix: this is a temporary solution to handle the case where the weight is not found in the weights map.
// This is due to a database corruption in word_fid_docids database.
tracing::warn!(
"{:?}",
InternalError::FieldidsWeightsMapMissingEntry { key: fid }
);
}
let weight = weights_map
.weight(fid)
.ok_or(InternalError::FieldidsWeightsMapMissingEntry { key: fid })?;
if weight > current_max_weight {
current_max_weight = weight;
}
edges.push((
weight as u32 * term.term_ids.len() as u32,
conditions_interner.insert(FidCondition { term: term.clone(), fid: Some(fid) }),
));
}
// always look up the max_fid if we haven't already, and add an artificial condition for max scoring
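For intuition, the edge cost computed above is the field weight scaled by the number of term ids, so a match in a heavier (lower-priority) field costs proportionally more per term. A worked example with assumed values:

let weight: u16 = 3;       // weight looked up in the weights map
let term_ids_len: u32 = 2; // number of ids in the matched term
let cost = weight as u32 * term_ids_len;
assert_eq!(cost, 6);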

View File

@@ -4,7 +4,9 @@ use roaring::RoaringBitmap;
use super::logger::SearchLogger;
use super::{QueryGraph, SearchContext};
use crate::progress::Progress;
use crate::score_details::ScoreDetails;
use crate::search::steps::ComputingBucketSortStep;
use crate::{Result, TimeBudget};
/// An internal trait implemented by only [`PlaceholderQuery`] and [`QueryGraph`]
@@ -24,7 +26,7 @@ pub type BoxRankingRule<'ctx, Query> = Box<dyn RankingRule<'ctx, Query> + 'ctx>;
/// (i.e. the read transaction and the cache) and over `Query`, which
/// can be either [`PlaceholderQuery`] or [`QueryGraph`].
pub trait RankingRule<'ctx, Query: RankingRuleQueryTrait> {
fn id(&self) -> String;
fn id(&self) -> RankingRuleId;
/// Prepare the ranking rule such that it can start iterating over its
/// buckets using [`next_bucket`](RankingRule::next_bucket).
@@ -56,6 +58,7 @@ pub trait RankingRule<'ctx, Query: RankingRuleQueryTrait> {
logger: &mut dyn SearchLogger<Query>,
universe: &RoaringBitmap,
time_budget: &TimeBudget,
progress: &Progress,
) -> Result<Option<RankingRuleOutput<Query>>>;
/// Return the next bucket of this ranking rule, if doing so can be done without blocking
@@ -69,7 +72,9 @@ pub trait RankingRule<'ctx, Query: RankingRuleQueryTrait> {
_ctx: &mut SearchContext<'ctx>,
_logger: &mut dyn SearchLogger<Query>,
_universe: &RoaringBitmap,
progress: &Progress,
) -> Result<Poll<RankingRuleOutput<Query>>> {
progress.update_progress(ComputingBucketSortStep::from(self.id()));
Ok(Poll::Pending)
}
@@ -93,3 +98,54 @@ pub struct RankingRuleOutput<Q> {
/// The score for the candidates of the current bucket
pub score: ScoreDetails,
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum RankingRuleId {
Words,
Typo,
Proximity,
AttributePosition,
WordPosition,
Exactness,
Sort,
GeoSort,
VectorSort,
Asc(String),
Desc(String),
}
impl std::fmt::Display for RankingRuleId {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
RankingRuleId::Words => write!(f, "words"),
RankingRuleId::Typo => write!(f, "typo"),
RankingRuleId::Proximity => write!(f, "proximity"),
RankingRuleId::AttributePosition => write!(f, "attribute_position"),
RankingRuleId::WordPosition => write!(f, "word_position"),
RankingRuleId::Exactness => write!(f, "exactness"),
RankingRuleId::Sort => write!(f, "sort"),
RankingRuleId::GeoSort => write!(f, "geo_sort"),
RankingRuleId::VectorSort => write!(f, "vector_sort"),
RankingRuleId::Asc(field_name) => write!(f, "asc:{}", field_name),
RankingRuleId::Desc(field_name) => write!(f, "desc:{}", field_name),
}
}
}
impl From<RankingRuleId> for ComputingBucketSortStep {
fn from(ranking_rule_id: RankingRuleId) -> Self {
match ranking_rule_id {
RankingRuleId::Words => Self::Words,
RankingRuleId::Typo => Self::Typo,
RankingRuleId::Proximity => Self::Proximity,
RankingRuleId::AttributePosition => Self::AttributePosition,
RankingRuleId::WordPosition => Self::WordPosition,
RankingRuleId::Exactness => Self::Exactness,
RankingRuleId::Sort => Self::Sort,
RankingRuleId::GeoSort => Self::GeoSort,
RankingRuleId::VectorSort => Self::VectorSort,
RankingRuleId::Asc(_) => Self::Asc,
RankingRuleId::Desc(_) => Self::Desc,
}
}
}
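A quick illustration of the typed id: field-specific rules keep their field name in the id and in its display form, but collapse to a generic progress step, since `ComputingBucketSortStep` is not parameterized. This sketch relies only on the impls above:

let id = RankingRuleId::Asc("price".to_string());
assert_eq!(id.to_string(), "asc:price");
assert!(matches!(ComputingBucketSortStep::from(id), ComputingBucketSortStep::Asc));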

View File

@@ -5,8 +5,11 @@ use super::logger::SearchLogger;
use super::{RankingRule, RankingRuleOutput, RankingRuleQueryTrait, SearchContext};
use crate::heed_codec::facet::{FacetGroupKeyCodec, OrderedF64Codec};
use crate::heed_codec::{BytesRefCodec, StrRefCodec};
use crate::progress::Progress;
use crate::score_details::{self, ScoreDetails};
use crate::search::facet::{ascending_facet_sort, descending_facet_sort};
use crate::search::new::ranking_rules::RankingRuleId;
use crate::search::steps::ComputingBucketSortStep;
use crate::{FieldId, Index, Result, TimeBudget};
pub trait RankingRuleOutputIter<'ctx, Query> {
@@ -84,9 +87,13 @@ impl<'ctx, Query> Sort<'ctx, Query> {
}
impl<'ctx, Query: RankingRuleQueryTrait> RankingRule<'ctx, Query> for Sort<'ctx, Query> {
fn id(&self) -> String {
fn id(&self) -> RankingRuleId {
let Self { field_name, is_ascending, .. } = self;
format!("{field_name}:{}", if *is_ascending { "asc" } else { "desc" })
if *is_ascending {
RankingRuleId::Asc(field_name.clone())
} else {
RankingRuleId::Desc(field_name.clone())
}
}
#[tracing::instrument(level = "trace", skip_all, target = "search::sort")]
@@ -196,7 +203,9 @@ impl<'ctx, Query: RankingRuleQueryTrait> RankingRule<'ctx, Query> for Sort<'ctx,
_logger: &mut dyn SearchLogger<Query>,
universe: &RoaringBitmap,
_time_budget: &TimeBudget,
progress: &Progress,
) -> Result<Option<RankingRuleOutput<Query>>> {
progress.update_progress(ComputingBucketSortStep::from(self.id()));
let iter = self.iter.as_mut().unwrap();
if let Some(mut bucket) = iter.next_bucket()? {
bucket.candidates &= universe;

View File

@@ -1,5 +1,5 @@
use crate::index::tests::TempIndex;
use crate::{db_snap, Criterion, Search, SearchResult, TermsMatchingStrategy};
use crate::{db_snap, Criterion, SearchResult, TermsMatchingStrategy};
fn create_index() -> TempIndex {
let index = TempIndex::new();
@@ -119,7 +119,7 @@ fn test_attribute_fid_simple() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("the quick brown fox jumps over the lazy dog");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -147,7 +147,7 @@ fn test_attribute_fid_ngrams() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("the quick brown fox jumps over the lazy dog");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);

View File

@@ -1,5 +1,5 @@
use crate::index::tests::TempIndex;
use crate::{db_snap, Criterion, Search, SearchResult, TermsMatchingStrategy};
use crate::{db_snap, Criterion, SearchResult, TermsMatchingStrategy};
fn create_index() -> TempIndex {
let index = TempIndex::new();
@@ -134,7 +134,7 @@ fn test_attribute_position_simple() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("quick brown");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -150,7 +150,7 @@ fn test_attribute_position_repeated() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("a a a a a");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -167,7 +167,7 @@ fn test_attribute_position_different_fields() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("quick brown");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -184,7 +184,7 @@ fn test_attribute_position_ngrams() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("quick brown");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);

View File

@@ -14,7 +14,7 @@ use crate::score_details::{ScoreDetails, ScoringStrategy};
use crate::update::Setting;
use crate::vector::settings::EmbeddingSettings;
use crate::vector::{Embedder, EmbedderOptions};
use crate::{Criterion, Filter, FilterableAttributesRule, Search, TimeBudget};
use crate::{Criterion, Filter, FilterableAttributesRule, TimeBudget};
fn create_index() -> TempIndex {
let index = TempIndex::new();
@@ -61,7 +61,7 @@ fn basic_degraded_search() {
let index = create_index();
let rtxn = index.read_txn().unwrap();
let mut search = Search::new(&rtxn, &index);
let mut search = index.search(&rtxn);
search.query("hello puppy kefir");
search.limit(3);
search.time_budget(TimeBudget::new(Duration::from_millis(0)));
@@ -75,7 +75,7 @@ fn degraded_search_cannot_skip_filter() {
let index = create_index();
let rtxn = index.read_txn().unwrap();
let mut search = Search::new(&rtxn, &index);
let mut search = index.search(&rtxn);
search.query("hello puppy kefir");
search.limit(100);
search.time_budget(TimeBudget::new(Duration::from_millis(0)));
@@ -96,7 +96,7 @@ fn degraded_search_and_score_details() {
let index = create_index();
let rtxn = index.read_txn().unwrap();
let mut search = Search::new(&rtxn, &index);
let mut search = index.search(&rtxn);
search.query("hello puppy kefir");
search.limit(4);
search.scoring_strategy(ScoringStrategy::Detailed);
@@ -560,7 +560,7 @@ fn degraded_search_and_score_details_vector() {
.unwrap();
let rtxn = index.read_txn().unwrap();
let mut search = Search::new(&rtxn, &index);
let mut search = index.search(&rtxn);
let embedder = Arc::new(
Embedder::new(

View File

@@ -20,7 +20,7 @@ use maplit::hashset;
use super::collect_field_values;
use crate::index::tests::TempIndex;
use crate::{
AscDesc, Criterion, FilterableAttributesRule, Index, Member, Search, SearchResult,
AscDesc, Criterion, FilterableAttributesRule, Index, Member, SearchResult,
TermsMatchingStrategy,
};
@@ -246,7 +246,7 @@ fn test_distinct_placeholder_no_ranking_rules() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.distinct(S("letter"));
let SearchResult { documents_ids, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 2, 5, 8, 9, 15, 18, 20, 21, 24, 25, 26]");
@@ -275,7 +275,7 @@ fn test_distinct_at_search_placeholder_no_ranking_rules() {
let txn = index.read_txn().unwrap();
let s = Search::new(&txn, &index);
let s = index.search(&txn);
let SearchResult { documents_ids, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 2, 5, 8, 9, 15, 18, 20, 21, 24, 25, 26]");
let distinct_values = verify_distinct(&index, &txn, None, &documents_ids);
@@ -308,7 +308,7 @@ fn test_distinct_placeholder_sort() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.sort_criteria(vec![AscDesc::Desc(Member::Field(S("rank1")))]);
let SearchResult { documents_ids, .. } = s.execute().unwrap();
@@ -348,7 +348,7 @@ fn test_distinct_placeholder_sort() {
]
"###);
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.sort_criteria(vec![AscDesc::Desc(Member::Field(S("letter")))]);
let SearchResult { documents_ids, .. } = s.execute().unwrap();
@@ -388,7 +388,7 @@ fn test_distinct_placeholder_sort() {
]
"###);
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.sort_criteria(vec![
AscDesc::Desc(Member::Field(S("letter"))),
AscDesc::Desc(Member::Field(S("rank1"))),
@@ -443,7 +443,7 @@ fn test_distinct_words() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.query("the quick brown fox jumps over the lazy dog");
@@ -496,7 +496,7 @@ fn test_distinct_sort_words() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.query("the quick brown fox jumps over the lazy dog");
s.sort_criteria(vec![AscDesc::Desc(Member::Field(S("letter")))]);
@@ -569,7 +569,7 @@ fn test_distinct_all_candidates() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.sort_criteria(vec![AscDesc::Desc(Member::Field(S("rank1")))]);
s.exhaustive_number_hits(true);
@@ -592,7 +592,7 @@ fn test_distinct_typo() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.query("the quick brown fox jumps over the lazy dog");
s.terms_matching_strategy(TermsMatchingStrategy::Last);

View File

@@ -21,7 +21,7 @@ This module tests the following properties about the exactness ranking rule:
use crate::index::tests::TempIndex;
use crate::search::new::tests::collect_field_values;
use crate::{Criterion, Search, SearchResult, TermsMatchingStrategy};
use crate::{Criterion, SearchResult, TermsMatchingStrategy};
fn create_index_simple_ordered() -> TempIndex {
let index = TempIndex::new();
@@ -471,7 +471,7 @@ fn test_exactness_simple_ordered() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.query("the quick brown fox jumps over the lazy dog");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -503,7 +503,7 @@ fn test_exactness_simple_reversed() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.query("the quick brown fox jumps over the lazy dog");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -526,7 +526,7 @@ fn test_exactness_simple_reversed() {
]
"###);
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.query("the quick brown fox jumps over the lazy dog");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -556,7 +556,7 @@ fn test_exactness_simple_random() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.query("the quick brown fox jumps over the lazy dog");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -585,7 +585,7 @@ fn test_exactness_attribute_starts_with_simple() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.query("this balcony");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -611,7 +611,7 @@ fn test_exactness_attribute_starts_with_phrase() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.query("\"overlooking the sea\" is a beautiful balcony");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -631,7 +631,7 @@ fn test_exactness_attribute_starts_with_phrase() {
]
"###);
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.query("overlooking the sea is a beautiful balcony");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -660,7 +660,7 @@ fn test_exactness_all_candidates_with_typo() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.query("overlocking the sea is a beautiful balcony");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -696,7 +696,7 @@ fn test_exactness_after_words() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.query("the quick brown fox jumps over the lazy dog");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -744,7 +744,7 @@ fn test_words_after_exactness() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.query("the quick brown fox jumps over the lazy dog");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -792,7 +792,7 @@ fn test_proximity_after_exactness() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.query("the quick brown fox jumps over the lazy dog");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -829,7 +829,7 @@ fn test_proximity_after_exactness() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.query("the quick brown fox jumps over the lazy dog");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -862,7 +862,7 @@ fn test_exactness_followed_by_typo_prefer_no_typo_prefix() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.query("quick brown fox extra");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -897,7 +897,7 @@ fn test_typo_followed_by_exactness() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.query("extraordinarily quick brown fox");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);

View File

@@ -82,7 +82,7 @@ fn test_geo_sort() {
let rtxn = index.read_txn().unwrap();
let mut s = Search::new(&rtxn, &index);
let mut s = index.search(&rtxn);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.sort_criteria(vec![AscDesc::Asc(Member::Geo([0., 0.]))]);
@@ -118,7 +118,7 @@ fn test_geo_sort_with_following_ranking_rules() {
let rtxn = index.read_txn().unwrap();
let mut s = Search::new(&rtxn, &index);
let mut s = index.search(&rtxn);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.sort_criteria(vec![
AscDesc::Asc(Member::Geo([0., 0.])),
@@ -159,7 +159,7 @@ fn test_geo_sort_reached_max_bucket_size() {
let rtxn = index.read_txn().unwrap();
let mut s = Search::new(&rtxn, &index);
let mut s = index.search(&rtxn);
s.geo_max_bucket_size(2);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.sort_criteria(vec![
@@ -219,7 +219,7 @@ fn test_geo_sort_around_the_edge_of_the_flat_earth() {
let rtxn = index.read_txn().unwrap();
let mut s = Search::new(&rtxn, &index);
let mut s = index.search(&rtxn);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
// --- asc
@@ -295,7 +295,7 @@ fn geo_sort_mixed_with_words() {
let rtxn = index.read_txn().unwrap();
let mut s = Search::new(&rtxn, &index);
let mut s = index.search(&rtxn);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.sort_criteria(vec![AscDesc::Asc(Member::Geo([0., 0.]))]);
@@ -406,7 +406,7 @@ fn geo_sort_without_any_geo_faceted_documents() {
let rtxn = index.read_txn().unwrap();
let mut s = Search::new(&rtxn, &index);
let mut s = index.search(&rtxn);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.sort_criteria(vec![AscDesc::Asc(Member::Geo([0., 0.]))]);

View File

@@ -1,5 +1,5 @@
use crate::index::tests::TempIndex;
use crate::{Search, SearchResult};
use crate::SearchResult;
#[test]
fn test_kanji_language_detection() {
@@ -14,7 +14,7 @@ fn test_kanji_language_detection() {
.unwrap();
let txn = index.write_txn().unwrap();
let mut search = Search::new(&txn, &index);
let mut search = index.search(&txn);
search.query("東京");
let SearchResult { documents_ids, .. } = search.execute().unwrap();

View File

@@ -19,7 +19,7 @@ This module tests the following properties:
use crate::index::tests::TempIndex;
use crate::search::new::tests::collect_field_values;
use crate::{Criterion, Search, SearchResult, TermsMatchingStrategy};
use crate::{Criterion, SearchResult, TermsMatchingStrategy};
fn create_index() -> TempIndex {
let index = TempIndex::new();
@@ -78,7 +78,7 @@ fn test_2gram_simple() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("sun flower");
@@ -109,7 +109,7 @@ fn test_3gram_simple() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("sun flower s are");
let SearchResult { documents_ids, .. } = s.execute().unwrap();
@@ -129,7 +129,7 @@ fn test_2gram_typo() {
let index = create_index();
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("sun flawer");
let SearchResult { documents_ids, .. } = s.execute().unwrap();
@@ -159,7 +159,7 @@ fn test_no_disable_ngrams() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("sun flower ");
let SearchResult { documents_ids, .. } = s.execute().unwrap();
@@ -185,7 +185,7 @@ fn test_2gram_prefix() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("sun flow");
let SearchResult { documents_ids, .. } = s.execute().unwrap();
@@ -214,7 +214,7 @@ fn test_3gram_prefix() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("su nf l");
let SearchResult { documents_ids, .. } = s.execute().unwrap();
@@ -237,7 +237,7 @@ fn test_split_words() {
let index = create_index();
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("sunflower ");
let SearchResult { documents_ids, .. } = s.execute().unwrap();
@@ -266,7 +266,7 @@ fn test_disable_split_words() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("sunflower ");
let SearchResult { documents_ids, .. } = s.execute().unwrap();
@@ -286,7 +286,7 @@ fn test_2gram_split_words() {
let index = create_index();
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("sunf lower");
let SearchResult { documents_ids, .. } = s.execute().unwrap();
@@ -310,7 +310,7 @@ fn test_3gram_no_split_words() {
let index = create_index();
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("sunf lo wer");
let SearchResult { documents_ids, .. } = s.execute().unwrap();
@@ -333,7 +333,7 @@ fn test_3gram_no_typos() {
let index = create_index();
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("sunf la wer");
let SearchResult { documents_ids, .. } = s.execute().unwrap();
@@ -352,7 +352,7 @@ fn test_no_ngram_phrases() {
let index = create_index();
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("\"sun\" flower");
let SearchResult { documents_ids, .. } = s.execute().unwrap();
@@ -366,7 +366,7 @@ fn test_no_ngram_phrases() {
]
"###);
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("\"sun\" \"flower\"");
let SearchResult { documents_ids, .. } = s.execute().unwrap();
@@ -385,7 +385,7 @@ fn test_short_split_words() {
let index = create_index();
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("xyz");
let SearchResult { documents_ids, .. } = s.execute().unwrap();
@@ -412,7 +412,7 @@ fn test_split_words_never_disabled() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("the sunflower is tall");
let SearchResult { documents_ids, .. } = s.execute().unwrap();

View File

@@ -18,7 +18,7 @@ use std::collections::BTreeMap;
use crate::index::tests::TempIndex;
use crate::search::new::tests::collect_field_values;
use crate::{Criterion, Search, SearchResult, TermsMatchingStrategy};
use crate::{Criterion, SearchResult, TermsMatchingStrategy};
fn create_simple_index() -> TempIndex {
let index = TempIndex::new();
@@ -268,7 +268,7 @@ fn test_proximity_simple() {
let index = create_simple_index();
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("the quick brown fox jumps over the lazy dog");
let SearchResult { documents_ids, .. } = s.execute().unwrap();
@@ -295,7 +295,7 @@ fn test_proximity_split_word() {
let index = create_edge_cases_index();
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("sunflower wilting");
@@ -315,7 +315,7 @@ fn test_proximity_split_word() {
]
"###);
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("\"sun flower\" wilting");
@@ -342,7 +342,7 @@ fn test_proximity_split_word() {
.unwrap();
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("xyz wilting");
@@ -365,7 +365,7 @@ fn test_proximity_prefix_db() {
let index = create_edge_cases_index();
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("best s");
@@ -390,7 +390,7 @@ fn test_proximity_prefix_db() {
"###);
// Difference when using the `su` prefix, which is not in the prefix DB
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("best su");
@@ -417,7 +417,7 @@ fn test_proximity_prefix_db() {
// **proximity** prefix DB. In that case, its proximity score will always be
// the maximum. This happens for prefixes that are larger than 2 bytes.
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("best win");
@@ -441,7 +441,7 @@ fn test_proximity_prefix_db() {
// Now using `wint`, which is not in the prefix DB:
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("best wint");
@@ -465,7 +465,7 @@ fn test_proximity_prefix_db() {
// and using `wi` which is in the prefix DB and proximity prefix DB
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("best wi");

View File

@@ -8,7 +8,7 @@ implemented.
use crate::index::tests::TempIndex;
use crate::search::new::tests::collect_field_values;
use crate::{Criterion, Search, SearchResult, TermsMatchingStrategy};
use crate::{Criterion, SearchResult, TermsMatchingStrategy};
fn create_index() -> TempIndex {
let index = TempIndex::new();
@@ -57,7 +57,7 @@ fn test_trap_basic() {
let index = create_index();
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("summer holiday");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);

View File

@@ -17,9 +17,7 @@ use meili_snap::insta;
use crate::index::tests::TempIndex;
use crate::search::new::tests::collect_field_values;
use crate::{
score_details, AscDesc, Criterion, Member, Search, SearchResult, TermsMatchingStrategy,
};
use crate::{score_details, AscDesc, Criterion, Member, SearchResult, TermsMatchingStrategy};
fn create_index() -> TempIndex {
let index = TempIndex::new();
@@ -184,7 +182,7 @@ fn test_sort() {
let index = create_index();
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.sort_criteria(vec![AscDesc::Desc(Member::Field(S("letter")))]);
@@ -219,7 +217,7 @@ fn test_sort() {
]
"###);
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.sort_criteria(vec![AscDesc::Desc(Member::Field(S("rank")))]);
@@ -254,7 +252,7 @@ fn test_sort() {
]
"###);
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.sort_criteria(vec![AscDesc::Asc(Member::Field(S("vague")))]);
@@ -289,7 +287,7 @@ fn test_sort() {
]
"###);
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.sort_criteria(vec![AscDesc::Desc(Member::Field(S("vague")))]);
@@ -338,7 +336,7 @@ fn test_redacted() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.sort_criteria(vec![

View File

@@ -13,7 +13,7 @@ use std::collections::BTreeSet;
use std::iter::FromIterator;
use crate::index::tests::TempIndex;
use crate::{Search, SearchResult, TermsMatchingStrategy};
use crate::{SearchResult, TermsMatchingStrategy};
fn create_index() -> TempIndex {
let index = TempIndex::new();
@@ -79,7 +79,7 @@ fn test_ignore_stop_words() {
let txn = index.read_txn().unwrap();
// `the` is treated as a prefix here, so it's not ignored
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.query("xyz to the");
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -132,7 +132,7 @@ fn test_ignore_stop_words() {
"###);
// `xyz` is treated as a prefix here, so it's not ignored
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.query("to the xyz");
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -185,7 +185,7 @@ fn test_ignore_stop_words() {
"###);
// `xyz` is not treated as a prefix anymore because of the trailing space, so it's ignored
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.query("to the xyz ");
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -237,7 +237,7 @@ fn test_ignore_stop_words() {
]
"###);
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.query("to the dragon xyz");
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -296,7 +296,7 @@ fn test_stop_words_in_phrase() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.query("\"how to train your dragon\"");
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -389,7 +389,7 @@ fn test_stop_words_in_phrase() {
]
"###);
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.query("how \"to\" train \"the");
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -441,7 +441,7 @@ fn test_stop_words_in_phrase() {
]
"###);
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.query("how \"to\" train \"The dragon");
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -449,7 +449,7 @@ fn test_stop_words_in_phrase() {
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[3, 6, 5]");
insta::assert_snapshot!(format!("{document_scores:#?}"));
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.query("\"to\"");
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);

View File

@@ -22,7 +22,7 @@ use std::collections::BTreeMap;
use crate::index::tests::TempIndex;
use crate::search::new::tests::collect_field_values;
use crate::{Criterion, Search, SearchResult, TermsMatchingStrategy};
use crate::{Criterion, SearchResult, TermsMatchingStrategy};
fn create_index() -> TempIndex {
let index = TempIndex::new();
@@ -157,7 +157,7 @@ fn test_no_typo() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("the quick brown fox jumps over the lazy dog");
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
@@ -182,7 +182,7 @@ fn test_default_typo() {
insta::assert_debug_snapshot!(tt, @"9");
// 0 typo
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("the quick brown fox jumps over the lazy dog");
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
@@ -202,7 +202,7 @@ fn test_default_typo() {
"###);
// 1 typo on one word, replaced letter
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("the quack brown fox jumps over the lazy dog");
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
@@ -216,7 +216,7 @@ fn test_default_typo() {
"###);
// 1 typo on one word, missing letter, extra letter
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("the quicest brownest fox jummps over the laziest dog");
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
@@ -235,7 +235,7 @@ fn test_phrase_no_typo_allowed() {
let index = create_index();
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("the \"quick brewn\" fox jumps over the lazy dog");
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
@@ -265,7 +265,7 @@ fn test_typo_exact_word() {
insta::assert_debug_snapshot!(tt, @"9");
// don't match quivk
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("the quick brown fox jumps over the lazy dog");
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
@@ -279,7 +279,7 @@ fn test_typo_exact_word() {
"###);
// Don't match quick
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("the quack brown fox jumps over the lazy dog");
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
@@ -287,7 +287,7 @@ fn test_typo_exact_word() {
insta::assert_snapshot!(format!("{document_scores:?}"), @"[]");
// words not in exact_words (quicest, jummps) have normal typo handling
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("the quicest brownest fox jummps over the laziest dog");
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
@@ -301,7 +301,7 @@ fn test_typo_exact_word() {
"###);
// exact words do not disable prefix (sunflowering OK, but no sunflowar)
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("network interconnection sunflower");
@@ -340,7 +340,7 @@ fn test_typo_exact_attribute() {
insta::assert_debug_snapshot!(tt, @"9");
// Exact match returns both exact attributes and tolerant ones.
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("the quick brown fox jumps over the lazy dog");
@@ -365,7 +365,7 @@ fn test_typo_exact_attribute() {
"###);
// 1 typo only returns the tolerant attribute
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("the quidk brown fox jumps over the lazy dog");
@@ -386,7 +386,7 @@ fn test_typo_exact_attribute() {
"###);
// combine with exact words
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("the quivk brown fox jumps over the lazy dog");
@@ -414,7 +414,7 @@ fn test_typo_exact_attribute() {
"###);
// No result in tolerant attribute
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("the quicest brownest fox jummps over the laziest dog");
@@ -428,7 +428,7 @@ fn test_ngram_typos() {
let index = create_index();
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("the extra lagant fox skyrocketed over the languorous dog");
@@ -442,7 +442,7 @@ fn test_ngram_typos() {
]
"###);
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("the ex tra lagant fox skyrocketed over the languorous dog");
@@ -463,7 +463,7 @@ fn test_typo_ranking_rule_not_preceded_by_words_ranking_rule() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("the quick brown fox jumps over the lazy dog");
@@ -499,7 +499,7 @@ fn test_typo_ranking_rule_not_preceded_by_words_ranking_rule() {
})
.unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("the quick brown fox jumps over the lazy dog");
@@ -517,7 +517,7 @@ fn test_typo_bucketing() {
let txn = index.read_txn().unwrap();
// First do the search with just the Words ranking rule
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("network interconnection sunflower");
@@ -545,7 +545,7 @@ fn test_typo_bucketing() {
.unwrap();
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("network interconnection sunflower");
@@ -564,7 +564,7 @@ fn test_typo_bucketing() {
]
"###);
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("network interconnection sun flower");
@@ -600,7 +600,7 @@ fn test_typo_synonyms() {
.unwrap();
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("the quick brown fox jumps over the lackadaisical dog");
@@ -616,7 +616,7 @@ fn test_typo_synonyms() {
]
"###);
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("the fast brownish fox jumps over the lackadaisical dog");

View File

@@ -17,7 +17,7 @@ because the typo ranking rule before it only used the derivation `beautiful`.
use crate::index::tests::TempIndex;
use crate::search::new::tests::collect_field_values;
use crate::{Criterion, Search, SearchResult, TermsMatchingStrategy};
use crate::{Criterion, SearchResult, TermsMatchingStrategy};
fn create_index() -> TempIndex {
let index = TempIndex::new();
@@ -87,7 +87,7 @@ fn test_trap_basic_and_complex1() {
let index = create_index();
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("beautiful summer");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -110,7 +110,7 @@ fn test_trap_complex2() {
let index = create_index();
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("delicious sweet dessert");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);

View File

@@ -14,7 +14,7 @@ This module tests the following properties:
use crate::index::tests::TempIndex;
use crate::search::new::tests::collect_field_values;
use crate::{Criterion, Search, SearchResult, TermsMatchingStrategy};
use crate::{Criterion, SearchResult, TermsMatchingStrategy};
fn create_index() -> TempIndex {
let index = TempIndex::new();
@@ -131,7 +131,7 @@ fn test_words_tms_last_simple() {
let index = create_index();
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.query("the quick brown fox jumps over the lazy dog");
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -166,7 +166,7 @@ fn test_words_tms_last_simple() {
]
"###);
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.query("extravagant the quick brown fox jumps over the lazy dog");
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -180,7 +180,7 @@ fn test_words_tms_last_phrase() {
let index = create_index();
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.query("\"the quick brown fox\" jumps over the lazy dog");
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -205,7 +205,7 @@ fn test_words_tms_last_phrase() {
]
"###);
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.query("\"the quick brown fox\" jumps over the \"lazy\" dog");
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -227,7 +227,7 @@ fn test_words_tms_last_phrase() {
]
"###);
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.query("\"the quick brown fox jumps over the lazy dog\"");
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -243,7 +243,7 @@ fn test_words_tms_last_phrase() {
]
"###);
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.query("\"the quick brown fox jumps over the lazy dog");
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -270,7 +270,7 @@ fn test_words_proximity_tms_last_simple() {
.unwrap();
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.query("the quick brown fox jumps over the lazy dog");
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -305,7 +305,7 @@ fn test_words_proximity_tms_last_simple() {
]
"###);
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.query("the brown quick fox jumps over the lazy dog");
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -351,7 +351,7 @@ fn test_words_proximity_tms_last_phrase() {
.unwrap();
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.query("the \"quick brown\" fox jumps over the lazy dog");
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -382,7 +382,7 @@ fn test_words_proximity_tms_last_phrase() {
]
"###);
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.query("the \"quick brown\" \"fox jumps\" over the lazy dog");
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -421,7 +421,7 @@ fn test_words_tms_all() {
.unwrap();
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.query("the quick brown fox jumps over the lazy dog");
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -447,7 +447,7 @@ fn test_words_tms_all() {
]
"###);
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.query("extravagant");
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);

View File

@@ -6,7 +6,10 @@ use roaring::RoaringBitmap;
use super::ranking_rules::{RankingRule, RankingRuleOutput, RankingRuleQueryTrait};
use super::VectorStoreStats;
use crate::progress::Progress;
use crate::score_details::{self, ScoreDetails};
use crate::search::new::ranking_rules::RankingRuleId;
use crate::search::steps::ComputingBucketSortStep;
use crate::vector::{DistributionShift, Embedder, VectorStore};
use crate::{DocumentId, Result, SearchContext, SearchLogger, TimeBudget};
@@ -94,8 +97,8 @@ impl<Q: RankingRuleQueryTrait> VectorSort<Q> {
}
impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for VectorSort<Q> {
fn id(&self) -> String {
"vector_sort".to_owned()
fn id(&self) -> RankingRuleId {
RankingRuleId::VectorSort
}
#[tracing::instrument(level = "trace", skip_all, target = "search::vector_sort")]
@@ -123,7 +126,9 @@ impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for VectorSort<Q> {
_logger: &mut dyn SearchLogger<Q>,
universe: &RoaringBitmap,
time_budget: &TimeBudget,
progress: &Progress,
) -> Result<Option<RankingRuleOutput<Q>>> {
progress.update_progress(ComputingBucketSortStep::from(self.id()));
let query = self.query.as_ref().unwrap().clone();
let vector_candidates = &self.vector_candidates & universe;
@@ -158,7 +163,7 @@ impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for VectorSort<Q> {
}));
}
self.next_bucket(ctx, _logger, universe, time_budget)
self.next_bucket(ctx, _logger, universe, time_budget, progress)
}
#[tracing::instrument(level = "trace", skip_all, target = "search::vector_sort")]
@@ -171,7 +176,9 @@ impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for VectorSort<Q> {
_ctx: &mut SearchContext<'ctx>,
_logger: &mut dyn SearchLogger<Q>,
universe: &RoaringBitmap,
progress: &Progress,
) -> Result<Poll<RankingRuleOutput<Q>>> {
progress.update_progress(ComputingBucketSortStep::from(self.id()));
let query = self.query.as_ref().unwrap().clone();
let vector_candidates = &self.vector_candidates & universe;

View File

@@ -57,7 +57,12 @@ impl<'a> Similar<'a> {
}
pub fn execute(&self) -> Result<SearchResult> {
let mut universe = filtered_universe(self.index, self.rtxn, &self.filter)?;
let mut universe = filtered_universe(
self.index,
self.rtxn,
&self.filter,
&crate::progress::Progress::default(),
)?;
// we never want to return the target document itself
universe.remove(self.id);

View File

@@ -0,0 +1,52 @@
use crate::make_enum_progress;
make_enum_progress! {
pub enum SearchStep {
PreparingSearch,
TokenizingQuery,
EmbeddingQuery,
ComputingFilter,
ResolvingUniverse,
ComputingBucketSort,
FormattingResults,
ComputingFacetDistribution,
FederatingResults,
ApplyingPersonalization,
}
}
make_enum_progress! {
pub enum ComputingBucketSortStep {
Initializing,
MergingCandidates,
ApplyingDistinctRule,
Words,
Typo,
Proximity,
AttributePosition,
WordPosition,
Exactness,
Sort,
GeoSort,
VectorSort,
Asc,
Desc,
}
}
make_enum_progress! {
pub enum FederatingResultsStep {
WaitingForRemoteResults,
MergingFacets,
MergingResults,
}
}
make_enum_progress! {
pub enum TotalProcessingTimeStep {
WaitingForPermit,
Searching,
FetchingSimilar,
PublishingAnalytics,
}
}
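
The new `steps.rs` enums back the `progress.update_progress(...)` calls threaded through the ranking rules above. The `From<RankingRuleId>` conversion used by `ComputingBucketSortStep::from(self.id())` is not part of this diff; below is a minimal sketch of what such a mapping could look like, with the variant lists trimmed for brevity.

#[derive(Debug, Clone, Copy)]
pub enum RankingRuleId { Words, Typo, Sort, VectorSort }

#[derive(Debug, Clone, Copy)]
pub enum ComputingBucketSortStep { Words, Typo, Sort, VectorSort }

// Assumed mapping: each ranking rule reports itself as the matching
// bucket-sort step; the real impl lives outside this diff.
impl From<RankingRuleId> for ComputingBucketSortStep {
    fn from(id: RankingRuleId) -> Self {
        match id {
            RankingRuleId::Words => Self::Words,
            RankingRuleId::Typo => Self::Typo,
            RankingRuleId::Sort => Self::Sort,
            RankingRuleId::VectorSort => Self::VectorSort,
        }
    }
}

fn main() {
    let step: ComputingBucketSortStep = RankingRuleId::VectorSort.into();
    println!("{step:?}");
}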

View File

@@ -26,6 +26,7 @@ pub(crate) struct TempIndex {
pub inner: Index,
pub indexer_config: IndexerConfig,
pub index_documents_config: IndexDocumentsConfig,
pub progress: Progress,
_tempdir: TempDir,
}
@@ -47,7 +48,9 @@ impl TempIndex {
let inner = Index::new(options, _tempdir.path(), true).unwrap();
let indexer_config = IndexerConfig::default();
let index_documents_config = IndexDocumentsConfig::default();
Self { inner, indexer_config, index_documents_config, _tempdir }
let progress = Progress::default();
Self { inner, indexer_config, index_documents_config, progress, _tempdir }
}
/// Creates a temporary index, with a default `4096 * 2000` size. This should be enough for
/// most tests.
@@ -210,6 +213,10 @@ impl TempIndex {
pub fn delete_document(&self, external_document_id: &str) {
self.delete_documents(vec![external_document_id.to_string()])
}
pub fn search<'a>(&'a self, rtxn: &'a heed::RoTxn<'a>) -> Search<'a> {
self.inner.search(rtxn, &self.progress)
}
}
#[test]
@@ -1095,7 +1102,7 @@ fn bug_3021_fourth() {
"###);
let rtxn = index.read_txn().unwrap();
let search = Search::new(&rtxn, &index);
let search = index.search(&rtxn);
let SearchResult {
matching_words: _,
candidates: _,

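With the helper in place, tests obtain a search builder in one call, and the `Progress` owned by `TempIndex` is reused across searches instead of being constructed per call. A usage sketch assembled only from calls that already appear elsewhere in this diff (it belongs inside the crate's test modules, so it is not standalone):

let index = TempIndex::new();
let rtxn = index.read_txn().unwrap();
let mut s = index.search(&rtxn); // no explicit Progress needed anymore
s.query("document");
s.terms_matching_strategy(TermsMatchingStrategy::default());
let SearchResult { documents_ids, .. } = s.execute().unwrap();
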
View File

@@ -14,8 +14,8 @@ use crate::heed_codec::facet::{
use crate::heed_codec::BytesRefCodec;
use crate::update::del_add::{DelAdd, KvReaderDelAdd};
use crate::update::index_documents::{create_writer, valid_lmdb_key, writer_into_reader};
use crate::update::MergeDeladdDeCboRoaringBitmaps;
use crate::{DeCboRoaringBitmapCodec, DeCboRoaringBitmapLenCodec, FieldId, Index, Result};
use crate::update::MergeDeladdCboRoaringBitmaps;
use crate::{CboRoaringBitmapCodec, CboRoaringBitmapLenCodec, FieldId, Index, Result};
/// Algorithm to insert elements into the `facet_id_(string/f64)_docids` databases
/// by rebuilding the database "from scratch".
@@ -29,7 +29,7 @@ pub struct FacetsUpdateBulk<'i> {
facet_type: FacetType,
field_ids: Vec<FieldId>,
// None if level 0 does not need to be updated
delta_data: Option<Merger<BufReader<File>, MergeDeladdDeCboRoaringBitmaps>>,
delta_data: Option<Merger<BufReader<File>, MergeDeladdCboRoaringBitmaps>>,
}
impl<'i> FacetsUpdateBulk<'i> {
@@ -37,7 +37,7 @@ impl<'i> FacetsUpdateBulk<'i> {
index: &'i Index,
field_ids: Vec<FieldId>,
facet_type: FacetType,
delta_data: Merger<BufReader<File>, MergeDeladdDeCboRoaringBitmaps>,
delta_data: Merger<BufReader<File>, MergeDeladdCboRoaringBitmaps>,
group_size: u8,
min_level_size: u8,
) -> FacetsUpdateBulk<'i> {
@@ -90,7 +90,7 @@ impl<'i> FacetsUpdateBulk<'i> {
/// Implementation of `FacetsUpdateBulk` that is independent of milli's `Index` type
pub(crate) struct FacetsUpdateBulkInner<R: std::io::Read + std::io::Seek> {
pub db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
pub delta_data: Option<Merger<R, MergeDeladdDeCboRoaringBitmaps>>,
pub delta_data: Option<Merger<R, MergeDeladdCboRoaringBitmaps>>,
pub group_size: u8,
pub min_level_size: u8,
}
@@ -143,7 +143,6 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
}
} else {
let mut buffer = Vec::new();
let mut tmp_buffer = Vec::new();
let database = self.db.remap_types::<Bytes, Bytes>();
let mut iter = delta_data.into_stream_merger_iter()?;
@@ -163,12 +162,7 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
Some(prev_value) => {
// prev_value is the group size for level 0, followed by the previous bitmap.
let old_bitmap = &prev_value[1..];
DeCboRoaringBitmapCodec::merge_deladd_into(
value,
old_bitmap,
&mut buffer,
&mut tmp_buffer,
)?;
CboRoaringBitmapCodec::merge_deladd_into(value, old_bitmap, &mut buffer)?;
}
None => {
// it is safe to ignore the del in that case.
@@ -182,7 +176,7 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
};
let new_bitmap = &buffer[1..];
// if the new bitmap is empty, let's remove it
if DeCboRoaringBitmapLenCodec::bytes_decode(new_bitmap).unwrap_or_default() == 0 {
if CboRoaringBitmapLenCodec::bytes_decode(new_bitmap).unwrap_or_default() == 0 {
database.delete(wtxn, key)?;
} else {
database.put(wtxn, key, &buffer)?;

View File

@@ -15,8 +15,8 @@ use crate::heed_codec::BytesRefCodec;
use crate::search::facet::get_highest_level;
use crate::update::del_add::DelAdd;
use crate::update::index_documents::valid_lmdb_key;
use crate::update::MergeDeladdDeCboRoaringBitmaps;
use crate::{DeCboRoaringBitmapCodec, Index, Result};
use crate::update::MergeDeladdCboRoaringBitmaps;
use crate::{CboRoaringBitmapCodec, Index, Result};
/// Enum used as a return value for the facet incremental indexing.
///
@@ -58,14 +58,14 @@ enum ModificationResult {
/// `facet_id_(string/f64)_docids` databases.
pub struct FacetsUpdateIncremental {
inner: FacetsUpdateIncrementalInner,
delta_data: Merger<BufReader<File>, MergeDeladdDeCboRoaringBitmaps>,
delta_data: Merger<BufReader<File>, MergeDeladdCboRoaringBitmaps>,
}
impl FacetsUpdateIncremental {
pub fn new(
index: &Index,
facet_type: FacetType,
delta_data: Merger<BufReader<File>, MergeDeladdDeCboRoaringBitmaps>,
delta_data: Merger<BufReader<File>, MergeDeladdCboRoaringBitmaps>,
group_size: u8,
min_level_size: u8,
max_group_size: u8,
@@ -112,13 +112,13 @@ impl FacetsUpdateIncremental {
let value = KvReader::from_slice(value);
let docids_to_delete = value
.get(DelAdd::Deletion)
.map(DeCboRoaringBitmapCodec::bytes_decode)
.map(CboRoaringBitmapCodec::bytes_decode)
.map(|o| o.map_err(heed::Error::Encoding))
.transpose()?;
let docids_to_add = value
.get(DelAdd::Addition)
.map(DeCboRoaringBitmapCodec::bytes_decode)
.map(CboRoaringBitmapCodec::bytes_decode)
.map(|o| o.map_err(heed::Error::Encoding))
.transpose()?;

View File

@@ -90,7 +90,7 @@ use tracing::debug;
use self::incremental::FacetsUpdateIncremental;
use super::settings::{InnerIndexSettings, InnerIndexSettingsDiff};
use super::{FacetsUpdateBulk, MergeDeladdBtreesetString, MergeDeladdDeCboRoaringBitmaps};
use super::{FacetsUpdateBulk, MergeDeladdBtreesetString, MergeDeladdCboRoaringBitmaps};
use crate::facet::FacetType;
use crate::heed_codec::facet::{
FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec, OrderedF64Codec,
@@ -112,7 +112,7 @@ pub struct FacetsUpdate<'i> {
index: &'i Index,
database: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
facet_type: FacetType,
delta_data: Merger<BufReader<File>, MergeDeladdDeCboRoaringBitmaps>,
delta_data: Merger<BufReader<File>, MergeDeladdCboRoaringBitmaps>,
normalized_delta_data: Option<Merger<BufReader<File>, MergeDeladdBtreesetString>>,
group_size: u8,
max_group_size: u8,
@@ -124,7 +124,7 @@ impl<'i> FacetsUpdate<'i> {
pub fn new(
index: &'i Index,
facet_type: FacetType,
delta_data: Merger<BufReader<File>, MergeDeladdDeCboRoaringBitmaps>,
delta_data: Merger<BufReader<File>, MergeDeladdCboRoaringBitmaps>,
normalized_delta_data: Option<Merger<BufReader<File>, MergeDeladdBtreesetString>>,
data_size: u64,
) -> Self {
@@ -364,9 +364,9 @@ pub(crate) mod test_helpers {
use crate::search::facet::get_highest_level;
use crate::snapshot_tests::display_bitmap;
use crate::update::del_add::{DelAdd, KvWriterDelAdd};
use crate::update::index_documents::MergeDeladdDeCboRoaringBitmaps;
use crate::update::index_documents::MergeDeladdCboRoaringBitmaps;
use crate::update::FacetsUpdateIncrementalInner;
use crate::DeCboRoaringBitmapCodec;
use crate::CboRoaringBitmapCodec;
/// Utility function to generate a string whose position in a lexicographically
/// ordered list is `i`.
@@ -496,13 +496,13 @@ pub(crate) mod test_helpers {
FacetGroupKey { field_id: *field_id, level: 0, left_bound: &left_bound_bytes };
let key = FacetGroupKeyCodec::<BytesRefCodec>::bytes_encode(&key).unwrap();
let mut inner_writer = KvWriterDelAdd::memory();
let value = DeCboRoaringBitmapCodec::bytes_encode(docids).unwrap();
let value = CboRoaringBitmapCodec::bytes_encode(docids).unwrap();
inner_writer.insert(DelAdd::Addition, value).unwrap();
writer.insert(&key, inner_writer.into_inner().unwrap()).unwrap();
}
writer.finish().unwrap();
let reader = grenad::Reader::new(std::io::Cursor::new(new_data)).unwrap();
let mut builder = MergerBuilder::new(MergeDeladdDeCboRoaringBitmaps);
let mut builder = MergerBuilder::new(MergeDeladdCboRoaringBitmaps);
builder.push(reader.into_cursor().unwrap());
let merger = builder.build();

View File

@@ -4,7 +4,7 @@ use std::io::{self, BufReader};
use heed::{BytesDecode, BytesEncode};
use super::helpers::{
create_sorter, sorter_into_reader, GrenadParameters, MergeDeladdDeCboRoaringBitmaps,
create_sorter, sorter_into_reader, GrenadParameters, MergeDeladdCboRoaringBitmaps,
};
use crate::heed_codec::facet::{
FacetGroupKey, FacetGroupKeyCodec, FieldDocIdFacetF64Codec, OrderedF64Codec,
@@ -27,7 +27,7 @@ pub fn extract_facet_number_docids<R: io::Read + io::Seek>(
let mut facet_number_docids_sorter = create_sorter(
grenad::SortAlgorithm::Unstable,
MergeDeladdDeCboRoaringBitmaps,
MergeDeladdCboRoaringBitmaps,
indexer.chunk_compression_type,
indexer.chunk_compression_level,
indexer.max_nb_chunks,

View File

@@ -14,7 +14,7 @@ use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec};
use crate::heed_codec::{BEU16StrCodec, StrRefCodec};
use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd};
use crate::update::index_documents::helpers::{
MergeDeladdBtreesetString, MergeDeladdDeCboRoaringBitmaps,
MergeDeladdBtreesetString, MergeDeladdCboRoaringBitmaps,
};
use crate::update::settings::{InnerIndexSettings, InnerIndexSettingsDiff};
use crate::{FieldId, Result, MAX_FACET_VALUE_LENGTH};
@@ -54,7 +54,7 @@ fn extract_facet_string_docids_document_update<R: io::Read + io::Seek>(
let mut facet_string_docids_sorter = create_sorter(
grenad::SortAlgorithm::Stable,
MergeDeladdDeCboRoaringBitmaps,
MergeDeladdCboRoaringBitmaps,
indexer.chunk_compression_type,
indexer.chunk_compression_level,
indexer.max_nb_chunks,
@@ -154,7 +154,7 @@ fn extract_facet_string_docids_settings<R: io::Read + io::Seek>(
let mut facet_string_docids_sorter = create_sorter(
grenad::SortAlgorithm::Stable,
MergeDeladdDeCboRoaringBitmaps,
MergeDeladdCboRoaringBitmaps,
indexer.chunk_compression_type,
indexer.chunk_compression_level,
indexer.max_nb_chunks,

View File

@@ -19,7 +19,7 @@ use crate::facet::value_encoding::f64_into_bytes;
use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd};
use crate::update::index_documents::{create_writer, writer_into_reader};
use crate::update::settings::InnerIndexSettingsDiff;
use crate::{DeCboRoaringBitmapCodec, DocumentId, FieldId, Result, MAX_FACET_VALUE_LENGTH};
use crate::{CboRoaringBitmapCodec, DocumentId, FieldId, Result, MAX_FACET_VALUE_LENGTH};
/// The length of the elements that are always in the buffer when inserting new values.
const TRUNCATE_SIZE: usize = size_of::<FieldId>() + size_of::<DocumentId>();
@@ -311,8 +311,8 @@ fn deladd_obkv_cbo_roaring_bitmaps(
) -> io::Result<()> {
buffer.clear();
let mut obkv = KvWriterDelAdd::new(buffer);
let del_bitmap_bytes = DeCboRoaringBitmapCodec::bytes_encode(del_bitmap).unwrap();
let add_bitmap_bytes = DeCboRoaringBitmapCodec::bytes_encode(add_bitmap).unwrap();
let del_bitmap_bytes = CboRoaringBitmapCodec::bytes_encode(del_bitmap).unwrap();
let add_bitmap_bytes = CboRoaringBitmapCodec::bytes_encode(add_bitmap).unwrap();
obkv.insert(DelAdd::Deletion, del_bitmap_bytes)?;
obkv.insert(DelAdd::Addition, add_bitmap_bytes)?;
obkv.finish()

View File

@@ -5,7 +5,7 @@ use obkv::KvReaderU16;
use super::helpers::{
create_sorter, sorter_into_reader, try_split_array_at, GrenadParameters,
MergeDeladdDeCboRoaringBitmaps,
MergeDeladdCboRoaringBitmaps,
};
use crate::error::SerializationError;
use crate::index::db_name::DOCID_WORD_POSITIONS;
@@ -30,7 +30,7 @@ pub fn extract_fid_word_count_docids<R: io::Read + io::Seek>(
let mut fid_word_count_docids_sorter = create_sorter(
grenad::SortAlgorithm::Unstable,
MergeDeladdDeCboRoaringBitmaps,
MergeDeladdCboRoaringBitmaps,
indexer.chunk_compression_type,
indexer.chunk_compression_level,
indexer.max_nb_chunks,

View File

@@ -7,7 +7,7 @@ use obkv::KvReaderU16;
use super::helpers::{
create_sorter, create_writer, try_split_array_at, writer_into_reader, GrenadParameters,
MergeDeladdDeCboRoaringBitmaps,
MergeDeladdCboRoaringBitmaps,
};
use crate::error::SerializationError;
use crate::heed_codec::StrBEU16Codec;
@@ -38,7 +38,7 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(
let mut word_fid_docids_sorter = create_sorter(
grenad::SortAlgorithm::Unstable,
MergeDeladdDeCboRoaringBitmaps,
MergeDeladdCboRoaringBitmaps,
indexer.chunk_compression_type,
indexer.chunk_compression_level,
indexer.max_nb_chunks,
@@ -93,7 +93,7 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(
let mut word_docids_sorter = create_sorter(
grenad::SortAlgorithm::Unstable,
MergeDeladdDeCboRoaringBitmaps,
MergeDeladdCboRoaringBitmaps,
indexer.chunk_compression_type,
indexer.chunk_compression_level,
indexer.max_nb_chunks,
@@ -103,7 +103,7 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(
let mut exact_word_docids_sorter = create_sorter(
grenad::SortAlgorithm::Unstable,
MergeDeladdDeCboRoaringBitmaps,
MergeDeladdCboRoaringBitmaps,
indexer.chunk_compression_type,
indexer.chunk_compression_level,
indexer.max_nb_chunks,
@@ -166,7 +166,7 @@ fn words_into_sorter(
key_buffer: &mut Vec<u8>,
del_words: &BTreeSet<Vec<u8>>,
add_words: &BTreeSet<Vec<u8>>,
word_fid_docids_sorter: &mut grenad::Sorter<MergeDeladdDeCboRoaringBitmaps>,
word_fid_docids_sorter: &mut grenad::Sorter<MergeDeladdCboRoaringBitmaps>,
) -> Result<()> {
use itertools::merge_join_by;
use itertools::EitherOrBoth::{Both, Left, Right};

View File

@@ -7,7 +7,7 @@ use obkv::KvReaderU16;
use super::helpers::{
create_sorter, create_writer, try_split_array_at, writer_into_reader, GrenadParameters,
MergeDeladdDeCboRoaringBitmaps,
MergeDeladdCboRoaringBitmaps,
};
use crate::error::SerializationError;
use crate::index::db_name::DOCID_WORD_POSITIONS;
@@ -44,7 +44,7 @@ pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
.map(|_| {
create_sorter(
grenad::SortAlgorithm::Unstable,
MergeDeladdDeCboRoaringBitmaps,
MergeDeladdCboRoaringBitmaps,
indexer.chunk_compression_type,
indexer.chunk_compression_level,
indexer.max_nb_chunks,
@@ -198,7 +198,7 @@ fn document_word_positions_into_sorter(
document_id: DocumentId,
del_word_pair_proximity: &BTreeMap<(String, String), u8>,
add_word_pair_proximity: &BTreeMap<(String, String), u8>,
word_pair_proximity_docids_sorters: &mut [grenad::Sorter<MergeDeladdDeCboRoaringBitmaps>],
word_pair_proximity_docids_sorters: &mut [grenad::Sorter<MergeDeladdCboRoaringBitmaps>],
) -> Result<()> {
use itertools::merge_join_by;
use itertools::EitherOrBoth::{Both, Left, Right};

View File

@@ -6,7 +6,7 @@ use obkv::KvReaderU16;
use super::helpers::{
create_sorter, sorter_into_reader, try_split_array_at, GrenadParameters,
MergeDeladdDeCboRoaringBitmaps,
MergeDeladdCboRoaringBitmaps,
};
use crate::error::SerializationError;
use crate::index::db_name::DOCID_WORD_POSITIONS;
@@ -28,7 +28,7 @@ pub fn extract_word_position_docids<R: io::Read + io::Seek>(
let mut word_position_docids_sorter = create_sorter(
grenad::SortAlgorithm::Unstable,
MergeDeladdDeCboRoaringBitmaps,
MergeDeladdCboRoaringBitmaps,
indexer.chunk_compression_type,
indexer.chunk_compression_level,
indexer.max_nb_chunks,
@@ -100,7 +100,7 @@ fn words_position_into_sorter(
key_buffer: &mut Vec<u8>,
del_word_positions: &BTreeSet<(u16, Vec<u8>)>,
add_word_positions: &BTreeSet<(u16, Vec<u8>)>,
word_position_docids_sorter: &mut grenad::Sorter<MergeDeladdDeCboRoaringBitmaps>,
word_position_docids_sorter: &mut grenad::Sorter<MergeDeladdCboRoaringBitmaps>,
) -> Result<()> {
use itertools::merge_join_by;
use itertools::EitherOrBoth::{Both, Left, Right};

View File

@@ -7,7 +7,7 @@ use either::Either;
use grenad::MergeFunction;
use roaring::RoaringBitmap;
use crate::heed_codec::DeCboRoaringBitmapCodec;
use crate::heed_codec::CboRoaringBitmapCodec;
use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd};
use crate::update::index_documents::transform::Operation;
use crate::Result;
@@ -189,10 +189,10 @@ impl MergeFunction for ObkvsKeepLastAdditionMergeDeletions {
}
}
/// Do a union of all the DeCboRoaringBitmaps in the values.
pub struct MergeDeCboRoaringBitmaps;
/// Do a union of all the CboRoaringBitmaps in the values.
pub struct MergeCboRoaringBitmaps;
impl MergeFunction for MergeDeCboRoaringBitmaps {
impl MergeFunction for MergeCboRoaringBitmaps {
type Error = crate::Error;
fn merge<'a>(&self, _key: &[u8], values: &[Cow<'a, [u8]>]) -> Result<Cow<'a, [u8]>> {
@@ -200,17 +200,17 @@ impl MergeFunction for MergeDeCboRoaringBitmaps {
Ok(values[0].clone())
} else {
let mut vec = Vec::new();
DeCboRoaringBitmapCodec::merge_into(values, &mut vec)?;
CboRoaringBitmapCodec::merge_into(values, &mut vec)?;
Ok(Cow::from(vec))
}
}
}
/// Do a union of DeCboRoaringBitmaps on both sides of a DelAdd obkv
/// Do a union of CboRoaringBitmaps on both sides of a DelAdd obkv
/// separately and outputs a new DelAdd with both unions.
pub struct MergeDeladdDeCboRoaringBitmaps;
pub struct MergeDeladdCboRoaringBitmaps;
impl MergeFunction for MergeDeladdDeCboRoaringBitmaps {
impl MergeFunction for MergeDeladdCboRoaringBitmaps {
type Error = crate::Error;
fn merge<'a>(&self, _key: &[u8], values: &[Cow<'a, [u8]>]) -> Result<Cow<'a, [u8]>> {
@@ -232,10 +232,10 @@ impl MergeFunction for MergeDeladdDeCboRoaringBitmaps {
let mut output_deladd_obkv = KvWriterDelAdd::memory();
let mut buffer = Vec::new();
DeCboRoaringBitmapCodec::merge_into(del_bitmaps_bytes, &mut buffer)?;
CboRoaringBitmapCodec::merge_into(del_bitmaps_bytes, &mut buffer)?;
output_deladd_obkv.insert(DelAdd::Deletion, &buffer)?;
buffer.clear();
DeCboRoaringBitmapCodec::merge_into(add_bitmaps_bytes, &mut buffer)?;
CboRoaringBitmapCodec::merge_into(add_bitmaps_bytes, &mut buffer)?;
output_deladd_obkv.insert(DelAdd::Addition, &buffer)?;
output_deladd_obkv.into_inner().map(Cow::from).map_err(Into::into)
}
@@ -246,16 +246,15 @@ impl MergeFunction for MergeDeladdDeCboRoaringBitmaps {
///
/// The first argument is the DelAdd obkv of CboRoaringBitmaps and
/// the second one is the CboRoaringBitmap to merge into.
pub fn merge_deladd_de_cbo_roaring_bitmaps_into_cbo_roaring_bitmap<'a>(
pub fn merge_deladd_cbo_roaring_bitmaps_into_cbo_roaring_bitmap<'a>(
deladd_obkv: &[u8],
previous: &[u8],
buffer: &'a mut Vec<u8>,
) -> Result<Option<&'a [u8]>> {
Ok(DeCboRoaringBitmapCodec::merge_deladd_into(
Ok(CboRoaringBitmapCodec::merge_deladd_into(
KvReaderDelAdd::from_slice(deladd_obkv),
previous,
buffer,
&mut Vec::new(), // tmp_buffer
)?)
}
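
Behind the obkv framing, the merge above is plain set arithmetic on the stored bitmap. A self-contained sketch of those semantics with `roaring` bitmaps, leaving out the DelAdd obkv parsing and the Cbo byte-optimized encoding:

use roaring::RoaringBitmap;

// Illustrative semantics only: subtract the deletion side, then union
// the addition side into the previously stored bitmap.
fn merge_deladd_into(
    previous: &RoaringBitmap,
    del: Option<&RoaringBitmap>,
    add: Option<&RoaringBitmap>,
) -> RoaringBitmap {
    let mut output = previous.clone();
    if let Some(del) = del {
        output -= del; // drop docids scheduled for deletion
    }
    if let Some(add) = add {
        output |= add; // union in newly added docids
    }
    output
}

fn main() {
    let previous: RoaringBitmap = (0..10).collect();
    let del: RoaringBitmap = (0..3).collect();
    let add: RoaringBitmap = (20..23).collect();
    assert_eq!(merge_deladd_into(&previous, Some(&del), Some(&add)).len(), 10);
}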

View File

@@ -40,7 +40,7 @@ use crate::update::{
};
use crate::vector::db::EmbedderInfo;
use crate::vector::{RuntimeEmbedders, VectorStore};
use crate::{DeCboRoaringBitmapCodec, Index, Result, UserError};
use crate::{CboRoaringBitmapCodec, Index, Result, UserError};
static MERGED_DATABASE_COUNT: usize = 7;
static PREFIX_DATABASE_COUNT: usize = 4;
@@ -415,7 +415,7 @@ where
let cloneable_chunk =
unsafe { as_cloneable_grenad(&word_docids_reader)? };
let word_docids = word_docids.get_or_insert_with(|| {
MergerBuilder::new(MergeDeladdDeCboRoaringBitmaps)
MergerBuilder::new(MergeDeladdCboRoaringBitmaps)
});
word_docids.push(cloneable_chunk.into_cursor()?);
let cloneable_chunk =
@@ -423,14 +423,14 @@ where
let exact_word_docids =
exact_word_docids.get_or_insert_with(|| {
MergerBuilder::new(
MergeDeladdDeCboRoaringBitmaps,
MergeDeladdCboRoaringBitmaps,
)
});
exact_word_docids.push(cloneable_chunk.into_cursor()?);
let cloneable_chunk =
unsafe { as_cloneable_grenad(&word_fid_docids_reader)? };
let word_fid_docids = word_fid_docids.get_or_insert_with(|| {
MergerBuilder::new(MergeDeladdDeCboRoaringBitmaps)
MergerBuilder::new(MergeDeladdCboRoaringBitmaps)
});
word_fid_docids.push(cloneable_chunk.into_cursor()?);
TypedChunk::WordDocids {
@@ -444,7 +444,7 @@ where
let word_position_docids =
word_position_docids.get_or_insert_with(|| {
MergerBuilder::new(
MergeDeladdDeCboRoaringBitmaps,
MergeDeladdCboRoaringBitmaps,
)
});
word_position_docids.push(cloneable_chunk.into_cursor()?);
@@ -577,10 +577,10 @@ where
)]
pub fn execute_prefix_databases(
self,
word_docids: Option<Merger<CursorClonableMmap, MergeDeladdDeCboRoaringBitmaps>>,
exact_word_docids: Option<Merger<CursorClonableMmap, MergeDeladdDeCboRoaringBitmaps>>,
word_position_docids: Option<Merger<CursorClonableMmap, MergeDeladdDeCboRoaringBitmaps>>,
word_fid_docids: Option<Merger<CursorClonableMmap, MergeDeladdDeCboRoaringBitmaps>>,
word_docids: Option<Merger<CursorClonableMmap, MergeDeladdCboRoaringBitmaps>>,
exact_word_docids: Option<Merger<CursorClonableMmap, MergeDeladdCboRoaringBitmaps>>,
word_position_docids: Option<Merger<CursorClonableMmap, MergeDeladdCboRoaringBitmaps>>,
word_fid_docids: Option<Merger<CursorClonableMmap, MergeDeladdCboRoaringBitmaps>>,
) -> Result<()>
where
FP: Fn(UpdateIndexingStep) + Sync,
@@ -778,9 +778,9 @@ where
)]
fn execute_word_prefix_docids(
txn: &mut heed::RwTxn<'_>,
merger: Merger<CursorClonableMmap, MergeDeladdDeCboRoaringBitmaps>,
word_docids_db: Database<Str, DeCboRoaringBitmapCodec>,
word_prefix_docids_db: Database<Str, DeCboRoaringBitmapCodec>,
merger: Merger<CursorClonableMmap, MergeDeladdCboRoaringBitmaps>,
word_docids_db: Database<Str, CboRoaringBitmapCodec>,
word_prefix_docids_db: Database<Str, CboRoaringBitmapCodec>,
indexer_config: &IndexerConfig,
new_prefix_fst_words: &[String],
common_prefix_fst_words: &[&[String]],
@@ -1292,7 +1292,7 @@ mod tests {
let rtxn = index.read_txn().unwrap();
// testing the simple query search
let mut search = crate::Search::new(&rtxn, &index);
let mut search = index.search(&rtxn);
search.query("document");
search.terms_matching_strategy(TermsMatchingStrategy::default());
// all documents should be returned
@@ -1333,7 +1333,7 @@ mod tests {
assert!(documents_ids.is_empty()); // nested is not searchable
// testing the filters
let mut search = crate::Search::new(&rtxn, &index);
let mut search = index.search(&rtxn);
search.filter(crate::Filter::from_str(r#"title = "The first document""#).unwrap().unwrap());
let crate::SearchResult { documents_ids, .. } = search.execute().unwrap();
assert_eq!(documents_ids, vec![1]);
@@ -1358,6 +1358,7 @@ mod tests {
#[test]
fn index_documents_with_nested_primary_key() {
let index = TempIndex::new();
let progress = Progress::default();
index
.update_settings(|settings| {
@@ -1397,7 +1398,7 @@ mod tests {
let rtxn = index.read_txn().unwrap();
// testing the simple query search
let mut search = crate::Search::new(&rtxn, &index);
let mut search = crate::Search::new(&rtxn, &index, &progress);
search.query("document");
search.terms_matching_strategy(TermsMatchingStrategy::default());
// all documents should be returned
@@ -1453,6 +1454,7 @@ mod tests {
#[test]
fn test_facets_generation() {
let index = TempIndex::new();
let progress = Progress::default();
index
.add_documents(documents!([
@@ -1507,7 +1509,7 @@ mod tests {
let rtxn = index.read_txn().unwrap();
for (s, i) in [("zeroth", 0), ("first", 1), ("second", 2), ("third", 3)] {
let mut search = crate::Search::new(&rtxn, &index);
let mut search = crate::Search::new(&rtxn, &index, &progress);
let filter = format!(r#""dog.race.bernese mountain" = {s}"#);
search.filter(crate::Filter::from_str(&filter).unwrap().unwrap());
let crate::SearchResult { documents_ids, .. } = search.execute().unwrap();
@@ -1545,7 +1547,7 @@ mod tests {
let rtxn = index.read_txn().unwrap();
let mut search = crate::Search::new(&rtxn, &index);
let mut search = crate::Search::new(&rtxn, &index, &progress);
search.sort_criteria(vec![crate::AscDesc::Asc(crate::Member::Field(S(
"dog.race.bernese mountain",
)))]);
@@ -2911,7 +2913,6 @@ mod tests {
]
*/
let index = TempIndex::new();
// START OF BATCH
println!("--- ENTERING BATCH 1");
@@ -3601,6 +3602,7 @@ mod tests {
#[test]
fn delete_words_exact_attributes() {
let index = TempIndex::new();
let progress = Progress::default();
index
.update_settings(|settings| {
@@ -3639,7 +3641,7 @@ mod tests {
let words = index.words_fst(&txn).unwrap().into_stream().into_strs().unwrap();
insta::assert_snapshot!(format!("{words:?}"), @r###"["hello"]"###);
let mut s = Search::new(&txn, &index);
let mut s = Search::new(&txn, &index, &progress);
s.query("hello");
let crate::SearchResult { documents_ids, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0]");

View File

@@ -12,8 +12,8 @@ use obkv::{KvReader, KvWriter};
use roaring::RoaringBitmap;
use super::helpers::{
self, merge_deladd_de_cbo_roaring_bitmaps_into_cbo_roaring_bitmap, valid_lmdb_key,
CursorClonableMmap, KeepFirst, MergeDeladdBtreesetString, MergeDeladdDeCboRoaringBitmaps,
self, merge_deladd_cbo_roaring_bitmaps_into_cbo_roaring_bitmap, valid_lmdb_key,
CursorClonableMmap, KeepFirst, MergeDeladdBtreesetString, MergeDeladdCboRoaringBitmaps,
MergeIgnoreValues,
};
use crate::external_documents_ids::{DocumentOperation, DocumentOperationKind};
@@ -29,7 +29,7 @@ use crate::update::settings::InnerIndexSettingsDiff;
use crate::vector::db::{EmbeddingStatusDelta, IndexEmbeddingConfig};
use crate::vector::VectorStore;
use crate::{
lat_lng_to_xyz, DeCboRoaringBitmapCodec, DocumentId, FieldId, GeoPoint, Index, InternalError,
lat_lng_to_xyz, CboRoaringBitmapCodec, DocumentId, FieldId, GeoPoint, Index, InternalError,
Result, SerializationError, U8StrStrCodec, UserError,
};
@@ -241,7 +241,7 @@ pub(crate) fn write_typed_chunk_into_index(
tracing::trace_span!(target: "indexing::write_db", "field_id_word_count_docids");
let _entered = span.enter();
let mut builder = MergerBuilder::new(MergeDeladdDeCboRoaringBitmaps);
let mut builder = MergerBuilder::new(MergeDeladdCboRoaringBitmaps);
for typed_chunk in typed_chunks {
let TypedChunk::FieldIdWordCountDocids(chunk) = typed_chunk else {
unreachable!();
@@ -256,7 +256,7 @@ pub(crate) fn write_typed_chunk_into_index(
&index.field_id_word_count_docids,
wtxn,
deladd_serialize_add_side,
merge_deladd_de_cbo_roaring_bitmaps_into_cbo_roaring_bitmap,
merge_deladd_cbo_roaring_bitmaps_into_cbo_roaring_bitmap,
)?;
is_merged_database = true;
}
@@ -264,9 +264,9 @@ pub(crate) fn write_typed_chunk_into_index(
let span = tracing::trace_span!(target: "indexing::write_db", "word_docids");
let _entered = span.enter();
let mut word_docids_builder = MergerBuilder::new(MergeDeladdDeCboRoaringBitmaps);
let mut exact_word_docids_builder = MergerBuilder::new(MergeDeladdDeCboRoaringBitmaps);
let mut word_fid_docids_builder = MergerBuilder::new(MergeDeladdDeCboRoaringBitmaps);
let mut word_docids_builder = MergerBuilder::new(MergeDeladdCboRoaringBitmaps);
let mut exact_word_docids_builder = MergerBuilder::new(MergeDeladdCboRoaringBitmaps);
let mut word_fid_docids_builder = MergerBuilder::new(MergeDeladdCboRoaringBitmaps);
let mut fst_merger_builder = MergerBuilder::new(MergeIgnoreValues);
for typed_chunk in typed_chunks {
let TypedChunk::WordDocids {
@@ -291,7 +291,7 @@ pub(crate) fn write_typed_chunk_into_index(
&index.word_docids,
wtxn,
deladd_serialize_add_side,
merge_deladd_de_cbo_roaring_bitmaps_into_cbo_roaring_bitmap,
merge_deladd_cbo_roaring_bitmaps_into_cbo_roaring_bitmap,
)?;
let exact_word_docids_merger = exact_word_docids_builder.build();
@@ -300,7 +300,7 @@ pub(crate) fn write_typed_chunk_into_index(
&index.exact_word_docids,
wtxn,
deladd_serialize_add_side,
merge_deladd_de_cbo_roaring_bitmaps_into_cbo_roaring_bitmap,
merge_deladd_cbo_roaring_bitmaps_into_cbo_roaring_bitmap,
)?;
let word_fid_docids_merger = word_fid_docids_builder.build();
@@ -309,7 +309,7 @@ pub(crate) fn write_typed_chunk_into_index(
&index.word_fid_docids,
wtxn,
deladd_serialize_add_side,
merge_deladd_de_cbo_roaring_bitmaps_into_cbo_roaring_bitmap,
merge_deladd_cbo_roaring_bitmaps_into_cbo_roaring_bitmap,
)?;
// create fst from word docids
@@ -329,7 +329,7 @@ pub(crate) fn write_typed_chunk_into_index(
let span = tracing::trace_span!(target: "indexing::write_db", "word_position_docids");
let _entered = span.enter();
let mut builder = MergerBuilder::new(MergeDeladdDeCboRoaringBitmaps);
let mut builder = MergerBuilder::new(MergeDeladdCboRoaringBitmaps);
for typed_chunk in typed_chunks {
let TypedChunk::WordPositionDocids(chunk) = typed_chunk else {
unreachable!();
@@ -344,7 +344,7 @@ pub(crate) fn write_typed_chunk_into_index(
&index.word_position_docids,
wtxn,
deladd_serialize_add_side,
merge_deladd_de_cbo_roaring_bitmaps_into_cbo_roaring_bitmap,
merge_deladd_cbo_roaring_bitmaps_into_cbo_roaring_bitmap,
)?;
is_merged_database = true;
}
@@ -353,7 +353,7 @@ pub(crate) fn write_typed_chunk_into_index(
tracing::trace_span!(target: "indexing::write_db","field_id_facet_number_docids");
let _entered = span.enter();
let mut builder = MergerBuilder::new(MergeDeladdDeCboRoaringBitmaps);
let mut builder = MergerBuilder::new(MergeDeladdCboRoaringBitmaps);
let mut data_size = 0;
for typed_chunk in typed_chunks {
let TypedChunk::FieldIdFacetNumberDocids(facet_id_number_docids) = typed_chunk
@@ -375,7 +375,7 @@ pub(crate) fn write_typed_chunk_into_index(
tracing::trace_span!(target: "indexing::write_db", "field_id_facet_string_docids");
let _entered = span.enter();
let mut facet_id_string_builder = MergerBuilder::new(MergeDeladdDeCboRoaringBitmaps);
let mut facet_id_string_builder = MergerBuilder::new(MergeDeladdCboRoaringBitmaps);
let mut normalized_facet_id_string_builder =
MergerBuilder::new(MergeDeladdBtreesetString);
let mut data_size = 0;
@@ -411,7 +411,7 @@ pub(crate) fn write_typed_chunk_into_index(
tracing::trace_span!(target: "indexing::write_db", "field_id_facet_exists_docids");
let _entered = span.enter();
let mut builder = MergerBuilder::new(MergeDeladdDeCboRoaringBitmaps);
let mut builder = MergerBuilder::new(MergeDeladdCboRoaringBitmaps);
for typed_chunk in typed_chunks {
let TypedChunk::FieldIdFacetExistsDocids(chunk) = typed_chunk else {
unreachable!();
@@ -426,7 +426,7 @@ pub(crate) fn write_typed_chunk_into_index(
&index.facet_id_exists_docids,
wtxn,
deladd_serialize_add_side,
merge_deladd_de_cbo_roaring_bitmaps_into_cbo_roaring_bitmap,
merge_deladd_cbo_roaring_bitmaps_into_cbo_roaring_bitmap,
)?;
is_merged_database = true;
}
@@ -435,7 +435,7 @@ pub(crate) fn write_typed_chunk_into_index(
tracing::trace_span!(target: "indexing::write_db", "field_id_facet_is_null_docids");
let _entered = span.enter();
let mut builder = MergerBuilder::new(MergeDeladdDeCboRoaringBitmaps);
let mut builder = MergerBuilder::new(MergeDeladdCboRoaringBitmaps);
for typed_chunk in typed_chunks {
let TypedChunk::FieldIdFacetIsNullDocids(chunk) = typed_chunk else {
unreachable!();
@@ -450,7 +450,7 @@ pub(crate) fn write_typed_chunk_into_index(
&index.facet_id_is_null_docids,
wtxn,
deladd_serialize_add_side,
merge_deladd_de_cbo_roaring_bitmaps_into_cbo_roaring_bitmap,
merge_deladd_cbo_roaring_bitmaps_into_cbo_roaring_bitmap,
)?;
is_merged_database = true;
}
@@ -458,7 +458,7 @@ pub(crate) fn write_typed_chunk_into_index(
let span = tracing::trace_span!(target: "indexing::write_db", "field_id_facet_is_empty_docids");
let _entered = span.enter();
let mut builder = MergerBuilder::new(MergeDeladdDeCboRoaringBitmaps);
let mut builder = MergerBuilder::new(MergeDeladdCboRoaringBitmaps);
for typed_chunk in typed_chunks {
let TypedChunk::FieldIdFacetIsEmptyDocids(chunk) = typed_chunk else {
unreachable!();
@@ -473,7 +473,7 @@ pub(crate) fn write_typed_chunk_into_index(
&index.facet_id_is_empty_docids,
wtxn,
deladd_serialize_add_side,
merge_deladd_de_cbo_roaring_bitmaps_into_cbo_roaring_bitmap,
merge_deladd_cbo_roaring_bitmaps_into_cbo_roaring_bitmap,
)?;
is_merged_database = true;
}
@@ -482,7 +482,7 @@ pub(crate) fn write_typed_chunk_into_index(
tracing::trace_span!(target: "indexing::write_db", "word_pair_proximity_docids");
let _entered = span.enter();
let mut builder = MergerBuilder::new(MergeDeladdDeCboRoaringBitmaps);
let mut builder = MergerBuilder::new(MergeDeladdCboRoaringBitmaps);
for typed_chunk in typed_chunks {
let TypedChunk::WordPairProximityDocids(chunk) = typed_chunk else {
unreachable!();
@@ -504,7 +504,7 @@ pub(crate) fn write_typed_chunk_into_index(
&index.word_pair_proximity_docids,
wtxn,
deladd_serialize_add_side,
merge_deladd_de_cbo_roaring_bitmaps_into_cbo_roaring_bitmap,
merge_deladd_cbo_roaring_bitmaps_into_cbo_roaring_bitmap,
)?;
}
@@ -866,7 +866,7 @@ where
#[tracing::instrument(level = "trace", skip_all, target = "indexing::write_db")]
fn write_proximity_entries_into_database_additional_searchables<R, MF>(
merger: Merger<R, MF>,
database: &heed::Database<U8StrStrCodec, DeCboRoaringBitmapCodec>,
database: &heed::Database<U8StrStrCodec, CboRoaringBitmapCodec>,
wtxn: &mut RwTxn<'_>,
) -> Result<()>
where
@@ -881,7 +881,7 @@ where
U8StrStrCodec::bytes_decode(key).map_err(heed::Error::Decoding)?;
let data_to_insert = match KvReaderDelAdd::from_slice(value).get(DelAdd::Addition) {
Some(value) => {
DeCboRoaringBitmapCodec::bytes_decode(value).map_err(heed::Error::Decoding)?
CboRoaringBitmapCodec::bytes_decode(value).map_err(heed::Error::Decoding)?
}
None => continue,
};

View File

@@ -27,7 +27,7 @@ use crate::index::db_name;
use crate::index::main_key::{GEO_FACETED_DOCUMENTS_IDS_KEY, GEO_RTREE_KEY};
use crate::update::new::KvReaderFieldId;
use crate::vector::Embedding;
use crate::{DeCboRoaringBitmapCodec, DocumentId, Error, Index, InternalError};
use crate::{CboRoaringBitmapCodec, DocumentId, Error, Index, InternalError};
/// Note that the FrameProducer requires up to 9 bytes to
/// encode the length; the max grant has been computed accordingly.
@@ -971,9 +971,7 @@ pub struct WordDocidsSender<'a, 'b, D> {
impl<D: DatabaseType> WordDocidsSender<'_, '_, D> {
pub fn write(&self, key: &[u8], bitmap: &RoaringBitmap) -> crate::Result<()> {
let mut tmp_buffer = Vec::new();
let value_length =
DeCboRoaringBitmapCodec::serialized_size_with_tmp_buffer(bitmap, &mut tmp_buffer);
let value_length = CboRoaringBitmapCodec::serialized_size(bitmap);
let key_length = key.len().try_into().ok().and_then(NonZeroU16::new).ok_or_else(|| {
InternalError::StorePut {
database_name: D::DATABASE.database_name(),
@@ -988,10 +986,7 @@ impl<D: DatabaseType> WordDocidsSender<'_, '_, D> {
value_length,
|key_buffer, value_buffer| {
key_buffer.copy_from_slice(key);
DeCboRoaringBitmapCodec::serialize_into(
bitmap,
&mut io::Cursor::new(value_buffer),
)?;
CboRoaringBitmapCodec::serialize_into_writer(bitmap, value_buffer)?;
Ok(())
},
)
@@ -1012,9 +1007,7 @@ impl FacetDocidsSender<'_, '_> {
let (facet_kind, key) = FacetKind::extract_from_key(key);
let database = Database::from(facet_kind);
let mut tmp_buffer = Vec::new();
let value_length =
DeCboRoaringBitmapCodec::serialized_size_with_tmp_buffer(bitmap, &mut tmp_buffer);
let value_length = CboRoaringBitmapCodec::serialized_size(bitmap);
let value_length = match facet_kind {
// We must take the facet group size into account
// when we serialize strings and numbers.
@@ -1048,7 +1041,7 @@ impl FacetDocidsSender<'_, '_> {
FacetKind::Null | FacetKind::Empty | FacetKind::Exists => value_out,
};
DeCboRoaringBitmapCodec::serialize_into(bitmap, &mut io::Cursor::new(value_out))?;
CboRoaringBitmapCodec::serialize_into_writer(bitmap, value_out)?;
Ok(())
},
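
Both senders follow the same size-then-write discipline: the exact serialized length is computed first so the frame can be granted before any byte is written. A standalone sketch of that invariant using plain `roaring` serialization (the Cbo header and the channel API are elided):

use roaring::RoaringBitmap;

fn main() -> std::io::Result<()> {
    // Size first, write second: the writer is granted exactly
    // `serialized_size` bytes before serialization begins.
    let bitmap: RoaringBitmap = (0u32..1_000).collect();
    let value_length = bitmap.serialized_size();
    let mut value_buffer = Vec::with_capacity(value_length);
    bitmap.serialize_into(&mut value_buffer)?;
    assert_eq!(value_buffer.len(), value_length);
    Ok(())
}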

View File

@@ -81,8 +81,8 @@ use rustc_hash::FxBuildHasher;
use crate::update::del_add::{DelAdd, KvWriterDelAdd};
use crate::update::new::thread_local::MostlySend;
use crate::update::new::KvReaderDelAdd;
use crate::update::MergeDeladdDeCboRoaringBitmaps;
use crate::{DeCboRoaringBitmapCodec, Result};
use crate::update::MergeDeladdCboRoaringBitmaps;
use crate::{CboRoaringBitmapCodec, Result};
/// A cache that stores byte keys associated with CboDelAddRoaringBitmaps.
///
@@ -320,10 +320,9 @@ struct SpillingCaches<'extractor> {
&'extractor Bump,
>,
>,
spilled_entries: Vec<grenad::Sorter<MergeDeladdDeCboRoaringBitmaps>>,
spilled_entries: Vec<grenad::Sorter<MergeDeladdCboRoaringBitmaps>>,
deladd_buffer: Vec<u8>,
cbo_buffer: Vec<u8>,
tmp_buffer: Vec<u32>,
}
impl<'extractor> SpillingCaches<'extractor> {
@@ -339,7 +338,7 @@ impl<'extractor> SpillingCaches<'extractor> {
) -> SpillingCaches<'extractor> {
SpillingCaches {
spilled_entries: iter::repeat_with(|| {
let mut builder = grenad::SorterBuilder::new(MergeDeladdDeCboRoaringBitmaps);
let mut builder = grenad::SorterBuilder::new(MergeDeladdCboRoaringBitmaps);
builder.dump_threshold(0);
builder.allow_realloc(false);
builder.build()
@@ -349,7 +348,6 @@ impl<'extractor> SpillingCaches<'extractor> {
caches,
deladd_buffer: Vec::new(),
cbo_buffer: Vec::new(),
tmp_buffer: Vec::new(),
}
}
@@ -372,7 +370,6 @@ impl<'extractor> SpillingCaches<'extractor> {
&mut self.spilled_entries[bucket],
&mut self.deladd_buffer,
&mut self.cbo_buffer,
&mut self.tmp_buffer,
key,
DelAddRoaringBitmap::new_del_u32(n),
),
@@ -398,7 +395,6 @@ impl<'extractor> SpillingCaches<'extractor> {
&mut self.spilled_entries[bucket],
&mut self.deladd_buffer,
&mut self.cbo_buffer,
&mut self.tmp_buffer,
key,
DelAddRoaringBitmap::new_add_u32(n),
),
@@ -412,10 +408,9 @@ fn compute_bucket_from_hash(buckets: usize, hash: u64) -> usize {
}
fn spill_entry_to_sorter(
spilled_entries: &mut grenad::Sorter<MergeDeladdDeCboRoaringBitmaps>,
spilled_entries: &mut grenad::Sorter<MergeDeladdCboRoaringBitmaps>,
deladd_buffer: &mut Vec<u8>,
cbo_buffer: &mut Vec<u8>,
tmp_buffer: &mut Vec<u32>,
key: &[u8],
deladd: DelAddRoaringBitmap,
) -> Result<()> {
@@ -425,21 +420,21 @@ fn spill_entry_to_sorter(
match deladd {
DelAddRoaringBitmap { del: Some(del), add: None } => {
cbo_buffer.clear();
DeCboRoaringBitmapCodec::serialize_into_with_tmp_buffer(&del, cbo_buffer, tmp_buffer)?;
CboRoaringBitmapCodec::serialize_into_vec(&del, cbo_buffer);
value_writer.insert(DelAdd::Deletion, &cbo_buffer)?;
}
DelAddRoaringBitmap { del: None, add: Some(add) } => {
cbo_buffer.clear();
DeCboRoaringBitmapCodec::serialize_into_with_tmp_buffer(&add, cbo_buffer, tmp_buffer)?;
CboRoaringBitmapCodec::serialize_into_vec(&add, cbo_buffer);
value_writer.insert(DelAdd::Addition, &cbo_buffer)?;
}
DelAddRoaringBitmap { del: Some(del), add: Some(add) } => {
cbo_buffer.clear();
DeCboRoaringBitmapCodec::serialize_into_with_tmp_buffer(&del, cbo_buffer, tmp_buffer)?;
CboRoaringBitmapCodec::serialize_into_vec(&del, cbo_buffer);
value_writer.insert(DelAdd::Deletion, &cbo_buffer)?;
cbo_buffer.clear();
DeCboRoaringBitmapCodec::serialize_into_with_tmp_buffer(&add, cbo_buffer, tmp_buffer)?;
CboRoaringBitmapCodec::serialize_into_vec(&add, cbo_buffer);
value_writer.insert(DelAdd::Addition, &cbo_buffer)?;
}
DelAddRoaringBitmap { del: None, add: None } => return Ok(()),
@@ -642,22 +637,15 @@ pub struct DelAddRoaringBitmap {
impl DelAddRoaringBitmap {
fn from_bytes(bytes: &[u8]) -> io::Result<DelAddRoaringBitmap> {
let mut tmp_buffer = Vec::new();
let reader = KvReaderDelAdd::from_slice(bytes);
let del = match reader.get(DelAdd::Deletion) {
Some(bytes) => {
DeCboRoaringBitmapCodec::deserialize_from_with_tmp_buffer(bytes, &mut tmp_buffer)
.map(Some)?
}
Some(bytes) => CboRoaringBitmapCodec::deserialize_from(bytes).map(Some)?,
None => None,
};
let add = match reader.get(DelAdd::Addition) {
Some(bytes) => {
DeCboRoaringBitmapCodec::deserialize_from_with_tmp_buffer(bytes, &mut tmp_buffer)
.map(Some)?
}
Some(bytes) => CboRoaringBitmapCodec::deserialize_from(bytes).map(Some)?,
None => None,
};

View File

@@ -14,7 +14,7 @@ use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValu
use crate::heed_codec::BytesRefCodec;
use crate::update::facet::{FACET_GROUP_SIZE, FACET_MIN_LEVEL_SIZE};
use crate::update::{create_writer, writer_into_reader};
use crate::{DeCboRoaringBitmapCodec, FieldId, Index};
use crate::{CboRoaringBitmapCodec, FieldId, Index};
/// Generate the facet level based on the level 0.
///
@@ -123,7 +123,7 @@ fn compute_level(
ser_buffer.push(group_len);
let group_docids = mem::take(&mut group_docids);
let docids = group_docids.into_iter().union();
DeCboRoaringBitmapCodec::serialize_into(&docids, &mut ser_buffer)?;
CboRoaringBitmapCodec::serialize_into_vec(&docids, &mut ser_buffer);
writer.insert(left_bound, &ser_buffer)?;
}
left_bound = Some(key.left_bound);
@@ -142,7 +142,7 @@ fn compute_level(
let group_len: u8 = group_docids.len().try_into().unwrap();
ser_buffer.push(group_len);
let group_docids = group_docids.into_iter().union();
DeCboRoaringBitmapCodec::serialize_into(&group_docids, &mut ser_buffer)?;
CboRoaringBitmapCodec::serialize_into_vec(&group_docids, &mut ser_buffer);
writer.insert(left_bound, &ser_buffer)?;
}
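
The value written for each group is one group-size byte followed by the serialized union of the group's bitmaps. A hedged sketch of that layout, with plain `roaring` serialization standing in for the Cbo codec:

use roaring::{MultiOps, RoaringBitmap};

// Sketch of a facet-level entry: [group_len: u8][serialized union of the
// group's docid bitmaps]. Plain roaring serialization stands in for the
// Cbo codec used by the real code.
fn level_entry(group_docids: Vec<RoaringBitmap>) -> std::io::Result<Vec<u8>> {
    let group_len: u8 = group_docids.len().try_into().unwrap();
    let docids = group_docids.into_iter().union();
    let mut ser_buffer = vec![group_len];
    docids.serialize_into(&mut ser_buffer)?;
    Ok(ser_buffer)
}

fn main() -> std::io::Result<()> {
    let group: Vec<RoaringBitmap> = vec![(0..4).collect(), (4..8).collect()];
    let entry = level_entry(group)?;
    assert_eq!(entry[0], 2); // group-size byte comes first
    Ok(())
}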

View File

@@ -14,7 +14,7 @@ use super::extract::{
};
use crate::update::facet::new_incremental::FacetFieldIdChange;
use crate::update::new::extract::cellulite::GeoJsonExtractorData;
use crate::{DeCboRoaringBitmapCodec, FieldId, GeoPoint, Index, InternalError, Result};
use crate::{CboRoaringBitmapCodec, FieldId, GeoPoint, Index, InternalError, Result};
#[tracing::instrument(level = "trace", skip_all, target = "indexing::merge")]
pub fn merge_and_send_rtree<'extractor, MSP>(
@@ -106,7 +106,7 @@ where
}
merge_caches_sorted(frozen, |key, DelAddRoaringBitmap { del, add }| {
let current = database.get(&rtxn, key)?;
match merge_de_cbo_bitmaps(current, del, add)? {
match merge_cbo_bitmaps(current, del, add)? {
Operation::Write(bitmap) => docids_sender.write(key, &bitmap),
Operation::Delete => docids_sender.delete(key),
Operation::Ignore => Ok(()),
@@ -134,8 +134,8 @@ pub fn merge_and_send_facet_docids(
FacetFieldIdsDelta::new(max_string_count, max_number_count);
let rtxn = index.read_txn()?;
merge_caches_sorted(frozen, |key, DelAddRoaringBitmap { del, add }| {
let current = database.get_de_cbo_roaring_bytes_value(&rtxn, key)?;
match merge_de_cbo_bitmaps(current, del, add)? {
let current = database.get_cbo_roaring_bytes_value(&rtxn, key)?;
match merge_cbo_bitmaps(current, del, add)? {
Operation::Write(bitmap) => {
facet_field_ids_delta.register_from_key(key);
docids_sender.write(key, &bitmap)?;
@@ -166,7 +166,7 @@ impl<'a> FacetDatabases<'a> {
Self { index }
}
fn get_de_cbo_roaring_bytes_value<'t>(
fn get_cbo_roaring_bytes_value<'t>(
&self,
rtxn: &'t RoTxn<'_>,
key: &[u8],
@@ -320,12 +320,12 @@ enum Operation {
}
/// A function that merges the DelAdd CboRoaringBitmaps with the current bitmap.
fn merge_de_cbo_bitmaps(
fn merge_cbo_bitmaps(
current: Option<&[u8]>,
del: Option<RoaringBitmap>,
add: Option<RoaringBitmap>,
) -> Result<Operation> {
let current = current.map(DeCboRoaringBitmapCodec::deserialize_from).transpose()?;
let current = current.map(CboRoaringBitmapCodec::deserialize_from).transpose()?;
match (current, del, add) {
(None, None, None) => Ok(Operation::Ignore), // but it's strange
(None, None, Some(add)) => Ok(Operation::Write(add)),
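
The remaining match arms fall outside the diff context. Going by the doc comment, the contract is to apply the del and add sides to the current bitmap and decide between writing, deleting, or ignoring the entry. A hedged, self-contained reconstruction of that contract (the real arms may differ):

use roaring::RoaringBitmap;

enum Operation {
    Write(RoaringBitmap),
    Delete,
    Ignore,
}

// Assumed contract: (current - del) | add, deleting the entry when the
// result is empty. This sketches the intent, not the verbatim arms.
fn merge_cbo_bitmaps(
    current: Option<RoaringBitmap>,
    del: Option<RoaringBitmap>,
    add: Option<RoaringBitmap>,
) -> Operation {
    if current.is_none() && del.is_none() && add.is_none() {
        return Operation::Ignore;
    }
    let mut bitmap = current.unwrap_or_default();
    if let Some(del) = del {
        bitmap -= &del;
    }
    if let Some(add) = add {
        bitmap |= &add;
    }
    if bitmap.is_empty() {
        Operation::Delete
    } else {
        Operation::Write(bitmap)
    }
}

fn main() {
    let add: RoaringBitmap = (0..5).collect();
    match merge_cbo_bitmaps(None, None, Some(add)) {
        Operation::Write(bitmap) => assert_eq!(bitmap.len(), 5),
        _ => unreachable!(),
    }
}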

View File

@@ -14,12 +14,12 @@ use thread_local::ThreadLocal;
use super::ref_cell_ext::RefCellExt as _;
use crate::heed_codec::StrBEU16Codec;
use crate::update::GrenadParameters;
use crate::{DeCboRoaringBitmapCodec, Index, Prefix, Result};
use crate::{CboRoaringBitmapCodec, Index, Prefix, Result};
struct WordPrefixDocids<'i> {
index: &'i Index,
database: Database<Bytes, DeCboRoaringBitmapCodec>,
prefix_database: Database<Bytes, DeCboRoaringBitmapCodec>,
database: Database<Bytes, CboRoaringBitmapCodec>,
prefix_database: Database<Bytes, CboRoaringBitmapCodec>,
max_memory_by_thread: Option<usize>,
/// Do not use an experimental LMDB feature to read uncommitted data in parallel.
no_experimental_post_processing: bool,
@@ -28,8 +28,8 @@ struct WordPrefixDocids<'i> {
impl<'i> WordPrefixDocids<'i> {
fn new(
index: &'i Index,
database: Database<Bytes, DeCboRoaringBitmapCodec>,
prefix_database: Database<Bytes, DeCboRoaringBitmapCodec>,
database: Database<Bytes, CboRoaringBitmapCodec>,
prefix_database: Database<Bytes, CboRoaringBitmapCodec>,
grenad_parameters: &GrenadParameters,
) -> WordPrefixDocids<'i> {
WordPrefixDocids {
@@ -87,12 +87,12 @@ impl<'i> WordPrefixDocids<'i> {
let output = self
.database
.prefix_iter(&rtxn, prefix.as_bytes())?
.remap_types::<Str, DeCboRoaringBitmapCodec>()
.remap_types::<Str, CboRoaringBitmapCodec>()
.map(|result| result.map(|(_word, bitmap)| bitmap))
.union()?;
buffer.clear();
DeCboRoaringBitmapCodec::serialize_into(&output, &mut buffer)?;
CboRoaringBitmapCodec::serialize_into_vec(&output, &mut buffer);
indexes.push(PrefixEntry { prefix, serialized_length: buffer.len() });
file.write_all(&buffer)?;
}
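
This sequential path and the parallel path in the next hunk rely on the same idea: a prefix's docids are the union of the docids of every word sharing that prefix, serialized once into a reusable buffer. A minimal sketch, with a BTreeMap standing in for the LMDB database (prefix_docids is a hypothetical helper; BTreeMap keys are sorted like LMDB keys, so a range scan plays the role of prefix_iter):

use std::collections::BTreeMap;
use roaring::{MultiOps, RoaringBitmap};

/// The docids of a prefix are the union of the docids of every word
/// that starts with it.
fn prefix_docids(word_docids: &BTreeMap<String, RoaringBitmap>, prefix: &str) -> RoaringBitmap {
    word_docids
        .range(prefix.to_string()..)
        .take_while(|(word, _)| word.starts_with(prefix))
        .map(|(_, bitmap)| bitmap)
        .union()
}

The word-position variant further below (PrefixIntegerEntry) applies the same per-key union before serializing with CboRoaringBitmapCodec::serialize_into_vec.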
@@ -150,11 +150,11 @@ impl<'i> WordPrefixDocids<'i> {
.bitmaps(prefix)
.unwrap()
.iter()
.map(|bytes| DeCboRoaringBitmapCodec::deserialize_from(bytes))
.map(|bytes| CboRoaringBitmapCodec::deserialize_from(bytes))
.union()?;
buffer.clear();
DeCboRoaringBitmapCodec::serialize_into(&output, buffer)?;
CboRoaringBitmapCodec::serialize_into_vec(&output, buffer);
index.push(PrefixEntry { prefix, serialized_length: buffer.len() });
file.write_all(buffer)
})?;
@@ -203,7 +203,7 @@ struct FrozenPrefixBitmaps<'a, 'rtxn> {
impl<'a, 'rtxn> FrozenPrefixBitmaps<'a, 'rtxn> {
#[tracing::instrument(level = "trace", skip_all, target = "indexing::prefix")]
pub fn from_prefixes(
database: Database<Bytes, DeCboRoaringBitmapCodec>,
database: Database<Bytes, CboRoaringBitmapCodec>,
rtxn: &'rtxn RoTxn,
prefixes: &'a BTreeSet<Prefix>,
) -> heed::Result<Self> {
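
FrozenPrefixBitmaps snapshots the serialized bitmaps per prefix under a single read transaction, and the unsafe impl Sync below is what allows worker threads to consume that snapshot concurrently. A minimal sketch of the fan-out step, assuming rayon and already deserialized bitmaps (union_in_parallel is a hypothetical helper, not the crate's API):

use std::collections::BTreeMap;
use rayon::prelude::*;
use roaring::{MultiOps, RoaringBitmap};

/// Once the per-prefix bitmaps are frozen into a plain `Sync` map,
/// each prefix can be unioned on its own thread.
fn union_in_parallel(
    frozen: &BTreeMap<String, Vec<RoaringBitmap>>,
) -> Vec<(String, RoaringBitmap)> {
    frozen
        .par_iter()
        .map(|(prefix, bitmaps)| (prefix.clone(), bitmaps.iter().union()))
        .collect()
}

Freezing first keeps the read transaction usage confined to one thread, which matches the "experimental LMDB feature" escape hatch mentioned in the struct comments.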
@@ -231,8 +231,8 @@ unsafe impl Sync for FrozenPrefixBitmaps<'_, '_> {}
struct WordPrefixIntegerDocids<'i> {
index: &'i Index,
database: Database<Bytes, DeCboRoaringBitmapCodec>,
prefix_database: Database<Bytes, DeCboRoaringBitmapCodec>,
database: Database<Bytes, CboRoaringBitmapCodec>,
prefix_database: Database<Bytes, CboRoaringBitmapCodec>,
max_memory_by_thread: Option<usize>,
/// Do not use an experimental LMDB feature to read uncommitted data in parallel.
no_experimental_post_processing: bool,
@@ -241,8 +241,8 @@ struct WordPrefixIntegerDocids<'i> {
impl<'i> WordPrefixIntegerDocids<'i> {
fn new(
index: &'i Index,
database: Database<Bytes, DeCboRoaringBitmapCodec>,
prefix_database: Database<Bytes, DeCboRoaringBitmapCodec>,
database: Database<Bytes, CboRoaringBitmapCodec>,
prefix_database: Database<Bytes, CboRoaringBitmapCodec>,
grenad_parameters: &'_ GrenadParameters,
) -> WordPrefixIntegerDocids<'i> {
WordPrefixIntegerDocids {
@@ -338,10 +338,10 @@ impl<'i> WordPrefixIntegerDocids<'i> {
} else {
let output = bitmaps_bytes
.into_iter()
.map(DeCboRoaringBitmapCodec::deserialize_from)
.map(CboRoaringBitmapCodec::deserialize_from)
.union()?;
buffer.clear();
DeCboRoaringBitmapCodec::serialize_into(&output, &mut buffer)?;
CboRoaringBitmapCodec::serialize_into_vec(&output, &mut buffer);
indexes.push(PrefixIntegerEntry {
prefix,
pos,
@@ -419,10 +419,10 @@ impl<'i> WordPrefixIntegerDocids<'i> {
} else {
let output = bitmaps_bytes
.iter()
.map(|bytes| DeCboRoaringBitmapCodec::deserialize_from(bytes))
.map(|bytes| CboRoaringBitmapCodec::deserialize_from(bytes))
.union()?;
buffer.clear();
DeCboRoaringBitmapCodec::serialize_into(&output, buffer)?;
CboRoaringBitmapCodec::serialize_into_vec(&output, buffer);
index.push(PrefixIntegerEntry {
prefix,
pos,
@@ -486,7 +486,7 @@ struct FrozenPrefixIntegerBitmaps<'a, 'rtxn> {
impl<'a, 'rtxn> FrozenPrefixIntegerBitmaps<'a, 'rtxn> {
#[tracing::instrument(level = "trace", skip_all, target = "indexing::prefix")]
pub fn from_prefixes(
database: Database<Bytes, DeCboRoaringBitmapCodec>,
database: Database<Bytes, CboRoaringBitmapCodec>,
rtxn: &'rtxn RoTxn,
prefixes: &'a BTreeSet<Prefix>,
) -> heed::Result<Self> {
@@ -516,7 +516,7 @@ unsafe impl Sync for FrozenPrefixIntegerBitmaps<'_, '_> {}
#[tracing::instrument(level = "trace", skip_all, target = "indexing::prefix")]
fn delete_prefixes(
wtxn: &mut RwTxn,
prefix_database: &Database<Bytes, DeCboRoaringBitmapCodec>,
prefix_database: &Database<Bytes, CboRoaringBitmapCodec>,
prefixes: &BTreeSet<Prefix>,
) -> Result<()> {
// We remove all the entries that are no more required in this word prefix docids database.
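
The rest of delete_prefixes is cut off, but its job follows from the comment: walk every entry under an obsolete prefix and delete it in place. A minimal sketch assuming heed's prefix_iter_mut/del_current and that Prefix exposes as_bytes (as the calls earlier in this file suggest):

use std::collections::BTreeSet;
use heed::types::Bytes;
use heed::{Database, RwTxn};
use crate::{CboRoaringBitmapCodec, Prefix, Result};

fn delete_prefixes_sketch(
    wtxn: &mut RwTxn,
    prefix_database: &Database<Bytes, CboRoaringBitmapCodec>,
    prefixes: &BTreeSet<Prefix>,
) -> Result<()> {
    for prefix in prefixes {
        // Skip decoding the bitmaps: only the keys matter for deletion.
        let mut iter =
            prefix_database.prefix_iter_mut(wtxn, prefix.as_bytes())?.lazily_decode_data();
        while iter.next().transpose()?.is_some() {
            // SAFETY: no reference to the current entry is kept across the deletion.
            unsafe { iter.del_current()? };
        }
    }
    Ok(())
}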

Some files were not shown because too many files have changed in this diff.