Mirror of https://github.com/meilisearch/meilisearch.git (synced 2025-12-23 12:57:17 +00:00)

Compare commits: 42 commits, log-search...delta-enco
| SHA1 |
|---|
| e8cdc2bd07 |
| 05f415f635 |
| 77608804fe |
| 60209bb6a8 |
| 20b772f6c1 |
| 9c6f9729b8 |
| 201d63d214 |
| b9ad0432e3 |
| 566146fce8 |
| e18760c735 |
| a06949180f |
| 152b98d15d |
| a773eb8fcd |
| 187fc80cc8 |
| 69136d5464 |
| 4791ce72b7 |
| 337394ffa0 |
| 704f1477b2 |
| 1fb1ade2bd |
| 19a88fef77 |
| e71e0cb1aa |
| 7f87e37049 |
| c904605f08 |
| 7c5dd02ffd |
| 7e8e6e787f |
| b7737d6810 |
| 55546f75d6 |
| 92354d0e03 |
| dc7fca4b47 |
| a2af104cd0 |
| 4c666e9774 |
| 7a7e75fc41 |
| 9db2b16eed |
| 7d0633df22 |
| 055c65211f |
| ae77ca9033 |
| 2138504df9 |
| 2ba3fafcc3 |
| 614affd0b1 |
| 8582975fc5 |
| 14db3dbcc4 |
| a61ef955fc |
.github/workflows/test-suite.yml (vendored), 2 changes
```diff
@@ -15,7 +15,7 @@ env:

 jobs:
   test-linux:
-    name: Tests on Ubuntu
+    name: Tests on ${{ matrix.runner }} ${{ matrix.features }}
     runs-on: ${{ matrix.runner }}
     strategy:
       matrix:
```
Cargo.lock (generated), 606 changes
File diff suppressed because it is too large.
```diff
@@ -23,7 +23,7 @@ members = [
 ]

 [workspace.package]
-version = "1.30.0"
+version = "1.30.1"
 authors = [
     "Quentin de Quelen <quentin@dequelen.me>",
     "Clément Renault <clement@meilisearch.com>",
```
```diff
@@ -172,8 +172,7 @@ pub fn run_benches(c: &mut criterion::Criterion, confs: &[Conf]) {
             |b, &query| {
                 b.iter(|| {
                     let rtxn = index.read_txn().unwrap();
-                    let progress = Progress::default();
-                    let mut search = index.search(&rtxn, &progress);
+                    let mut search = index.search(&rtxn);
                     search
                         .query(query)
                         .terms_matching_strategy(TermsMatchingStrategy::default());
```
```diff
@@ -153,8 +153,7 @@ fn main() {
                 .unwrap();

             // after executing a batch we check if the database is corrupted
-            let progress = Progress::default();
-            let res = index.search(&wtxn, &progress).execute().unwrap();
+            let res = index.search(&wtxn).execute().unwrap();
             index.documents(&wtxn, res.documents_ids).unwrap();
             progression.fetch_add(1, Ordering::Relaxed);
         }
```
```diff
@@ -4,7 +4,7 @@ use std::fmt::Write;
 use meilisearch_types::batches::{Batch, BatchEnqueuedAt, BatchStats};
 use meilisearch_types::heed::types::{SerdeBincode, SerdeJson, Str};
 use meilisearch_types::heed::{Database, RoTxn};
-use meilisearch_types::milli::{CboRoaringBitmapCodec, RoaringBitmapCodec, BEU32};
+use meilisearch_types::milli::{DeCboRoaringBitmapCodec, RoaringBitmapCodec, BEU32};
 use meilisearch_types::tasks::{Details, Kind, Status, Task};
 use meilisearch_types::versioning::{self, VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH};
 use roaring::RoaringBitmap;
```
```diff
@@ -188,7 +188,7 @@ pub fn snapshot_all_batches(rtxn: &RoTxn, db: Database<BEU32, SerdeJson<Batch>>)

 pub fn snapshot_batches_to_tasks_mappings(
     rtxn: &RoTxn,
-    db: Database<BEU32, CboRoaringBitmapCodec>,
+    db: Database<BEU32, DeCboRoaringBitmapCodec>,
 ) -> String {
     let mut snap = String::new();
     let iter = db.iter(rtxn).unwrap();
```
```diff
@@ -199,7 +199,7 @@ pub fn snapshot_batches_to_tasks_mappings(
     snap
 }

-pub fn snapshot_date_db(rtxn: &RoTxn, db: Database<BEI128, CboRoaringBitmapCodec>) -> String {
+pub fn snapshot_date_db(rtxn: &RoTxn, db: Database<BEI128, DeCboRoaringBitmapCodec>) -> String {
     let mut snap = String::new();
     let iter = db.iter(rtxn).unwrap();
     for next in iter {
```
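Every remaining hunk in the queue code swaps `CboRoaringBitmapCodec` for `DeCboRoaringBitmapCodec`, which together with the `delta-enco` branch name suggests the new codec delta-encodes bitmap contents. As a rough sketch of the underlying idea only (the codec's actual wire format is not shown anywhere in this diff), delta-encoding turns a sorted id list into small gaps that compress far better than absolute values:

```rust
/// Illustrative only: delta-encode a sorted list of document ids.
fn delta_encode(sorted_ids: &[u32]) -> Vec<u32> {
    let mut prev = 0;
    sorted_ids
        .iter()
        .map(|&id| {
            let delta = id - prev; // ids are sorted, so this never underflows
            prev = id;
            delta
        })
        .collect()
}

/// Inverse: running sum of the deltas restores the original ids.
fn delta_decode(deltas: &[u32]) -> Vec<u32> {
    let mut acc = 0;
    deltas
        .iter()
        .map(|&d| {
            acc += d;
            acc
        })
        .collect()
}

fn main() {
    let ids = vec![1000, 1001, 1003, 1010];
    let deltas = delta_encode(&ids); // [1000, 1, 2, 7]
    assert_eq!(delta_decode(&deltas), ids);
}
```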
```diff
@@ -4,7 +4,7 @@ use std::ops::{Bound, RangeBounds};
 use meilisearch_types::batches::{Batch, BatchId};
 use meilisearch_types::heed::types::{DecodeIgnore, SerdeBincode, SerdeJson, Str};
 use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn, WithoutTls};
-use meilisearch_types::milli::{CboRoaringBitmapCodec, RoaringBitmapCodec, BEU32};
+use meilisearch_types::milli::{DeCboRoaringBitmapCodec, RoaringBitmapCodec, BEU32};
 use meilisearch_types::tasks::{Kind, Status};
 use roaring::{MultiOps, RoaringBitmap};
 use time::OffsetDateTime;
```
```diff
@@ -42,11 +42,11 @@ pub struct BatchQueue {
     /// Store the batches associated to an index.
     pub(crate) index_tasks: Database<Str, RoaringBitmapCodec>,
     /// Store the batches containing tasks which were enqueued at a specific date
-    pub(crate) enqueued_at: Database<BEI128, CboRoaringBitmapCodec>,
+    pub(crate) enqueued_at: Database<BEI128, DeCboRoaringBitmapCodec>,
     /// Store the batches containing finished tasks started at a specific date
-    pub(crate) started_at: Database<BEI128, CboRoaringBitmapCodec>,
+    pub(crate) started_at: Database<BEI128, DeCboRoaringBitmapCodec>,
     /// Store the batches containing tasks finished at a specific date
-    pub(crate) finished_at: Database<BEI128, CboRoaringBitmapCodec>,
+    pub(crate) finished_at: Database<BEI128, DeCboRoaringBitmapCodec>,
 }

 impl BatchQueue {
```
```diff
@@ -14,7 +14,7 @@ use std::time::Duration;
 use file_store::FileStore;
 use meilisearch_types::batches::BatchId;
 use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn, WithoutTls};
-use meilisearch_types::milli::{CboRoaringBitmapCodec, BEU32};
+use meilisearch_types::milli::{DeCboRoaringBitmapCodec, BEU32};
 use meilisearch_types::tasks::network::DbTaskNetwork;
 use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task};
 use roaring::RoaringBitmap;
```
```diff
@@ -131,7 +131,7 @@ pub struct Queue {
     pub(crate) batches: batches::BatchQueue,

     /// Matches a batch id with the associated task ids.
-    pub(crate) batch_to_tasks_mapping: Database<BEU32, CboRoaringBitmapCodec>,
+    pub(crate) batch_to_tasks_mapping: Database<BEU32, DeCboRoaringBitmapCodec>,

     /// The list of files referenced by the tasks.
     pub(crate) file_store: FileStore,
```
```diff
@@ -2,7 +2,7 @@ use std::ops::{Bound, RangeBounds};

 use meilisearch_types::heed::types::{DecodeIgnore, SerdeBincode, SerdeJson, Str};
 use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn, WithoutTls};
-use meilisearch_types::milli::{CboRoaringBitmapCodec, RoaringBitmapCodec, BEU32};
+use meilisearch_types::milli::{DeCboRoaringBitmapCodec, RoaringBitmapCodec, BEU32};
 use meilisearch_types::tasks::network::DbTaskNetwork;
 use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task};
 use roaring::{MultiOps, RoaringBitmap};
```
```diff
@@ -44,11 +44,11 @@ pub struct TaskQueue {
     /// Store the tasks that were canceled by a task uid
     pub(crate) canceled_by: Database<BEU32, RoaringBitmapCodec>,
     /// Store the task ids of tasks which were enqueued at a specific date
-    pub(crate) enqueued_at: Database<BEI128, CboRoaringBitmapCodec>,
+    pub(crate) enqueued_at: Database<BEI128, DeCboRoaringBitmapCodec>,
     /// Store the task ids of finished tasks which started being processed at a specific date
-    pub(crate) started_at: Database<BEI128, CboRoaringBitmapCodec>,
+    pub(crate) started_at: Database<BEI128, DeCboRoaringBitmapCodec>,
     /// Store the task ids of tasks which finished at a specific date
-    pub(crate) finished_at: Database<BEI128, CboRoaringBitmapCodec>,
+    pub(crate) finished_at: Database<BEI128, DeCboRoaringBitmapCodec>,
 }

 impl TaskQueue {
```
```diff
@@ -662,13 +662,8 @@ impl IndexScheduler {
         // 2. Get the task set for index = name that appeared before the index swap task
         let mut index_lhs_task_ids = self.queue.tasks.index_tasks(wtxn, lhs)?;
         index_lhs_task_ids.remove_range(task_id..);
-        let index_rhs_task_ids = if rename {
-            let mut index_rhs_task_ids = self.queue.tasks.index_tasks(wtxn, rhs)?;
-            index_rhs_task_ids.remove_range(task_id..);
-            index_rhs_task_ids
-        } else {
-            RoaringBitmap::new()
-        };
+        let mut index_rhs_task_ids = self.queue.tasks.index_tasks(wtxn, rhs)?;
+        index_rhs_task_ids.remove_range(task_id..);

         // 3. before_name -> new_name in the task's KindWithContent
         progress.update_progress(InnerSwappingTwoIndexes::UpdateTheTasks);
```
```diff
@@ -7,9 +7,9 @@ source: crates/index-scheduler/src/scheduler/test.rs
 ----------------------------------------------------------------------
 ### All Tasks:
 0 {uid: 0, batch_uid: 0, status: succeeded, details: { primary_key: Some("id"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "b", primary_key: Some("id") }}
-1 {uid: 1, batch_uid: 1, status: succeeded, details: { primary_key: Some("id"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "b", primary_key: Some("id") }}
+1 {uid: 1, batch_uid: 1, status: succeeded, details: { primary_key: Some("id"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "a", primary_key: Some("id") }}
 2 {uid: 2, batch_uid: 2, status: succeeded, details: { primary_key: Some("id"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "d", primary_key: Some("id") }}
-3 {uid: 3, batch_uid: 3, status: succeeded, details: { primary_key: Some("id"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "d", primary_key: Some("id") }}
+3 {uid: 3, batch_uid: 3, status: succeeded, details: { primary_key: Some("id"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "c", primary_key: Some("id") }}
 4 {uid: 4, batch_uid: 4, status: succeeded, details: { swaps: [IndexSwap { indexes: ("a", "b"), rename: false }, IndexSwap { indexes: ("c", "d"), rename: false }] }, kind: IndexSwap { swaps: [IndexSwap { indexes: ("a", "b"), rename: false }, IndexSwap { indexes: ("c", "d"), rename: false }] }}
 5 {uid: 5, status: enqueued, details: { swaps: [IndexSwap { indexes: ("a", "c"), rename: false }] }, kind: IndexSwap { swaps: [IndexSwap { indexes: ("a", "c"), rename: false }] }}
 ----------------------------------------------------------------------
@@ -22,10 +22,10 @@ succeeded [0,1,2,3,4,]
 "indexSwap" [4,5,]
 ----------------------------------------------------------------------
 ### Index Tasks:
-a [4,5,]
-b [0,1,4,]
-c [4,5,]
-d [2,3,4,]
+a [1,4,5,]
+b [0,4,]
+c [3,4,5,]
+d [2,4,]
 ----------------------------------------------------------------------
 ### Index Mapper:
 a: { number_of_documents: 0, field_distribution: {} }
```
```diff
@@ -7,9 +7,9 @@ source: crates/index-scheduler/src/scheduler/test.rs
 ----------------------------------------------------------------------
 ### All Tasks:
 0 {uid: 0, batch_uid: 0, status: succeeded, details: { primary_key: Some("id"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "b", primary_key: Some("id") }}
-1 {uid: 1, batch_uid: 1, status: succeeded, details: { primary_key: Some("id"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "b", primary_key: Some("id") }}
+1 {uid: 1, batch_uid: 1, status: succeeded, details: { primary_key: Some("id"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "c", primary_key: Some("id") }}
 2 {uid: 2, batch_uid: 2, status: succeeded, details: { primary_key: Some("id"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "d", primary_key: Some("id") }}
-3 {uid: 3, batch_uid: 3, status: succeeded, details: { primary_key: Some("id"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "d", primary_key: Some("id") }}
+3 {uid: 3, batch_uid: 3, status: succeeded, details: { primary_key: Some("id"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "a", primary_key: Some("id") }}
 4 {uid: 4, batch_uid: 4, status: succeeded, details: { swaps: [IndexSwap { indexes: ("c", "b"), rename: false }, IndexSwap { indexes: ("a", "d"), rename: false }] }, kind: IndexSwap { swaps: [IndexSwap { indexes: ("c", "b"), rename: false }, IndexSwap { indexes: ("a", "d"), rename: false }] }}
 5 {uid: 5, batch_uid: 5, status: succeeded, details: { swaps: [IndexSwap { indexes: ("a", "c"), rename: false }] }, kind: IndexSwap { swaps: [IndexSwap { indexes: ("a", "c"), rename: false }] }}
 ----------------------------------------------------------------------
@@ -22,10 +22,10 @@ succeeded [0,1,2,3,4,5,]
 "indexSwap" [4,5,]
 ----------------------------------------------------------------------
 ### Index Tasks:
-a [5,]
-b [0,1,4,]
-c [4,5,]
-d [2,3,4,]
+a [3,4,5,]
+b [0,4,]
+c [1,4,5,]
+d [2,4,]
 ----------------------------------------------------------------------
 ### Index Mapper:
 a: { number_of_documents: 0, field_distribution: {} }
```
```diff
@@ -7,9 +7,9 @@ source: crates/index-scheduler/src/scheduler/test.rs
 ----------------------------------------------------------------------
 ### All Tasks:
 0 {uid: 0, batch_uid: 0, status: succeeded, details: { primary_key: Some("id"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "b", primary_key: Some("id") }}
-1 {uid: 1, batch_uid: 1, status: succeeded, details: { primary_key: Some("id"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "b", primary_key: Some("id") }}
+1 {uid: 1, batch_uid: 1, status: succeeded, details: { primary_key: Some("id"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "c", primary_key: Some("id") }}
 2 {uid: 2, batch_uid: 2, status: succeeded, details: { primary_key: Some("id"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "d", primary_key: Some("id") }}
-3 {uid: 3, batch_uid: 3, status: succeeded, details: { primary_key: Some("id"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "d", primary_key: Some("id") }}
+3 {uid: 3, batch_uid: 3, status: succeeded, details: { primary_key: Some("id"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "a", primary_key: Some("id") }}
 4 {uid: 4, batch_uid: 4, status: succeeded, details: { swaps: [IndexSwap { indexes: ("c", "b"), rename: false }, IndexSwap { indexes: ("a", "d"), rename: false }] }, kind: IndexSwap { swaps: [IndexSwap { indexes: ("c", "b"), rename: false }, IndexSwap { indexes: ("a", "d"), rename: false }] }}
 5 {uid: 5, batch_uid: 5, status: succeeded, details: { swaps: [IndexSwap { indexes: ("a", "c"), rename: false }] }, kind: IndexSwap { swaps: [IndexSwap { indexes: ("a", "c"), rename: false }] }}
 6 {uid: 6, batch_uid: 6, status: succeeded, details: { swaps: [] }, kind: IndexSwap { swaps: [] }}
@@ -23,10 +23,10 @@ succeeded [0,1,2,3,4,5,6,]
 "indexSwap" [4,5,6,]
 ----------------------------------------------------------------------
 ### Index Tasks:
-a [5,]
-b [0,1,4,]
-c [4,5,]
-d [2,3,4,]
+a [3,4,5,]
+b [0,4,]
+c [1,4,5,]
+d [2,4,]
 ----------------------------------------------------------------------
 ### Index Mapper:
 a: { number_of_documents: 0, field_distribution: {} }
```
```diff
@@ -5,7 +5,7 @@ use crate::test_utils::Breakpoint::*;
 use crate::test_utils::{
     index_creation_task, read_json, replace_document_import_task, sample_documents,
 };
-use crate::IndexScheduler;
+use crate::{IndexScheduler, Query};
 use big_s::S;
 use meili_snap::{json_string, snapshot};
 use meilisearch_auth::AuthFilter;
```
```diff
@@ -404,6 +404,103 @@ fn swap_indexes() {
     snapshot!(snapshot_index_scheduler(&index_scheduler), name: "third_empty_swap_processed");
 }

+#[test]
+fn swap_indexes_with_correct_task_allocations() {
+    let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]);
+
+    let to_enqueue = [index_creation_task("a", "id"), index_creation_task("b", "id")];
+
+    for task in to_enqueue {
+        let _ = index_scheduler.register(task, None, false).unwrap();
+        index_scheduler.assert_internally_consistent();
+    }
+
+    handle.advance_n_successful_batches(2);
+
+    let (file0, count0) = sample_documents(&index_scheduler, 1, 1);
+    let (file1, count1) = sample_documents(&index_scheduler, 2, 2);
+    let (file2, count2) = sample_documents(&index_scheduler, 3, 3);
+    let (file3, count3) = sample_documents(&index_scheduler, 4, 4);
+
+    file0.persist().unwrap();
+    file1.persist().unwrap();
+    file2.persist().unwrap();
+    file3.persist().unwrap();
+
+    index_scheduler
+        .register(replace_document_import_task("a", Some("id"), 1, count0), None, false)
+        .unwrap();
+
+    index_scheduler
+        .register(replace_document_import_task("a", Some("id"), 2, count1), None, false)
+        .unwrap();
+
+    index_scheduler
+        .register(replace_document_import_task("b", Some("id"), 3, count2), None, false)
+        .unwrap();
+
+    index_scheduler
+        .register(replace_document_import_task("b", Some("id"), 4, count3), None, false)
+        .unwrap();
+
+    handle.advance_n_successful_batches(2);
+
+    let (a_tasks, _) = index_scheduler
+        .get_tasks_from_authorized_indexes(
+            &Query { index_uids: Some(vec!["a".to_string()]), ..Default::default() },
+            &AuthFilter::default(),
+        )
+        .unwrap();
+
+    assert_eq!(a_tasks.len(), 3);
+
+    let (b_tasks, _) = index_scheduler
+        .get_tasks_from_authorized_indexes(
+            &Query { index_uids: Some(vec!["b".to_string()]), ..Default::default() },
+            &AuthFilter::default(),
+        )
+        .unwrap();
+
+    assert_eq!(b_tasks.len(), 3);
+
+    index_scheduler
+        .register(
+            KindWithContent::IndexSwap {
+                swaps: vec![IndexSwap { indexes: ("a".to_owned(), "b".to_owned()), rename: false }],
+            },
+            None,
+            false,
+        )
+        .unwrap();
+
+    handle.advance_one_successful_batch();
+
+    let (a_after_tasks, _) = index_scheduler
+        .get_tasks_from_authorized_indexes(
+            &Query { index_uids: Some(vec!["a".to_string()]), ..Default::default() },
+            &AuthFilter::default(),
+        )
+        .unwrap();
+
+    let (b_after_tasks, _) = index_scheduler
+        .get_tasks_from_authorized_indexes(
+            &Query { index_uids: Some(vec!["b".to_string()]), ..Default::default() },
+            &AuthFilter::default(),
+        )
+        .unwrap();
+
+    assert_eq!(a_after_tasks.len(), 3);
+    assert_eq!(a_after_tasks.len(), b_after_tasks.len());
+
+    for (a, b) in a_tasks.iter().zip(b_after_tasks.iter()) {
+        assert_eq!(a.uid, b.uid);
+    }
+
+    for (b, a) in b_tasks.iter().zip(a_after_tasks.iter()) {
+        assert_eq!(b.uid, a.uid);
+    }
+}
+
 #[test]
 fn swap_indexes_errors() {
     let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]);
```
```diff
@@ -8,7 +8,7 @@ use convert_case::{Case, Casing as _};
 use meilisearch_types::batches::{Batch, BatchEnqueuedAt, BatchId, BatchStats};
 use meilisearch_types::heed::{Database, RoTxn, RwTxn};
 use meilisearch_types::milli::progress::Progress;
-use meilisearch_types::milli::{CboRoaringBitmapCodec, ChannelCongestion};
+use meilisearch_types::milli::{ChannelCongestion, DeCboRoaringBitmapCodec};
 use meilisearch_types::task_view::DetailsView;
 use meilisearch_types::tasks::{
     BatchStopReason, Details, IndexSwap, Kind, KindWithContent, Status,
@@ -211,7 +211,7 @@ impl ProcessingBatch {

 pub(crate) fn insert_task_datetime(
     wtxn: &mut RwTxn,
-    database: Database<BEI128, CboRoaringBitmapCodec>,
+    database: Database<BEI128, DeCboRoaringBitmapCodec>,
     time: OffsetDateTime,
     task_id: TaskId,
 ) -> Result<()> {
@@ -224,7 +224,7 @@ pub(crate) fn insert_task_datetime(

 pub(crate) fn remove_task_datetime(
     wtxn: &mut RwTxn,
-    database: Database<BEI128, CboRoaringBitmapCodec>,
+    database: Database<BEI128, DeCboRoaringBitmapCodec>,
     time: OffsetDateTime,
     task_id: TaskId,
 ) -> Result<()> {
@@ -243,7 +243,7 @@ pub(crate) fn remove_task_datetime(

 pub(crate) fn remove_n_tasks_datetime_earlier_than(
     wtxn: &mut RwTxn,
-    database: Database<BEI128, CboRoaringBitmapCodec>,
+    database: Database<BEI128, DeCboRoaringBitmapCodec>,
     earlier_than: OffsetDateTime,
     mut count: usize,
     task_id: TaskId,
@@ -271,7 +271,7 @@ pub(crate) fn remove_n_tasks_datetime_earlier_than(
 pub(crate) fn keep_ids_within_datetimes(
     rtxn: &RoTxn,
     ids: &mut RoaringBitmap,
-    database: Database<BEI128, CboRoaringBitmapCodec>,
+    database: Database<BEI128, DeCboRoaringBitmapCodec>,
     after: Option<OffsetDateTime>,
     before: Option<OffsetDateTime>,
 ) -> Result<()> {
```
```diff
@@ -2,7 +2,7 @@ use std::collections::BTreeMap;

 use base64::Engine as _;
 use itertools::{EitherOrBoth, Itertools as _};
-use milli::{CboRoaringBitmapCodec, DocumentId};
+use milli::{DeCboRoaringBitmapCodec, DocumentId};
 use roaring::RoaringBitmap;
 use serde::{Deserialize, Serialize};
 use utoipa::ToSchema;
```
```diff
@@ -461,7 +461,8 @@ impl Serialize for TaskKeys {
     {
         let TaskKeys(task_keys) = self;
         let mut bytes = Vec::new();
-        CboRoaringBitmapCodec::serialize_into_vec(task_keys, &mut bytes);
+        // TODO correctly handle this io::Error
+        DeCboRoaringBitmapCodec::serialize_into(task_keys, &mut bytes).unwrap();
         let encoded = base64::prelude::BASE64_STANDARD.encode(&bytes);
         serializer.serialize_str(&encoded)
     }
```
```diff
@@ -498,7 +499,7 @@ impl<'de> serde::de::Visitor<'de> for TaskKeysVisitor {
     where
         E: serde::de::Error,
     {
-        let task_keys = CboRoaringBitmapCodec::deserialize_from(decoded).map_err(|_err| {
+        let task_keys = DeCboRoaringBitmapCodec::deserialize_from(decoded).map_err(|_err| {
             E::invalid_value(serde::de::Unexpected::Bytes(decoded), &"a cbo roaring bitmap")
         })?;
         Ok(TaskKeys(task_keys))
```
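The serializer above writes the bitmap through the codec and then base64-encodes the buffer. A minimal sketch of that round trip, substituting `RoaringBitmap`'s own portable serialization for `DeCboRoaringBitmapCodec` (whose API beyond `serialize_into`/`deserialize_from` is not visible in this diff):

```rust
use base64::Engine as _;
use roaring::RoaringBitmap;

fn main() {
    let task_keys: RoaringBitmap = (0u32..10).collect();

    // Serialize the bitmap to bytes, then base64-encode for transport.
    let mut bytes = Vec::new();
    task_keys.serialize_into(&mut bytes).unwrap();
    let encoded = base64::prelude::BASE64_STANDARD.encode(&bytes);

    // The visitor side: decode base64, then rebuild the bitmap.
    let decoded = base64::prelude::BASE64_STANDARD.decode(&encoded).unwrap();
    let roundtrip = RoaringBitmap::deserialize_from(&decoded[..]).unwrap();
    assert_eq!(task_keys, roundtrip);
}
```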
```diff
@@ -300,6 +300,7 @@ impl Infos {
             max_indexing_memory,
             max_indexing_threads,
             skip_index_budget: _,
+            experimental_disable_delta_encoding: _,
             experimental_no_edition_2024_for_settings,
             experimental_no_edition_2024_for_dumps,
             experimental_no_edition_2024_for_prefix_post_processing,
```
```diff
@@ -21,6 +21,7 @@ use meilisearch::{
     LogStderrType, Opt, ServicesData, SubscriberForSecondLayer,
 };
 use meilisearch_auth::{generate_master_key, AuthController, MASTER_KEY_MIN_SIZE};
+use meilisearch_types::milli::heed_codec::DELTA_ENCODING_STATUS;
 use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor};
 use tracing::level_filters::LevelFilter;
 use tracing_subscriber::layer::SubscriberExt as _;
```
```diff
@@ -95,6 +96,14 @@ async fn main() -> anyhow::Result<()> {
 async fn try_main(runtime: tokio::runtime::Handle) -> anyhow::Result<()> {
     let (opt, config_read_from) = Opt::try_build()?;

+    // Disables the delta encoding of bitmaps as soon as possible
+    if opt.indexer_options.experimental_disable_delta_encoding {
+        DELTA_ENCODING_STATUS.set_to_disabled()
+    } else {
+        DELTA_ENCODING_STATUS.set_to_enabled()
+    }
+    .expect("the delta-encoding status to be set only once");
+
     std::panic::set_hook(Box::new(on_panic));

     anyhow::ensure!(
```
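Only the call sites of `DELTA_ENCODING_STATUS` appear in the diff. A plausible shape for such a set-once global, purely as an assumption (the real type lives in milli's `heed_codec` module and is not shown here), is a `OnceLock`-backed status:

```rust
use std::sync::OnceLock;

/// Hypothetical sketch of a set-once delta-encoding toggle; the real
/// DELTA_ENCODING_STATUS type in milli is not shown in this diff.
pub struct DeltaEncodingStatus(OnceLock<bool>);

impl DeltaEncodingStatus {
    pub const fn new() -> Self {
        Self(OnceLock::new())
    }

    /// Errors if already set, matching the call site's
    /// "to be set only once" expectation.
    pub fn set_to_enabled(&self) -> Result<(), bool> {
        self.0.set(true)
    }

    pub fn set_to_disabled(&self) -> Result<(), bool> {
        self.0.set(false)
    }

    /// Codecs would default to enabled if nothing was set explicitly.
    pub fn is_enabled(&self) -> bool {
        *self.0.get_or_init(|| true)
    }
}

pub static DELTA_ENCODING_STATUS: DeltaEncodingStatus = DeltaEncodingStatus::new();
```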
```diff
@@ -60,6 +60,7 @@ const MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_FACET_POST_PROCESSING: &str =
     "MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_FACET_POST_PROCESSING";
 const MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_PREFIX_POST_PROCESSING: &str =
     "MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_PREFIX_POST_PROCESSING";
+const MEILI_EXPERIMENTAL_DISABLE_DELTA_ENCODING: &str = "MEILI_EXPERIMENTAL_DISABLE_DELTA_ENCODING";
 const MEILI_EXPERIMENTAL_ENABLE_METRICS: &str = "MEILI_EXPERIMENTAL_ENABLE_METRICS";
 const MEILI_EXPERIMENTAL_SEARCH_QUEUE_SIZE: &str = "MEILI_EXPERIMENTAL_SEARCH_QUEUE_SIZE";
 const MEILI_EXPERIMENTAL_DROP_SEARCH_AFTER: &str = "MEILI_EXPERIMENTAL_DROP_SEARCH_AFTER";
```
```diff
@@ -848,6 +849,14 @@ pub struct IndexerOpts {
     #[clap(long, env = MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_FACET_POST_PROCESSING)]
     #[serde(default)]
     pub experimental_no_edition_2024_for_facet_post_processing: bool,
+
+    /// Experimental disable delta-encoding for bitmaps. For more information,
+    /// see: <https://github.com/orgs/meilisearch/discussions/875>
+    ///
+    /// Enables the experimental disable delta-encoding for bitmaps feature.
+    #[clap(long, env = MEILI_EXPERIMENTAL_DISABLE_DELTA_ENCODING)]
+    #[serde(default)]
+    pub experimental_disable_delta_encoding: bool,
 }

 impl IndexerOpts {
```
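Assuming clap's usual kebab-case derivation (only the struct field and its attributes are visible above), the new option would surface as `--experimental-disable-delta-encoding` or via the `MEILI_EXPERIMENTAL_DISABLE_DELTA_ENCODING` environment variable. A self-contained sketch of the same derive:

```rust
use clap::Parser; // requires clap with the "derive" and "env" features

/// Hypothetical miniature of IndexerOpts, to show how clap maps the field.
#[derive(Parser)]
struct DemoOpts {
    /// Exposed as --experimental-disable-delta-encoding and readable from
    /// the MEILI_EXPERIMENTAL_DISABLE_DELTA_ENCODING environment variable.
    #[clap(long, env = "MEILI_EXPERIMENTAL_DISABLE_DELTA_ENCODING")]
    experimental_disable_delta_encoding: bool,
}

fn main() {
    let opts = DemoOpts::parse();
    println!("delta encoding disabled: {}", opts.experimental_disable_delta_encoding);
}
```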
```diff
@@ -861,6 +870,7 @@ impl IndexerOpts {
             experimental_no_edition_2024_for_dumps,
             experimental_no_edition_2024_for_prefix_post_processing,
             experimental_no_edition_2024_for_facet_post_processing,
+            experimental_disable_delta_encoding,
         } = self;
         if let Some(max_indexing_memory) = max_indexing_memory.0 {
             export_to_env_if_not_present(
```
```diff
@@ -898,6 +908,12 @@ impl IndexerOpts {
                 experimental_no_edition_2024_for_facet_post_processing.to_string(),
             );
         }
+        if experimental_disable_delta_encoding {
+            export_to_env_if_not_present(
+                MEILI_EXPERIMENTAL_DISABLE_DELTA_ENCODING,
+                experimental_disable_delta_encoding.to_string(),
+            );
+        }
     }
 }
```
```diff
@@ -913,6 +929,7 @@ impl TryFrom<&IndexerOpts> for IndexerConfig {
             experimental_no_edition_2024_for_dumps,
             experimental_no_edition_2024_for_prefix_post_processing,
             experimental_no_edition_2024_for_facet_post_processing,
+            experimental_disable_delta_encoding: _, // managed in try_main
         } = other;

         let thread_pool = ThreadPoolNoAbortBuilder::new_for_indexing()
```
```diff
@@ -1292,7 +1309,7 @@ where
     T: AsRef<OsStr>,
 {
     if let Err(VarError::NotPresent) = std::env::var(key) {
-        std::env::set_var(key, value);
+        unsafe { std::env::set_var(key, value) }
     }
 }
```
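The switch to `unsafe { std::env::set_var(...) }` here and in the test helpers below matches `std::env::set_var` becoming an `unsafe fn` in the Rust 2024 edition, since mutating the process environment can race with concurrent reads on most platforms. Roughly:

```rust
fn main() {
    // Editions up to 2021 allowed the safe call:
    //     std::env::set_var("MEILI_HTTP_ADDR", "127.0.0.1:7700");
    // Edition 2024 makes set_var unsafe; the caller asserts that no other
    // thread is concurrently reading or writing the process environment.
    unsafe { std::env::set_var("MEILI_HTTP_ADDR", "127.0.0.1:7700") }
}
```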
```diff
@@ -1,8 +1,7 @@
 use std::time::Duration;

 use meilisearch_types::error::{Code, ErrorCode, ResponseError};
-use meilisearch_types::milli::progress::Progress;
-use meilisearch_types::milli::{SearchStep, TimeBudget};
+use meilisearch_types::milli::TimeBudget;
 use rand::Rng;
 use reqwest::Client;
 use serde::{Deserialize, Serialize};
@@ -347,11 +346,9 @@ impl PersonalizationService {
         personalize: &Personalize,
         query: Option<&str>,
         time_budget: TimeBudget,
-        progress: &Progress,
     ) -> Result<SearchResult, ResponseError> {
         match self {
             Self::Cohere(cohere_service) => {
-                let _ = progress.update_progress_scoped(SearchStep::ApplyingPersonalization);
                 cohere_service
                     .rerank_search_results(search_result, personalize, query, time_budget)
                     .await
```
```diff
@@ -30,11 +30,7 @@ use meilisearch_types::features::{
 use meilisearch_types::heed::RoTxn;
 use meilisearch_types::keys::actions;
 use meilisearch_types::milli::index::ChatConfig;
-use meilisearch_types::milli::progress::Progress;
-use meilisearch_types::milli::{
-    all_obkv_to_json, obkv_to_json, OrderBy, PatternMatch, TimeBudget,
-    TotalProcessingTimeStep,
-};
+use meilisearch_types::milli::{all_obkv_to_json, obkv_to_json, OrderBy, PatternMatch, TimeBudget};
 use meilisearch_types::{Document, Index};
 use serde::Deserialize;
 use serde_json::json;
@@ -266,7 +262,6 @@ async fn process_search_request(
     filter: Option<String>,
 ) -> Result<(Index, Vec<Document>, String), ResponseError> {
     let index = index_scheduler.index(&index_uid)?;
-    let progress = Progress::default();
     let rtxn = index.static_read_txn()?;
     let ChatConfig { description: _, prompt: _, search_parameters } = index.chat_config(&rtxn)?;
     let mut query = SearchQuery {
@@ -290,9 +285,7 @@ async fn process_search_request(
     let search_kind =
         search_kind(&query, index_scheduler.get_ref(), index_uid.to_string(), &index)?;

-    progress.update_progress(TotalProcessingTimeStep::WaitingForPermit);
     let permit = search_queue.try_get_search_permit().await?;
-    progress.update_progress(TotalProcessingTimeStep::Searching);
     let features = index_scheduler.features();
     let index_cloned = index.clone();
     let output = tokio::task::spawn_blocking(move || -> Result<_, ResponseError> {
@@ -304,15 +297,8 @@ async fn process_search_request(
             None => TimeBudget::default(),
         };

-        let (search, _is_finite_pagination, _max_total_hits, _offset) = prepare_search(
-            &index_cloned,
-            &rtxn,
-            &query,
-            &search_kind,
-            time_budget,
-            features,
-            &progress,
-        )?;
+        let (search, _is_finite_pagination, _max_total_hits, _offset) =
+            prepare_search(&index_cloned, &rtxn, &query, &search_kind, time_budget, features)?;

         match search_from_kind(index_uid, search_kind, search) {
             Ok((search_results, _)) => Ok((rtxn, Ok(search_results))),
```
```diff
@@ -8,8 +8,7 @@ use meilisearch_types::error::deserr_codes::*;
 use meilisearch_types::error::ResponseError;
 use meilisearch_types::index_uid::IndexUid;
 use meilisearch_types::locales::Locale;
-use meilisearch_types::milli::progress::Progress;
-use meilisearch_types::milli::{self, TotalProcessingTimeStep};
+use meilisearch_types::milli;
 use meilisearch_types::serde_cs::vec::CS;
 use serde_json::Value;
 use tracing::debug;
@@ -337,10 +336,6 @@ pub async fn search_with_url_query(
 ) -> Result<HttpResponse, ResponseError> {
     let request_uid = Uuid::now_v7();
     debug!(request_uid = ?request_uid, parameters = ?params, "Search get");
-    let progress = Progress::default();
-    progress.update_progress(TotalProcessingTimeStep::WaitingForPermit);
-    let permit = search_queue.try_get_search_permit().await?;
-    progress.update_progress(TotalProcessingTimeStep::Searching);
     let index_uid = IndexUid::try_from(index_uid.into_inner())?;

     let mut query: SearchQuery = params.into_inner().try_into()?;
@@ -364,9 +359,9 @@ pub async fn search_with_url_query(
     // Save the query string for personalization if requested
     let personalize_query = personalize.is_some().then(|| query.q.clone()).flatten();

+    let permit = search_queue.try_get_search_permit().await?;
     let include_metadata = parse_include_metadata_header(&req);

-    let progress_clone = progress.clone();
     let search_result = tokio::task::spawn_blocking(move || {
         perform_search(
             SearchParams {
@@ -379,21 +374,15 @@ pub async fn search_with_url_query(
                 include_metadata,
             },
             &index,
-            &progress_clone,
         )
     })
     .await;
     permit.drop().await;
     let search_result = search_result?;

-    let analytics_step =
-        progress.update_progress_scoped(TotalProcessingTimeStep::PublishingAnalytics);
     if let Ok((search_result, _)) = search_result.as_ref() {
         aggregate.succeed(search_result);
     }
     analytics.publish(aggregate, &req);
-    // early finish progress step
-    drop(analytics_step);

     let (mut search_result, time_budget) = search_result?;
@@ -405,12 +394,11 @@ pub async fn search_with_url_query(
             personalize,
             personalize_query.as_deref(),
             time_budget,
-            &progress,
         )
         .await?;
     }

-    debug!(request_uid = ?request_uid, returns = ?search_result, progress = ?progress.accumulated_durations(), "Search get");
+    debug!(request_uid = ?request_uid, returns = ?search_result, "Search get");
     Ok(HttpResponse::Ok().json(search_result))
 }
@@ -482,11 +470,6 @@ pub async fn search_with_post(
     let index_uid = IndexUid::try_from(index_uid.into_inner())?;
     let request_uid = Uuid::now_v7();

-    let progress = Progress::default();
-    progress.update_progress(TotalProcessingTimeStep::WaitingForPermit);
-    let permit = search_queue.try_get_search_permit().await?;
-    progress.update_progress(TotalProcessingTimeStep::Searching);
-
     let mut query = params.into_inner();
     debug!(request_uid = ?request_uid, parameters = ?query, "Search post");
@@ -511,7 +494,7 @@ pub async fn search_with_post(
     // Save the query string for personalization if requested
     let personalize_query = personalize.is_some().then(|| query.q.clone()).flatten();

-    let progress_clone = progress.clone();
+    let permit = search_queue.try_get_search_permit().await?;
     let search_result = tokio::task::spawn_blocking(move || {
         perform_search(
             SearchParams {
@@ -524,14 +507,11 @@ pub async fn search_with_post(
                 include_metadata,
             },
             &index,
-            &progress_clone,
         )
     })
     .await;
     permit.drop().await;
     let search_result = search_result?;
-    let analytics_step =
-        progress.update_progress_scoped(TotalProcessingTimeStep::PublishingAnalytics);
     if let Ok((ref search_result, _)) = search_result {
         aggregate.succeed(search_result);
         if search_result.degraded {
@@ -539,8 +519,6 @@ pub async fn search_with_post(
         }
     }
     analytics.publish(aggregate, &req);
-    // early finish progress step
-    drop(analytics_step);

     let (mut search_result, time_budget) = search_result?;
@@ -552,12 +530,11 @@ pub async fn search_with_post(
             personalize,
             personalize_query.as_deref(),
             time_budget,
-            &progress,
         )
         .await?;
     }

-    debug!(request_uid = ?request_uid, returns = ?search_result, progress = ?progress.accumulated_durations(), "Search post");
+    debug!(request_uid = ?request_uid, returns = ?search_result, "Search post");
     Ok(HttpResponse::Ok().json(search_result))
 }
```
```diff
@@ -8,8 +8,6 @@ use meilisearch_types::error::deserr_codes::*;
 use meilisearch_types::error::ResponseError;
 use meilisearch_types::index_uid::IndexUid;
 use meilisearch_types::keys::actions;
-use meilisearch_types::milli::progress::Progress;
-use meilisearch_types::milli::TotalProcessingTimeStep;
 use meilisearch_types::serde_cs::vec::CS;
 use serde_json::Value;
 use tracing::debug;
@@ -219,7 +217,7 @@ async fn similar(
     mut query: SimilarQuery,
 ) -> Result<SimilarResult, ResponseError> {
     let retrieve_vectors = RetrieveVectors::new(query.retrieve_vectors);
-    let progress = Progress::default();
+
     // Tenant token search_rules.
     if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid) {
         add_search_rules(&mut query.filter, search_rules);
@@ -236,10 +234,7 @@ async fn similar(
         Route::Similar,
     )?;

-    let progress_clone = progress.clone();
-    let result = tokio::task::spawn_blocking(move || {
-        let _step = progress_clone.update_progress_scoped(TotalProcessingTimeStep::Searching);
-
+    tokio::task::spawn_blocking(move || {
         perform_similar(
             &index,
             query,
@@ -248,14 +243,9 @@ async fn similar(
             quantized,
             retrieve_vectors,
             index_scheduler.features(),
-            &progress_clone,
         )
     })
-    .await;
-
-    debug!(progress = ?progress.accumulated_durations(), "Similar");
-
-    result?
+    .await?
 }

 #[derive(Debug, deserr::Deserr, IntoParams)]
```
```diff
@@ -6,8 +6,6 @@ use index_scheduler::IndexScheduler;
 use meilisearch_types::deserr::DeserrJsonError;
 use meilisearch_types::error::ResponseError;
 use meilisearch_types::keys::actions;
-use meilisearch_types::milli::progress::Progress;
-use meilisearch_types::milli::TotalProcessingTimeStep;
 use serde::Serialize;
 use tracing::debug;
 use utoipa::{OpenApi, ToSchema};
```
```diff
@@ -155,10 +153,7 @@ pub async fn multi_search_with_post(
 ) -> Result<HttpResponse, ResponseError> {
     // Since we don't want to process half of the search requests and then get a permit refused
     // we're going to get one permit for the whole duration of the multi-search request.
-    let progress = Progress::default();
-    progress.update_progress(TotalProcessingTimeStep::WaitingForPermit);
     let permit = search_queue.try_get_search_permit().await?;
-    progress.update_progress(TotalProcessingTimeStep::Searching);
     let request_uid = Uuid::now_v7();

     let federated_search = params.into_inner();
```
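The comment in that hunk captures the concurrency rule: one permit guards the entire multi-search, so a request is either admitted whole or refused up front. A minimal sketch of the same pattern with a plain `tokio::sync::Semaphore` (the real `SearchQueue` is more elaborate and is not shown in this diff):

```rust
use std::sync::Arc;
use tokio::sync::Semaphore;

// Sketch: hold one permit across all sub-searches so a multi-search
// either runs entirely or is refused early, never half-way through.
async fn multi_search(
    sem: Arc<Semaphore>,
    queries: Vec<String>,
) -> Result<Vec<String>, &'static str> {
    let _permit = sem.try_acquire().map_err(|_| "too many concurrent searches")?;
    let mut results = Vec::with_capacity(queries.len());
    for q in queries {
        // every sub-search runs under the same permit
        results.push(run_one(&q).await);
    }
    Ok(results)
    // _permit dropped here, releasing capacity for the next request
}

async fn run_one(q: &str) -> String {
    format!("results for {q}")
}
```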
```diff
@@ -218,7 +213,6 @@ pub async fn multi_search_with_post(
             is_proxy,
             request_uid,
             include_metadata,
-            &progress,
         )
         .await;
         permit.drop().await;
@@ -294,7 +288,6 @@ pub async fn multi_search_with_post(
                 .with_index(query_index)?;
             let retrieve_vector = RetrieveVectors::new(query.retrieve_vectors);

-            let progress_clone = progress.clone();
             let (mut search_result, time_budget) = tokio::task::spawn_blocking(move || {
                 perform_search(
                     SearchParams {
@@ -307,7 +300,6 @@ pub async fn multi_search_with_post(
                         include_metadata,
                     },
                     &index,
-                    &progress_clone,
                 )
             })
             .await
@@ -322,7 +314,6 @@ pub async fn multi_search_with_post(
                     personalize,
                     personalize_query.as_deref(),
                     time_budget,
-                    &progress,
                 )
                 .await
                 .with_index(query_index)?;
@@ -333,19 +324,15 @@ pub async fn multi_search_with_post(
                 result: search_result,
             });
         }

        Ok(search_results)
    }
    .await;
    permit.drop().await;

-    let analytics_step =
-        progress.update_progress_scoped(TotalProcessingTimeStep::PublishingAnalytics);
    if search_results.is_ok() {
        multi_aggregate.succeed();
    }
    analytics.publish(multi_aggregate, &req);
-    drop(analytics_step);

    let search_results = search_results.map_err(|(mut err, query_index)| {
        // Add the query index that failed as context for the error message.
@@ -358,7 +345,6 @@ pub async fn multi_search_with_post(
     debug!(
         request_uid = ?request_uid,
         returns = ?search_results,
-        progress = ?progress.accumulated_durations(),
         "Multi-search"
     );
```
```diff
@@ -11,13 +11,9 @@ use index_scheduler::{IndexScheduler, RoFeatures};
 use itertools::Itertools;
 use meilisearch_types::error::ResponseError;
 use meilisearch_types::milli::order_by_map::OrderByMap;
-use meilisearch_types::milli::progress::Progress;
 use meilisearch_types::milli::score_details::{ScoreDetails, WeightedScoreValue};
 use meilisearch_types::milli::vector::Embedding;
-use meilisearch_types::milli::{
-    self, DocumentId, FederatingResultsStep, OrderBy, SearchStep, TimeBudget,
-    DEFAULT_VALUES_PER_FACET,
-};
+use meilisearch_types::milli::{self, DocumentId, OrderBy, TimeBudget, DEFAULT_VALUES_PER_FACET};
 use meilisearch_types::network::{Network, Remote};
 use roaring::RoaringBitmap;
 use tokio::task::JoinHandle;
@@ -47,7 +43,6 @@ pub async fn perform_federated_search(
     is_proxy: bool,
     request_uid: Uuid,
     include_metadata: bool,
-    progress: &Progress,
 ) -> Result<FederatedSearchResult, ResponseError> {
     if is_proxy {
         features.check_network("Performing a remote federated search")?;
@@ -116,7 +111,7 @@ pub async fn perform_federated_search(

     for (index_uid, queries) in partitioned_queries.local_queries_by_index {
         // note: this is the only place we open `index_uid`
-        search_by_index.execute(index_uid, queries, &params, progress)?;
+        search_by_index.execute(index_uid, queries, &params)?;
     }

     // bonus step, make sure to return an error if an index wants a non-faceted field, even if no query actually uses that index.
@@ -131,8 +126,6 @@ pub async fn perform_federated_search(
         facet_order,
     } = search_by_index;

-    progress.update_progress(SearchStep::FederatingResults);
-    progress.update_progress(FederatingResultsStep::WaitingForRemoteResults);
     let before_waiting_remote_results = std::time::Instant::now();

     // 2.3. Wait for proxy search requests to complete
@@ -141,7 +134,7 @@ pub async fn perform_federated_search(
     let after_waiting_remote_results = std::time::Instant::now();

     // 3. merge hits and metadata across indexes and hosts
-    progress.update_progress(FederatingResultsStep::MergingResults);
+
     // 3.1. Build metadata in the same order as the original queries
     let query_metadata = precomputed_query_metadata.map(|precomputed_query_metadata| {
         // If a remote is present, set the local remote name
@@ -194,7 +187,6 @@ pub async fn perform_federated_search(
     };

     // 3.5. merge facets
-    progress.update_progress(FederatingResultsStep::MergingFacets);
     let (facet_distribution, facet_stats, facets_by_index) =
         facet_order.merge(federation.merge_facets, remote_results, facets);
@@ -839,7 +831,6 @@ impl SearchByIndex {
         index_uid: String,
         queries: Vec<QueryByIndex>,
         params: &SearchByIndexParams<'_>,
-        progress: &Progress,
     ) -> Result<(), ResponseError> {
         let first_query_index = queries.first().map(|query| query.query_index);
         let index = match params.index_scheduler.index(&index_uid) {
@@ -966,7 +957,6 @@ impl SearchByIndex {
                 // clones of `TimeBudget` share the budget rather than restart it
                 time_budget.clone(),
                 params.features,
-                progress,
             )?;

             search.scoring_strategy(milli::score_details::ScoringStrategy::Detailed);
@@ -1054,7 +1044,7 @@ impl SearchByIndex {
                      hit_maker,
                      query_index,
                  }| {
-                    let mut hit = hit_maker.make_hit(docid, &score, progress)?;
+                    let mut hit = hit_maker.make_hit(docid, &score)?;
                     let weighted_score = ScoreDetails::global_score(score.iter()) * (*weight);

                     let mut _federation = serde_json::json!(
```
```diff
@@ -17,13 +17,11 @@ use meilisearch_types::heed::RoTxn;
 use meilisearch_types::index_uid::IndexUid;
 use meilisearch_types::locales::Locale;
 use meilisearch_types::milli::index::{self, EmbeddingsWithMetadata, SearchParameters};
-use meilisearch_types::milli::progress::Progress;
 use meilisearch_types::milli::score_details::{ScoreDetails, ScoringStrategy};
 use meilisearch_types::milli::vector::parsed_vectors::ExplicitVectors;
 use meilisearch_types::milli::vector::Embedder;
 use meilisearch_types::milli::{
-    FacetValueHit, InternalError, OrderBy, PatternMatch, SearchForFacetValues, SearchStep,
-    TimeBudget,
+    FacetValueHit, InternalError, OrderBy, PatternMatch, SearchForFacetValues, TimeBudget,
 };
 use meilisearch_types::settings::DEFAULT_PAGINATION_MAX_TOTAL_HITS;
 use meilisearch_types::{milli, Document};
@@ -1026,13 +1024,11 @@ pub fn prepare_search<'t>(
     search_kind: &SearchKind,
     time_budget: TimeBudget,
     features: RoFeatures,
-    progress: &'t Progress,
 ) -> Result<(milli::Search<'t>, bool, usize, usize), ResponseError> {
-    let _step = progress.update_progress_scoped(SearchStep::PreparingSearch);
     if query.media.is_some() {
         features.check_multimodal("passing `media` in a search query")?;
     }
-    let mut search = index.search(rtxn, progress);
+    let mut search = index.search(rtxn);
     search.time_budget(time_budget);
     if let Some(ranking_score_threshold) = query.ranking_score_threshold {
         search.ranking_score_threshold(ranking_score_threshold.0);
@@ -1052,7 +1048,6 @@ pub fn prepare_search<'t>(
             let vector = match query.vector.clone() {
                 Some(vector) => vector,
                 None => {
-                    let _ = progress.update_progress_scoped(SearchStep::EmbeddingQuery);
                     let span = tracing::trace_span!(target: "search::vector", "embed_one");
                     let _entered = span.enter();
@@ -1066,7 +1061,6 @@ pub fn prepare_search<'t>(
                         (q, media) => milli::vector::SearchQuery::Media { q, media },
                     };

-
                     embedder
                         .embed_search(search_query, Some(deadline))
                         .map_err(milli::vector::Error::from)
@@ -1179,7 +1173,6 @@ pub struct SearchParams {
 pub fn perform_search(
     params: SearchParams,
     index: &Index,
-    progress: &Progress,
 ) -> Result<(SearchResult, TimeBudget), ResponseError> {
     let SearchParams {
         index_uid,
@@ -1198,15 +1191,8 @@ pub fn perform_search(
         None => TimeBudget::default(),
     };

-    let (search, is_finite_pagination, max_total_hits, offset) = prepare_search(
-        index,
-        &rtxn,
-        &query,
-        &search_kind,
-        time_budget.clone(),
-        features,
-        progress,
-    )?;
+    let (search, is_finite_pagination, max_total_hits, offset) =
+        prepare_search(index, &rtxn, &query, &search_kind, time_budget.clone(), features)?;

     let (
         milli::SearchResult {
@@ -1267,7 +1253,6 @@ pub fn perform_search(
         personalize: _,
     } = query;

-    progress.update_progress(SearchStep::FormattingResults);
     let format = AttributesFormat {
         attributes_to_retrieve,
         retrieve_vectors,
@@ -1290,7 +1275,6 @@ pub fn perform_search(
         format,
         matching_words,
         documents_ids.iter().copied().zip(document_scores.iter()),
-        progress,
     )?;

     let number_of_hits = min(candidates.len() as usize, max_total_hits);
@@ -1313,13 +1297,11 @@ pub fn perform_search(

     let (facet_distribution, facet_stats) = facets
         .map(move |facets| {
-            let _ = progress.update_progress_scoped(SearchStep::ComputingFacetDistribution);
             compute_facet_distribution_stats(&facets, index, &rtxn, candidates, Route::Search)
         })
         .transpose()?
         .map(|ComputedFacets { distribution, stats }| (distribution, stats))
         .unzip();

     let result = SearchResult {
         hits: documents,
         hits_info,
@@ -1334,7 +1316,6 @@ pub fn perform_search(
         request_uid: Some(request_uid),
         metadata,
     };

     Ok((result, time_budget))
 }
@@ -1599,13 +1580,7 @@ impl<'a> HitMaker<'a> {
         })
     }

-    pub fn make_hit(
-        &self,
-        id: u32,
-        score: &[ScoreDetails],
-        progress: &Progress,
-    ) -> milli::Result<SearchHit> {
-        let _step = progress.update_progress_scoped(SearchStep::FormattingResults);
+    pub fn make_hit(&self, id: u32, score: &[ScoreDetails]) -> milli::Result<SearchHit> {
         let (_, obkv) =
             self.index.iter_documents(self.rtxn, std::iter::once(id))?.next().unwrap()?;
@@ -1694,7 +1669,6 @@ fn make_hits<'a>(
     format: AttributesFormat,
     matching_words: milli::MatchingWords,
     documents_ids_scores: impl Iterator<Item = (u32, &'a Vec<ScoreDetails>)> + 'a,
-    progress: &Progress,
 ) -> milli::Result<Vec<SearchHit>> {
     let mut documents = Vec::new();
@@ -1712,7 +1686,7 @@ fn make_hits<'a>(
     let hit_maker = HitMaker::new(index, rtxn, format, formatter_builder)?;

     for (id, score) in documents_ids_scores {
-        documents.push(hit_maker.make_hit(id, score, progress)?);
+        documents.push(hit_maker.make_hit(id, score)?);
     }
     Ok(documents)
 }
@@ -1727,7 +1701,6 @@ pub fn perform_facet_search(
     locales: Option<Vec<Language>>,
 ) -> Result<FacetSearchResult, ResponseError> {
     let before_search = Instant::now();
-    let progress = Progress::default();
     let rtxn = index.read_txn()?;
     let time_budget = match index.search_cutoff(&rtxn)? {
         Some(cutoff) => TimeBudget::new(Duration::from_millis(cutoff)),
@@ -1756,15 +1729,8 @@ pub fn perform_facet_search(
             .collect()
     });

-    let (search, _, _, _) = prepare_search(
-        index,
-        &rtxn,
-        &search_query,
-        &search_kind,
-        time_budget,
-        features,
-        &progress,
-    )?;
+    let (search, _, _, _) =
+        prepare_search(index, &rtxn, &search_query, &search_kind, time_budget, features)?;
     let mut facet_search = SearchForFacetValues::new(
         facet_name,
         search,
@@ -1796,7 +1762,6 @@ pub fn perform_similar(
     quantized: bool,
     retrieve_vectors: RetrieveVectors,
     features: RoFeatures,
-    progress: &Progress,
 ) -> Result<SimilarResult, ResponseError> {
     let before_search = Instant::now();
     let rtxn = index.read_txn()?;
@@ -1886,7 +1851,6 @@ pub fn perform_similar(
         format,
         Default::default(),
         documents_ids.iter().copied().zip(document_scores.iter()),
-        progress,
     )?;

     let max_total_hits = index
```
```diff
@@ -43,9 +43,9 @@ impl Server<Owned> {
         let dir = TempDir::new().unwrap();

         if cfg!(windows) {
-            std::env::set_var("TMP", TEST_TEMP_DIR.path());
+            unsafe { std::env::set_var("TMP", TEST_TEMP_DIR.path()) }
         } else {
-            std::env::set_var("TMPDIR", TEST_TEMP_DIR.path());
+            unsafe { std::env::set_var("TMPDIR", TEST_TEMP_DIR.path()) }
         }

         let options = default_settings(dir.path());
@@ -58,9 +58,9 @@ impl Server<Owned> {
     pub async fn new_auth_with_options(mut options: Opt, dir: TempDir) -> Self {
         if cfg!(windows) {
-            std::env::set_var("TMP", TEST_TEMP_DIR.path());
+            unsafe { std::env::set_var("TMP", TEST_TEMP_DIR.path()) }
         } else {
-            std::env::set_var("TMPDIR", TEST_TEMP_DIR.path());
+            unsafe { std::env::set_var("TMPDIR", TEST_TEMP_DIR.path()) }
         }

         options.master_key = Some("MASTER_KEY".to_string());
@@ -215,9 +215,9 @@ impl Server<Shared> {
         let dir = TempDir::new().unwrap();

         if cfg!(windows) {
-            std::env::set_var("TMP", TEST_TEMP_DIR.path());
+            unsafe { std::env::set_var("TMP", TEST_TEMP_DIR.path()) }
         } else {
-            std::env::set_var("TMPDIR", TEST_TEMP_DIR.path());
+            unsafe { std::env::set_var("TMPDIR", TEST_TEMP_DIR.path()) }
         }

         let options = default_settings(dir.path());
@@ -508,6 +508,8 @@ pub fn default_settings(dir: impl AsRef<Path>) -> Opt {
             experimental_no_edition_2024_for_dumps: false,
             experimental_no_edition_2024_for_prefix_post_processing: false,
             experimental_no_edition_2024_for_facet_post_processing: false,
+            // It has no effect to set the delta encoding here as the toggle is done in try_main
+            experimental_disable_delta_encoding: false,
         },
         experimental_enable_metrics: false,
         ..Parser::parse_from(None as Option<&str>)
```
```diff
@@ -452,6 +452,7 @@ async fn limit_offset() {
 }

 #[actix_rt::test]
+#[cfg(not(windows))]
 async fn simple_search_hf() {
     let server = Server::new_shared();
     let index = index_with_documents_hf(server, &SIMPLE_SEARCH_DOCUMENTS).await;
```
```diff
@@ -103,7 +103,7 @@ async fn swap_indexes() {
     {
       "uid": 1,
       "batchUid": 1,
-      "indexUid": "b",
+      "indexUid": "a",
      "status": "succeeded",
      "type": "documentAdditionOrUpdate",
      "canceledBy": null,
@@ -266,7 +266,7 @@ async fn swap_indexes() {
     {
       "uid": 4,
       "batchUid": 4,
-      "indexUid": "d",
+      "indexUid": "c",
      "status": "succeeded",
      "type": "documentAdditionOrUpdate",
      "canceledBy": null,
@@ -341,7 +341,7 @@ async fn swap_indexes() {
     {
       "uid": 0,
       "batchUid": 0,
-      "indexUid": "b",
+      "indexUid": "a",
      "status": "succeeded",
      "type": "documentAdditionOrUpdate",
      "canceledBy": null,
```
```diff
@@ -274,19 +274,19 @@ async fn test_both_apis() {
         "birthyear": 2011,
         "breed": "Beagle"
       },
-      {
-        "id": 3,
-        "name": "Max",
-        "gender": "M",
-        "birthyear": 1995,
-        "breed": "Labrador Retriever"
-      },
       {
         "id": 2,
         "name": "Vénus",
         "gender": "F",
         "birthyear": 2003,
         "breed": "Jack Russel Terrier"
       },
+      {
+        "id": 3,
+        "name": "Max",
+        "gender": "M",
+        "birthyear": 1995,
+        "breed": "Labrador Retriever"
+      }
     ]
     "###);
@@ -314,19 +314,19 @@ async fn test_both_apis() {
         "birthyear": 2011,
         "breed": "Beagle"
       },
-      {
-        "id": 3,
-        "name": "Max",
-        "gender": "M",
-        "birthyear": 1995,
-        "breed": "Labrador Retriever"
-      },
       {
         "id": 2,
         "name": "Vénus",
         "gender": "F",
         "birthyear": 2003,
         "breed": "Jack Russel Terrier"
       },
+      {
+        "id": 3,
+        "name": "Max",
+        "gender": "M",
+        "birthyear": 1995,
+        "breed": "Labrador Retriever"
+      }
     ]
     "###);
@@ -354,19 +354,19 @@ async fn test_both_apis() {
         "birthyear": 2011,
         "breed": "Beagle"
       },
-      {
-        "id": 0,
-        "name": "kefir",
-        "gender": "M",
-        "birthyear": 2023,
-        "breed": "Patou"
-      },
       {
         "id": 2,
         "name": "Vénus",
         "gender": "F",
         "birthyear": 2003,
         "breed": "Jack Russel Terrier"
       },
+      {
+        "id": 0,
+        "name": "kefir",
+        "gender": "M",
+        "birthyear": 2023,
+        "breed": "Patou"
+      }
     ]
     "###);
@@ -394,19 +394,19 @@ async fn test_both_apis() {
         "birthyear": 2011,
         "breed": "Beagle"
       },
-      {
-        "id": 0,
-        "name": "kefir",
-        "gender": "M",
-        "birthyear": 2023,
-        "breed": "Patou"
-      },
       {
         "id": 2,
         "name": "Vénus",
         "gender": "F",
         "birthyear": 2003,
         "breed": "Jack Russel Terrier"
       },
+      {
+        "id": 0,
+        "name": "kefir",
+        "gender": "M",
+        "birthyear": 2023,
+        "breed": "Patou"
+      }
     ]
     "###);
@@ -420,13 +420,6 @@ async fn test_both_apis() {
     snapshot!(code, @"200 OK");
     snapshot!(json_string!(response["hits"]), @r###"
     [
-      {
-        "id": 0,
-        "name": "kefir",
-        "gender": "M",
-        "birthyear": 2023,
-        "breed": "Patou"
-      },
       {
         "id": 1,
         "name": "Intel",
@@ -435,11 +428,11 @@ async fn test_both_apis() {
         "breed": "Beagle"
       },
       {
-        "id": 3,
-        "name": "Max",
+        "id": 0,
+        "name": "kefir",
         "gender": "M",
-        "birthyear": 1995,
-        "breed": "Labrador Retriever"
+        "birthyear": 2023,
+        "breed": "Patou"
       },
       {
         "id": 2,
@@ -447,6 +440,13 @@ async fn test_both_apis() {
         "gender": "F",
         "birthyear": 2003,
         "breed": "Jack Russel Terrier"
+      },
+      {
+        "id": 3,
+        "name": "Max",
+        "gender": "M",
+        "birthyear": 1995,
+        "breed": "Labrador Retriever"
       }
     ]
     "###);
@@ -460,13 +460,6 @@ async fn test_both_apis() {
     snapshot!(code, @"200 OK");
     snapshot!(json_string!(response["hits"]), @r###"
     [
-      {
-        "id": 0,
-        "name": "kefir",
-        "gender": "M",
-        "birthyear": 2023,
-        "breed": "Patou"
-      },
       {
         "id": 1,
         "name": "Intel",
@@ -475,11 +468,11 @@ async fn test_both_apis() {
         "breed": "Beagle"
       },
       {
-        "id": 3,
-        "name": "Max",
+        "id": 0,
+        "name": "kefir",
         "gender": "M",
-        "birthyear": 1995,
-        "breed": "Labrador Retriever"
+        "birthyear": 2023,
+        "breed": "Patou"
       },
       {
         "id": 2,
@@ -487,6 +480,13 @@ async fn test_both_apis() {
         "gender": "F",
         "birthyear": 2003,
         "breed": "Jack Russel Terrier"
+      },
+      {
+        "id": 3,
+        "name": "Max",
+        "gender": "M",
+        "birthyear": 1995,
+        "breed": "Labrador Retriever"
+      }
     ]
     "###);
```
@@ -10,6 +10,7 @@ license.workspace = true

[dependencies]
anyhow = "1.0.100"
bstr = "1.12.1"
clap = { version = "4.5.52", features = ["derive"] }
dump = { path = "../dump" }
file-store = { path = "../file-store" }

@@ -19,7 +19,7 @@ use meilisearch_types::milli::constants::RESERVED_VECTORS_FIELD_NAME;
use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader};
use meilisearch_types::milli::index::EmbeddingsWithMetadata;
use meilisearch_types::milli::vector::parsed_vectors::{ExplicitVectors, VectorOrArrayOfVectors};
use meilisearch_types::milli::{obkv_to_json, BEU32};
use meilisearch_types::milli::{obkv_to_json, DeCboRoaringBitmapCodec, BEU32};
use meilisearch_types::tasks::{Status, Task};
use meilisearch_types::versioning::{get_version, parse_version};
use meilisearch_types::Index;
@@ -140,6 +140,14 @@ enum Command {
        #[arg(long, value_delimiter = ',')]
        index_part: Vec<IndexPart>,
    },

    /// Outputs all entries of the index in a formatted way.
    ///
    /// This command is useful for debugging purposes.
    OutputFormattedEntries {
        #[arg(long)]
        index_name: String,
    },
}

#[derive(Clone, ValueEnum)]
@@ -169,9 +177,148 @@ fn main() -> anyhow::Result<()> {
        Command::HairDryer { index_name, index_part } => {
            hair_dryer(db_path, &index_name, &index_part)
        }
        Command::OutputFormattedEntries { index_name } => {
            output_formatted_entries(db_path, &index_name)
        }
    }
}

fn output_formatted_entries(db_path: PathBuf, index_name: &str) -> anyhow::Result<()> {
    let index_scheduler_path = db_path.join("tasks");
    let env = unsafe {
        EnvOpenOptions::new().read_txn_without_tls().max_dbs(100).open(&index_scheduler_path)
    }
    .with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?;

    let index_mapper_rtxn = env.read_txn()?;
    let index_mapping: Database<Str, UuidCodec> =
        try_opening_database(&env, &index_mapper_rtxn, "index-mapping")?;

    for result in index_mapping.iter(&index_mapper_rtxn)? {
        let (uid, uuid) = result?;
        if uid != index_name {
            continue;
        }

        let index_path = db_path.join("indexes").join(uuid.to_string());
        let index = Index::new(EnvOpenOptions::new().read_txn_without_tls(), &index_path, false)
            .with_context(|| {
                format!("While trying to open the index at path {:?}", index_path.display())
            })?;
        let rtxn = index.read_txn()?;

        let Index {
            word_docids,
            exact_word_docids,
            word_prefix_docids,
            exact_word_prefix_docids,
            word_pair_proximity_docids,
            word_position_docids,
            word_fid_docids,
            field_id_word_count_docids,
            word_prefix_position_docids,
            word_prefix_fid_docids,
            facet_id_exists_docids,
            facet_id_is_null_docids,
            facet_id_is_empty_docids,
            ..
        } = index;

        struct DatabaseInfo {
            name: &'static str,
            database: Database<Bytes, DeCboRoaringBitmapCodec>,
        }

        impl DatabaseInfo {
            fn new(name: &'static str, database: Database<Bytes, DeCboRoaringBitmapCodec>) -> Self {
                DatabaseInfo { name, database }
            }
        }

        let databases = [
            DatabaseInfo::new("word_docids", word_docids.remap_key_type()),
            DatabaseInfo::new("exact_word_docids", exact_word_docids.remap_key_type()),
            DatabaseInfo::new("word_prefix_docids", word_prefix_docids.remap_key_type()),
            DatabaseInfo::new(
                "exact_word_prefix_docids",
                exact_word_prefix_docids.remap_key_type(),
            ),
            DatabaseInfo::new(
                "word_pair_proximity_docids",
                word_pair_proximity_docids.remap_key_type(),
            ),
            DatabaseInfo::new("word_position_docids", word_position_docids.remap_key_type()),
            DatabaseInfo::new("word_fid_docids", word_fid_docids.remap_key_type()),
            DatabaseInfo::new(
                "field_id_word_count_docids",
                field_id_word_count_docids.remap_key_type(),
            ),
            DatabaseInfo::new(
                "word_prefix_position_docids",
                word_prefix_position_docids.remap_key_type(),
            ),
            DatabaseInfo::new("word_prefix_fid_docids", word_prefix_fid_docids.remap_key_type()),
            DatabaseInfo::new("facet_id_exists_docids", facet_id_exists_docids.remap_key_type()),
            DatabaseInfo::new("facet_id_is_null_docids", facet_id_is_null_docids.remap_key_type()),
            DatabaseInfo::new(
                "facet_id_is_empty_docids",
                facet_id_is_empty_docids.remap_key_type(),
            ),
            // DatabaseInfo::new("facet_id_f64_docids", facet_id_f64_docids.remap_key_type()),
            // DatabaseInfo::new(
            //     "facet_id_string_docids",
            //     facet_id_string_docids.remap_key_type(),
            // ),
            // DatabaseInfo::new(
            //     "facet_id_normalized_string_strings",
            //     facet_id_normalized_string_strings.remap_key_type(),
            // ),
            // DatabaseInfo::new("facet_id_string_fst", facet_id_string_fst.remap_key_type()),
            // DatabaseInfo::new(
            //     "field_id_docid_facet_f64s",
            //     field_id_docid_facet_f64s.remap_key_type(),
            // ),
            // DatabaseInfo::new(
            //     "field_id_docid_facet_strings",
            //     field_id_docid_facet_strings.remap_key_type(),
            // ),
        ];

        use bstr::ByteSlice as _;

        let stdout = std::io::stdout();
        let mut stdout_lock = BufWriter::new(stdout.lock());

        for DatabaseInfo { name: db_name, database } in databases {
            for result in database.iter(&rtxn)? {
                let (key, bitmap) = result?;
                let value: Vec<u32> = bitmap.iter().collect();
                writeln!(&mut stdout_lock, "{db_name}: {} -> {:?}", key.as_bstr(), value)?;
            }
        }

        {
            let db_name = "main";

            let fst = index.words_fst(&rtxn)?;
            writeln!(&mut stdout_lock, "{db_name}: words-fst -> {fst:?}")?;

            let prefix_fst = index.words_prefixes_fst(&rtxn)?;
            writeln!(&mut stdout_lock, "{db_name}: words-prefixes-fst -> {prefix_fst:?}")?;

            let documents_ids = index.documents_ids(&rtxn)?;
            writeln!(&mut stdout_lock, "{db_name}: documents-ids -> {documents_ids:?}")?;

            let exact_words = index.exact_words(&rtxn)?;
            writeln!(&mut stdout_lock, "{db_name}: exact-words -> {exact_words:?}")?;
        }

        break;
    }

    Ok(())
}

/// Clears the task queue located at `db_path`.
fn clear_task_queue(db_path: PathBuf) -> anyhow::Result<()> {
    let path = db_path.join("tasks");

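Assuming the usual clap kebab-case conversion for the `OutputFormattedEntries` variant and its `index_name` argument, the new subcommand would be invoked roughly like this; the `--db-path` flag and the index name are illustrative, not taken from the patch:

meilitool --db-path ./data.ms output-formatted-entries --index-name movies
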
@@ -91,7 +91,7 @@ rhai = { version = "1.23.6", features = [
    "sync",
] }
arroy = "0.6.4-nested-rtxns"
hannoy = { version = "0.1.0-nested-rtxns", features = ["arroy"] }
hannoy = { version = "0.1.2-nested-rtxns", features = ["arroy"] }
rand = "0.8.5"
tracing = "0.1.41"
ureq = { version = "2.12.1", features = ["json"] }
@@ -120,14 +120,16 @@ twox-hash = { version = "2.1.2", default-features = false, features = [
] }
geo-types = "0.7.17"
zerometry = "0.3.0"
bitpacking = "0.9.2"

[dev-dependencies]
mimalloc = { version = "0.1.48", default-features = false }
# fixed version due to format breakages in v1.40
insta = "=1.39.0"
mimalloc = { version = "0.1.48", default-features = false }
maplit = "1.0.2"
md5 = "0.8.0"
meili-snap = { path = "../meili-snap" }
quickcheck = "1.0.3"
rand = { version = "0.8.5", features = ["small_rng"] }

[features]

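The newly added `bitpacking` dependency provides the block codecs the delta encoder below is built on. A minimal, self-contained sketch of the strictly-sorted round trip it offers; the doc-id values here are made up for illustration:

use bitpacking::{BitPacker, BitPacker4x};

fn main() {
    // One full BitPacker4x block (128 values) of strictly increasing doc ids.
    let docids: Vec<u32> = (0..BitPacker4x::BLOCK_LEN as u32).map(|i| i * 7).collect();
    let bitpacker = BitPacker4x::new();

    // Delta-encode and bit-pack the block, then decode it back.
    let num_bits = bitpacker.num_bits_strictly_sorted(None, &docids);
    let mut compressed = vec![0u8; BitPacker4x::compressed_block_size(num_bits)];
    bitpacker.compress_strictly_sorted(None, &docids, &mut compressed, num_bits);

    let mut decompressed = vec![0u32; BitPacker4x::BLOCK_LEN];
    bitpacker.decompress_strictly_sorted(None, &compressed, &mut decompressed, num_bits);
    assert_eq!(docids, decompressed);
}
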
@@ -12,7 +12,7 @@ use roaring::RoaringBitmap;
pub use self::field_doc_id_facet_codec::FieldDocIdFacetCodec;
pub use self::ordered_f64_codec::OrderedF64Codec;
use super::StrRefCodec;
use crate::{CboRoaringBitmapCodec, BEU16};
use crate::{DeCboRoaringBitmapCodec, BEU16};

pub type FieldDocIdFacetF64Codec = FieldDocIdFacetCodec<OrderedF64Codec>;
pub type FieldDocIdFacetStringCodec = FieldDocIdFacetCodec<StrRefCodec>;
@@ -97,7 +97,7 @@ impl<'a> heed::BytesEncode<'a> for FacetGroupValueCodec {

    fn bytes_encode(value: &'a Self::EItem) -> Result<Cow<'a, [u8]>, BoxedError> {
        let mut v = vec![value.size];
        CboRoaringBitmapCodec::serialize_into_vec(&value.bitmap, &mut v);
        DeCboRoaringBitmapCodec::serialize_into(&value.bitmap, &mut v)?;
        Ok(Cow::Owned(v))
    }
}
@@ -107,7 +107,7 @@ impl<'a> heed::BytesDecode<'a> for FacetGroupValueCodec {

    fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
        let size = bytes[0];
        let bitmap = CboRoaringBitmapCodec::deserialize_from(&bytes[1..])?;
        let bitmap = DeCboRoaringBitmapCodec::deserialize_from(&bytes[1..])?;
        Ok(FacetGroupValue { size, bitmap })
    }
}

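The `FacetGroupValue` layout implied by these two hunks is one `size` byte followed by the bitmap bytes, which can now be either raw u32s or delta-encoded. A tiny sketch of how such a value splits back apart; the helper name is hypothetical:

// [ size: u8 | bitmap bytes (raw u32s or delta-encoded) ... ]
fn split_facet_group_value(bytes: &[u8]) -> Option<(u8, &[u8])> {
    let (&size, bitmap_bytes) = bytes.split_first()?;
    Some((size, bitmap_bytes))
}
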
@@ -22,10 +22,10 @@ pub use self::beu32_str_codec::BEU32StrCodec;
pub use self::field_id_word_count_codec::FieldIdWordCountCodec;
pub use self::fst_set_codec::FstSetCodec;
pub use self::obkv_codec::ObkvCodec;
pub use self::roaring_bitmap::{BoRoaringBitmapCodec, CboRoaringBitmapCodec, RoaringBitmapCodec};
pub use self::roaring_bitmap_length::{
    BoRoaringBitmapLenCodec, CboRoaringBitmapLenCodec, RoaringBitmapLenCodec,
pub use self::roaring_bitmap::{
    DeCboRoaringBitmapCodec, RoaringBitmapCodec, DELTA_ENCODING_STATUS,
};
pub use self::roaring_bitmap_length::DeCboRoaringBitmapLenCodec;
pub use self::str_beu32_codec::{StrBEU16Codec, StrBEU32Codec};
pub use self::str_str_u8_codec::{U8StrStrCodec, UncheckedU8StrStrCodec};

@@ -1,50 +0,0 @@
use std::borrow::Cow;
use std::convert::TryInto;
use std::mem::size_of;

use heed::{BoxedError, BytesDecode};
use roaring::RoaringBitmap;

use crate::heed_codec::BytesDecodeOwned;

pub struct BoRoaringBitmapCodec;

impl BoRoaringBitmapCodec {
    pub fn serialize_into(bitmap: &RoaringBitmap, out: &mut Vec<u8>) {
        out.reserve(bitmap.len() as usize * size_of::<u32>());
        bitmap.iter().map(u32::to_ne_bytes).for_each(|bytes| out.extend_from_slice(&bytes));
    }
}

impl BytesDecode<'_> for BoRoaringBitmapCodec {
    type DItem = RoaringBitmap;

    fn bytes_decode(bytes: &[u8]) -> Result<Self::DItem, BoxedError> {
        let mut bitmap = RoaringBitmap::new();

        for chunk in bytes.chunks(size_of::<u32>()) {
            let bytes = chunk.try_into()?;
            bitmap.push(u32::from_ne_bytes(bytes));
        }

        Ok(bitmap)
    }
}

impl BytesDecodeOwned for BoRoaringBitmapCodec {
    type DItem = RoaringBitmap;

    fn bytes_decode_owned(bytes: &[u8]) -> Result<Self::DItem, BoxedError> {
        Self::bytes_decode(bytes)
    }
}

impl heed::BytesEncode<'_> for BoRoaringBitmapCodec {
    type EItem = RoaringBitmap;

    fn bytes_encode(item: &Self::EItem) -> Result<Cow<'_, [u8]>, BoxedError> {
        let mut out = Vec::new();
        BoRoaringBitmapCodec::serialize_into(item, &mut out);
        Ok(Cow::Owned(out))
    }
}
@@ -7,7 +7,6 @@ use heed::BoxedError;
use roaring::RoaringBitmap;

use crate::heed_codec::BytesDecodeOwned;
use crate::update::del_add::{DelAdd, KvReaderDelAdd};

/// This is the limit where using a byteorder becomes less size efficient
/// than using a direct roaring encoding; it is also the point where we are able
@@ -19,8 +18,19 @@ pub const THRESHOLD: usize = 7;
pub struct CboRoaringBitmapCodec;

impl CboRoaringBitmapCodec {
    /// If the number of items (u32s) to encode is less than or equal to the threshold,
    /// it means that it would weigh the same or less than the RoaringBitmap
    /// header, so we directly encode them using ByteOrder instead.
    pub fn bitmap_serialize_as_raw_u32s(roaring: &RoaringBitmap) -> bool {
        roaring.len() <= THRESHOLD as u64
    }

    pub fn bytes_deserialize_as_raw_u32s(bytes: &[u8]) -> bool {
        bytes.len() <= THRESHOLD * size_of::<u32>()
    }

    pub fn serialized_size(roaring: &RoaringBitmap) -> usize {
        if roaring.len() <= THRESHOLD as u64 {
        if Self::bitmap_serialize_as_raw_u32s(roaring) {
            roaring.len() as usize * size_of::<u32>()
        } else {
            roaring.serialized_size()
@@ -35,10 +45,7 @@ impl CboRoaringBitmapCodec {
        roaring: &RoaringBitmap,
        mut writer: W,
    ) -> io::Result<()> {
        if roaring.len() <= THRESHOLD as u64 {
            // If the number of items (u32s) to encode is less than or equal to the threshold,
            // it means that it would weigh the same or less than the RoaringBitmap
            // header, so we directly encode them using ByteOrder instead.
        if Self::bitmap_serialize_as_raw_u32s(roaring) {
            for integer in roaring {
                writer.write_u32::<NativeEndian>(integer)?;
            }
@@ -51,7 +58,7 @@ impl CboRoaringBitmapCodec {
    }

    pub fn deserialize_from(mut bytes: &[u8]) -> io::Result<RoaringBitmap> {
        if bytes.len() <= THRESHOLD * size_of::<u32>() {
        if Self::bytes_deserialize_as_raw_u32s(bytes) {
            // If at most THRESHOLD integers fit into this array of bytes,
            // it means that we used the ByteOrder codec serializer.
            let mut bitmap = RoaringBitmap::new();
@@ -71,7 +78,7 @@ impl CboRoaringBitmapCodec {
        other: &RoaringBitmap,
    ) -> io::Result<RoaringBitmap> {
        // See the `deserialize_from` method above for implementation details.
        if bytes.len() <= THRESHOLD * size_of::<u32>() {
        if Self::bytes_deserialize_as_raw_u32s(bytes) {
            let mut bitmap = RoaringBitmap::new();
            while let Ok(integer) = bytes.read_u32::<NativeEndian>() {
                if other.contains(integer) {
@@ -83,78 +90,6 @@ impl CboRoaringBitmapCodec {
            other.intersection_with_serialized_unchecked(Cursor::new(bytes))
        }
    }

    /// Merges serialized CboRoaringBitmaps in a buffer.
    ///
    /// If the merged values length is under the threshold, the values are directly
    /// serialized in the buffer; otherwise a RoaringBitmap is created from the
    /// values and is serialized in the buffer.
    pub fn merge_into<I, A>(slices: I, buffer: &mut Vec<u8>) -> io::Result<()>
    where
        I: IntoIterator<Item = A>,
        A: AsRef<[u8]>,
    {
        let mut roaring = RoaringBitmap::new();
        let mut vec = Vec::new();

        for bytes in slices {
            if bytes.as_ref().len() <= THRESHOLD * size_of::<u32>() {
                let mut reader = bytes.as_ref();
                while let Ok(integer) = reader.read_u32::<NativeEndian>() {
                    vec.push(integer);
                }
            } else {
                roaring |= RoaringBitmap::deserialize_unchecked_from(bytes.as_ref())?;
            }
        }

        if roaring.is_empty() {
            vec.sort_unstable();
            vec.dedup();

            if vec.len() <= THRESHOLD {
                for integer in vec {
                    buffer.extend_from_slice(&integer.to_ne_bytes());
                }
            } else {
                // We can safely unwrap because the vector is sorted above.
                let roaring = RoaringBitmap::from_sorted_iter(vec).unwrap();
                roaring.serialize_into(buffer)?;
            }
        } else {
            roaring.extend(vec);
            roaring.serialize_into(buffer)?;
        }

        Ok(())
    }

    /// Merges a DelAdd delta into a CboRoaringBitmap.
    pub fn merge_deladd_into<'a>(
        deladd: &KvReaderDelAdd,
        previous: &[u8],
        buffer: &'a mut Vec<u8>,
    ) -> io::Result<Option<&'a [u8]>> {
        // Deserialize the bitmap that is already there
        let mut previous = Self::deserialize_from(previous)?;

        // Remove the integers we no longer want from the previous bitmap
        if let Some(value) = deladd.get(DelAdd::Deletion) {
            previous -= Self::deserialize_from(value)?;
        }

        // Insert the new integers we want into the previous bitmap
        if let Some(value) = deladd.get(DelAdd::Addition) {
            previous |= Self::deserialize_from(value)?;
        }

        if previous.is_empty() {
            return Ok(None);
        }

        Self::serialize_into_vec(&previous, buffer);
        Ok(Some(&buffer[..]))
    }
}

impl heed::BytesDecode<'_> for CboRoaringBitmapCodec {
@@ -182,75 +117,3 @@ impl heed::BytesEncode<'_> for CboRoaringBitmapCodec {
        Ok(Cow::Owned(vec))
    }
}

#[cfg(test)]
mod tests {
    use std::iter::FromIterator;

    use heed::{BytesDecode, BytesEncode};

    use super::*;

    #[test]
    fn verify_encoding_decoding() {
        let input = RoaringBitmap::from_iter(0..THRESHOLD as u32);
        let bytes = CboRoaringBitmapCodec::bytes_encode(&input).unwrap();
        let output = CboRoaringBitmapCodec::bytes_decode(&bytes).unwrap();
        assert_eq!(input, output);
    }

    #[test]
    fn verify_threshold() {
        let input = RoaringBitmap::from_iter(0..THRESHOLD as u32);

        // use roaring bitmap
        let mut bytes = Vec::new();
        input.serialize_into(&mut bytes).unwrap();
        let roaring_size = bytes.len();

        // use byteorder directly
        let mut bytes = Vec::new();
        for integer in input {
            bytes.write_u32::<NativeEndian>(integer).unwrap();
        }
        let bo_size = bytes.len();

        assert!(roaring_size > bo_size);
    }

    #[test]
    fn merge_cbo_roaring_bitmaps() {
        let mut buffer = Vec::new();

        let small_data = [
            RoaringBitmap::from_sorted_iter(1..4).unwrap(),
            RoaringBitmap::from_sorted_iter(2..5).unwrap(),
            RoaringBitmap::from_sorted_iter(4..6).unwrap(),
            RoaringBitmap::from_sorted_iter(1..3).unwrap(),
        ];

        let small_data: Vec<_> =
            small_data.iter().map(|b| CboRoaringBitmapCodec::bytes_encode(b).unwrap()).collect();
        CboRoaringBitmapCodec::merge_into(small_data.as_slice(), &mut buffer).unwrap();
        let bitmap = CboRoaringBitmapCodec::deserialize_from(&buffer).unwrap();
        let expected = RoaringBitmap::from_sorted_iter(1..6).unwrap();
        assert_eq!(bitmap, expected);

        let medium_data = [
            RoaringBitmap::from_sorted_iter(1..4).unwrap(),
            RoaringBitmap::from_sorted_iter(2..5).unwrap(),
            RoaringBitmap::from_sorted_iter(4..8).unwrap(),
            RoaringBitmap::from_sorted_iter(0..3).unwrap(),
            RoaringBitmap::from_sorted_iter(7..23).unwrap(),
        ];

        let medium_data: Vec<_> =
            medium_data.iter().map(|b| CboRoaringBitmapCodec::bytes_encode(b).unwrap()).collect();
        buffer.clear();
        CboRoaringBitmapCodec::merge_into(medium_data.as_slice(), &mut buffer).unwrap();

        let bitmap = CboRoaringBitmapCodec::deserialize_from(&buffer).unwrap();
        let expected = RoaringBitmap::from_sorted_iter(0..23).unwrap();
        assert_eq!(bitmap, expected);
    }
}

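For intuition on `THRESHOLD`: seven u32s stored raw take 28 bytes, which undercuts the fixed header overhead of a serialized `RoaringBitmap` for such tiny sets. A minimal sketch that checks the trade-off instead of hard-coding it:

use roaring::RoaringBitmap;

fn main() {
    let bitmap: RoaringBitmap = (0..7u32).collect();
    let raw_size = bitmap.len() as usize * std::mem::size_of::<u32>();
    let roaring_size = bitmap.serialized_size();
    // With at most THRESHOLD integers, the raw u32 layout never loses.
    assert!(raw_size <= roaring_size);
    println!("raw: {raw_size} bytes, roaring: {roaring_size} bytes");
}
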
@@ -0,0 +1,374 @@
use std::borrow::Cow;
use std::io::{self, Cursor, ErrorKind};
use std::sync::OnceLock;

use byteorder::{NativeEndian, ReadBytesExt as _};
use heed::BoxedError;
use roaring::RoaringBitmap;

use super::cbo_roaring_bitmap_codec::CboRoaringBitmapCodec;
use super::de_roaring_bitmap_codec::DeRoaringBitmapCodec;
use crate::heed_codec::roaring_bitmap::take_all_blocks;
use crate::heed_codec::BytesDecodeOwned;
use crate::update::del_add::{DelAdd, KvReaderDelAdd};

/// Defines the status of the delta encoding, i.e. whether we have enabled it or not.
pub static DELTA_ENCODING_STATUS: DeltaEncodingStatusLock = DeltaEncodingStatusLock::new();

pub struct DeCboRoaringBitmapCodec;

impl DeCboRoaringBitmapCodec {
    pub fn serialized_size_with_tmp_buffer(
        bitmap: &RoaringBitmap,
        tmp_buffer: &mut Vec<u32>,
    ) -> usize {
        // We are stuck with this format because the CboRoaringBitmapCodec decides to write
        // raw and unencoded u32s, without a header, when there are at most THRESHOLD elements.
        if CboRoaringBitmapCodec::bitmap_serialize_as_raw_u32s(bitmap)
            || DELTA_ENCODING_STATUS.is_disabled()
        {
            CboRoaringBitmapCodec::serialized_size(bitmap)
        } else {
            DeRoaringBitmapCodec::serialized_size_with_tmp_buffer(bitmap, tmp_buffer)
        }
    }

    /// Writes the delta-encoded compressed version of
    /// the given roaring bitmap into the provided writer.
    pub fn serialize_into<W: io::Write>(bitmap: &RoaringBitmap, writer: &mut W) -> io::Result<()> {
        let mut tmp_buffer = Vec::new();
        Self::serialize_into_with_tmp_buffer(bitmap, writer, &mut tmp_buffer)
    }

    /// Same as [Self::serialize_into] but accepts a buffer to avoid allocating one.
    ///
    /// Note that the bitmap is serialized with the delta-encoded compressed version
    /// unless it is small enough for the raw-u32 layout or delta encoding is disabled.
    pub fn serialize_into_with_tmp_buffer<W: io::Write>(
        bitmap: &RoaringBitmap,
        writer: &mut W,
        tmp_buffer: &mut Vec<u32>,
    ) -> io::Result<()> {
        // We are stuck with this format because the CboRoaringBitmapCodec decides to write
        // raw and unencoded u32s, without a header, when there are at most THRESHOLD elements.
        if CboRoaringBitmapCodec::bitmap_serialize_as_raw_u32s(bitmap)
            || DELTA_ENCODING_STATUS.is_disabled()
        {
            CboRoaringBitmapCodec::serialize_into_writer(bitmap, writer)
        } else {
            DeRoaringBitmapCodec::serialize_into_with_tmp_buffer(bitmap, writer, tmp_buffer)
        }
    }

    /// Returns the delta-decoded roaring bitmap from the compressed bytes.
    pub fn deserialize_from(compressed: &[u8]) -> io::Result<RoaringBitmap> {
        let mut tmp_buffer = Vec::new();
        Self::deserialize_from_with_tmp_buffer(compressed, &mut tmp_buffer)
    }

    /// Same as [Self::deserialize_from] but accepts a buffer to avoid allocating one.
    ///
    /// It tries to decode the input by using the delta-decoded version and,
    /// if that fails, falls back to the CboRoaringBitmap version.
    pub fn deserialize_from_with_tmp_buffer(
        input: &[u8],
        tmp_buffer: &mut Vec<u32>,
    ) -> io::Result<RoaringBitmap> {
        // The input is too short to be a valid delta-encoded bitmap.
        // We fall back to the CboRoaringBitmap version with raw u32s.
        if CboRoaringBitmapCodec::bytes_deserialize_as_raw_u32s(input) {
            return CboRoaringBitmapCodec::deserialize_from(input);
        }

        match DeRoaringBitmapCodec::deserialize_from_with_tmp_buffer(
            input,
            take_all_blocks,
            tmp_buffer,
        ) {
            Ok(bitmap) => Ok(bitmap),
            // If the error kind is Other, it means that the delta-decoder found
            // an invalid magic header. We fall back to the CboRoaringBitmap version.
            Err(e) if e.kind() == ErrorKind::Other => {
                CboRoaringBitmapCodec::deserialize_from(input)
            }
            Err(e) => Err(e),
        }
    }

    /// Merges serialized DeCboRoaringBitmaps in a buffer.
    ///
    /// If the merged values length is under the threshold, the values are directly
    /// serialized in the buffer; otherwise a delta-encoded list of integers is created
    /// from the values and is serialized in the buffer.
    pub fn merge_into<I, A>(slices: I, buffer: &mut Vec<u8>) -> io::Result<()>
    where
        I: IntoIterator<Item = A>,
        A: AsRef<[u8]>,
    {
        let mut roaring = RoaringBitmap::new();
        let mut vec = Vec::new();
        let mut tmp_buffer = Vec::new();

        for bytes in slices {
            if CboRoaringBitmapCodec::bytes_deserialize_as_raw_u32s(bytes.as_ref()) {
                let mut reader = bytes.as_ref();
                while let Ok(integer) = reader.read_u32::<NativeEndian>() {
                    vec.push(integer);
                }
            } else {
                roaring |= DeCboRoaringBitmapCodec::deserialize_from_with_tmp_buffer(
                    bytes.as_ref(),
                    &mut tmp_buffer,
                )?;
            }
        }

        roaring.extend(vec);

        DeCboRoaringBitmapCodec::serialize_into_with_tmp_buffer(&roaring, buffer, &mut tmp_buffer)?;

        Ok(())
    }

    /// Does an intersection directly with a serialized delta-encoded bitmap.
    ///
    /// When doing the intersection we only need to deserialize the necessary
    /// bitmap containers and avoid a lot of unnecessary allocations. We do
    /// that by skipping entire delta-encoded blocks when possible to avoid
    /// storing them in the bitmap we use for the final intersection.
    pub fn intersection_with_serialized(
        bytes: &[u8],
        other: &RoaringBitmap,
    ) -> io::Result<RoaringBitmap> {
        if CboRoaringBitmapCodec::bytes_deserialize_as_raw_u32s(bytes) {
            return CboRoaringBitmapCodec::intersection_with_serialized(bytes, other);
        }

        // TODO move this tmp buffer outside
        let mut tmp_buffer = Vec::new();
        let filter_block = |first, last| other.range_cardinality(first..=last) == 0;

        match DeRoaringBitmapCodec::deserialize_from_with_tmp_buffer(
            bytes,
            filter_block,
            &mut tmp_buffer,
        ) {
            Ok(bitmap) => Ok(bitmap & other),
            // If the error kind is Other, it means that the delta-decoder found
            // an invalid magic header. We fall back to the CboRoaringBitmap version.
            Err(e) if e.kind() == ErrorKind::Other => {
                other.intersection_with_serialized_unchecked(Cursor::new(bytes))
            }
            Err(e) => Err(e),
        }
    }

    pub fn merge_deladd_into<'a>(
        deladd: &KvReaderDelAdd,
        previous: &[u8],
        buffer: &'a mut Vec<u8>,
        tmp_buffer: &mut Vec<u32>,
    ) -> io::Result<Option<&'a [u8]>> {
        // Deserialize the bitmap that is already there
        let mut previous = Self::deserialize_from_with_tmp_buffer(previous, tmp_buffer)?;

        // Remove the integers we no longer want from the previous bitmap
        if let Some(value) = deladd.get(DelAdd::Deletion) {
            previous -= Self::deserialize_from_with_tmp_buffer(value, tmp_buffer)?;
        }

        // Insert the new integers we want into the previous bitmap
        if let Some(value) = deladd.get(DelAdd::Addition) {
            previous |= Self::deserialize_from_with_tmp_buffer(value, tmp_buffer)?;
        }

        if previous.is_empty() {
            return Ok(None);
        }

        Self::serialize_into_with_tmp_buffer(&previous, buffer, tmp_buffer)?;

        Ok(Some(&buffer[..]))
    }
}

impl heed::BytesDecode<'_> for DeCboRoaringBitmapCodec {
    type DItem = RoaringBitmap;

    fn bytes_decode(bytes: &[u8]) -> Result<Self::DItem, BoxedError> {
        Self::deserialize_from(bytes).map_err(Into::into)
    }
}

impl BytesDecodeOwned for DeCboRoaringBitmapCodec {
    type DItem = RoaringBitmap;

    fn bytes_decode_owned(bytes: &[u8]) -> Result<Self::DItem, BoxedError> {
        Self::deserialize_from(bytes).map_err(Into::into)
    }
}

impl heed::BytesEncode<'_> for DeCboRoaringBitmapCodec {
    type EItem = RoaringBitmap;

    fn bytes_encode(item: &Self::EItem) -> Result<Cow<'_, [u8]>, BoxedError> {
        let mut tmp_buffer = Vec::new();
        let capacity = Self::serialized_size_with_tmp_buffer(item, &mut tmp_buffer);
        let mut output = Vec::with_capacity(capacity);
        Self::serialize_into_with_tmp_buffer(item, &mut output, &mut tmp_buffer)?;
        Ok(Cow::Owned(output))
    }
}

/// Manages the global status of the delta encoding.
///
/// Whether we must use delta encoding or not when encoding roaring bitmaps.
#[derive(Default)]
pub struct DeltaEncodingStatusLock(OnceLock<DeltaEncodingStatus>);

impl DeltaEncodingStatusLock {
    pub const fn new() -> Self {
        Self(OnceLock::new())
    }
}

#[derive(Default)]
enum DeltaEncodingStatus {
    Enabled,
    #[default]
    Disabled,
}

impl DeltaEncodingStatusLock {
    pub fn set_to_enabled(&self) -> Result<(), ()> {
        self.0.set(DeltaEncodingStatus::Enabled).map_err(drop)
    }

    pub fn set_to_disabled(&self) -> Result<(), ()> {
        self.0.set(DeltaEncodingStatus::Disabled).map_err(drop)
    }

    pub fn is_enabled(&self) -> bool {
        matches!(self.0.get(), Some(DeltaEncodingStatus::Enabled))
    }

    pub fn is_disabled(&self) -> bool {
        !self.is_enabled()
    }
}

#[cfg(test)]
mod tests {
    use std::iter::FromIterator;

    use byteorder::WriteBytesExt as _;
    use heed::{BytesDecode, BytesEncode};
    use quickcheck::quickcheck;
    use roaring::RoaringBitmap;

    use super::super::super::roaring_bitmap_length::DeCboRoaringBitmapLenCodec;
    use super::super::THRESHOLD;
    use super::*;

    #[test]
    fn verify_encoding_decoding() {
        let input = RoaringBitmap::from_iter(0..THRESHOLD as u32);
        let bytes = DeCboRoaringBitmapCodec::bytes_encode(&input).unwrap();
        let output = DeCboRoaringBitmapCodec::bytes_decode(&bytes).unwrap();
        assert_eq!(input, output);
    }

    #[test]
    fn verify_threshold() {
        let input = RoaringBitmap::from_iter(0..THRESHOLD as u32);

        // use roaring bitmap
        let mut bytes = Vec::new();
        input.serialize_into(&mut bytes).unwrap();
        let roaring_size = bytes.len();

        // use byteorder directly
        let mut bytes = Vec::new();
        for integer in input {
            bytes.write_u32::<NativeEndian>(integer).unwrap();
        }
        let bo_size = bytes.len();

        assert!(roaring_size > bo_size);
    }

    #[test]
    fn merge_de_cbo_roaring_bitmaps() {
        let mut buffer = Vec::new();

        let small_data = [
            RoaringBitmap::from_sorted_iter(1..4).unwrap(),
            RoaringBitmap::from_sorted_iter(2..5).unwrap(),
            RoaringBitmap::from_sorted_iter(4..6).unwrap(),
            RoaringBitmap::from_sorted_iter(1..3).unwrap(),
        ];

        let small_data: Vec<_> =
            small_data.iter().map(|b| DeCboRoaringBitmapCodec::bytes_encode(b).unwrap()).collect();
        DeCboRoaringBitmapCodec::merge_into(small_data.as_slice(), &mut buffer).unwrap();
        let bitmap = DeCboRoaringBitmapCodec::deserialize_from(&buffer).unwrap();
        let expected = RoaringBitmap::from_sorted_iter(1..6).unwrap();
        assert_eq!(bitmap, expected);

        let medium_data = [
            RoaringBitmap::from_sorted_iter(1..4).unwrap(),
            RoaringBitmap::from_sorted_iter(2..5).unwrap(),
            RoaringBitmap::from_sorted_iter(4..8).unwrap(),
            RoaringBitmap::from_sorted_iter(0..3).unwrap(),
            RoaringBitmap::from_sorted_iter(7..23).unwrap(),
        ];

        let medium_data: Vec<_> =
            medium_data.iter().map(|b| DeCboRoaringBitmapCodec::bytes_encode(b).unwrap()).collect();
        buffer.clear();
        DeCboRoaringBitmapCodec::merge_into(medium_data.as_slice(), &mut buffer).unwrap();

        let bitmap = DeCboRoaringBitmapCodec::deserialize_from(&buffer).unwrap();
        let expected = RoaringBitmap::from_sorted_iter(0..23).unwrap();
        assert_eq!(bitmap, expected);
    }

    quickcheck! {
        fn qc_random(xs: Vec<u32>) -> bool {
            let bitmap = RoaringBitmap::from_iter(xs);
            let mut compressed = Vec::new();
            let mut tmp_buffer = Vec::new();
            DeCboRoaringBitmapCodec::serialize_into_with_tmp_buffer(&bitmap, &mut compressed, &mut tmp_buffer).unwrap();
            let length = DeCboRoaringBitmapLenCodec::bytes_decode(&compressed[..]).unwrap();
            let decompressed = DeCboRoaringBitmapCodec::deserialize_from_with_tmp_buffer(&compressed[..], &mut tmp_buffer).unwrap();
            length == bitmap.len() && decompressed == bitmap
        }
    }

    quickcheck! {
        fn qc_random_check_serialized_size(xs: Vec<u32>) -> bool {
            let bitmap = RoaringBitmap::from_iter(xs);
            let mut compressed = Vec::new();
            let mut tmp_buffer = Vec::new();
            DeCboRoaringBitmapCodec::serialize_into_with_tmp_buffer(&bitmap, &mut compressed, &mut tmp_buffer).unwrap();
            let length = DeCboRoaringBitmapLenCodec::bytes_decode(&compressed).unwrap();
            let expected_len = DeCboRoaringBitmapCodec::serialized_size_with_tmp_buffer(&bitmap, &mut tmp_buffer);
            length == bitmap.len() && compressed.len() == expected_len
        }
    }

    quickcheck! {
        fn qc_random_intersection_with_serialized(lhs: Vec<u32>, rhs: Vec<u32>) -> bool {
            let mut compressed = Vec::new();
            let mut tmp_buffer = Vec::new();

            let lhs = RoaringBitmap::from_iter(lhs);
            let rhs = RoaringBitmap::from_iter(rhs);
            DeCboRoaringBitmapCodec::serialize_into_with_tmp_buffer(&lhs, &mut compressed, &mut tmp_buffer).unwrap();

            let intersection = DeCboRoaringBitmapCodec::intersection_with_serialized(&compressed, &rhs).unwrap();
            let expected_intersection = lhs & rhs;

            intersection == expected_intersection
        }
    }
}

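From a caller's point of view the raw-u32/delta-encoded dispatch is invisible; a minimal usage sketch, assuming the re-exports introduced elsewhere in this patch (the import paths are assumptions):

use heed::{BytesDecode, BytesEncode};
use milli::heed_codec::{DeCboRoaringBitmapCodec, DELTA_ENCODING_STATUS}; // assumed paths
use roaring::RoaringBitmap;

fn main() {
    // Opt in to delta encoding once, at startup; the OnceLock cannot be flipped later.
    let _ = DELTA_ENCODING_STATUS.set_to_enabled();

    let bitmap: RoaringBitmap = (0..10_000u32).collect();
    // Small bitmaps keep the raw-u32 layout; this one gets the
    // delta-encoded format behind its magic header.
    let bytes = DeCboRoaringBitmapCodec::bytes_encode(&bitmap).unwrap();
    let decoded = DeCboRoaringBitmapCodec::bytes_decode(&bytes).unwrap();
    assert_eq!(bitmap, decoded);
}
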
@@ -0,0 +1,474 @@
use std::io::{self, ErrorKind};
use std::mem::{self, size_of, size_of_val};

use bitpacking::{BitPacker, BitPacker1x, BitPacker4x, BitPacker8x};
use roaring::RoaringBitmap;

/// The magic header for our custom encoding format.
const MAGIC_HEADER: u16 = 36869;

pub struct DeRoaringBitmapCodec;

// TODO reintroduce:
// - serialized_size?
// - serialize_into_vec
// - intersection_with_serialized
// - merge_into
// - merge_deladd_into
impl DeRoaringBitmapCodec {
    /// Returns the serialized size of the given roaring bitmap with the delta encoding format.
    pub fn serialized_size_with_tmp_buffer(
        bitmap: &RoaringBitmap,
        tmp_buffer: &mut Vec<u32>,
    ) -> usize {
        let mut size = 2; // u16 magic header

        let bitpacker8x = BitPacker8x::new();
        let bitpacker4x = BitPacker4x::new();
        let bitpacker1x = BitPacker1x::new();

        // This temporary buffer is used to store each chunk of decompressed u32s.
        tmp_buffer.resize(BitPacker8x::BLOCK_LEN, 0u32);
        let decompressed = &mut tmp_buffer[..];

        let mut buffer_index = 0;
        let mut initial = None;
        // We initially collect all the integers into a flat buffer of the size
        // of the largest bitpacker. We encode them with it until we don't have
        // enough of them...
        for n in bitmap {
            decompressed[buffer_index] = n;
            buffer_index += 1;
            if buffer_index == BitPacker8x::BLOCK_LEN {
                let num_bits = bitpacker8x.num_bits_strictly_sorted(initial, decompressed);
                let compressed_len = BitPacker8x::compressed_block_size(num_bits);
                size += 1; // u8 chunk header
                size += compressed_len; // compressed data length
                initial = Some(n);
                buffer_index = 0;
            }
        }

        // ...We then switch to a smaller bitpacker to encode the remaining chunks...
        let decompressed = &decompressed[..buffer_index];
        let mut chunks = decompressed.chunks_exact(BitPacker4x::BLOCK_LEN);
        for decompressed in chunks.by_ref() {
            let num_bits = bitpacker4x.num_bits_strictly_sorted(initial, decompressed);
            let compressed_len = BitPacker4x::compressed_block_size(num_bits);
            size += 1; // u8 chunk header
            size += compressed_len; // compressed data length
            initial = decompressed.iter().last().copied();
        }

        // ...And so on...
        let decompressed = chunks.remainder();
        let mut chunks = decompressed.chunks_exact(BitPacker1x::BLOCK_LEN);
        for decompressed in chunks.by_ref() {
            let num_bits = bitpacker1x.num_bits_strictly_sorted(initial, decompressed);
            let compressed_len = BitPacker1x::compressed_block_size(num_bits);
            size += 1; // u8 chunk header
            size += compressed_len; // compressed data length
            initial = decompressed.iter().last().copied();
        }

        // ...until no small enough bitpacker is left. We put the remaining
        // integers raw at the end of our buffer with a header indicating the matter.
        let decompressed = chunks.remainder();
        if !decompressed.is_empty() {
            size += 1; // u8 chunk header
            size += mem::size_of_val(decompressed); // remaining uncompressed u32s
        }

        size
    }

    /// Writes the delta-encoded compressed version of the given roaring bitmap
    /// into the provided writer. Accepts a buffer to avoid allocating one.
    pub fn serialize_into_with_tmp_buffer<W: io::Write>(
        bitmap: &RoaringBitmap,
        mut writer: W,
        tmp_buffer: &mut Vec<u32>,
    ) -> io::Result<()> {
        // Insert the magic header
        writer.write_all(&MAGIC_HEADER.to_ne_bytes())?;

        let bitpacker8x = BitPacker8x::new();
        let bitpacker4x = BitPacker4x::new();
        let bitpacker1x = BitPacker1x::new();

        // This temporary buffer is used to store each chunk of decompressed and
        // compressed and delta-encoded u32s. We need room for the decompressed
        // u32s coming from the roaring bitmap, the compressed output that can
        // be as large as the decompressed u32s, and the chunk header.
        tmp_buffer.resize((BitPacker8x::BLOCK_LEN * 2) + 1, 0u32);
        let (decompressed, compressed) = tmp_buffer.split_at_mut(BitPacker8x::BLOCK_LEN);
        let compressed = bytemuck::cast_slice_mut(compressed);

        let mut buffer_index = 0;
        let mut initial = None;
        // We initially collect all the integers into a flat buffer of the size
        // of the largest bitpacker. We encode them with it until we don't have
        // enough of them...
        for n in bitmap {
            decompressed[buffer_index] = n;
            buffer_index += 1;
            if buffer_index == BitPacker8x::BLOCK_LEN {
                let output = encode_with_packer(&bitpacker8x, decompressed, initial, compressed);
                writer.write_all(output)?;
                initial = Some(n);
                buffer_index = 0;
            }
        }

        // ...We then switch to a smaller bitpacker to encode the remaining chunks...
        let decompressed = &decompressed[..buffer_index];
        let mut chunks = decompressed.chunks_exact(BitPacker4x::BLOCK_LEN);
        for decompressed in chunks.by_ref() {
            let output = encode_with_packer(&bitpacker4x, decompressed, initial, compressed);
            writer.write_all(output)?;
            initial = decompressed.iter().last().copied();
        }

        // ...And so on...
        let decompressed = chunks.remainder();
        let mut chunks = decompressed.chunks_exact(BitPacker1x::BLOCK_LEN);
        for decompressed in chunks.by_ref() {
            let output = encode_with_packer(&bitpacker1x, decompressed, initial, compressed);
            writer.write_all(output)?;
            initial = decompressed.iter().last().copied();
        }

        // ...until no small enough bitpacker is left. We put the remaining
        // integers raw at the end of our buffer with a header indicating the matter.
        let decompressed = chunks.remainder();
        if !decompressed.is_empty() {
            let header = encode_chunk_header(BitPackerLevel::None, u32::BITS as u8);
            // Note: Not convinced about the performance of writing a single
            // byte followed by a larger write. However, we will use this
            // codec with a BufWriter or directly with a Vec of bytes.
            writer.write_all(&[header])?;
            writer.write_all(bytemuck::cast_slice(decompressed))?;
        }

        Ok(())
    }

    /// Same as [Self::deserialize_from] but accepts a buffer to avoid allocating one.
    ///
    /// The `filter_block` function is used to filter out blocks. It takes the first
    /// and last u32 values of a block and returns `true` if the block must be kept.
    pub fn deserialize_from_with_tmp_buffer<F>(
        input: &[u8],
        filter_block: F,
        tmp_buffer: &mut Vec<u32>,
    ) -> io::Result<RoaringBitmap>
    where
        F: Fn(u32, u32) -> bool,
    {
        let Some((header, mut compressed)) = input.split_at_checked(size_of_val(&MAGIC_HEADER))
        else {
            return Err(io::Error::new(ErrorKind::UnexpectedEof, "expecting a two-bytes header"));
        };

        // Safety: This unwrap cannot happen as the header buffer is the right size
        let header = u16::from_ne_bytes(header.try_into().unwrap());

        if header != MAGIC_HEADER {
            return Err(io::Error::other("invalid header value"));
        }

        let bitpacker8x = BitPacker8x::new();
        let bitpacker4x = BitPacker4x::new();
        let bitpacker1x = BitPacker1x::new();

        let mut bitmap = RoaringBitmap::new();
        tmp_buffer.resize(BitPacker8x::BLOCK_LEN, 0u32);
        let decompressed = &mut tmp_buffer[..];
        let mut initial = None;

        while let Some((&chunk_header, encoded)) = compressed.split_first() {
            let (level, num_bits) = decode_chunk_header(chunk_header);
            let (bytes_read, decompressed) = match level {
                BitPackerLevel::None => {
                    if num_bits != u32::BITS as u8 {
                        return Err(io::Error::new(
                            ErrorKind::InvalidData,
                            "invalid number of bits to encode non-compressed u32s",
                        ));
                    }

                    let chunks = encoded.chunks_exact(size_of::<u32>());
                    if !chunks.remainder().is_empty() {
                        return Err(io::Error::new(
                            io::ErrorKind::InvalidData,
                            "expecting last chunk to be a multiple of the size of an u32",
                        ));
                    }

                    let integers = chunks
                        // Safety: This unwrap cannot happen as
                        // the size of u32 is set correctly.
                        .map(|b| b.try_into().unwrap())
                        .map(u32::from_ne_bytes);

                    if let Some((first, last)) =
                        integers.clone().next().zip(integers.clone().next_back())
                    {
                        if !(filter_block)(first, last) {
                            bitmap
                                .append(integers)
                                .map_err(|e| io::Error::new(ErrorKind::InvalidData, e))?;
                        }
                    }

                    // This is basically always the last chunk that exists in
                    // this delta-encoded format, as the raw u32s are appended
                    // when there are not enough of them to fit in a bitpacker.
                    break;
                }
                BitPackerLevel::BitPacker1x => {
                    decode_with_packer(&bitpacker1x, decompressed, initial, encoded, num_bits)
                }
                BitPackerLevel::BitPacker4x => {
                    decode_with_packer(&bitpacker4x, decompressed, initial, encoded, num_bits)
                }
                BitPackerLevel::BitPacker8x => {
                    decode_with_packer(&bitpacker8x, decompressed, initial, encoded, num_bits)
                }
            };

            initial = decompressed.iter().last().copied();
            if let Some((first, last)) = decompressed.first().copied().zip(initial) {
                if !(filter_block)(first, last) {
                    // TODO investigate perf
                    // Safety: Bitpackers cannot output unsorted integers when
                    // used with the compress_strictly_sorted function.
                    bitmap.append(decompressed.iter().copied()).unwrap();
                }
            }
            // What the delta-decoding read, plus the chunk header size
            compressed = &compressed[bytes_read + 1..];
        }

        Ok(bitmap)
    }

    /// Returns the length of the serialized DeRoaringBitmap.
    pub fn deserialize_length_from(input: &[u8]) -> io::Result<u64> {
        let Some((header, mut compressed)) = input.split_at_checked(size_of_val(&MAGIC_HEADER))
        else {
            return Err(io::Error::new(ErrorKind::UnexpectedEof, "expecting a two-bytes header"));
        };

        // Safety: This unwrap cannot happen as the header buffer is the right size
        let header = u16::from_ne_bytes(header.try_into().unwrap());

        if header != MAGIC_HEADER {
            return Err(io::Error::other("invalid header value"));
        }

        let mut length = 0;
        while let Some((&chunk_header, encoded)) = compressed.split_first() {
            let (level, num_bits) = decode_chunk_header(chunk_header);
            let bytes_read = match level {
                BitPackerLevel::None => {
                    if num_bits != u32::BITS as u8 {
                        return Err(io::Error::new(
                            ErrorKind::InvalidData,
                            "invalid number of bits to encode non-compressed u32s",
                        ));
                    }

                    let chunks = encoded.chunks_exact(size_of::<u32>());
                    if !chunks.remainder().is_empty() {
                        return Err(io::Error::new(
                            io::ErrorKind::InvalidData,
                            "expecting last chunk to be a multiple of the size of an u32",
                        ));
                    }

                    // This call is optimized for performance
                    // and will not iterate over the chunks.
                    length += chunks.count() as u64;

                    // This is basically always the last chunk that exists in
                    // this delta-encoded format, as the raw u32s are appended
                    // when there are not enough of them to fit in a bitpacker.
                    break;
                }
                BitPackerLevel::BitPacker1x => {
                    length += BitPacker1x::BLOCK_LEN as u64;
                    BitPacker1x::compressed_block_size(num_bits)
                }
                BitPackerLevel::BitPacker4x => {
                    length += BitPacker4x::BLOCK_LEN as u64;
                    BitPacker4x::compressed_block_size(num_bits)
                }
                BitPackerLevel::BitPacker8x => {
                    length += BitPacker8x::BLOCK_LEN as u64;
                    BitPacker8x::compressed_block_size(num_bits)
                }
            };

            // What the delta-decoding read, plus the chunk header size
            compressed = &compressed[bytes_read + 1..];
        }

        Ok(length)
    }
}

/// A utility function that takes all blocks, i.e. filters none of them out.
pub fn take_all_blocks(_first: u32, _last: u32) -> bool {
    false
}

/// Takes a strictly sorted list of u32s and outputs delta-encoded
/// bytes with a chunk header. We expect the output buffer to be
/// at least BLOCK_LEN + 1.
fn encode_with_packer<'c, B: BitPackerExt>(
    bitpacker: &B,
    decompressed: &[u32],
    initial: Option<u32>,
    output: &'c mut [u8],
) -> &'c [u8] {
    let num_bits = bitpacker.num_bits_strictly_sorted(initial, decompressed);
    let compressed_len = B::compressed_block_size(num_bits);
    let chunk_header = encode_chunk_header(B::level(), num_bits);
    let buffer = &mut output[..compressed_len + 1];
    // Safety: The buffer is at least one byte
    let (header_in_buffer, encoded) = buffer.split_first_mut().unwrap();
    *header_in_buffer = chunk_header;
    bitpacker.compress_strictly_sorted(initial, decompressed, encoded, num_bits);
    buffer
}

/// Returns the number of bytes read and the decoded unsigned integers.
fn decode_with_packer<'d, B: BitPacker>(
    bitpacker: &B,
    decompressed: &'d mut [u32],
    initial: Option<u32>,
    compressed: &[u8],
    num_bits: u8,
) -> (usize, &'d [u32]) {
    let decompressed = &mut decompressed[..B::BLOCK_LEN];
    let read = bitpacker.decompress_strictly_sorted(initial, compressed, decompressed, num_bits);
    (read, decompressed)
}

/// An identifier for the bitpacker to be able
/// to correctly decode the compressed integers.
#[derive(Debug, PartialEq, Eq)]
#[repr(u8)]
enum BitPackerLevel {
    /// The remaining bytes are raw little endian encoded u32s.
    None,
    /// The remaining bits are encoded using a `BitPacker1x`.
    BitPacker1x,
    /// The remaining bits are encoded using a `BitPacker4x`.
    BitPacker4x,
    /// The remaining bits are encoded using a `BitPacker8x`.
    BitPacker8x,
}

/// Returns the chunk header based on the bitpacker level
/// and the number of bits to encode the list of integers.
fn encode_chunk_header(level: BitPackerLevel, num_bits: u8) -> u8 {
    debug_assert!(num_bits as u32 <= 2_u32.pow(6));
    let level = level as u8;
    debug_assert!(level <= 3);
    num_bits | (level << 6)
}

/// Decodes the chunk header and outputs the bitpacker level
/// and the number of bits to decode the following bytes.
fn decode_chunk_header(data: u8) -> (BitPackerLevel, u8) {
    let num_bits = data & 0b00111111;
    let level = match data >> 6 {
        0 => BitPackerLevel::None,
        1 => BitPackerLevel::BitPacker1x,
        2 => BitPackerLevel::BitPacker4x,
        3 => BitPackerLevel::BitPacker8x,
        invalid => panic!("Invalid bitpacker level: {invalid}"),
    };
    debug_assert!(num_bits as u32 <= 2_u32.pow(6));
    (level, num_bits)
}

/// A simple helper trait to get the BitPackerLevel
/// and correctly generate the chunk header.
trait BitPackerExt: BitPacker {
    /// Returns the level of the bitpacker: an identifier to be
    /// able to decode the numbers with the right bitpacker.
    fn level() -> BitPackerLevel;
}

impl BitPackerExt for BitPacker8x {
    fn level() -> BitPackerLevel {
        BitPackerLevel::BitPacker8x
    }
}

impl BitPackerExt for BitPacker4x {
    fn level() -> BitPackerLevel {
        BitPackerLevel::BitPacker4x
    }
}

impl BitPackerExt for BitPacker1x {
    fn level() -> BitPackerLevel {
        BitPackerLevel::BitPacker1x
    }
}

#[cfg(test)]
mod tests {
    use quickcheck::quickcheck;
    use roaring::RoaringBitmap;

    use super::{take_all_blocks, DeRoaringBitmapCodec};

    quickcheck! {
        fn qc_random(xs: Vec<u32>) -> bool {
            let bitmap = RoaringBitmap::from_iter(xs);
            let mut compressed = Vec::new();
            let mut tmp_buffer = Vec::new();
            DeRoaringBitmapCodec::serialize_into_with_tmp_buffer(&bitmap, &mut compressed, &mut tmp_buffer).unwrap();
            let length = DeRoaringBitmapCodec::deserialize_length_from(&compressed[..]).unwrap();
            let decompressed = DeRoaringBitmapCodec::deserialize_from_with_tmp_buffer(&compressed[..], take_all_blocks, &mut tmp_buffer).unwrap();
            length == bitmap.len() && decompressed == bitmap
        }
    }

    quickcheck! {
        fn qc_random_check_serialized_size(xs: Vec<u32>) -> bool {
            let bitmap = RoaringBitmap::from_iter(xs);
            let mut compressed = Vec::new();
            let mut tmp_buffer = Vec::new();
            DeRoaringBitmapCodec::serialize_into_with_tmp_buffer(&bitmap, &mut compressed, &mut tmp_buffer).unwrap();
            let length = DeRoaringBitmapCodec::deserialize_length_from(&compressed).unwrap();
            let expected_len = DeRoaringBitmapCodec::serialized_size_with_tmp_buffer(&bitmap, &mut tmp_buffer);
            length == bitmap.len() && compressed.len() == expected_len
        }
    }

    quickcheck! {
        fn qc_random_intersection_with_serialized(lhs: Vec<u32>, rhs: Vec<u32>) -> bool {
            let mut compressed = Vec::new();
            let mut tmp_buffer = Vec::new();

            let lhs = RoaringBitmap::from_iter(lhs);
            let rhs = RoaringBitmap::from_iter(rhs);
            DeRoaringBitmapCodec::serialize_into_with_tmp_buffer(&lhs, &mut compressed, &mut tmp_buffer).unwrap();

            let sub_lhs = DeRoaringBitmapCodec::deserialize_from_with_tmp_buffer(&compressed, |first, last| {
                rhs.range_cardinality(first..=last) == 0
            }, &mut tmp_buffer).unwrap();

            let intersection = sub_lhs & rhs.clone();
            let expected_intersection = lhs & rhs;

            intersection == expected_intersection
        }
    }
}
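The chunk header packs the bitpacker level into the top two bits and `num_bits` into the low six; a self-contained sketch of that layout, re-implemented here only because the helpers above are private:

fn main() {
    let (level, num_bits) = (3u8, 17u8); // BitPacker8x, 17 bits per packed value
    let header = num_bits | (level << 6);
    assert_eq!(header >> 6, 3);
    assert_eq!(header & 0b0011_1111, 17);
    println!("chunk header = {header:#010b}");
}
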
@@ -1,7 +1,9 @@
mod bo_roaring_bitmap_codec;
pub mod cbo_roaring_bitmap_codec;
pub(super) mod cbo_roaring_bitmap_codec;
mod de_cbo_roaring_bitmap_codec;
pub(super) mod de_roaring_bitmap_codec;
mod roaring_bitmap_codec;

pub use self::bo_roaring_bitmap_codec::BoRoaringBitmapCodec;
pub use self::cbo_roaring_bitmap_codec::CboRoaringBitmapCodec;
pub use self::cbo_roaring_bitmap_codec::THRESHOLD;
pub use self::de_cbo_roaring_bitmap_codec::{DeCboRoaringBitmapCodec, DELTA_ENCODING_STATUS};
pub use self::de_roaring_bitmap_codec::take_all_blocks;
pub use self::roaring_bitmap_codec::RoaringBitmapCodec;

@@ -3,7 +3,7 @@ use std::mem;
use heed::{BoxedError, BytesDecode};

use super::{BoRoaringBitmapLenCodec, RoaringBitmapLenCodec};
use crate::heed_codec::roaring_bitmap::cbo_roaring_bitmap_codec::THRESHOLD;
use crate::heed_codec::roaring_bitmap::THRESHOLD;
use crate::heed_codec::BytesDecodeOwned;

pub struct CboRoaringBitmapLenCodec;

@@ -0,0 +1,42 @@
+use std::io::ErrorKind;
+
+use heed::{BoxedError, BytesDecode};
+
+use super::BoRoaringBitmapLenCodec;
+use crate::heed_codec::roaring_bitmap::cbo_roaring_bitmap_codec::CboRoaringBitmapCodec;
+use crate::heed_codec::roaring_bitmap::de_roaring_bitmap_codec::DeRoaringBitmapCodec;
+use crate::heed_codec::roaring_bitmap_length::CboRoaringBitmapLenCodec;
+use crate::heed_codec::BytesDecodeOwned;
+
+pub struct DeCboRoaringBitmapLenCodec;
+
+impl BytesDecode<'_> for DeCboRoaringBitmapLenCodec {
+    type DItem = u64;
+
+    fn bytes_decode(bytes: &[u8]) -> Result<Self::DItem, BoxedError> {
+        if CboRoaringBitmapCodec::bytes_deserialize_as_raw_u32s(bytes) {
+            // If there is threshold or less than threshold integers that can fit
+            // into this array of bytes it means that we used the ByteOrder codec
+            // serializer.
+            BoRoaringBitmapLenCodec::bytes_decode(bytes)
+        } else {
+            match DeRoaringBitmapCodec::deserialize_length_from(bytes) {
+                Ok(bitmap) => Ok(bitmap),
+                // If the error kind is Other it means that the delta-decoder found
+                // an invalid magic header. We fall back to the CboRoaringBitmap version.
+                Err(e) if e.kind() == ErrorKind::Other => {
+                    CboRoaringBitmapLenCodec::bytes_decode(bytes)
+                }
+                Err(e) => Err(e.into()),
+            }
+        }
+    }
+}
+
+impl BytesDecodeOwned for DeCboRoaringBitmapLenCodec {
+    type DItem = u64;
+
+    fn bytes_decode_owned(bytes: &[u8]) -> Result<Self::DItem, BoxedError> {
+        Self::bytes_decode(bytes)
+    }
+}
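Note: this new length codec dispatches on the on-disk layout: raw u32 arrays go through BoRoaringBitmapLenCodec, a valid delta magic header goes through the delta decoder, and anything else falls back to the legacy CBO length codec, so old databases keep working. A usage sketch under that assumption (the bytes would come straight out of an LMDB value; heed::BytesDecode is the trait implemented above):

use heed::BytesDecode;

// Count the entries of a stored bitmap without materializing it,
// whichever of the three encodings was used to write it.
fn stored_len(bytes: &[u8]) -> Result<u64, heed::BoxedError> {
    DeCboRoaringBitmapLenCodec::bytes_decode(bytes)
}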
@@ -1,7 +1,9 @@
 mod bo_roaring_bitmap_len_codec;
 mod cbo_roaring_bitmap_len_codec;
+mod de_cbo_roaring_bitmap_len_codec;
 mod roaring_bitmap_len_codec;

-pub use self::bo_roaring_bitmap_len_codec::BoRoaringBitmapLenCodec;
-pub use self::cbo_roaring_bitmap_len_codec::CboRoaringBitmapLenCodec;
-pub use self::roaring_bitmap_len_codec::RoaringBitmapLenCodec;
+use self::bo_roaring_bitmap_len_codec::BoRoaringBitmapLenCodec;
+use self::cbo_roaring_bitmap_len_codec::CboRoaringBitmapLenCodec;
+pub use self::de_cbo_roaring_bitmap_len_codec::DeCboRoaringBitmapLenCodec;
+use self::roaring_bitmap_len_codec::RoaringBitmapLenCodec;

@@ -72,16 +72,15 @@ impl BytesDecodeOwned for RoaringBitmapLenCodec {

 #[cfg(test)]
 mod tests {
-    use heed::BytesEncode;
     use roaring::RoaringBitmap;

     use super::*;
-    use crate::heed_codec::RoaringBitmapCodec;

     #[test]
     fn deserialize_roaring_bitmap_length() {
         let bitmap: RoaringBitmap = (0..500).chain(800..800_000).chain(920_056..930_032).collect();
-        let bytes = RoaringBitmapCodec::bytes_encode(&bitmap).unwrap();
+        let mut bytes = Vec::new();
+        bitmap.serialize_into(&mut bytes).unwrap();
         let len = RoaringBitmapLenCodec::deserialize_from_slice(&bytes).unwrap();
         assert_eq!(bitmap.len(), len);
     }

@@ -28,18 +28,16 @@ use crate::heed_codec::facet::{
 use crate::heed_codec::version::VersionCodec;
 use crate::heed_codec::{BEU16StrCodec, FstSetCodec, StrBEU16Codec, StrRefCodec};
 use crate::order_by_map::OrderByMap;
-use crate::progress::Progress;
 use crate::prompt::PromptData;
 use crate::proximity::ProximityPrecision;
 use crate::update::new::StdResult;
 use crate::vector::db::IndexEmbeddingConfigs;
 use crate::vector::{Embedding, VectorStore, VectorStoreBackend, VectorStoreStats};
 use crate::{
-    default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds,
-    FacetDistribution, FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldIdWordCountCodec,
-    FieldidsWeightsMap, FilterableAttributesRule, GeoPoint, LocalizedAttributesRule, ObkvCodec,
-    Result, RoaringBitmapCodec, RoaringBitmapLenCodec, Search, U8StrStrCodec, Weight, BEU16, BEU32,
-    BEU64,
+    default_criteria, Criterion, DeCboRoaringBitmapCodec, DeCboRoaringBitmapLenCodec, DocumentId,
+    ExternalDocumentsIds, FacetDistribution, FieldDistribution, FieldId, FieldIdMapMissingEntry,
+    FieldIdWordCountCodec, FieldidsWeightsMap, FilterableAttributesRule, GeoPoint,
+    LocalizedAttributesRule, ObkvCodec, Result, Search, U8StrStrCodec, Weight, BEU16, BEU32, BEU64,
 };

 pub const DEFAULT_MIN_WORD_LEN_ONE_TYPO: u8 = 5;
@@ -134,38 +132,38 @@ pub struct Index {
     pub external_documents_ids: Database<Str, BEU32>,

     /// A word and all the documents ids containing the word.
-    pub word_docids: Database<Str, CboRoaringBitmapCodec>,
+    pub word_docids: Database<Str, DeCboRoaringBitmapCodec>,

     /// A word and all the documents ids containing the word, from attributes for which typos are not allowed.
-    pub exact_word_docids: Database<Str, CboRoaringBitmapCodec>,
+    pub exact_word_docids: Database<Str, DeCboRoaringBitmapCodec>,

     /// A prefix of word and all the documents ids containing this prefix.
-    pub word_prefix_docids: Database<Str, CboRoaringBitmapCodec>,
+    pub word_prefix_docids: Database<Str, DeCboRoaringBitmapCodec>,

     /// A prefix of word and all the documents ids containing this prefix, from attributes for which typos are not allowed.
-    pub exact_word_prefix_docids: Database<Str, CboRoaringBitmapCodec>,
+    pub exact_word_prefix_docids: Database<Str, DeCboRoaringBitmapCodec>,

     /// Maps the proximity between a pair of words with all the docids where this relation appears.
-    pub word_pair_proximity_docids: Database<U8StrStrCodec, CboRoaringBitmapCodec>,
+    pub word_pair_proximity_docids: Database<U8StrStrCodec, DeCboRoaringBitmapCodec>,

     /// Maps the word and the position with the docids that corresponds to it.
-    pub word_position_docids: Database<StrBEU16Codec, CboRoaringBitmapCodec>,
+    pub word_position_docids: Database<StrBEU16Codec, DeCboRoaringBitmapCodec>,
     /// Maps the word and the field id with the docids that corresponds to it.
-    pub word_fid_docids: Database<StrBEU16Codec, CboRoaringBitmapCodec>,
+    pub word_fid_docids: Database<StrBEU16Codec, DeCboRoaringBitmapCodec>,

     /// Maps the field id and the word count with the docids that corresponds to it.
-    pub field_id_word_count_docids: Database<FieldIdWordCountCodec, CboRoaringBitmapCodec>,
+    pub field_id_word_count_docids: Database<FieldIdWordCountCodec, DeCboRoaringBitmapCodec>,
     /// Maps the word prefix and a position with all the docids where the prefix appears at the position.
-    pub word_prefix_position_docids: Database<StrBEU16Codec, CboRoaringBitmapCodec>,
+    pub word_prefix_position_docids: Database<StrBEU16Codec, DeCboRoaringBitmapCodec>,
     /// Maps the word prefix and a field id with all the docids where the prefix appears inside the field
-    pub word_prefix_fid_docids: Database<StrBEU16Codec, CboRoaringBitmapCodec>,
+    pub word_prefix_fid_docids: Database<StrBEU16Codec, DeCboRoaringBitmapCodec>,

     /// Maps the facet field id and the docids for which this field exists
-    pub facet_id_exists_docids: Database<FieldIdCodec, CboRoaringBitmapCodec>,
+    pub facet_id_exists_docids: Database<FieldIdCodec, DeCboRoaringBitmapCodec>,
     /// Maps the facet field id and the docids for which this field is set as null
-    pub facet_id_is_null_docids: Database<FieldIdCodec, CboRoaringBitmapCodec>,
+    pub facet_id_is_null_docids: Database<FieldIdCodec, DeCboRoaringBitmapCodec>,
     /// Maps the facet field id and the docids for which this field is considered empty
-    pub facet_id_is_empty_docids: Database<FieldIdCodec, CboRoaringBitmapCodec>,
+    pub facet_id_is_empty_docids: Database<FieldIdCodec, DeCboRoaringBitmapCodec>,

     /// Maps the facet field id and ranges of numbers with the docids that corresponds to them.
     pub facet_id_f64_docids: Database<FacetGroupKeyCodec<OrderedF64Codec>, FacetGroupValueCodec>,
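Note: every posting-list database above keeps its key codec and only swaps the value codec to DeCboRoaringBitmapCodec, so callers are untouched; the new codec is assumed to read legacy CBO values as well as newly written delta-encoded ones (per the fallback logic shown earlier). A read sketch under that assumption:

use roaring::RoaringBitmap;

// Reading a posting list is unchanged for callers: the codec behind the
// database type transparently handles both on-disk layouts.
fn docids_for_word(index: &Index, rtxn: &heed::RoTxn, word: &str) -> heed::Result<Option<RoaringBitmap>> {
    index.word_docids.get(rtxn, word)
}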
@@ -506,7 +504,7 @@ impl Index {
         wtxn: &mut RwTxn<'_>,
         docids: &RoaringBitmap,
     ) -> heed::Result<()> {
-        self.main.remap_types::<Str, RoaringBitmapCodec>().put(
+        self.main.remap_types::<Str, DeCboRoaringBitmapCodec>().put(
             wtxn,
             main_key::DOCUMENTS_IDS_KEY,
             docids,
@@ -517,7 +515,7 @@ impl Index {
     pub fn documents_ids(&self, rtxn: &RoTxn<'_>) -> heed::Result<RoaringBitmap> {
         Ok(self
             .main
-            .remap_types::<Str, RoaringBitmapCodec>()
+            .remap_types::<Str, DeCboRoaringBitmapCodec>()
             .get(rtxn, main_key::DOCUMENTS_IDS_KEY)?
             .unwrap_or_default())
     }
@@ -526,7 +524,7 @@ impl Index {
     pub fn number_of_documents(&self, rtxn: &RoTxn<'_>) -> Result<u64> {
         let count = self
             .main
-            .remap_types::<Str, RoaringBitmapLenCodec>()
+            .remap_types::<Str, DeCboRoaringBitmapLenCodec>()
             .get(rtxn, main_key::DOCUMENTS_IDS_KEY)?;
         Ok(count.unwrap_or_default())
     }
@@ -600,13 +598,6 @@ impl Index {
     /// Returns the fields ids map which associate the documents keys with an internal field id
     /// (i.e. `u8`), this field id is used to identify fields in the obkv documents.
     pub fn fields_ids_map(&self, rtxn: &RoTxn<'_>) -> heed::Result<FieldsIdsMap> {
-        let map = self.fields_ids_map_with_metadata(rtxn).unwrap();
-        eprintln!(
-            "fields_ids_map: {:?}",
-            map.iter_id_metadata()
-                .map(|(id, metadata)| (id, map.name(id).unwrap(), metadata))
-                .collect::<Vec<_>>()
-        );
         Ok(self
             .main
             .remap_types::<Str, SerdeJson<FieldsIdsMap>>()
@@ -619,10 +610,7 @@ impl Index {
     /// This structure is not yet stored in the index, and is generated on the fly.
     pub fn fields_ids_map_with_metadata(&self, rtxn: &RoTxn<'_>) -> Result<FieldIdMapWithMetadata> {
         Ok(FieldIdMapWithMetadata::new(
-            self.main
-                .remap_types::<Str, SerdeJson<FieldsIdsMap>>()
-                .get(rtxn, main_key::FIELDS_IDS_MAP_KEY)?
-                .unwrap_or_default(),
+            self.fields_ids_map(rtxn)?,
             MetadataBuilder::from_index(self, rtxn)?,
         ))
     }
@@ -737,7 +725,7 @@ impl Index {
         wtxn: &mut RwTxn<'_>,
         docids: &RoaringBitmap,
     ) -> heed::Result<()> {
-        self.main.remap_types::<Str, RoaringBitmapCodec>().put(
+        self.main.remap_types::<Str, DeCboRoaringBitmapCodec>().put(
             wtxn,
             main_key::GEO_FACETED_DOCUMENTS_IDS_KEY,
             docids,
@@ -756,7 +744,7 @@ impl Index {
     pub fn geo_faceted_documents_ids(&self, rtxn: &RoTxn<'_>) -> heed::Result<RoaringBitmap> {
         match self
             .main
-            .remap_types::<Str, RoaringBitmapCodec>()
+            .remap_types::<Str, DeCboRoaringBitmapCodec>()
             .get(rtxn, main_key::GEO_FACETED_DOCUMENTS_IDS_KEY)?
         {
             Some(docids) => Ok(docids),
@@ -1409,7 +1397,7 @@ impl Index {
     /// Returns the number of documents ids associated with the given word,
     /// it is much faster than deserializing the bitmap and getting the length of it.
    pub fn word_documents_count(&self, rtxn: &RoTxn<'_>, word: &str) -> heed::Result<Option<u64>> {
-        self.word_docids.remap_data_type::<RoaringBitmapLenCodec>().get(rtxn, word)
+        self.word_docids.remap_data_type::<DeCboRoaringBitmapLenCodec>().get(rtxn, word)
     }

     /* documents */
@@ -1488,8 +1476,8 @@ impl Index {
         FacetDistribution::new(rtxn, self)
     }

-    pub fn search<'a>(&'a self, rtxn: &'a RoTxn<'a>, progress: &'a Progress) -> Search<'a> {
-        Search::new(rtxn, self, progress)
+    pub fn search<'a>(&'a self, rtxn: &'a RoTxn<'a>) -> Search<'a> {
+        Search::new(rtxn, self)
     }

     /// Returns the index creation time.

@@ -72,16 +72,14 @@ pub use self::filterable_attributes_rules::{
     FilterableAttributesRule,
 };
 pub use self::heed_codec::{
-    BEU16StrCodec, BEU32StrCodec, BoRoaringBitmapCodec, BoRoaringBitmapLenCodec,
-    CboRoaringBitmapCodec, CboRoaringBitmapLenCodec, FieldIdWordCountCodec, ObkvCodec,
-    RoaringBitmapCodec, RoaringBitmapLenCodec, StrBEU32Codec, U8StrStrCodec,
+    BEU16StrCodec, BEU32StrCodec, DeCboRoaringBitmapCodec, DeCboRoaringBitmapLenCodec,
+    FieldIdWordCountCodec, ObkvCodec, RoaringBitmapCodec, StrBEU32Codec, U8StrStrCodec,
     UncheckedU8StrStrCodec,
 };
 pub use self::index::Index;
 pub use self::localized_attributes_rules::LocalizedAttributesRule;
 pub use self::search::facet::{FacetValueHit, SearchForFacetValues};
 pub use self::search::similar::Similar;
-pub use self::search::steps::{FederatingResultsStep, SearchStep, TotalProcessingTimeStep};
 pub use self::search::{
     FacetDistribution, Filter, FormatOptions, MatchBounds, MatcherBuilder, MatchingWords, OrderBy,
     Search, SearchResult, SemanticSearch, TermsMatchingStrategy, DEFAULT_VALUES_PER_FACET,

@@ -64,30 +64,6 @@ impl Progress {
         steps.push((step_type, Box::new(sub_progress), now));
     }

-    /// End a step that has been started without having to start a new step.
-    fn end_progress_step<P: Step>(&self, sub_progress: P) {
-        let mut inner = self.steps.write().unwrap();
-        let InnerProgress { steps, durations } = &mut *inner;
-
-        let now = Instant::now();
-        let step_type = TypeId::of::<P>();
-        debug_assert!(
-            steps.iter().any(|(id, s, _)| *id == step_type && s.name() == sub_progress.name()),
-            "Step `{}` must have been started",
-            sub_progress.name()
-        );
-        if let Some(idx) = steps.iter().position(|(id, _, _)| *id == step_type) {
-            push_steps_durations(steps, durations, now, idx);
-            steps.truncate(idx);
-        }
-    }
-
-    /// Update the progress and return a scoped progress step that will end the progress step when dropped.
-    pub fn update_progress_scoped<P: Step + Copy>(&self, step: P) -> ScopedProgressStep<'_, P> {
-        self.update_progress(step);
-        ScopedProgressStep { progress: self, step }
-    }
-
     // TODO: This code should be in meilisearch_types but cannot because milli can't depend on meilisearch_types
     pub fn as_progress_view(&self) -> ProgressView {
         let inner = self.steps.read().unwrap();
@@ -119,15 +95,7 @@ impl Progress {
         let now = Instant::now();
         push_steps_durations(steps, &mut durations, now, 0);

-        let mut accumulated_durations = IndexMap::new();
-        for (name, duration) in durations.drain(..) {
-            accumulated_durations.entry(name).and_modify(|d| *d += duration).or_insert(duration);
-        }
-
-        accumulated_durations
-            .into_iter()
-            .map(|(name, duration)| (name, format!("{duration:.2?}")))
-            .collect()
+        durations.drain(..).map(|(name, duration)| (name, format!("{duration:.2?}"))).collect()
     }

     // TODO: ideally we should expose the progress in a way that let arroy use it directly
@@ -375,14 +343,3 @@ impl<T: steppe::Step> Step for Compat<T> {
         self.0.total().try_into().unwrap_or(u32::MAX)
     }
 }
-
-pub struct ScopedProgressStep<'a, P: Step + Copy> {
-    progress: &'a Progress,
-    step: P,
-}
-
-impl<'a, P: Step + Copy> Drop for ScopedProgressStep<'a, P> {
-    fn drop(&mut self) {
-        self.progress.end_progress_step(self.step);
-    }
-}

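Note: the removed update_progress_scoped/ScopedProgressStep pair is a standard RAII guard: starting a step returns a value whose Drop ends the step, so early returns and the `?` operator still record the duration. A generic, self-contained sketch of the pattern, deliberately detached from the milli types:

use std::time::Instant;

// Starting a step yields a guard; dropping the guard ends the step.
struct ScopedStep<'a> {
    name: &'a str,
    start: Instant,
}

impl<'a> ScopedStep<'a> {
    fn start(name: &'a str) -> Self {
        ScopedStep { name, start: Instant::now() }
    }
}

impl Drop for ScopedStep<'_> {
    fn drop(&mut self) {
        println!("{} took {:.2?}", self.name, self.start.elapsed());
    }
}

fn main() {
    let _step = ScopedStep::start("computing filter");
    // ... do the work; the step ends when `_step` goes out of scope
}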
@@ -10,7 +10,7 @@ use crate::heed_codec::facet::{
     FacetGroupKey, FacetGroupKeyCodec, FacetGroupLazyValueCodec, FacetGroupValueCodec,
 };
 use crate::heed_codec::BytesRefCodec;
-use crate::{CboRoaringBitmapCodec, DocumentId};
+use crate::{DeCboRoaringBitmapCodec, DocumentId};

 /// Call the given closure on the facet distribution of the candidate documents.
 ///
@@ -88,7 +88,7 @@ where
             if key.field_id != field_id {
                 break;
             }
-            let intersection = CboRoaringBitmapCodec::intersection_with_serialized(
+            let intersection = DeCboRoaringBitmapCodec::intersection_with_serialized(
                 value.bitmap_bytes,
                 candidates,
             )?;
@@ -120,7 +120,7 @@ where
             if key.field_id != field_id {
                 break;
             }
-            let intersection = CboRoaringBitmapCodec::intersection_with_serialized(
+            let intersection = DeCboRoaringBitmapCodec::intersection_with_serialized(
                 value.bitmap_bytes,
                 candidates,
             )?;
@@ -173,7 +173,7 @@ where
             if key.field_id != self.field_id {
                 return Ok(ControlFlow::Break(()));
             }
-            let docids_in_common = CboRoaringBitmapCodec::intersection_with_serialized(
+            let docids_in_common = DeCboRoaringBitmapCodec::intersection_with_serialized(
                 value.bitmap_bytes,
                 candidates,
             )?;
@@ -210,7 +210,7 @@ where
             if key.field_id != self.field_id {
                 return Ok(ControlFlow::Break(()));
             }
-            let docids_in_common = CboRoaringBitmapCodec::intersection_with_serialized(
+            let docids_in_common = DeCboRoaringBitmapCodec::intersection_with_serialized(
                 value.bitmap_bytes,
                 candidates,
             )?;

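Note: every call site above swaps CboRoaringBitmapCodec::intersection_with_serialized for the DeCbo variant. Judging from these call sites and the quickcheck property earlier, the function is behaviorally equivalent to deserializing the stored bytes and intersecting with the candidates, minus the optimization of skipping blocks disjoint from them. A reference-semantics sketch (assuming a plain roaring serialization; the real codec also handles the CBO and delta layouts):

use roaring::RoaringBitmap;

// What intersection_with_serialized(bytes, candidates) must compute:
// deserialize, then intersect, without keeping the intermediate bitmap.
fn intersection_reference(bytes: &[u8], candidates: &RoaringBitmap) -> std::io::Result<RoaringBitmap> {
    let stored = RoaringBitmap::deserialize_from(bytes)?;
    Ok(stored & candidates)
}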
@@ -8,7 +8,7 @@ use crate::heed_codec::facet::{
     FacetGroupKey, FacetGroupKeyCodec, FacetGroupLazyValueCodec, FacetGroupValueCodec,
 };
 use crate::heed_codec::BytesRefCodec;
-use crate::{CboRoaringBitmapCodec, Result};
+use crate::{DeCboRoaringBitmapCodec, Result};

 /// Find all the document ids for which the given field contains a value contained within
 /// the two bounds.
@@ -114,11 +114,11 @@ impl<'t> FacetRangeSearch<'t, '_, '_> {

             if RangeBounds::<&[u8]>::contains(&(self.left, self.right), &key.left_bound) {
                 *self.docids |= match self.universe {
-                    Some(universe) => CboRoaringBitmapCodec::intersection_with_serialized(
+                    Some(universe) => DeCboRoaringBitmapCodec::intersection_with_serialized(
                         value.bitmap_bytes,
                         universe,
                     )?,
-                    None => CboRoaringBitmapCodec::deserialize_from(value.bitmap_bytes)?,
+                    None => DeCboRoaringBitmapCodec::deserialize_from(value.bitmap_bytes)?,
                 };
             }
         }
@@ -211,11 +211,11 @@ impl<'t> FacetRangeSearch<'t, '_, '_> {
             };
             if should_take_whole_group {
                 *self.docids |= match self.universe {
-                    Some(universe) => CboRoaringBitmapCodec::intersection_with_serialized(
+                    Some(universe) => DeCboRoaringBitmapCodec::intersection_with_serialized(
                         previous_value.bitmap_bytes,
                         universe,
                     )?,
-                    None => CboRoaringBitmapCodec::deserialize_from(previous_value.bitmap_bytes)?,
+                    None => DeCboRoaringBitmapCodec::deserialize_from(previous_value.bitmap_bytes)?,
                 };
                 previous_key = next_key;
                 previous_value = next_value;
@@ -313,11 +313,11 @@ impl<'t> FacetRangeSearch<'t, '_, '_> {
             };
             if should_take_whole_group {
                 *self.docids |= match self.universe {
-                    Some(universe) => CboRoaringBitmapCodec::intersection_with_serialized(
+                    Some(universe) => DeCboRoaringBitmapCodec::intersection_with_serialized(
                         previous_value.bitmap_bytes,
                         universe,
                     )?,
-                    None => CboRoaringBitmapCodec::deserialize_from(previous_value.bitmap_bytes)?,
+                    None => DeCboRoaringBitmapCodec::deserialize_from(previous_value.bitmap_bytes)?,
                 };
             } else {
                 let level = level - 1;

@@ -1165,7 +1165,7 @@ mod tests {

         let rtxn = index.read_txn().unwrap();

-        let mut search = index.search(&rtxn);
+        let mut search = crate::Search::new(&rtxn, &index);
         // this filter is copy-pasted from #2380 with the exact same escape sequence
         search.filter(Filter::from_str("monitor_diagonal = '27\" to 30\\''").unwrap().unwrap());
         let crate::SearchResult { documents_ids, .. } = search.execute().unwrap();
@@ -1225,7 +1225,7 @@ mod tests {

         let rtxn = index.read_txn().unwrap();

-        let mut search = index.search(&rtxn);
+        let mut search = crate::Search::new(&rtxn, &index);

         search.filter(Filter::from_str("_geoRadius(45.4777599, 9.1967508, 0)").unwrap().unwrap());
         let crate::SearchResult { documents_ids, .. } = search.execute().unwrap();

@@ -6,7 +6,6 @@ use roaring::RoaringBitmap;

 use crate::score_details::{ScoreDetails, ScoreValue, ScoringStrategy};
 use crate::search::new::{distinct_fid, distinct_single_docid};
-use crate::search::steps::SearchStep;
 use crate::search::SemanticSearch;
 use crate::vector::{Embedding, SearchQuery};
 use crate::{Index, MatchingWords, Result, Search, SearchResult};
@@ -222,7 +221,6 @@ impl Search<'_> {
             time_budget: self.time_budget.clone(),
             ranking_score_threshold: self.ranking_score_threshold,
             locales: self.locales.clone(),
-            progress: self.progress,
         };

         let semantic = search.semantic.take();
@@ -243,7 +241,6 @@ impl Search<'_> {
             Some(vector_query) => vector_query,
             None => {
                 // attempt to embed the vector
-                self.progress.update_progress(SearchStep::EmbeddingQuery);
                 let span = tracing::trace_span!(target: "search::hybrid", "embed_one");
                 let _entered = span.enter();

@@ -12,9 +12,7 @@ use self::new::{execute_vector_search, PartialSearchResult, VectorStoreStats};
 use crate::documents::GeoSortParameter;
 use crate::filterable_attributes_rules::{filtered_matching_patterns, matching_features};
 use crate::index::MatchingStrategy;
-use crate::progress::Progress;
 use crate::score_details::{ScoreDetails, ScoringStrategy};
-use crate::search::steps::SearchStep;
 use crate::vector::{Embedder, Embedding};
 use crate::{
     execute_search, filtered_universe, AscDesc, DefaultSearchLogger, DocumentId, Error, Index,
@@ -31,7 +29,6 @@ mod fst_utils;
 pub mod hybrid;
 pub mod new;
 pub mod similar;
-pub mod steps;

 #[derive(Debug, Clone)]
 pub struct SemanticSearch {
@@ -64,11 +61,10 @@ pub struct Search<'a> {
     time_budget: TimeBudget,
     ranking_score_threshold: Option<f64>,
     locales: Option<Vec<Language>>,
-    progress: &'a Progress,
 }

 impl<'a> Search<'a> {
-    pub fn new(rtxn: &'a heed::RoTxn<'a>, index: &'a Index, progress: &'a Progress) -> Search<'a> {
+    pub fn new(rtxn: &'a heed::RoTxn<'a>, index: &'a Index) -> Search<'a> {
         Search {
             query: None,
             filter: None,
@@ -90,7 +86,6 @@ impl<'a> Search<'a> {
             locales: None,
             time_budget: TimeBudget::max(),
             ranking_score_threshold: None,
-            progress,
         }
     }

@@ -203,7 +198,7 @@ impl<'a> Search<'a> {
     pub fn execute_for_candidates(&self, has_vector_search: bool) -> Result<RoaringBitmap> {
         if has_vector_search {
             let ctx = SearchContext::new(self.index, self.rtxn)?;
-            filtered_universe(ctx.index, ctx.txn, &self.filter, self.progress)
+            filtered_universe(ctx.index, ctx.txn, &self.filter)
         } else {
             Ok(self.execute()?.candidates)
         }
@@ -244,9 +239,8 @@ impl<'a> Search<'a> {
             }
         }

-        let universe = filtered_universe(ctx.index, ctx.txn, &self.filter, self.progress)?;
+        let universe = filtered_universe(ctx.index, ctx.txn, &self.filter)?;
         let mut query_vector = None;

         let PartialSearchResult {
             located_query_terms,
             candidates,
@@ -282,7 +276,6 @@ impl<'a> Search<'a> {
                     *quantized,
                     self.time_budget.clone(),
                     self.ranking_score_threshold,
-                    self.progress,
                 )?
             }
             _ => execute_search(
@@ -304,7 +297,6 @@ impl<'a> Search<'a> {
                 self.time_budget.clone(),
                 self.ranking_score_threshold,
                 self.locales.as_ref(),
-                self.progress,
             )?,
         };

@@ -314,7 +306,6 @@ impl<'a> Search<'a> {
             tracing::debug!("Vector store stats: total_time={total_time:.02?}, total_queries={total_queries}, total_results={total_results}");
         }

-        self.progress.update_progress(SearchStep::FormattingResults);
         // consume context and located_query_terms to build MatchingWords.
         let matching_words = match located_query_terms {
             Some(located_query_terms) => MatchingWords::new(ctx, located_query_terms),
@@ -356,7 +347,6 @@ impl fmt::Debug for Search<'_> {
             time_budget,
             ranking_score_threshold,
             locales,
-            progress: _,
         } = self;
         f.debug_struct("Search")
             .field("query", query)

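Note: with the Progress handle gone from the public search API on this branch, constructing a search is back to two arguments. A usage sketch matching the tests further down (assumes an open milli Index):

use milli::{Search, SearchResult, TermsMatchingStrategy};

fn run(index: &milli::Index) -> milli::Result<Vec<u32>> {
    let rtxn = index.read_txn()?;
    let mut search = Search::new(&rtxn, index);
    search.query("delta encoding");
    search.terms_matching_strategy(TermsMatchingStrategy::default());
    let SearchResult { documents_ids, .. } = search.execute()?;
    Ok(documents_ids)
}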
@@ -3,12 +3,10 @@ use roaring::RoaringBitmap;
 use super::logger::SearchLogger;
 use super::ranking_rules::{BoxRankingRule, RankingRuleQueryTrait};
 use super::SearchContext;
-use crate::progress::Progress;
 use crate::score_details::{ScoreDetails, ScoringStrategy};
 use crate::search::new::distinct::{
     apply_distinct_rule, distinct_fid, distinct_single_docid, DistinctOutput,
 };
-use crate::search::steps::{ComputingBucketSortStep, SearchStep};
 use crate::{Result, TimeBudget};

 pub struct BucketSortOutput {
@@ -36,9 +34,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
     ranking_score_threshold: Option<f64>,
     exhaustive_number_hits: bool,
     max_total_hits: Option<usize>,
-    progress: &Progress,
 ) -> Result<BucketSortOutput> {
-    let _step = progress.update_progress_scoped(SearchStep::ComputingBucketSort);
     logger.initial_query(query);
     logger.ranking_rules(&ranking_rules);
     logger.initial_universe(universe);
@@ -97,7 +93,6 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
         };
     }

-    let step = progress.update_progress_scoped(ComputingBucketSortStep::Initializing);
     let ranking_rules_len = ranking_rules.len();

     logger.start_iteration_ranking_rule(0, ranking_rules[0].as_ref(), query, universe);
@@ -110,7 +105,6 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
         vec![RoaringBitmap::default(); ranking_rules_len];
     ranking_rule_universes[0].clone_from(universe);
     let mut cur_ranking_rule_index = 0;
-    drop(step);

     /// Finish iterating over the current ranking rule, yielding
     /// control to the parent (or finishing the search if not possible).
@@ -163,7 +157,6 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
                 distinct_fid,
                 &ranking_rule_scores,
                 $candidates,
-                progress,
             )?;
         };
     }
@@ -192,7 +185,6 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
             ctx,
             logger,
             &ranking_rule_universes[cur_ranking_rule_index],
-            progress,
         )? {
             std::task::Poll::Ready(bucket) => bucket,
             std::task::Poll::Pending => {
@@ -239,7 +231,6 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
             logger,
             &ranking_rule_universes[cur_ranking_rule_index],
             &time_budget,
-            progress,
         )?
         else {
             back!();
@@ -332,11 +323,9 @@ fn maybe_add_to_results<'ctx, Q: RankingRuleQueryTrait>(
     distinct_fid: Option<u16>,
     ranking_rule_scores: &[ScoreDetails],
     candidates: RoaringBitmap,
-    progress: &Progress,
 ) -> Result<()> {
     // First apply the distinct rule on the candidates, reducing the universes if necessary
     let candidates = if let Some(distinct_fid) = distinct_fid {
-        progress.update_progress(ComputingBucketSortStep::ApplyingDistinctRule);
         let DistinctOutput { remaining, excluded } =
             apply_distinct_rule(ctx, distinct_fid, &candidates)?;
         for universe in ranking_rule_universes.iter_mut() {
@@ -347,8 +336,6 @@ fn maybe_add_to_results<'ctx, Q: RankingRuleQueryTrait>(
     } else {
         candidates.clone()
     };

-    progress.update_progress(ComputingBucketSortStep::MergingCandidates);
     *all_candidates |= &candidates;

     // if the candidates are empty, there is nothing to do;

@@ -12,9 +12,9 @@ use super::interner::Interned;
 use super::Word;
 use crate::heed_codec::{BytesDecodeOwned, StrBEU16Codec};
 use crate::proximity::ProximityPrecision;
-use crate::update::MergeCboRoaringBitmaps;
+use crate::update::MergeDeCboRoaringBitmaps;
 use crate::{
-    CboRoaringBitmapCodec, CboRoaringBitmapLenCodec, Result, SearchContext, U8StrStrCodec,
+    DeCboRoaringBitmapCodec, DeCboRoaringBitmapLenCodec, Result, SearchContext, U8StrStrCodec,
 };

 /// A cache storing pointers to values in the LMDB databases.
@@ -72,11 +72,11 @@ impl<'ctx> DatabaseCache<'ctx> {

         match (bitmap_bytes, universe) {
             (bytes, Some(universe)) => {
-                CboRoaringBitmapCodec::intersection_with_serialized(bytes, universe)
+                DeCboRoaringBitmapCodec::intersection_with_serialized(bytes, universe)
                     .map(Some)
                     .map_err(Into::into)
             }
-            (bytes, None) => CboRoaringBitmapCodec::bytes_decode_owned(bytes)
+            (bytes, None) => DeCboRoaringBitmapCodec::bytes_decode_owned(bytes)
                 .map(Some)
                 .map_err(heed::Error::Decoding)
                 .map_err(Into::into),
@@ -105,7 +105,7 @@ impl<'ctx> DatabaseCache<'ctx> {
             None => return Ok(None),
         };

-        CboRoaringBitmapLenCodec::bytes_decode_owned(bitmap_bytes)
+        DeCboRoaringBitmapLenCodec::bytes_decode_owned(bitmap_bytes)
             .map(Some)
             .map_err(heed::Error::Decoding)
             .map_err(Into::into)
@@ -157,11 +157,11 @@ impl<'ctx> DatabaseCache<'ctx> {

         match (bitmap_bytes, universe) {
             (bytes, Some(universe)) => {
-                CboRoaringBitmapCodec::intersection_with_serialized(bytes, universe)
+                DeCboRoaringBitmapCodec::intersection_with_serialized(bytes, universe)
                     .map(Some)
                     .map_err(Into::into)
             }
-            (bytes, None) => CboRoaringBitmapCodec::bytes_decode_owned(bytes)
+            (bytes, None) => DeCboRoaringBitmapCodec::bytes_decode_owned(bytes)
                 .map(Some)
                 .map_err(heed::Error::Decoding)
                 .map_err(Into::into),
@@ -223,7 +223,7 @@ impl<'ctx> SearchContext<'ctx> {
                     &mut self.db_cache.word_docids,
                     self.index.word_fid_docids.remap_data_type::<Bytes>(),
                     universe,
-                    MergeCboRoaringBitmaps,
+                    MergeDeCboRoaringBitmaps,
                 )
             }
             None => DatabaseCache::get_value(
@@ -255,7 +255,7 @@ impl<'ctx> SearchContext<'ctx> {
                     &mut self.db_cache.exact_word_docids,
                     self.index.word_fid_docids.remap_data_type::<Bytes>(),
                     universe,
-                    MergeCboRoaringBitmaps,
+                    MergeDeCboRoaringBitmaps,
                 )
             }
             None => DatabaseCache::get_value(
@@ -312,7 +312,7 @@ impl<'ctx> SearchContext<'ctx> {
                     &mut self.db_cache.word_prefix_docids,
                     self.index.word_prefix_fid_docids.remap_data_type::<Bytes>(),
                     universe,
-                    MergeCboRoaringBitmaps,
+                    MergeDeCboRoaringBitmaps,
                 )
             }
             None => DatabaseCache::get_value(
@@ -344,7 +344,7 @@ impl<'ctx> SearchContext<'ctx> {
                     &mut self.db_cache.exact_word_prefix_docids,
                     self.index.word_prefix_fid_docids.remap_data_type::<Bytes>(),
                     universe,
-                    MergeCboRoaringBitmaps,
+                    MergeDeCboRoaringBitmaps,
                 )
             }
             None => DatabaseCache::get_value(
@@ -377,7 +377,7 @@ impl<'ctx> SearchContext<'ctx> {
         {
             docids
                 .as_ref()
-                .map(|d| CboRoaringBitmapCodec::bytes_decode_owned(d))
+                .map(|d| DeCboRoaringBitmapCodec::bytes_decode_owned(d))
                 .transpose()
                 .map_err(heed::Error::Decoding)?
         } else {
@@ -395,7 +395,7 @@ impl<'ctx> SearchContext<'ctx> {
                     docids |= word1_docids & word2_docids;
                 }
             }
-            let encoded = CboRoaringBitmapCodec::bytes_encode(&docids)
-                .map(Cow::into_owned)
+            let encoded = DeCboRoaringBitmapCodec::bytes_encode(&docids)
+                .map(Cow::Owned)
                 .map(Some)

@@ -3,13 +3,10 @@ use roaring::{MultiOps, RoaringBitmap};

 use super::query_graph::QueryGraph;
 use super::ranking_rules::{RankingRule, RankingRuleOutput};
-use crate::progress::Progress;
 use crate::score_details::{self, ScoreDetails};
 use crate::search::new::query_graph::QueryNodeData;
 use crate::search::new::query_term::ExactTerm;
-use crate::search::new::ranking_rules::RankingRuleId;
-use crate::search::steps::ComputingBucketSortStep;
-use crate::{CboRoaringBitmapCodec, Result, SearchContext, SearchLogger, TimeBudget};
+use crate::{DeCboRoaringBitmapCodec, Result, SearchContext, SearchLogger, TimeBudget};

 /// A ranking rule that produces 3 disjoint buckets:
 ///
@@ -27,8 +24,8 @@ impl ExactAttribute {
 }

 impl<'ctx> RankingRule<'ctx, QueryGraph> for ExactAttribute {
-    fn id(&self) -> RankingRuleId {
-        RankingRuleId::Exactness
+    fn id(&self) -> String {
+        "exact_attribute".to_owned()
     }

     #[tracing::instrument(level = "trace", skip_all, target = "search::exact_attribute")]
@@ -51,9 +48,7 @@ impl<'ctx> RankingRule<'ctx, QueryGraph> for ExactAttribute {
         _logger: &mut dyn SearchLogger<QueryGraph>,
         universe: &roaring::RoaringBitmap,
         _time_budget: &TimeBudget,
-        progress: &Progress,
     ) -> Result<Option<RankingRuleOutput<QueryGraph>>> {
-        progress.update_progress(ComputingBucketSortStep::from(self.id()));
         let state = std::mem::take(&mut self.state);
         let (state, output) = State::next(state, universe);
         self.state = state;
@@ -224,7 +219,7 @@ impl State {

         match bitmap_bytes {
             Some(bytes) => {
-                CboRoaringBitmapCodec::intersection_with_serialized(bytes, universe)?
+                DeCboRoaringBitmapCodec::intersection_with_serialized(bytes, universe)?
             }
             None => RoaringBitmap::default(),
         }

@@ -6,10 +6,7 @@ use rstar::RTree;
 use super::ranking_rules::{RankingRule, RankingRuleOutput, RankingRuleQueryTrait};
 use crate::documents::geo_sort::{fill_cache, next_bucket};
 use crate::documents::{GeoSortParameter, GeoSortStrategy};
-use crate::progress::Progress;
 use crate::score_details::{self, ScoreDetails};
-use crate::search::new::ranking_rules::RankingRuleId;
-use crate::search::steps::ComputingBucketSortStep;
 use crate::{GeoPoint, Result, SearchContext, SearchLogger, TimeBudget};

 pub struct GeoSort<Q: RankingRuleQueryTrait> {
@@ -76,8 +73,8 @@ impl<Q: RankingRuleQueryTrait> GeoSort<Q> {
 }

 impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for GeoSort<Q> {
-    fn id(&self) -> RankingRuleId {
-        RankingRuleId::GeoSort
+    fn id(&self) -> String {
+        "geo_sort".to_owned()
     }

     #[tracing::instrument(level = "trace", skip_all, target = "search::geo_sort")]
@@ -115,9 +112,7 @@ impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for GeoSort<Q> {
         _logger: &mut dyn SearchLogger<Q>,
         universe: &RoaringBitmap,
         _time_budget: &TimeBudget,
-        progress: &Progress,
     ) -> Result<Option<RankingRuleOutput<Q>>> {
-        progress.update_progress(ComputingBucketSortStep::from(self.id()));
         let query = self.query.as_ref().unwrap().clone();

         next_bucket(

@@ -50,54 +50,51 @@ use super::ranking_rule_graph::{
 };
 use super::small_bitmap::SmallBitmap;
 use super::{QueryGraph, RankingRule, RankingRuleOutput, SearchContext};
-use crate::progress::Progress;
 use crate::score_details::Rank;
 use crate::search::new::query_term::LocatedQueryTermSubset;
 use crate::search::new::ranking_rule_graph::PathVisitor;
-use crate::search::new::ranking_rules::RankingRuleId;
-use crate::search::steps::ComputingBucketSortStep;
 use crate::{Result, TermsMatchingStrategy, TimeBudget};

 pub type Words = GraphBasedRankingRule<WordsGraph>;
 impl GraphBasedRankingRule<WordsGraph> {
     pub fn new(terms_matching_strategy: TermsMatchingStrategy) -> Self {
-        Self::new_with_id(RankingRuleId::Words, Some(terms_matching_strategy))
+        Self::new_with_id("words".to_owned(), Some(terms_matching_strategy))
     }
 }
 pub type Proximity = GraphBasedRankingRule<ProximityGraph>;
 impl GraphBasedRankingRule<ProximityGraph> {
     pub fn new(terms_matching_strategy: Option<TermsMatchingStrategy>) -> Self {
-        Self::new_with_id(RankingRuleId::Proximity, terms_matching_strategy)
+        Self::new_with_id("proximity".to_owned(), terms_matching_strategy)
     }
 }
 pub type Fid = GraphBasedRankingRule<FidGraph>;
 impl GraphBasedRankingRule<FidGraph> {
     pub fn new(terms_matching_strategy: Option<TermsMatchingStrategy>) -> Self {
-        Self::new_with_id(RankingRuleId::AttributePosition, terms_matching_strategy)
+        Self::new_with_id("fid".to_owned(), terms_matching_strategy)
     }
 }
 pub type Position = GraphBasedRankingRule<PositionGraph>;
 impl GraphBasedRankingRule<PositionGraph> {
     pub fn new(terms_matching_strategy: Option<TermsMatchingStrategy>) -> Self {
-        Self::new_with_id(RankingRuleId::WordPosition, terms_matching_strategy)
+        Self::new_with_id("position".to_owned(), terms_matching_strategy)
     }
 }
 pub type Typo = GraphBasedRankingRule<TypoGraph>;
 impl GraphBasedRankingRule<TypoGraph> {
     pub fn new(terms_matching_strategy: Option<TermsMatchingStrategy>) -> Self {
-        Self::new_with_id(RankingRuleId::Typo, terms_matching_strategy)
+        Self::new_with_id("typo".to_owned(), terms_matching_strategy)
     }
 }
 pub type Exactness = GraphBasedRankingRule<ExactnessGraph>;
 impl GraphBasedRankingRule<ExactnessGraph> {
     pub fn new() -> Self {
-        Self::new_with_id(RankingRuleId::Exactness, None)
+        Self::new_with_id("exactness".to_owned(), None)
     }
 }

 /// A generic graph-based ranking rule
 pub struct GraphBasedRankingRule<G: RankingRuleGraphTrait> {
-    id: RankingRuleId,
+    id: String,
     terms_matching_strategy: Option<TermsMatchingStrategy>,
     // When the ranking rule is not iterating over its buckets,
     // its state is `None`.
@@ -105,10 +102,7 @@ pub struct GraphBasedRankingRule<G: RankingRuleGraphTrait> {
 }
 impl<G: RankingRuleGraphTrait> GraphBasedRankingRule<G> {
     /// Creates the ranking rule with the given identifier
-    pub fn new_with_id(
-        id: RankingRuleId,
-        terms_matching_strategy: Option<TermsMatchingStrategy>,
-    ) -> Self {
+    pub fn new_with_id(id: String, terms_matching_strategy: Option<TermsMatchingStrategy>) -> Self {
         Self { id, terms_matching_strategy, state: None }
     }
 }
@@ -130,7 +124,7 @@ pub struct GraphBasedRankingRuleState<G: RankingRuleGraphTrait> {
 }

 impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBasedRankingRule<G> {
-    fn id(&self) -> RankingRuleId {
+    fn id(&self) -> String {
         self.id.clone()
     }

@@ -225,9 +219,7 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
         logger: &mut dyn SearchLogger<QueryGraph>,
         universe: &RoaringBitmap,
         _time_budget: &TimeBudget,
-        progress: &Progress,
     ) -> Result<Option<RankingRuleOutput<QueryGraph>>> {
-        progress.update_progress(ComputingBucketSortStep::from(self.id()));
         // Will crash if `next_bucket` is called before `start_iteration` or after `end_iteration`,
         // should never happen
         let mut state = self.state.take().unwrap();

@@ -14,7 +14,7 @@ use crate::search::new::ranking_rule_graph::{
     ProximityGraph, RankingRuleGraph, RankingRuleGraphTrait, TypoCondition, TypoGraph,
     WordsCondition, WordsGraph,
 };
-use crate::search::new::ranking_rules::{BoxRankingRule, RankingRuleId};
+use crate::search::new::ranking_rules::BoxRankingRule;
 use crate::search::new::{QueryGraph, QueryNode, RankingRule, SearchContext, SearchLogger};
 use crate::Result;

@@ -45,26 +45,13 @@ enum Location {
     Other,
 }

-impl From<RankingRuleId> for Location {
-    fn from(ranking_rule_id: RankingRuleId) -> Self {
-        match ranking_rule_id {
-            RankingRuleId::Words => Self::Words,
-            RankingRuleId::Typo => Self::Typo,
-            RankingRuleId::Proximity => Self::Proximity,
-            RankingRuleId::AttributePosition => Self::Fid,
-            RankingRuleId::WordPosition => Self::Position,
-            _ => Self::Other,
-        }
-    }
-}
-
 #[derive(Default)]
 pub struct VisualSearchLogger {
     initial_query: Option<QueryGraph>,
     initial_query_time: Option<Instant>,
     query_for_universe: Option<QueryGraph>,
     initial_universe: Option<RoaringBitmap>,
-    ranking_rules_ids: Option<Vec<RankingRuleId>>,
+    ranking_rules_ids: Option<Vec<String>>,
     events: Vec<SearchEvents>,
     location: Vec<Location>,
 }
@@ -97,7 +84,14 @@ impl SearchLogger<QueryGraph> for VisualSearchLogger {
             ranking_rule_idx,
             universe_len: universe.len(),
         });
-        self.location.push(ranking_rule.id().into());
+        self.location.push(match ranking_rule.id().as_str() {
+            "words" => Location::Words,
+            "typo" => Location::Typo,
+            "proximity" => Location::Proximity,
+            "fid" => Location::Fid,
+            "position" => Location::Position,
+            _ => Location::Other,
+        });
     }

     fn next_bucket_ranking_rule(

@@ -498,14 +498,12 @@ mod tests {

     use super::*;
     use crate::index::tests::TempIndex;
-    use crate::progress::Progress;
     use crate::{execute_search, filtered_universe, SearchContext, TimeBudget};

     impl<'a> MatcherBuilder<'a> {
         fn new_test(rtxn: &'a heed::RoTxn<'a>, index: &'a TempIndex, query: &str) -> Self {
-            let progress = Progress::default();
             let mut ctx = SearchContext::new(index, rtxn).unwrap();
-            let universe = filtered_universe(ctx.index, ctx.txn, &None, &progress).unwrap();
+            let universe = filtered_universe(ctx.index, ctx.txn, &None).unwrap();
             let crate::search::PartialSearchResult { located_query_terms, .. } = execute_search(
                 &mut ctx,
                 Some(query),
@@ -525,7 +523,6 @@ mod tests {
                 TimeBudget::max(),
                 None,
                 None,
-                &progress,
             )
             .unwrap();

@@ -56,10 +56,8 @@ use crate::constants::RESERVED_GEO_FIELD_NAME;
 use crate::documents::GeoSortParameter;
 use crate::index::PrefixSearch;
 use crate::localized_attributes_rules::LocalizedFieldIds;
-use crate::progress::Progress;
 use crate::score_details::{ScoreDetails, ScoringStrategy};
 use crate::search::new::distinct::apply_distinct_rule;
-use crate::search::steps::SearchStep;
 use crate::vector::Embedder;
 use crate::{
     AscDesc, DocumentId, FieldId, Filter, Index, Member, Result, TermsMatchingStrategy, TimeBudget,
@@ -296,9 +294,7 @@ fn resolve_universe(
     query_graph: &QueryGraph,
     matching_strategy: TermsMatchingStrategy,
     logger: &mut dyn SearchLogger<QueryGraph>,
-    progress: &Progress,
 ) -> Result<RoaringBitmap> {
-    let _step = progress.update_progress_scoped(SearchStep::ResolvingUniverse);
     resolve_maximally_reduced_query_graph(
         ctx,
         initial_universe,
@@ -636,10 +632,8 @@ pub fn filtered_universe(
     index: &Index,
     txn: &RoTxn<'_>,
     filters: &Option<Filter<'_>>,
-    progress: &Progress,
 ) -> Result<RoaringBitmap> {
     Ok(if let Some(filters) = filters {
-        let _step = progress.update_progress_scoped(SearchStep::ComputingFilter);
         filters.evaluate(txn, index)?
     } else {
         index.documents_ids(txn)?
@@ -664,7 +658,6 @@ pub fn execute_vector_search(
     quantized: bool,
     time_budget: TimeBudget,
     ranking_score_threshold: Option<f64>,
-    progress: &Progress,
 ) -> Result<PartialSearchResult> {
     check_sort_criteria(ctx, sort_criteria.as_ref())?;

@@ -699,7 +692,6 @@ pub fn execute_vector_search(
         ranking_score_threshold,
         exhaustive_number_hits,
         max_total_hits,
-        progress,
     )?;

     Ok(PartialSearchResult {
@@ -733,14 +725,12 @@ pub fn execute_search(
     time_budget: TimeBudget,
     ranking_score_threshold: Option<f64>,
     locales: Option<&Vec<Language>>,
-    progress: &Progress,
 ) -> Result<PartialSearchResult> {
     check_sort_criteria(ctx, sort_criteria.as_ref())?;

     let mut used_negative_operator = false;
     let mut located_query_terms = None;
     let query_terms = if let Some(query) = query {
-        progress.update_progress(SearchStep::TokenizingQuery);
         let span = tracing::trace_span!(target: "search::tokens", "tokenizer_builder");
         let entered = span.enter();

@@ -844,14 +834,8 @@ pub fn execute_search(
             terms_matching_strategy,
         )?;

-        universe &= resolve_universe(
-            ctx,
-            &universe,
-            &graph,
-            terms_matching_strategy,
-            query_graph_logger,
-            progress,
-        )?;
+        universe &=
+            resolve_universe(ctx, &universe, &graph, terms_matching_strategy, query_graph_logger)?;

         bucket_sort(
             ctx,
@@ -867,7 +851,6 @@ pub fn execute_search(
             ranking_score_threshold,
             exhaustive_number_hits,
             max_total_hits,
-            progress,
         )?
     } else {
         let ranking_rules =
@@ -886,7 +869,6 @@ pub fn execute_search(
             ranking_score_threshold,
             exhaustive_number_hits,
             max_total_hits,
-            progress,
         )?
     };

@@ -59,19 +59,19 @@ impl RankingRuleGraphTrait for FidGraph {
         let mut all_fields = FxHashSet::default();
         let mut current_max_weight = 0;
         for word in term.term_subset.all_single_words_except_prefix_db(ctx)? {
-            let fields = dbg!(ctx.get_db_word_fids(word.interned())?);
+            let fields = ctx.get_db_word_fids(word.interned())?;
            all_fields.extend(fields);
         }

         for phrase in term.term_subset.all_phrases(ctx)? {
             for &word in phrase.words(ctx).iter().flatten() {
-                let fields = dbg!(ctx.get_db_word_fids(word)?);
+                let fields = ctx.get_db_word_fids(word)?;
                 all_fields.extend(fields);
             }
         }

         if let Some(word_prefix) = term.term_subset.use_prefix_db(ctx) {
-            let fields = dbg!(ctx.get_db_word_prefix_fids(word_prefix.interned())?);
+            let fields = ctx.get_db_word_prefix_fids(word_prefix.interned())?;
             all_fields.extend(fields);
         }

@@ -79,16 +79,27 @@ impl RankingRuleGraphTrait for FidGraph {

         let mut edges = vec![];
         for fid in all_fields.iter().copied() {
-            let weight = weights_map
-                .weight(fid)
-                .ok_or(InternalError::FieldidsWeightsMapMissingEntry { key: fid })?;
-            if weight > current_max_weight {
-                current_max_weight = weight;
+            match weights_map.weight(fid) {
+                Some(weight) => {
+                    if weight > current_max_weight {
+                        current_max_weight = weight;
+                    }
+
+                    edges.push((
+                        weight as u32 * term.term_ids.len() as u32,
+                        conditions_interner
+                            .insert(FidCondition { term: term.clone(), fid: Some(fid) }),
+                    ));
+                }
+                None => {
+                    // Hotfix: this is a temporary solution to handle the case where the weight is not found in the weights map.
+                    // This is due to a database corruption in word_fid_docids database.
+                    tracing::warn!(
+                        "{:?}",
+                        InternalError::FieldidsWeightsMapMissingEntry { key: fid }
+                    );
+                }
             }
-            edges.push((
-                weight as u32 * term.term_ids.len() as u32,
-                conditions_interner.insert(FidCondition { term: term.clone(), fid: Some(fid) }),
-            ));
         }

         // always lookup the max_fid if we don't already and add an artificial condition for max scoring

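Note: the hotfix above trades a hard error for warn-and-skip when a field id has no entry in the weights map (attributed to word_fid_docids corruption). A generic sketch of that defensive-lookup pattern, using a hypothetical plain HashMap rather than the milli FieldidsWeightsMap:

use std::collections::HashMap;

// Returns the weight when present; logs and yields None otherwise, so the
// caller simply produces no edge for that field instead of failing the search.
fn weight_or_skip(weights: &HashMap<u16, u16>, fid: u16) -> Option<u16> {
    match weights.get(&fid) {
        Some(&weight) => Some(weight),
        None => {
            eprintln!("warning: no weight for field id {fid}, skipping");
            None
        }
    }
}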
@@ -4,9 +4,7 @@ use roaring::RoaringBitmap;

 use super::logger::SearchLogger;
 use super::{QueryGraph, SearchContext};
-use crate::progress::Progress;
 use crate::score_details::ScoreDetails;
-use crate::search::steps::ComputingBucketSortStep;
 use crate::{Result, TimeBudget};

 /// An internal trait implemented by only [`PlaceholderQuery`] and [`QueryGraph`]
@@ -26,7 +24,7 @@ pub type BoxRankingRule<'ctx, Query> = Box<dyn RankingRule<'ctx, Query> + 'ctx>;
 /// (i.e. the read transaction and the cache) and over `Query`, which
 /// can be either [`PlaceholderQuery`] or [`QueryGraph`].
 pub trait RankingRule<'ctx, Query: RankingRuleQueryTrait> {
-    fn id(&self) -> RankingRuleId;
+    fn id(&self) -> String;

     /// Prepare the ranking rule such that it can start iterating over its
     /// buckets using [`next_bucket`](RankingRule::next_bucket).
@@ -58,7 +56,6 @@ pub trait RankingRule<'ctx, Query: RankingRuleQueryTrait> {
         logger: &mut dyn SearchLogger<Query>,
         universe: &RoaringBitmap,
         time_budget: &TimeBudget,
-        progress: &Progress,
     ) -> Result<Option<RankingRuleOutput<Query>>>;

     /// Return the next bucket of this ranking rule, if doing so can be done without blocking
@@ -72,9 +69,7 @@ pub trait RankingRule<'ctx, Query: RankingRuleQueryTrait> {
         _ctx: &mut SearchContext<'ctx>,
         _logger: &mut dyn SearchLogger<Query>,
         _universe: &RoaringBitmap,
-        progress: &Progress,
     ) -> Result<Poll<RankingRuleOutput<Query>>> {
-        progress.update_progress(ComputingBucketSortStep::from(self.id()));
         Ok(Poll::Pending)
     }

@@ -98,54 +93,3 @@ pub struct RankingRuleOutput<Q> {
     /// The score for the candidates of the current bucket
     pub score: ScoreDetails,
 }
-
-#[derive(Debug, Clone, PartialEq, Eq)]
-pub enum RankingRuleId {
-    Words,
-    Typo,
-    Proximity,
-    AttributePosition,
-    WordPosition,
-    Exactness,
-    Sort,
-    GeoSort,
-    VectorSort,
-    Asc(String),
-    Desc(String),
-}
-
-impl std::fmt::Display for RankingRuleId {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        match self {
-            RankingRuleId::Words => write!(f, "words"),
-            RankingRuleId::Typo => write!(f, "typo"),
-            RankingRuleId::Proximity => write!(f, "proximity"),
-            RankingRuleId::AttributePosition => write!(f, "attribute_position"),
-            RankingRuleId::WordPosition => write!(f, "word_position"),
-            RankingRuleId::Exactness => write!(f, "exactness"),
-            RankingRuleId::Sort => write!(f, "sort"),
-            RankingRuleId::GeoSort => write!(f, "geo_sort"),
-            RankingRuleId::VectorSort => write!(f, "vector_sort"),
-            RankingRuleId::Asc(field_name) => write!(f, "asc:{}", field_name),
-            RankingRuleId::Desc(field_name) => write!(f, "desc:{}", field_name),
-        }
-    }
-}
-
-impl From<RankingRuleId> for ComputingBucketSortStep {
-    fn from(ranking_rule_id: RankingRuleId) -> Self {
-        match ranking_rule_id {
-            RankingRuleId::Words => Self::Words,
-            RankingRuleId::Typo => Self::Typo,
-            RankingRuleId::Proximity => Self::Proximity,
-            RankingRuleId::AttributePosition => Self::AttributePosition,
-            RankingRuleId::WordPosition => Self::WordPosition,
-            RankingRuleId::Exactness => Self::Exactness,
-            RankingRuleId::Sort => Self::Sort,
-            RankingRuleId::GeoSort => Self::GeoSort,
-            RankingRuleId::VectorSort => Self::VectorSort,
-            RankingRuleId::Asc(_) => Self::Asc,
-            RankingRuleId::Desc(_) => Self::Desc,
-        }
-    }
-}

@@ -5,11 +5,8 @@ use super::logger::SearchLogger;
 use super::{RankingRule, RankingRuleOutput, RankingRuleQueryTrait, SearchContext};
 use crate::heed_codec::facet::{FacetGroupKeyCodec, OrderedF64Codec};
 use crate::heed_codec::{BytesRefCodec, StrRefCodec};
-use crate::progress::Progress;
 use crate::score_details::{self, ScoreDetails};
 use crate::search::facet::{ascending_facet_sort, descending_facet_sort};
-use crate::search::new::ranking_rules::RankingRuleId;
-use crate::search::steps::ComputingBucketSortStep;
 use crate::{FieldId, Index, Result, TimeBudget};

 pub trait RankingRuleOutputIter<'ctx, Query> {
@@ -87,13 +84,9 @@ impl<'ctx, Query> Sort<'ctx, Query> {
 }

 impl<'ctx, Query: RankingRuleQueryTrait> RankingRule<'ctx, Query> for Sort<'ctx, Query> {
-    fn id(&self) -> RankingRuleId {
+    fn id(&self) -> String {
         let Self { field_name, is_ascending, .. } = self;
-        if *is_ascending {
-            RankingRuleId::Asc(field_name.clone())
-        } else {
-            RankingRuleId::Desc(field_name.clone())
-        }
+        format!("{field_name}:{}", if *is_ascending { "asc" } else { "desc" })
     }

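Note: the string id keeps the `field:direction` shape used for sort rules in ranking-rule settings, so a sort rule now reports e.g. "price:asc" or "release_date:desc". A tiny check of the exact expression above:

// What the new id() returns for an ascending sort on "price":
fn main() {
    let field_name = "price";
    let is_ascending = true;
    let id = format!("{field_name}:{}", if is_ascending { "asc" } else { "desc" });
    assert_eq!(id, "price:asc");
}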
#[tracing::instrument(level = "trace", skip_all, target = "search::sort")]
|
||||
@@ -203,9 +196,7 @@ impl<'ctx, Query: RankingRuleQueryTrait> RankingRule<'ctx, Query> for Sort<'ctx,
|
||||
_logger: &mut dyn SearchLogger<Query>,
|
||||
universe: &RoaringBitmap,
|
||||
_time_budget: &TimeBudget,
|
||||
progress: &Progress,
|
||||
) -> Result<Option<RankingRuleOutput<Query>>> {
|
||||
progress.update_progress(ComputingBucketSortStep::from(self.id()));
|
||||
let iter = self.iter.as_mut().unwrap();
|
||||
if let Some(mut bucket) = iter.next_bucket()? {
|
||||
bucket.candidates &= universe;
|
||||
|
||||
@@ -1,5 +1,5 @@
 use crate::index::tests::TempIndex;
-use crate::{db_snap, Criterion, SearchResult, TermsMatchingStrategy};
+use crate::{db_snap, Criterion, Search, SearchResult, TermsMatchingStrategy};

 fn create_index() -> TempIndex {
     let index = TempIndex::new();
@@ -119,7 +119,7 @@ fn test_attribute_fid_simple() {

     let txn = index.read_txn().unwrap();

-    let mut s = index.search(&txn);
+    let mut s = Search::new(&txn, &index);
     s.terms_matching_strategy(TermsMatchingStrategy::All);
     s.query("the quick brown fox jumps over the lazy dog");
     s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -147,7 +147,7 @@ fn test_attribute_fid_ngrams() {

     let txn = index.read_txn().unwrap();

-    let mut s = index.search(&txn);
+    let mut s = Search::new(&txn, &index);
     s.terms_matching_strategy(TermsMatchingStrategy::All);
     s.query("the quick brown fox jumps over the lazy dog");
     s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);

@@ -1,5 +1,5 @@
 use crate::index::tests::TempIndex;
-use crate::{db_snap, Criterion, SearchResult, TermsMatchingStrategy};
+use crate::{db_snap, Criterion, Search, SearchResult, TermsMatchingStrategy};

 fn create_index() -> TempIndex {
     let index = TempIndex::new();
@@ -134,7 +134,7 @@ fn test_attribute_position_simple() {

     let txn = index.read_txn().unwrap();

-    let mut s = index.search(&txn);
+    let mut s = Search::new(&txn, &index);
     s.terms_matching_strategy(TermsMatchingStrategy::All);
     s.query("quick brown");
     s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -150,7 +150,7 @@ fn test_attribute_position_repeated() {

     let txn = index.read_txn().unwrap();

-    let mut s = index.search(&txn);
+    let mut s = Search::new(&txn, &index);
     s.terms_matching_strategy(TermsMatchingStrategy::All);
     s.query("a a a a a");
     s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -167,7 +167,7 @@ fn test_attribute_position_different_fields() {

     let txn = index.read_txn().unwrap();

-    let mut s = index.search(&txn);
+    let mut s = Search::new(&txn, &index);
     s.terms_matching_strategy(TermsMatchingStrategy::All);
     s.query("quick brown");
     s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -184,7 +184,7 @@ fn test_attribute_position_ngrams() {

     let txn = index.read_txn().unwrap();

-    let mut s = index.search(&txn);
+    let mut s = Search::new(&txn, &index);
     s.terms_matching_strategy(TermsMatchingStrategy::All);
     s.query("quick brown");
     s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);

@@ -14,7 +14,7 @@ use crate::score_details::{ScoreDetails, ScoringStrategy};
 use crate::update::Setting;
 use crate::vector::settings::EmbeddingSettings;
 use crate::vector::{Embedder, EmbedderOptions};
-use crate::{Criterion, Filter, FilterableAttributesRule, TimeBudget};
+use crate::{Criterion, Filter, FilterableAttributesRule, Search, TimeBudget};

 fn create_index() -> TempIndex {
     let index = TempIndex::new();
@@ -61,7 +61,7 @@ fn basic_degraded_search() {
     let index = create_index();
     let rtxn = index.read_txn().unwrap();

-    let mut search = index.search(&rtxn);
+    let mut search = Search::new(&rtxn, &index);
     search.query("hello puppy kefir");
     search.limit(3);
     search.time_budget(TimeBudget::new(Duration::from_millis(0)));
@@ -75,7 +75,7 @@ fn degraded_search_cannot_skip_filter() {
     let index = create_index();
     let rtxn = index.read_txn().unwrap();

-    let mut search = index.search(&rtxn);
+    let mut search = Search::new(&rtxn, &index);
     search.query("hello puppy kefir");
     search.limit(100);
     search.time_budget(TimeBudget::new(Duration::from_millis(0)));
@@ -96,7 +96,7 @@ fn degraded_search_and_score_details() {
     let index = create_index();
     let rtxn = index.read_txn().unwrap();

-    let mut search = index.search(&rtxn);
+    let mut search = Search::new(&rtxn, &index);
     search.query("hello puppy kefir");
     search.limit(4);
     search.scoring_strategy(ScoringStrategy::Detailed);
@@ -560,7 +560,7 @@ fn degraded_search_and_score_details_vector() {
         .unwrap();

     let rtxn = index.read_txn().unwrap();
-    let mut search = index.search(&rtxn);
+    let mut search = Search::new(&rtxn, &index);

     let embedder = Arc::new(
         Embedder::new(

@@ -20,7 +20,7 @@ use maplit::hashset;
use super::collect_field_values;
use crate::index::tests::TempIndex;
use crate::{
-AscDesc, Criterion, FilterableAttributesRule, Index, Member, SearchResult,
+AscDesc, Criterion, FilterableAttributesRule, Index, Member, Search, SearchResult,
TermsMatchingStrategy,
};

@@ -246,7 +246,7 @@ fn test_distinct_placeholder_no_ranking_rules() {

let txn = index.read_txn().unwrap();

-let mut s = index.search(&txn);
+let mut s = Search::new(&txn, &index);
s.distinct(S("letter"));
let SearchResult { documents_ids, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 2, 5, 8, 9, 15, 18, 20, 21, 24, 25, 26]");
@@ -275,7 +275,7 @@ fn test_distinct_at_search_placeholder_no_ranking_rules() {

let txn = index.read_txn().unwrap();

-let s = index.search(&txn);
+let s = Search::new(&txn, &index);
let SearchResult { documents_ids, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 2, 5, 8, 9, 15, 18, 20, 21, 24, 25, 26]");
let distinct_values = verify_distinct(&index, &txn, None, &documents_ids);
@@ -308,7 +308,7 @@ fn test_distinct_placeholder_sort() {

let txn = index.read_txn().unwrap();

-let mut s = index.search(&txn);
+let mut s = Search::new(&txn, &index);
s.sort_criteria(vec![AscDesc::Desc(Member::Field(S("rank1")))]);

let SearchResult { documents_ids, .. } = s.execute().unwrap();
@@ -348,7 +348,7 @@ fn test_distinct_placeholder_sort() {
]
"###);

-let mut s = index.search(&txn);
+let mut s = Search::new(&txn, &index);
s.sort_criteria(vec![AscDesc::Desc(Member::Field(S("letter")))]);

let SearchResult { documents_ids, .. } = s.execute().unwrap();
@@ -388,7 +388,7 @@ fn test_distinct_placeholder_sort() {
]
"###);

-let mut s = index.search(&txn);
+let mut s = Search::new(&txn, &index);
s.sort_criteria(vec![
AscDesc::Desc(Member::Field(S("letter"))),
AscDesc::Desc(Member::Field(S("rank1"))),
@@ -443,7 +443,7 @@ fn test_distinct_words() {

let txn = index.read_txn().unwrap();

-let mut s = index.search(&txn);
+let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.query("the quick brown fox jumps over the lazy dog");

@@ -496,7 +496,7 @@ fn test_distinct_sort_words() {

let txn = index.read_txn().unwrap();

-let mut s = index.search(&txn);
+let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.query("the quick brown fox jumps over the lazy dog");
s.sort_criteria(vec![AscDesc::Desc(Member::Field(S("letter")))]);
@@ -569,7 +569,7 @@ fn test_distinct_all_candidates() {

let txn = index.read_txn().unwrap();

-let mut s = index.search(&txn);
+let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.sort_criteria(vec![AscDesc::Desc(Member::Field(S("rank1")))]);
s.exhaustive_number_hits(true);
@@ -592,7 +592,7 @@ fn test_distinct_typo() {

let txn = index.read_txn().unwrap();

-let mut s = index.search(&txn);
+let mut s = Search::new(&txn, &index);
s.query("the quick brown fox jumps over the lazy dog");
s.terms_matching_strategy(TermsMatchingStrategy::Last);
@@ -21,7 +21,7 @@ This module tests the following properties about the exactness ranking rule:

use crate::index::tests::TempIndex;
use crate::search::new::tests::collect_field_values;
-use crate::{Criterion, SearchResult, TermsMatchingStrategy};
+use crate::{Criterion, Search, SearchResult, TermsMatchingStrategy};

fn create_index_simple_ordered() -> TempIndex {
let index = TempIndex::new();
@@ -471,7 +471,7 @@ fn test_exactness_simple_ordered() {

let txn = index.read_txn().unwrap();

-let mut s = index.search(&txn);
+let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.query("the quick brown fox jumps over the lazy dog");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -503,7 +503,7 @@ fn test_exactness_simple_reversed() {

let txn = index.read_txn().unwrap();

-let mut s = index.search(&txn);
+let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.query("the quick brown fox jumps over the lazy dog");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -526,7 +526,7 @@ fn test_exactness_simple_reversed() {
]
"###);

-let mut s = index.search(&txn);
+let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.query("the quick brown fox jumps over the lazy dog");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -556,7 +556,7 @@ fn test_exactness_simple_random() {

let txn = index.read_txn().unwrap();

-let mut s = index.search(&txn);
+let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.query("the quick brown fox jumps over the lazy dog");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -585,7 +585,7 @@ fn test_exactness_attribute_starts_with_simple() {

let txn = index.read_txn().unwrap();

-let mut s = index.search(&txn);
+let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.query("this balcony");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -611,7 +611,7 @@ fn test_exactness_attribute_starts_with_phrase() {

let txn = index.read_txn().unwrap();

-let mut s = index.search(&txn);
+let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.query("\"overlooking the sea\" is a beautiful balcony");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -631,7 +631,7 @@ fn test_exactness_attribute_starts_with_phrase() {
]
"###);

-let mut s = index.search(&txn);
+let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.query("overlooking the sea is a beautiful balcony");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -660,7 +660,7 @@ fn test_exactness_all_candidates_with_typo() {

let txn = index.read_txn().unwrap();

-let mut s = index.search(&txn);
+let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.query("overlocking the sea is a beautiful balcony");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -696,7 +696,7 @@ fn test_exactness_after_words() {

let txn = index.read_txn().unwrap();

-let mut s = index.search(&txn);
+let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.query("the quick brown fox jumps over the lazy dog");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -744,7 +744,7 @@ fn test_words_after_exactness() {

let txn = index.read_txn().unwrap();

-let mut s = index.search(&txn);
+let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.query("the quick brown fox jumps over the lazy dog");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -792,7 +792,7 @@ fn test_proximity_after_exactness() {

let txn = index.read_txn().unwrap();

-let mut s = index.search(&txn);
+let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.query("the quick brown fox jumps over the lazy dog");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -829,7 +829,7 @@ fn test_proximity_after_exactness() {

let txn = index.read_txn().unwrap();

-let mut s = index.search(&txn);
+let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.query("the quick brown fox jumps over the lazy dog");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -862,7 +862,7 @@ fn test_exactness_followed_by_typo_prefer_no_typo_prefix() {

let txn = index.read_txn().unwrap();

-let mut s = index.search(&txn);
+let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.query("quick brown fox extra");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -897,7 +897,7 @@ fn test_typo_followed_by_exactness() {

let txn = index.read_txn().unwrap();

-let mut s = index.search(&txn);
+let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.query("extraordinarily quick brown fox");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -82,7 +82,7 @@ fn test_geo_sort() {

let rtxn = index.read_txn().unwrap();

-let mut s = index.search(&rtxn);
+let mut s = Search::new(&rtxn, &index);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);

s.sort_criteria(vec![AscDesc::Asc(Member::Geo([0., 0.]))]);
@@ -118,7 +118,7 @@ fn test_geo_sort_with_following_ranking_rules() {

let rtxn = index.read_txn().unwrap();

-let mut s = index.search(&rtxn);
+let mut s = Search::new(&rtxn, &index);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.sort_criteria(vec![
AscDesc::Asc(Member::Geo([0., 0.])),
@@ -159,7 +159,7 @@ fn test_geo_sort_reached_max_bucket_size() {

let rtxn = index.read_txn().unwrap();

-let mut s = index.search(&rtxn);
+let mut s = Search::new(&rtxn, &index);
s.geo_max_bucket_size(2);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.sort_criteria(vec![
@@ -219,7 +219,7 @@ fn test_geo_sort_around_the_edge_of_the_flat_earth() {

let rtxn = index.read_txn().unwrap();

-let mut s = index.search(&rtxn);
+let mut s = Search::new(&rtxn, &index);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);

// --- asc
@@ -295,7 +295,7 @@ fn geo_sort_mixed_with_words() {

let rtxn = index.read_txn().unwrap();

-let mut s = index.search(&rtxn);
+let mut s = Search::new(&rtxn, &index);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.sort_criteria(vec![AscDesc::Asc(Member::Geo([0., 0.]))]);

@@ -406,7 +406,7 @@ fn geo_sort_without_any_geo_faceted_documents() {

let rtxn = index.read_txn().unwrap();

-let mut s = index.search(&rtxn);
+let mut s = Search::new(&rtxn, &index);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.sort_criteria(vec![AscDesc::Asc(Member::Geo([0., 0.]))]);
@@ -1,5 +1,5 @@
use crate::index::tests::TempIndex;
-use crate::SearchResult;
+use crate::{Search, SearchResult};

#[test]
fn test_kanji_language_detection() {
@@ -14,7 +14,7 @@ fn test_kanji_language_detection() {
.unwrap();

let txn = index.write_txn().unwrap();
-let mut search = index.search(&txn);
+let mut search = Search::new(&txn, &index);

search.query("東京");
let SearchResult { documents_ids, .. } = search.execute().unwrap();
@@ -19,7 +19,7 @@ This module tests the following properties:

use crate::index::tests::TempIndex;
use crate::search::new::tests::collect_field_values;
-use crate::{Criterion, SearchResult, TermsMatchingStrategy};
+use crate::{Criterion, Search, SearchResult, TermsMatchingStrategy};

fn create_index() -> TempIndex {
let index = TempIndex::new();
@@ -78,7 +78,7 @@ fn test_2gram_simple() {

let txn = index.read_txn().unwrap();

-let mut s = index.search(&txn);
+let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("sun flower");
@@ -109,7 +109,7 @@ fn test_3gram_simple() {

let txn = index.read_txn().unwrap();

-let mut s = index.search(&txn);
+let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("sun flower s are");
let SearchResult { documents_ids, .. } = s.execute().unwrap();
@@ -129,7 +129,7 @@ fn test_2gram_typo() {
let index = create_index();
let txn = index.read_txn().unwrap();

-let mut s = index.search(&txn);
+let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("sun flawer");
let SearchResult { documents_ids, .. } = s.execute().unwrap();
@@ -159,7 +159,7 @@ fn test_no_disable_ngrams() {

let txn = index.read_txn().unwrap();

-let mut s = index.search(&txn);
+let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("sun flower ");
let SearchResult { documents_ids, .. } = s.execute().unwrap();
@@ -185,7 +185,7 @@ fn test_2gram_prefix() {

let txn = index.read_txn().unwrap();

-let mut s = index.search(&txn);
+let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("sun flow");
let SearchResult { documents_ids, .. } = s.execute().unwrap();
@@ -214,7 +214,7 @@ fn test_3gram_prefix() {

let txn = index.read_txn().unwrap();

-let mut s = index.search(&txn);
+let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("su nf l");
let SearchResult { documents_ids, .. } = s.execute().unwrap();
@@ -237,7 +237,7 @@ fn test_split_words() {
let index = create_index();
let txn = index.read_txn().unwrap();

-let mut s = index.search(&txn);
+let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("sunflower ");
let SearchResult { documents_ids, .. } = s.execute().unwrap();
@@ -266,7 +266,7 @@ fn test_disable_split_words() {

let txn = index.read_txn().unwrap();

-let mut s = index.search(&txn);
+let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("sunflower ");
let SearchResult { documents_ids, .. } = s.execute().unwrap();
@@ -286,7 +286,7 @@ fn test_2gram_split_words() {
let index = create_index();
let txn = index.read_txn().unwrap();

-let mut s = index.search(&txn);
+let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("sunf lower");
let SearchResult { documents_ids, .. } = s.execute().unwrap();
@@ -310,7 +310,7 @@ fn test_3gram_no_split_words() {
let index = create_index();
let txn = index.read_txn().unwrap();

-let mut s = index.search(&txn);
+let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("sunf lo wer");
let SearchResult { documents_ids, .. } = s.execute().unwrap();
@@ -333,7 +333,7 @@ fn test_3gram_no_typos() {
let index = create_index();
let txn = index.read_txn().unwrap();

-let mut s = index.search(&txn);
+let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("sunf la wer");
let SearchResult { documents_ids, .. } = s.execute().unwrap();
@@ -352,7 +352,7 @@ fn test_no_ngram_phrases() {
let index = create_index();
let txn = index.read_txn().unwrap();

-let mut s = index.search(&txn);
+let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("\"sun\" flower");
let SearchResult { documents_ids, .. } = s.execute().unwrap();
@@ -366,7 +366,7 @@ fn test_no_ngram_phrases() {
]
"###);

-let mut s = index.search(&txn);
+let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("\"sun\" \"flower\"");
let SearchResult { documents_ids, .. } = s.execute().unwrap();
@@ -385,7 +385,7 @@ fn test_short_split_words() {
let index = create_index();
let txn = index.read_txn().unwrap();

-let mut s = index.search(&txn);
+let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("xyz");
let SearchResult { documents_ids, .. } = s.execute().unwrap();
@@ -412,7 +412,7 @@ fn test_split_words_never_disabled() {

let txn = index.read_txn().unwrap();

-let mut s = index.search(&txn);
+let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("the sunflower is tall");
let SearchResult { documents_ids, .. } = s.execute().unwrap();
@@ -18,7 +18,7 @@ use std::collections::BTreeMap;

use crate::index::tests::TempIndex;
use crate::search::new::tests::collect_field_values;
-use crate::{Criterion, SearchResult, TermsMatchingStrategy};
+use crate::{Criterion, Search, SearchResult, TermsMatchingStrategy};

fn create_simple_index() -> TempIndex {
let index = TempIndex::new();
@@ -268,7 +268,7 @@ fn test_proximity_simple() {
let index = create_simple_index();
let txn = index.read_txn().unwrap();

-let mut s = index.search(&txn);
+let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("the quick brown fox jumps over the lazy dog");
let SearchResult { documents_ids, .. } = s.execute().unwrap();
@@ -295,7 +295,7 @@ fn test_proximity_split_word() {
let index = create_edge_cases_index();
let txn = index.read_txn().unwrap();

-let mut s = index.search(&txn);
+let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("sunflower wilting");
@@ -315,7 +315,7 @@ fn test_proximity_split_word() {
]
"###);

-let mut s = index.search(&txn);
+let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("\"sun flower\" wilting");
@@ -342,7 +342,7 @@ fn test_proximity_split_word() {
.unwrap();
let txn = index.read_txn().unwrap();

-let mut s = index.search(&txn);
+let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("xyz wilting");
@@ -365,7 +365,7 @@ fn test_proximity_prefix_db() {
let index = create_edge_cases_index();
let txn = index.read_txn().unwrap();

-let mut s = index.search(&txn);
+let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("best s");
@@ -390,7 +390,7 @@ fn test_proximity_prefix_db() {
"###);

// Difference when using the `su` prefix, which is not in the prefix DB
-let mut s = index.search(&txn);
+let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("best su");
@@ -417,7 +417,7 @@ fn test_proximity_prefix_db() {
// **proximity** prefix DB. In that case, its sprximity score will always be
// the maximum. This happens for prefixes that are larger than 2 bytes.

-let mut s = index.search(&txn);
+let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("best win");
@@ -441,7 +441,7 @@ fn test_proximity_prefix_db() {

// Now using `wint`, which is not in the prefix DB:

-let mut s = index.search(&txn);
+let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("best wint");
@@ -465,7 +465,7 @@ fn test_proximity_prefix_db() {

// and using `wi` which is in the prefix DB and proximity prefix DB

-let mut s = index.search(&txn);
+let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("best wi");
@@ -8,7 +8,7 @@ implemented.

use crate::index::tests::TempIndex;
use crate::search::new::tests::collect_field_values;
-use crate::{Criterion, SearchResult, TermsMatchingStrategy};
+use crate::{Criterion, Search, SearchResult, TermsMatchingStrategy};

fn create_index() -> TempIndex {
let index = TempIndex::new();
@@ -57,7 +57,7 @@ fn test_trap_basic() {
let index = create_index();
let txn = index.read_txn().unwrap();

-let mut s = index.search(&txn);
+let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("summer holiday");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -17,7 +17,9 @@ use meili_snap::insta;

use crate::index::tests::TempIndex;
use crate::search::new::tests::collect_field_values;
-use crate::{score_details, AscDesc, Criterion, Member, SearchResult, TermsMatchingStrategy};
+use crate::{
+score_details, AscDesc, Criterion, Member, Search, SearchResult, TermsMatchingStrategy,
+};

fn create_index() -> TempIndex {
let index = TempIndex::new();
@@ -182,7 +184,7 @@ fn test_sort() {
let index = create_index();
let txn = index.read_txn().unwrap();

-let mut s = index.search(&txn);
+let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.sort_criteria(vec![AscDesc::Desc(Member::Field(S("letter")))]);
@@ -217,7 +219,7 @@ fn test_sort() {
]
"###);

-let mut s = index.search(&txn);
+let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.sort_criteria(vec![AscDesc::Desc(Member::Field(S("rank")))]);
@@ -252,7 +254,7 @@ fn test_sort() {
]
"###);

-let mut s = index.search(&txn);
+let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.sort_criteria(vec![AscDesc::Asc(Member::Field(S("vague")))]);
@@ -287,7 +289,7 @@ fn test_sort() {
]
"###);

-let mut s = index.search(&txn);
+let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.sort_criteria(vec![AscDesc::Desc(Member::Field(S("vague")))]);
@@ -336,7 +338,7 @@ fn test_redacted() {

let txn = index.read_txn().unwrap();

-let mut s = index.search(&txn);
+let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.sort_criteria(vec![
@@ -13,7 +13,7 @@ use std::collections::BTreeSet;
use std::iter::FromIterator;

use crate::index::tests::TempIndex;
-use crate::{SearchResult, TermsMatchingStrategy};
+use crate::{Search, SearchResult, TermsMatchingStrategy};

fn create_index() -> TempIndex {
let index = TempIndex::new();
@@ -79,7 +79,7 @@ fn test_ignore_stop_words() {
let txn = index.read_txn().unwrap();

// `the` is treated as a prefix here, so it's not ignored
-let mut s = index.search(&txn);
+let mut s = Search::new(&txn, &index);
s.query("xyz to the");
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -132,7 +132,7 @@ fn test_ignore_stop_words() {
"###);

// `xyz` is treated as a prefix here, so it's not ignored
-let mut s = index.search(&txn);
+let mut s = Search::new(&txn, &index);
s.query("to the xyz");
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -185,7 +185,7 @@ fn test_ignore_stop_words() {
"###);

// `xyz` is not treated as a prefix anymore because of the trailing space, so it's ignored
-let mut s = index.search(&txn);
+let mut s = Search::new(&txn, &index);
s.query("to the xyz ");
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -237,7 +237,7 @@ fn test_ignore_stop_words() {
]
"###);

-let mut s = index.search(&txn);
+let mut s = Search::new(&txn, &index);
s.query("to the dragon xyz");
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -296,7 +296,7 @@ fn test_stop_words_in_phrase() {

let txn = index.read_txn().unwrap();

-let mut s = index.search(&txn);
+let mut s = Search::new(&txn, &index);
s.query("\"how to train your dragon\"");
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -389,7 +389,7 @@ fn test_stop_words_in_phrase() {
]
"###);

-let mut s = index.search(&txn);
+let mut s = Search::new(&txn, &index);
s.query("how \"to\" train \"the");
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -441,7 +441,7 @@ fn test_stop_words_in_phrase() {
]
"###);

-let mut s = index.search(&txn);
+let mut s = Search::new(&txn, &index);
s.query("how \"to\" train \"The dragon");
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -449,7 +449,7 @@ fn test_stop_words_in_phrase() {
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[3, 6, 5]");
insta::assert_snapshot!(format!("{document_scores:#?}"));

-let mut s = index.search(&txn);
+let mut s = Search::new(&txn, &index);
s.query("\"to\"");
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -22,7 +22,7 @@ use std::collections::BTreeMap;

use crate::index::tests::TempIndex;
use crate::search::new::tests::collect_field_values;
-use crate::{Criterion, SearchResult, TermsMatchingStrategy};
+use crate::{Criterion, Search, SearchResult, TermsMatchingStrategy};

fn create_index() -> TempIndex {
let index = TempIndex::new();
@@ -157,7 +157,7 @@ fn test_no_typo() {

let txn = index.read_txn().unwrap();

-let mut s = index.search(&txn);
+let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("the quick brown fox jumps over the lazy dog");
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
@@ -182,7 +182,7 @@ fn test_default_typo() {
insta::assert_debug_snapshot!(tt, @"9");

// 0 typo
-let mut s = index.search(&txn);
+let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("the quick brown fox jumps over the lazy dog");
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
@@ -202,7 +202,7 @@ fn test_default_typo() {
"###);

// 1 typo on one word, replaced letter
-let mut s = index.search(&txn);
+let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("the quack brown fox jumps over the lazy dog");
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
@@ -216,7 +216,7 @@ fn test_default_typo() {
"###);

// 1 typo on one word, missing letter, extra letter
-let mut s = index.search(&txn);
+let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("the quicest brownest fox jummps over the laziest dog");
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
@@ -235,7 +235,7 @@ fn test_phrase_no_typo_allowed() {
let index = create_index();
let txn = index.read_txn().unwrap();

-let mut s = index.search(&txn);
+let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("the \"quick brewn\" fox jumps over the lazy dog");
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
@@ -265,7 +265,7 @@ fn test_typo_exact_word() {
insta::assert_debug_snapshot!(tt, @"9");

// don't match quivk
-let mut s = index.search(&txn);
+let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("the quick brown fox jumps over the lazy dog");
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
@@ -279,7 +279,7 @@ fn test_typo_exact_word() {
"###);

// Don't match quick
-let mut s = index.search(&txn);
+let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("the quack brown fox jumps over the lazy dog");
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
@@ -287,7 +287,7 @@ fn test_typo_exact_word() {
insta::assert_snapshot!(format!("{document_scores:?}"), @"[]");

// words not in exact_words (quicest, jummps) have normal typo handling
-let mut s = index.search(&txn);
+let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("the quicest brownest fox jummps over the laziest dog");
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
@@ -301,7 +301,7 @@ fn test_typo_exact_word() {
"###);

// exact words do not disable prefix (sunflowering OK, but no sunflowar)
-let mut s = index.search(&txn);
+let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("network interconnection sunflower");
@@ -340,7 +340,7 @@ fn test_typo_exact_attribute() {
insta::assert_debug_snapshot!(tt, @"9");

// Exact match returns both exact attributes and tolerant ones.
-let mut s = index.search(&txn);
+let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("the quick brown fox jumps over the lazy dog");
@@ -365,7 +365,7 @@ fn test_typo_exact_attribute() {
"###);

// 1 typo only returns the tolerant attribute
-let mut s = index.search(&txn);
+let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("the quidk brown fox jumps over the lazy dog");
@@ -386,7 +386,7 @@ fn test_typo_exact_attribute() {
"###);

// combine with exact words
-let mut s = index.search(&txn);
+let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("the quivk brown fox jumps over the lazy dog");
@@ -414,7 +414,7 @@ fn test_typo_exact_attribute() {
"###);

// No result in tolerant attribute
-let mut s = index.search(&txn);
+let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("the quicest brownest fox jummps over the laziest dog");
@@ -428,7 +428,7 @@ fn test_ngram_typos() {
let index = create_index();
let txn = index.read_txn().unwrap();

-let mut s = index.search(&txn);
+let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("the extra lagant fox skyrocketed over the languorous dog");
@@ -442,7 +442,7 @@ fn test_ngram_typos() {
]
"###);

-let mut s = index.search(&txn);
+let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("the ex tra lagant fox skyrocketed over the languorous dog");
@@ -463,7 +463,7 @@ fn test_typo_ranking_rule_not_preceded_by_words_ranking_rule() {

let txn = index.read_txn().unwrap();

-let mut s = index.search(&txn);
+let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("the quick brown fox jumps over the lazy dog");
@@ -499,7 +499,7 @@ fn test_typo_ranking_rule_not_preceded_by_words_ranking_rule() {
})
.unwrap();

-let mut s = index.search(&txn);
+let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("the quick brown fox jumps over the lazy dog");
@@ -517,7 +517,7 @@ fn test_typo_bucketing() {
let txn = index.read_txn().unwrap();

// First do the search with just the Words ranking rule
-let mut s = index.search(&txn);
+let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("network interconnection sunflower");
@@ -545,7 +545,7 @@ fn test_typo_bucketing() {
.unwrap();
let txn = index.read_txn().unwrap();

-let mut s = index.search(&txn);
+let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("network interconnection sunflower");
@@ -564,7 +564,7 @@ fn test_typo_bucketing() {
]
"###);

-let mut s = index.search(&txn);
+let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("network interconnection sun flower");
@@ -600,7 +600,7 @@ fn test_typo_synonyms() {
.unwrap();
let txn = index.read_txn().unwrap();

-let mut s = index.search(&txn);
+let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("the quick brown fox jumps over the lackadaisical dog");
@@ -616,7 +616,7 @@ fn test_typo_synonyms() {
]
"###);

-let mut s = index.search(&txn);
+let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("the fast brownish fox jumps over the lackadaisical dog");
@@ -17,7 +17,7 @@ because the typo ranking rule before it only used the derivation `beautiful`.

use crate::index::tests::TempIndex;
use crate::search::new::tests::collect_field_values;
-use crate::{Criterion, SearchResult, TermsMatchingStrategy};
+use crate::{Criterion, Search, SearchResult, TermsMatchingStrategy};

fn create_index() -> TempIndex {
let index = TempIndex::new();
@@ -87,7 +87,7 @@ fn test_trap_basic_and_complex1() {
let index = create_index();
let txn = index.read_txn().unwrap();

-let mut s = index.search(&txn);
+let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("beautiful summer");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -110,7 +110,7 @@ fn test_trap_complex2() {
let index = create_index();
let txn = index.read_txn().unwrap();

-let mut s = index.search(&txn);
+let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("delicious sweet dessert");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -14,7 +14,7 @@ This module tests the following properties:

use crate::index::tests::TempIndex;
use crate::search::new::tests::collect_field_values;
-use crate::{Criterion, SearchResult, TermsMatchingStrategy};
+use crate::{Criterion, Search, SearchResult, TermsMatchingStrategy};

fn create_index() -> TempIndex {
let index = TempIndex::new();
@@ -131,7 +131,7 @@ fn test_words_tms_last_simple() {
let index = create_index();

let txn = index.read_txn().unwrap();
-let mut s = index.search(&txn);
+let mut s = Search::new(&txn, &index);
s.query("the quick brown fox jumps over the lazy dog");
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -166,7 +166,7 @@ fn test_words_tms_last_simple() {
]
"###);

-let mut s = index.search(&txn);
+let mut s = Search::new(&txn, &index);
s.query("extravagant the quick brown fox jumps over the lazy dog");
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -180,7 +180,7 @@ fn test_words_tms_last_phrase() {
let index = create_index();

let txn = index.read_txn().unwrap();
-let mut s = index.search(&txn);
+let mut s = Search::new(&txn, &index);
s.query("\"the quick brown fox\" jumps over the lazy dog");
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -205,7 +205,7 @@ fn test_words_tms_last_phrase() {
]
"###);

-let mut s = index.search(&txn);
+let mut s = Search::new(&txn, &index);
s.query("\"the quick brown fox\" jumps over the \"lazy\" dog");
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -227,7 +227,7 @@ fn test_words_tms_last_phrase() {
]
"###);

-let mut s = index.search(&txn);
+let mut s = Search::new(&txn, &index);
s.query("\"the quick brown fox jumps over the lazy dog\"");
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -243,7 +243,7 @@ fn test_words_tms_last_phrase() {
]
"###);

-let mut s = index.search(&txn);
+let mut s = Search::new(&txn, &index);
s.query("\"the quick brown fox jumps over the lazy dog");
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -270,7 +270,7 @@ fn test_words_proximity_tms_last_simple() {
.unwrap();

let txn = index.read_txn().unwrap();
-let mut s = index.search(&txn);
+let mut s = Search::new(&txn, &index);
s.query("the quick brown fox jumps over the lazy dog");
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -305,7 +305,7 @@ fn test_words_proximity_tms_last_simple() {
]
"###);

-let mut s = index.search(&txn);
+let mut s = Search::new(&txn, &index);
s.query("the brown quick fox jumps over the lazy dog");
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -351,7 +351,7 @@ fn test_words_proximity_tms_last_phrase() {
.unwrap();

let txn = index.read_txn().unwrap();
-let mut s = index.search(&txn);
+let mut s = Search::new(&txn, &index);
s.query("the \"quick brown\" fox jumps over the lazy dog");
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -382,7 +382,7 @@ fn test_words_proximity_tms_last_phrase() {
]
"###);

-let mut s = index.search(&txn);
+let mut s = Search::new(&txn, &index);
s.query("the \"quick brown\" \"fox jumps\" over the lazy dog");
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -421,7 +421,7 @@ fn test_words_tms_all() {
.unwrap();

let txn = index.read_txn().unwrap();
-let mut s = index.search(&txn);
+let mut s = Search::new(&txn, &index);
s.query("the quick brown fox jumps over the lazy dog");
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -447,7 +447,7 @@ fn test_words_tms_all() {
]
"###);

-let mut s = index.search(&txn);
+let mut s = Search::new(&txn, &index);
s.query("extravagant");
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -6,10 +6,7 @@ use roaring::RoaringBitmap;

use super::ranking_rules::{RankingRule, RankingRuleOutput, RankingRuleQueryTrait};
use super::VectorStoreStats;
-use crate::progress::Progress;
use crate::score_details::{self, ScoreDetails};
-use crate::search::new::ranking_rules::RankingRuleId;
-use crate::search::steps::ComputingBucketSortStep;
use crate::vector::{DistributionShift, Embedder, VectorStore};
use crate::{DocumentId, Result, SearchContext, SearchLogger, TimeBudget};

@@ -97,8 +94,8 @@ impl<Q: RankingRuleQueryTrait> VectorSort<Q> {
}

impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for VectorSort<Q> {
-fn id(&self) -> RankingRuleId {
-RankingRuleId::VectorSort
+fn id(&self) -> String {
+"vector_sort".to_owned()
}

#[tracing::instrument(level = "trace", skip_all, target = "search::vector_sort")]
@@ -126,9 +123,7 @@ impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for VectorSort<Q> {
_logger: &mut dyn SearchLogger<Q>,
universe: &RoaringBitmap,
time_budget: &TimeBudget,
-progress: &Progress,
) -> Result<Option<RankingRuleOutput<Q>>> {
-progress.update_progress(ComputingBucketSortStep::from(self.id()));
let query = self.query.as_ref().unwrap().clone();
let vector_candidates = &self.vector_candidates & universe;

@@ -163,7 +158,7 @@ impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for VectorSort<Q> {
}));
}

-self.next_bucket(ctx, _logger, universe, time_budget, progress)
+self.next_bucket(ctx, _logger, universe, time_budget)
}

#[tracing::instrument(level = "trace", skip_all, target = "search::vector_sort")]
@@ -176,9 +171,7 @@ impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for VectorSort<Q> {
_ctx: &mut SearchContext<'ctx>,
_logger: &mut dyn SearchLogger<Q>,
universe: &RoaringBitmap,
-progress: &Progress,
) -> Result<Poll<RankingRuleOutput<Q>>> {
-progress.update_progress(ComputingBucketSortStep::from(self.id()));
let query = self.query.as_ref().unwrap().clone();
let vector_candidates = &self.vector_candidates & universe;
@@ -57,12 +57,7 @@ impl<'a> Similar<'a> {
}

pub fn execute(&self) -> Result<SearchResult> {
-let mut universe = filtered_universe(
-self.index,
-self.rtxn,
-&self.filter,
-&crate::progress::Progress::default(),
-)?;
+let mut universe = filtered_universe(self.index, self.rtxn, &self.filter)?;

// we never want to receive the docid
universe.remove(self.id);
@@ -1,52 +0,0 @@
-use crate::make_enum_progress;
-
-make_enum_progress! {
-pub enum SearchStep {
-PreparingSearch,
-TokenizingQuery,
-EmbeddingQuery,
-ComputingFilter,
-ResolvingUniverse,
-ComputingBucketSort,
-FormattingResults,
-ComputingFacetDistribution,
-FederatingResults,
-ApplyingPersonalization,
-}
-}
-
-make_enum_progress! {
-pub enum ComputingBucketSortStep {
-Initializing,
-MergingCandidates,
-ApplyingDistinctRule,
-Words,
-Typo,
-Proximity,
-AttributePosition,
-WordPosition,
-Exactness,
-Sort,
-GeoSort,
-VectorSort,
-Asc,
-Desc,
-}
-}
-
-make_enum_progress! {
-pub enum FederatingResultsStep {
-WaitingForRemoteResults,
-MergingFacets,
-MergingResults,
-}
-}
-
-make_enum_progress! {
-pub enum TotalProcessingTimeStep {
-WaitingForPermit,
-Searching,
-FetchingSimilar,
-PublishingAnalytics,
-}
-}
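
The file deleted above declared the search pipeline's progress steps through the `make_enum_progress!` macro. That macro's definition is not shown anywhere in this compare, so the following hand-rolled Rust equivalent is purely illustrative of what such a step enum buys you (a fixed, ordered, nameable list of steps), not the macro's real expansion:

// Illustrative stand-in only; not the actual `make_enum_progress!` expansion.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum SearchStep {
    PreparingSearch,
    TokenizingQuery,
    EmbeddingQuery,
    ComputingFilter,
}

impl SearchStep {
    const ALL: [SearchStep; 4] = [
        SearchStep::PreparingSearch,
        SearchStep::TokenizingQuery,
        SearchStep::EmbeddingQuery,
        SearchStep::ComputingFilter,
    ];

    /// 0-based position of this step, usable as the `current` value of a
    /// progress report whose `total` is `ALL.len()`.
    fn position(self) -> usize {
        Self::ALL.iter().position(|&s| s == self).unwrap()
    }
}
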
@@ -26,7 +26,6 @@ pub(crate) struct TempIndex {
pub inner: Index,
pub indexer_config: IndexerConfig,
pub index_documents_config: IndexDocumentsConfig,
-pub progress: Progress,
_tempdir: TempDir,
}

@@ -48,9 +47,7 @@ impl TempIndex {
let inner = Index::new(options, _tempdir.path(), true).unwrap();
let indexer_config = IndexerConfig::default();
let index_documents_config = IndexDocumentsConfig::default();
-let progress = Progress::default();
-
-Self { inner, indexer_config, index_documents_config, progress, _tempdir }
+Self { inner, indexer_config, index_documents_config, _tempdir }
}
/// Creates a temporary index, with a default `4096 * 2000` size. This should be enough for
/// most tests.
@@ -213,10 +210,6 @@ impl TempIndex {
pub fn delete_document(&self, external_document_id: &str) {
self.delete_documents(vec![external_document_id.to_string()])
}
-
-pub fn search<'a>(&'a self, rtxn: &'a heed::RoTxn<'a>) -> Search<'a> {
-self.inner.search(rtxn, &self.progress)
-}
}

#[test]
@@ -1102,7 +1095,7 @@ fn bug_3021_fourth() {
"###);

let rtxn = index.read_txn().unwrap();
-let search = index.search(&rtxn);
+let search = Search::new(&rtxn, &index);
let SearchResult {
matching_words: _,
candidates: _,
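
The `TempIndex::search` helper removed a few hunks above is what every `index.search(&txn)` call deleted throughout the test hunks earlier in this compare resolved to; with the stored `Progress` field gone, tests construct the search directly via `Search::new(&txn, &index)`. A reduced sketch of that convenience-wrapper pattern, using stand-in types rather than milli's real ones:

// Stand-in types only: a fixture that owns long-lived state and hands out
// a ready-made builder, mirroring the wrapper deleted above.
struct Fixture {
    state: String,
}

struct QueryBuilder<'a> {
    state: &'a str,
}

impl<'a> QueryBuilder<'a> {
    fn new(state: &'a str) -> Self {
        QueryBuilder { state }
    }
}

impl Fixture {
    // Call sites could write `fixture.search()`; deleting this method forces
    // them back to the explicit `QueryBuilder::new(&fixture.state)` form.
    fn search(&self) -> QueryBuilder<'_> {
        QueryBuilder::new(&self.state)
    }
}
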
@@ -14,8 +14,8 @@ use crate::heed_codec::facet::{
use crate::heed_codec::BytesRefCodec;
use crate::update::del_add::{DelAdd, KvReaderDelAdd};
use crate::update::index_documents::{create_writer, valid_lmdb_key, writer_into_reader};
-use crate::update::MergeDeladdCboRoaringBitmaps;
-use crate::{CboRoaringBitmapCodec, CboRoaringBitmapLenCodec, FieldId, Index, Result};
+use crate::update::MergeDeladdDeCboRoaringBitmaps;
+use crate::{DeCboRoaringBitmapCodec, DeCboRoaringBitmapLenCodec, FieldId, Index, Result};

/// Algorithm to insert elememts into the `facet_id_(string/f64)_docids` databases
/// by rebuilding the database "from scratch".
@@ -29,7 +29,7 @@ pub struct FacetsUpdateBulk<'i> {
facet_type: FacetType,
field_ids: Vec<FieldId>,
// None if level 0 does not need to be updated
-delta_data: Option<Merger<BufReader<File>, MergeDeladdCboRoaringBitmaps>>,
+delta_data: Option<Merger<BufReader<File>, MergeDeladdDeCboRoaringBitmaps>>,
}

impl<'i> FacetsUpdateBulk<'i> {
@@ -37,7 +37,7 @@ impl<'i> FacetsUpdateBulk<'i> {
index: &'i Index,
field_ids: Vec<FieldId>,
facet_type: FacetType,
-delta_data: Merger<BufReader<File>, MergeDeladdCboRoaringBitmaps>,
+delta_data: Merger<BufReader<File>, MergeDeladdDeCboRoaringBitmaps>,
group_size: u8,
min_level_size: u8,
) -> FacetsUpdateBulk<'i> {
@@ -90,7 +90,7 @@ impl<'i> FacetsUpdateBulk<'i> {
/// Implementation of `FacetsUpdateBulk` that is independent of milli's `Index` type
pub(crate) struct FacetsUpdateBulkInner<R: std::io::Read + std::io::Seek> {
pub db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
-pub delta_data: Option<Merger<R, MergeDeladdCboRoaringBitmaps>>,
+pub delta_data: Option<Merger<R, MergeDeladdDeCboRoaringBitmaps>>,
pub group_size: u8,
pub min_level_size: u8,
}
@@ -143,6 +143,7 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
}
} else {
let mut buffer = Vec::new();
+let mut tmp_buffer = Vec::new();
let database = self.db.remap_types::<Bytes, Bytes>();

let mut iter = delta_data.into_stream_merger_iter()?;
@@ -162,7 +163,12 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
Some(prev_value) => {
// prev_value is the group size for level 0, followed by the previous bitmap.
let old_bitmap = &prev_value[1..];
-CboRoaringBitmapCodec::merge_deladd_into(value, old_bitmap, &mut buffer)?;
+DeCboRoaringBitmapCodec::merge_deladd_into(
+value,
+old_bitmap,
+&mut buffer,
+&mut tmp_buffer,
+)?;
}
None => {
// it is safe to ignore the del in that case.
@@ -176,7 +182,7 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
};
let new_bitmap = &buffer[1..];
// if the new bitmap is empty, let's remove it
-if CboRoaringBitmapLenCodec::bytes_decode(new_bitmap).unwrap_or_default() == 0 {
+if DeCboRoaringBitmapLenCodec::bytes_decode(new_bitmap).unwrap_or_default() == 0 {
database.delete(wtxn, key)?;
} else {
database.put(wtxn, key, &buffer)?;
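
Nothing in this compare shows the new codec's internals, but the branch name and the `De` prefix on `DeCboRoaringBitmapCodec` point at a delta-encoded layout for the docid bitmaps, and the extra `tmp_buffer` threaded through `merge_deladd_into` above fits that reading: a delta-encoded value generally has to be decoded into scratch space before it can be merged and re-encoded. As a rough, hypothetical Rust illustration of the underlying idea only (not the actual codec format), delta-encoding a sorted docid sequence stores the first id and then only the gaps:

// Hypothetical sketch: gaps between consecutive sorted docids, written as
// LEB128-style varints (7 payload bits per byte, high bit = continuation).
fn delta_encode(docids: &[u32], out: &mut Vec<u8>) {
    let mut prev = 0u32;
    for &id in docids {
        let mut gap = id - prev; // sorted input: never underflows
        loop {
            let byte = (gap & 0x7F) as u8;
            gap >>= 7;
            if gap == 0 {
                out.push(byte);
                break;
            }
            out.push(byte | 0x80);
        }
        prev = id;
    }
}

fn delta_decode(mut bytes: &[u8]) -> Vec<u32> {
    let mut docids = Vec::new();
    let mut prev = 0u32;
    while !bytes.is_empty() {
        // Read one varint gap, then recover the docid by prefix sum.
        let (mut gap, mut shift) = (0u32, 0u32);
        loop {
            let (byte, rest) = bytes.split_first().expect("truncated varint");
            bytes = rest;
            gap |= ((byte & 0x7F) as u32) << shift;
            if byte & 0x80 == 0 {
                break;
            }
            shift += 7;
        }
        prev += gap;
        docids.push(prev);
    }
    docids
}

Small gaps dominate in dense posting lists, so most entries shrink to a single byte; the trade-off is the one visible in the hunk above, where the compact form must be expanded into a temporary buffer before set operations can run on it.
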
@@ -15,8 +15,8 @@ use crate::heed_codec::BytesRefCodec;
 use crate::search::facet::get_highest_level;
 use crate::update::del_add::DelAdd;
 use crate::update::index_documents::valid_lmdb_key;
-use crate::update::MergeDeladdCboRoaringBitmaps;
-use crate::{CboRoaringBitmapCodec, Index, Result};
+use crate::update::MergeDeladdDeCboRoaringBitmaps;
+use crate::{DeCboRoaringBitmapCodec, Index, Result};

 /// Enum used as a return value for the facet incremental indexing.
 ///
@@ -58,14 +58,14 @@ enum ModificationResult {
 /// `facet_id_(string/f64)_docids` databases.
 pub struct FacetsUpdateIncremental {
     inner: FacetsUpdateIncrementalInner,
-    delta_data: Merger<BufReader<File>, MergeDeladdCboRoaringBitmaps>,
+    delta_data: Merger<BufReader<File>, MergeDeladdDeCboRoaringBitmaps>,
 }

 impl FacetsUpdateIncremental {
     pub fn new(
         index: &Index,
         facet_type: FacetType,
-        delta_data: Merger<BufReader<File>, MergeDeladdCboRoaringBitmaps>,
+        delta_data: Merger<BufReader<File>, MergeDeladdDeCboRoaringBitmaps>,
         group_size: u8,
         min_level_size: u8,
         max_group_size: u8,
@@ -112,13 +112,13 @@ impl FacetsUpdateIncremental {
             let value = KvReader::from_slice(value);
             let docids_to_delete = value
                 .get(DelAdd::Deletion)
-                .map(CboRoaringBitmapCodec::bytes_decode)
+                .map(DeCboRoaringBitmapCodec::bytes_decode)
                 .map(|o| o.map_err(heed::Error::Encoding))
                 .transpose()?;

             let docids_to_add = value
                 .get(DelAdd::Addition)
-                .map(CboRoaringBitmapCodec::bytes_decode)
+                .map(DeCboRoaringBitmapCodec::bytes_decode)
                 .map(|o| o.map_err(heed::Error::Encoding))
                 .transpose()?;

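The incremental updater above decodes a deletion bitmap and an addition bitmap out of each DelAdd entry and applies them to whatever is stored. Independent of the milli codecs, that merge is plain bitmap algebra, shown here with the public `roaring` crate (the `merge_deladd` name is ours, not milli's):

```rust
use roaring::RoaringBitmap;

/// Applies a del/add pair to a previously stored bitmap:
/// remove the deleted ids first, then insert the added ones.
fn merge_deladd(
    previous: &RoaringBitmap,
    del: Option<&RoaringBitmap>,
    add: Option<&RoaringBitmap>,
) -> RoaringBitmap {
    let mut output = previous.clone();
    if let Some(del) = del {
        output -= del;
    }
    if let Some(add) = add {
        output |= add;
    }
    output
}

fn main() {
    let previous: RoaringBitmap = (0..10).collect();
    let del: RoaringBitmap = [2, 3].into_iter().collect();
    let add: RoaringBitmap = [42].into_iter().collect();
    let merged = merge_deladd(&previous, Some(&del), Some(&add));
    assert!(merged.contains(42) && !merged.contains(2));
}
```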
@@ -90,7 +90,7 @@ use tracing::debug;

 use self::incremental::FacetsUpdateIncremental;
 use super::settings::{InnerIndexSettings, InnerIndexSettingsDiff};
-use super::{FacetsUpdateBulk, MergeDeladdBtreesetString, MergeDeladdCboRoaringBitmaps};
+use super::{FacetsUpdateBulk, MergeDeladdBtreesetString, MergeDeladdDeCboRoaringBitmaps};
 use crate::facet::FacetType;
 use crate::heed_codec::facet::{
     FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec, OrderedF64Codec,
@@ -112,7 +112,7 @@ pub struct FacetsUpdate<'i> {
     index: &'i Index,
     database: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
     facet_type: FacetType,
-    delta_data: Merger<BufReader<File>, MergeDeladdCboRoaringBitmaps>,
+    delta_data: Merger<BufReader<File>, MergeDeladdDeCboRoaringBitmaps>,
     normalized_delta_data: Option<Merger<BufReader<File>, MergeDeladdBtreesetString>>,
     group_size: u8,
     max_group_size: u8,
@@ -124,7 +124,7 @@ impl<'i> FacetsUpdate<'i> {
     pub fn new(
         index: &'i Index,
         facet_type: FacetType,
-        delta_data: Merger<BufReader<File>, MergeDeladdCboRoaringBitmaps>,
+        delta_data: Merger<BufReader<File>, MergeDeladdDeCboRoaringBitmaps>,
         normalized_delta_data: Option<Merger<BufReader<File>, MergeDeladdBtreesetString>>,
         data_size: u64,
     ) -> Self {
@@ -364,9 +364,9 @@ pub(crate) mod test_helpers {
     use crate::search::facet::get_highest_level;
     use crate::snapshot_tests::display_bitmap;
     use crate::update::del_add::{DelAdd, KvWriterDelAdd};
-    use crate::update::index_documents::MergeDeladdCboRoaringBitmaps;
+    use crate::update::index_documents::MergeDeladdDeCboRoaringBitmaps;
     use crate::update::FacetsUpdateIncrementalInner;
-    use crate::CboRoaringBitmapCodec;
+    use crate::DeCboRoaringBitmapCodec;

     /// Utility function to generate a string whose position in a lexicographically
     /// ordered list is `i`.
@@ -496,13 +496,13 @@ pub(crate) mod test_helpers {
                 FacetGroupKey { field_id: *field_id, level: 0, left_bound: &left_bound_bytes };
             let key = FacetGroupKeyCodec::<BytesRefCodec>::bytes_encode(&key).unwrap();
             let mut inner_writer = KvWriterDelAdd::memory();
-            let value = CboRoaringBitmapCodec::bytes_encode(docids).unwrap();
+            let value = DeCboRoaringBitmapCodec::bytes_encode(docids).unwrap();
             inner_writer.insert(DelAdd::Addition, value).unwrap();
             writer.insert(&key, inner_writer.into_inner().unwrap()).unwrap();
         }
         writer.finish().unwrap();
         let reader = grenad::Reader::new(std::io::Cursor::new(new_data)).unwrap();
-        let mut builder = MergerBuilder::new(MergeDeladdCboRoaringBitmaps);
+        let mut builder = MergerBuilder::new(MergeDeladdDeCboRoaringBitmaps);
         builder.push(reader.into_cursor().unwrap());
         let merger = builder.build();

@@ -4,7 +4,7 @@ use std::io::{self, BufReader};
 use heed::{BytesDecode, BytesEncode};

 use super::helpers::{
-    create_sorter, sorter_into_reader, GrenadParameters, MergeDeladdCboRoaringBitmaps,
+    create_sorter, sorter_into_reader, GrenadParameters, MergeDeladdDeCboRoaringBitmaps,
 };
 use crate::heed_codec::facet::{
     FacetGroupKey, FacetGroupKeyCodec, FieldDocIdFacetF64Codec, OrderedF64Codec,
@@ -27,7 +27,7 @@ pub fn extract_facet_number_docids<R: io::Read + io::Seek>(

     let mut facet_number_docids_sorter = create_sorter(
         grenad::SortAlgorithm::Unstable,
-        MergeDeladdCboRoaringBitmaps,
+        MergeDeladdDeCboRoaringBitmaps,
         indexer.chunk_compression_type,
         indexer.chunk_compression_level,
         indexer.max_nb_chunks,

@@ -14,7 +14,7 @@ use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec};
 use crate::heed_codec::{BEU16StrCodec, StrRefCodec};
 use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd};
 use crate::update::index_documents::helpers::{
-    MergeDeladdBtreesetString, MergeDeladdCboRoaringBitmaps,
+    MergeDeladdBtreesetString, MergeDeladdDeCboRoaringBitmaps,
 };
 use crate::update::settings::{InnerIndexSettings, InnerIndexSettingsDiff};
 use crate::{FieldId, Result, MAX_FACET_VALUE_LENGTH};
@@ -54,7 +54,7 @@ fn extract_facet_string_docids_document_update<R: io::Read + io::Seek>(

     let mut facet_string_docids_sorter = create_sorter(
         grenad::SortAlgorithm::Stable,
-        MergeDeladdCboRoaringBitmaps,
+        MergeDeladdDeCboRoaringBitmaps,
         indexer.chunk_compression_type,
         indexer.chunk_compression_level,
         indexer.max_nb_chunks,
@@ -154,7 +154,7 @@ fn extract_facet_string_docids_settings<R: io::Read + io::Seek>(

     let mut facet_string_docids_sorter = create_sorter(
         grenad::SortAlgorithm::Stable,
-        MergeDeladdCboRoaringBitmaps,
+        MergeDeladdDeCboRoaringBitmaps,
         indexer.chunk_compression_type,
         indexer.chunk_compression_level,
         indexer.max_nb_chunks,

@@ -19,7 +19,7 @@ use crate::facet::value_encoding::f64_into_bytes;
 use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd};
 use crate::update::index_documents::{create_writer, writer_into_reader};
 use crate::update::settings::InnerIndexSettingsDiff;
-use crate::{CboRoaringBitmapCodec, DocumentId, FieldId, Result, MAX_FACET_VALUE_LENGTH};
+use crate::{DeCboRoaringBitmapCodec, DocumentId, FieldId, Result, MAX_FACET_VALUE_LENGTH};

 /// The length of the elements that are always in the buffer when inserting new values.
 const TRUNCATE_SIZE: usize = size_of::<FieldId>() + size_of::<DocumentId>();
@@ -311,8 +311,8 @@ fn deladd_obkv_cbo_roaring_bitmaps(
 ) -> io::Result<()> {
     buffer.clear();
     let mut obkv = KvWriterDelAdd::new(buffer);
-    let del_bitmap_bytes = CboRoaringBitmapCodec::bytes_encode(del_bitmap).unwrap();
-    let add_bitmap_bytes = CboRoaringBitmapCodec::bytes_encode(add_bitmap).unwrap();
+    let del_bitmap_bytes = DeCboRoaringBitmapCodec::bytes_encode(del_bitmap).unwrap();
+    let add_bitmap_bytes = DeCboRoaringBitmapCodec::bytes_encode(add_bitmap).unwrap();
     obkv.insert(DelAdd::Deletion, del_bitmap_bytes)?;
     obkv.insert(DelAdd::Addition, add_bitmap_bytes)?;
     obkv.finish()

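`deladd_obkv_cbo_roaring_bitmaps` above packs the serialized deletion and addition bitmaps into one obkv value. milli's `KvWriterDelAdd` layout is not shown in this diff, so this sketch uses a stand-in record format (tag byte, little-endian length, payload) purely to make the shape concrete:

```rust
use roaring::RoaringBitmap;

const DELETION: u8 = 0;
const ADDITION: u8 = 1;

/// Packs a del/add bitmap pair into one buffer as two tagged entries:
/// [tag: u8][len: u32 LE][roaring-serialized bitmap], deletion side first.
/// This record layout is invented for the example; milli uses its obkv format.
fn serialize_deladd(del: &RoaringBitmap, add: &RoaringBitmap) -> std::io::Result<Vec<u8>> {
    let mut out = Vec::new();
    for (tag, bitmap) in [(DELETION, del), (ADDITION, add)] {
        let mut bytes = Vec::with_capacity(bitmap.serialized_size());
        bitmap.serialize_into(&mut bytes)?;
        out.push(tag);
        out.extend_from_slice(&(bytes.len() as u32).to_le_bytes());
        out.extend_from_slice(&bytes);
    }
    Ok(out)
}
```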
@@ -5,7 +5,7 @@ use obkv::KvReaderU16;

 use super::helpers::{
     create_sorter, sorter_into_reader, try_split_array_at, GrenadParameters,
-    MergeDeladdCboRoaringBitmaps,
+    MergeDeladdDeCboRoaringBitmaps,
 };
 use crate::error::SerializationError;
 use crate::index::db_name::DOCID_WORD_POSITIONS;
@@ -30,7 +30,7 @@ pub fn extract_fid_word_count_docids<R: io::Read + io::Seek>(

     let mut fid_word_count_docids_sorter = create_sorter(
         grenad::SortAlgorithm::Unstable,
-        MergeDeladdCboRoaringBitmaps,
+        MergeDeladdDeCboRoaringBitmaps,
         indexer.chunk_compression_type,
         indexer.chunk_compression_level,
         indexer.max_nb_chunks,

@@ -7,7 +7,7 @@ use obkv::KvReaderU16;

 use super::helpers::{
     create_sorter, create_writer, try_split_array_at, writer_into_reader, GrenadParameters,
-    MergeDeladdCboRoaringBitmaps,
+    MergeDeladdDeCboRoaringBitmaps,
 };
 use crate::error::SerializationError;
 use crate::heed_codec::StrBEU16Codec;
@@ -38,7 +38,7 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(

     let mut word_fid_docids_sorter = create_sorter(
         grenad::SortAlgorithm::Unstable,
-        MergeDeladdCboRoaringBitmaps,
+        MergeDeladdDeCboRoaringBitmaps,
         indexer.chunk_compression_type,
         indexer.chunk_compression_level,
         indexer.max_nb_chunks,
@@ -93,7 +93,7 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(

     let mut word_docids_sorter = create_sorter(
         grenad::SortAlgorithm::Unstable,
-        MergeDeladdCboRoaringBitmaps,
+        MergeDeladdDeCboRoaringBitmaps,
         indexer.chunk_compression_type,
         indexer.chunk_compression_level,
         indexer.max_nb_chunks,
@@ -103,7 +103,7 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(

     let mut exact_word_docids_sorter = create_sorter(
         grenad::SortAlgorithm::Unstable,
-        MergeDeladdCboRoaringBitmaps,
+        MergeDeladdDeCboRoaringBitmaps,
         indexer.chunk_compression_type,
         indexer.chunk_compression_level,
         indexer.max_nb_chunks,
@@ -166,7 +166,7 @@ fn words_into_sorter(
     key_buffer: &mut Vec<u8>,
     del_words: &BTreeSet<Vec<u8>>,
     add_words: &BTreeSet<Vec<u8>>,
-    word_fid_docids_sorter: &mut grenad::Sorter<MergeDeladdCboRoaringBitmaps>,
+    word_fid_docids_sorter: &mut grenad::Sorter<MergeDeladdDeCboRoaringBitmaps>,
 ) -> Result<()> {
     use itertools::merge_join_by;
     use itertools::EitherOrBoth::{Both, Left, Right};

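`words_into_sorter` imports `itertools::merge_join_by` to walk the deleted and added word sets in a single sorted pass. A hedged sketch of that pattern: `Left` items existed only before (pure deletions), `Right` items are new (pure additions), `Both` means the word survives. The `WordChange` type is invented for the example.

```rust
use std::collections::BTreeSet;

use itertools::merge_join_by;
use itertools::EitherOrBoth::{Both, Left, Right};

#[derive(Debug, PartialEq)]
enum WordChange<'a> {
    Deleted(&'a str),
    Added(&'a str),
    Kept(&'a str),
}

/// One sorted pass over the old and new word sets, classifying every word.
fn classify<'a>(
    del_words: &'a BTreeSet<String>,
    add_words: &'a BTreeSet<String>,
) -> Vec<WordChange<'a>> {
    merge_join_by(del_words, add_words, |del, add| del.cmp(add))
        .map(|eob| match eob {
            Left(del) => WordChange::Deleted(del.as_str()),
            Right(add) => WordChange::Added(add.as_str()),
            Both(kept, _) => WordChange::Kept(kept.as_str()),
        })
        .collect()
}
```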
@@ -7,7 +7,7 @@ use obkv::KvReaderU16;

 use super::helpers::{
     create_sorter, create_writer, try_split_array_at, writer_into_reader, GrenadParameters,
-    MergeDeladdCboRoaringBitmaps,
+    MergeDeladdDeCboRoaringBitmaps,
 };
 use crate::error::SerializationError;
 use crate::index::db_name::DOCID_WORD_POSITIONS;
@@ -44,7 +44,7 @@ pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
         .map(|_| {
             create_sorter(
                 grenad::SortAlgorithm::Unstable,
-                MergeDeladdCboRoaringBitmaps,
+                MergeDeladdDeCboRoaringBitmaps,
                 indexer.chunk_compression_type,
                 indexer.chunk_compression_level,
                 indexer.max_nb_chunks,
@@ -198,7 +198,7 @@ fn document_word_positions_into_sorter(
     document_id: DocumentId,
     del_word_pair_proximity: &BTreeMap<(String, String), u8>,
     add_word_pair_proximity: &BTreeMap<(String, String), u8>,
-    word_pair_proximity_docids_sorters: &mut [grenad::Sorter<MergeDeladdCboRoaringBitmaps>],
+    word_pair_proximity_docids_sorters: &mut [grenad::Sorter<MergeDeladdDeCboRoaringBitmaps>],
 ) -> Result<()> {
     use itertools::merge_join_by;
     use itertools::EitherOrBoth::{Both, Left, Right};

@@ -6,7 +6,7 @@ use obkv::KvReaderU16;

 use super::helpers::{
     create_sorter, sorter_into_reader, try_split_array_at, GrenadParameters,
-    MergeDeladdCboRoaringBitmaps,
+    MergeDeladdDeCboRoaringBitmaps,
 };
 use crate::error::SerializationError;
 use crate::index::db_name::DOCID_WORD_POSITIONS;
@@ -28,7 +28,7 @@ pub fn extract_word_position_docids<R: io::Read + io::Seek>(

     let mut word_position_docids_sorter = create_sorter(
         grenad::SortAlgorithm::Unstable,
-        MergeDeladdCboRoaringBitmaps,
+        MergeDeladdDeCboRoaringBitmaps,
         indexer.chunk_compression_type,
         indexer.chunk_compression_level,
         indexer.max_nb_chunks,
@@ -100,7 +100,7 @@ fn words_position_into_sorter(
     key_buffer: &mut Vec<u8>,
     del_word_positions: &BTreeSet<(u16, Vec<u8>)>,
     add_word_positions: &BTreeSet<(u16, Vec<u8>)>,
-    word_position_docids_sorter: &mut grenad::Sorter<MergeDeladdCboRoaringBitmaps>,
+    word_position_docids_sorter: &mut grenad::Sorter<MergeDeladdDeCboRoaringBitmaps>,
 ) -> Result<()> {
     use itertools::merge_join_by;
     use itertools::EitherOrBoth::{Both, Left, Right};

@@ -7,7 +7,7 @@ use either::Either;
 use grenad::MergeFunction;
 use roaring::RoaringBitmap;

-use crate::heed_codec::CboRoaringBitmapCodec;
+use crate::heed_codec::DeCboRoaringBitmapCodec;
 use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd};
 use crate::update::index_documents::transform::Operation;
 use crate::Result;
@@ -189,10 +189,10 @@ impl MergeFunction for ObkvsKeepLastAdditionMergeDeletions {
     }
 }

-/// Do a union of all the CboRoaringBitmaps in the values.
-pub struct MergeCboRoaringBitmaps;
+/// Do a union of all the DeCboRoaringBitmaps in the values.
+pub struct MergeDeCboRoaringBitmaps;

-impl MergeFunction for MergeCboRoaringBitmaps {
+impl MergeFunction for MergeDeCboRoaringBitmaps {
     type Error = crate::Error;

     fn merge<'a>(&self, _key: &[u8], values: &[Cow<'a, [u8]>]) -> Result<Cow<'a, [u8]>> {
@@ -200,17 +200,17 @@ impl MergeFunction for MergeCboRoaringBitmaps {
             Ok(values[0].clone())
         } else {
             let mut vec = Vec::new();
-            CboRoaringBitmapCodec::merge_into(values, &mut vec)?;
+            DeCboRoaringBitmapCodec::merge_into(values, &mut vec)?;
             Ok(Cow::from(vec))
         }
     }
 }

-/// Do a union of CboRoaringBitmaps on both sides of a DelAdd obkv
+/// Do a union of DeCboRoaringBitmaps on both sides of a DelAdd obkv
 /// separately and outputs a new DelAdd with both unions.
-pub struct MergeDeladdCboRoaringBitmaps;
+pub struct MergeDeladdDeCboRoaringBitmaps;

-impl MergeFunction for MergeDeladdCboRoaringBitmaps {
+impl MergeFunction for MergeDeladdDeCboRoaringBitmaps {
     type Error = crate::Error;

     fn merge<'a>(&self, _key: &[u8], values: &[Cow<'a, [u8]>]) -> Result<Cow<'a, [u8]>> {
@@ -232,10 +232,10 @@ impl MergeFunction for MergeDeladdCboRoaringBitmaps {

         let mut output_deladd_obkv = KvWriterDelAdd::memory();
         let mut buffer = Vec::new();
-        CboRoaringBitmapCodec::merge_into(del_bitmaps_bytes, &mut buffer)?;
+        DeCboRoaringBitmapCodec::merge_into(del_bitmaps_bytes, &mut buffer)?;
         output_deladd_obkv.insert(DelAdd::Deletion, &buffer)?;
         buffer.clear();
-        CboRoaringBitmapCodec::merge_into(add_bitmaps_bytes, &mut buffer)?;
+        DeCboRoaringBitmapCodec::merge_into(add_bitmaps_bytes, &mut buffer)?;
         output_deladd_obkv.insert(DelAdd::Addition, &buffer)?;
         output_deladd_obkv.into_inner().map(Cow::from).map_err(Into::into)
     }
@@ -246,15 +246,16 @@ impl MergeFunction for MergeDeladdCboRoaringBitmaps {
 ///
 /// The first argument is the DelAdd obkv of CboRoaringBitmaps and
 /// the second one is the CboRoaringBitmap to merge into.
-pub fn merge_deladd_cbo_roaring_bitmaps_into_cbo_roaring_bitmap<'a>(
+pub fn merge_deladd_de_cbo_roaring_bitmaps_into_cbo_roaring_bitmap<'a>(
     deladd_obkv: &[u8],
     previous: &[u8],
     buffer: &'a mut Vec<u8>,
 ) -> Result<Option<&'a [u8]>> {
-    Ok(CboRoaringBitmapCodec::merge_deladd_into(
+    Ok(DeCboRoaringBitmapCodec::merge_deladd_into(
         KvReaderDelAdd::from_slice(deladd_obkv),
         previous,
         buffer,
+        &mut Vec::new(), // tmp_buffer
     )?)
 }

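The renamed merge functions above union every serialized bitmap accumulated for one key. Assuming plain `roaring` serialization instead of milli's CBO/DeCBO format, a minimal `grenad::MergeFunction` with the same shape looks like this (the one-value fast path mirrors the `values.len() == 1` branch visible in the hunk):

```rust
use std::borrow::Cow;

use grenad::MergeFunction;
use roaring::RoaringBitmap;

/// Unions all the serialized bitmaps accumulated under one key.
/// Assumes plain `roaring` serialization, not milli's CBO/DeCBO format.
struct UnionBitmaps;

impl MergeFunction for UnionBitmaps {
    type Error = std::io::Error;

    fn merge<'a>(
        &self,
        _key: &[u8],
        values: &[Cow<'a, [u8]>],
    ) -> Result<Cow<'a, [u8]>, Self::Error> {
        if values.len() == 1 {
            // Nothing to merge: hand the single value back untouched.
            return Ok(values[0].clone());
        }
        let mut union = RoaringBitmap::new();
        for value in values {
            union |= RoaringBitmap::deserialize_from(&value[..])?;
        }
        let mut bytes = Vec::with_capacity(union.serialized_size());
        union.serialize_into(&mut bytes)?;
        Ok(Cow::Owned(bytes))
    }
}
```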
@@ -40,7 +40,7 @@ use crate::update::{
 };
 use crate::vector::db::EmbedderInfo;
 use crate::vector::{RuntimeEmbedders, VectorStore};
-use crate::{CboRoaringBitmapCodec, Index, Result, UserError};
+use crate::{DeCboRoaringBitmapCodec, Index, Result, UserError};

 static MERGED_DATABASE_COUNT: usize = 7;
 static PREFIX_DATABASE_COUNT: usize = 4;
@@ -415,7 +415,7 @@ where
                         let cloneable_chunk =
                             unsafe { as_cloneable_grenad(&word_docids_reader)? };
                         let word_docids = word_docids.get_or_insert_with(|| {
-                            MergerBuilder::new(MergeDeladdCboRoaringBitmaps)
+                            MergerBuilder::new(MergeDeladdDeCboRoaringBitmaps)
                         });
                         word_docids.push(cloneable_chunk.into_cursor()?);
                         let cloneable_chunk =
@@ -423,14 +423,14 @@ where
                         let exact_word_docids =
                             exact_word_docids.get_or_insert_with(|| {
                                 MergerBuilder::new(
-                                    MergeDeladdCboRoaringBitmaps,
+                                    MergeDeladdDeCboRoaringBitmaps,
                                 )
                             });
                         exact_word_docids.push(cloneable_chunk.into_cursor()?);
                         let cloneable_chunk =
                             unsafe { as_cloneable_grenad(&word_fid_docids_reader)? };
                         let word_fid_docids = word_fid_docids.get_or_insert_with(|| {
-                            MergerBuilder::new(MergeDeladdCboRoaringBitmaps)
+                            MergerBuilder::new(MergeDeladdDeCboRoaringBitmaps)
                         });
                         word_fid_docids.push(cloneable_chunk.into_cursor()?);
                         TypedChunk::WordDocids {
@@ -444,7 +444,7 @@ where
                         let word_position_docids =
                             word_position_docids.get_or_insert_with(|| {
                                 MergerBuilder::new(
-                                    MergeDeladdCboRoaringBitmaps,
+                                    MergeDeladdDeCboRoaringBitmaps,
                                 )
                             });
                         word_position_docids.push(cloneable_chunk.into_cursor()?);
@@ -577,10 +577,10 @@ where
     )]
     pub fn execute_prefix_databases(
         self,
-        word_docids: Option<Merger<CursorClonableMmap, MergeDeladdCboRoaringBitmaps>>,
-        exact_word_docids: Option<Merger<CursorClonableMmap, MergeDeladdCboRoaringBitmaps>>,
-        word_position_docids: Option<Merger<CursorClonableMmap, MergeDeladdCboRoaringBitmaps>>,
-        word_fid_docids: Option<Merger<CursorClonableMmap, MergeDeladdCboRoaringBitmaps>>,
+        word_docids: Option<Merger<CursorClonableMmap, MergeDeladdDeCboRoaringBitmaps>>,
+        exact_word_docids: Option<Merger<CursorClonableMmap, MergeDeladdDeCboRoaringBitmaps>>,
+        word_position_docids: Option<Merger<CursorClonableMmap, MergeDeladdDeCboRoaringBitmaps>>,
+        word_fid_docids: Option<Merger<CursorClonableMmap, MergeDeladdDeCboRoaringBitmaps>>,
     ) -> Result<()>
     where
         FP: Fn(UpdateIndexingStep) + Sync,
@@ -778,9 +778,9 @@ where
 )]
 fn execute_word_prefix_docids(
     txn: &mut heed::RwTxn<'_>,
-    merger: Merger<CursorClonableMmap, MergeDeladdCboRoaringBitmaps>,
-    word_docids_db: Database<Str, CboRoaringBitmapCodec>,
-    word_prefix_docids_db: Database<Str, CboRoaringBitmapCodec>,
+    merger: Merger<CursorClonableMmap, MergeDeladdDeCboRoaringBitmaps>,
+    word_docids_db: Database<Str, DeCboRoaringBitmapCodec>,
+    word_prefix_docids_db: Database<Str, DeCboRoaringBitmapCodec>,
     indexer_config: &IndexerConfig,
     new_prefix_fst_words: &[String],
     common_prefix_fst_words: &[&[String]],
@@ -1292,7 +1292,7 @@ mod tests {
         let rtxn = index.read_txn().unwrap();

         // testing the simple query search
-        let mut search = index.search(&rtxn);
+        let mut search = crate::Search::new(&rtxn, &index);
         search.query("document");
         search.terms_matching_strategy(TermsMatchingStrategy::default());
         // all documents should be returned
@@ -1333,7 +1333,7 @@ mod tests {
         assert!(documents_ids.is_empty()); // nested is not searchable

         // testing the filters
-        let mut search = index.search(&rtxn);
+        let mut search = crate::Search::new(&rtxn, &index);
         search.filter(crate::Filter::from_str(r#"title = "The first document""#).unwrap().unwrap());
         let crate::SearchResult { documents_ids, .. } = search.execute().unwrap();
         assert_eq!(documents_ids, vec![1]);
@@ -1358,7 +1358,6 @@ mod tests {
     #[test]
     fn index_documents_with_nested_primary_key() {
         let index = TempIndex::new();
-        let progress = Progress::default();

         index
             .update_settings(|settings| {
@@ -1398,7 +1397,7 @@ mod tests {
         let rtxn = index.read_txn().unwrap();

         // testing the simple query search
-        let mut search = crate::Search::new(&rtxn, &index, &progress);
+        let mut search = crate::Search::new(&rtxn, &index);
         search.query("document");
         search.terms_matching_strategy(TermsMatchingStrategy::default());
         // all documents should be returned
@@ -1454,7 +1453,6 @@ mod tests {
     #[test]
     fn test_facets_generation() {
         let index = TempIndex::new();
-        let progress = Progress::default();

         index
             .add_documents(documents!([
@@ -1509,7 +1507,7 @@ mod tests {
         let rtxn = index.read_txn().unwrap();

         for (s, i) in [("zeroth", 0), ("first", 1), ("second", 2), ("third", 3)] {
-            let mut search = crate::Search::new(&rtxn, &index, &progress);
+            let mut search = crate::Search::new(&rtxn, &index);
             let filter = format!(r#""dog.race.bernese mountain" = {s}"#);
             search.filter(crate::Filter::from_str(&filter).unwrap().unwrap());
             let crate::SearchResult { documents_ids, .. } = search.execute().unwrap();
@@ -1547,7 +1545,7 @@ mod tests {

         let rtxn = index.read_txn().unwrap();

-        let mut search = crate::Search::new(&rtxn, &index, &progress);
+        let mut search = crate::Search::new(&rtxn, &index);
         search.sort_criteria(vec![crate::AscDesc::Asc(crate::Member::Field(S(
             "dog.race.bernese mountain",
         )))]);
@@ -2913,6 +2911,7 @@ mod tests {
         ]
         */
+        let index = TempIndex::new();

         // START OF BATCH

         println!("--- ENTERING BATCH 1");
@@ -3602,7 +3601,6 @@ mod tests {
     #[test]
     fn delete_words_exact_attributes() {
         let index = TempIndex::new();
-        let progress = Progress::default();

         index
             .update_settings(|settings| {
@@ -3641,7 +3639,7 @@ mod tests {
         let words = index.words_fst(&txn).unwrap().into_stream().into_strs().unwrap();
         insta::assert_snapshot!(format!("{words:?}"), @r###"["hello"]"###);

-        let mut s = Search::new(&txn, &index, &progress);
+        let mut s = Search::new(&txn, &index);
         s.query("hello");
         let crate::SearchResult { documents_ids, .. } = s.execute().unwrap();
         insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0]");

@@ -12,8 +12,8 @@ use obkv::{KvReader, KvWriter};
 use roaring::RoaringBitmap;

 use super::helpers::{
-    self, merge_deladd_cbo_roaring_bitmaps_into_cbo_roaring_bitmap, valid_lmdb_key,
-    CursorClonableMmap, KeepFirst, MergeDeladdBtreesetString, MergeDeladdCboRoaringBitmaps,
+    self, merge_deladd_de_cbo_roaring_bitmaps_into_cbo_roaring_bitmap, valid_lmdb_key,
+    CursorClonableMmap, KeepFirst, MergeDeladdBtreesetString, MergeDeladdDeCboRoaringBitmaps,
     MergeIgnoreValues,
 };
 use crate::external_documents_ids::{DocumentOperation, DocumentOperationKind};
@@ -29,7 +29,7 @@ use crate::update::settings::InnerIndexSettingsDiff;
 use crate::vector::db::{EmbeddingStatusDelta, IndexEmbeddingConfig};
 use crate::vector::VectorStore;
 use crate::{
-    lat_lng_to_xyz, CboRoaringBitmapCodec, DocumentId, FieldId, GeoPoint, Index, InternalError,
+    lat_lng_to_xyz, DeCboRoaringBitmapCodec, DocumentId, FieldId, GeoPoint, Index, InternalError,
     Result, SerializationError, U8StrStrCodec, UserError,
 };

@@ -241,7 +241,7 @@ pub(crate) fn write_typed_chunk_into_index(
                 tracing::trace_span!(target: "indexing::write_db", "field_id_word_count_docids");
             let _entered = span.enter();

-            let mut builder = MergerBuilder::new(MergeDeladdCboRoaringBitmaps);
+            let mut builder = MergerBuilder::new(MergeDeladdDeCboRoaringBitmaps);
             for typed_chunk in typed_chunks {
                 let TypedChunk::FieldIdWordCountDocids(chunk) = typed_chunk else {
                     unreachable!();
@@ -256,7 +256,7 @@ pub(crate) fn write_typed_chunk_into_index(
                 &index.field_id_word_count_docids,
                 wtxn,
                 deladd_serialize_add_side,
-                merge_deladd_cbo_roaring_bitmaps_into_cbo_roaring_bitmap,
+                merge_deladd_de_cbo_roaring_bitmaps_into_cbo_roaring_bitmap,
             )?;
             is_merged_database = true;
         }
@@ -264,9 +264,9 @@ pub(crate) fn write_typed_chunk_into_index(
             let span = tracing::trace_span!(target: "indexing::write_db", "word_docids");
             let _entered = span.enter();

-            let mut word_docids_builder = MergerBuilder::new(MergeDeladdCboRoaringBitmaps);
-            let mut exact_word_docids_builder = MergerBuilder::new(MergeDeladdCboRoaringBitmaps);
-            let mut word_fid_docids_builder = MergerBuilder::new(MergeDeladdCboRoaringBitmaps);
+            let mut word_docids_builder = MergerBuilder::new(MergeDeladdDeCboRoaringBitmaps);
+            let mut exact_word_docids_builder = MergerBuilder::new(MergeDeladdDeCboRoaringBitmaps);
+            let mut word_fid_docids_builder = MergerBuilder::new(MergeDeladdDeCboRoaringBitmaps);
             let mut fst_merger_builder = MergerBuilder::new(MergeIgnoreValues);
             for typed_chunk in typed_chunks {
                 let TypedChunk::WordDocids {
@@ -291,7 +291,7 @@ pub(crate) fn write_typed_chunk_into_index(
                 &index.word_docids,
                 wtxn,
                 deladd_serialize_add_side,
-                merge_deladd_cbo_roaring_bitmaps_into_cbo_roaring_bitmap,
+                merge_deladd_de_cbo_roaring_bitmaps_into_cbo_roaring_bitmap,
             )?;

             let exact_word_docids_merger = exact_word_docids_builder.build();
@@ -300,7 +300,7 @@ pub(crate) fn write_typed_chunk_into_index(
                 &index.exact_word_docids,
                 wtxn,
                 deladd_serialize_add_side,
-                merge_deladd_cbo_roaring_bitmaps_into_cbo_roaring_bitmap,
+                merge_deladd_de_cbo_roaring_bitmaps_into_cbo_roaring_bitmap,
             )?;

             let word_fid_docids_merger = word_fid_docids_builder.build();
@@ -309,7 +309,7 @@ pub(crate) fn write_typed_chunk_into_index(
                 &index.word_fid_docids,
                 wtxn,
                 deladd_serialize_add_side,
-                merge_deladd_cbo_roaring_bitmaps_into_cbo_roaring_bitmap,
+                merge_deladd_de_cbo_roaring_bitmaps_into_cbo_roaring_bitmap,
             )?;

             // create fst from word docids
@@ -329,7 +329,7 @@ pub(crate) fn write_typed_chunk_into_index(
             let span = tracing::trace_span!(target: "indexing::write_db", "word_position_docids");
             let _entered = span.enter();

-            let mut builder = MergerBuilder::new(MergeDeladdCboRoaringBitmaps);
+            let mut builder = MergerBuilder::new(MergeDeladdDeCboRoaringBitmaps);
             for typed_chunk in typed_chunks {
                 let TypedChunk::WordPositionDocids(chunk) = typed_chunk else {
                     unreachable!();
@@ -344,7 +344,7 @@ pub(crate) fn write_typed_chunk_into_index(
                 &index.word_position_docids,
                 wtxn,
                 deladd_serialize_add_side,
-                merge_deladd_cbo_roaring_bitmaps_into_cbo_roaring_bitmap,
+                merge_deladd_de_cbo_roaring_bitmaps_into_cbo_roaring_bitmap,
             )?;
             is_merged_database = true;
         }
@@ -353,7 +353,7 @@ pub(crate) fn write_typed_chunk_into_index(
                 tracing::trace_span!(target: "indexing::write_db","field_id_facet_number_docids");
             let _entered = span.enter();

-            let mut builder = MergerBuilder::new(MergeDeladdCboRoaringBitmaps);
+            let mut builder = MergerBuilder::new(MergeDeladdDeCboRoaringBitmaps);
             let mut data_size = 0;
             for typed_chunk in typed_chunks {
                 let TypedChunk::FieldIdFacetNumberDocids(facet_id_number_docids) = typed_chunk
@@ -375,7 +375,7 @@ pub(crate) fn write_typed_chunk_into_index(
                 tracing::trace_span!(target: "indexing::write_db", "field_id_facet_string_docids");
             let _entered = span.enter();

-            let mut facet_id_string_builder = MergerBuilder::new(MergeDeladdCboRoaringBitmaps);
+            let mut facet_id_string_builder = MergerBuilder::new(MergeDeladdDeCboRoaringBitmaps);
             let mut normalized_facet_id_string_builder =
                 MergerBuilder::new(MergeDeladdBtreesetString);
             let mut data_size = 0;
@@ -411,7 +411,7 @@ pub(crate) fn write_typed_chunk_into_index(
                 tracing::trace_span!(target: "indexing::write_db", "field_id_facet_exists_docids");
             let _entered = span.enter();

-            let mut builder = MergerBuilder::new(MergeDeladdCboRoaringBitmaps);
+            let mut builder = MergerBuilder::new(MergeDeladdDeCboRoaringBitmaps);
             for typed_chunk in typed_chunks {
                 let TypedChunk::FieldIdFacetExistsDocids(chunk) = typed_chunk else {
                     unreachable!();
@@ -426,7 +426,7 @@ pub(crate) fn write_typed_chunk_into_index(
                 &index.facet_id_exists_docids,
                 wtxn,
                 deladd_serialize_add_side,
-                merge_deladd_cbo_roaring_bitmaps_into_cbo_roaring_bitmap,
+                merge_deladd_de_cbo_roaring_bitmaps_into_cbo_roaring_bitmap,
             )?;
             is_merged_database = true;
         }
@@ -435,7 +435,7 @@ pub(crate) fn write_typed_chunk_into_index(
                 tracing::trace_span!(target: "indexing::write_db", "field_id_facet_is_null_docids");
             let _entered = span.enter();

-            let mut builder = MergerBuilder::new(MergeDeladdCboRoaringBitmaps);
+            let mut builder = MergerBuilder::new(MergeDeladdDeCboRoaringBitmaps);
             for typed_chunk in typed_chunks {
                 let TypedChunk::FieldIdFacetIsNullDocids(chunk) = typed_chunk else {
                     unreachable!();
@@ -450,7 +450,7 @@ pub(crate) fn write_typed_chunk_into_index(
                 &index.facet_id_is_null_docids,
                 wtxn,
                 deladd_serialize_add_side,
-                merge_deladd_cbo_roaring_bitmaps_into_cbo_roaring_bitmap,
+                merge_deladd_de_cbo_roaring_bitmaps_into_cbo_roaring_bitmap,
             )?;
             is_merged_database = true;
         }
@@ -458,7 +458,7 @@ pub(crate) fn write_typed_chunk_into_index(
             let span = tracing::trace_span!(target: "indexing::write_db", "field_id_facet_is_empty_docids");
             let _entered = span.enter();

-            let mut builder = MergerBuilder::new(MergeDeladdCboRoaringBitmaps);
+            let mut builder = MergerBuilder::new(MergeDeladdDeCboRoaringBitmaps);
             for typed_chunk in typed_chunks {
                 let TypedChunk::FieldIdFacetIsEmptyDocids(chunk) = typed_chunk else {
                     unreachable!();
@@ -473,7 +473,7 @@ pub(crate) fn write_typed_chunk_into_index(
                 &index.facet_id_is_empty_docids,
                 wtxn,
                 deladd_serialize_add_side,
-                merge_deladd_cbo_roaring_bitmaps_into_cbo_roaring_bitmap,
+                merge_deladd_de_cbo_roaring_bitmaps_into_cbo_roaring_bitmap,
             )?;
             is_merged_database = true;
         }
@@ -482,7 +482,7 @@ pub(crate) fn write_typed_chunk_into_index(
                 tracing::trace_span!(target: "indexing::write_db", "word_pair_proximity_docids");
             let _entered = span.enter();

-            let mut builder = MergerBuilder::new(MergeDeladdCboRoaringBitmaps);
+            let mut builder = MergerBuilder::new(MergeDeladdDeCboRoaringBitmaps);
             for typed_chunk in typed_chunks {
                 let TypedChunk::WordPairProximityDocids(chunk) = typed_chunk else {
                     unreachable!();
@@ -504,7 +504,7 @@ pub(crate) fn write_typed_chunk_into_index(
                 &index.word_pair_proximity_docids,
                 wtxn,
                 deladd_serialize_add_side,
-                merge_deladd_cbo_roaring_bitmaps_into_cbo_roaring_bitmap,
+                merge_deladd_de_cbo_roaring_bitmaps_into_cbo_roaring_bitmap,
             )?;
         }

@@ -866,7 +866,7 @@ where
 #[tracing::instrument(level = "trace", skip_all, target = "indexing::write_db")]
 fn write_proximity_entries_into_database_additional_searchables<R, MF>(
     merger: Merger<R, MF>,
-    database: &heed::Database<U8StrStrCodec, CboRoaringBitmapCodec>,
+    database: &heed::Database<U8StrStrCodec, DeCboRoaringBitmapCodec>,
     wtxn: &mut RwTxn<'_>,
 ) -> Result<()>
 where
@@ -881,7 +881,7 @@ where
             U8StrStrCodec::bytes_decode(key).map_err(heed::Error::Decoding)?;
         let data_to_insert = match KvReaderDelAdd::from_slice(value).get(DelAdd::Addition) {
             Some(value) => {
-                CboRoaringBitmapCodec::bytes_decode(value).map_err(heed::Error::Decoding)?
+                DeCboRoaringBitmapCodec::bytes_decode(value).map_err(heed::Error::Decoding)?
             }
             None => continue,
         };

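`write_typed_chunk_into_index` repeatedly resolves a del/add pair against the bytes already stored under a key, and the surrounding code deletes the key when the merged bitmap comes out empty. A hedged sketch of that write protocol, again with plain `roaring` serialization; `None` stands for "delete the entry":

```rust
use roaring::RoaringBitmap;

/// Resolves a del/add pair against the bytes currently stored under a key.
/// Returns the new serialized value, or `None` when the merged bitmap is empty
/// and the entry should be deleted. Assumes plain `roaring` serialization.
fn resolve_against_db(
    current: Option<&[u8]>,
    del: Option<&RoaringBitmap>,
    add: Option<&RoaringBitmap>,
) -> std::io::Result<Option<Vec<u8>>> {
    let mut bitmap = match current {
        Some(bytes) => RoaringBitmap::deserialize_from(bytes)?,
        None => RoaringBitmap::new(),
    };
    if let Some(del) = del {
        bitmap -= del;
    }
    if let Some(add) = add {
        bitmap |= add;
    }
    if bitmap.is_empty() {
        return Ok(None); // the caller deletes the key
    }
    let mut bytes = Vec::with_capacity(bitmap.serialized_size());
    bitmap.serialize_into(&mut bytes)?;
    Ok(Some(bytes))
}
```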
@@ -27,7 +27,7 @@ use crate::index::db_name;
 use crate::index::main_key::{GEO_FACETED_DOCUMENTS_IDS_KEY, GEO_RTREE_KEY};
 use crate::update::new::KvReaderFieldId;
 use crate::vector::Embedding;
-use crate::{CboRoaringBitmapCodec, DocumentId, Error, Index, InternalError};
+use crate::{DeCboRoaringBitmapCodec, DocumentId, Error, Index, InternalError};

 /// Note that the FrameProducer requires up to 9 bytes to
 /// encode the length, the max grant has been computed accordingly.
@@ -971,7 +971,9 @@ pub struct WordDocidsSender<'a, 'b, D> {

 impl<D: DatabaseType> WordDocidsSender<'_, '_, D> {
     pub fn write(&self, key: &[u8], bitmap: &RoaringBitmap) -> crate::Result<()> {
-        let value_length = CboRoaringBitmapCodec::serialized_size(bitmap);
+        let mut tmp_buffer = Vec::new();
+        let value_length =
+            DeCboRoaringBitmapCodec::serialized_size_with_tmp_buffer(bitmap, &mut tmp_buffer);
         let key_length = key.len().try_into().ok().and_then(NonZeroU16::new).ok_or_else(|| {
             InternalError::StorePut {
                 database_name: D::DATABASE.database_name(),
@@ -986,7 +988,10 @@ impl<D: DatabaseType> WordDocidsSender<'_, '_, D> {
             value_length,
             |key_buffer, value_buffer| {
                 key_buffer.copy_from_slice(key);
-                CboRoaringBitmapCodec::serialize_into_writer(bitmap, value_buffer)?;
+                DeCboRoaringBitmapCodec::serialize_into(
+                    bitmap,
+                    &mut io::Cursor::new(value_buffer),
+                )?;
                 Ok(())
             },
         )
@@ -1007,7 +1012,9 @@ impl FacetDocidsSender<'_, '_> {
         let (facet_kind, key) = FacetKind::extract_from_key(key);
         let database = Database::from(facet_kind);

-        let value_length = CboRoaringBitmapCodec::serialized_size(bitmap);
+        let mut tmp_buffer = Vec::new();
+        let value_length =
+            DeCboRoaringBitmapCodec::serialized_size_with_tmp_buffer(bitmap, &mut tmp_buffer);
         let value_length = match facet_kind {
             // We must take the facet group size into account
             // when we serialize strings and numbers.
@@ -1041,7 +1048,7 @@ impl FacetDocidsSender<'_, '_> {
                     FacetKind::Null | FacetKind::Empty | FacetKind::Exists => value_out,
                 };

-                CboRoaringBitmapCodec::serialize_into_writer(bitmap, value_out)?;
+                DeCboRoaringBitmapCodec::serialize_into(bitmap, &mut io::Cursor::new(value_out))?;

                 Ok(())
             },

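`WordDocidsSender::write` now asks the codec for the exact serialized size first, reserves a frame of that length, and serializes into the reserved slice through an `io::Cursor`. The same two-pass pattern with plain `roaring` serialization (the real codec's reusable `tmp_buffer` is elided):

```rust
use std::io::{self, Cursor};

use roaring::RoaringBitmap;

/// Computes the exact serialized size first, reserves that many bytes,
/// then serializes into the reserved slice. `vec![0; n]` stands in for
/// the channel's pre-granted frame.
fn write_exact(bitmap: &RoaringBitmap) -> io::Result<Vec<u8>> {
    let value_length = bitmap.serialized_size();
    let mut value_buffer = vec![0u8; value_length];
    bitmap.serialize_into(&mut Cursor::new(&mut value_buffer[..]))?;
    Ok(value_buffer)
}
```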
@@ -81,8 +81,8 @@ use rustc_hash::FxBuildHasher;
 use crate::update::del_add::{DelAdd, KvWriterDelAdd};
 use crate::update::new::thread_local::MostlySend;
 use crate::update::new::KvReaderDelAdd;
-use crate::update::MergeDeladdCboRoaringBitmaps;
-use crate::{CboRoaringBitmapCodec, Result};
+use crate::update::MergeDeladdDeCboRoaringBitmaps;
+use crate::{DeCboRoaringBitmapCodec, Result};

 /// A cache that stores bytes keys associated to CboDelAddRoaringBitmaps.
 ///
@@ -320,9 +320,10 @@ struct SpillingCaches<'extractor> {
             &'extractor Bump,
         >,
     >,
-    spilled_entries: Vec<grenad::Sorter<MergeDeladdCboRoaringBitmaps>>,
+    spilled_entries: Vec<grenad::Sorter<MergeDeladdDeCboRoaringBitmaps>>,
     deladd_buffer: Vec<u8>,
     cbo_buffer: Vec<u8>,
+    tmp_buffer: Vec<u32>,
 }

 impl<'extractor> SpillingCaches<'extractor> {
@@ -338,7 +339,7 @@ impl<'extractor> SpillingCaches<'extractor> {
     ) -> SpillingCaches<'extractor> {
         SpillingCaches {
             spilled_entries: iter::repeat_with(|| {
-                let mut builder = grenad::SorterBuilder::new(MergeDeladdCboRoaringBitmaps);
+                let mut builder = grenad::SorterBuilder::new(MergeDeladdDeCboRoaringBitmaps);
                 builder.dump_threshold(0);
                 builder.allow_realloc(false);
                 builder.build()
@@ -348,6 +349,7 @@ impl<'extractor> SpillingCaches<'extractor> {
             caches,
             deladd_buffer: Vec::new(),
             cbo_buffer: Vec::new(),
+            tmp_buffer: Vec::new(),
         }
     }

@@ -370,6 +372,7 @@ impl<'extractor> SpillingCaches<'extractor> {
                 &mut self.spilled_entries[bucket],
                 &mut self.deladd_buffer,
                 &mut self.cbo_buffer,
+                &mut self.tmp_buffer,
                 key,
                 DelAddRoaringBitmap::new_del_u32(n),
             ),
@@ -395,6 +398,7 @@ impl<'extractor> SpillingCaches<'extractor> {
                 &mut self.spilled_entries[bucket],
                 &mut self.deladd_buffer,
                 &mut self.cbo_buffer,
+                &mut self.tmp_buffer,
                 key,
                 DelAddRoaringBitmap::new_add_u32(n),
             ),
@@ -408,9 +412,10 @@ fn compute_bucket_from_hash(buckets: usize, hash: u64) -> usize {
 }

 fn spill_entry_to_sorter(
-    spilled_entries: &mut grenad::Sorter<MergeDeladdCboRoaringBitmaps>,
+    spilled_entries: &mut grenad::Sorter<MergeDeladdDeCboRoaringBitmaps>,
     deladd_buffer: &mut Vec<u8>,
     cbo_buffer: &mut Vec<u8>,
+    tmp_buffer: &mut Vec<u32>,
     key: &[u8],
     deladd: DelAddRoaringBitmap,
 ) -> Result<()> {
@@ -420,21 +425,21 @@ fn spill_entry_to_sorter(
     match deladd {
         DelAddRoaringBitmap { del: Some(del), add: None } => {
             cbo_buffer.clear();
-            CboRoaringBitmapCodec::serialize_into_vec(&del, cbo_buffer);
+            DeCboRoaringBitmapCodec::serialize_into_with_tmp_buffer(&del, cbo_buffer, tmp_buffer)?;
             value_writer.insert(DelAdd::Deletion, &cbo_buffer)?;
         }
         DelAddRoaringBitmap { del: None, add: Some(add) } => {
             cbo_buffer.clear();
-            CboRoaringBitmapCodec::serialize_into_vec(&add, cbo_buffer);
+            DeCboRoaringBitmapCodec::serialize_into_with_tmp_buffer(&add, cbo_buffer, tmp_buffer)?;
             value_writer.insert(DelAdd::Addition, &cbo_buffer)?;
         }
         DelAddRoaringBitmap { del: Some(del), add: Some(add) } => {
             cbo_buffer.clear();
-            CboRoaringBitmapCodec::serialize_into_vec(&del, cbo_buffer);
+            DeCboRoaringBitmapCodec::serialize_into_with_tmp_buffer(&del, cbo_buffer, tmp_buffer)?;
             value_writer.insert(DelAdd::Deletion, &cbo_buffer)?;

             cbo_buffer.clear();
-            CboRoaringBitmapCodec::serialize_into_vec(&add, cbo_buffer);
+            DeCboRoaringBitmapCodec::serialize_into_with_tmp_buffer(&add, cbo_buffer, tmp_buffer)?;
             value_writer.insert(DelAdd::Addition, &cbo_buffer)?;
         }
         DelAddRoaringBitmap { del: None, add: None } => return Ok(()),
@@ -637,15 +642,22 @@ pub struct DelAddRoaringBitmap {

 impl DelAddRoaringBitmap {
     fn from_bytes(bytes: &[u8]) -> io::Result<DelAddRoaringBitmap> {
+        let mut tmp_buffer = Vec::new();
         let reader = KvReaderDelAdd::from_slice(bytes);

         let del = match reader.get(DelAdd::Deletion) {
-            Some(bytes) => CboRoaringBitmapCodec::deserialize_from(bytes).map(Some)?,
+            Some(bytes) => {
+                DeCboRoaringBitmapCodec::deserialize_from_with_tmp_buffer(bytes, &mut tmp_buffer)
+                    .map(Some)?
+            }
             None => None,
         };

         let add = match reader.get(DelAdd::Addition) {
-            Some(bytes) => CboRoaringBitmapCodec::deserialize_from(bytes).map(Some)?,
+            Some(bytes) => {
+                DeCboRoaringBitmapCodec::deserialize_from_with_tmp_buffer(bytes, &mut tmp_buffer)
+                    .map(Some)?
+            }
             None => None,
         };

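`spill_entry_to_sorter` routes each key to one of several sorters via `compute_bucket_from_hash(buckets, hash)`. Its body is not visible in this diff; the classic multiply-shift reduction below is one plausible, bias-free implementation and is purely an assumption:

```rust
/// Maps a 64-bit hash onto `buckets` slots without the modulo bias of
/// `hash % buckets`: the 128-bit product is always below `buckets << 64`,
/// so the high 64 bits are a uniform index in `0..buckets`.
fn compute_bucket_from_hash(buckets: usize, hash: u64) -> usize {
    ((hash as u128 * buckets as u128) >> 64) as usize
}

fn main() {
    assert_eq!(compute_bucket_from_hash(8, 0), 0);
    assert!(compute_bucket_from_hash(8, u64::MAX) < 8);
}
```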
@@ -14,7 +14,7 @@ use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValu
 use crate::heed_codec::BytesRefCodec;
 use crate::update::facet::{FACET_GROUP_SIZE, FACET_MIN_LEVEL_SIZE};
 use crate::update::{create_writer, writer_into_reader};
-use crate::{CboRoaringBitmapCodec, FieldId, Index};
+use crate::{DeCboRoaringBitmapCodec, FieldId, Index};

 /// Generate the facet level based on the level 0.
 ///
@@ -123,7 +123,7 @@ fn compute_level(
                 ser_buffer.push(group_len);
                 let group_docids = mem::take(&mut group_docids);
                 let docids = group_docids.into_iter().union();
-                CboRoaringBitmapCodec::serialize_into_vec(&docids, &mut ser_buffer);
+                DeCboRoaringBitmapCodec::serialize_into(&docids, &mut ser_buffer)?;
                 writer.insert(left_bound, &ser_buffer)?;
             }
             left_bound = Some(key.left_bound);
@@ -142,7 +142,7 @@ fn compute_level(
         let group_len: u8 = group_docids.len().try_into().unwrap();
         ser_buffer.push(group_len);
         let group_docids = group_docids.into_iter().union();
-        CboRoaringBitmapCodec::serialize_into_vec(&group_docids, &mut ser_buffer);
+        DeCboRoaringBitmapCodec::serialize_into(&group_docids, &mut ser_buffer)?;
         writer.insert(left_bound, &ser_buffer)?;
     }

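Both `compute_level` hunks write a facet group value as a single `group_len` byte followed by the serialized docids bitmap, which is exactly why `bulk.rs` earlier reads the bitmap at `&prev_value[1..]`. A sketch of that one-byte-header layout with plain `roaring` serialization:

```rust
use roaring::RoaringBitmap;

/// Facet group value layout: [group_len: u8][serialized docids bitmap].
fn encode_group(group_len: u8, docids: &RoaringBitmap) -> std::io::Result<Vec<u8>> {
    let mut ser_buffer = Vec::with_capacity(1 + docids.serialized_size());
    ser_buffer.push(group_len);
    docids.serialize_into(&mut ser_buffer)?;
    Ok(ser_buffer)
}

/// Reads the group size byte back and deserializes the bitmap from `value[1..]`.
fn decode_group(value: &[u8]) -> std::io::Result<(u8, RoaringBitmap)> {
    let group_len = value[0]; // panics on an empty value; fine for a sketch
    let docids = RoaringBitmap::deserialize_from(&value[1..])?;
    Ok((group_len, docids))
}
```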
@@ -14,7 +14,7 @@ use super::extract::{
 };
 use crate::update::facet::new_incremental::FacetFieldIdChange;
 use crate::update::new::extract::cellulite::GeoJsonExtractorData;
-use crate::{CboRoaringBitmapCodec, FieldId, GeoPoint, Index, InternalError, Result};
+use crate::{DeCboRoaringBitmapCodec, FieldId, GeoPoint, Index, InternalError, Result};

 #[tracing::instrument(level = "trace", skip_all, target = "indexing::merge")]
 pub fn merge_and_send_rtree<'extractor, MSP>(
@@ -106,7 +106,7 @@ where
     }
     merge_caches_sorted(frozen, |key, DelAddRoaringBitmap { del, add }| {
         let current = database.get(&rtxn, key)?;
-        match merge_cbo_bitmaps(current, del, add)? {
+        match merge_de_cbo_bitmaps(current, del, add)? {
             Operation::Write(bitmap) => docids_sender.write(key, &bitmap),
             Operation::Delete => docids_sender.delete(key),
             Operation::Ignore => Ok(()),
@@ -134,8 +134,8 @@ pub fn merge_and_send_facet_docids(
         FacetFieldIdsDelta::new(max_string_count, max_number_count);
     let rtxn = index.read_txn()?;
     merge_caches_sorted(frozen, |key, DelAddRoaringBitmap { del, add }| {
-        let current = database.get_cbo_roaring_bytes_value(&rtxn, key)?;
-        match merge_cbo_bitmaps(current, del, add)? {
+        let current = database.get_de_cbo_roaring_bytes_value(&rtxn, key)?;
+        match merge_de_cbo_bitmaps(current, del, add)? {
             Operation::Write(bitmap) => {
                 facet_field_ids_delta.register_from_key(key);
                 docids_sender.write(key, &bitmap)?;
@@ -166,7 +166,7 @@ impl<'a> FacetDatabases<'a> {
         Self { index }
     }

-    fn get_cbo_roaring_bytes_value<'t>(
+    fn get_de_cbo_roaring_bytes_value<'t>(
         &self,
         rtxn: &'t RoTxn<'_>,
         key: &[u8],
@@ -320,12 +320,12 @@ enum Operation {
 }

 /// A function that merges the DelAdd CboRoaringBitmaps with the current bitmap.
-fn merge_cbo_bitmaps(
+fn merge_de_cbo_bitmaps(
     current: Option<&[u8]>,
     del: Option<RoaringBitmap>,
     add: Option<RoaringBitmap>,
 ) -> Result<Operation> {
-    let current = current.map(CboRoaringBitmapCodec::deserialize_from).transpose()?;
+    let current = current.map(DeCboRoaringBitmapCodec::deserialize_from).transpose()?;
     match (current, del, add) {
         (None, None, None) => Ok(Operation::Ignore), // but it's strange
         (None, None, Some(add)) => Ok(Operation::Write(add)),

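`merge_de_cbo_bitmaps` turns a `(current, del, add)` triple into a write, a delete, or a no-op. Only the first two match arms are visible in the hunk, so the remaining arms below are a hedged reconstruction, including the delete-when-emptied case that the bulk writer also implements:

```rust
use roaring::RoaringBitmap;

enum Operation {
    Write(RoaringBitmap),
    Delete,
    Ignore,
}

/// Hedged reconstruction: only the first two arms appear in the hunk above;
/// the remaining cases are assumptions about the obvious completion.
fn merge_bitmaps(
    current: Option<RoaringBitmap>,
    del: Option<RoaringBitmap>,
    add: Option<RoaringBitmap>,
) -> Operation {
    match (current, del, add) {
        (None, None, None) => Operation::Ignore, // but it's strange
        (None, None, Some(add)) => Operation::Write(add),
        // Deleting from a missing entry is a no-op, only the additions matter.
        (None, Some(_), None) => Operation::Ignore,
        (None, Some(_), Some(add)) => Operation::Write(add),
        (Some(current), del, add) => {
            // Apply deletions first, then additions, then drop empty entries.
            let new = current - del.unwrap_or_default() | add.unwrap_or_default();
            if new.is_empty() {
                Operation::Delete
            } else {
                Operation::Write(new)
            }
        }
    }
}
```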
@@ -14,12 +14,12 @@ use thread_local::ThreadLocal;
 use super::ref_cell_ext::RefCellExt as _;
 use crate::heed_codec::StrBEU16Codec;
 use crate::update::GrenadParameters;
-use crate::{CboRoaringBitmapCodec, Index, Prefix, Result};
+use crate::{DeCboRoaringBitmapCodec, Index, Prefix, Result};

 struct WordPrefixDocids<'i> {
     index: &'i Index,
-    database: Database<Bytes, CboRoaringBitmapCodec>,
-    prefix_database: Database<Bytes, CboRoaringBitmapCodec>,
+    database: Database<Bytes, DeCboRoaringBitmapCodec>,
+    prefix_database: Database<Bytes, DeCboRoaringBitmapCodec>,
     max_memory_by_thread: Option<usize>,
     /// Do not use an experimental LMDB feature to read uncommitted data in parallel.
     no_experimental_post_processing: bool,
@@ -28,8 +28,8 @@ struct WordPrefixDocids<'i> {
 impl<'i> WordPrefixDocids<'i> {
     fn new(
         index: &'i Index,
-        database: Database<Bytes, CboRoaringBitmapCodec>,
-        prefix_database: Database<Bytes, CboRoaringBitmapCodec>,
+        database: Database<Bytes, DeCboRoaringBitmapCodec>,
+        prefix_database: Database<Bytes, DeCboRoaringBitmapCodec>,
         grenad_parameters: &GrenadParameters,
     ) -> WordPrefixDocids<'i> {
         WordPrefixDocids {
@@ -87,12 +87,12 @@ impl<'i> WordPrefixDocids<'i> {
                 let output = self
                     .database
                     .prefix_iter(&rtxn, prefix.as_bytes())?
-                    .remap_types::<Str, CboRoaringBitmapCodec>()
+                    .remap_types::<Str, DeCboRoaringBitmapCodec>()
                     .map(|result| result.map(|(_word, bitmap)| bitmap))
                     .union()?;

                 buffer.clear();
-                CboRoaringBitmapCodec::serialize_into_vec(&output, &mut buffer);
+                DeCboRoaringBitmapCodec::serialize_into(&output, &mut buffer)?;
                 indexes.push(PrefixEntry { prefix, serialized_length: buffer.len() });
                 file.write_all(&buffer)?;
             }
@@ -150,11 +150,11 @@ impl<'i> WordPrefixDocids<'i> {
                     .bitmaps(prefix)
                     .unwrap()
                     .iter()
-                    .map(|bytes| CboRoaringBitmapCodec::deserialize_from(bytes))
+                    .map(|bytes| DeCboRoaringBitmapCodec::deserialize_from(bytes))
                     .union()?;

                 buffer.clear();
-                CboRoaringBitmapCodec::serialize_into_vec(&output, buffer);
+                DeCboRoaringBitmapCodec::serialize_into(&output, buffer)?;
                 index.push(PrefixEntry { prefix, serialized_length: buffer.len() });
                 file.write_all(buffer)
             })?;
@@ -203,7 +203,7 @@ struct FrozenPrefixBitmaps<'a, 'rtxn> {
 impl<'a, 'rtxn> FrozenPrefixBitmaps<'a, 'rtxn> {
     #[tracing::instrument(level = "trace", skip_all, target = "indexing::prefix")]
     pub fn from_prefixes(
-        database: Database<Bytes, CboRoaringBitmapCodec>,
+        database: Database<Bytes, DeCboRoaringBitmapCodec>,
         rtxn: &'rtxn RoTxn,
         prefixes: &'a BTreeSet<Prefix>,
     ) -> heed::Result<Self> {
@@ -231,8 +231,8 @@ unsafe impl Sync for FrozenPrefixBitmaps<'_, '_> {}

 struct WordPrefixIntegerDocids<'i> {
     index: &'i Index,
-    database: Database<Bytes, CboRoaringBitmapCodec>,
-    prefix_database: Database<Bytes, CboRoaringBitmapCodec>,
+    database: Database<Bytes, DeCboRoaringBitmapCodec>,
+    prefix_database: Database<Bytes, DeCboRoaringBitmapCodec>,
     max_memory_by_thread: Option<usize>,
     /// Do not use an experimental LMDB feature to read uncommitted data in parallel.
     no_experimental_post_processing: bool,
@@ -241,8 +241,8 @@ struct WordPrefixIntegerDocids<'i> {
 impl<'i> WordPrefixIntegerDocids<'i> {
     fn new(
         index: &'i Index,
-        database: Database<Bytes, CboRoaringBitmapCodec>,
-        prefix_database: Database<Bytes, CboRoaringBitmapCodec>,
+        database: Database<Bytes, DeCboRoaringBitmapCodec>,
+        prefix_database: Database<Bytes, DeCboRoaringBitmapCodec>,
         grenad_parameters: &'_ GrenadParameters,
     ) -> WordPrefixIntegerDocids<'i> {
         WordPrefixIntegerDocids {
@@ -338,10 +338,10 @@ impl<'i> WordPrefixIntegerDocids<'i> {
                 } else {
                     let output = bitmaps_bytes
                         .into_iter()
-                        .map(CboRoaringBitmapCodec::deserialize_from)
+                        .map(DeCboRoaringBitmapCodec::deserialize_from)
                         .union()?;
                     buffer.clear();
-                    CboRoaringBitmapCodec::serialize_into_vec(&output, &mut buffer);
+                    DeCboRoaringBitmapCodec::serialize_into(&output, &mut buffer)?;
                     indexes.push(PrefixIntegerEntry {
                         prefix,
                         pos,
@@ -419,10 +419,10 @@ impl<'i> WordPrefixIntegerDocids<'i> {
                 } else {
                     let output = bitmaps_bytes
                         .iter()
-                        .map(|bytes| CboRoaringBitmapCodec::deserialize_from(bytes))
+                        .map(|bytes| DeCboRoaringBitmapCodec::deserialize_from(bytes))
                         .union()?;
                     buffer.clear();
-                    CboRoaringBitmapCodec::serialize_into_vec(&output, buffer);
+                    DeCboRoaringBitmapCodec::serialize_into(&output, buffer)?;
                     index.push(PrefixIntegerEntry {
                         prefix,
                         pos,
@@ -486,7 +486,7 @@ struct FrozenPrefixIntegerBitmaps<'a, 'rtxn> {
 impl<'a, 'rtxn> FrozenPrefixIntegerBitmaps<'a, 'rtxn> {
     #[tracing::instrument(level = "trace", skip_all, target = "indexing::prefix")]
     pub fn from_prefixes(
-        database: Database<Bytes, CboRoaringBitmapCodec>,
+        database: Database<Bytes, DeCboRoaringBitmapCodec>,
         rtxn: &'rtxn RoTxn,
         prefixes: &'a BTreeSet<Prefix>,
     ) -> heed::Result<Self> {
@@ -516,7 +516,7 @@ unsafe impl Sync for FrozenPrefixIntegerBitmaps<'_, '_> {}
 #[tracing::instrument(level = "trace", skip_all, target = "indexing::prefix")]
 fn delete_prefixes(
     wtxn: &mut RwTxn,
-    prefix_database: &Database<Bytes, CboRoaringBitmapCodec>,
+    prefix_database: &Database<Bytes, DeCboRoaringBitmapCodec>,
     prefixes: &BTreeSet<Prefix>,
 ) -> Result<()> {
     // We remove all the entries that are no more required in this word prefix docids database.

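`WordPrefixDocids` recomputes each prefix bitmap by unioning every `(word, docids)` entry whose key starts with the prefix. The same idea over an in-memory `BTreeMap`, with a key range standing in for LMDB's `prefix_iter`:

```rust
use std::collections::BTreeMap;
use std::ops::Bound;

use roaring::RoaringBitmap;

/// Unions the docids of every word starting with `prefix`.
/// A `BTreeMap` key range stands in for LMDB's `prefix_iter`.
fn prefix_docids(words: &BTreeMap<String, RoaringBitmap>, prefix: &str) -> RoaringBitmap {
    let mut output = RoaringBitmap::new();
    let range = (Bound::Included(prefix), Bound::Unbounded);
    for (_word, bitmap) in words
        .range::<str, _>(range)
        .take_while(|(word, _)| word.starts_with(prefix))
    {
        output |= bitmap;
    }
    output
}
```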
Some files were not shown because too many files have changed in this diff.