Compare commits

..

9 Commits

Author SHA1 Message Date
ManyTheFish
1cee5b52e5 Fix: Avoid a scoped progress being started twice and add debug asserts 2025-12-23 18:46:25 +01:00
ManyTheFish
5380ae4da8 Fix clippy errors 2025-12-23 15:22:45 +01:00
ManyTheFish
8df9b41022 Update benchmarks 2025-12-23 15:17:50 +01:00
ManyTheFish
b69a553752 Update tests 2025-12-23 15:17:28 +01:00
ManyTheFish
1ed2107621 Log progress trace at the end of the search process 2025-12-23 15:12:21 +01:00
ManyTheFish
b7deb85cc0 Add progress traces in search processes. 2025-12-23 15:10:27 +01:00
ManyTheFish
01de2be3d9 Implement a ScopedProgressStep helper to finish the step at the end of the scope.
This helper will mark the step as finished when it is dropped.
The struct must be assigned to a named variable `let step` or `let _step` to be dropped at the end of the scope properly.
2025-12-23 14:32:43 +01:00
ManyTheFish
830b62a142 Sum the durations of similar steps in accumulate_durations function.
the function was previously keeping the most recent duration for a step,
this is not compatible with processes doing several iterations like the
bucket sort algorithm.
2025-12-23 14:28:00 +01:00
ManyTheFish
63d1aeb42e Add search progress steps.
These steps will be used to track the progress of the search process
2025-12-23 14:23:49 +01:00
63 changed files with 749 additions and 440 deletions

View File

@@ -15,7 +15,7 @@ env:
jobs:
test-linux:
name: Tests on ${{ matrix.runner }} ${{ matrix.features }}
name: Tests on Ubuntu
runs-on: ${{ matrix.runner }}
strategy:
matrix:

38
Cargo.lock generated
View File

@@ -580,7 +580,7 @@ source = "git+https://github.com/meilisearch/bbqueue#e8af4a4bccc8eb36b2b0442c4a9
[[package]]
name = "benchmarks"
version = "1.31.0"
version = "1.30.0"
dependencies = [
"anyhow",
"bumpalo",
@@ -790,7 +790,7 @@ dependencies = [
[[package]]
name = "build-info"
version = "1.31.0"
version = "1.30.0"
dependencies = [
"anyhow",
"time",
@@ -1786,7 +1786,7 @@ dependencies = [
[[package]]
name = "dump"
version = "1.31.0"
version = "1.30.0"
dependencies = [
"anyhow",
"big_s",
@@ -2018,7 +2018,7 @@ checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be"
[[package]]
name = "file-store"
version = "1.31.0"
version = "1.30.0"
dependencies = [
"tempfile",
"thiserror 2.0.17",
@@ -2040,7 +2040,7 @@ dependencies = [
[[package]]
name = "filter-parser"
version = "1.31.0"
version = "1.30.0"
dependencies = [
"insta",
"levenshtein_automata",
@@ -2068,7 +2068,7 @@ dependencies = [
[[package]]
name = "flatten-serde-json"
version = "1.31.0"
version = "1.30.0"
dependencies = [
"criterion",
"serde_json",
@@ -2231,7 +2231,7 @@ dependencies = [
[[package]]
name = "fuzzers"
version = "1.31.0"
version = "1.30.0"
dependencies = [
"arbitrary",
"bumpalo",
@@ -2698,9 +2698,9 @@ dependencies = [
[[package]]
name = "hannoy"
version = "0.1.2-nested-rtxns"
version = "0.1.0-nested-rtxns"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "533c952127a7e73448f26af313ac7b98012516561e48e953781cd6b30e573436"
checksum = "be82bf3f2108ddc8885e3d306fcd7f4692066bfe26065ca8b42ba417f3c26dd1"
dependencies = [
"bytemuck",
"byteorder",
@@ -3185,7 +3185,7 @@ dependencies = [
[[package]]
name = "index-scheduler"
version = "1.31.0"
version = "1.30.0"
dependencies = [
"anyhow",
"backoff",
@@ -3449,7 +3449,7 @@ dependencies = [
[[package]]
name = "json-depth-checker"
version = "1.31.0"
version = "1.30.0"
dependencies = [
"criterion",
"serde_json",
@@ -3939,7 +3939,7 @@ checksum = "ae960838283323069879657ca3de837e9f7bbb4c7bf6ea7f1b290d5e9476d2e0"
[[package]]
name = "meili-snap"
version = "1.31.0"
version = "1.30.0"
dependencies = [
"insta",
"md5 0.8.0",
@@ -3950,7 +3950,7 @@ dependencies = [
[[package]]
name = "meilisearch"
version = "1.31.0"
version = "1.30.0"
dependencies = [
"actix-cors",
"actix-http",
@@ -4048,7 +4048,7 @@ dependencies = [
[[package]]
name = "meilisearch-auth"
version = "1.31.0"
version = "1.30.0"
dependencies = [
"base64 0.22.1",
"enum-iterator",
@@ -4067,7 +4067,7 @@ dependencies = [
[[package]]
name = "meilisearch-types"
version = "1.31.0"
version = "1.30.0"
dependencies = [
"actix-web",
"anyhow",
@@ -4105,7 +4105,7 @@ dependencies = [
[[package]]
name = "meilitool"
version = "1.31.0"
version = "1.30.0"
dependencies = [
"anyhow",
"clap",
@@ -4139,7 +4139,7 @@ dependencies = [
[[package]]
name = "milli"
version = "1.31.0"
version = "1.30.0"
dependencies = [
"arroy",
"bbqueue",
@@ -4718,7 +4718,7 @@ checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220"
[[package]]
name = "permissive-json-pointer"
version = "1.31.0"
version = "1.30.0"
dependencies = [
"big_s",
"serde_json",
@@ -7758,7 +7758,7 @@ dependencies = [
[[package]]
name = "xtask"
version = "1.31.0"
version = "1.30.0"
dependencies = [
"anyhow",
"build-info",

View File

@@ -23,7 +23,7 @@ members = [
]
[workspace.package]
version = "1.31.0"
version = "1.30.0"
authors = [
"Quentin de Quelen <quentin@dequelen.me>",
"Clément Renault <clement@meilisearch.com>",

View File

@@ -172,7 +172,8 @@ pub fn run_benches(c: &mut criterion::Criterion, confs: &[Conf]) {
|b, &query| {
b.iter(|| {
let rtxn = index.read_txn().unwrap();
let mut search = index.search(&rtxn);
let progress = Progress::default();
let mut search = index.search(&rtxn, &progress);
search
.query(query)
.terms_matching_strategy(TermsMatchingStrategy::default());

View File

@@ -153,7 +153,8 @@ fn main() {
.unwrap();
// after executing a batch we check if the database is corrupted
let res = index.search(&wtxn).execute().unwrap();
let progress = Progress::default();
let res = index.search(&wtxn, &progress).execute().unwrap();
index.documents(&wtxn, res.documents_ids).unwrap();
progression.fetch_add(1, Ordering::Relaxed);
}

View File

@@ -662,8 +662,13 @@ impl IndexScheduler {
// 2. Get the task set for index = name that appeared before the index swap task
let mut index_lhs_task_ids = self.queue.tasks.index_tasks(wtxn, lhs)?;
index_lhs_task_ids.remove_range(task_id..);
let mut index_rhs_task_ids = self.queue.tasks.index_tasks(wtxn, rhs)?;
index_rhs_task_ids.remove_range(task_id..);
let index_rhs_task_ids = if rename {
let mut index_rhs_task_ids = self.queue.tasks.index_tasks(wtxn, rhs)?;
index_rhs_task_ids.remove_range(task_id..);
index_rhs_task_ids
} else {
RoaringBitmap::new()
};
// 3. before_name -> new_name in the task's KindWithContent
progress.update_progress(InnerSwappingTwoIndexes::UpdateTheTasks);

View File

@@ -7,9 +7,9 @@ source: crates/index-scheduler/src/scheduler/test.rs
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: succeeded, details: { primary_key: Some("id"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "b", primary_key: Some("id") }}
1 {uid: 1, batch_uid: 1, status: succeeded, details: { primary_key: Some("id"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "a", primary_key: Some("id") }}
1 {uid: 1, batch_uid: 1, status: succeeded, details: { primary_key: Some("id"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "b", primary_key: Some("id") }}
2 {uid: 2, batch_uid: 2, status: succeeded, details: { primary_key: Some("id"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "d", primary_key: Some("id") }}
3 {uid: 3, batch_uid: 3, status: succeeded, details: { primary_key: Some("id"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "c", primary_key: Some("id") }}
3 {uid: 3, batch_uid: 3, status: succeeded, details: { primary_key: Some("id"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "d", primary_key: Some("id") }}
4 {uid: 4, batch_uid: 4, status: succeeded, details: { swaps: [IndexSwap { indexes: ("a", "b"), rename: false }, IndexSwap { indexes: ("c", "d"), rename: false }] }, kind: IndexSwap { swaps: [IndexSwap { indexes: ("a", "b"), rename: false }, IndexSwap { indexes: ("c", "d"), rename: false }] }}
5 {uid: 5, status: enqueued, details: { swaps: [IndexSwap { indexes: ("a", "c"), rename: false }] }, kind: IndexSwap { swaps: [IndexSwap { indexes: ("a", "c"), rename: false }] }}
----------------------------------------------------------------------
@@ -22,10 +22,10 @@ succeeded [0,1,2,3,4,]
"indexSwap" [4,5,]
----------------------------------------------------------------------
### Index Tasks:
a [1,4,5,]
b [0,4,]
c [3,4,5,]
d [2,4,]
a [4,5,]
b [0,1,4,]
c [4,5,]
d [2,3,4,]
----------------------------------------------------------------------
### Index Mapper:
a: { number_of_documents: 0, field_distribution: {} }

View File

@@ -7,9 +7,9 @@ source: crates/index-scheduler/src/scheduler/test.rs
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: succeeded, details: { primary_key: Some("id"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "b", primary_key: Some("id") }}
1 {uid: 1, batch_uid: 1, status: succeeded, details: { primary_key: Some("id"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "c", primary_key: Some("id") }}
1 {uid: 1, batch_uid: 1, status: succeeded, details: { primary_key: Some("id"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "b", primary_key: Some("id") }}
2 {uid: 2, batch_uid: 2, status: succeeded, details: { primary_key: Some("id"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "d", primary_key: Some("id") }}
3 {uid: 3, batch_uid: 3, status: succeeded, details: { primary_key: Some("id"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "a", primary_key: Some("id") }}
3 {uid: 3, batch_uid: 3, status: succeeded, details: { primary_key: Some("id"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "d", primary_key: Some("id") }}
4 {uid: 4, batch_uid: 4, status: succeeded, details: { swaps: [IndexSwap { indexes: ("c", "b"), rename: false }, IndexSwap { indexes: ("a", "d"), rename: false }] }, kind: IndexSwap { swaps: [IndexSwap { indexes: ("c", "b"), rename: false }, IndexSwap { indexes: ("a", "d"), rename: false }] }}
5 {uid: 5, batch_uid: 5, status: succeeded, details: { swaps: [IndexSwap { indexes: ("a", "c"), rename: false }] }, kind: IndexSwap { swaps: [IndexSwap { indexes: ("a", "c"), rename: false }] }}
----------------------------------------------------------------------
@@ -22,10 +22,10 @@ succeeded [0,1,2,3,4,5,]
"indexSwap" [4,5,]
----------------------------------------------------------------------
### Index Tasks:
a [3,4,5,]
b [0,4,]
c [1,4,5,]
d [2,4,]
a [5,]
b [0,1,4,]
c [4,5,]
d [2,3,4,]
----------------------------------------------------------------------
### Index Mapper:
a: { number_of_documents: 0, field_distribution: {} }

View File

@@ -7,9 +7,9 @@ source: crates/index-scheduler/src/scheduler/test.rs
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: succeeded, details: { primary_key: Some("id"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "b", primary_key: Some("id") }}
1 {uid: 1, batch_uid: 1, status: succeeded, details: { primary_key: Some("id"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "c", primary_key: Some("id") }}
1 {uid: 1, batch_uid: 1, status: succeeded, details: { primary_key: Some("id"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "b", primary_key: Some("id") }}
2 {uid: 2, batch_uid: 2, status: succeeded, details: { primary_key: Some("id"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "d", primary_key: Some("id") }}
3 {uid: 3, batch_uid: 3, status: succeeded, details: { primary_key: Some("id"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "a", primary_key: Some("id") }}
3 {uid: 3, batch_uid: 3, status: succeeded, details: { primary_key: Some("id"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "d", primary_key: Some("id") }}
4 {uid: 4, batch_uid: 4, status: succeeded, details: { swaps: [IndexSwap { indexes: ("c", "b"), rename: false }, IndexSwap { indexes: ("a", "d"), rename: false }] }, kind: IndexSwap { swaps: [IndexSwap { indexes: ("c", "b"), rename: false }, IndexSwap { indexes: ("a", "d"), rename: false }] }}
5 {uid: 5, batch_uid: 5, status: succeeded, details: { swaps: [IndexSwap { indexes: ("a", "c"), rename: false }] }, kind: IndexSwap { swaps: [IndexSwap { indexes: ("a", "c"), rename: false }] }}
6 {uid: 6, batch_uid: 6, status: succeeded, details: { swaps: [] }, kind: IndexSwap { swaps: [] }}
@@ -23,10 +23,10 @@ succeeded [0,1,2,3,4,5,6,]
"indexSwap" [4,5,6,]
----------------------------------------------------------------------
### Index Tasks:
a [3,4,5,]
b [0,4,]
c [1,4,5,]
d [2,4,]
a [5,]
b [0,1,4,]
c [4,5,]
d [2,3,4,]
----------------------------------------------------------------------
### Index Mapper:
a: { number_of_documents: 0, field_distribution: {} }

View File

@@ -5,7 +5,7 @@ use crate::test_utils::Breakpoint::*;
use crate::test_utils::{
index_creation_task, read_json, replace_document_import_task, sample_documents,
};
use crate::{IndexScheduler, Query};
use crate::IndexScheduler;
use big_s::S;
use meili_snap::{json_string, snapshot};
use meilisearch_auth::AuthFilter;
@@ -404,103 +404,6 @@ fn swap_indexes() {
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "third_empty_swap_processed");
}
#[test]
fn swap_indexes_with_correct_task_allocations() {
let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]);
let to_enqueue = [index_creation_task("a", "id"), index_creation_task("b", "id")];
for task in to_enqueue {
let _ = index_scheduler.register(task, None, false).unwrap();
index_scheduler.assert_internally_consistent();
}
handle.advance_n_successful_batches(2);
let (file0, count0) = sample_documents(&index_scheduler, 1, 1);
let (file1, count1) = sample_documents(&index_scheduler, 2, 2);
let (file2, count2) = sample_documents(&index_scheduler, 3, 3);
let (file3, count3) = sample_documents(&index_scheduler, 4, 4);
file0.persist().unwrap();
file1.persist().unwrap();
file2.persist().unwrap();
file3.persist().unwrap();
index_scheduler
.register(replace_document_import_task("a", Some("id"), 1, count0), None, false)
.unwrap();
index_scheduler
.register(replace_document_import_task("a", Some("id"), 2, count1), None, false)
.unwrap();
index_scheduler
.register(replace_document_import_task("b", Some("id"), 3, count2), None, false)
.unwrap();
index_scheduler
.register(replace_document_import_task("b", Some("id"), 4, count3), None, false)
.unwrap();
handle.advance_n_successful_batches(2);
let (a_tasks, _) = index_scheduler
.get_tasks_from_authorized_indexes(
&Query { index_uids: Some(vec!["a".to_string()]), ..Default::default() },
&AuthFilter::default(),
)
.unwrap();
assert_eq!(a_tasks.len(), 3);
let (b_tasks, _) = index_scheduler
.get_tasks_from_authorized_indexes(
&Query { index_uids: Some(vec!["b".to_string()]), ..Default::default() },
&AuthFilter::default(),
)
.unwrap();
assert_eq!(b_tasks.len(), 3);
index_scheduler
.register(
KindWithContent::IndexSwap {
swaps: vec![IndexSwap { indexes: ("a".to_owned(), "b".to_owned()), rename: false }],
},
None,
false,
)
.unwrap();
handle.advance_one_successful_batch();
let (a_after_tasks, _) = index_scheduler
.get_tasks_from_authorized_indexes(
&Query { index_uids: Some(vec!["a".to_string()]), ..Default::default() },
&AuthFilter::default(),
)
.unwrap();
let (b_after_tasks, _) = index_scheduler
.get_tasks_from_authorized_indexes(
&Query { index_uids: Some(vec!["b".to_string()]), ..Default::default() },
&AuthFilter::default(),
)
.unwrap();
assert_eq!(a_after_tasks.len(), 3);
assert_eq!(a_after_tasks.len(), b_after_tasks.len());
for (a, b) in a_tasks.iter().zip(b_after_tasks.iter()) {
assert_eq!(a.uid, b.uid);
}
for (b, a) in b_tasks.iter().zip(a_after_tasks.iter()) {
assert_eq!(b.uid, a.uid);
}
}
#[test]
fn swap_indexes_errors() {
let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]);

View File

@@ -1,7 +1,8 @@
use std::time::Duration;
use meilisearch_types::error::{Code, ErrorCode, ResponseError};
use meilisearch_types::milli::TimeBudget;
use meilisearch_types::milli::progress::Progress;
use meilisearch_types::milli::{SearchStep, TimeBudget};
use rand::Rng;
use reqwest::Client;
use serde::{Deserialize, Serialize};
@@ -346,9 +347,11 @@ impl PersonalizationService {
personalize: &Personalize,
query: Option<&str>,
time_budget: TimeBudget,
progress: &Progress,
) -> Result<SearchResult, ResponseError> {
match self {
Self::Cohere(cohere_service) => {
let _step = progress.update_progress_scoped(SearchStep::Personalization);
cohere_service
.rerank_search_results(search_result, personalize, query, time_budget)
.await

View File

@@ -30,7 +30,10 @@ use meilisearch_types::features::{
use meilisearch_types::heed::RoTxn;
use meilisearch_types::keys::actions;
use meilisearch_types::milli::index::ChatConfig;
use meilisearch_types::milli::{all_obkv_to_json, obkv_to_json, OrderBy, PatternMatch, TimeBudget};
use meilisearch_types::milli::progress::Progress;
use meilisearch_types::milli::{
all_obkv_to_json, obkv_to_json, OrderBy, PatternMatch, TimeBudget, TotalProcessingTimeStep,
};
use meilisearch_types::{Document, Index};
use serde::Deserialize;
use serde_json::json;
@@ -262,6 +265,7 @@ async fn process_search_request(
filter: Option<String>,
) -> Result<(Index, Vec<Document>, String), ResponseError> {
let index = index_scheduler.index(&index_uid)?;
let progress = Progress::default();
let rtxn = index.static_read_txn()?;
let ChatConfig { description: _, prompt: _, search_parameters } = index.chat_config(&rtxn)?;
let mut query = SearchQuery {
@@ -285,7 +289,9 @@ async fn process_search_request(
let search_kind =
search_kind(&query, index_scheduler.get_ref(), index_uid.to_string(), &index)?;
progress.update_progress(TotalProcessingTimeStep::WaitForPermit);
let permit = search_queue.try_get_search_permit().await?;
progress.update_progress(TotalProcessingTimeStep::Search);
let features = index_scheduler.features();
let index_cloned = index.clone();
let output = tokio::task::spawn_blocking(move || -> Result<_, ResponseError> {
@@ -297,8 +303,15 @@ async fn process_search_request(
None => TimeBudget::default(),
};
let (search, _is_finite_pagination, _max_total_hits, _offset) =
prepare_search(&index_cloned, &rtxn, &query, &search_kind, time_budget, features)?;
let (search, _is_finite_pagination, _max_total_hits, _offset) = prepare_search(
&index_cloned,
&rtxn,
&query,
&search_kind,
time_budget,
features,
&progress,
)?;
match search_from_kind(index_uid, search_kind, search) {
Ok((search_results, _)) => Ok((rtxn, Ok(search_results))),

View File

@@ -8,7 +8,8 @@ use meilisearch_types::error::deserr_codes::*;
use meilisearch_types::error::ResponseError;
use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::locales::Locale;
use meilisearch_types::milli;
use meilisearch_types::milli::progress::Progress;
use meilisearch_types::milli::{self, TotalProcessingTimeStep};
use meilisearch_types::serde_cs::vec::CS;
use serde_json::Value;
use tracing::debug;
@@ -336,6 +337,10 @@ pub async fn search_with_url_query(
) -> Result<HttpResponse, ResponseError> {
let request_uid = Uuid::now_v7();
debug!(request_uid = ?request_uid, parameters = ?params, "Search get");
let progress = Progress::default();
progress.update_progress(TotalProcessingTimeStep::WaitForPermit);
let permit = search_queue.try_get_search_permit().await?;
progress.update_progress(TotalProcessingTimeStep::Search);
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let mut query: SearchQuery = params.into_inner().try_into()?;
@@ -359,9 +364,9 @@ pub async fn search_with_url_query(
// Save the query string for personalization if requested
let personalize_query = personalize.is_some().then(|| query.q.clone()).flatten();
let permit = search_queue.try_get_search_permit().await?;
let include_metadata = parse_include_metadata_header(&req);
let progress_clone = progress.clone();
let search_result = tokio::task::spawn_blocking(move || {
perform_search(
SearchParams {
@@ -374,11 +379,13 @@ pub async fn search_with_url_query(
include_metadata,
},
&index,
&progress_clone,
)
})
.await;
permit.drop().await;
let search_result = search_result?;
if let Ok((search_result, _)) = search_result.as_ref() {
aggregate.succeed(search_result);
}
@@ -394,11 +401,12 @@ pub async fn search_with_url_query(
personalize,
personalize_query.as_deref(),
time_budget,
&progress,
)
.await?;
}
debug!(request_uid = ?request_uid, returns = ?search_result, "Search get");
debug!(request_uid = ?request_uid, returns = ?search_result, progress = ?progress.accumulated_durations(), "Search get");
Ok(HttpResponse::Ok().json(search_result))
}
@@ -470,6 +478,11 @@ pub async fn search_with_post(
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let request_uid = Uuid::now_v7();
let progress = Progress::default();
progress.update_progress(TotalProcessingTimeStep::WaitForPermit);
let permit = search_queue.try_get_search_permit().await?;
progress.update_progress(TotalProcessingTimeStep::Search);
let mut query = params.into_inner();
debug!(request_uid = ?request_uid, parameters = ?query, "Search post");
@@ -494,7 +507,7 @@ pub async fn search_with_post(
// Save the query string for personalization if requested
let personalize_query = personalize.is_some().then(|| query.q.clone()).flatten();
let permit = search_queue.try_get_search_permit().await?;
let progress_clone = progress.clone();
let search_result = tokio::task::spawn_blocking(move || {
perform_search(
SearchParams {
@@ -507,6 +520,7 @@ pub async fn search_with_post(
include_metadata,
},
&index,
&progress_clone,
)
})
.await;
@@ -530,11 +544,12 @@ pub async fn search_with_post(
personalize,
personalize_query.as_deref(),
time_budget,
&progress,
)
.await?;
}
debug!(request_uid = ?request_uid, returns = ?search_result, "Search post");
debug!(request_uid = ?request_uid, returns = ?search_result, progress = ?progress.accumulated_durations(), "Search post");
Ok(HttpResponse::Ok().json(search_result))
}

View File

@@ -8,6 +8,8 @@ use meilisearch_types::error::deserr_codes::*;
use meilisearch_types::error::ResponseError;
use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::keys::actions;
use meilisearch_types::milli::progress::Progress;
use meilisearch_types::milli::TotalProcessingTimeStep;
use meilisearch_types::serde_cs::vec::CS;
use serde_json::Value;
use tracing::debug;
@@ -217,7 +219,7 @@ async fn similar(
mut query: SimilarQuery,
) -> Result<SimilarResult, ResponseError> {
let retrieve_vectors = RetrieveVectors::new(query.retrieve_vectors);
let progress = Progress::default();
// Tenant token search_rules.
if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid) {
add_search_rules(&mut query.filter, search_rules);
@@ -234,7 +236,10 @@ async fn similar(
Route::Similar,
)?;
tokio::task::spawn_blocking(move || {
let progress_clone = progress.clone();
let result = tokio::task::spawn_blocking(move || {
let _step = progress_clone.update_progress_scoped(TotalProcessingTimeStep::Search);
perform_similar(
&index,
query,
@@ -243,9 +248,14 @@ async fn similar(
quantized,
retrieve_vectors,
index_scheduler.features(),
&progress_clone,
)
})
.await?
.await;
debug!(progress = ?progress.accumulated_durations(), "Similar");
result?
}
#[derive(Debug, deserr::Deserr, IntoParams)]

View File

@@ -6,6 +6,8 @@ use index_scheduler::IndexScheduler;
use meilisearch_types::deserr::DeserrJsonError;
use meilisearch_types::error::ResponseError;
use meilisearch_types::keys::actions;
use meilisearch_types::milli::progress::Progress;
use meilisearch_types::milli::TotalProcessingTimeStep;
use serde::Serialize;
use tracing::debug;
use utoipa::{OpenApi, ToSchema};
@@ -153,7 +155,10 @@ pub async fn multi_search_with_post(
) -> Result<HttpResponse, ResponseError> {
// Since we don't want to process half of the search requests and then get a permit refused
// we're going to get one permit for the whole duration of the multi-search request.
let progress = Progress::default();
progress.update_progress(TotalProcessingTimeStep::WaitForPermit);
let permit = search_queue.try_get_search_permit().await?;
progress.update_progress(TotalProcessingTimeStep::Search);
let request_uid = Uuid::now_v7();
let federated_search = params.into_inner();
@@ -213,6 +218,7 @@ pub async fn multi_search_with_post(
is_proxy,
request_uid,
include_metadata,
&progress,
)
.await;
permit.drop().await;
@@ -226,6 +232,7 @@ pub async fn multi_search_with_post(
debug!(
request_uid = ?request_uid,
returns = ?search_result,
progress = ?progress.accumulated_durations(),
"Federated-search"
);
@@ -288,6 +295,7 @@ pub async fn multi_search_with_post(
.with_index(query_index)?;
let retrieve_vector = RetrieveVectors::new(query.retrieve_vectors);
let progress_clone = progress.clone();
let (mut search_result, time_budget) = tokio::task::spawn_blocking(move || {
perform_search(
SearchParams {
@@ -300,6 +308,7 @@ pub async fn multi_search_with_post(
include_metadata,
},
&index,
&progress_clone,
)
})
.await
@@ -314,6 +323,7 @@ pub async fn multi_search_with_post(
personalize,
personalize_query.as_deref(),
time_budget,
&progress,
)
.await
.with_index(query_index)?;
@@ -345,6 +355,7 @@ pub async fn multi_search_with_post(
debug!(
request_uid = ?request_uid,
returns = ?search_results,
progress = ?progress.accumulated_durations(),
"Multi-search"
);

View File

@@ -11,9 +11,13 @@ use index_scheduler::{IndexScheduler, RoFeatures};
use itertools::Itertools;
use meilisearch_types::error::ResponseError;
use meilisearch_types::milli::order_by_map::OrderByMap;
use meilisearch_types::milli::progress::Progress;
use meilisearch_types::milli::score_details::{ScoreDetails, WeightedScoreValue};
use meilisearch_types::milli::vector::Embedding;
use meilisearch_types::milli::{self, DocumentId, OrderBy, TimeBudget, DEFAULT_VALUES_PER_FACET};
use meilisearch_types::milli::{
self, DocumentId, FederatingResultsStep, OrderBy, SearchStep, TimeBudget,
DEFAULT_VALUES_PER_FACET,
};
use meilisearch_types::network::{Network, Remote};
use roaring::RoaringBitmap;
use tokio::task::JoinHandle;
@@ -35,6 +39,7 @@ use crate::error::MeilisearchHttpError;
use crate::routes::indexes::search::search_kind;
use crate::search::federated::types::{INDEX_UID, QUERIES_POSITION, WEIGHTED_RANKING_SCORE};
#[allow(clippy::too_many_arguments)]
pub async fn perform_federated_search(
index_scheduler: &IndexScheduler,
queries: Vec<SearchQueryWithIndex>,
@@ -43,6 +48,7 @@ pub async fn perform_federated_search(
is_proxy: bool,
request_uid: Uuid,
include_metadata: bool,
progress: &Progress,
) -> Result<FederatedSearchResult, ResponseError> {
if is_proxy {
features.check_network("Performing a remote federated search")?;
@@ -111,7 +117,7 @@ pub async fn perform_federated_search(
for (index_uid, queries) in partitioned_queries.local_queries_by_index {
// note: this is the only place we open `index_uid`
search_by_index.execute(index_uid, queries, &params)?;
search_by_index.execute(index_uid, queries, &params, progress)?;
}
// bonus step, make sure to return an error if an index wants a non-faceted field, even if no query actually uses that index.
@@ -126,6 +132,8 @@ pub async fn perform_federated_search(
facet_order,
} = search_by_index;
progress.update_progress(SearchStep::Federation);
progress.update_progress(FederatingResultsStep::WaitForRemoteResults);
let before_waiting_remote_results = std::time::Instant::now();
// 2.3. Wait for proxy search requests to complete
@@ -134,7 +142,7 @@ pub async fn perform_federated_search(
let after_waiting_remote_results = std::time::Instant::now();
// 3. merge hits and metadata across indexes and hosts
progress.update_progress(FederatingResultsStep::MergeResults);
// 3.1. Build metadata in the same order as the original queries
let query_metadata = precomputed_query_metadata.map(|precomputed_query_metadata| {
// If a remote is present, set the local remote name
@@ -187,6 +195,7 @@ pub async fn perform_federated_search(
};
// 3.5. merge facets
progress.update_progress(FederatingResultsStep::MergeFacets);
let (facet_distribution, facet_stats, facets_by_index) =
facet_order.merge(federation.merge_facets, remote_results, facets);
@@ -831,6 +840,7 @@ impl SearchByIndex {
index_uid: String,
queries: Vec<QueryByIndex>,
params: &SearchByIndexParams<'_>,
progress: &Progress,
) -> Result<(), ResponseError> {
let first_query_index = queries.first().map(|query| query.query_index);
let index = match params.index_scheduler.index(&index_uid) {
@@ -957,6 +967,7 @@ impl SearchByIndex {
// clones of `TimeBudget` share the budget rather than restart it
time_budget.clone(),
params.features,
progress,
)?;
search.scoring_strategy(milli::score_details::ScoringStrategy::Detailed);
@@ -1044,7 +1055,7 @@ impl SearchByIndex {
hit_maker,
query_index,
}| {
let mut hit = hit_maker.make_hit(docid, &score)?;
let mut hit = hit_maker.make_hit(docid, &score, progress)?;
let weighted_score = ScoreDetails::global_score(score.iter()) * (*weight);
let mut _federation = serde_json::json!(

View File

@@ -17,11 +17,13 @@ use meilisearch_types::heed::RoTxn;
use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::locales::Locale;
use meilisearch_types::milli::index::{self, EmbeddingsWithMetadata, SearchParameters};
use meilisearch_types::milli::progress::Progress;
use meilisearch_types::milli::score_details::{ScoreDetails, ScoringStrategy};
use meilisearch_types::milli::vector::parsed_vectors::ExplicitVectors;
use meilisearch_types::milli::vector::Embedder;
use meilisearch_types::milli::{
FacetValueHit, InternalError, OrderBy, PatternMatch, SearchForFacetValues, TimeBudget,
FacetValueHit, InternalError, OrderBy, PatternMatch, SearchForFacetValues, SearchStep,
TimeBudget,
};
use meilisearch_types::settings::DEFAULT_PAGINATION_MAX_TOTAL_HITS;
use meilisearch_types::{milli, Document};
@@ -1024,11 +1026,12 @@ pub fn prepare_search<'t>(
search_kind: &SearchKind,
time_budget: TimeBudget,
features: RoFeatures,
progress: &'t Progress,
) -> Result<(milli::Search<'t>, bool, usize, usize), ResponseError> {
if query.media.is_some() {
features.check_multimodal("passing `media` in a search query")?;
}
let mut search = index.search(rtxn);
let mut search = index.search(rtxn, progress);
search.time_budget(time_budget);
if let Some(ranking_score_threshold) = query.ranking_score_threshold {
search.ranking_score_threshold(ranking_score_threshold.0);
@@ -1048,6 +1051,7 @@ pub fn prepare_search<'t>(
let vector = match query.vector.clone() {
Some(vector) => vector,
None => {
let _step = progress.update_progress_scoped(SearchStep::Embed);
let span = tracing::trace_span!(target: "search::vector", "embed_one");
let _entered = span.enter();
@@ -1173,6 +1177,7 @@ pub struct SearchParams {
pub fn perform_search(
params: SearchParams,
index: &Index,
progress: &Progress,
) -> Result<(SearchResult, TimeBudget), ResponseError> {
let SearchParams {
index_uid,
@@ -1191,8 +1196,15 @@ pub fn perform_search(
None => TimeBudget::default(),
};
let (search, is_finite_pagination, max_total_hits, offset) =
prepare_search(index, &rtxn, &query, &search_kind, time_budget.clone(), features)?;
let (search, is_finite_pagination, max_total_hits, offset) = prepare_search(
index,
&rtxn,
&query,
&search_kind,
time_budget.clone(),
features,
progress,
)?;
let (
milli::SearchResult {
@@ -1275,6 +1287,7 @@ pub fn perform_search(
format,
matching_words,
documents_ids.iter().copied().zip(document_scores.iter()),
progress,
)?;
let number_of_hits = min(candidates.len() as usize, max_total_hits);
@@ -1297,6 +1310,7 @@ pub fn perform_search(
let (facet_distribution, facet_stats) = facets
.map(move |facets| {
let _step = progress.update_progress_scoped(SearchStep::FacetDistribution);
compute_facet_distribution_stats(&facets, index, &rtxn, candidates, Route::Search)
})
.transpose()?
@@ -1580,7 +1594,13 @@ impl<'a> HitMaker<'a> {
})
}
pub fn make_hit(&self, id: u32, score: &[ScoreDetails]) -> milli::Result<SearchHit> {
pub fn make_hit(
&self,
id: u32,
score: &[ScoreDetails],
progress: &Progress,
) -> milli::Result<SearchHit> {
let _step = progress.update_progress_scoped(SearchStep::Format);
let (_, obkv) =
self.index.iter_documents(self.rtxn, std::iter::once(id))?.next().unwrap()?;
@@ -1669,6 +1689,7 @@ fn make_hits<'a>(
format: AttributesFormat,
matching_words: milli::MatchingWords,
documents_ids_scores: impl Iterator<Item = (u32, &'a Vec<ScoreDetails>)> + 'a,
progress: &Progress,
) -> milli::Result<Vec<SearchHit>> {
let mut documents = Vec::new();
@@ -1686,7 +1707,7 @@ fn make_hits<'a>(
let hit_maker = HitMaker::new(index, rtxn, format, formatter_builder)?;
for (id, score) in documents_ids_scores {
documents.push(hit_maker.make_hit(id, score)?);
documents.push(hit_maker.make_hit(id, score, progress)?);
}
Ok(documents)
}
@@ -1701,6 +1722,7 @@ pub fn perform_facet_search(
locales: Option<Vec<Language>>,
) -> Result<FacetSearchResult, ResponseError> {
let before_search = Instant::now();
let progress = Progress::default();
let rtxn = index.read_txn()?;
let time_budget = match index.search_cutoff(&rtxn)? {
Some(cutoff) => TimeBudget::new(Duration::from_millis(cutoff)),
@@ -1729,8 +1751,15 @@ pub fn perform_facet_search(
.collect()
});
let (search, _, _, _) =
prepare_search(index, &rtxn, &search_query, &search_kind, time_budget, features)?;
let (search, _, _, _) = prepare_search(
index,
&rtxn,
&search_query,
&search_kind,
time_budget,
features,
&progress,
)?;
let mut facet_search = SearchForFacetValues::new(
facet_name,
search,
@@ -1754,6 +1783,7 @@ pub fn perform_facet_search(
})
}
#[allow(clippy::too_many_arguments)]
pub fn perform_similar(
index: &Index,
query: SimilarQuery,
@@ -1762,6 +1792,7 @@ pub fn perform_similar(
quantized: bool,
retrieve_vectors: RetrieveVectors,
features: RoFeatures,
progress: &Progress,
) -> Result<SimilarResult, ResponseError> {
let before_search = Instant::now();
let rtxn = index.read_txn()?;
@@ -1802,6 +1833,7 @@ pub fn perform_similar(
embedder_name,
embedder,
quantized,
progress,
);
if let Some(ref filter) = query.filter {
@@ -1851,6 +1883,7 @@ pub fn perform_similar(
format,
Default::default(),
documents_ids.iter().copied().zip(document_scores.iter()),
progress,
)?;
let max_total_hits = index

View File

@@ -452,7 +452,6 @@ async fn limit_offset() {
}
#[actix_rt::test]
#[cfg(not(windows))]
async fn simple_search_hf() {
let server = Server::new_shared();
let index = index_with_documents_hf(server, &SIMPLE_SEARCH_DOCUMENTS).await;

View File

@@ -103,7 +103,7 @@ async fn swap_indexes() {
{
"uid": 1,
"batchUid": 1,
"indexUid": "a",
"indexUid": "b",
"status": "succeeded",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
@@ -266,7 +266,7 @@ async fn swap_indexes() {
{
"uid": 4,
"batchUid": 4,
"indexUid": "c",
"indexUid": "d",
"status": "succeeded",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
@@ -341,7 +341,7 @@ async fn swap_indexes() {
{
"uid": 0,
"batchUid": 0,
"indexUid": "a",
"indexUid": "b",
"status": "succeeded",
"type": "documentAdditionOrUpdate",
"canceledBy": null,

View File

@@ -274,19 +274,19 @@ async fn test_both_apis() {
"birthyear": 2011,
"breed": "Beagle"
},
{
"id": 2,
"name": "Vénus",
"gender": "F",
"birthyear": 2003,
"breed": "Jack Russel Terrier"
},
{
"id": 3,
"name": "Max",
"gender": "M",
"birthyear": 1995,
"breed": "Labrador Retriever"
},
{
"id": 2,
"name": "Vénus",
"gender": "F",
"birthyear": 2003,
"breed": "Jack Russel Terrier"
}
]
"###);
@@ -314,19 +314,19 @@ async fn test_both_apis() {
"birthyear": 2011,
"breed": "Beagle"
},
{
"id": 2,
"name": "Vénus",
"gender": "F",
"birthyear": 2003,
"breed": "Jack Russel Terrier"
},
{
"id": 3,
"name": "Max",
"gender": "M",
"birthyear": 1995,
"breed": "Labrador Retriever"
},
{
"id": 2,
"name": "Vénus",
"gender": "F",
"birthyear": 2003,
"breed": "Jack Russel Terrier"
}
]
"###);
@@ -354,19 +354,19 @@ async fn test_both_apis() {
"birthyear": 2011,
"breed": "Beagle"
},
{
"id": 2,
"name": "Vénus",
"gender": "F",
"birthyear": 2003,
"breed": "Jack Russel Terrier"
},
{
"id": 0,
"name": "kefir",
"gender": "M",
"birthyear": 2023,
"breed": "Patou"
},
{
"id": 2,
"name": "Vénus",
"gender": "F",
"birthyear": 2003,
"breed": "Jack Russel Terrier"
}
]
"###);
@@ -394,19 +394,19 @@ async fn test_both_apis() {
"birthyear": 2011,
"breed": "Beagle"
},
{
"id": 2,
"name": "Vénus",
"gender": "F",
"birthyear": 2003,
"breed": "Jack Russel Terrier"
},
{
"id": 0,
"name": "kefir",
"gender": "M",
"birthyear": 2023,
"breed": "Patou"
},
{
"id": 2,
"name": "Vénus",
"gender": "F",
"birthyear": 2003,
"breed": "Jack Russel Terrier"
}
]
"###);
@@ -420,13 +420,6 @@ async fn test_both_apis() {
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @r###"
[
{
"id": 1,
"name": "Intel",
"gender": "M",
"birthyear": 2011,
"breed": "Beagle"
},
{
"id": 0,
"name": "kefir",
@@ -435,11 +428,11 @@ async fn test_both_apis() {
"breed": "Patou"
},
{
"id": 2,
"name": "Vénus",
"gender": "F",
"birthyear": 2003,
"breed": "Jack Russel Terrier"
"id": 1,
"name": "Intel",
"gender": "M",
"birthyear": 2011,
"breed": "Beagle"
},
{
"id": 3,
@@ -447,6 +440,13 @@ async fn test_both_apis() {
"gender": "M",
"birthyear": 1995,
"breed": "Labrador Retriever"
},
{
"id": 2,
"name": "Vénus",
"gender": "F",
"birthyear": 2003,
"breed": "Jack Russel Terrier"
}
]
"###);
@@ -460,13 +460,6 @@ async fn test_both_apis() {
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @r###"
[
{
"id": 1,
"name": "Intel",
"gender": "M",
"birthyear": 2011,
"breed": "Beagle"
},
{
"id": 0,
"name": "kefir",
@@ -475,11 +468,11 @@ async fn test_both_apis() {
"breed": "Patou"
},
{
"id": 2,
"name": "Vénus",
"gender": "F",
"birthyear": 2003,
"breed": "Jack Russel Terrier"
"id": 1,
"name": "Intel",
"gender": "M",
"birthyear": 2011,
"breed": "Beagle"
},
{
"id": 3,
@@ -487,6 +480,13 @@ async fn test_both_apis() {
"gender": "M",
"birthyear": 1995,
"breed": "Labrador Retriever"
},
{
"id": 2,
"name": "Vénus",
"gender": "F",
"birthyear": 2003,
"breed": "Jack Russel Terrier"
}
]
"###);

View File

@@ -91,7 +91,7 @@ rhai = { version = "1.23.6", features = [
"sync",
] }
arroy = "0.6.4-nested-rtxns"
hannoy = { version = "0.1.2-nested-rtxns", features = ["arroy"] }
hannoy = { version = "0.1.0-nested-rtxns", features = ["arroy"] }
rand = "0.8.5"
tracing = "0.1.41"
ureq = { version = "2.12.1", features = ["json"] }

View File

@@ -28,6 +28,7 @@ use crate::heed_codec::facet::{
use crate::heed_codec::version::VersionCodec;
use crate::heed_codec::{BEU16StrCodec, FstSetCodec, StrBEU16Codec, StrRefCodec};
use crate::order_by_map::OrderByMap;
use crate::progress::Progress;
use crate::prompt::PromptData;
use crate::proximity::ProximityPrecision;
use crate::update::new::StdResult;
@@ -1477,8 +1478,8 @@ impl Index {
FacetDistribution::new(rtxn, self)
}
pub fn search<'a>(&'a self, rtxn: &'a RoTxn<'a>) -> Search<'a> {
Search::new(rtxn, self)
pub fn search<'a>(&'a self, rtxn: &'a RoTxn<'a>, progress: &'a Progress) -> Search<'a> {
Search::new(rtxn, self, progress)
}
/// Returns the index creation time.

View File

@@ -81,6 +81,7 @@ pub use self::index::Index;
pub use self::localized_attributes_rules::LocalizedAttributesRule;
pub use self::search::facet::{FacetValueHit, SearchForFacetValues};
pub use self::search::similar::Similar;
pub use self::search::steps::{FederatingResultsStep, SearchStep, TotalProcessingTimeStep};
pub use self::search::{
FacetDistribution, Filter, FormatOptions, MatchBounds, MatcherBuilder, MatchingWords, OrderBy,
Search, SearchResult, SemanticSearch, TermsMatchingStrategy, DEFAULT_VALUES_PER_FACET,

View File

@@ -50,18 +50,58 @@ struct InnerProgress {
}
impl Progress {
pub fn update_progress<P: Step>(&self, sub_progress: P) {
/// Update the progress and return `true` if the step was started, `false` if it was already started.
pub fn update_progress<P: Step>(&self, sub_progress: P) -> bool {
let mut inner = self.steps.write().unwrap();
let InnerProgress { steps, durations } = &mut *inner;
let now = Instant::now();
let step_type = TypeId::of::<P>();
if let Some(idx) = steps.iter().position(|(id, _, _)| *id == step_type) {
if steps[idx].1.name() == sub_progress.name() {
// The step is already started, so we don't need to start it again.
return false;
}
push_steps_durations(steps, durations, now, idx);
steps.truncate(idx);
}
steps.push((step_type, Box::new(sub_progress), now));
true
}
/// End a step that has been started without having to start a new step.
fn end_progress_step<P: Step>(&self, sub_progress: P) {
let mut inner = self.steps.write().unwrap();
let InnerProgress { steps, durations } = &mut *inner;
let now = Instant::now();
let step_type = TypeId::of::<P>();
debug_assert!(
steps.iter().any(|(id, s, _)| *id == step_type && s.name() == sub_progress.name()),
"Step `{}` must have been started",
sub_progress.name()
);
if let Some(idx) = steps.iter().position(|(id, _, _)| *id == step_type) {
push_steps_durations(steps, durations, now, idx);
steps.truncate(idx);
}
}
/// Update the progress and return a scoped progress step that will end the progress step when dropped.
pub fn update_progress_scoped<P: Step + Copy>(&self, step: P) -> ScopedProgressStep<'_, P> {
let started = self.update_progress(step);
debug_assert!(
started,
"Step `{}` can't be scoped because it was already started",
step.name()
);
ScopedProgressStep { progress: self, step: started.then_some(step) }
}
// TODO: This code should be in meilisearch_types but cannot because milli can't depend on meilisearch_types
@@ -95,7 +135,15 @@ impl Progress {
let now = Instant::now();
push_steps_durations(steps, &mut durations, now, 0);
durations.drain(..).map(|(name, duration)| (name, format!("{duration:.2?}"))).collect()
let mut accumulated_durations = IndexMap::new();
for (name, duration) in durations.drain(..) {
accumulated_durations.entry(name).and_modify(|d| *d += duration).or_insert(duration);
}
accumulated_durations
.into_iter()
.map(|(name, duration)| (name, format!("{duration:.2?}")))
.collect()
}
// TODO: ideally we should expose the progress in a way that let arroy use it directly
@@ -343,3 +391,16 @@ impl<T: steppe::Step> Step for Compat<T> {
self.0.total().try_into().unwrap_or(u32::MAX)
}
}
pub struct ScopedProgressStep<'a, P: Step + Copy> {
progress: &'a Progress,
step: Option<P>,
}
impl<'a, P: Step + Copy> Drop for ScopedProgressStep<'a, P> {
fn drop(&mut self) {
if let Some(step) = self.step {
self.progress.end_progress_step(step);
}
}
}

View File

@@ -1165,7 +1165,7 @@ mod tests {
let rtxn = index.read_txn().unwrap();
let mut search = crate::Search::new(&rtxn, &index);
let mut search = index.search(&rtxn);
// this filter is copy pasted from #2380 with the exact same espace sequence
search.filter(Filter::from_str("monitor_diagonal = '27\" to 30\\''").unwrap().unwrap());
let crate::SearchResult { documents_ids, .. } = search.execute().unwrap();
@@ -1225,7 +1225,7 @@ mod tests {
let rtxn = index.read_txn().unwrap();
let mut search = crate::Search::new(&rtxn, &index);
let mut search = index.search(&rtxn);
search.filter(Filter::from_str("_geoRadius(45.4777599, 9.1967508, 0)").unwrap().unwrap());
let crate::SearchResult { documents_ids, .. } = search.execute().unwrap();

View File

@@ -6,6 +6,7 @@ use roaring::RoaringBitmap;
use crate::score_details::{ScoreDetails, ScoreValue, ScoringStrategy};
use crate::search::new::{distinct_fid, distinct_single_docid};
use crate::search::steps::SearchStep;
use crate::search::SemanticSearch;
use crate::vector::{Embedding, SearchQuery};
use crate::{Index, MatchingWords, Result, Search, SearchResult};
@@ -221,6 +222,7 @@ impl Search<'_> {
time_budget: self.time_budget.clone(),
ranking_score_threshold: self.ranking_score_threshold,
locales: self.locales.clone(),
progress: self.progress,
};
let semantic = search.semantic.take();
@@ -241,6 +243,7 @@ impl Search<'_> {
Some(vector_query) => vector_query,
None => {
// attempt to embed the vector
self.progress.update_progress(SearchStep::Embed);
let span = tracing::trace_span!(target: "search::hybrid", "embed_one");
let _entered = span.enter();

View File

@@ -12,6 +12,7 @@ use self::new::{execute_vector_search, PartialSearchResult, VectorStoreStats};
use crate::documents::GeoSortParameter;
use crate::filterable_attributes_rules::{filtered_matching_patterns, matching_features};
use crate::index::MatchingStrategy;
use crate::progress::Progress;
use crate::score_details::{ScoreDetails, ScoringStrategy};
use crate::vector::{Embedder, Embedding};
use crate::{
@@ -29,6 +30,7 @@ mod fst_utils;
pub mod hybrid;
pub mod new;
pub mod similar;
pub mod steps;
#[derive(Debug, Clone)]
pub struct SemanticSearch {
@@ -61,10 +63,11 @@ pub struct Search<'a> {
time_budget: TimeBudget,
ranking_score_threshold: Option<f64>,
locales: Option<Vec<Language>>,
progress: &'a Progress,
}
impl<'a> Search<'a> {
pub fn new(rtxn: &'a heed::RoTxn<'a>, index: &'a Index) -> Search<'a> {
pub fn new(rtxn: &'a heed::RoTxn<'a>, index: &'a Index, progress: &'a Progress) -> Search<'a> {
Search {
query: None,
filter: None,
@@ -86,6 +89,7 @@ impl<'a> Search<'a> {
locales: None,
time_budget: TimeBudget::max(),
ranking_score_threshold: None,
progress,
}
}
@@ -198,7 +202,7 @@ impl<'a> Search<'a> {
pub fn execute_for_candidates(&self, has_vector_search: bool) -> Result<RoaringBitmap> {
if has_vector_search {
let ctx = SearchContext::new(self.index, self.rtxn)?;
filtered_universe(ctx.index, ctx.txn, &self.filter)
filtered_universe(ctx.index, ctx.txn, &self.filter, self.progress)
} else {
Ok(self.execute()?.candidates)
}
@@ -239,7 +243,7 @@ impl<'a> Search<'a> {
}
}
let universe = filtered_universe(ctx.index, ctx.txn, &self.filter)?;
let universe = filtered_universe(ctx.index, ctx.txn, &self.filter, self.progress)?;
let mut query_vector = None;
let PartialSearchResult {
located_query_terms,
@@ -276,6 +280,7 @@ impl<'a> Search<'a> {
*quantized,
self.time_budget.clone(),
self.ranking_score_threshold,
self.progress,
)?
}
_ => execute_search(
@@ -297,6 +302,7 @@ impl<'a> Search<'a> {
self.time_budget.clone(),
self.ranking_score_threshold,
self.locales.as_ref(),
self.progress,
)?,
};
@@ -347,6 +353,7 @@ impl fmt::Debug for Search<'_> {
time_budget,
ranking_score_threshold,
locales,
progress: _,
} = self;
f.debug_struct("Search")
.field("query", query)

View File

@@ -3,10 +3,12 @@ use roaring::RoaringBitmap;
use super::logger::SearchLogger;
use super::ranking_rules::{BoxRankingRule, RankingRuleQueryTrait};
use super::SearchContext;
use crate::progress::Progress;
use crate::score_details::{ScoreDetails, ScoringStrategy};
use crate::search::new::distinct::{
apply_distinct_rule, distinct_fid, distinct_single_docid, DistinctOutput,
};
use crate::search::steps::ComputingBucketSortStep;
use crate::{Result, TimeBudget};
pub struct BucketSortOutput {
@@ -34,6 +36,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
ranking_score_threshold: Option<f64>,
exhaustive_number_hits: bool,
max_total_hits: Option<usize>,
progress: &Progress,
) -> Result<BucketSortOutput> {
logger.initial_query(query);
logger.ranking_rules(&ranking_rules);
@@ -97,7 +100,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
logger.start_iteration_ranking_rule(0, ranking_rules[0].as_ref(), query, universe);
ranking_rules[0].start_iteration(ctx, logger, universe, query, &time_budget)?;
ranking_rules[0].start_iteration(ctx, logger, universe, query, &time_budget, progress)?;
let mut ranking_rule_scores: Vec<ScoreDetails> = vec![];
@@ -157,6 +160,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
distinct_fid,
&ranking_rule_scores,
$candidates,
progress,
)?;
};
}
@@ -185,6 +189,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
ctx,
logger,
&ranking_rule_universes[cur_ranking_rule_index],
progress,
)? {
std::task::Poll::Ready(bucket) => bucket,
std::task::Poll::Pending => {
@@ -231,6 +236,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
logger,
&ranking_rule_universes[cur_ranking_rule_index],
&time_budget,
progress,
)?
else {
back!();
@@ -289,6 +295,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
&next_bucket.candidates,
&next_bucket.query,
&time_budget,
progress,
)?;
}
@@ -323,9 +330,11 @@ fn maybe_add_to_results<'ctx, Q: RankingRuleQueryTrait>(
distinct_fid: Option<u16>,
ranking_rule_scores: &[ScoreDetails],
candidates: RoaringBitmap,
progress: &Progress,
) -> Result<()> {
// First apply the distinct rule on the candidates, reducing the universes if necessary
let candidates = if let Some(distinct_fid) = distinct_fid {
progress.update_progress(ComputingBucketSortStep::Distinct);
let DistinctOutput { remaining, excluded } =
apply_distinct_rule(ctx, distinct_fid, &candidates)?;
for universe in ranking_rule_universes.iter_mut() {
@@ -336,6 +345,8 @@ fn maybe_add_to_results<'ctx, Q: RankingRuleQueryTrait>(
} else {
candidates.clone()
};
progress.update_progress(ComputingBucketSortStep::MergeCandidates);
*all_candidates |= &candidates;
// if the candidates are empty, there is nothing to do;

View File

@@ -3,9 +3,12 @@ use roaring::{MultiOps, RoaringBitmap};
use super::query_graph::QueryGraph;
use super::ranking_rules::{RankingRule, RankingRuleOutput};
use crate::progress::Progress;
use crate::score_details::{self, ScoreDetails};
use crate::search::new::query_graph::QueryNodeData;
use crate::search::new::query_term::ExactTerm;
use crate::search::new::ranking_rules::RankingRuleId;
use crate::search::steps::{ComputingBucketSortStep, RankingRuleStep};
use crate::{CboRoaringBitmapCodec, Result, SearchContext, SearchLogger, TimeBudget};
/// A ranking rule that produces 3 disjoint buckets:
@@ -24,8 +27,8 @@ impl ExactAttribute {
}
impl<'ctx> RankingRule<'ctx, QueryGraph> for ExactAttribute {
fn id(&self) -> String {
"exact_attribute".to_owned()
fn id(&self) -> RankingRuleId {
RankingRuleId::Exactness
}
#[tracing::instrument(level = "trace", skip_all, target = "search::exact_attribute")]
@@ -36,7 +39,10 @@ impl<'ctx> RankingRule<'ctx, QueryGraph> for ExactAttribute {
universe: &roaring::RoaringBitmap,
query: &QueryGraph,
_time_budget: &TimeBudget,
progress: &Progress,
) -> Result<()> {
progress.update_progress(ComputingBucketSortStep::from(self.id()));
let _step = progress.update_progress_scoped(RankingRuleStep::StartIteration);
self.state = State::start_iteration(ctx, universe, query)?;
Ok(())
}
@@ -48,7 +54,10 @@ impl<'ctx> RankingRule<'ctx, QueryGraph> for ExactAttribute {
_logger: &mut dyn SearchLogger<QueryGraph>,
universe: &roaring::RoaringBitmap,
_time_budget: &TimeBudget,
progress: &Progress,
) -> Result<Option<RankingRuleOutput<QueryGraph>>> {
progress.update_progress(ComputingBucketSortStep::from(self.id()));
let _step = progress.update_progress_scoped(RankingRuleStep::NextBucket);
let state = std::mem::take(&mut self.state);
let (state, output) = State::next(state, universe);
self.state = state;

View File

@@ -6,7 +6,10 @@ use rstar::RTree;
use super::ranking_rules::{RankingRule, RankingRuleOutput, RankingRuleQueryTrait};
use crate::documents::geo_sort::{fill_cache, next_bucket};
use crate::documents::{GeoSortParameter, GeoSortStrategy};
use crate::progress::Progress;
use crate::score_details::{self, ScoreDetails};
use crate::search::new::ranking_rules::RankingRuleId;
use crate::search::steps::{ComputingBucketSortStep, RankingRuleStep};
use crate::{GeoPoint, Result, SearchContext, SearchLogger, TimeBudget};
pub struct GeoSort<Q: RankingRuleQueryTrait> {
@@ -73,8 +76,8 @@ impl<Q: RankingRuleQueryTrait> GeoSort<Q> {
}
impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for GeoSort<Q> {
fn id(&self) -> String {
"geo_sort".to_owned()
fn id(&self) -> RankingRuleId {
RankingRuleId::GeoSort
}
#[tracing::instrument(level = "trace", skip_all, target = "search::geo_sort")]
@@ -85,7 +88,10 @@ impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for GeoSort<Q> {
universe: &RoaringBitmap,
query: &Q,
_time_budget: &TimeBudget,
progress: &Progress,
) -> Result<()> {
progress.update_progress(ComputingBucketSortStep::from(self.id()));
let _step = progress.update_progress_scoped(RankingRuleStep::StartIteration);
assert!(self.query.is_none());
self.query = Some(query.clone());
@@ -112,7 +118,10 @@ impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for GeoSort<Q> {
_logger: &mut dyn SearchLogger<Q>,
universe: &RoaringBitmap,
_time_budget: &TimeBudget,
progress: &Progress,
) -> Result<Option<RankingRuleOutput<Q>>> {
progress.update_progress(ComputingBucketSortStep::from(self.id()));
let _step = progress.update_progress_scoped(RankingRuleStep::NextBucket);
let query = self.query.as_ref().unwrap().clone();
next_bucket(

View File

@@ -50,51 +50,54 @@ use super::ranking_rule_graph::{
};
use super::small_bitmap::SmallBitmap;
use super::{QueryGraph, RankingRule, RankingRuleOutput, SearchContext};
use crate::progress::Progress;
use crate::score_details::Rank;
use crate::search::new::query_term::LocatedQueryTermSubset;
use crate::search::new::ranking_rule_graph::PathVisitor;
use crate::search::new::ranking_rules::RankingRuleId;
use crate::search::steps::{ComputingBucketSortStep, RankingRuleStep};
use crate::{Result, TermsMatchingStrategy, TimeBudget};
pub type Words = GraphBasedRankingRule<WordsGraph>;
impl GraphBasedRankingRule<WordsGraph> {
pub fn new(terms_matching_strategy: TermsMatchingStrategy) -> Self {
Self::new_with_id("words".to_owned(), Some(terms_matching_strategy))
Self::new_with_id(RankingRuleId::Words, Some(terms_matching_strategy))
}
}
pub type Proximity = GraphBasedRankingRule<ProximityGraph>;
impl GraphBasedRankingRule<ProximityGraph> {
pub fn new(terms_matching_strategy: Option<TermsMatchingStrategy>) -> Self {
Self::new_with_id("proximity".to_owned(), terms_matching_strategy)
Self::new_with_id(RankingRuleId::Proximity, terms_matching_strategy)
}
}
pub type Fid = GraphBasedRankingRule<FidGraph>;
impl GraphBasedRankingRule<FidGraph> {
pub fn new(terms_matching_strategy: Option<TermsMatchingStrategy>) -> Self {
Self::new_with_id("fid".to_owned(), terms_matching_strategy)
Self::new_with_id(RankingRuleId::AttributePosition, terms_matching_strategy)
}
}
pub type Position = GraphBasedRankingRule<PositionGraph>;
impl GraphBasedRankingRule<PositionGraph> {
pub fn new(terms_matching_strategy: Option<TermsMatchingStrategy>) -> Self {
Self::new_with_id("position".to_owned(), terms_matching_strategy)
Self::new_with_id(RankingRuleId::WordPosition, terms_matching_strategy)
}
}
pub type Typo = GraphBasedRankingRule<TypoGraph>;
impl GraphBasedRankingRule<TypoGraph> {
pub fn new(terms_matching_strategy: Option<TermsMatchingStrategy>) -> Self {
Self::new_with_id("typo".to_owned(), terms_matching_strategy)
Self::new_with_id(RankingRuleId::Typo, terms_matching_strategy)
}
}
pub type Exactness = GraphBasedRankingRule<ExactnessGraph>;
impl GraphBasedRankingRule<ExactnessGraph> {
pub fn new() -> Self {
Self::new_with_id("exactness".to_owned(), None)
Self::new_with_id(RankingRuleId::Exactness, None)
}
}
/// A generic graph-based ranking rule
pub struct GraphBasedRankingRule<G: RankingRuleGraphTrait> {
id: String,
id: RankingRuleId,
terms_matching_strategy: Option<TermsMatchingStrategy>,
// When the ranking rule is not iterating over its buckets,
// its state is `None`.
@@ -102,7 +105,10 @@ pub struct GraphBasedRankingRule<G: RankingRuleGraphTrait> {
}
impl<G: RankingRuleGraphTrait> GraphBasedRankingRule<G> {
/// Creates the ranking rule with the given identifier
pub fn new_with_id(id: String, terms_matching_strategy: Option<TermsMatchingStrategy>) -> Self {
pub fn new_with_id(
id: RankingRuleId,
terms_matching_strategy: Option<TermsMatchingStrategy>,
) -> Self {
Self { id, terms_matching_strategy, state: None }
}
}
@@ -124,7 +130,7 @@ pub struct GraphBasedRankingRuleState<G: RankingRuleGraphTrait> {
}
impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBasedRankingRule<G> {
fn id(&self) -> String {
fn id(&self) -> RankingRuleId {
self.id.clone()
}
@@ -136,7 +142,10 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
_universe: &RoaringBitmap,
query_graph: &QueryGraph,
_time_budget: &TimeBudget,
progress: &Progress,
) -> Result<()> {
progress.update_progress(ComputingBucketSortStep::from(self.id()));
let _step = progress.update_progress_scoped(RankingRuleStep::StartIteration);
// the `next_max_cost` is the successor integer to the maximum cost of the paths in the graph.
//
// When there is a matching strategy, it also factors the additional costs of:
@@ -219,7 +228,10 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
logger: &mut dyn SearchLogger<QueryGraph>,
universe: &RoaringBitmap,
_time_budget: &TimeBudget,
progress: &Progress,
) -> Result<Option<RankingRuleOutput<QueryGraph>>> {
progress.update_progress(ComputingBucketSortStep::from(self.id()));
let _step = progress.update_progress_scoped(RankingRuleStep::NextBucket);
// Will crash if `next_bucket` is called before `start_iteration` or after `end_iteration`,
// should never happen
let mut state = self.state.take().unwrap();

View File

@@ -14,7 +14,7 @@ use crate::search::new::ranking_rule_graph::{
ProximityGraph, RankingRuleGraph, RankingRuleGraphTrait, TypoCondition, TypoGraph,
WordsCondition, WordsGraph,
};
use crate::search::new::ranking_rules::BoxRankingRule;
use crate::search::new::ranking_rules::{BoxRankingRule, RankingRuleId};
use crate::search::new::{QueryGraph, QueryNode, RankingRule, SearchContext, SearchLogger};
use crate::Result;
@@ -45,13 +45,26 @@ enum Location {
Other,
}
impl From<RankingRuleId> for Location {
fn from(ranking_rule_id: RankingRuleId) -> Self {
match ranking_rule_id {
RankingRuleId::Words => Self::Words,
RankingRuleId::Typo => Self::Typo,
RankingRuleId::Proximity => Self::Proximity,
RankingRuleId::AttributePosition => Self::Fid,
RankingRuleId::WordPosition => Self::Position,
_ => Self::Other,
}
}
}
#[derive(Default)]
pub struct VisualSearchLogger {
initial_query: Option<QueryGraph>,
initial_query_time: Option<Instant>,
query_for_universe: Option<QueryGraph>,
initial_universe: Option<RoaringBitmap>,
ranking_rules_ids: Option<Vec<String>>,
ranking_rules_ids: Option<Vec<RankingRuleId>>,
events: Vec<SearchEvents>,
location: Vec<Location>,
}
@@ -84,14 +97,7 @@ impl SearchLogger<QueryGraph> for VisualSearchLogger {
ranking_rule_idx,
universe_len: universe.len(),
});
self.location.push(match ranking_rule.id().as_str() {
"words" => Location::Words,
"typo" => Location::Typo,
"proximity" => Location::Proximity,
"fid" => Location::Fid,
"position" => Location::Position,
_ => Location::Other,
});
self.location.push(ranking_rule.id().into());
}
fn next_bucket_ranking_rule(

View File

@@ -498,12 +498,14 @@ mod tests {
use super::*;
use crate::index::tests::TempIndex;
use crate::progress::Progress;
use crate::{execute_search, filtered_universe, SearchContext, TimeBudget};
impl<'a> MatcherBuilder<'a> {
fn new_test(rtxn: &'a heed::RoTxn<'a>, index: &'a TempIndex, query: &str) -> Self {
let progress = Progress::default();
let mut ctx = SearchContext::new(index, rtxn).unwrap();
let universe = filtered_universe(ctx.index, ctx.txn, &None).unwrap();
let universe = filtered_universe(ctx.index, ctx.txn, &None, &progress).unwrap();
let crate::search::PartialSearchResult { located_query_terms, .. } = execute_search(
&mut ctx,
Some(query),
@@ -523,6 +525,7 @@ mod tests {
TimeBudget::max(),
None,
None,
&progress,
)
.unwrap();

View File

@@ -56,8 +56,10 @@ use crate::constants::RESERVED_GEO_FIELD_NAME;
use crate::documents::GeoSortParameter;
use crate::index::PrefixSearch;
use crate::localized_attributes_rules::LocalizedFieldIds;
use crate::progress::Progress;
use crate::score_details::{ScoreDetails, ScoringStrategy};
use crate::search::new::distinct::apply_distinct_rule;
use crate::search::steps::SearchStep;
use crate::vector::Embedder;
use crate::{
AscDesc, DocumentId, FieldId, Filter, Index, Member, Result, TermsMatchingStrategy, TimeBudget,
@@ -294,7 +296,9 @@ fn resolve_universe(
query_graph: &QueryGraph,
matching_strategy: TermsMatchingStrategy,
logger: &mut dyn SearchLogger<QueryGraph>,
progress: &Progress,
) -> Result<RoaringBitmap> {
let _step = progress.update_progress_scoped(SearchStep::ResolveUniverse);
resolve_maximally_reduced_query_graph(
ctx,
initial_universe,
@@ -632,8 +636,10 @@ pub fn filtered_universe(
index: &Index,
txn: &RoTxn<'_>,
filters: &Option<Filter<'_>>,
progress: &Progress,
) -> Result<RoaringBitmap> {
Ok(if let Some(filters) = filters {
let _step = progress.update_progress_scoped(SearchStep::Filter);
filters.evaluate(txn, index)?
} else {
index.documents_ids(txn)?
@@ -658,6 +664,7 @@ pub fn execute_vector_search(
quantized: bool,
time_budget: TimeBudget,
ranking_score_threshold: Option<f64>,
progress: &Progress,
) -> Result<PartialSearchResult> {
check_sort_criteria(ctx, sort_criteria.as_ref())?;
@@ -678,6 +685,7 @@ pub fn execute_vector_search(
let placeholder_search_logger: &mut dyn SearchLogger<PlaceholderQuery> =
&mut placeholder_search_logger;
let _step = progress.update_progress_scoped(SearchStep::SemanticSearch);
let BucketSortOutput { docids, scores, all_candidates, degraded } = bucket_sort(
ctx,
ranking_rules,
@@ -692,6 +700,7 @@ pub fn execute_vector_search(
ranking_score_threshold,
exhaustive_number_hits,
max_total_hits,
progress,
)?;
Ok(PartialSearchResult {
@@ -725,12 +734,14 @@ pub fn execute_search(
time_budget: TimeBudget,
ranking_score_threshold: Option<f64>,
locales: Option<&Vec<Language>>,
progress: &Progress,
) -> Result<PartialSearchResult> {
check_sort_criteria(ctx, sort_criteria.as_ref())?;
let mut used_negative_operator = false;
let mut located_query_terms = None;
let query_terms = if let Some(query) = query {
let _step = progress.update_progress_scoped(SearchStep::Tokenize);
let span = tracing::trace_span!(target: "search::tokens", "tokenizer_builder");
let entered = span.enter();
@@ -834,9 +845,16 @@ pub fn execute_search(
terms_matching_strategy,
)?;
universe &=
resolve_universe(ctx, &universe, &graph, terms_matching_strategy, query_graph_logger)?;
universe &= resolve_universe(
ctx,
&universe,
&graph,
terms_matching_strategy,
query_graph_logger,
progress,
)?;
let _step = progress.update_progress_scoped(SearchStep::KeywordSearch);
bucket_sort(
ctx,
ranking_rules,
@@ -851,10 +869,12 @@ pub fn execute_search(
ranking_score_threshold,
exhaustive_number_hits,
max_total_hits,
progress,
)?
} else {
let ranking_rules =
get_ranking_rules_for_placeholder_search(ctx, sort_criteria, geo_param)?;
let _step = progress.update_progress_scoped(SearchStep::PlaceholderSearch);
bucket_sort(
ctx,
ranking_rules,
@@ -869,6 +889,7 @@ pub fn execute_search(
ranking_score_threshold,
exhaustive_number_hits,
max_total_hits,
progress,
)?
};

View File

@@ -79,27 +79,16 @@ impl RankingRuleGraphTrait for FidGraph {
let mut edges = vec![];
for fid in all_fields.iter().copied() {
match weights_map.weight(fid) {
Some(weight) => {
if weight > current_max_weight {
current_max_weight = weight;
}
edges.push((
weight as u32 * term.term_ids.len() as u32,
conditions_interner
.insert(FidCondition { term: term.clone(), fid: Some(fid) }),
));
}
None => {
// Hotfix: this is a temporary solution to handle the case where the weight is not found in the weights map.
// This is due to a database corruption in word_fid_docids database.
tracing::warn!(
"{:?}",
InternalError::FieldidsWeightsMapMissingEntry { key: fid }
);
}
let weight = weights_map
.weight(fid)
.ok_or(InternalError::FieldidsWeightsMapMissingEntry { key: fid })?;
if weight > current_max_weight {
current_max_weight = weight;
}
edges.push((
weight as u32 * term.term_ids.len() as u32,
conditions_interner.insert(FidCondition { term: term.clone(), fid: Some(fid) }),
));
}
// always lookup the max_fid if we don't already and add an artificial condition for max scoring

View File

@@ -4,7 +4,9 @@ use roaring::RoaringBitmap;
use super::logger::SearchLogger;
use super::{QueryGraph, SearchContext};
use crate::progress::Progress;
use crate::score_details::ScoreDetails;
use crate::search::steps::ComputingBucketSortStep;
use crate::{Result, TimeBudget};
/// An internal trait implemented by only [`PlaceholderQuery`] and [`QueryGraph`]
@@ -24,7 +26,7 @@ pub type BoxRankingRule<'ctx, Query> = Box<dyn RankingRule<'ctx, Query> + 'ctx>;
/// (i.e. the read transaction and the cache) and over `Query`, which
/// can be either [`PlaceholderQuery`] or [`QueryGraph`].
pub trait RankingRule<'ctx, Query: RankingRuleQueryTrait> {
fn id(&self) -> String;
fn id(&self) -> RankingRuleId;
/// Prepare the ranking rule such that it can start iterating over its
/// buckets using [`next_bucket`](RankingRule::next_bucket).
@@ -39,6 +41,7 @@ pub trait RankingRule<'ctx, Query: RankingRuleQueryTrait> {
universe: &RoaringBitmap,
query: &Query,
time_budget: &TimeBudget,
progress: &Progress,
) -> Result<()>;
/// Return the next bucket of this ranking rule.
@@ -56,6 +59,7 @@ pub trait RankingRule<'ctx, Query: RankingRuleQueryTrait> {
logger: &mut dyn SearchLogger<Query>,
universe: &RoaringBitmap,
time_budget: &TimeBudget,
progress: &Progress,
) -> Result<Option<RankingRuleOutput<Query>>>;
/// Return the next bucket of this ranking rule, if doing so can be done without blocking
@@ -69,6 +73,7 @@ pub trait RankingRule<'ctx, Query: RankingRuleQueryTrait> {
_ctx: &mut SearchContext<'ctx>,
_logger: &mut dyn SearchLogger<Query>,
_universe: &RoaringBitmap,
_progress: &Progress,
) -> Result<Poll<RankingRuleOutput<Query>>> {
Ok(Poll::Pending)
}
@@ -93,3 +98,54 @@ pub struct RankingRuleOutput<Q> {
/// The score for the candidates of the current bucket
pub score: ScoreDetails,
}
/// Identifier for a ranking rule, used to label progress steps and logs.
///
/// Rendered as a snake_case name through its `Display` implementation
/// (e.g. `attribute_position`, `asc:<field>`), and convertible into a
/// `ComputingBucketSortStep` for progress reporting.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum RankingRuleId {
    Words,
    Typo,
    Proximity,
    AttributePosition,
    WordPosition,
    Exactness,
    Sort,
    GeoSort,
    VectorSort,
    // The `Asc`/`Desc` variants carry the name of the field being sorted on;
    // the field name is shown by `Display` but dropped when converting to a
    // progress step.
    Asc(String),
    Desc(String),
}
impl std::fmt::Display for RankingRuleId {
    /// Formats the rule id as a snake_case name; `Asc`/`Desc` include the
    /// sorted field name as `asc:<field>` / `desc:<field>`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Unit variants map to a static label; the two field-carrying
        // variants need formatting, so they return early.
        let label = match self {
            RankingRuleId::Words => "words",
            RankingRuleId::Typo => "typo",
            RankingRuleId::Proximity => "proximity",
            RankingRuleId::AttributePosition => "attribute_position",
            RankingRuleId::WordPosition => "word_position",
            RankingRuleId::Exactness => "exactness",
            RankingRuleId::Sort => "sort",
            RankingRuleId::GeoSort => "geo_sort",
            RankingRuleId::VectorSort => "vector_sort",
            RankingRuleId::Asc(field) => return write!(f, "asc:{field}"),
            RankingRuleId::Desc(field) => return write!(f, "desc:{field}"),
        };
        f.write_str(label)
    }
}
impl From<RankingRuleId> for ComputingBucketSortStep {
fn from(ranking_rule_id: RankingRuleId) -> Self {
match ranking_rule_id {
RankingRuleId::Words => Self::Words,
RankingRuleId::Typo => Self::Typo,
RankingRuleId::Proximity => Self::Proximity,
RankingRuleId::AttributePosition => Self::AttributePosition,
RankingRuleId::WordPosition => Self::WordPosition,
RankingRuleId::Exactness => Self::Exactness,
RankingRuleId::Sort => Self::Sort,
RankingRuleId::GeoSort => Self::GeoSort,
RankingRuleId::VectorSort => Self::VectorSort,
RankingRuleId::Asc(_) => Self::Asc,
RankingRuleId::Desc(_) => Self::Desc,
}
}
}

View File

@@ -5,8 +5,11 @@ use super::logger::SearchLogger;
use super::{RankingRule, RankingRuleOutput, RankingRuleQueryTrait, SearchContext};
use crate::heed_codec::facet::{FacetGroupKeyCodec, OrderedF64Codec};
use crate::heed_codec::{BytesRefCodec, StrRefCodec};
use crate::progress::Progress;
use crate::score_details::{self, ScoreDetails};
use crate::search::facet::{ascending_facet_sort, descending_facet_sort};
use crate::search::new::ranking_rules::RankingRuleId;
use crate::search::steps::{ComputingBucketSortStep, RankingRuleStep};
use crate::{FieldId, Index, Result, TimeBudget};
pub trait RankingRuleOutputIter<'ctx, Query> {
@@ -84,9 +87,13 @@ impl<'ctx, Query> Sort<'ctx, Query> {
}
impl<'ctx, Query: RankingRuleQueryTrait> RankingRule<'ctx, Query> for Sort<'ctx, Query> {
fn id(&self) -> String {
fn id(&self) -> RankingRuleId {
let Self { field_name, is_ascending, .. } = self;
format!("{field_name}:{}", if *is_ascending { "asc" } else { "desc" })
if *is_ascending {
RankingRuleId::Asc(field_name.clone())
} else {
RankingRuleId::Desc(field_name.clone())
}
}
#[tracing::instrument(level = "trace", skip_all, target = "search::sort")]
@@ -97,7 +104,10 @@ impl<'ctx, Query: RankingRuleQueryTrait> RankingRule<'ctx, Query> for Sort<'ctx,
parent_candidates: &RoaringBitmap,
parent_query: &Query,
_time_budget: &TimeBudget,
progress: &Progress,
) -> Result<()> {
progress.update_progress(ComputingBucketSortStep::from(self.id()));
let _step = progress.update_progress_scoped(RankingRuleStep::StartIteration);
let iter: RankingRuleOutputIterWrapper<'ctx, Query> = match self.field_id {
Some(field_id) => {
let number_db = ctx
@@ -196,7 +206,10 @@ impl<'ctx, Query: RankingRuleQueryTrait> RankingRule<'ctx, Query> for Sort<'ctx,
_logger: &mut dyn SearchLogger<Query>,
universe: &RoaringBitmap,
_time_budget: &TimeBudget,
progress: &Progress,
) -> Result<Option<RankingRuleOutput<Query>>> {
progress.update_progress(ComputingBucketSortStep::from(self.id()));
let _step = progress.update_progress_scoped(RankingRuleStep::NextBucket);
let iter = self.iter.as_mut().unwrap();
if let Some(mut bucket) = iter.next_bucket()? {
bucket.candidates &= universe;

View File

@@ -1,5 +1,5 @@
use crate::index::tests::TempIndex;
use crate::{db_snap, Criterion, Search, SearchResult, TermsMatchingStrategy};
use crate::{db_snap, Criterion, SearchResult, TermsMatchingStrategy};
fn create_index() -> TempIndex {
let index = TempIndex::new();
@@ -119,7 +119,7 @@ fn test_attribute_fid_simple() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("the quick brown fox jumps over the lazy dog");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -147,7 +147,7 @@ fn test_attribute_fid_ngrams() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("the quick brown fox jumps over the lazy dog");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);

View File

@@ -1,5 +1,5 @@
use crate::index::tests::TempIndex;
use crate::{db_snap, Criterion, Search, SearchResult, TermsMatchingStrategy};
use crate::{db_snap, Criterion, SearchResult, TermsMatchingStrategy};
fn create_index() -> TempIndex {
let index = TempIndex::new();
@@ -134,7 +134,7 @@ fn test_attribute_position_simple() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("quick brown");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -150,7 +150,7 @@ fn test_attribute_position_repeated() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("a a a a a");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -167,7 +167,7 @@ fn test_attribute_position_different_fields() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("quick brown");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -184,7 +184,7 @@ fn test_attribute_position_ngrams() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("quick brown");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);

View File

@@ -14,7 +14,7 @@ use crate::score_details::{ScoreDetails, ScoringStrategy};
use crate::update::Setting;
use crate::vector::settings::EmbeddingSettings;
use crate::vector::{Embedder, EmbedderOptions};
use crate::{Criterion, Filter, FilterableAttributesRule, Search, TimeBudget};
use crate::{Criterion, Filter, FilterableAttributesRule, TimeBudget};
fn create_index() -> TempIndex {
let index = TempIndex::new();
@@ -61,7 +61,7 @@ fn basic_degraded_search() {
let index = create_index();
let rtxn = index.read_txn().unwrap();
let mut search = Search::new(&rtxn, &index);
let mut search = index.search(&rtxn);
search.query("hello puppy kefir");
search.limit(3);
search.time_budget(TimeBudget::new(Duration::from_millis(0)));
@@ -75,7 +75,7 @@ fn degraded_search_cannot_skip_filter() {
let index = create_index();
let rtxn = index.read_txn().unwrap();
let mut search = Search::new(&rtxn, &index);
let mut search = index.search(&rtxn);
search.query("hello puppy kefir");
search.limit(100);
search.time_budget(TimeBudget::new(Duration::from_millis(0)));
@@ -96,7 +96,7 @@ fn degraded_search_and_score_details() {
let index = create_index();
let rtxn = index.read_txn().unwrap();
let mut search = Search::new(&rtxn, &index);
let mut search = index.search(&rtxn);
search.query("hello puppy kefir");
search.limit(4);
search.scoring_strategy(ScoringStrategy::Detailed);
@@ -560,7 +560,7 @@ fn degraded_search_and_score_details_vector() {
.unwrap();
let rtxn = index.read_txn().unwrap();
let mut search = Search::new(&rtxn, &index);
let mut search = index.search(&rtxn);
let embedder = Arc::new(
Embedder::new(

View File

@@ -20,7 +20,7 @@ use maplit::hashset;
use super::collect_field_values;
use crate::index::tests::TempIndex;
use crate::{
AscDesc, Criterion, FilterableAttributesRule, Index, Member, Search, SearchResult,
AscDesc, Criterion, FilterableAttributesRule, Index, Member, SearchResult,
TermsMatchingStrategy,
};
@@ -246,7 +246,7 @@ fn test_distinct_placeholder_no_ranking_rules() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.distinct(S("letter"));
let SearchResult { documents_ids, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 2, 5, 8, 9, 15, 18, 20, 21, 24, 25, 26]");
@@ -275,7 +275,7 @@ fn test_distinct_at_search_placeholder_no_ranking_rules() {
let txn = index.read_txn().unwrap();
let s = Search::new(&txn, &index);
let s = index.search(&txn);
let SearchResult { documents_ids, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 2, 5, 8, 9, 15, 18, 20, 21, 24, 25, 26]");
let distinct_values = verify_distinct(&index, &txn, None, &documents_ids);
@@ -308,7 +308,7 @@ fn test_distinct_placeholder_sort() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.sort_criteria(vec![AscDesc::Desc(Member::Field(S("rank1")))]);
let SearchResult { documents_ids, .. } = s.execute().unwrap();
@@ -348,7 +348,7 @@ fn test_distinct_placeholder_sort() {
]
"###);
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.sort_criteria(vec![AscDesc::Desc(Member::Field(S("letter")))]);
let SearchResult { documents_ids, .. } = s.execute().unwrap();
@@ -388,7 +388,7 @@ fn test_distinct_placeholder_sort() {
]
"###);
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.sort_criteria(vec![
AscDesc::Desc(Member::Field(S("letter"))),
AscDesc::Desc(Member::Field(S("rank1"))),
@@ -443,7 +443,7 @@ fn test_distinct_words() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.query("the quick brown fox jumps over the lazy dog");
@@ -496,7 +496,7 @@ fn test_distinct_sort_words() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.query("the quick brown fox jumps over the lazy dog");
s.sort_criteria(vec![AscDesc::Desc(Member::Field(S("letter")))]);
@@ -569,7 +569,7 @@ fn test_distinct_all_candidates() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.sort_criteria(vec![AscDesc::Desc(Member::Field(S("rank1")))]);
s.exhaustive_number_hits(true);
@@ -592,7 +592,7 @@ fn test_distinct_typo() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.query("the quick brown fox jumps over the lazy dog");
s.terms_matching_strategy(TermsMatchingStrategy::Last);

View File

@@ -21,7 +21,7 @@ This module tests the following properties about the exactness ranking rule:
use crate::index::tests::TempIndex;
use crate::search::new::tests::collect_field_values;
use crate::{Criterion, Search, SearchResult, TermsMatchingStrategy};
use crate::{Criterion, SearchResult, TermsMatchingStrategy};
fn create_index_simple_ordered() -> TempIndex {
let index = TempIndex::new();
@@ -471,7 +471,7 @@ fn test_exactness_simple_ordered() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.query("the quick brown fox jumps over the lazy dog");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -503,7 +503,7 @@ fn test_exactness_simple_reversed() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.query("the quick brown fox jumps over the lazy dog");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -526,7 +526,7 @@ fn test_exactness_simple_reversed() {
]
"###);
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.query("the quick brown fox jumps over the lazy dog");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -556,7 +556,7 @@ fn test_exactness_simple_random() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.query("the quick brown fox jumps over the lazy dog");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -585,7 +585,7 @@ fn test_exactness_attribute_starts_with_simple() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.query("this balcony");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -611,7 +611,7 @@ fn test_exactness_attribute_starts_with_phrase() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.query("\"overlooking the sea\" is a beautiful balcony");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -631,7 +631,7 @@ fn test_exactness_attribute_starts_with_phrase() {
]
"###);
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.query("overlooking the sea is a beautiful balcony");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -660,7 +660,7 @@ fn test_exactness_all_candidates_with_typo() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.query("overlocking the sea is a beautiful balcony");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -696,7 +696,7 @@ fn test_exactness_after_words() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.query("the quick brown fox jumps over the lazy dog");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -744,7 +744,7 @@ fn test_words_after_exactness() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.query("the quick brown fox jumps over the lazy dog");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -792,7 +792,7 @@ fn test_proximity_after_exactness() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.query("the quick brown fox jumps over the lazy dog");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -829,7 +829,7 @@ fn test_proximity_after_exactness() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.query("the quick brown fox jumps over the lazy dog");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -862,7 +862,7 @@ fn test_exactness_followed_by_typo_prefer_no_typo_prefix() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.query("quick brown fox extra");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -897,7 +897,7 @@ fn test_typo_followed_by_exactness() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.query("extraordinarily quick brown fox");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);

View File

@@ -82,7 +82,7 @@ fn test_geo_sort() {
let rtxn = index.read_txn().unwrap();
let mut s = Search::new(&rtxn, &index);
let mut s = index.search(&rtxn);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.sort_criteria(vec![AscDesc::Asc(Member::Geo([0., 0.]))]);
@@ -118,7 +118,7 @@ fn test_geo_sort_with_following_ranking_rules() {
let rtxn = index.read_txn().unwrap();
let mut s = Search::new(&rtxn, &index);
let mut s = index.search(&rtxn);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.sort_criteria(vec![
AscDesc::Asc(Member::Geo([0., 0.])),
@@ -159,7 +159,7 @@ fn test_geo_sort_reached_max_bucket_size() {
let rtxn = index.read_txn().unwrap();
let mut s = Search::new(&rtxn, &index);
let mut s = index.search(&rtxn);
s.geo_max_bucket_size(2);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.sort_criteria(vec![
@@ -219,7 +219,7 @@ fn test_geo_sort_around_the_edge_of_the_flat_earth() {
let rtxn = index.read_txn().unwrap();
let mut s = Search::new(&rtxn, &index);
let mut s = index.search(&rtxn);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
// --- asc
@@ -295,7 +295,7 @@ fn geo_sort_mixed_with_words() {
let rtxn = index.read_txn().unwrap();
let mut s = Search::new(&rtxn, &index);
let mut s = index.search(&rtxn);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.sort_criteria(vec![AscDesc::Asc(Member::Geo([0., 0.]))]);
@@ -406,7 +406,7 @@ fn geo_sort_without_any_geo_faceted_documents() {
let rtxn = index.read_txn().unwrap();
let mut s = Search::new(&rtxn, &index);
let mut s = index.search(&rtxn);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.sort_criteria(vec![AscDesc::Asc(Member::Geo([0., 0.]))]);

View File

@@ -1,5 +1,5 @@
use crate::index::tests::TempIndex;
use crate::{Search, SearchResult};
use crate::SearchResult;
#[test]
fn test_kanji_language_detection() {
@@ -14,7 +14,7 @@ fn test_kanji_language_detection() {
.unwrap();
let txn = index.write_txn().unwrap();
let mut search = Search::new(&txn, &index);
let mut search = index.search(&txn);
search.query("東京");
let SearchResult { documents_ids, .. } = search.execute().unwrap();

View File

@@ -19,7 +19,7 @@ This module tests the following properties:
use crate::index::tests::TempIndex;
use crate::search::new::tests::collect_field_values;
use crate::{Criterion, Search, SearchResult, TermsMatchingStrategy};
use crate::{Criterion, SearchResult, TermsMatchingStrategy};
fn create_index() -> TempIndex {
let index = TempIndex::new();
@@ -78,7 +78,7 @@ fn test_2gram_simple() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("sun flower");
@@ -109,7 +109,7 @@ fn test_3gram_simple() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("sun flower s are");
let SearchResult { documents_ids, .. } = s.execute().unwrap();
@@ -129,7 +129,7 @@ fn test_2gram_typo() {
let index = create_index();
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("sun flawer");
let SearchResult { documents_ids, .. } = s.execute().unwrap();
@@ -159,7 +159,7 @@ fn test_no_disable_ngrams() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("sun flower ");
let SearchResult { documents_ids, .. } = s.execute().unwrap();
@@ -185,7 +185,7 @@ fn test_2gram_prefix() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("sun flow");
let SearchResult { documents_ids, .. } = s.execute().unwrap();
@@ -214,7 +214,7 @@ fn test_3gram_prefix() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("su nf l");
let SearchResult { documents_ids, .. } = s.execute().unwrap();
@@ -237,7 +237,7 @@ fn test_split_words() {
let index = create_index();
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("sunflower ");
let SearchResult { documents_ids, .. } = s.execute().unwrap();
@@ -266,7 +266,7 @@ fn test_disable_split_words() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("sunflower ");
let SearchResult { documents_ids, .. } = s.execute().unwrap();
@@ -286,7 +286,7 @@ fn test_2gram_split_words() {
let index = create_index();
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("sunf lower");
let SearchResult { documents_ids, .. } = s.execute().unwrap();
@@ -310,7 +310,7 @@ fn test_3gram_no_split_words() {
let index = create_index();
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("sunf lo wer");
let SearchResult { documents_ids, .. } = s.execute().unwrap();
@@ -333,7 +333,7 @@ fn test_3gram_no_typos() {
let index = create_index();
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("sunf la wer");
let SearchResult { documents_ids, .. } = s.execute().unwrap();
@@ -352,7 +352,7 @@ fn test_no_ngram_phrases() {
let index = create_index();
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("\"sun\" flower");
let SearchResult { documents_ids, .. } = s.execute().unwrap();
@@ -366,7 +366,7 @@ fn test_no_ngram_phrases() {
]
"###);
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("\"sun\" \"flower\"");
let SearchResult { documents_ids, .. } = s.execute().unwrap();
@@ -385,7 +385,7 @@ fn test_short_split_words() {
let index = create_index();
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("xyz");
let SearchResult { documents_ids, .. } = s.execute().unwrap();
@@ -412,7 +412,7 @@ fn test_split_words_never_disabled() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("the sunflower is tall");
let SearchResult { documents_ids, .. } = s.execute().unwrap();

View File

@@ -18,7 +18,7 @@ use std::collections::BTreeMap;
use crate::index::tests::TempIndex;
use crate::search::new::tests::collect_field_values;
use crate::{Criterion, Search, SearchResult, TermsMatchingStrategy};
use crate::{Criterion, SearchResult, TermsMatchingStrategy};
fn create_simple_index() -> TempIndex {
let index = TempIndex::new();
@@ -268,7 +268,7 @@ fn test_proximity_simple() {
let index = create_simple_index();
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("the quick brown fox jumps over the lazy dog");
let SearchResult { documents_ids, .. } = s.execute().unwrap();
@@ -295,7 +295,7 @@ fn test_proximity_split_word() {
let index = create_edge_cases_index();
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("sunflower wilting");
@@ -315,7 +315,7 @@ fn test_proximity_split_word() {
]
"###);
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("\"sun flower\" wilting");
@@ -342,7 +342,7 @@ fn test_proximity_split_word() {
.unwrap();
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("xyz wilting");
@@ -365,7 +365,7 @@ fn test_proximity_prefix_db() {
let index = create_edge_cases_index();
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("best s");
@@ -390,7 +390,7 @@ fn test_proximity_prefix_db() {
"###);
// Difference when using the `su` prefix, which is not in the prefix DB
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("best su");
@@ -417,7 +417,7 @@ fn test_proximity_prefix_db() {
// **proximity** prefix DB. In that case, its proximity score will always be
// the maximum. This happens for prefixes that are larger than 2 bytes.
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("best win");
@@ -441,7 +441,7 @@ fn test_proximity_prefix_db() {
// Now using `wint`, which is not in the prefix DB:
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("best wint");
@@ -465,7 +465,7 @@ fn test_proximity_prefix_db() {
// and using `wi` which is in the prefix DB and proximity prefix DB
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("best wi");

View File

@@ -8,7 +8,7 @@ implemented.
use crate::index::tests::TempIndex;
use crate::search::new::tests::collect_field_values;
use crate::{Criterion, Search, SearchResult, TermsMatchingStrategy};
use crate::{Criterion, SearchResult, TermsMatchingStrategy};
fn create_index() -> TempIndex {
let index = TempIndex::new();
@@ -57,7 +57,7 @@ fn test_trap_basic() {
let index = create_index();
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("summer holiday");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);

View File

@@ -17,9 +17,7 @@ use meili_snap::insta;
use crate::index::tests::TempIndex;
use crate::search::new::tests::collect_field_values;
use crate::{
score_details, AscDesc, Criterion, Member, Search, SearchResult, TermsMatchingStrategy,
};
use crate::{score_details, AscDesc, Criterion, Member, SearchResult, TermsMatchingStrategy};
fn create_index() -> TempIndex {
let index = TempIndex::new();
@@ -184,7 +182,7 @@ fn test_sort() {
let index = create_index();
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.sort_criteria(vec![AscDesc::Desc(Member::Field(S("letter")))]);
@@ -219,7 +217,7 @@ fn test_sort() {
]
"###);
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.sort_criteria(vec![AscDesc::Desc(Member::Field(S("rank")))]);
@@ -254,7 +252,7 @@ fn test_sort() {
]
"###);
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.sort_criteria(vec![AscDesc::Asc(Member::Field(S("vague")))]);
@@ -289,7 +287,7 @@ fn test_sort() {
]
"###);
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.sort_criteria(vec![AscDesc::Desc(Member::Field(S("vague")))]);
@@ -338,7 +336,7 @@ fn test_redacted() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.sort_criteria(vec![

View File

@@ -13,7 +13,7 @@ use std::collections::BTreeSet;
use std::iter::FromIterator;
use crate::index::tests::TempIndex;
use crate::{Search, SearchResult, TermsMatchingStrategy};
use crate::{SearchResult, TermsMatchingStrategy};
fn create_index() -> TempIndex {
let index = TempIndex::new();
@@ -79,7 +79,7 @@ fn test_ignore_stop_words() {
let txn = index.read_txn().unwrap();
// `the` is treated as a prefix here, so it's not ignored
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.query("xyz to the");
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -132,7 +132,7 @@ fn test_ignore_stop_words() {
"###);
// `xyz` is treated as a prefix here, so it's not ignored
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.query("to the xyz");
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -185,7 +185,7 @@ fn test_ignore_stop_words() {
"###);
// `xyz` is not treated as a prefix anymore because of the trailing space, so it's ignored
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.query("to the xyz ");
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -237,7 +237,7 @@ fn test_ignore_stop_words() {
]
"###);
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.query("to the dragon xyz");
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -296,7 +296,7 @@ fn test_stop_words_in_phrase() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.query("\"how to train your dragon\"");
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -389,7 +389,7 @@ fn test_stop_words_in_phrase() {
]
"###);
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.query("how \"to\" train \"the");
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -441,7 +441,7 @@ fn test_stop_words_in_phrase() {
]
"###);
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.query("how \"to\" train \"The dragon");
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -449,7 +449,7 @@ fn test_stop_words_in_phrase() {
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[3, 6, 5]");
insta::assert_snapshot!(format!("{document_scores:#?}"));
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.query("\"to\"");
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);

View File

@@ -22,7 +22,7 @@ use std::collections::BTreeMap;
use crate::index::tests::TempIndex;
use crate::search::new::tests::collect_field_values;
use crate::{Criterion, Search, SearchResult, TermsMatchingStrategy};
use crate::{Criterion, SearchResult, TermsMatchingStrategy};
fn create_index() -> TempIndex {
let index = TempIndex::new();
@@ -157,7 +157,7 @@ fn test_no_typo() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("the quick brown fox jumps over the lazy dog");
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
@@ -182,7 +182,7 @@ fn test_default_typo() {
insta::assert_debug_snapshot!(tt, @"9");
// 0 typo
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("the quick brown fox jumps over the lazy dog");
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
@@ -202,7 +202,7 @@ fn test_default_typo() {
"###);
// 1 typo on one word, replaced letter
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("the quack brown fox jumps over the lazy dog");
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
@@ -216,7 +216,7 @@ fn test_default_typo() {
"###);
// 1 typo on one word, missing letter, extra letter
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("the quicest brownest fox jummps over the laziest dog");
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
@@ -235,7 +235,7 @@ fn test_phrase_no_typo_allowed() {
let index = create_index();
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("the \"quick brewn\" fox jumps over the lazy dog");
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
@@ -265,7 +265,7 @@ fn test_typo_exact_word() {
insta::assert_debug_snapshot!(tt, @"9");
// don't match quivk
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("the quick brown fox jumps over the lazy dog");
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
@@ -279,7 +279,7 @@ fn test_typo_exact_word() {
"###);
// Don't match quick
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("the quack brown fox jumps over the lazy dog");
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
@@ -287,7 +287,7 @@ fn test_typo_exact_word() {
insta::assert_snapshot!(format!("{document_scores:?}"), @"[]");
// words not in exact_words (quicest, jummps) have normal typo handling
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("the quicest brownest fox jummps over the laziest dog");
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
@@ -301,7 +301,7 @@ fn test_typo_exact_word() {
"###);
// exact words do not disable prefix (sunflowering OK, but no sunflowar)
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("network interconnection sunflower");
@@ -340,7 +340,7 @@ fn test_typo_exact_attribute() {
insta::assert_debug_snapshot!(tt, @"9");
// Exact match returns both exact attributes and tolerant ones.
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("the quick brown fox jumps over the lazy dog");
@@ -365,7 +365,7 @@ fn test_typo_exact_attribute() {
"###);
// 1 typo only returns the tolerant attribute
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("the quidk brown fox jumps over the lazy dog");
@@ -386,7 +386,7 @@ fn test_typo_exact_attribute() {
"###);
// combine with exact words
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("the quivk brown fox jumps over the lazy dog");
@@ -414,7 +414,7 @@ fn test_typo_exact_attribute() {
"###);
// No result in tolerant attribute
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("the quicest brownest fox jummps over the laziest dog");
@@ -428,7 +428,7 @@ fn test_ngram_typos() {
let index = create_index();
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("the extra lagant fox skyrocketed over the languorous dog");
@@ -442,7 +442,7 @@ fn test_ngram_typos() {
]
"###);
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("the ex tra lagant fox skyrocketed over the languorous dog");
@@ -463,7 +463,7 @@ fn test_typo_ranking_rule_not_preceded_by_words_ranking_rule() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("the quick brown fox jumps over the lazy dog");
@@ -499,7 +499,7 @@ fn test_typo_ranking_rule_not_preceded_by_words_ranking_rule() {
})
.unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("the quick brown fox jumps over the lazy dog");
@@ -517,7 +517,7 @@ fn test_typo_bucketing() {
let txn = index.read_txn().unwrap();
// First do the search with just the Words ranking rule
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("network interconnection sunflower");
@@ -545,7 +545,7 @@ fn test_typo_bucketing() {
.unwrap();
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("network interconnection sunflower");
@@ -564,7 +564,7 @@ fn test_typo_bucketing() {
]
"###);
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("network interconnection sun flower");
@@ -600,7 +600,7 @@ fn test_typo_synonyms() {
.unwrap();
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("the quick brown fox jumps over the lackadaisical dog");
@@ -616,7 +616,7 @@ fn test_typo_synonyms() {
]
"###);
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("the fast brownish fox jumps over the lackadaisical dog");

View File

@@ -17,7 +17,7 @@ because the typo ranking rule before it only used the derivation `beautiful`.
use crate::index::tests::TempIndex;
use crate::search::new::tests::collect_field_values;
use crate::{Criterion, Search, SearchResult, TermsMatchingStrategy};
use crate::{Criterion, SearchResult, TermsMatchingStrategy};
fn create_index() -> TempIndex {
let index = TempIndex::new();
@@ -87,7 +87,7 @@ fn test_trap_basic_and_complex1() {
let index = create_index();
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("beautiful summer");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -110,7 +110,7 @@ fn test_trap_complex2() {
let index = create_index();
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("delicious sweet dessert");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);

View File

@@ -14,7 +14,7 @@ This module tests the following properties:
use crate::index::tests::TempIndex;
use crate::search::new::tests::collect_field_values;
use crate::{Criterion, Search, SearchResult, TermsMatchingStrategy};
use crate::{Criterion, SearchResult, TermsMatchingStrategy};
fn create_index() -> TempIndex {
let index = TempIndex::new();
@@ -131,7 +131,7 @@ fn test_words_tms_last_simple() {
let index = create_index();
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.query("the quick brown fox jumps over the lazy dog");
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -166,7 +166,7 @@ fn test_words_tms_last_simple() {
]
"###);
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.query("extravagant the quick brown fox jumps over the lazy dog");
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -180,7 +180,7 @@ fn test_words_tms_last_phrase() {
let index = create_index();
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.query("\"the quick brown fox\" jumps over the lazy dog");
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -205,7 +205,7 @@ fn test_words_tms_last_phrase() {
]
"###);
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.query("\"the quick brown fox\" jumps over the \"lazy\" dog");
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -227,7 +227,7 @@ fn test_words_tms_last_phrase() {
]
"###);
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.query("\"the quick brown fox jumps over the lazy dog\"");
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -243,7 +243,7 @@ fn test_words_tms_last_phrase() {
]
"###);
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.query("\"the quick brown fox jumps over the lazy dog");
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -270,7 +270,7 @@ fn test_words_proximity_tms_last_simple() {
.unwrap();
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.query("the quick brown fox jumps over the lazy dog");
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -305,7 +305,7 @@ fn test_words_proximity_tms_last_simple() {
]
"###);
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.query("the brown quick fox jumps over the lazy dog");
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -351,7 +351,7 @@ fn test_words_proximity_tms_last_phrase() {
.unwrap();
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.query("the \"quick brown\" fox jumps over the lazy dog");
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -382,7 +382,7 @@ fn test_words_proximity_tms_last_phrase() {
]
"###);
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.query("the \"quick brown\" \"fox jumps\" over the lazy dog");
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -421,7 +421,7 @@ fn test_words_tms_all() {
.unwrap();
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.query("the quick brown fox jumps over the lazy dog");
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -447,7 +447,7 @@ fn test_words_tms_all() {
]
"###);
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.query("extravagant");
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);

View File

@@ -6,7 +6,10 @@ use roaring::RoaringBitmap;
use super::ranking_rules::{RankingRule, RankingRuleOutput, RankingRuleQueryTrait};
use super::VectorStoreStats;
use crate::progress::Progress;
use crate::score_details::{self, ScoreDetails};
use crate::search::new::ranking_rules::RankingRuleId;
use crate::search::steps::{ComputingBucketSortStep, RankingRuleStep};
use crate::vector::{DistributionShift, Embedder, VectorStore};
use crate::{DocumentId, Result, SearchContext, SearchLogger, TimeBudget};
@@ -94,8 +97,8 @@ impl<Q: RankingRuleQueryTrait> VectorSort<Q> {
}
impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for VectorSort<Q> {
fn id(&self) -> String {
"vector_sort".to_owned()
fn id(&self) -> RankingRuleId {
RankingRuleId::VectorSort
}
#[tracing::instrument(level = "trace", skip_all, target = "search::vector_sort")]
@@ -106,7 +109,10 @@ impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for VectorSort<Q> {
universe: &RoaringBitmap,
query: &Q,
time_budget: &TimeBudget,
progress: &Progress,
) -> Result<()> {
progress.update_progress(ComputingBucketSortStep::from(self.id()));
let _step = progress.update_progress_scoped(RankingRuleStep::StartIteration);
assert!(self.query.is_none());
self.query = Some(query.clone());
@@ -123,7 +129,10 @@ impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for VectorSort<Q> {
_logger: &mut dyn SearchLogger<Q>,
universe: &RoaringBitmap,
time_budget: &TimeBudget,
progress: &Progress,
) -> Result<Option<RankingRuleOutput<Q>>> {
progress.update_progress(ComputingBucketSortStep::from(self.id()));
let _step = progress.update_progress_scoped(RankingRuleStep::NextBucket);
let query = self.query.as_ref().unwrap().clone();
let vector_candidates = &self.vector_candidates & universe;
@@ -158,7 +167,7 @@ impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for VectorSort<Q> {
}));
}
self.next_bucket(ctx, _logger, universe, time_budget)
self.next_bucket(ctx, _logger, universe, time_budget, progress)
}
#[tracing::instrument(level = "trace", skip_all, target = "search::vector_sort")]
@@ -171,7 +180,9 @@ impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for VectorSort<Q> {
_ctx: &mut SearchContext<'ctx>,
_logger: &mut dyn SearchLogger<Q>,
universe: &RoaringBitmap,
progress: &Progress,
) -> Result<Poll<RankingRuleOutput<Q>>> {
let _step = progress.update_progress_scoped(RankingRuleStep::NonBlockingNextBucket);
let query = self.query.as_ref().unwrap().clone();
let vector_candidates = &self.vector_candidates & universe;

View File

@@ -2,6 +2,7 @@ use std::sync::Arc;
use roaring::RoaringBitmap;
use crate::progress::Progress;
use crate::score_details::{self, ScoreDetails};
use crate::vector::{Embedder, VectorStore};
use crate::{filtered_universe, DocumentId, Filter, Index, Result, SearchResult};
@@ -18,6 +19,7 @@ pub struct Similar<'a> {
embedder: Arc<Embedder>,
ranking_score_threshold: Option<f64>,
quantized: bool,
progress: &'a Progress,
}
impl<'a> Similar<'a> {
@@ -31,6 +33,7 @@ impl<'a> Similar<'a> {
embedder_name: String,
embedder: Arc<Embedder>,
quantized: bool,
progress: &'a Progress,
) -> Self {
Self {
id,
@@ -43,6 +46,7 @@ impl<'a> Similar<'a> {
embedder,
ranking_score_threshold: None,
quantized,
progress,
}
}
@@ -57,7 +61,7 @@ impl<'a> Similar<'a> {
}
pub fn execute(&self) -> Result<SearchResult> {
let mut universe = filtered_universe(self.index, self.rtxn, &self.filter)?;
let mut universe = filtered_universe(self.index, self.rtxn, &self.filter, self.progress)?;
// we never want to receive the docid
universe.remove(self.id);

View File

@@ -0,0 +1,59 @@
use crate::make_enum_progress;
// Coarse, user-visible phases of a single search request, reported through
// `Progress::update_progress`. One variant per top-level stage (tokenizing the
// query, embedding it, filtering, resolving the universe, running the
// keyword/placeholder/semantic search, formatting results, computing facet
// distributions, federating multi-index results, and personalization).
// NOTE(review): ordering here presumably mirrors the execution order of a
// request — confirm against the search entry points before relying on it.
make_enum_progress! {
pub enum SearchStep {
Tokenize,
Embed,
Filter,
ResolveUniverse,
KeywordSearch,
PlaceholderSearch,
SemanticSearch,
Format,
FacetDistribution,
Federation,
Personalization,
}
}
// Progress step reported while the bucket-sort algorithm is driven by a given
// ranking rule: one variant per ranking rule (Words, Typo, Proximity, …) plus
// the candidate-merging and distinct phases. Constructed from a ranking rule's
// identifier via `From` (e.g. `ComputingBucketSortStep::from(self.id())` in the
// vector-sort ranking rule), so variant names must stay in sync with
// `RankingRuleId`.
make_enum_progress! {
pub enum ComputingBucketSortStep {
MergeCandidates,
Distinct,
Words,
Typo,
Proximity,
AttributePosition,
WordPosition,
Exactness,
Sort,
GeoSort,
VectorSort,
Asc,
Desc,
}
}
// Fine-grained steps inside one ranking-rule iteration. These are used as
// *scoped* progress steps (`progress.update_progress_scoped(...)`): the step is
// marked finished when the returned guard is dropped at end of scope. They
// track entering a rule (`start_iteration`), producing a bucket
// (`next_bucket`), and the non-blocking bucket path.
make_enum_progress! {
pub enum RankingRuleStep {
StartIteration,
NextBucket,
NonBlockingNextBucket,
}
}
// Phases of federated (multi-index / multi-remote) search result assembly:
// waiting on remote results, then merging facets and merging the result lists.
// NOTE(review): no usage is visible in this chunk — confirm the variant order
// matches the federation pipeline before depending on it.
make_enum_progress! {
pub enum FederatingResultsStep {
WaitForRemoteResults,
MergeFacets,
MergeResults,
}
}
// End-to-end accounting of a request's wall-clock time: time spent waiting for
// a concurrency permit versus time spent in the actual search (or `similar`)
// computation. Presumably used to log the total processing trace at the end of
// the request — TODO confirm against the request handlers.
make_enum_progress! {
pub enum TotalProcessingTimeStep {
WaitForPermit,
Search,
Similar,
}
}

View File

@@ -26,6 +26,7 @@ pub(crate) struct TempIndex {
pub inner: Index,
pub indexer_config: IndexerConfig,
pub index_documents_config: IndexDocumentsConfig,
pub progress: Progress,
_tempdir: TempDir,
}
@@ -47,7 +48,9 @@ impl TempIndex {
let inner = Index::new(options, _tempdir.path(), true).unwrap();
let indexer_config = IndexerConfig::default();
let index_documents_config = IndexDocumentsConfig::default();
Self { inner, indexer_config, index_documents_config, _tempdir }
let progress = Progress::default();
Self { inner, indexer_config, index_documents_config, progress, _tempdir }
}
/// Creates a temporary index, with a default `4096 * 2000` size. This should be enough for
/// most tests.
@@ -210,6 +213,10 @@ impl TempIndex {
pub fn delete_document(&self, external_document_id: &str) {
self.delete_documents(vec![external_document_id.to_string()])
}
pub fn search<'a>(&'a self, rtxn: &'a heed::RoTxn<'a>) -> Search<'a> {
self.inner.search(rtxn, &self.progress)
}
}
#[test]
@@ -1095,7 +1102,7 @@ fn bug_3021_fourth() {
"###);
let rtxn = index.read_txn().unwrap();
let search = Search::new(&rtxn, &index);
let search = index.search(&rtxn);
let SearchResult {
matching_words: _,
candidates: _,

View File

@@ -1292,7 +1292,7 @@ mod tests {
let rtxn = index.read_txn().unwrap();
// testing the simple query search
let mut search = crate::Search::new(&rtxn, &index);
let mut search = index.search(&rtxn);
search.query("document");
search.terms_matching_strategy(TermsMatchingStrategy::default());
// all documents should be returned
@@ -1333,7 +1333,7 @@ mod tests {
assert!(documents_ids.is_empty()); // nested is not searchable
// testing the filters
let mut search = crate::Search::new(&rtxn, &index);
let mut search = index.search(&rtxn);
search.filter(crate::Filter::from_str(r#"title = "The first document""#).unwrap().unwrap());
let crate::SearchResult { documents_ids, .. } = search.execute().unwrap();
assert_eq!(documents_ids, vec![1]);
@@ -1358,6 +1358,7 @@ mod tests {
#[test]
fn index_documents_with_nested_primary_key() {
let index = TempIndex::new();
let progress = Progress::default();
index
.update_settings(|settings| {
@@ -1397,7 +1398,7 @@ mod tests {
let rtxn = index.read_txn().unwrap();
// testing the simple query search
let mut search = crate::Search::new(&rtxn, &index);
let mut search = crate::Search::new(&rtxn, &index, &progress);
search.query("document");
search.terms_matching_strategy(TermsMatchingStrategy::default());
// all documents should be returned
@@ -1453,6 +1454,7 @@ mod tests {
#[test]
fn test_facets_generation() {
let index = TempIndex::new();
let progress = Progress::default();
index
.add_documents(documents!([
@@ -1507,7 +1509,7 @@ mod tests {
let rtxn = index.read_txn().unwrap();
for (s, i) in [("zeroth", 0), ("first", 1), ("second", 2), ("third", 3)] {
let mut search = crate::Search::new(&rtxn, &index);
let mut search = crate::Search::new(&rtxn, &index, &progress);
let filter = format!(r#""dog.race.bernese mountain" = {s}"#);
search.filter(crate::Filter::from_str(&filter).unwrap().unwrap());
let crate::SearchResult { documents_ids, .. } = search.execute().unwrap();
@@ -1545,7 +1547,7 @@ mod tests {
let rtxn = index.read_txn().unwrap();
let mut search = crate::Search::new(&rtxn, &index);
let mut search = crate::Search::new(&rtxn, &index, &progress);
search.sort_criteria(vec![crate::AscDesc::Asc(crate::Member::Field(S(
"dog.race.bernese mountain",
)))]);
@@ -3601,6 +3603,7 @@ mod tests {
#[test]
fn delete_words_exact_attributes() {
let index = TempIndex::new();
let progress = Progress::default();
index
.update_settings(|settings| {
@@ -3639,7 +3642,7 @@ mod tests {
let words = index.words_fst(&txn).unwrap().into_stream().into_strs().unwrap();
insta::assert_snapshot!(format!("{words:?}"), @r###"["hello"]"###);
let mut s = Search::new(&txn, &index);
let mut s = Search::new(&txn, &index, &progress);
s.query("hello");
let crate::SearchResult { documents_ids, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0]");

View File

@@ -25,7 +25,8 @@ macro_rules! test_distinct {
let rtxn = index.read_txn().unwrap();
let mut search = Search::new(&rtxn, &index);
let progress = Progress::default();
let mut search = Search::new(&rtxn, &index, &progress);
search.query(search::TEST_QUERY);
search.limit($limit);
search.offset($offset);

View File

@@ -1,5 +1,7 @@
use either::{Either, Left, Right};
use milli::progress::Progress;
use milli::{Criterion, Filter, Search, SearchResult, TermsMatchingStrategy};
use Criterion::*;
use crate::search::{self, EXTERNAL_DOCUMENTS_IDS};
@@ -15,7 +17,8 @@ macro_rules! test_filter {
let filter_conditions =
Filter::from_array::<Vec<Either<Vec<&str>, &str>>, _>($filter).unwrap().unwrap();
let mut search = Search::new(&rtxn, &index);
let progress = Progress::default();
let mut search = Search::new(&rtxn, &index, &progress);
search.query(search::TEST_QUERY);
search.limit(EXTERNAL_DOCUMENTS_IDS.len());

View File

@@ -24,7 +24,8 @@ fn test_phrase_search_with_stop_words_given_criteria(criteria: &[Criterion]) {
// Phrase search containing stop words
let txn = index.read_txn().unwrap();
let mut search = Search::new(&txn, &index);
let progress = Progress::default();
let mut search = Search::new(&txn, &index, &progress);
search.query("\"the use of force\"");
search.limit(10);
search.terms_matching_strategy(TermsMatchingStrategy::All);

View File

@@ -27,7 +27,8 @@ macro_rules! test_criterion {
let index = search::setup_search_index_with_criteria(&criteria);
let rtxn = index.read_txn().unwrap();
let mut search = Search::new(&rtxn, &index);
let progress = Progress::default();
let mut search = Search::new(&rtxn, &index, &progress);
search.query(search::TEST_QUERY);
search.limit(EXTERNAL_DOCUMENTS_IDS.len());
search.terms_matching_strategy($optional_word);
@@ -241,7 +242,8 @@ fn criteria_mixup() {
let rtxn = index.read_txn().unwrap();
let mut search = Search::new(&rtxn, &index);
let progress = Progress::default();
let mut search = Search::new(&rtxn, &index, &progress);
search.query(search::TEST_QUERY);
search.limit(EXTERNAL_DOCUMENTS_IDS.len());
search.terms_matching_strategy(ALLOW_OPTIONAL_WORDS);
@@ -365,7 +367,8 @@ fn criteria_ascdesc() {
let rtxn = index.read_txn().unwrap();
let mut search = Search::new(&rtxn, &index);
let progress = Progress::default();
let mut search = Search::new(&rtxn, &index, &progress);
search.limit(ASC_DESC_CANDIDATES_THRESHOLD + 1);
let SearchResult { documents_ids, .. } = search.execute().unwrap();

View File

@@ -1,4 +1,5 @@
use big_s::S;
use milli::progress::Progress;
use milli::Criterion::{Attribute, Exactness, Proximity, Typo, Words};
use milli::{AscDesc, Error, Member, Search, TermsMatchingStrategy, UserError};
@@ -11,7 +12,8 @@ fn sort_ranking_rule_missing() {
let index = search::setup_search_index_with_criteria(&criteria);
let rtxn = index.read_txn().unwrap();
let mut search = Search::new(&rtxn, &index);
let progress = Progress::default();
let mut search = Search::new(&rtxn, &index, &progress);
search.query(search::TEST_QUERY);
search.limit(EXTERNAL_DOCUMENTS_IDS.len());

View File

@@ -22,7 +22,8 @@ fn test_typo_tolerance_one_typo() {
{
let txn = index.read_txn().unwrap();
let mut search = Search::new(&txn, &index);
let progress = Progress::default();
let mut search = Search::new(&txn, &index, &progress);
search.query("zeal");
search.limit(10);
@@ -31,7 +32,8 @@ fn test_typo_tolerance_one_typo() {
let result = search.execute().unwrap();
assert_eq!(result.documents_ids.len(), 1);
let mut search = Search::new(&txn, &index);
let progress = Progress::default();
let mut search = Search::new(&txn, &index, &progress);
search.query("zean");
search.limit(10);
@@ -49,7 +51,8 @@ fn test_typo_tolerance_one_typo() {
builder.execute(&|| false, &Progress::default(), Default::default()).unwrap();
// typo is now supported for 4 letters words
let mut search = Search::new(&txn, &index);
let progress = Progress::default();
let mut search = Search::new(&txn, &index, &progress);
search.query("zean");
search.limit(10);
@@ -68,7 +71,8 @@ fn test_typo_tolerance_two_typo() {
{
let txn = index.read_txn().unwrap();
let mut search = Search::new(&txn, &index);
let progress = Progress::default();
let mut search = Search::new(&txn, &index, &progress);
search.query("zealand");
search.limit(10);
@@ -77,7 +81,8 @@ fn test_typo_tolerance_two_typo() {
let result = search.execute().unwrap();
assert_eq!(result.documents_ids.len(), 1);
let mut search = Search::new(&txn, &index);
let progress = Progress::default();
let mut search = Search::new(&txn, &index, &progress);
search.query("zealemd");
search.limit(10);
@@ -95,7 +100,8 @@ fn test_typo_tolerance_two_typo() {
builder.execute(&|| false, &Progress::default(), Default::default()).unwrap();
// typo is now supported for 4 letters words
let mut search = Search::new(&txn, &index);
let progress = Progress::default();
let mut search = Search::new(&txn, &index, &progress);
search.query("zealemd");
search.limit(10);
@@ -164,7 +170,8 @@ fn test_typo_disabled_on_word() {
{
let txn = index.read_txn().unwrap();
let mut search = Search::new(&txn, &index);
let progress = Progress::default();
let mut search = Search::new(&txn, &index, &progress);
search.query("zealand");
search.limit(10);
@@ -184,7 +191,8 @@ fn test_typo_disabled_on_word() {
builder.set_exact_words(exact_words);
builder.execute(&|| false, &Progress::default(), Default::default()).unwrap();
let mut search = Search::new(&txn, &index);
let progress = Progress::default();
let mut search = Search::new(&txn, &index, &progress);
search.query("zealand");
search.limit(10);
@@ -203,7 +211,8 @@ fn test_disable_typo_on_attribute() {
{
let txn = index.read_txn().unwrap();
let mut search = Search::new(&txn, &index);
let progress = Progress::default();
let mut search = Search::new(&txn, &index, &progress);
// typo in `antebel(l)um`
search.query("antebelum");
search.limit(10);
@@ -222,7 +231,8 @@ fn test_disable_typo_on_attribute() {
builder.set_exact_attributes(vec!["description".to_string()].into_iter().collect());
builder.execute(&|| false, &Progress::default(), Default::default()).unwrap();
let mut search = Search::new(&txn, &index);
let progress = Progress::default();
let mut search = Search::new(&txn, &index, &progress);
search.query("antebelum");
search.limit(10);