Compare commits

..

9 Commits

Author SHA1 Message Date
ManyTheFish
1cee5b52e5 Fix: Avoid starting a scoped progress twice and add debug asserts 2025-12-23 18:46:25 +01:00
ManyTheFish
5380ae4da8 Fix clippy errors 2025-12-23 15:22:45 +01:00
ManyTheFish
8df9b41022 Update benchmarks 2025-12-23 15:17:50 +01:00
ManyTheFish
b69a553752 Update tests 2025-12-23 15:17:28 +01:00
ManyTheFish
1ed2107621 Log progress trace at the end of the search process 2025-12-23 15:12:21 +01:00
ManyTheFish
b7deb85cc0 Add progress traces in search processes. 2025-12-23 15:10:27 +01:00
ManyTheFish
01de2be3d9 Implement a ScopedProgressStep helper to finish the step at the end of the scope.
This helper will mark the step as finished when it is dropped.
The struct must be assigned to a named variable (`let step` or `let _step`) to be dropped at the end of the scope properly.
2025-12-23 14:32:43 +01:00
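A minimal sketch of the drop-based guard this commit describes, using stand-in types rather than milli's actual `Progress` API. The point is the binding rule called out in the message: `let _step` keeps the guard alive until the end of the scope, while `let _ = ...` would drop it immediately and end the step too early.

```rust
use std::time::Instant;

struct Progress;

impl Progress {
    // Start a step and return a guard that ends it when dropped.
    fn update_progress_scoped(&self, name: &'static str) -> ScopedProgressStep<'_> {
        println!("step `{name}` started");
        ScopedProgressStep { progress: self, name, started_at: Instant::now() }
    }

    fn end_progress_step(&self, name: &'static str, started_at: Instant) {
        println!("step `{name}` finished in {:.2?}", started_at.elapsed());
    }
}

struct ScopedProgressStep<'a> {
    progress: &'a Progress,
    name: &'static str,
    started_at: Instant,
}

impl Drop for ScopedProgressStep<'_> {
    fn drop(&mut self) {
        self.progress.end_progress_step(self.name, self.started_at);
    }
}

fn main() {
    let progress = Progress;
    // Bind to `_step` (not `_`): `let _ = ...` drops the guard on the spot
    // and would mark the step finished before any work has run.
    let _step = progress.update_progress_scoped("formatting");
    // ... scoped work ...
} // `_step` is dropped here, marking the step as finished
```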
ManyTheFish
830b62a142 Sum the durations of similar steps in the accumulate_durations function.
The function previously kept only the most recent duration for a step,
which is not compatible with processes that run several iterations, like the
bucket sort algorithm.
2025-12-23 14:28:00 +01:00
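A self-contained sketch of the summing behaviour described here; the container and function names are stand-ins (the real code in the `progress.rs` diff below uses an `IndexMap`). Repeated entries for the same step accumulate instead of overwriting each other.

```rust
use std::collections::BTreeMap;
use std::time::Duration;

// Sum the durations of entries sharing a step name, instead of
// keeping only the most recent one.
fn accumulate_durations(raw: Vec<(String, Duration)>) -> BTreeMap<String, Duration> {
    let mut acc = BTreeMap::new();
    for (name, duration) in raw {
        *acc.entry(name).or_insert(Duration::ZERO) += duration;
    }
    acc
}

fn main() {
    // e.g. one entry per bucket-sort iteration of the same step
    let raw = vec![
        ("bucket_sort".to_string(), Duration::from_millis(3)),
        ("bucket_sort".to_string(), Duration::from_millis(5)),
    ];
    // Prints 8ms, not the most recent 5ms.
    println!("{:?}", accumulate_durations(raw)["bucket_sort"]);
}
```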
ManyTheFish
63d1aeb42e Add search progress steps.
These steps will be used to track the progress of the search process.
2025-12-23 14:23:49 +01:00
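The step names themselves show up throughout the diff below (`TotalProcessingTimeStep::WaitForPermit`, `SearchStep::Embed`, `ComputingBucketSortStep::Distinct`, ...). A hedged guess at their shape, based only on what the hunks show, is a set of plain enums implementing a common `Step` trait; the variant lists and the reduced trait here are illustrative, not milli's actual definitions.

```rust
#![allow(dead_code)]

// Reduced to the one method the progress code in the diff relies on.
trait Step {
    fn name(&self) -> &'static str;
}

#[derive(Clone, Copy)]
enum TotalProcessingTimeStep {
    WaitForPermit,
    Search,
}

#[derive(Clone, Copy)]
enum SearchStep {
    Embed,
    Format,
    FacetDistribution,
    Federation,
    Personalization,
}

impl Step for TotalProcessingTimeStep {
    fn name(&self) -> &'static str {
        match self {
            Self::WaitForPermit => "wait for permit",
            Self::Search => "search",
        }
    }
}

impl Step for SearchStep {
    fn name(&self) -> &'static str {
        match self {
            Self::Embed => "embed",
            Self::Format => "format",
            Self::FacetDistribution => "facet distribution",
            Self::Federation => "federation",
            Self::Personalization => "personalization",
        }
    }
}

fn main() {
    let step = SearchStep::Embed;
    println!("current step: {}", step.name());
}
```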
74 changed files with 1010 additions and 1771 deletions

View File

@@ -1,50 +0,0 @@
name: Check OpenAPI file
on:
workflow_dispatch:
pull_request:
merge_group:
permissions:
contents: read
env:
CARGO_TERM_COLOR: always
RUST_BACKTRACE: 1
RUSTFLAGS: "-D warnings"
jobs:
check-openapi:
name: Check OpenAPI specification
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@v5
- name: Setup Rust
uses: dtolnay/rust-toolchain@1.91.1
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.8.0
- name: Setup Node.js
uses: actions/setup-node@v4
with:
node-version: '20'
- name: Install OpenAPI tools
run: npm install -g @apidevtools/swagger-cli @stoplight/spectral-cli
- name: Generate OpenAPI specification
run: cargo run --release -p openapi-generator -- -o /tmp/openapi.json
- name: Check all routes have summaries
run: cargo run --release -p openapi-generator -- --check-summaries
# Validates that the OpenAPI file is syntactically correct and conforms to the OpenAPI specification
- name: Validate OpenAPI schema
run: swagger-cli validate /tmp/openapi.json
# Lints the OpenAPI file for best practices (descriptions, examples, naming conventions, etc.)
# Ruleset is defined in crates/openapi-generator/.spectral.yaml
- name: Lint OpenAPI specification
run: spectral lint /tmp/openapi.json --ruleset crates/openapi-generator/.spectral.yaml

View File

@@ -7,9 +7,6 @@ on:
release:
types: [published]
permissions:
contents: read
jobs:
check-version:
name: Check the version validity
@@ -92,8 +89,8 @@ jobs:
asset_name: meilisearch-${{ matrix.edition-suffix }}${{ matrix.asset_name }}
tag: ${{ github.ref }}
publish-openapi-files:
name: Publish OpenAPI files
publish-openapi-file:
name: Publish OpenAPI file
needs: check-version
runs-on: ubuntu-latest
steps:
@@ -104,26 +101,16 @@ jobs:
with:
toolchain: stable
override: true
- name: Generate OpenAPI files
- name: Generate OpenAPI file
run: |
cd crates/openapi-generator
cargo run --release -- --pretty --debug --output ../../meilisearch-openapi.json
cargo run --release -- --pretty --debug --with-mintlify-code-samples --output ../../meilisearch-openapi-mintlify.json
- name: Upload OpenAPI file to Release
cargo run --release -- --pretty --output ../../meilisearch.json
- name: Upload OpenAPI to Release
# No need to upload for dry run (cron or workflow_dispatch)
if: github.event_name == 'release'
uses: svenstaro/upload-release-action@2.11.2
with:
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
file: ./meilisearch-openapi.json
file: ./meilisearch.json
asset_name: meilisearch-openapi.json
tag: ${{ github.ref }}
- name: Upload Mintlify OpenAPI file to Release
# No need to upload for dry run (cron or workflow_dispatch)
if: github.event_name == 'release'
uses: svenstaro/upload-release-action@2.11.2
with:
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
file: ./meilisearch-openapi-mintlify.json
asset_name: meilisearch-openapi-mintlify.json
tag: ${{ github.ref }}

View File

@@ -15,7 +15,7 @@ env:
jobs:
test-linux:
name: Tests on ${{ matrix.runner }} ${{ matrix.features }}
name: Tests on Ubuntu
runs-on: ${{ matrix.runner }}
strategy:
matrix:

3
.gitignore vendored
View File

@@ -29,6 +29,3 @@ crates/meilisearch/db.snapshot
# Fuzzcheck data for the facet indexing fuzz test
crates/milli/fuzz/update::facet::incremental::fuzz::fuzz/
# OpenAPI generator
**/meilisearch-openapi.json

View File

@@ -117,7 +117,7 @@ With swagger:
With the internal crate:
```bash
cd crates/openapi-generator
cargo run --release -- --pretty
cargo run --release -- --pretty --output meilisearch.json
```
### Logging

981
Cargo.lock generated

File diff suppressed because it is too large

View File

@@ -172,7 +172,8 @@ pub fn run_benches(c: &mut criterion::Criterion, confs: &[Conf]) {
|b, &query| {
b.iter(|| {
let rtxn = index.read_txn().unwrap();
let mut search = index.search(&rtxn);
let progress = Progress::default();
let mut search = index.search(&rtxn, &progress);
search
.query(query)
.terms_matching_strategy(TermsMatchingStrategy::default());

View File

@@ -153,7 +153,8 @@ fn main() {
.unwrap();
// after executing a batch we check if the database is corrupted
let res = index.search(&wtxn).execute().unwrap();
let progress = Progress::default();
let res = index.search(&wtxn, &progress).execute().unwrap();
index.documents(&wtxn, res.documents_ids).unwrap();
progression.fetch_add(1, Ordering::Relaxed);
}

View File

@@ -46,7 +46,7 @@ pub struct CreateApiKey {
#[deserr(default, error = DeserrJsonError<InvalidApiKeyName>)]
pub name: Option<String>,
/// A uuid v4 to identify the API Key. If not specified, it's generated by Meilisearch.
#[schema(value_type = Option<String>, example = "01b4bc42-eb33-4041-b481-254d00cce834")]
#[schema(value_type = Uuid, example = json!(null))]
#[deserr(default = Uuid::new_v4(), error = DeserrJsonError<InvalidApiKeyUid>, try_from(&String) = Uuid::from_str -> uuid::Error)]
pub uid: KeyId,
/// A list of actions permitted for the key. `["*"]` for all actions. The `*` character can be used as a wildcard when located at the last position. e.g. `documents.*` to authorize access on all documents endpoints.

View File

@@ -302,7 +302,7 @@ pub struct Settings<T> {
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
#[deserr(default, error = DeserrJsonError<InvalidSettingsLocalizedAttributes>)]
#[schema(value_type = Option<Vec<LocalizedAttributesRuleView>>, example = json!(null))]
#[schema(value_type = Option<Vec<LocalizedAttributesRuleView>>, example = json!(50))]
pub localized_attributes: Setting<Vec<LocalizedAttributesRuleView>>,
#[serde(default, skip_serializing_if = "Setting::is_not_set")]

View File

@@ -21,8 +21,8 @@ pub struct TaskView {
/// The unique sequential identifier of the task.
#[schema(value_type = u32, example = 4312)]
pub uid: TaskId,
/// The unique identifier of the batch where this task is grouped.
#[schema(value_type = Option<u32>, example = json!(12))]
/// The unique identifier of the index where this task is operated.
#[schema(value_type = Option<u32>, example = json!("movies"))]
pub batch_uid: Option<BatchId>,
#[serde(default)]
pub index_uid: Option<String>,

View File

@@ -190,7 +190,6 @@ pub enum KindWithContent {
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, ToSchema)]
#[serde(rename_all = "camelCase")]
pub struct IndexSwap {
#[schema(value_type = Vec<String>, example = json!(["indexA", "indexB"]))]
pub indexes: (String, String),
#[serde(default)]
pub rename: bool,
@@ -608,7 +607,7 @@ impl std::error::Error for ParseTaskStatusError {}
ToSchema,
)]
#[serde(rename_all = "camelCase")]
#[schema(rename_all = "camelCase", example = "documentAdditionOrUpdate")]
#[schema(rename_all = "camelCase", example = json!(enum_iterator::all::<Kind>().collect::<Vec<_>>()))]
pub enum Kind {
DocumentAdditionOrUpdate,
DocumentEdition,

View File

@@ -1,7 +1,8 @@
use std::time::Duration;
use meilisearch_types::error::{Code, ErrorCode, ResponseError};
use meilisearch_types::milli::TimeBudget;
use meilisearch_types::milli::progress::Progress;
use meilisearch_types::milli::{SearchStep, TimeBudget};
use rand::Rng;
use reqwest::Client;
use serde::{Deserialize, Serialize};
@@ -346,9 +347,11 @@ impl PersonalizationService {
personalize: &Personalize,
query: Option<&str>,
time_budget: TimeBudget,
progress: &Progress,
) -> Result<SearchResult, ResponseError> {
match self {
Self::Cohere(cohere_service) => {
let _step = progress.update_progress_scoped(SearchStep::Personalization);
cohere_service
.rerank_search_results(search_result, personalize, query, time_budget)
.await

View File

@@ -46,7 +46,10 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
(status = OK, description = "Return the batch", body = BatchView, content_type = "application/json", example = json!(
{
"uid": 1,
"details": {},
"details": {
"receivedDocuments": 1,
"indexedDocuments": 1
},
"progress": null,
"stats": {
"totalNbTasks": 1,

View File

@@ -30,7 +30,10 @@ use meilisearch_types::features::{
use meilisearch_types::heed::RoTxn;
use meilisearch_types::keys::actions;
use meilisearch_types::milli::index::ChatConfig;
use meilisearch_types::milli::{all_obkv_to_json, obkv_to_json, OrderBy, PatternMatch, TimeBudget};
use meilisearch_types::milli::progress::Progress;
use meilisearch_types::milli::{
all_obkv_to_json, obkv_to_json, OrderBy, PatternMatch, TimeBudget, TotalProcessingTimeStep,
};
use meilisearch_types::{Document, Index};
use serde::Deserialize;
use serde_json::json;
@@ -262,6 +265,7 @@ async fn process_search_request(
filter: Option<String>,
) -> Result<(Index, Vec<Document>, String), ResponseError> {
let index = index_scheduler.index(&index_uid)?;
let progress = Progress::default();
let rtxn = index.static_read_txn()?;
let ChatConfig { description: _, prompt: _, search_parameters } = index.chat_config(&rtxn)?;
let mut query = SearchQuery {
@@ -285,7 +289,9 @@ async fn process_search_request(
let search_kind =
search_kind(&query, index_scheduler.get_ref(), index_uid.to_string(), &index)?;
progress.update_progress(TotalProcessingTimeStep::WaitForPermit);
let permit = search_queue.try_get_search_permit().await?;
progress.update_progress(TotalProcessingTimeStep::Search);
let features = index_scheduler.features();
let index_cloned = index.clone();
let output = tokio::task::spawn_blocking(move || -> Result<_, ResponseError> {
@@ -297,8 +303,15 @@ async fn process_search_request(
None => TimeBudget::default(),
};
let (search, _is_finite_pagination, _max_total_hits, _offset) =
prepare_search(&index_cloned, &rtxn, &query, &search_kind, time_budget, features)?;
let (search, _is_finite_pagination, _max_total_hits, _offset) = prepare_search(
&index_cloned,
&rtxn,
&query,
&search_kind,
time_budget,
features,
&progress,
)?;
match search_from_kind(index_uid, search_kind, search) {
Ok((search_results, _)) => Ok((rtxn, Ok(search_results))),

View File

@@ -54,7 +54,7 @@ crate::empty_analytics!(DumpAnalytics, "Dump Created");
"taskUid": 0,
"indexUid": null,
"status": "enqueued",
"type": "dumpCreation",
"type": "DumpCreation",
"enqueuedAt": "2021-01-01T09:39:00.000000Z"
}
)),

View File

@@ -41,10 +41,6 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
cfg.service(web::resource("").route(web::post().to(export)));
}
/// Export to a remote Meilisearch
///
/// Triggers an export process to a remote Meilisearch instance. This allows you to send
/// documents and settings from the current instance to another Meilisearch server.
#[utoipa::path(
post,
path = "",

View File

@@ -32,8 +32,6 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
}
/// Compact an index
///
/// Triggers a compaction process on the specified index. Compaction reorganizes the index database to make it smaller and more efficient.
#[utoipa::path(
post,
path = "{indexUid}/compact",

View File

@@ -696,7 +696,7 @@ pub struct UpdateDocumentsQuery {
#[deserr(default, error = DeserrQueryParamError<InvalidIndexCustomMetadata>)]
pub custom_metadata: Option<String>,
#[param(example = true)]
#[param(example = "true")]
#[deserr(default, try_from(&String) = from_string_skip_creation -> DeserrQueryParamError<InvalidSkipCreation>, error = DeserrQueryParamError<InvalidSkipCreation>)]
/// Only update documents if they already exist.
pub skip_creation: Option<bool>,

View File

@@ -51,7 +51,7 @@ mod similar_analytics;
(path = "/", api = settings::SettingsApi),
(path = "/", api = compact::CompactApi),
),
paths(list_indexes, create_index, get_index, update_index, delete_index, get_index_stats),
paths(list_indexes, create_index, get_index, update_index, delete_index, get_index_stats, compact::compact),
tags(
(
name = "Indexes",

View File

@@ -8,7 +8,8 @@ use meilisearch_types::error::deserr_codes::*;
use meilisearch_types::error::ResponseError;
use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::locales::Locale;
use meilisearch_types::milli;
use meilisearch_types::milli::progress::Progress;
use meilisearch_types::milli::{self, TotalProcessingTimeStep};
use meilisearch_types::serde_cs::vec::CS;
use serde_json::Value;
use tracing::debug;
@@ -336,6 +337,10 @@ pub async fn search_with_url_query(
) -> Result<HttpResponse, ResponseError> {
let request_uid = Uuid::now_v7();
debug!(request_uid = ?request_uid, parameters = ?params, "Search get");
let progress = Progress::default();
progress.update_progress(TotalProcessingTimeStep::WaitForPermit);
let permit = search_queue.try_get_search_permit().await?;
progress.update_progress(TotalProcessingTimeStep::Search);
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let mut query: SearchQuery = params.into_inner().try_into()?;
@@ -359,9 +364,9 @@ pub async fn search_with_url_query(
// Save the query string for personalization if requested
let personalize_query = personalize.is_some().then(|| query.q.clone()).flatten();
let permit = search_queue.try_get_search_permit().await?;
let include_metadata = parse_include_metadata_header(&req);
let progress_clone = progress.clone();
let search_result = tokio::task::spawn_blocking(move || {
perform_search(
SearchParams {
@@ -374,11 +379,13 @@ pub async fn search_with_url_query(
include_metadata,
},
&index,
&progress_clone,
)
})
.await;
permit.drop().await;
let search_result = search_result?;
if let Ok((search_result, _)) = search_result.as_ref() {
aggregate.succeed(search_result);
}
@@ -394,11 +401,12 @@ pub async fn search_with_url_query(
personalize,
personalize_query.as_deref(),
time_budget,
&progress,
)
.await?;
}
debug!(request_uid = ?request_uid, returns = ?search_result, "Search get");
debug!(request_uid = ?request_uid, returns = ?search_result, progress = ?progress.accumulated_durations(), "Search get");
Ok(HttpResponse::Ok().json(search_result))
}
@@ -470,6 +478,11 @@ pub async fn search_with_post(
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let request_uid = Uuid::now_v7();
let progress = Progress::default();
progress.update_progress(TotalProcessingTimeStep::WaitForPermit);
let permit = search_queue.try_get_search_permit().await?;
progress.update_progress(TotalProcessingTimeStep::Search);
let mut query = params.into_inner();
debug!(request_uid = ?request_uid, parameters = ?query, "Search post");
@@ -494,7 +507,7 @@ pub async fn search_with_post(
// Save the query string for personalization if requested
let personalize_query = personalize.is_some().then(|| query.q.clone()).flatten();
let permit = search_queue.try_get_search_permit().await?;
let progress_clone = progress.clone();
let search_result = tokio::task::spawn_blocking(move || {
perform_search(
SearchParams {
@@ -507,6 +520,7 @@ pub async fn search_with_post(
include_metadata,
},
&index,
&progress_clone,
)
})
.await;
@@ -530,11 +544,12 @@ pub async fn search_with_post(
personalize,
personalize_query.as_deref(),
time_budget,
&progress,
)
.await?;
}
debug!(request_uid = ?request_uid, returns = ?search_result, "Search post");
debug!(request_uid = ?request_uid, returns = ?search_result, progress = ?progress.accumulated_durations(), "Search post");
Ok(HttpResponse::Ok().json(search_result))
}
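The hunks above repeat one sequencing pattern in each route: mark `WaitForPermit`, await the search permit, switch to `Search`, then hand a clone of the progress to the blocking search task. A hedged, stand-alone sketch of that flow follows; the queue, permit, and `Progress` types are stand-ins (the real code is async and uses `tokio::task::spawn_blocking`, this sketch uses a plain thread).

```rust
use std::sync::{Arc, Mutex};
use std::time::Instant;

// Stand-in progress tracker: records (step, start time) transitions.
#[derive(Clone, Default)]
struct Progress {
    steps: Arc<Mutex<Vec<(&'static str, Instant)>>>,
}

impl Progress {
    fn update_progress(&self, step: &'static str) {
        self.steps.lock().unwrap().push((step, Instant::now()));
    }
}

// Stand-in for the search queue; the real one is async and rate-limits permits.
struct SearchQueue;
struct Permit;

impl SearchQueue {
    fn try_get_search_permit(&self) -> Permit {
        Permit
    }
}

fn main() {
    let progress = Progress::default();
    let queue = SearchQueue;

    // 1. time spent waiting for a permit is attributed to `WaitForPermit`
    progress.update_progress("wait_for_permit");
    let _permit = queue.try_get_search_permit();

    // 2. everything after the permit counts as `Search`
    progress.update_progress("search");

    // 3. the blocking search gets its own clone of the shared tracker
    let progress_clone = progress.clone();
    std::thread::spawn(move || {
        progress_clone.update_progress("format");
        // perform_search(...) would run here
    })
    .join()
    .unwrap();

    println!("recorded {} step transitions", progress.steps.lock().unwrap().len());
}
```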

View File

@@ -8,6 +8,8 @@ use meilisearch_types::error::deserr_codes::*;
use meilisearch_types::error::ResponseError;
use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::keys::actions;
use meilisearch_types::milli::progress::Progress;
use meilisearch_types::milli::TotalProcessingTimeStep;
use meilisearch_types::serde_cs::vec::CS;
use serde_json::Value;
use tracing::debug;
@@ -77,11 +79,11 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
"release_date": 1418256000
}
],
"id": "143",
"offset": 0,
"limit": 2,
"estimatedTotalHits": 976,
"processingTimeMs": 35
"processingTimeMs": 35,
"query": "american "
}
)),
(status = 404, description = "Index not found", body = ResponseError, content_type = "application/json", example = json!(
@@ -159,11 +161,11 @@ pub async fn similar_get(
"release_date": 1418256000
}
],
"id": "143",
"offset": 0,
"limit": 2,
"estimatedTotalHits": 976,
"processingTimeMs": 35
"processingTimeMs": 35,
"query": "american "
}
)),
(status = 404, description = "Index not found", body = ResponseError, content_type = "application/json", example = json!(
@@ -217,7 +219,7 @@ async fn similar(
mut query: SimilarQuery,
) -> Result<SimilarResult, ResponseError> {
let retrieve_vectors = RetrieveVectors::new(query.retrieve_vectors);
let progress = Progress::default();
// Tenant token search_rules.
if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid) {
add_search_rules(&mut query.filter, search_rules);
@@ -234,7 +236,10 @@ async fn similar(
Route::Similar,
)?;
tokio::task::spawn_blocking(move || {
let progress_clone = progress.clone();
let result = tokio::task::spawn_blocking(move || {
let _step = progress_clone.update_progress_scoped(TotalProcessingTimeStep::Search);
perform_similar(
&index,
query,
@@ -243,9 +248,14 @@ async fn similar(
quantized,
retrieve_vectors,
index_scheduler.features(),
&progress_clone,
)
})
.await?
.await;
debug!(progress = ?progress.accumulated_durations(), "Similar");
result?
}
#[derive(Debug, deserr::Deserr, IntoParams)]

View File

@@ -99,8 +99,6 @@ mod webhooks;
paths(get_health, get_version, get_stats),
tags(
(name = "Stats", description = "Stats gives extended information and metrics about indexes and the Meilisearch database."),
(name = "Health", description = "The health check endpoint enables you to periodically test the health of your Meilisearch instance."),
(name = "Version", description = "Returns the version of the running Meilisearch instance."),
),
modifiers(&OpenApiAuth),
servers((

View File

@@ -6,6 +6,8 @@ use index_scheduler::IndexScheduler;
use meilisearch_types::deserr::DeserrJsonError;
use meilisearch_types::error::ResponseError;
use meilisearch_types::keys::actions;
use meilisearch_types::milli::progress::Progress;
use meilisearch_types::milli::TotalProcessingTimeStep;
use serde::Serialize;
use tracing::debug;
use utoipa::{OpenApi, ToSchema};
@@ -153,7 +155,10 @@ pub async fn multi_search_with_post(
) -> Result<HttpResponse, ResponseError> {
// Since we don't want to process half of the search requests and then get a permit refused
// we're going to get one permit for the whole duration of the multi-search request.
let progress = Progress::default();
progress.update_progress(TotalProcessingTimeStep::WaitForPermit);
let permit = search_queue.try_get_search_permit().await?;
progress.update_progress(TotalProcessingTimeStep::Search);
let request_uid = Uuid::now_v7();
let federated_search = params.into_inner();
@@ -213,6 +218,7 @@ pub async fn multi_search_with_post(
is_proxy,
request_uid,
include_metadata,
&progress,
)
.await;
permit.drop().await;
@@ -226,6 +232,7 @@ pub async fn multi_search_with_post(
debug!(
request_uid = ?request_uid,
returns = ?search_result,
progress = ?progress.accumulated_durations(),
"Federated-search"
);
@@ -288,6 +295,7 @@ pub async fn multi_search_with_post(
.with_index(query_index)?;
let retrieve_vector = RetrieveVectors::new(query.retrieve_vectors);
let progress_clone = progress.clone();
let (mut search_result, time_budget) = tokio::task::spawn_blocking(move || {
perform_search(
SearchParams {
@@ -300,6 +308,7 @@ pub async fn multi_search_with_post(
include_metadata,
},
&index,
&progress_clone,
)
})
.await
@@ -314,6 +323,7 @@ pub async fn multi_search_with_post(
personalize,
personalize_query.as_deref(),
time_budget,
&progress,
)
.await
.with_index(query_index)?;
@@ -345,6 +355,7 @@ pub async fn multi_search_with_post(
debug!(
request_uid = ?request_uid,
returns = ?search_results,
progress = ?progress.accumulated_durations(),
"Multi-search"
);

View File

@@ -99,7 +99,11 @@ async fn get_network(
#[serde(rename_all = "camelCase")]
#[schema(rename_all = "camelCase")]
pub struct Remote {
#[schema(value_type = Option<String>, example = "http://localhost:7700")]
#[schema(value_type = Option<String>, example = json!({
"ms-0": Remote { url: Setting::Set("http://localhost:7700".into()), search_api_key: Setting::Reset, write_api_key: Setting::Reset },
"ms-1": Remote { url: Setting::Set("http://localhost:7701".into()), search_api_key: Setting::Set("foo".into()), write_api_key: Setting::Set("bar".into()) },
"ms-2": Remote { url: Setting::Set("http://localhost:7702".into()), search_api_key: Setting::Set("bar".into()), write_api_key: Setting::Set("foo".into()) },
}))]
#[deserr(default, error = DeserrJsonError<InvalidNetworkUrl>)]
#[serde(default)]
pub url: Setting<String>,

View File

@@ -78,48 +78,48 @@ pub struct TasksFilterQuery {
/// Permits to filter tasks by their uid. By default, when the uids query parameter is not set, all task uids are returned. It's possible to specify several uids by separating them with the `,` character.
#[deserr(default, error = DeserrQueryParamError<InvalidTaskUids>)]
#[param(required = false, value_type = Option<Vec<u32>>, example = json!([231, 423, 598]))]
#[param(required = false, value_type = Option<Vec<u32>>, example = json!([231, 423, 598, "*"]))]
pub uids: OptionStarOrList<u32>,
/// Permits to filter tasks using the uid of the task that canceled them. It's possible to specify several task uids by separating them with the `,` character.
#[deserr(default, error = DeserrQueryParamError<InvalidTaskCanceledBy>)]
#[param(required = false, value_type = Option<Vec<u32>>, example = json!([374]))]
#[param(required = false, value_type = Option<Vec<u32>>, example = json!([374, "*"]))]
pub canceled_by: OptionStarOrList<u32>,
/// Permits to filter tasks by their related type. By default, when `types` query parameter is not set, all task types are returned. It's possible to specify several types by separating them with the `,` character.
#[deserr(default, error = DeserrQueryParamError<InvalidTaskTypes>)]
#[param(required = false, value_type = Option<Vec<String>>, example = json!([Kind::DocumentAdditionOrUpdate]))]
#[param(required = false, value_type = Option<Vec<String>>, example = json!([Kind::DocumentAdditionOrUpdate, "*"]))]
pub types: OptionStarOrList<Kind>,
/// Permits to filter tasks by their status. By default, when `statuses` query parameter is not set, all task statuses are returned. It's possible to specify several statuses by separating them with the `,` character.
#[deserr(default, error = DeserrQueryParamError<InvalidTaskStatuses>)]
#[param(required = false, value_type = Option<Vec<Status>>, example = json!([Status::Succeeded, Status::Failed, Status::Canceled, Status::Enqueued, Status::Processing]))]
#[param(required = false, value_type = Option<Vec<Status>>, example = json!([Status::Succeeded, Status::Failed, Status::Canceled, Status::Enqueued, Status::Processing, "*"]))]
pub statuses: OptionStarOrList<Status>,
/// Permits to filter tasks by their related index. By default, when `indexUids` query parameter is not set, the tasks of all the indexes are returned. It is possible to specify several indexes by separating them with the `,` character.
#[deserr(default, error = DeserrQueryParamError<InvalidIndexUid>)]
#[param(required = false, value_type = Option<Vec<String>>, example = json!(["movies", "theater"]))]
#[param(required = false, value_type = Option<Vec<String>>, example = json!(["movies", "theater", "*"]))]
pub index_uids: OptionStarOrList<IndexUid>,
/// Permits to filter tasks based on their enqueuedAt time. Matches tasks enqueued after the given date. Supports RFC 3339 date format.
#[deserr(default, error = DeserrQueryParamError<InvalidTaskAfterEnqueuedAt>, try_from(OptionStarOr<String>) = deserialize_date_after -> InvalidTaskDateError)]
#[param(required = false, value_type = Option<String>, example = "2024-08-08T16:37:09.971Z")]
#[param(required = false, value_type = Option<String>, example = json!(["2024-08-08T16:37:09.971Z", "*"]))]
pub after_enqueued_at: OptionStarOr<OffsetDateTime>,
/// Permits to filter tasks based on their enqueuedAt time. Matches tasks enqueued before the given date. Supports RFC 3339 date format.
#[deserr(default, error = DeserrQueryParamError<InvalidTaskBeforeEnqueuedAt>, try_from(OptionStarOr<String>) = deserialize_date_before -> InvalidTaskDateError)]
#[param(required = false, value_type = Option<String>, example = "2024-08-08T16:37:09.971Z")]
#[param(required = false, value_type = Option<String>, example = json!(["2024-08-08T16:37:09.971Z", "*"]))]
pub before_enqueued_at: OptionStarOr<OffsetDateTime>,
/// Permits to filter tasks based on their startedAt time. Matches tasks started after the given date. Supports RFC 3339 date format.
#[deserr(default, error = DeserrQueryParamError<InvalidTaskAfterStartedAt>, try_from(OptionStarOr<String>) = deserialize_date_after -> InvalidTaskDateError)]
#[param(required = false, value_type = Option<String>, example = "2024-08-08T16:37:09.971Z")]
#[param(required = false, value_type = Option<String>, example = json!(["2024-08-08T16:37:09.971Z", "*"]))]
pub after_started_at: OptionStarOr<OffsetDateTime>,
/// Permits to filter tasks based on their startedAt time. Matches tasks started before the given date. Supports RFC 3339 date format.
#[deserr(default, error = DeserrQueryParamError<InvalidTaskBeforeStartedAt>, try_from(OptionStarOr<String>) = deserialize_date_before -> InvalidTaskDateError)]
#[param(required = false, value_type = Option<String>, example = "2024-08-08T16:37:09.971Z")]
#[param(required = false, value_type = Option<String>, example = json!(["2024-08-08T16:37:09.971Z", "*"]))]
pub before_started_at: OptionStarOr<OffsetDateTime>,
/// Permits to filter tasks based on their finishedAt time. Matches tasks finished after the given date. Supports RFC 3339 date format.
#[deserr(default, error = DeserrQueryParamError<InvalidTaskAfterFinishedAt>, try_from(OptionStarOr<String>) = deserialize_date_after -> InvalidTaskDateError)]
#[param(required = false, value_type = Option<String>, example = "2024-08-08T16:37:09.971Z")]
#[param(required = false, value_type = Option<String>, example = json!(["2024-08-08T16:37:09.971Z", "*"]))]
pub after_finished_at: OptionStarOr<OffsetDateTime>,
/// Permits to filter tasks based on their finishedAt time. Matches tasks finished before the given date. Supports RFC 3339 date format.
#[deserr(default, error = DeserrQueryParamError<InvalidTaskBeforeFinishedAt>, try_from(OptionStarOr<String>) = deserialize_date_before -> InvalidTaskDateError)]
#[param(required = false, value_type = Option<String>, example = "2024-08-08T16:37:09.971Z")]
#[param(required = false, value_type = Option<String>, example = json!(["2024-08-08T16:37:09.971Z", "*"]))]
pub before_finished_at: OptionStarOr<OffsetDateTime>,
}
@@ -173,52 +173,52 @@ impl TaskDeletionOrCancelationQuery {
pub struct TaskDeletionOrCancelationQuery {
/// Permits to filter tasks by their uid. By default, when the `uids` query parameter is not set, all task uids are returned. It's possible to specify several uids by separating them with the `,` character.
#[deserr(default, error = DeserrQueryParamError<InvalidTaskUids>)]
#[param(required = false, value_type = Option<Vec<u32>>, example = json!([231, 423, 598]))]
#[param(required = false, value_type = Option<Vec<u32>>, example = json!([231, 423, 598, "*"]))]
pub uids: OptionStarOrList<u32>,
/// Lets you filter tasks by their `batchUid`.
#[deserr(default, error = DeserrQueryParamError<InvalidBatchUids>)]
#[param(required = false, value_type = Option<Vec<u32>>, example = json!([231, 423, 598]))]
#[param(required = false, value_type = Option<Vec<u32>>, example = json!([231, 423, 598, "*"]))]
pub batch_uids: OptionStarOrList<BatchId>,
/// Permits to filter tasks using the uid of the task that canceled them. It's possible to specify several task uids by separating them with the `,` character.
#[deserr(default, error = DeserrQueryParamError<InvalidTaskCanceledBy>)]
#[param(required = false, value_type = Option<Vec<u32>>, example = json!([374]))]
#[param(required = false, value_type = Option<Vec<u32>>, example = json!([374, "*"]))]
pub canceled_by: OptionStarOrList<u32>,
/// Permits to filter tasks by their related type. By default, when `types` query parameter is not set, all task types are returned. It's possible to specify several types by separating them with the `,` character.
#[deserr(default, error = DeserrQueryParamError<InvalidTaskTypes>)]
#[param(required = false, value_type = Option<Vec<Kind>>, example = json!([Kind::DocumentDeletion]))]
#[param(required = false, value_type = Option<Vec<Kind>>, example = json!([Kind::DocumentDeletion, "*"]))]
pub types: OptionStarOrList<Kind>,
/// Permits to filter tasks by their status. By default, when `statuses` query parameter is not set, all task statuses are returned. It's possible to specify several statuses by separating them with the `,` character.
#[deserr(default, error = DeserrQueryParamError<InvalidTaskStatuses>)]
#[param(required = false, value_type = Option<Vec<Status>>, example = json!([Status::Succeeded, Status::Failed, Status::Canceled]))]
#[param(required = false, value_type = Option<Vec<Status>>, example = json!([Status::Succeeded, Status::Failed, Status::Canceled, "*"]))]
pub statuses: OptionStarOrList<Status>,
/// Permits to filter tasks by their related index. By default, when `indexUids` query parameter is not set, the tasks of all the indexes are returned. It is possible to specify several indexes by separating them with the `,` character.
#[deserr(default, error = DeserrQueryParamError<InvalidIndexUid>)]
#[param(required = false, value_type = Option<Vec<String>>, example = json!(["movies", "theater"]))]
#[param(required = false, value_type = Option<Vec<String>>, example = json!(["movies", "theater", "*"]))]
pub index_uids: OptionStarOrList<IndexUid>,
/// Permits to filter tasks based on their enqueuedAt time. Matches tasks enqueued after the given date. Supports RFC 3339 date format.
#[deserr(default, error = DeserrQueryParamError<InvalidTaskAfterEnqueuedAt>, try_from(OptionStarOr<String>) = deserialize_date_after -> InvalidTaskDateError)]
#[param(required = false, value_type = Option<String>, example = "2024-08-08T16:37:09.971Z")]
#[param(required = false, value_type = Option<String>, example = json!(["2024-08-08T16:37:09.971Z", "*"]))]
pub after_enqueued_at: OptionStarOr<OffsetDateTime>,
/// Permits to filter tasks based on their enqueuedAt time. Matches tasks enqueued before the given date. Supports RFC 3339 date format.
#[deserr(default, error = DeserrQueryParamError<InvalidTaskBeforeEnqueuedAt>, try_from(OptionStarOr<String>) = deserialize_date_before -> InvalidTaskDateError)]
#[param(required = false, value_type = Option<String>, example = "2024-08-08T16:37:09.971Z")]
#[param(required = false, value_type = Option<String>, example = json!(["2024-08-08T16:37:09.971Z", "*"]))]
pub before_enqueued_at: OptionStarOr<OffsetDateTime>,
/// Permits to filter tasks based on their startedAt time. Matches tasks started after the given date. Supports RFC 3339 date format.
#[deserr(default, error = DeserrQueryParamError<InvalidTaskAfterStartedAt>, try_from(OptionStarOr<String>) = deserialize_date_after -> InvalidTaskDateError)]
#[param(required = false, value_type = Option<String>, example = "2024-08-08T16:37:09.971Z")]
#[param(required = false, value_type = Option<String>, example = json!(["2024-08-08T16:37:09.971Z", "*"]))]
pub after_started_at: OptionStarOr<OffsetDateTime>,
/// Permits to filter tasks based on their startedAt time. Matches tasks started before the given date. Supports RFC 3339 date format.
#[deserr(default, error = DeserrQueryParamError<InvalidTaskBeforeStartedAt>, try_from(OptionStarOr<String>) = deserialize_date_before -> InvalidTaskDateError)]
#[param(required = false, value_type = Option<String>, example = "2024-08-08T16:37:09.971Z")]
#[param(required = false, value_type = Option<String>, example = json!(["2024-08-08T16:37:09.971Z", "*"]))]
pub before_started_at: OptionStarOr<OffsetDateTime>,
/// Permits to filter tasks based on their finishedAt time. Matches tasks finished after the given date. Supports RFC 3339 date format.
#[deserr(default, error = DeserrQueryParamError<InvalidTaskAfterFinishedAt>, try_from(OptionStarOr<String>) = deserialize_date_after -> InvalidTaskDateError)]
#[param(required = false, value_type = Option<String>, example = "2024-08-08T16:37:09.971Z")]
#[param(required = false, value_type = Option<String>, example = json!(["2024-08-08T16:37:09.971Z", "*"]))]
pub after_finished_at: OptionStarOr<OffsetDateTime>,
/// Permits to filter tasks based on their finishedAt time. Matches tasks finished before the given date. Supports RFC 3339 date format.
#[deserr(default, error = DeserrQueryParamError<InvalidTaskBeforeFinishedAt>, try_from(OptionStarOr<String>) = deserialize_date_before -> InvalidTaskDateError)]
#[param(required = false, value_type = Option<String>, example = "2024-08-08T16:37:09.971Z")]
#[param(required = false, value_type = Option<String>, example = json!(["2024-08-08T16:37:09.971Z", "*"]))]
pub before_finished_at: OptionStarOr<OffsetDateTime>,
}
@@ -519,9 +519,15 @@ pub struct AllTasks {
"uid": 144,
"indexUid": "mieli",
"status": "succeeded",
"type": "indexCreation",
"type": "settingsUpdate",
"canceledBy": null,
"details": null,
"details": {
"settings": {
"filterableAttributes": [
"play_count"
]
}
},
"error": null,
"duration": "PT0.009330S",
"enqueuedAt": "2024-08-08T09:01:13.348471Z",
@@ -577,16 +583,19 @@ async fn get_tasks(
path = "/{taskUid}",
tag = "Tasks",
security(("Bearer" = ["tasks.get", "tasks.*", "*"])),
params(("taskUid", format = UInt32, example = "0", description = "The task identifier", nullable = false)),
params(("taskUid", format = UInt32, example = 0, description = "The task identifier", nullable = false)),
responses(
(status = 200, description = "Task successfully retrieved", body = TaskView, content_type = "application/json", example = json!(
{
"uid": 1,
"indexUid": "movies",
"status": "succeeded",
"type": "indexCreation",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": null,
"details": {
"receivedDocuments": 79000,
"indexedDocuments": 79000
},
"error": null,
"duration": "PT1S",
"enqueuedAt": "2021-01-01T09:39:00.000000Z",
@@ -645,7 +654,7 @@ async fn get_task(
path = "/{taskUid}/documents",
tag = "Tasks",
security(("Bearer" = ["tasks.get", "tasks.*", "*"])),
params(("taskUid", format = UInt32, example = "0", description = "The task identifier", nullable = false)),
params(("taskUid", format = UInt32, example = 0, description = "The task identifier", nullable = false)),
responses(
(status = 200, description = "The content of the task update", body = serde_json::Value, content_type = "application/x-ndjson"),
(status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!(

View File

@@ -111,9 +111,6 @@ pub(super) struct WebhookResults {
results: Vec<WebhookWithMetadataRedactedAuthorization>,
}
/// List webhooks
///
/// Get the list of all registered webhooks.
#[utoipa::path(
get,
path = "",
@@ -299,9 +296,6 @@ fn check_changed(uuid: Uuid, webhook: &Webhook) -> Result<(), WebhooksError> {
Ok(())
}
/// Get a webhook
///
/// Get a single webhook by its UUID.
#[utoipa::path(
get,
path = "/{uuid}",
@@ -337,9 +331,6 @@ async fn get_webhook(
Ok(HttpResponse::Ok().json(webhook))
}
/// Create a webhook
///
/// Create a new webhook to receive task notifications.
#[utoipa::path(
post,
path = "",
@@ -398,9 +389,6 @@ async fn post_webhook(
Ok(HttpResponse::Created().json(response))
}
/// Update a webhook
///
/// Update an existing webhook's URL or headers.
#[utoipa::path(
patch,
path = "/{uuid}",
@@ -453,9 +441,6 @@ async fn patch_webhook(
Ok(HttpResponse::Ok().json(response))
}
/// Delete a webhook
///
/// Delete an existing webhook by its UUID.
#[utoipa::path(
delete,
path = "/{uuid}",

View File

@@ -11,9 +11,13 @@ use index_scheduler::{IndexScheduler, RoFeatures};
use itertools::Itertools;
use meilisearch_types::error::ResponseError;
use meilisearch_types::milli::order_by_map::OrderByMap;
use meilisearch_types::milli::progress::Progress;
use meilisearch_types::milli::score_details::{ScoreDetails, WeightedScoreValue};
use meilisearch_types::milli::vector::Embedding;
use meilisearch_types::milli::{self, DocumentId, OrderBy, TimeBudget, DEFAULT_VALUES_PER_FACET};
use meilisearch_types::milli::{
self, DocumentId, FederatingResultsStep, OrderBy, SearchStep, TimeBudget,
DEFAULT_VALUES_PER_FACET,
};
use meilisearch_types::network::{Network, Remote};
use roaring::RoaringBitmap;
use tokio::task::JoinHandle;
@@ -35,6 +39,7 @@ use crate::error::MeilisearchHttpError;
use crate::routes::indexes::search::search_kind;
use crate::search::federated::types::{INDEX_UID, QUERIES_POSITION, WEIGHTED_RANKING_SCORE};
#[allow(clippy::too_many_arguments)]
pub async fn perform_federated_search(
index_scheduler: &IndexScheduler,
queries: Vec<SearchQueryWithIndex>,
@@ -43,6 +48,7 @@ pub async fn perform_federated_search(
is_proxy: bool,
request_uid: Uuid,
include_metadata: bool,
progress: &Progress,
) -> Result<FederatedSearchResult, ResponseError> {
if is_proxy {
features.check_network("Performing a remote federated search")?;
@@ -111,7 +117,7 @@ pub async fn perform_federated_search(
for (index_uid, queries) in partitioned_queries.local_queries_by_index {
// note: this is the only place we open `index_uid`
search_by_index.execute(index_uid, queries, &params)?;
search_by_index.execute(index_uid, queries, &params, progress)?;
}
// bonus step, make sure to return an error if an index wants a non-faceted field, even if no query actually uses that index.
@@ -126,6 +132,8 @@ pub async fn perform_federated_search(
facet_order,
} = search_by_index;
progress.update_progress(SearchStep::Federation);
progress.update_progress(FederatingResultsStep::WaitForRemoteResults);
let before_waiting_remote_results = std::time::Instant::now();
// 2.3. Wait for proxy search requests to complete
@@ -134,7 +142,7 @@ pub async fn perform_federated_search(
let after_waiting_remote_results = std::time::Instant::now();
// 3. merge hits and metadata across indexes and hosts
progress.update_progress(FederatingResultsStep::MergeResults);
// 3.1. Build metadata in the same order as the original queries
let query_metadata = precomputed_query_metadata.map(|precomputed_query_metadata| {
// If a remote is present, set the local remote name
@@ -187,6 +195,7 @@ pub async fn perform_federated_search(
};
// 3.5. merge facets
progress.update_progress(FederatingResultsStep::MergeFacets);
let (facet_distribution, facet_stats, facets_by_index) =
facet_order.merge(federation.merge_facets, remote_results, facets);
@@ -831,6 +840,7 @@ impl SearchByIndex {
index_uid: String,
queries: Vec<QueryByIndex>,
params: &SearchByIndexParams<'_>,
progress: &Progress,
) -> Result<(), ResponseError> {
let first_query_index = queries.first().map(|query| query.query_index);
let index = match params.index_scheduler.index(&index_uid) {
@@ -957,6 +967,7 @@ impl SearchByIndex {
// clones of `TimeBudget` share the budget rather than restart it
time_budget.clone(),
params.features,
progress,
)?;
search.scoring_strategy(milli::score_details::ScoringStrategy::Detailed);
@@ -1044,7 +1055,7 @@ impl SearchByIndex {
hit_maker,
query_index,
}| {
let mut hit = hit_maker.make_hit(docid, &score)?;
let mut hit = hit_maker.make_hit(docid, &score, progress)?;
let weighted_score = ScoreDetails::global_score(score.iter()) * (*weight);
let mut _federation = serde_json::json!(

View File

@@ -17,11 +17,13 @@ use meilisearch_types::heed::RoTxn;
use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::locales::Locale;
use meilisearch_types::milli::index::{self, EmbeddingsWithMetadata, SearchParameters};
use meilisearch_types::milli::progress::Progress;
use meilisearch_types::milli::score_details::{ScoreDetails, ScoringStrategy};
use meilisearch_types::milli::vector::parsed_vectors::ExplicitVectors;
use meilisearch_types::milli::vector::Embedder;
use meilisearch_types::milli::{
FacetValueHit, InternalError, OrderBy, PatternMatch, SearchForFacetValues, TimeBudget,
FacetValueHit, InternalError, OrderBy, PatternMatch, SearchForFacetValues, SearchStep,
TimeBudget,
};
use meilisearch_types::settings::DEFAULT_PAGINATION_MAX_TOTAL_HITS;
use meilisearch_types::{milli, Document};
@@ -1024,11 +1026,12 @@ pub fn prepare_search<'t>(
search_kind: &SearchKind,
time_budget: TimeBudget,
features: RoFeatures,
progress: &'t Progress,
) -> Result<(milli::Search<'t>, bool, usize, usize), ResponseError> {
if query.media.is_some() {
features.check_multimodal("passing `media` in a search query")?;
}
let mut search = index.search(rtxn);
let mut search = index.search(rtxn, progress);
search.time_budget(time_budget);
if let Some(ranking_score_threshold) = query.ranking_score_threshold {
search.ranking_score_threshold(ranking_score_threshold.0);
@@ -1048,6 +1051,7 @@ pub fn prepare_search<'t>(
let vector = match query.vector.clone() {
Some(vector) => vector,
None => {
let _step = progress.update_progress_scoped(SearchStep::Embed);
let span = tracing::trace_span!(target: "search::vector", "embed_one");
let _entered = span.enter();
@@ -1173,6 +1177,7 @@ pub struct SearchParams {
pub fn perform_search(
params: SearchParams,
index: &Index,
progress: &Progress,
) -> Result<(SearchResult, TimeBudget), ResponseError> {
let SearchParams {
index_uid,
@@ -1191,8 +1196,15 @@ pub fn perform_search(
None => TimeBudget::default(),
};
let (search, is_finite_pagination, max_total_hits, offset) =
prepare_search(index, &rtxn, &query, &search_kind, time_budget.clone(), features)?;
let (search, is_finite_pagination, max_total_hits, offset) = prepare_search(
index,
&rtxn,
&query,
&search_kind,
time_budget.clone(),
features,
progress,
)?;
let (
milli::SearchResult {
@@ -1275,6 +1287,7 @@ pub fn perform_search(
format,
matching_words,
documents_ids.iter().copied().zip(document_scores.iter()),
progress,
)?;
let number_of_hits = min(candidates.len() as usize, max_total_hits);
@@ -1297,6 +1310,7 @@ pub fn perform_search(
let (facet_distribution, facet_stats) = facets
.map(move |facets| {
let _step = progress.update_progress_scoped(SearchStep::FacetDistribution);
compute_facet_distribution_stats(&facets, index, &rtxn, candidates, Route::Search)
})
.transpose()?
@@ -1580,7 +1594,13 @@ impl<'a> HitMaker<'a> {
})
}
pub fn make_hit(&self, id: u32, score: &[ScoreDetails]) -> milli::Result<SearchHit> {
pub fn make_hit(
&self,
id: u32,
score: &[ScoreDetails],
progress: &Progress,
) -> milli::Result<SearchHit> {
let _step = progress.update_progress_scoped(SearchStep::Format);
let (_, obkv) =
self.index.iter_documents(self.rtxn, std::iter::once(id))?.next().unwrap()?;
@@ -1669,6 +1689,7 @@ fn make_hits<'a>(
format: AttributesFormat,
matching_words: milli::MatchingWords,
documents_ids_scores: impl Iterator<Item = (u32, &'a Vec<ScoreDetails>)> + 'a,
progress: &Progress,
) -> milli::Result<Vec<SearchHit>> {
let mut documents = Vec::new();
@@ -1686,7 +1707,7 @@ fn make_hits<'a>(
let hit_maker = HitMaker::new(index, rtxn, format, formatter_builder)?;
for (id, score) in documents_ids_scores {
documents.push(hit_maker.make_hit(id, score)?);
documents.push(hit_maker.make_hit(id, score, progress)?);
}
Ok(documents)
}
@@ -1701,6 +1722,7 @@ pub fn perform_facet_search(
locales: Option<Vec<Language>>,
) -> Result<FacetSearchResult, ResponseError> {
let before_search = Instant::now();
let progress = Progress::default();
let rtxn = index.read_txn()?;
let time_budget = match index.search_cutoff(&rtxn)? {
Some(cutoff) => TimeBudget::new(Duration::from_millis(cutoff)),
@@ -1729,8 +1751,15 @@ pub fn perform_facet_search(
.collect()
});
let (search, _, _, _) =
prepare_search(index, &rtxn, &search_query, &search_kind, time_budget, features)?;
let (search, _, _, _) = prepare_search(
index,
&rtxn,
&search_query,
&search_kind,
time_budget,
features,
&progress,
)?;
let mut facet_search = SearchForFacetValues::new(
facet_name,
search,
@@ -1754,6 +1783,7 @@ pub fn perform_facet_search(
})
}
#[allow(clippy::too_many_arguments)]
pub fn perform_similar(
index: &Index,
query: SimilarQuery,
@@ -1762,6 +1792,7 @@ pub fn perform_similar(
quantized: bool,
retrieve_vectors: RetrieveVectors,
features: RoFeatures,
progress: &Progress,
) -> Result<SimilarResult, ResponseError> {
let before_search = Instant::now();
let rtxn = index.read_txn()?;
@@ -1802,6 +1833,7 @@ pub fn perform_similar(
embedder_name,
embedder,
quantized,
progress,
);
if let Some(ref filter) = query.filter {
@@ -1851,6 +1883,7 @@ pub fn perform_similar(
format,
Default::default(),
documents_ids.iter().copied().zip(document_scores.iter()),
progress,
)?;
let max_total_hits = index

View File

@@ -91,7 +91,7 @@ rhai = { version = "1.23.6", features = [
"sync",
] }
arroy = "0.6.4-nested-rtxns"
hannoy = { version = "0.1.2-nested-rtxns", features = ["arroy"] }
hannoy = { version = "0.1.0-nested-rtxns", features = ["arroy"] }
rand = "0.8.5"
tracing = "0.1.41"
ureq = { version = "2.12.1", features = ["json"] }

View File

@@ -28,6 +28,7 @@ use crate::heed_codec::facet::{
use crate::heed_codec::version::VersionCodec;
use crate::heed_codec::{BEU16StrCodec, FstSetCodec, StrBEU16Codec, StrRefCodec};
use crate::order_by_map::OrderByMap;
use crate::progress::Progress;
use crate::prompt::PromptData;
use crate::proximity::ProximityPrecision;
use crate::update::new::StdResult;
@@ -1477,8 +1478,8 @@ impl Index {
FacetDistribution::new(rtxn, self)
}
pub fn search<'a>(&'a self, rtxn: &'a RoTxn<'a>) -> Search<'a> {
Search::new(rtxn, self)
pub fn search<'a>(&'a self, rtxn: &'a RoTxn<'a>, progress: &'a Progress) -> Search<'a> {
Search::new(rtxn, self, progress)
}
/// Returns the index creation time.

View File

@@ -81,6 +81,7 @@ pub use self::index::Index;
pub use self::localized_attributes_rules::LocalizedAttributesRule;
pub use self::search::facet::{FacetValueHit, SearchForFacetValues};
pub use self::search::similar::Similar;
pub use self::search::steps::{FederatingResultsStep, SearchStep, TotalProcessingTimeStep};
pub use self::search::{
FacetDistribution, Filter, FormatOptions, MatchBounds, MatcherBuilder, MatchingWords, OrderBy,
Search, SearchResult, SemanticSearch, TermsMatchingStrategy, DEFAULT_VALUES_PER_FACET,

View File

@@ -50,18 +50,58 @@ struct InnerProgress {
}
impl Progress {
pub fn update_progress<P: Step>(&self, sub_progress: P) {
/// Update the progress and return `true` if the step was started, `false` if it was already started.
pub fn update_progress<P: Step>(&self, sub_progress: P) -> bool {
let mut inner = self.steps.write().unwrap();
let InnerProgress { steps, durations } = &mut *inner;
let now = Instant::now();
let step_type = TypeId::of::<P>();
if let Some(idx) = steps.iter().position(|(id, _, _)| *id == step_type) {
if steps[idx].1.name() == sub_progress.name() {
// The step is already started, so we don't need to start it again.
return false;
}
push_steps_durations(steps, durations, now, idx);
steps.truncate(idx);
}
steps.push((step_type, Box::new(sub_progress), now));
true
}
/// End a step that has been started without having to start a new step.
fn end_progress_step<P: Step>(&self, sub_progress: P) {
let mut inner = self.steps.write().unwrap();
let InnerProgress { steps, durations } = &mut *inner;
let now = Instant::now();
let step_type = TypeId::of::<P>();
debug_assert!(
steps.iter().any(|(id, s, _)| *id == step_type && s.name() == sub_progress.name()),
"Step `{}` must have been started",
sub_progress.name()
);
if let Some(idx) = steps.iter().position(|(id, _, _)| *id == step_type) {
push_steps_durations(steps, durations, now, idx);
steps.truncate(idx);
}
}
/// Update the progress and return a scoped progress step that will end the progress step when dropped.
pub fn update_progress_scoped<P: Step + Copy>(&self, step: P) -> ScopedProgressStep<'_, P> {
let started = self.update_progress(step);
debug_assert!(
started,
"Step `{}` can't be scoped because it was already started",
step.name()
);
ScopedProgressStep { progress: self, step: started.then_some(step) }
}
// TODO: This code should be in meilisearch_types but cannot because milli can't depend on meilisearch_types
@@ -95,7 +135,15 @@ impl Progress {
let now = Instant::now();
push_steps_durations(steps, &mut durations, now, 0);
durations.drain(..).map(|(name, duration)| (name, format!("{duration:.2?}"))).collect()
let mut accumulated_durations = IndexMap::new();
for (name, duration) in durations.drain(..) {
accumulated_durations.entry(name).and_modify(|d| *d += duration).or_insert(duration);
}
accumulated_durations
.into_iter()
.map(|(name, duration)| (name, format!("{duration:.2?}")))
.collect()
}
// TODO: ideally we should expose the progress in a way that let arroy use it directly
@@ -343,3 +391,16 @@ impl<T: steppe::Step> Step for Compat<T> {
self.0.total().try_into().unwrap_or(u32::MAX)
}
}
pub struct ScopedProgressStep<'a, P: Step + Copy> {
progress: &'a Progress,
step: Option<P>,
}
impl<'a, P: Step + Copy> Drop for ScopedProgressStep<'a, P> {
fn drop(&mut self) {
if let Some(step) = self.step {
self.progress.end_progress_step(step);
}
}
}
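Taken together, the hunks above make double-starting a step a debug-time error and a release-time no-op: `update_progress` returns `false` when the same step is already running, `update_progress_scoped` turns that into `started.then_some(step)`, and the disarmed guard then skips `end_progress_step` on drop. A reduced sketch of that interaction, with stand-in types in place of milli's `Progress` and `Step`:

```rust
use std::cell::RefCell;

struct Progress {
    current: RefCell<Option<&'static str>>,
}

impl Progress {
    /// Returns `true` if the step was started, `false` if it was already running.
    fn update_progress(&self, name: &'static str) -> bool {
        let mut current = self.current.borrow_mut();
        if *current == Some(name) {
            return false;
        }
        *current = Some(name);
        true
    }

    fn update_progress_scoped(&self, name: &'static str) -> ScopedProgressStep<'_> {
        let started = self.update_progress(name);
        debug_assert!(started, "step `{name}` can't be scoped because it was already started");
        // A guard that failed to start is disarmed: dropping it must not
        // end a step owned by someone else.
        ScopedProgressStep { progress: self, step: started.then_some(name) }
    }

    fn end_progress_step(&self, name: &'static str) {
        let mut current = self.current.borrow_mut();
        if *current == Some(name) {
            *current = None;
        }
    }
}

struct ScopedProgressStep<'a> {
    progress: &'a Progress,
    step: Option<&'static str>,
}

impl Drop for ScopedProgressStep<'_> {
    fn drop(&mut self) {
        if let Some(step) = self.step {
            self.progress.end_progress_step(step);
        }
    }
}

fn main() {
    let progress = Progress { current: RefCell::new(None) };
    let _outer = progress.update_progress_scoped("search");
    // Starting "search" again here would trip the debug_assert in a debug
    // build; in a release build the returned guard would simply be disarmed.
}
```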

View File

@@ -1165,7 +1165,7 @@ mod tests {
let rtxn = index.read_txn().unwrap();
let mut search = crate::Search::new(&rtxn, &index);
let mut search = index.search(&rtxn);
// this filter is copy-pasted from #2380 with the exact same escape sequence
search.filter(Filter::from_str("monitor_diagonal = '27\" to 30\\''").unwrap().unwrap());
let crate::SearchResult { documents_ids, .. } = search.execute().unwrap();
@@ -1225,7 +1225,7 @@ mod tests {
let rtxn = index.read_txn().unwrap();
let mut search = crate::Search::new(&rtxn, &index);
let mut search = index.search(&rtxn);
search.filter(Filter::from_str("_geoRadius(45.4777599, 9.1967508, 0)").unwrap().unwrap());
let crate::SearchResult { documents_ids, .. } = search.execute().unwrap();

View File

@@ -6,6 +6,7 @@ use roaring::RoaringBitmap;
use crate::score_details::{ScoreDetails, ScoreValue, ScoringStrategy};
use crate::search::new::{distinct_fid, distinct_single_docid};
use crate::search::steps::SearchStep;
use crate::search::SemanticSearch;
use crate::vector::{Embedding, SearchQuery};
use crate::{Index, MatchingWords, Result, Search, SearchResult};
@@ -221,6 +222,7 @@ impl Search<'_> {
time_budget: self.time_budget.clone(),
ranking_score_threshold: self.ranking_score_threshold,
locales: self.locales.clone(),
progress: self.progress,
};
let semantic = search.semantic.take();
@@ -241,6 +243,7 @@ impl Search<'_> {
Some(vector_query) => vector_query,
None => {
// attempt to embed the vector
self.progress.update_progress(SearchStep::Embed);
let span = tracing::trace_span!(target: "search::hybrid", "embed_one");
let _entered = span.enter();

View File

@@ -12,6 +12,7 @@ use self::new::{execute_vector_search, PartialSearchResult, VectorStoreStats};
use crate::documents::GeoSortParameter;
use crate::filterable_attributes_rules::{filtered_matching_patterns, matching_features};
use crate::index::MatchingStrategy;
use crate::progress::Progress;
use crate::score_details::{ScoreDetails, ScoringStrategy};
use crate::vector::{Embedder, Embedding};
use crate::{
@@ -29,6 +30,7 @@ mod fst_utils;
pub mod hybrid;
pub mod new;
pub mod similar;
pub mod steps;
#[derive(Debug, Clone)]
pub struct SemanticSearch {
@@ -61,10 +63,11 @@ pub struct Search<'a> {
time_budget: TimeBudget,
ranking_score_threshold: Option<f64>,
locales: Option<Vec<Language>>,
progress: &'a Progress,
}
impl<'a> Search<'a> {
pub fn new(rtxn: &'a heed::RoTxn<'a>, index: &'a Index) -> Search<'a> {
pub fn new(rtxn: &'a heed::RoTxn<'a>, index: &'a Index, progress: &'a Progress) -> Search<'a> {
Search {
query: None,
filter: None,
@@ -86,6 +89,7 @@ impl<'a> Search<'a> {
locales: None,
time_budget: TimeBudget::max(),
ranking_score_threshold: None,
progress,
}
}
@@ -198,7 +202,7 @@ impl<'a> Search<'a> {
pub fn execute_for_candidates(&self, has_vector_search: bool) -> Result<RoaringBitmap> {
if has_vector_search {
let ctx = SearchContext::new(self.index, self.rtxn)?;
filtered_universe(ctx.index, ctx.txn, &self.filter)
filtered_universe(ctx.index, ctx.txn, &self.filter, self.progress)
} else {
Ok(self.execute()?.candidates)
}
@@ -239,7 +243,7 @@ impl<'a> Search<'a> {
}
}
let universe = filtered_universe(ctx.index, ctx.txn, &self.filter)?;
let universe = filtered_universe(ctx.index, ctx.txn, &self.filter, self.progress)?;
let mut query_vector = None;
let PartialSearchResult {
located_query_terms,
@@ -276,6 +280,7 @@ impl<'a> Search<'a> {
*quantized,
self.time_budget.clone(),
self.ranking_score_threshold,
self.progress,
)?
}
_ => execute_search(
@@ -297,6 +302,7 @@ impl<'a> Search<'a> {
self.time_budget.clone(),
self.ranking_score_threshold,
self.locales.as_ref(),
self.progress,
)?,
};
@@ -347,6 +353,7 @@ impl fmt::Debug for Search<'_> {
time_budget,
ranking_score_threshold,
locales,
progress: _,
} = self;
f.debug_struct("Search")
.field("query", query)

View File

@@ -3,10 +3,12 @@ use roaring::RoaringBitmap;
use super::logger::SearchLogger;
use super::ranking_rules::{BoxRankingRule, RankingRuleQueryTrait};
use super::SearchContext;
use crate::progress::Progress;
use crate::score_details::{ScoreDetails, ScoringStrategy};
use crate::search::new::distinct::{
apply_distinct_rule, distinct_fid, distinct_single_docid, DistinctOutput,
};
use crate::search::steps::ComputingBucketSortStep;
use crate::{Result, TimeBudget};
pub struct BucketSortOutput {
@@ -34,6 +36,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
ranking_score_threshold: Option<f64>,
exhaustive_number_hits: bool,
max_total_hits: Option<usize>,
progress: &Progress,
) -> Result<BucketSortOutput> {
logger.initial_query(query);
logger.ranking_rules(&ranking_rules);
@@ -97,7 +100,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
logger.start_iteration_ranking_rule(0, ranking_rules[0].as_ref(), query, universe);
ranking_rules[0].start_iteration(ctx, logger, universe, query, &time_budget)?;
ranking_rules[0].start_iteration(ctx, logger, universe, query, &time_budget, progress)?;
let mut ranking_rule_scores: Vec<ScoreDetails> = vec![];
@@ -157,6 +160,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
distinct_fid,
&ranking_rule_scores,
$candidates,
progress,
)?;
};
}
@@ -185,6 +189,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
ctx,
logger,
&ranking_rule_universes[cur_ranking_rule_index],
progress,
)? {
std::task::Poll::Ready(bucket) => bucket,
std::task::Poll::Pending => {
@@ -231,6 +236,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
logger,
&ranking_rule_universes[cur_ranking_rule_index],
&time_budget,
progress,
)?
else {
back!();
@@ -289,6 +295,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
&next_bucket.candidates,
&next_bucket.query,
&time_budget,
progress,
)?;
}
@@ -323,9 +330,11 @@ fn maybe_add_to_results<'ctx, Q: RankingRuleQueryTrait>(
distinct_fid: Option<u16>,
ranking_rule_scores: &[ScoreDetails],
candidates: RoaringBitmap,
progress: &Progress,
) -> Result<()> {
// First apply the distinct rule on the candidates, reducing the universes if necessary
let candidates = if let Some(distinct_fid) = distinct_fid {
progress.update_progress(ComputingBucketSortStep::Distinct);
let DistinctOutput { remaining, excluded } =
apply_distinct_rule(ctx, distinct_fid, &candidates)?;
for universe in ranking_rule_universes.iter_mut() {
@@ -336,6 +345,8 @@ fn maybe_add_to_results<'ctx, Q: RankingRuleQueryTrait>(
} else {
candidates.clone()
};
progress.update_progress(ComputingBucketSortStep::MergeCandidates);
*all_candidates |= &candidates;
// if the candidates are empty, there is nothing to do;

View File

@@ -3,9 +3,12 @@ use roaring::{MultiOps, RoaringBitmap};
use super::query_graph::QueryGraph;
use super::ranking_rules::{RankingRule, RankingRuleOutput};
use crate::progress::Progress;
use crate::score_details::{self, ScoreDetails};
use crate::search::new::query_graph::QueryNodeData;
use crate::search::new::query_term::ExactTerm;
use crate::search::new::ranking_rules::RankingRuleId;
use crate::search::steps::{ComputingBucketSortStep, RankingRuleStep};
use crate::{CboRoaringBitmapCodec, Result, SearchContext, SearchLogger, TimeBudget};
/// A ranking rule that produces 3 disjoint buckets:
@@ -24,8 +27,8 @@ impl ExactAttribute {
}
impl<'ctx> RankingRule<'ctx, QueryGraph> for ExactAttribute {
fn id(&self) -> String {
"exact_attribute".to_owned()
fn id(&self) -> RankingRuleId {
RankingRuleId::Exactness
}
#[tracing::instrument(level = "trace", skip_all, target = "search::exact_attribute")]
@@ -36,7 +39,10 @@ impl<'ctx> RankingRule<'ctx, QueryGraph> for ExactAttribute {
universe: &roaring::RoaringBitmap,
query: &QueryGraph,
_time_budget: &TimeBudget,
progress: &Progress,
) -> Result<()> {
progress.update_progress(ComputingBucketSortStep::from(self.id()));
let _step = progress.update_progress_scoped(RankingRuleStep::StartIteration);
self.state = State::start_iteration(ctx, universe, query)?;
Ok(())
}
@@ -48,7 +54,10 @@ impl<'ctx> RankingRule<'ctx, QueryGraph> for ExactAttribute {
_logger: &mut dyn SearchLogger<QueryGraph>,
universe: &roaring::RoaringBitmap,
_time_budget: &TimeBudget,
progress: &Progress,
) -> Result<Option<RankingRuleOutput<QueryGraph>>> {
progress.update_progress(ComputingBucketSortStep::from(self.id()));
let _step = progress.update_progress_scoped(RankingRuleStep::NextBucket);
let state = std::mem::take(&mut self.state);
let (state, output) = State::next(state, universe);
self.state = state;
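Every ranking rule follows the same two-level pattern on entry: an `update_progress(ComputingBucketSortStep::from(self.id()))` call records which rule is active, and a scoped `RankingRuleStep` guard times the method itself. The guard appears to rely on RAII (an assumption: the value returned by `update_progress_scoped` marks the step finished when dropped), so it must be bound to a named variable such as `_step`; a bare `let _ = ...` would drop it immediately and time nothing. A sketch under that assumption:

// Sketch of the per-rule idiom (guard semantics assumed: finished on drop).
progress.update_progress(ComputingBucketSortStep::from(self.id())); // which rule is active
let _step = progress.update_progress_scoped(RankingRuleStep::NextBucket); // which phase
// ... rule-specific work ...
// `_step` is dropped at the end of the scope, marking the phase finished.
// Note: `let _ = ...` would drop the guard immediately and record nothing useful.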

View File

@@ -6,7 +6,10 @@ use rstar::RTree;
use super::ranking_rules::{RankingRule, RankingRuleOutput, RankingRuleQueryTrait};
use crate::documents::geo_sort::{fill_cache, next_bucket};
use crate::documents::{GeoSortParameter, GeoSortStrategy};
use crate::progress::Progress;
use crate::score_details::{self, ScoreDetails};
use crate::search::new::ranking_rules::RankingRuleId;
use crate::search::steps::{ComputingBucketSortStep, RankingRuleStep};
use crate::{GeoPoint, Result, SearchContext, SearchLogger, TimeBudget};
pub struct GeoSort<Q: RankingRuleQueryTrait> {
@@ -73,8 +76,8 @@ impl<Q: RankingRuleQueryTrait> GeoSort<Q> {
}
impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for GeoSort<Q> {
fn id(&self) -> String {
"geo_sort".to_owned()
fn id(&self) -> RankingRuleId {
RankingRuleId::GeoSort
}
#[tracing::instrument(level = "trace", skip_all, target = "search::geo_sort")]
@@ -85,7 +88,10 @@ impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for GeoSort<Q> {
universe: &RoaringBitmap,
query: &Q,
_time_budget: &TimeBudget,
progress: &Progress,
) -> Result<()> {
progress.update_progress(ComputingBucketSortStep::from(self.id()));
let _step = progress.update_progress_scoped(RankingRuleStep::StartIteration);
assert!(self.query.is_none());
self.query = Some(query.clone());
@@ -112,7 +118,10 @@ impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for GeoSort<Q> {
_logger: &mut dyn SearchLogger<Q>,
universe: &RoaringBitmap,
_time_budget: &TimeBudget,
progress: &Progress,
) -> Result<Option<RankingRuleOutput<Q>>> {
progress.update_progress(ComputingBucketSortStep::from(self.id()));
let _step = progress.update_progress_scoped(RankingRuleStep::NextBucket);
let query = self.query.as_ref().unwrap().clone();
next_bucket(

View File

@@ -50,51 +50,54 @@ use super::ranking_rule_graph::{
};
use super::small_bitmap::SmallBitmap;
use super::{QueryGraph, RankingRule, RankingRuleOutput, SearchContext};
use crate::progress::Progress;
use crate::score_details::Rank;
use crate::search::new::query_term::LocatedQueryTermSubset;
use crate::search::new::ranking_rule_graph::PathVisitor;
use crate::search::new::ranking_rules::RankingRuleId;
use crate::search::steps::{ComputingBucketSortStep, RankingRuleStep};
use crate::{Result, TermsMatchingStrategy, TimeBudget};
pub type Words = GraphBasedRankingRule<WordsGraph>;
impl GraphBasedRankingRule<WordsGraph> {
pub fn new(terms_matching_strategy: TermsMatchingStrategy) -> Self {
Self::new_with_id("words".to_owned(), Some(terms_matching_strategy))
Self::new_with_id(RankingRuleId::Words, Some(terms_matching_strategy))
}
}
pub type Proximity = GraphBasedRankingRule<ProximityGraph>;
impl GraphBasedRankingRule<ProximityGraph> {
pub fn new(terms_matching_strategy: Option<TermsMatchingStrategy>) -> Self {
Self::new_with_id("proximity".to_owned(), terms_matching_strategy)
Self::new_with_id(RankingRuleId::Proximity, terms_matching_strategy)
}
}
pub type Fid = GraphBasedRankingRule<FidGraph>;
impl GraphBasedRankingRule<FidGraph> {
pub fn new(terms_matching_strategy: Option<TermsMatchingStrategy>) -> Self {
Self::new_with_id("fid".to_owned(), terms_matching_strategy)
Self::new_with_id(RankingRuleId::AttributePosition, terms_matching_strategy)
}
}
pub type Position = GraphBasedRankingRule<PositionGraph>;
impl GraphBasedRankingRule<PositionGraph> {
pub fn new(terms_matching_strategy: Option<TermsMatchingStrategy>) -> Self {
Self::new_with_id("position".to_owned(), terms_matching_strategy)
Self::new_with_id(RankingRuleId::WordPosition, terms_matching_strategy)
}
}
pub type Typo = GraphBasedRankingRule<TypoGraph>;
impl GraphBasedRankingRule<TypoGraph> {
pub fn new(terms_matching_strategy: Option<TermsMatchingStrategy>) -> Self {
Self::new_with_id("typo".to_owned(), terms_matching_strategy)
Self::new_with_id(RankingRuleId::Typo, terms_matching_strategy)
}
}
pub type Exactness = GraphBasedRankingRule<ExactnessGraph>;
impl GraphBasedRankingRule<ExactnessGraph> {
pub fn new() -> Self {
Self::new_with_id("exactness".to_owned(), None)
Self::new_with_id(RankingRuleId::Exactness, None)
}
}
/// A generic graph-based ranking rule
pub struct GraphBasedRankingRule<G: RankingRuleGraphTrait> {
id: String,
id: RankingRuleId,
terms_matching_strategy: Option<TermsMatchingStrategy>,
// When the ranking rule is not iterating over its buckets,
// its state is `None`.
@@ -102,7 +105,10 @@ pub struct GraphBasedRankingRule<G: RankingRuleGraphTrait> {
}
impl<G: RankingRuleGraphTrait> GraphBasedRankingRule<G> {
/// Creates the ranking rule with the given identifier
pub fn new_with_id(id: String, terms_matching_strategy: Option<TermsMatchingStrategy>) -> Self {
pub fn new_with_id(
id: RankingRuleId,
terms_matching_strategy: Option<TermsMatchingStrategy>,
) -> Self {
Self { id, terms_matching_strategy, state: None }
}
}
@@ -124,7 +130,7 @@ pub struct GraphBasedRankingRuleState<G: RankingRuleGraphTrait> {
}
impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBasedRankingRule<G> {
fn id(&self) -> String {
fn id(&self) -> RankingRuleId {
self.id.clone()
}
@@ -136,7 +142,10 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
_universe: &RoaringBitmap,
query_graph: &QueryGraph,
_time_budget: &TimeBudget,
progress: &Progress,
) -> Result<()> {
progress.update_progress(ComputingBucketSortStep::from(self.id()));
let _step = progress.update_progress_scoped(RankingRuleStep::StartIteration);
// the `next_max_cost` is the successor integer to the maximum cost of the paths in the graph.
//
// When there is a matching strategy, it also factors the additional costs of:
@@ -219,7 +228,10 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
logger: &mut dyn SearchLogger<QueryGraph>,
universe: &RoaringBitmap,
_time_budget: &TimeBudget,
progress: &Progress,
) -> Result<Option<RankingRuleOutput<QueryGraph>>> {
progress.update_progress(ComputingBucketSortStep::from(self.id()));
let _step = progress.update_progress_scoped(RankingRuleStep::NextBucket);
// Will crash if `next_bucket` is called before `start_iteration` or after `end_iteration`,
// should never happen
let mut state = self.state.take().unwrap();
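All graph-based rules now construct themselves with a typed `RankingRuleId` instead of an owned `String`, removing the stringly-typed matching downstream. Note the renames hiding in the mapping: the rule previously identified as `"fid"` becomes `RankingRuleId::AttributePosition`, and `"position"` becomes `RankingRuleId::WordPosition`. A before/after sketch using only the constructors shown above:

// Before: Self::new_with_id("fid".to_owned(), terms_matching_strategy)
// After:
let rule = GraphBasedRankingRule::<FidGraph>::new_with_id(
    RankingRuleId::AttributePosition,
    terms_matching_strategy,
);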

View File

@@ -14,7 +14,7 @@ use crate::search::new::ranking_rule_graph::{
ProximityGraph, RankingRuleGraph, RankingRuleGraphTrait, TypoCondition, TypoGraph,
WordsCondition, WordsGraph,
};
use crate::search::new::ranking_rules::BoxRankingRule;
use crate::search::new::ranking_rules::{BoxRankingRule, RankingRuleId};
use crate::search::new::{QueryGraph, QueryNode, RankingRule, SearchContext, SearchLogger};
use crate::Result;
@@ -45,13 +45,26 @@ enum Location {
Other,
}
impl From<RankingRuleId> for Location {
fn from(ranking_rule_id: RankingRuleId) -> Self {
match ranking_rule_id {
RankingRuleId::Words => Self::Words,
RankingRuleId::Typo => Self::Typo,
RankingRuleId::Proximity => Self::Proximity,
RankingRuleId::AttributePosition => Self::Fid,
RankingRuleId::WordPosition => Self::Position,
_ => Self::Other,
}
}
}
#[derive(Default)]
pub struct VisualSearchLogger {
initial_query: Option<QueryGraph>,
initial_query_time: Option<Instant>,
query_for_universe: Option<QueryGraph>,
initial_universe: Option<RoaringBitmap>,
ranking_rules_ids: Option<Vec<String>>,
ranking_rules_ids: Option<Vec<RankingRuleId>>,
events: Vec<SearchEvents>,
location: Vec<Location>,
}
@@ -84,14 +97,7 @@ impl SearchLogger<QueryGraph> for VisualSearchLogger {
ranking_rule_idx,
universe_len: universe.len(),
});
self.location.push(match ranking_rule.id().as_str() {
"words" => Location::Words,
"typo" => Location::Typo,
"proximity" => Location::Proximity,
"fid" => Location::Fid,
"position" => Location::Position,
_ => Location::Other,
});
self.location.push(ranking_rule.id().into());
}
fn next_bucket_ranking_rule(
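The visual logger no longer matches on `id().as_str()`; the mapping to its `Location` enum is a `From<RankingRuleId>` conversion with a `_ => Self::Other` fallback, so rules the logger does not visualize degrade gracefully instead of requiring a new string case. A one-line sketch of the conversion:

// Typed conversion replaces the old string match:
let loc: Location = RankingRuleId::AttributePosition.into(); // -> Location::Fid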

View File

@@ -498,12 +498,14 @@ mod tests {
use super::*;
use crate::index::tests::TempIndex;
use crate::progress::Progress;
use crate::{execute_search, filtered_universe, SearchContext, TimeBudget};
impl<'a> MatcherBuilder<'a> {
fn new_test(rtxn: &'a heed::RoTxn<'a>, index: &'a TempIndex, query: &str) -> Self {
let progress = Progress::default();
let mut ctx = SearchContext::new(index, rtxn).unwrap();
let universe = filtered_universe(ctx.index, ctx.txn, &None).unwrap();
let universe = filtered_universe(ctx.index, ctx.txn, &None, &progress).unwrap();
let crate::search::PartialSearchResult { located_query_terms, .. } = execute_search(
&mut ctx,
Some(query),
@@ -523,6 +525,7 @@ mod tests {
TimeBudget::max(),
None,
None,
&progress,
)
.unwrap();

View File

@@ -56,8 +56,10 @@ use crate::constants::RESERVED_GEO_FIELD_NAME;
use crate::documents::GeoSortParameter;
use crate::index::PrefixSearch;
use crate::localized_attributes_rules::LocalizedFieldIds;
use crate::progress::Progress;
use crate::score_details::{ScoreDetails, ScoringStrategy};
use crate::search::new::distinct::apply_distinct_rule;
use crate::search::steps::SearchStep;
use crate::vector::Embedder;
use crate::{
AscDesc, DocumentId, FieldId, Filter, Index, Member, Result, TermsMatchingStrategy, TimeBudget,
@@ -294,7 +296,9 @@ fn resolve_universe(
query_graph: &QueryGraph,
matching_strategy: TermsMatchingStrategy,
logger: &mut dyn SearchLogger<QueryGraph>,
progress: &Progress,
) -> Result<RoaringBitmap> {
let _step = progress.update_progress_scoped(SearchStep::ResolveUniverse);
resolve_maximally_reduced_query_graph(
ctx,
initial_universe,
@@ -632,8 +636,10 @@ pub fn filtered_universe(
index: &Index,
txn: &RoTxn<'_>,
filters: &Option<Filter<'_>>,
progress: &Progress,
) -> Result<RoaringBitmap> {
Ok(if let Some(filters) = filters {
let _step = progress.update_progress_scoped(SearchStep::Filter);
filters.evaluate(txn, index)?
} else {
index.documents_ids(txn)?
@@ -658,6 +664,7 @@ pub fn execute_vector_search(
quantized: bool,
time_budget: TimeBudget,
ranking_score_threshold: Option<f64>,
progress: &Progress,
) -> Result<PartialSearchResult> {
check_sort_criteria(ctx, sort_criteria.as_ref())?;
@@ -678,6 +685,7 @@ pub fn execute_vector_search(
let placeholder_search_logger: &mut dyn SearchLogger<PlaceholderQuery> =
&mut placeholder_search_logger;
let _step = progress.update_progress_scoped(SearchStep::SemanticSearch);
let BucketSortOutput { docids, scores, all_candidates, degraded } = bucket_sort(
ctx,
ranking_rules,
@@ -692,6 +700,7 @@ pub fn execute_vector_search(
ranking_score_threshold,
exhaustive_number_hits,
max_total_hits,
progress,
)?;
Ok(PartialSearchResult {
@@ -725,12 +734,14 @@ pub fn execute_search(
time_budget: TimeBudget,
ranking_score_threshold: Option<f64>,
locales: Option<&Vec<Language>>,
progress: &Progress,
) -> Result<PartialSearchResult> {
check_sort_criteria(ctx, sort_criteria.as_ref())?;
let mut used_negative_operator = false;
let mut located_query_terms = None;
let query_terms = if let Some(query) = query {
let _step = progress.update_progress_scoped(SearchStep::Tokenize);
let span = tracing::trace_span!(target: "search::tokens", "tokenizer_builder");
let entered = span.enter();
@@ -834,9 +845,16 @@ pub fn execute_search(
terms_matching_strategy,
)?;
universe &=
resolve_universe(ctx, &universe, &graph, terms_matching_strategy, query_graph_logger)?;
universe &= resolve_universe(
ctx,
&universe,
&graph,
terms_matching_strategy,
query_graph_logger,
progress,
)?;
let _step = progress.update_progress_scoped(SearchStep::KeywordSearch);
bucket_sort(
ctx,
ranking_rules,
@@ -851,10 +869,12 @@ pub fn execute_search(
ranking_score_threshold,
exhaustive_number_hits,
max_total_hits,
progress,
)?
} else {
let ranking_rules =
get_ranking_rules_for_placeholder_search(ctx, sort_criteria, geo_param)?;
let _step = progress.update_progress_scoped(SearchStep::PlaceholderSearch);
bucket_sort(
ctx,
ranking_rules,
@@ -869,6 +889,7 @@ pub fn execute_search(
ranking_score_threshold,
exhaustive_number_hits,
max_total_hits,
progress,
)?
};
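`execute_search` now brackets each phase of the pipeline with a scoped step. For a keyword query the rough order is `Tokenize` → `ResolveUniverse` → `KeywordSearch`; placeholder queries record `PlaceholderSearch` instead, and the vector path records `SemanticSearch` around its bucket sort. Filtering is timed inside `filtered_universe` itself, and only when a filter is actually present, as a sketch of that hunk makes explicit:

// Sketch of filtered_universe's timing (from the hunk above):
Ok(if let Some(filters) = filters {
    let _step = progress.update_progress_scoped(SearchStep::Filter);
    filters.evaluate(txn, index)? // only filter evaluation is timed
} else {
    index.documents_ids(txn)? // the unfiltered path records no step
})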

View File

@@ -4,7 +4,9 @@ use roaring::RoaringBitmap;
use super::logger::SearchLogger;
use super::{QueryGraph, SearchContext};
use crate::progress::Progress;
use crate::score_details::ScoreDetails;
use crate::search::steps::ComputingBucketSortStep;
use crate::{Result, TimeBudget};
/// An internal trait implemented only by [`PlaceholderQuery`] and [`QueryGraph`]
@@ -24,7 +26,7 @@ pub type BoxRankingRule<'ctx, Query> = Box<dyn RankingRule<'ctx, Query> + 'ctx>;
/// (i.e. the read transaction and the cache) and over `Query`, which
/// can be either [`PlaceholderQuery`] or [`QueryGraph`].
pub trait RankingRule<'ctx, Query: RankingRuleQueryTrait> {
fn id(&self) -> String;
fn id(&self) -> RankingRuleId;
/// Prepare the ranking rule such that it can start iterating over its
/// buckets using [`next_bucket`](RankingRule::next_bucket).
@@ -39,6 +41,7 @@ pub trait RankingRule<'ctx, Query: RankingRuleQueryTrait> {
universe: &RoaringBitmap,
query: &Query,
time_budget: &TimeBudget,
progress: &Progress,
) -> Result<()>;
/// Return the next bucket of this ranking rule.
@@ -56,6 +59,7 @@ pub trait RankingRule<'ctx, Query: RankingRuleQueryTrait> {
logger: &mut dyn SearchLogger<Query>,
universe: &RoaringBitmap,
time_budget: &TimeBudget,
progress: &Progress,
) -> Result<Option<RankingRuleOutput<Query>>>;
/// Return the next bucket of this ranking rule, if doing so can be done without blocking
@@ -69,6 +73,7 @@ pub trait RankingRule<'ctx, Query: RankingRuleQueryTrait> {
_ctx: &mut SearchContext<'ctx>,
_logger: &mut dyn SearchLogger<Query>,
_universe: &RoaringBitmap,
_progress: &Progress,
) -> Result<Poll<RankingRuleOutput<Query>>> {
Ok(Poll::Pending)
}
@@ -93,3 +98,54 @@ pub struct RankingRuleOutput<Q> {
/// The score for the candidates of the current bucket
pub score: ScoreDetails,
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum RankingRuleId {
Words,
Typo,
Proximity,
AttributePosition,
WordPosition,
Exactness,
Sort,
GeoSort,
VectorSort,
Asc(String),
Desc(String),
}
impl std::fmt::Display for RankingRuleId {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
RankingRuleId::Words => write!(f, "words"),
RankingRuleId::Typo => write!(f, "typo"),
RankingRuleId::Proximity => write!(f, "proximity"),
RankingRuleId::AttributePosition => write!(f, "attribute_position"),
RankingRuleId::WordPosition => write!(f, "word_position"),
RankingRuleId::Exactness => write!(f, "exactness"),
RankingRuleId::Sort => write!(f, "sort"),
RankingRuleId::GeoSort => write!(f, "geo_sort"),
RankingRuleId::VectorSort => write!(f, "vector_sort"),
RankingRuleId::Asc(field_name) => write!(f, "asc:{}", field_name),
RankingRuleId::Desc(field_name) => write!(f, "desc:{}", field_name),
}
}
}
impl From<RankingRuleId> for ComputingBucketSortStep {
fn from(ranking_rule_id: RankingRuleId) -> Self {
match ranking_rule_id {
RankingRuleId::Words => Self::Words,
RankingRuleId::Typo => Self::Typo,
RankingRuleId::Proximity => Self::Proximity,
RankingRuleId::AttributePosition => Self::AttributePosition,
RankingRuleId::WordPosition => Self::WordPosition,
RankingRuleId::Exactness => Self::Exactness,
RankingRuleId::Sort => Self::Sort,
RankingRuleId::GeoSort => Self::GeoSort,
RankingRuleId::VectorSort => Self::VectorSort,
RankingRuleId::Asc(_) => Self::Asc,
RankingRuleId::Desc(_) => Self::Desc,
}
}
}
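`RankingRuleId` centralizes what used to be ad-hoc strings: `Display` yields the canonical textual form, and `From<RankingRuleId> for ComputingBucketSortStep` collapses the field-carrying `Asc`/`Desc` variants into the field-agnostic `Asc` and `Desc` steps. A small usage sketch, grounded in the impls above:

// Display gives the textual id; From maps ids onto progress steps.
let id = RankingRuleId::Asc("price".to_string());
assert_eq!(id.to_string(), "asc:price");
let step = ComputingBucketSortStep::from(RankingRuleId::Typo); // -> ComputingBucketSortStep::Typo
let asc_step = ComputingBucketSortStep::from(id); // field name is dropped -> Asc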

View File

@@ -5,8 +5,11 @@ use super::logger::SearchLogger;
use super::{RankingRule, RankingRuleOutput, RankingRuleQueryTrait, SearchContext};
use crate::heed_codec::facet::{FacetGroupKeyCodec, OrderedF64Codec};
use crate::heed_codec::{BytesRefCodec, StrRefCodec};
use crate::progress::Progress;
use crate::score_details::{self, ScoreDetails};
use crate::search::facet::{ascending_facet_sort, descending_facet_sort};
use crate::search::new::ranking_rules::RankingRuleId;
use crate::search::steps::{ComputingBucketSortStep, RankingRuleStep};
use crate::{FieldId, Index, Result, TimeBudget};
pub trait RankingRuleOutputIter<'ctx, Query> {
@@ -84,9 +87,13 @@ impl<'ctx, Query> Sort<'ctx, Query> {
}
impl<'ctx, Query: RankingRuleQueryTrait> RankingRule<'ctx, Query> for Sort<'ctx, Query> {
fn id(&self) -> String {
fn id(&self) -> RankingRuleId {
let Self { field_name, is_ascending, .. } = self;
format!("{field_name}:{}", if *is_ascending { "asc" } else { "desc" })
if *is_ascending {
RankingRuleId::Asc(field_name.clone())
} else {
RankingRuleId::Desc(field_name.clone())
}
}
#[tracing::instrument(level = "trace", skip_all, target = "search::sort")]
@@ -97,7 +104,10 @@ impl<'ctx, Query: RankingRuleQueryTrait> RankingRule<'ctx, Query> for Sort<'ctx,
parent_candidates: &RoaringBitmap,
parent_query: &Query,
_time_budget: &TimeBudget,
progress: &Progress,
) -> Result<()> {
progress.update_progress(ComputingBucketSortStep::from(self.id()));
let _step = progress.update_progress_scoped(RankingRuleStep::StartIteration);
let iter: RankingRuleOutputIterWrapper<'ctx, Query> = match self.field_id {
Some(field_id) => {
let number_db = ctx
@@ -196,7 +206,10 @@ impl<'ctx, Query: RankingRuleQueryTrait> RankingRule<'ctx, Query> for Sort<'ctx,
_logger: &mut dyn SearchLogger<Query>,
universe: &RoaringBitmap,
_time_budget: &TimeBudget,
progress: &Progress,
) -> Result<Option<RankingRuleOutput<Query>>> {
progress.update_progress(ComputingBucketSortStep::from(self.id()));
let _step = progress.update_progress_scoped(RankingRuleStep::NextBucket);
let iter = self.iter.as_mut().unwrap();
if let Some(mut bucket) = iter.next_bucket()? {
bucket.candidates &= universe;
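Worth noting for anything that parses these ids: the `Sort` rule's textual form flips with this change. The old `id()` formatted `"{field_name}:asc"` / `"{field_name}:desc"`, while `Display` on the new `RankingRuleId::Asc`/`Desc` variants emits `"asc:{field_name}"` / `"desc:{field_name}"`:

// old: format!("{field_name}:{}", if *is_ascending { "asc" } else { "desc" }) -> "price:asc"
// new: RankingRuleId::Asc("price".into()).to_string()                        -> "asc:price"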

View File

@@ -1,5 +1,5 @@
use crate::index::tests::TempIndex;
use crate::{db_snap, Criterion, Search, SearchResult, TermsMatchingStrategy};
use crate::{db_snap, Criterion, SearchResult, TermsMatchingStrategy};
fn create_index() -> TempIndex {
let index = TempIndex::new();
@@ -119,7 +119,7 @@ fn test_attribute_fid_simple() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("the quick brown fox jumps over the lazy dog");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -147,7 +147,7 @@ fn test_attribute_fid_ngrams() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("the quick brown fox jumps over the lazy dog");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
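The remaining test diffs are mechanical: every `Search::new(&txn, &index)` becomes `index.search(&txn)`, presumably a new convenience constructor on the index wrapping `Search::new` (an assumption; only the call sites are visible in this diff). The pattern repeats throughout the files below:

// Assumed: index.search(txn) is a thin wrapper returning the same Search builder.
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);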

View File

@@ -1,5 +1,5 @@
use crate::index::tests::TempIndex;
use crate::{db_snap, Criterion, Search, SearchResult, TermsMatchingStrategy};
use crate::{db_snap, Criterion, SearchResult, TermsMatchingStrategy};
fn create_index() -> TempIndex {
let index = TempIndex::new();
@@ -134,7 +134,7 @@ fn test_attribute_position_simple() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("quick brown");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -150,7 +150,7 @@ fn test_attribute_position_repeated() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("a a a a a");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -167,7 +167,7 @@ fn test_attribute_position_different_fields() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("quick brown");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -184,7 +184,7 @@ fn test_attribute_position_ngrams() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("quick brown");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);

View File

@@ -14,7 +14,7 @@ use crate::score_details::{ScoreDetails, ScoringStrategy};
use crate::update::Setting;
use crate::vector::settings::EmbeddingSettings;
use crate::vector::{Embedder, EmbedderOptions};
use crate::{Criterion, Filter, FilterableAttributesRule, Search, TimeBudget};
use crate::{Criterion, Filter, FilterableAttributesRule, TimeBudget};
fn create_index() -> TempIndex {
let index = TempIndex::new();
@@ -61,7 +61,7 @@ fn basic_degraded_search() {
let index = create_index();
let rtxn = index.read_txn().unwrap();
let mut search = Search::new(&rtxn, &index);
let mut search = index.search(&rtxn);
search.query("hello puppy kefir");
search.limit(3);
search.time_budget(TimeBudget::new(Duration::from_millis(0)));
@@ -75,7 +75,7 @@ fn degraded_search_cannot_skip_filter() {
let index = create_index();
let rtxn = index.read_txn().unwrap();
let mut search = Search::new(&rtxn, &index);
let mut search = index.search(&rtxn);
search.query("hello puppy kefir");
search.limit(100);
search.time_budget(TimeBudget::new(Duration::from_millis(0)));
@@ -96,7 +96,7 @@ fn degraded_search_and_score_details() {
let index = create_index();
let rtxn = index.read_txn().unwrap();
let mut search = Search::new(&rtxn, &index);
let mut search = index.search(&rtxn);
search.query("hello puppy kefir");
search.limit(4);
search.scoring_strategy(ScoringStrategy::Detailed);
@@ -560,7 +560,7 @@ fn degraded_search_and_score_details_vector() {
.unwrap();
let rtxn = index.read_txn().unwrap();
let mut search = Search::new(&rtxn, &index);
let mut search = index.search(&rtxn);
let embedder = Arc::new(
Embedder::new(

View File

@@ -20,7 +20,7 @@ use maplit::hashset;
use super::collect_field_values;
use crate::index::tests::TempIndex;
use crate::{
AscDesc, Criterion, FilterableAttributesRule, Index, Member, Search, SearchResult,
AscDesc, Criterion, FilterableAttributesRule, Index, Member, SearchResult,
TermsMatchingStrategy,
};
@@ -246,7 +246,7 @@ fn test_distinct_placeholder_no_ranking_rules() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.distinct(S("letter"));
let SearchResult { documents_ids, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 2, 5, 8, 9, 15, 18, 20, 21, 24, 25, 26]");
@@ -275,7 +275,7 @@ fn test_distinct_at_search_placeholder_no_ranking_rules() {
let txn = index.read_txn().unwrap();
let s = Search::new(&txn, &index);
let s = index.search(&txn);
let SearchResult { documents_ids, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 2, 5, 8, 9, 15, 18, 20, 21, 24, 25, 26]");
let distinct_values = verify_distinct(&index, &txn, None, &documents_ids);
@@ -308,7 +308,7 @@ fn test_distinct_placeholder_sort() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.sort_criteria(vec![AscDesc::Desc(Member::Field(S("rank1")))]);
let SearchResult { documents_ids, .. } = s.execute().unwrap();
@@ -348,7 +348,7 @@ fn test_distinct_placeholder_sort() {
]
"###);
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.sort_criteria(vec![AscDesc::Desc(Member::Field(S("letter")))]);
let SearchResult { documents_ids, .. } = s.execute().unwrap();
@@ -388,7 +388,7 @@ fn test_distinct_placeholder_sort() {
]
"###);
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.sort_criteria(vec![
AscDesc::Desc(Member::Field(S("letter"))),
AscDesc::Desc(Member::Field(S("rank1"))),
@@ -443,7 +443,7 @@ fn test_distinct_words() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.query("the quick brown fox jumps over the lazy dog");
@@ -496,7 +496,7 @@ fn test_distinct_sort_words() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.query("the quick brown fox jumps over the lazy dog");
s.sort_criteria(vec![AscDesc::Desc(Member::Field(S("letter")))]);
@@ -569,7 +569,7 @@ fn test_distinct_all_candidates() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.sort_criteria(vec![AscDesc::Desc(Member::Field(S("rank1")))]);
s.exhaustive_number_hits(true);
@@ -592,7 +592,7 @@ fn test_distinct_typo() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.query("the quick brown fox jumps over the lazy dog");
s.terms_matching_strategy(TermsMatchingStrategy::Last);

View File

@@ -21,7 +21,7 @@ This module tests the following properties about the exactness ranking rule:
use crate::index::tests::TempIndex;
use crate::search::new::tests::collect_field_values;
use crate::{Criterion, Search, SearchResult, TermsMatchingStrategy};
use crate::{Criterion, SearchResult, TermsMatchingStrategy};
fn create_index_simple_ordered() -> TempIndex {
let index = TempIndex::new();
@@ -471,7 +471,7 @@ fn test_exactness_simple_ordered() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.query("the quick brown fox jumps over the lazy dog");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -503,7 +503,7 @@ fn test_exactness_simple_reversed() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.query("the quick brown fox jumps over the lazy dog");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -526,7 +526,7 @@ fn test_exactness_simple_reversed() {
]
"###);
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.query("the quick brown fox jumps over the lazy dog");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -556,7 +556,7 @@ fn test_exactness_simple_random() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.query("the quick brown fox jumps over the lazy dog");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -585,7 +585,7 @@ fn test_exactness_attribute_starts_with_simple() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.query("this balcony");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -611,7 +611,7 @@ fn test_exactness_attribute_starts_with_phrase() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.query("\"overlooking the sea\" is a beautiful balcony");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -631,7 +631,7 @@ fn test_exactness_attribute_starts_with_phrase() {
]
"###);
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.query("overlooking the sea is a beautiful balcony");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -660,7 +660,7 @@ fn test_exactness_all_candidates_with_typo() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.query("overlocking the sea is a beautiful balcony");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -696,7 +696,7 @@ fn test_exactness_after_words() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.query("the quick brown fox jumps over the lazy dog");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -744,7 +744,7 @@ fn test_words_after_exactness() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.query("the quick brown fox jumps over the lazy dog");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -792,7 +792,7 @@ fn test_proximity_after_exactness() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.query("the quick brown fox jumps over the lazy dog");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -829,7 +829,7 @@ fn test_proximity_after_exactness() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.query("the quick brown fox jumps over the lazy dog");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -862,7 +862,7 @@ fn test_exactness_followed_by_typo_prefer_no_typo_prefix() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.query("quick brown fox extra");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -897,7 +897,7 @@ fn test_typo_followed_by_exactness() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.query("extraordinarily quick brown fox");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);

View File

@@ -82,7 +82,7 @@ fn test_geo_sort() {
let rtxn = index.read_txn().unwrap();
let mut s = Search::new(&rtxn, &index);
let mut s = index.search(&rtxn);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.sort_criteria(vec![AscDesc::Asc(Member::Geo([0., 0.]))]);
@@ -118,7 +118,7 @@ fn test_geo_sort_with_following_ranking_rules() {
let rtxn = index.read_txn().unwrap();
let mut s = Search::new(&rtxn, &index);
let mut s = index.search(&rtxn);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.sort_criteria(vec![
AscDesc::Asc(Member::Geo([0., 0.])),
@@ -159,7 +159,7 @@ fn test_geo_sort_reached_max_bucket_size() {
let rtxn = index.read_txn().unwrap();
let mut s = Search::new(&rtxn, &index);
let mut s = index.search(&rtxn);
s.geo_max_bucket_size(2);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.sort_criteria(vec![
@@ -219,7 +219,7 @@ fn test_geo_sort_around_the_edge_of_the_flat_earth() {
let rtxn = index.read_txn().unwrap();
let mut s = Search::new(&rtxn, &index);
let mut s = index.search(&rtxn);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
// --- asc
@@ -295,7 +295,7 @@ fn geo_sort_mixed_with_words() {
let rtxn = index.read_txn().unwrap();
let mut s = Search::new(&rtxn, &index);
let mut s = index.search(&rtxn);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.sort_criteria(vec![AscDesc::Asc(Member::Geo([0., 0.]))]);
@@ -406,7 +406,7 @@ fn geo_sort_without_any_geo_faceted_documents() {
let rtxn = index.read_txn().unwrap();
let mut s = Search::new(&rtxn, &index);
let mut s = index.search(&rtxn);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.sort_criteria(vec![AscDesc::Asc(Member::Geo([0., 0.]))]);

View File

@@ -1,5 +1,5 @@
use crate::index::tests::TempIndex;
use crate::{Search, SearchResult};
use crate::SearchResult;
#[test]
fn test_kanji_language_detection() {
@@ -14,7 +14,7 @@ fn test_kanji_language_detection() {
.unwrap();
let txn = index.write_txn().unwrap();
let mut search = Search::new(&txn, &index);
let mut search = index.search(&txn);
search.query("東京");
let SearchResult { documents_ids, .. } = search.execute().unwrap();

View File

@@ -19,7 +19,7 @@ This module tests the following properties:
use crate::index::tests::TempIndex;
use crate::search::new::tests::collect_field_values;
use crate::{Criterion, Search, SearchResult, TermsMatchingStrategy};
use crate::{Criterion, SearchResult, TermsMatchingStrategy};
fn create_index() -> TempIndex {
let index = TempIndex::new();
@@ -78,7 +78,7 @@ fn test_2gram_simple() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("sun flower");
@@ -109,7 +109,7 @@ fn test_3gram_simple() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("sun flower s are");
let SearchResult { documents_ids, .. } = s.execute().unwrap();
@@ -129,7 +129,7 @@ fn test_2gram_typo() {
let index = create_index();
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("sun flawer");
let SearchResult { documents_ids, .. } = s.execute().unwrap();
@@ -159,7 +159,7 @@ fn test_no_disable_ngrams() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("sun flower ");
let SearchResult { documents_ids, .. } = s.execute().unwrap();
@@ -185,7 +185,7 @@ fn test_2gram_prefix() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("sun flow");
let SearchResult { documents_ids, .. } = s.execute().unwrap();
@@ -214,7 +214,7 @@ fn test_3gram_prefix() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("su nf l");
let SearchResult { documents_ids, .. } = s.execute().unwrap();
@@ -237,7 +237,7 @@ fn test_split_words() {
let index = create_index();
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("sunflower ");
let SearchResult { documents_ids, .. } = s.execute().unwrap();
@@ -266,7 +266,7 @@ fn test_disable_split_words() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("sunflower ");
let SearchResult { documents_ids, .. } = s.execute().unwrap();
@@ -286,7 +286,7 @@ fn test_2gram_split_words() {
let index = create_index();
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("sunf lower");
let SearchResult { documents_ids, .. } = s.execute().unwrap();
@@ -310,7 +310,7 @@ fn test_3gram_no_split_words() {
let index = create_index();
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("sunf lo wer");
let SearchResult { documents_ids, .. } = s.execute().unwrap();
@@ -333,7 +333,7 @@ fn test_3gram_no_typos() {
let index = create_index();
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("sunf la wer");
let SearchResult { documents_ids, .. } = s.execute().unwrap();
@@ -352,7 +352,7 @@ fn test_no_ngram_phrases() {
let index = create_index();
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("\"sun\" flower");
let SearchResult { documents_ids, .. } = s.execute().unwrap();
@@ -366,7 +366,7 @@ fn test_no_ngram_phrases() {
]
"###);
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("\"sun\" \"flower\"");
let SearchResult { documents_ids, .. } = s.execute().unwrap();
@@ -385,7 +385,7 @@ fn test_short_split_words() {
let index = create_index();
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("xyz");
let SearchResult { documents_ids, .. } = s.execute().unwrap();
@@ -412,7 +412,7 @@ fn test_split_words_never_disabled() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("the sunflower is tall");
let SearchResult { documents_ids, .. } = s.execute().unwrap();

View File

@@ -18,7 +18,7 @@ use std::collections::BTreeMap;
use crate::index::tests::TempIndex;
use crate::search::new::tests::collect_field_values;
use crate::{Criterion, Search, SearchResult, TermsMatchingStrategy};
use crate::{Criterion, SearchResult, TermsMatchingStrategy};
fn create_simple_index() -> TempIndex {
let index = TempIndex::new();
@@ -268,7 +268,7 @@ fn test_proximity_simple() {
let index = create_simple_index();
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("the quick brown fox jumps over the lazy dog");
let SearchResult { documents_ids, .. } = s.execute().unwrap();
@@ -295,7 +295,7 @@ fn test_proximity_split_word() {
let index = create_edge_cases_index();
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("sunflower wilting");
@@ -315,7 +315,7 @@ fn test_proximity_split_word() {
]
"###);
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("\"sun flower\" wilting");
@@ -342,7 +342,7 @@ fn test_proximity_split_word() {
.unwrap();
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("xyz wilting");
@@ -365,7 +365,7 @@ fn test_proximity_prefix_db() {
let index = create_edge_cases_index();
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("best s");
@@ -390,7 +390,7 @@ fn test_proximity_prefix_db() {
"###);
// Difference when using the `su` prefix, which is not in the prefix DB
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("best su");
@@ -417,7 +417,7 @@ fn test_proximity_prefix_db() {
// **proximity** prefix DB. In that case, its proximity score will always be
// the maximum. This happens for prefixes that are larger than 2 bytes.
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("best win");
@@ -441,7 +441,7 @@ fn test_proximity_prefix_db() {
// Now using `wint`, which is not in the prefix DB:
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("best wint");
@@ -465,7 +465,7 @@ fn test_proximity_prefix_db() {
// and using `wi` which is in the prefix DB and proximity prefix DB
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("best wi");

View File

@@ -8,7 +8,7 @@ implemented.
use crate::index::tests::TempIndex;
use crate::search::new::tests::collect_field_values;
use crate::{Criterion, Search, SearchResult, TermsMatchingStrategy};
use crate::{Criterion, SearchResult, TermsMatchingStrategy};
fn create_index() -> TempIndex {
let index = TempIndex::new();
@@ -57,7 +57,7 @@ fn test_trap_basic() {
let index = create_index();
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("summer holiday");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);

View File

@@ -17,9 +17,7 @@ use meili_snap::insta;
use crate::index::tests::TempIndex;
use crate::search::new::tests::collect_field_values;
use crate::{
score_details, AscDesc, Criterion, Member, Search, SearchResult, TermsMatchingStrategy,
};
use crate::{score_details, AscDesc, Criterion, Member, SearchResult, TermsMatchingStrategy};
fn create_index() -> TempIndex {
let index = TempIndex::new();
@@ -184,7 +182,7 @@ fn test_sort() {
let index = create_index();
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.sort_criteria(vec![AscDesc::Desc(Member::Field(S("letter")))]);
@@ -219,7 +217,7 @@ fn test_sort() {
]
"###);
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.sort_criteria(vec![AscDesc::Desc(Member::Field(S("rank")))]);
@@ -254,7 +252,7 @@ fn test_sort() {
]
"###);
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.sort_criteria(vec![AscDesc::Asc(Member::Field(S("vague")))]);
@@ -289,7 +287,7 @@ fn test_sort() {
]
"###);
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.sort_criteria(vec![AscDesc::Desc(Member::Field(S("vague")))]);
@@ -338,7 +336,7 @@ fn test_redacted() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.sort_criteria(vec![

View File

@@ -13,7 +13,7 @@ use std::collections::BTreeSet;
use std::iter::FromIterator;
use crate::index::tests::TempIndex;
use crate::{Search, SearchResult, TermsMatchingStrategy};
use crate::{SearchResult, TermsMatchingStrategy};
fn create_index() -> TempIndex {
let index = TempIndex::new();
@@ -79,7 +79,7 @@ fn test_ignore_stop_words() {
let txn = index.read_txn().unwrap();
// `the` is treated as a prefix here, so it's not ignored
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.query("xyz to the");
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -132,7 +132,7 @@ fn test_ignore_stop_words() {
"###);
// `xyz` is treated as a prefix here, so it's not ignored
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.query("to the xyz");
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -185,7 +185,7 @@ fn test_ignore_stop_words() {
"###);
// `xyz` is not treated as a prefix anymore because of the trailing space, so it's ignored
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.query("to the xyz ");
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -237,7 +237,7 @@ fn test_ignore_stop_words() {
]
"###);
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.query("to the dragon xyz");
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -296,7 +296,7 @@ fn test_stop_words_in_phrase() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.query("\"how to train your dragon\"");
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -389,7 +389,7 @@ fn test_stop_words_in_phrase() {
]
"###);
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.query("how \"to\" train \"the");
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -441,7 +441,7 @@ fn test_stop_words_in_phrase() {
]
"###);
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.query("how \"to\" train \"The dragon");
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -449,7 +449,7 @@ fn test_stop_words_in_phrase() {
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[3, 6, 5]");
insta::assert_snapshot!(format!("{document_scores:#?}"));
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.query("\"to\"");
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);

View File

@@ -22,7 +22,7 @@ use std::collections::BTreeMap;
use crate::index::tests::TempIndex;
use crate::search::new::tests::collect_field_values;
use crate::{Criterion, Search, SearchResult, TermsMatchingStrategy};
use crate::{Criterion, SearchResult, TermsMatchingStrategy};
fn create_index() -> TempIndex {
let index = TempIndex::new();
@@ -157,7 +157,7 @@ fn test_no_typo() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("the quick brown fox jumps over the lazy dog");
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
@@ -182,7 +182,7 @@ fn test_default_typo() {
insta::assert_debug_snapshot!(tt, @"9");
// 0 typo
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("the quick brown fox jumps over the lazy dog");
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
@@ -202,7 +202,7 @@ fn test_default_typo() {
"###);
// 1 typo on one word, replaced letter
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("the quack brown fox jumps over the lazy dog");
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
@@ -216,7 +216,7 @@ fn test_default_typo() {
"###);
// 1 typo on one word, missing letter, extra letter
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("the quicest brownest fox jummps over the laziest dog");
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
@@ -235,7 +235,7 @@ fn test_phrase_no_typo_allowed() {
let index = create_index();
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("the \"quick brewn\" fox jumps over the lazy dog");
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
@@ -265,7 +265,7 @@ fn test_typo_exact_word() {
insta::assert_debug_snapshot!(tt, @"9");
// don't match quivk
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("the quick brown fox jumps over the lazy dog");
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
@@ -279,7 +279,7 @@ fn test_typo_exact_word() {
"###);
// Don't match quick
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("the quack brown fox jumps over the lazy dog");
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
@@ -287,7 +287,7 @@ fn test_typo_exact_word() {
insta::assert_snapshot!(format!("{document_scores:?}"), @"[]");
// words not in exact_words (quicest, jummps) have normal typo handling
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("the quicest brownest fox jummps over the laziest dog");
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
@@ -301,7 +301,7 @@ fn test_typo_exact_word() {
"###);
// exact words do not disable prefix (sunflowering OK, but no sunflowar)
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("network interconnection sunflower");
@@ -340,7 +340,7 @@ fn test_typo_exact_attribute() {
insta::assert_debug_snapshot!(tt, @"9");
// Exact match returns both exact attributes and tolerant ones.
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("the quick brown fox jumps over the lazy dog");
@@ -365,7 +365,7 @@ fn test_typo_exact_attribute() {
"###);
// 1 typo only returns the tolerant attribute
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("the quidk brown fox jumps over the lazy dog");
@@ -386,7 +386,7 @@ fn test_typo_exact_attribute() {
"###);
// combine with exact words
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("the quivk brown fox jumps over the lazy dog");
@@ -414,7 +414,7 @@ fn test_typo_exact_attribute() {
"###);
// No result in tolerant attribute
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("the quicest brownest fox jummps over the laziest dog");
@@ -428,7 +428,7 @@ fn test_ngram_typos() {
let index = create_index();
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("the extra lagant fox skyrocketed over the languorous dog");
@@ -442,7 +442,7 @@ fn test_ngram_typos() {
]
"###);
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("the ex tra lagant fox skyrocketed over the languorous dog");
@@ -463,7 +463,7 @@ fn test_typo_ranking_rule_not_preceded_by_words_ranking_rule() {
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("the quick brown fox jumps over the lazy dog");
@@ -499,7 +499,7 @@ fn test_typo_ranking_rule_not_preceded_by_words_ranking_rule() {
})
.unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("the quick brown fox jumps over the lazy dog");
@@ -517,7 +517,7 @@ fn test_typo_bucketing() {
let txn = index.read_txn().unwrap();
// First do the search with just the Words ranking rule
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("network interconnection sunflower");
@@ -545,7 +545,7 @@ fn test_typo_bucketing() {
.unwrap();
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("network interconnection sunflower");
@@ -564,7 +564,7 @@ fn test_typo_bucketing() {
]
"###);
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("network interconnection sun flower");
@@ -600,7 +600,7 @@ fn test_typo_synonyms() {
.unwrap();
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("the quick brown fox jumps over the lackadaisical dog");
@@ -616,7 +616,7 @@ fn test_typo_synonyms() {
]
"###);
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("the fast brownish fox jumps over the lackadaisical dog");

View File

@@ -17,7 +17,7 @@ because the typo ranking rule before it only used the derivation `beautiful`.
use crate::index::tests::TempIndex;
use crate::search::new::tests::collect_field_values;
use crate::{Criterion, Search, SearchResult, TermsMatchingStrategy};
use crate::{Criterion, SearchResult, TermsMatchingStrategy};
fn create_index() -> TempIndex {
let index = TempIndex::new();
@@ -87,7 +87,7 @@ fn test_trap_basic_and_complex1() {
let index = create_index();
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("beautiful summer");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -110,7 +110,7 @@ fn test_trap_complex2() {
let index = create_index();
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("delicious sweet dessert");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);

View File

@@ -14,7 +14,7 @@ This module tests the following properties:
use crate::index::tests::TempIndex;
use crate::search::new::tests::collect_field_values;
use crate::{Criterion, Search, SearchResult, TermsMatchingStrategy};
use crate::{Criterion, SearchResult, TermsMatchingStrategy};
fn create_index() -> TempIndex {
let index = TempIndex::new();
@@ -131,7 +131,7 @@ fn test_words_tms_last_simple() {
let index = create_index();
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.query("the quick brown fox jumps over the lazy dog");
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -166,7 +166,7 @@ fn test_words_tms_last_simple() {
]
"###);
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.query("extravagant the quick brown fox jumps over the lazy dog");
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -180,7 +180,7 @@ fn test_words_tms_last_phrase() {
let index = create_index();
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.query("\"the quick brown fox\" jumps over the lazy dog");
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -205,7 +205,7 @@ fn test_words_tms_last_phrase() {
]
"###);
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.query("\"the quick brown fox\" jumps over the \"lazy\" dog");
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -227,7 +227,7 @@ fn test_words_tms_last_phrase() {
]
"###);
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.query("\"the quick brown fox jumps over the lazy dog\"");
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -243,7 +243,7 @@ fn test_words_tms_last_phrase() {
]
"###);
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.query("\"the quick brown fox jumps over the lazy dog");
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -270,7 +270,7 @@ fn test_words_proximity_tms_last_simple() {
.unwrap();
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.query("the quick brown fox jumps over the lazy dog");
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -305,7 +305,7 @@ fn test_words_proximity_tms_last_simple() {
]
"###);
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.query("the brown quick fox jumps over the lazy dog");
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -351,7 +351,7 @@ fn test_words_proximity_tms_last_phrase() {
.unwrap();
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.query("the \"quick brown\" fox jumps over the lazy dog");
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -382,7 +382,7 @@ fn test_words_proximity_tms_last_phrase() {
]
"###);
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.query("the \"quick brown\" \"fox jumps\" over the lazy dog");
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -421,7 +421,7 @@ fn test_words_tms_all() {
.unwrap();
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.query("the quick brown fox jumps over the lazy dog");
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
@@ -447,7 +447,7 @@ fn test_words_tms_all() {
]
"###);
let mut s = Search::new(&txn, &index);
let mut s = index.search(&txn);
s.query("extravagant");
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);

View File

@@ -6,7 +6,10 @@ use roaring::RoaringBitmap;
use super::ranking_rules::{RankingRule, RankingRuleOutput, RankingRuleQueryTrait};
use super::VectorStoreStats;
use crate::progress::Progress;
use crate::score_details::{self, ScoreDetails};
use crate::search::new::ranking_rules::RankingRuleId;
use crate::search::steps::{ComputingBucketSortStep, RankingRuleStep};
use crate::vector::{DistributionShift, Embedder, VectorStore};
use crate::{DocumentId, Result, SearchContext, SearchLogger, TimeBudget};
@@ -94,8 +97,8 @@ impl<Q: RankingRuleQueryTrait> VectorSort<Q> {
}
impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for VectorSort<Q> {
fn id(&self) -> String {
"vector_sort".to_owned()
fn id(&self) -> RankingRuleId {
RankingRuleId::VectorSort
}
#[tracing::instrument(level = "trace", skip_all, target = "search::vector_sort")]
@@ -106,7 +109,10 @@ impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for VectorSort<Q> {
universe: &RoaringBitmap,
query: &Q,
time_budget: &TimeBudget,
progress: &Progress,
) -> Result<()> {
progress.update_progress(ComputingBucketSortStep::from(self.id()));
let _step = progress.update_progress_scoped(RankingRuleStep::StartIteration);
assert!(self.query.is_none());
self.query = Some(query.clone());
@@ -123,7 +129,10 @@ impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for VectorSort<Q> {
_logger: &mut dyn SearchLogger<Q>,
universe: &RoaringBitmap,
time_budget: &TimeBudget,
progress: &Progress,
) -> Result<Option<RankingRuleOutput<Q>>> {
progress.update_progress(ComputingBucketSortStep::from(self.id()));
let _step = progress.update_progress_scoped(RankingRuleStep::NextBucket);
let query = self.query.as_ref().unwrap().clone();
let vector_candidates = &self.vector_candidates & universe;
@@ -158,7 +167,7 @@ impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for VectorSort<Q> {
}));
}
self.next_bucket(ctx, _logger, universe, time_budget)
self.next_bucket(ctx, _logger, universe, time_budget, progress)
}
#[tracing::instrument(level = "trace", skip_all, target = "search::vector_sort")]
@@ -171,7 +180,9 @@ impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for VectorSort<Q> {
_ctx: &mut SearchContext<'ctx>,
_logger: &mut dyn SearchLogger<Q>,
universe: &RoaringBitmap,
progress: &Progress,
) -> Result<Poll<RankingRuleOutput<Q>>> {
let _step = progress.update_progress_scoped(RankingRuleStep::NonBlockingNextBucket);
let query = self.query.as_ref().unwrap().clone();
let vector_candidates = &self.vector_candidates & universe;

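Every ranking rule method now opens with the same two progress calls. A condensed view of that pattern, using only calls that appear verbatim in this hunk (the `From<RankingRuleId>` conversion is called here but defined elsewhere in the crate):

// 1. Coarse step: report which ranking rule the bucket sort is running.
progress.update_progress(ComputingBucketSortStep::from(self.id()));
// 2. Fine step: report which trait method is running. The returned guard
//    marks the step finished when `_step` is dropped at the end of the
//    method, so repeated calls accumulate into one duration per step.
let _step = progress.update_progress_scoped(RankingRuleStep::NextBucket);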
View File

@@ -2,6 +2,7 @@ use std::sync::Arc;
use roaring::RoaringBitmap;
use crate::progress::Progress;
use crate::score_details::{self, ScoreDetails};
use crate::vector::{Embedder, VectorStore};
use crate::{filtered_universe, DocumentId, Filter, Index, Result, SearchResult};
@@ -18,6 +19,7 @@ pub struct Similar<'a> {
embedder: Arc<Embedder>,
ranking_score_threshold: Option<f64>,
quantized: bool,
progress: &'a Progress,
}
impl<'a> Similar<'a> {
@@ -31,6 +33,7 @@ impl<'a> Similar<'a> {
embedder_name: String,
embedder: Arc<Embedder>,
quantized: bool,
progress: &'a Progress,
) -> Self {
Self {
id,
@@ -43,6 +46,7 @@ impl<'a> Similar<'a> {
embedder,
ranking_score_threshold: None,
quantized,
progress,
}
}
@@ -57,7 +61,7 @@ impl<'a> Similar<'a> {
}
pub fn execute(&self) -> Result<SearchResult> {
let mut universe = filtered_universe(self.index, self.rtxn, &self.filter)?;
let mut universe = filtered_universe(self.index, self.rtxn, &self.filter, self.progress)?;
// we never want the source docid back in the results
universe.remove(self.id);

View File

@@ -0,0 +1,59 @@
use crate::make_enum_progress;
make_enum_progress! {
pub enum SearchStep {
Tokenize,
Embed,
Filter,
ResolveUniverse,
KeywordSearch,
PlaceholderSearch,
SemanticSearch,
Format,
FacetDistribution,
Federation,
Personalization,
}
}
make_enum_progress! {
pub enum ComputingBucketSortStep {
MergeCandidates,
Distinct,
Words,
Typo,
Proximity,
AttributePosition,
WordPosition,
Exactness,
Sort,
GeoSort,
VectorSort,
Asc,
Desc,
}
}
make_enum_progress! {
pub enum RankingRuleStep {
StartIteration,
NextBucket,
NonBlockingNextBucket,
}
}
make_enum_progress! {
pub enum FederatingResultsStep {
WaitForRemoteResults,
MergeFacets,
MergeResults,
}
}
make_enum_progress! {
pub enum TotalProcessingTimeStep {
WaitForPermit,
Search,
Similar,
}
}
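These enums feed the drop-based scoped-step mechanism used throughout the hunks above. A self-contained sketch of that mechanism with a stand-in Progress type (the real crate::progress::Progress is not reproduced here; only the guard-on-drop behavior is modeled):

use std::time::Instant;

struct Progress;

struct ScopedStep {
    name: &'static str,
    started: Instant,
}

impl Progress {
    // Report a step immediately.
    fn update_progress(&self, name: &'static str) {
        println!("step started: {name}");
    }

    // Return a guard; the step is marked finished when the guard is
    // dropped at the end of the caller's scope.
    fn update_progress_scoped(&self, name: &'static str) -> ScopedStep {
        ScopedStep { name, started: Instant::now() }
    }
}

impl Drop for ScopedStep {
    fn drop(&mut self) {
        println!("step finished: {} after {:?}", self.name, self.started.elapsed());
    }
}

fn main() {
    let progress = Progress;
    progress.update_progress("SearchStep::KeywordSearch");
    {
        // Bind the guard to a named variable (`_step`, not `_`): `let _ = ...`
        // would drop it immediately and record a near-zero duration.
        let _step = progress.update_progress_scoped("RankingRuleStep::NextBucket");
        // ... bucket computation would happen here ...
    } // `_step` dropped here: the NextBucket duration is recorded.
}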

View File

@@ -26,6 +26,7 @@ pub(crate) struct TempIndex {
pub inner: Index,
pub indexer_config: IndexerConfig,
pub index_documents_config: IndexDocumentsConfig,
pub progress: Progress,
_tempdir: TempDir,
}
@@ -47,7 +48,9 @@ impl TempIndex {
let inner = Index::new(options, _tempdir.path(), true).unwrap();
let indexer_config = IndexerConfig::default();
let index_documents_config = IndexDocumentsConfig::default();
Self { inner, indexer_config, index_documents_config, _tempdir }
let progress = Progress::default();
Self { inner, indexer_config, index_documents_config, progress, _tempdir }
}
/// Creates a temporary index, with a default `4096 * 2000` size. This should be enough for
/// most tests.
@@ -210,6 +213,10 @@ impl TempIndex {
pub fn delete_document(&self, external_document_id: &str) {
self.delete_documents(vec![external_document_id.to_string()])
}
pub fn search<'a>(&'a self, rtxn: &'a heed::RoTxn<'a>) -> Search<'a> {
self.inner.search(rtxn, &self.progress)
}
}
#[test]
@@ -1095,7 +1102,7 @@ fn bug_3021_fourth() {
"###);
let rtxn = index.read_txn().unwrap();
let search = Search::new(&rtxn, &index);
let search = index.search(&rtxn);
let SearchResult {
matching_words: _,
candidates: _,

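The upshot for tests: TempIndex now owns a Progress and exposes the search helper above, so most call sites shrink from the three-argument Search::new to a single line. A sketch of the two equivalent forms inside a test, composed only of calls that appear in these hunks:

let rtxn = index.read_txn().unwrap();

// Explicit form, used by tests that do not go through TempIndex:
let progress = Progress::default();
let mut s = Search::new(&rtxn, &index, &progress);

// Helper form: TempIndex supplies its own Progress.
let mut s = index.search(&rtxn);
s.query("hello");
let SearchResult { documents_ids, .. } = s.execute().unwrap();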
View File

@@ -1292,7 +1292,7 @@ mod tests {
let rtxn = index.read_txn().unwrap();
// testing the simple query search
let mut search = crate::Search::new(&rtxn, &index);
let mut search = index.search(&rtxn);
search.query("document");
search.terms_matching_strategy(TermsMatchingStrategy::default());
// all documents should be returned
@@ -1333,7 +1333,7 @@ mod tests {
assert!(documents_ids.is_empty()); // nested is not searchable
// testing the filters
let mut search = crate::Search::new(&rtxn, &index);
let mut search = index.search(&rtxn);
search.filter(crate::Filter::from_str(r#"title = "The first document""#).unwrap().unwrap());
let crate::SearchResult { documents_ids, .. } = search.execute().unwrap();
assert_eq!(documents_ids, vec![1]);
@@ -1358,6 +1358,7 @@ mod tests {
#[test]
fn index_documents_with_nested_primary_key() {
let index = TempIndex::new();
let progress = Progress::default();
index
.update_settings(|settings| {
@@ -1397,7 +1398,7 @@ mod tests {
let rtxn = index.read_txn().unwrap();
// testing the simple query search
let mut search = crate::Search::new(&rtxn, &index);
let mut search = crate::Search::new(&rtxn, &index, &progress);
search.query("document");
search.terms_matching_strategy(TermsMatchingStrategy::default());
// all documents should be returned
@@ -1453,6 +1454,7 @@ mod tests {
#[test]
fn test_facets_generation() {
let index = TempIndex::new();
let progress = Progress::default();
index
.add_documents(documents!([
@@ -1507,7 +1509,7 @@ mod tests {
let rtxn = index.read_txn().unwrap();
for (s, i) in [("zeroth", 0), ("first", 1), ("second", 2), ("third", 3)] {
let mut search = crate::Search::new(&rtxn, &index);
let mut search = crate::Search::new(&rtxn, &index, &progress);
let filter = format!(r#""dog.race.bernese mountain" = {s}"#);
search.filter(crate::Filter::from_str(&filter).unwrap().unwrap());
let crate::SearchResult { documents_ids, .. } = search.execute().unwrap();
@@ -1545,7 +1547,7 @@ mod tests {
let rtxn = index.read_txn().unwrap();
let mut search = crate::Search::new(&rtxn, &index);
let mut search = crate::Search::new(&rtxn, &index, &progress);
search.sort_criteria(vec![crate::AscDesc::Asc(crate::Member::Field(S(
"dog.race.bernese mountain",
)))]);
@@ -3601,6 +3603,7 @@ mod tests {
#[test]
fn delete_words_exact_attributes() {
let index = TempIndex::new();
let progress = Progress::default();
index
.update_settings(|settings| {
@@ -3639,7 +3642,7 @@ mod tests {
let words = index.words_fst(&txn).unwrap().into_stream().into_strs().unwrap();
insta::assert_snapshot!(format!("{words:?}"), @r###"["hello"]"###);
let mut s = Search::new(&txn, &index);
let mut s = Search::new(&txn, &index, &progress);
s.query("hello");
let crate::SearchResult { documents_ids, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0]");

View File

@@ -25,7 +25,8 @@ macro_rules! test_distinct {
let rtxn = index.read_txn().unwrap();
let mut search = Search::new(&rtxn, &index);
let progress = Progress::default();
let mut search = Search::new(&rtxn, &index, &progress);
search.query(search::TEST_QUERY);
search.limit($limit);
search.offset($offset);

View File

@@ -1,5 +1,7 @@
use either::{Either, Left, Right};
use milli::progress::Progress;
use milli::{Criterion, Filter, Search, SearchResult, TermsMatchingStrategy};
use Criterion::*;
use crate::search::{self, EXTERNAL_DOCUMENTS_IDS};
@@ -15,7 +17,8 @@ macro_rules! test_filter {
let filter_conditions =
Filter::from_array::<Vec<Either<Vec<&str>, &str>>, _>($filter).unwrap().unwrap();
let mut search = Search::new(&rtxn, &index);
let progress = Progress::default();
let mut search = Search::new(&rtxn, &index, &progress);
search.query(search::TEST_QUERY);
search.limit(EXTERNAL_DOCUMENTS_IDS.len());

View File

@@ -24,7 +24,8 @@ fn test_phrase_search_with_stop_words_given_criteria(criteria: &[Criterion]) {
// Phrase search containing stop words
let txn = index.read_txn().unwrap();
let mut search = Search::new(&txn, &index);
let progress = Progress::default();
let mut search = Search::new(&txn, &index, &progress);
search.query("\"the use of force\"");
search.limit(10);
search.terms_matching_strategy(TermsMatchingStrategy::All);

View File

@@ -27,7 +27,8 @@ macro_rules! test_criterion {
let index = search::setup_search_index_with_criteria(&criteria);
let rtxn = index.read_txn().unwrap();
let mut search = Search::new(&rtxn, &index);
let progress = Progress::default();
let mut search = Search::new(&rtxn, &index, &progress);
search.query(search::TEST_QUERY);
search.limit(EXTERNAL_DOCUMENTS_IDS.len());
search.terms_matching_strategy($optional_word);
@@ -241,7 +242,8 @@ fn criteria_mixup() {
let rtxn = index.read_txn().unwrap();
let mut search = Search::new(&rtxn, &index);
let progress = Progress::default();
let mut search = Search::new(&rtxn, &index, &progress);
search.query(search::TEST_QUERY);
search.limit(EXTERNAL_DOCUMENTS_IDS.len());
search.terms_matching_strategy(ALLOW_OPTIONAL_WORDS);
@@ -365,7 +367,8 @@ fn criteria_ascdesc() {
let rtxn = index.read_txn().unwrap();
let mut search = Search::new(&rtxn, &index);
let progress = Progress::default();
let mut search = Search::new(&rtxn, &index, &progress);
search.limit(ASC_DESC_CANDIDATES_THRESHOLD + 1);
let SearchResult { documents_ids, .. } = search.execute().unwrap();

View File

@@ -1,4 +1,5 @@
use big_s::S;
use milli::progress::Progress;
use milli::Criterion::{Attribute, Exactness, Proximity, Typo, Words};
use milli::{AscDesc, Error, Member, Search, TermsMatchingStrategy, UserError};
@@ -11,7 +12,8 @@ fn sort_ranking_rule_missing() {
let index = search::setup_search_index_with_criteria(&criteria);
let rtxn = index.read_txn().unwrap();
let mut search = Search::new(&rtxn, &index);
let progress = Progress::default();
let mut search = Search::new(&rtxn, &index, &progress);
search.query(search::TEST_QUERY);
search.limit(EXTERNAL_DOCUMENTS_IDS.len());

View File

@@ -22,7 +22,8 @@ fn test_typo_tolerance_one_typo() {
{
let txn = index.read_txn().unwrap();
let mut search = Search::new(&txn, &index);
let progress = Progress::default();
let mut search = Search::new(&txn, &index, &progress);
search.query("zeal");
search.limit(10);
@@ -31,7 +32,8 @@ fn test_typo_tolerance_one_typo() {
let result = search.execute().unwrap();
assert_eq!(result.documents_ids.len(), 1);
let mut search = Search::new(&txn, &index);
let progress = Progress::default();
let mut search = Search::new(&txn, &index, &progress);
search.query("zean");
search.limit(10);
@@ -49,7 +51,8 @@ fn test_typo_tolerance_one_typo() {
builder.execute(&|| false, &Progress::default(), Default::default()).unwrap();
// typos are now supported on 4-letter words
let mut search = Search::new(&txn, &index);
let progress = Progress::default();
let mut search = Search::new(&txn, &index, &progress);
search.query("zean");
search.limit(10);
@@ -68,7 +71,8 @@ fn test_typo_tolerance_two_typo() {
{
let txn = index.read_txn().unwrap();
let mut search = Search::new(&txn, &index);
let progress = Progress::default();
let mut search = Search::new(&txn, &index, &progress);
search.query("zealand");
search.limit(10);
@@ -77,7 +81,8 @@ fn test_typo_tolerance_two_typo() {
let result = search.execute().unwrap();
assert_eq!(result.documents_ids.len(), 1);
let mut search = Search::new(&txn, &index);
let progress = Progress::default();
let mut search = Search::new(&txn, &index, &progress);
search.query("zealemd");
search.limit(10);
@@ -95,7 +100,8 @@ fn test_typo_tolerance_two_typo() {
builder.execute(&|| false, &Progress::default(), Default::default()).unwrap();
// typos are now supported on 4-letter words
let mut search = Search::new(&txn, &index);
let progress = Progress::default();
let mut search = Search::new(&txn, &index, &progress);
search.query("zealemd");
search.limit(10);
@@ -164,7 +170,8 @@ fn test_typo_disabled_on_word() {
{
let txn = index.read_txn().unwrap();
let mut search = Search::new(&txn, &index);
let progress = Progress::default();
let mut search = Search::new(&txn, &index, &progress);
search.query("zealand");
search.limit(10);
@@ -184,7 +191,8 @@ fn test_typo_disabled_on_word() {
builder.set_exact_words(exact_words);
builder.execute(&|| false, &Progress::default(), Default::default()).unwrap();
let mut search = Search::new(&txn, &index);
let progress = Progress::default();
let mut search = Search::new(&txn, &index, &progress);
search.query("zealand");
search.limit(10);
@@ -203,7 +211,8 @@ fn test_disable_typo_on_attribute() {
{
let txn = index.read_txn().unwrap();
let mut search = Search::new(&txn, &index);
let progress = Progress::default();
let mut search = Search::new(&txn, &index, &progress);
// typo in `antebel(l)um`
search.query("antebelum");
search.limit(10);
@@ -222,7 +231,8 @@ fn test_disable_typo_on_attribute() {
builder.set_exact_attributes(vec!["description".to_string()].into_iter().collect());
builder.execute(&|| false, &Progress::default(), Default::default()).unwrap();
let mut search = Search::new(&txn, &index);
let progress = Progress::default();
let mut search = Search::new(&txn, &index, &progress);
search.query("antebelum");
search.limit(10);

View File

@@ -1,8 +0,0 @@
extends: ["spectral:oas"]
rules:
# Disable the security scopes warning
# Meilisearch uses Bearer authentication with API key permissions that work like scopes,
# but OpenAPI 3.0 doesn't support scopes with HTTP Bearer authentication (only OAuth2).
# The security annotations document required permissions but are not OAuth2 scopes.
oas3-operation-security-defined: off

View File

@@ -10,5 +10,3 @@ serde_json = "1.0"
clap = { version = "4.5.52", features = ["derive"] }
anyhow = "1.0.100"
utoipa = "5.4.0"
reqwest = { version = "0.12", features = ["blocking"] }
regex = "1.10"

View File

@@ -1,65 +1,21 @@
use std::borrow::Cow;
use std::collections::hash_map::Entry;
use std::collections::HashMap;
use std::path::PathBuf;
use anyhow::{Context, Result};
use anyhow::Result;
use clap::Parser;
use meilisearch::routes::MeilisearchApi;
use serde_json::{json, Value};
use utoipa::OpenApi;
/// HTTP methods supported in OpenAPI specifications.
const HTTP_METHODS: &[&str] = &["get", "post", "put", "patch", "delete"];
/// Type alias for the mapping from OpenAPI keys to their code samples.
type CodeSamplesMap = HashMap<String, Vec<CodeSample>>;
/// Type alias for the mapping from OpenAPI keys to sample IDs.
type KeyMapping = HashMap<String, String>;
/// Language whose samples file in the documentation repository contains the key mapping
const DOCS_LANG: &str = "cURL";
/// Mapping of repository URLs to language names.
/// The "cURL" entry is special: it contains the key mapping used to resolve sample IDs for all SDKs.
const CODE_SAMPLES: &[(&str, &str)] = &[
("https://raw.githubusercontent.com/meilisearch/documentation/refs/heads/main/.code-samples.meilisearch.yaml", "cURL"),
("https://raw.githubusercontent.com/meilisearch/meilisearch-dotnet/refs/heads/main/.code-samples.meilisearch.yaml", "C#"),
("https://raw.githubusercontent.com/meilisearch/meilisearch-dart/refs/heads/main/.code-samples.meilisearch.yaml", "Dart"),
("https://raw.githubusercontent.com/meilisearch/meilisearch-go/refs/heads/main/.code-samples.meilisearch.yaml", "Go"),
("https://raw.githubusercontent.com/meilisearch/meilisearch-java/refs/heads/main/.code-samples.meilisearch.yaml", "Java"),
("https://raw.githubusercontent.com/meilisearch/meilisearch-js/refs/heads/main/.code-samples.meilisearch.yaml", "JS"),
("https://raw.githubusercontent.com/meilisearch/meilisearch-php/refs/heads/main/.code-samples.meilisearch.yaml", "PHP"),
("https://raw.githubusercontent.com/meilisearch/meilisearch-python/refs/heads/main/.code-samples.meilisearch.yaml", "Python"),
("https://raw.githubusercontent.com/meilisearch/meilisearch-ruby/refs/heads/main/.code-samples.meilisearch.yaml", "Ruby"),
("https://raw.githubusercontent.com/meilisearch/meilisearch-rust/refs/heads/main/.code-samples.meilisearch.yaml", "Rust"),
("https://raw.githubusercontent.com/meilisearch/meilisearch-swift/refs/heads/main/.code-samples.meilisearch.yaml", "Swift"),
];
#[derive(Parser)]
#[command(name = "openapi-generator")]
#[command(about = "Generate OpenAPI specification for Meilisearch")]
struct Cli {
/// Output file path (default: meilisearch-openapi.json)
/// Output file path (default: meilisearch.json)
#[arg(short, long, value_name = "FILE")]
output: Option<PathBuf>,
/// Pretty print the JSON output
#[arg(short, long)]
pretty: bool,
/// Include Mintlify code samples from SDK repositories
#[arg(long)]
with_mintlify_code_samples: bool,
/// Debug mode: display the mapping table and code samples
#[arg(long)]
debug: bool,
/// Check that all routes have a summary (useful for CI)
#[arg(long)]
check_summaries: bool,
}
fn main() -> Result<()> {
@@ -68,31 +24,14 @@ fn main() -> Result<()> {
// Generate the OpenAPI specification
let openapi = MeilisearchApi::openapi();
// Convert to serde_json::Value for modification
let mut openapi_value: Value = serde_json::to_value(&openapi)?;
// Fetch and add code samples if enabled
if cli.with_mintlify_code_samples {
let code_samples = fetch_all_code_samples(cli.debug)?;
add_code_samples_to_openapi(&mut openapi_value, &code_samples, cli.debug)?;
}
// Clean up null descriptions in tags
clean_null_descriptions(&mut openapi_value);
// Check that all routes have summaries if requested
if cli.check_summaries {
check_all_routes_have_summaries(&openapi_value)?;
}
// Determine output path
let output_path = cli.output.unwrap_or_else(|| PathBuf::from("meilisearch-openapi.json"));
let output_path = cli.output.unwrap_or_else(|| PathBuf::from("meilisearch.json"));
// Serialize to JSON
let json = if cli.pretty {
serde_json::to_string_pretty(&openapi_value)?
serde_json::to_string_pretty(&openapi)?
} else {
serde_json::to_string(&openapi_value)?
serde_json::to_string(&openapi)?
};
// Write to file
@@ -102,651 +41,3 @@ fn main() -> Result<()> {
Ok(())
}
/// Code sample for a specific language.
#[derive(Debug, Clone, PartialEq, Eq)]
struct CodeSample {
lang: String,
source: String,
}
/// Fetches and parses code samples from all SDK repositories.
///
/// Returns a map from OpenAPI key (e.g., `"get_indexes"`) to a list of code samples
/// for different languages.
fn fetch_all_code_samples(debug: bool) -> Result<CodeSamplesMap> {
// First, fetch the documentation file to get the OpenAPI key -> code sample ID mapping
let (docs_url, _) = CODE_SAMPLES
.iter()
.find(|(_, lang)| *lang == DOCS_LANG)
.context("Documentation source not found in CODE_SAMPLES")?;
let docs_content = reqwest::blocking::get(*docs_url)
.context("Failed to fetch documentation code samples")?
.text()
.context("Failed to read documentation code samples response")?;
// Build mapping from OpenAPI key to code sample ID (only first match per key)
let openapi_key_to_sample_id = build_openapi_key_mapping(&docs_content);
// Build final result
let mut all_samples: CodeSamplesMap = HashMap::new();
// Loop through all CODE_SAMPLES files
for (url, lang) in CODE_SAMPLES {
// Fetch content (reuse docs_content for documentation)
let content: Cow<'_, str> = if *lang == DOCS_LANG {
Cow::Borrowed(&docs_content)
} else {
match reqwest::blocking::get(*url).and_then(|r| r.text()) {
Ok(text) => Cow::Owned(text),
Err(e) => {
eprintln!("Warning: Failed to fetch code samples for {}: {}", lang, e);
continue;
}
}
};
// Parse all code samples from this file
let sample_id_to_code = parse_code_samples_from_file(&content);
// Add to result using the mapping
for (openapi_key, sample_id) in &openapi_key_to_sample_id {
if let Some(source) = sample_id_to_code.get(sample_id) {
all_samples
.entry(openapi_key.clone())
.or_default()
.push(CodeSample { lang: lang.to_string(), source: source.clone() });
}
}
}
// Debug mode: display mapping table and code samples
if debug {
println!("\n=== OpenAPI Key to Sample ID Mapping ===\n");
let mut keys: Vec<_> = openapi_key_to_sample_id.keys().collect();
keys.sort();
for key in keys {
println!(" {} -> {}", key, openapi_key_to_sample_id[key]);
}
println!("\n=== Code Samples ===\n");
let mut sample_keys: Vec<_> = all_samples.keys().collect();
sample_keys.sort();
for key in sample_keys {
let samples = &all_samples[key];
let langs: Vec<_> = samples.iter().map(|s| s.lang.as_str()).collect();
println!(" {} -> {}", key, langs.join(", "));
}
println!();
}
Ok(all_samples)
}
/// Builds a mapping from OpenAPI key to code sample ID from the documentation file.
///
/// The OpenAPI key is found on a line starting with `# ` (hash + space), containing a single word
/// that starts with an HTTP method followed by an underscore (e.g., `# get_indexes`).
/// The code sample ID is the first word of the next line.
/// Only keeps the first code sample ID per OpenAPI key.
///
/// # Example
///
/// ```yaml
/// # get_indexes
/// get_indexes_1: |-
/// curl \
/// -X GET 'MEILISEARCH_URL/indexes'
/// get_indexes_2: |-
/// curl \
/// -X GET 'MEILISEARCH_URL/indexes?limit=5'
/// # post_indexes
/// create_indexes_1: |-
/// curl \
/// -X POST 'MEILISEARCH_URL/indexes'
/// ```
///
/// This produces: `{"get_indexes": "get_indexes_1", "post_indexes": "create_indexes_1"}`
fn build_openapi_key_mapping(content: &str) -> KeyMapping {
let mut mapping = KeyMapping::new();
let lines: Vec<_> = content.lines().collect();
for window in lines.windows(2) {
let [line, next_line] = window else { continue };
// Check if line starts with "# " and extract the word
let Some(word) = line.strip_prefix("# ").map(str::trim) else {
continue;
};
// Must be a single word (no spaces) starting with an HTTP method prefix
if word.contains(' ') || !is_http_method_prefixed(word) {
continue;
}
// Extract sample ID from next line (first word before `:`)
let sample_id = next_line.split(':').next().map(str::trim).filter(|s| !s.is_empty());
// Only insert if key doesn't exist (keeps first match)
if let (Entry::Vacant(entry), Some(id)) = (mapping.entry(word.to_string()), sample_id) {
entry.insert(id.to_string());
}
}
mapping
}
/// Checks if a word starts with an HTTP method followed by an underscore.
fn is_http_method_prefixed(word: &str) -> bool {
HTTP_METHODS
.iter()
.any(|&method| word.strip_prefix(method).is_some_and(|rest| rest.starts_with('_')))
}
/// Parses all code samples from a YAML-like file.
///
/// A code sample ID is found when a line contains `: |-`.
/// The code sample value is everything between `: |-` and:
/// - The next code sample (next line containing `: |-`)
/// - OR a line starting with `#` at column 0 (indented `#` is part of the code sample)
/// - OR the end of file
///
/// # Example
///
/// ```yaml
/// get_indexes_1: |-
/// client.getIndexes()
/// # I write something
/// # COMMENT TO IGNORE
/// get_indexes_2: |-
/// client.getIndexes({ limit: 3 })
/// ```
///
/// This produces:
/// - `get_indexes_1` → `"client.getIndexes()\n# I write something"`
/// - `get_indexes_2` → `"client.getIndexes({ limit: 3 })"`
fn parse_code_samples_from_file(content: &str) -> HashMap<String, String> {
let mut samples: HashMap<String, String> = HashMap::new();
let mut current_sample_id: Option<String> = None;
let mut current_lines: Vec<String> = Vec::new();
let mut base_indent: Option<usize> = None;
for line in content.lines() {
// Check if this line starts a new code sample (contains `: |-`)
if line.contains(": |-") {
// Save previous sample if exists
if let Some(sample_id) = current_sample_id.take() {
let value = current_lines.join("\n").trim_end().to_string();
samples.insert(sample_id, value);
}
current_lines.clear();
base_indent = None;
// Extract sample ID (first word before `:`)
if let Some(id) = line.split(':').next() {
current_sample_id = Some(id.trim().to_string());
}
continue;
}
// Check if this line ends the current code sample (line starts with `#` at column 0)
// Indented `#` (spaces or tabs) is part of the code sample
if line.starts_with('#') {
// Save current sample and reset
if let Some(sample_id) = current_sample_id.take() {
let value = current_lines.join("\n").trim_end().to_string();
samples.insert(sample_id, value);
}
current_lines.clear();
base_indent = None;
continue;
}
// If we're in a code sample, add this line to the value
if current_sample_id.is_some() {
// Handle empty lines
if line.trim().is_empty() {
if !current_lines.is_empty() {
current_lines.push(String::new());
}
continue;
}
// Calculate indentation and strip base indent
let indent = line.len() - line.trim_start().len();
let base = *base_indent.get_or_insert(indent);
// Remove base indentation
let dedented = line.get(base..).unwrap_or_else(|| line.trim_start());
current_lines.push(dedented.to_string());
}
}
// Don't forget the last sample
if let Some(sample_id) = current_sample_id {
let value = current_lines.join("\n").trim_end().to_string();
samples.insert(sample_id, value);
}
samples
}
/// Converts an OpenAPI path and HTTP method to a code sample key.
///
/// # Example
///
/// - Path: `/indexes/{index_uid}/documents/{document_id}`
/// - Method: `GET`
/// - Result: `get_indexes_indexUid_documents_documentId`
fn path_to_key(path: &str, method: &str) -> String {
let method_lower = method.to_lowercase();
// Remove leading slash and convert path
let path_part = path
.trim_start_matches('/')
.split('/')
.map(|segment| {
if segment.starts_with('{') && segment.ends_with('}') {
// Convert {param_name} to camelCase
let param = &segment[1..segment.len() - 1];
to_camel_case(param)
} else {
// Keep path segments as-is, but replace hyphens with underscores
segment.replace('-', "_")
}
})
.collect::<Vec<_>>()
.join("_");
if path_part.is_empty() {
method_lower
} else {
format!("{}_{}", method_lower, path_part)
}
}
/// Converts a `snake_case` string to `camelCase`.
///
/// # Example
///
/// ```
/// assert_eq!(to_camel_case("index_uid"), "indexUid");
/// ```
fn to_camel_case(s: &str) -> String {
let mut result = String::with_capacity(s.len());
let mut capitalize_next = false;
for (i, c) in s.chars().enumerate() {
match c {
'_' => capitalize_next = true,
_ if capitalize_next => {
result.push(c.to_ascii_uppercase());
capitalize_next = false;
}
_ if i == 0 => result.push(c.to_ascii_lowercase()),
_ => result.push(c),
}
}
result
}
/// Adds code samples to the OpenAPI specification as `x-codeSamples` extensions.
fn add_code_samples_to_openapi(
openapi: &mut Value,
code_samples: &CodeSamplesMap,
debug: bool,
) -> Result<()> {
let paths = openapi
.get_mut("paths")
.and_then(|p| p.as_object_mut())
.context("OpenAPI spec missing 'paths' object")?;
let mut routes_with_samples: Vec<String> = Vec::new();
let mut routes_without_samples: Vec<String> = Vec::new();
// Collect all routes first for sorted debug output
let mut all_routes: Vec<(String, String, String)> = Vec::new(); // (path, method, key)
for (path, path_item) in paths.iter_mut() {
let Some(path_item) = path_item.as_object_mut() else {
continue;
};
for method in HTTP_METHODS {
let Some(operation) = path_item.get_mut(*method) else {
continue;
};
let key = path_to_key(path, method);
all_routes.push((path.clone(), method.to_string(), key.clone()));
if let Some(samples) = code_samples.get(&key) {
routes_with_samples.push(key);
// Create x-codeSamples array according to Redocly spec
// Sort by language name for consistent output
let mut sorted_samples = samples.clone();
sorted_samples.sort_by(|a, b| a.lang.cmp(&b.lang));
let code_sample_array: Vec<Value> = sorted_samples
.iter()
.map(|sample| {
json!({
"lang": sample.lang,
"source": sample.source
})
})
.collect();
if let Some(op) = operation.as_object_mut() {
op.insert("x-codeSamples".to_string(), json!(code_sample_array));
}
} else {
routes_without_samples.push(key);
}
}
}
// Debug output
if debug {
routes_without_samples.sort();
if !routes_without_samples.is_empty() {
println!("=== Routes without code samples ===\n");
for key in &routes_without_samples {
println!(" {}", key);
}
}
let total = all_routes.len();
let with_samples = routes_with_samples.len();
let without_samples = routes_without_samples.len();
let percentage = if total > 0 { (with_samples as f64 / total as f64) * 100.0 } else { 0.0 };
println!("\n=== Summary ===\n");
println!(" Total routes: {}", total);
println!(" With code samples: {} ({:.1}%)", with_samples, percentage);
println!(" Missing code samples: {} ({:.1}%)\n", without_samples, 100.0 - percentage);
}
Ok(())
}
/// Cleans up null descriptions in tags to make Mintlify work.
///
/// Removes any `"description"` fields with null values (both JSON `null` and `"null"` string)
/// from the tags array and all nested objects.
fn clean_null_descriptions(openapi: &mut Value) {
if let Some(tags) = openapi.get_mut("tags").and_then(|t| t.as_array_mut()) {
for tag in tags.iter_mut() {
remove_null_descriptions_recursive(tag);
}
}
}
/// Recursively removes all `"description"` fields that are `null` or the `"null"` string.
fn remove_null_descriptions_recursive(value: &mut Value) {
if let Some(obj) = value.as_object_mut() {
// Check and remove description if it's null or "null" string
if let Some(desc) = obj.get("description") {
if desc.is_null() || (desc.is_string() && desc.as_str() == Some("null")) {
obj.remove("description");
}
}
// Recursively process all nested objects
for (_, v) in obj.iter_mut() {
remove_null_descriptions_recursive(v);
}
} else if let Some(arr) = value.as_array_mut() {
// Recursively process arrays
for item in arr.iter_mut() {
remove_null_descriptions_recursive(item);
}
}
}
/// Checks that all routes have a summary field.
///
/// Returns an error if any route is missing a summary.
fn check_all_routes_have_summaries(openapi: &Value) -> Result<()> {
let paths = openapi
.get("paths")
.and_then(|p| p.as_object())
.context("OpenAPI spec missing 'paths' object")?;
let mut missing_summaries: Vec<String> = Vec::new();
for (path, path_item) in paths.iter() {
let Some(path_item) = path_item.as_object() else {
continue;
};
for method in HTTP_METHODS {
let Some(operation) = path_item.get(*method) else {
continue;
};
let has_summary =
operation.get("summary").and_then(|s| s.as_str()).is_some_and(|s| !s.is_empty());
if !has_summary {
missing_summaries.push(format!("{} {}", method.to_uppercase(), path));
}
}
}
if missing_summaries.is_empty() {
println!("All routes have summaries.");
Ok(())
} else {
missing_summaries.sort();
eprintln!("The following routes are missing a summary:");
for route in &missing_summaries {
eprintln!(" - {}", route);
}
eprintln!("\nTo fix this, add a doc-comment (///) above the route handler function.");
eprintln!("The first line becomes the summary, subsequent lines become the description.");
eprintln!("\nExample:");
eprintln!(" /// List webhooks");
eprintln!(" ///");
eprintln!(" /// Get the list of all registered webhooks.");
eprintln!(" #[utoipa::path(...)]");
eprintln!(" async fn get_webhooks(...) {{ ... }}");
anyhow::bail!("{} route(s) missing summary", missing_summaries.len());
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_path_to_key() {
assert_eq!(path_to_key("/indexes", "GET"), "get_indexes");
assert_eq!(path_to_key("/indexes/{index_uid}", "GET"), "get_indexes_indexUid");
assert_eq!(
path_to_key("/indexes/{index_uid}/documents", "POST"),
"post_indexes_indexUid_documents"
);
assert_eq!(
path_to_key("/indexes/{index_uid}/documents/{document_id}", "GET"),
"get_indexes_indexUid_documents_documentId"
);
assert_eq!(
path_to_key("/indexes/{index_uid}/settings/stop-words", "GET"),
"get_indexes_indexUid_settings_stop_words"
);
}
#[test]
fn test_to_camel_case() {
assert_eq!(to_camel_case("index_uid"), "indexUid");
assert_eq!(to_camel_case("document_id"), "documentId");
assert_eq!(to_camel_case("task_uid"), "taskUid");
}
#[test]
fn test_build_openapi_key_mapping() {
let yaml = r#"
# get_indexes
get_indexes_1: |-
curl \
-X GET 'MEILISEARCH_URL/indexes'
get_indexes_2: |-
curl \
-X GET 'MEILISEARCH_URL/indexes?limit=5'
# post_indexes
create_indexes_1: |-
curl \
-X POST 'MEILISEARCH_URL/indexes'
# get_version
get_version_1: |-
curl \
-X GET 'MEILISEARCH_URL/version'
# COMMENT WITHOUT KEY - SHOULD BE IGNORED
## COMMENT WITHOUT KEY - SHOULD BE IGNORED
unrelated_sample_without_comment: |-
curl \
-X GET 'MEILISEARCH_URL/something'
"#;
let mapping = build_openapi_key_mapping(yaml);
// Should have 3 OpenAPI keys
assert_eq!(mapping.len(), 3);
assert!(mapping.contains_key("get_indexes"));
assert!(mapping.contains_key("post_indexes"));
assert!(mapping.contains_key("get_version"));
// Only keeps the first code sample ID per OpenAPI key
assert_eq!(mapping["get_indexes"], "get_indexes_1");
assert_eq!(mapping["post_indexes"], "create_indexes_1");
assert_eq!(mapping["get_version"], "get_version_1");
// Comments with multiple words or ## should be ignored and not create keys
assert!(!mapping.contains_key("COMMENT"));
assert!(!mapping.contains_key("##"));
}
#[test]
fn test_parse_code_samples_from_file() {
let yaml = r#"
get_indexes_1: |-
client.getIndexes()
# I write something
# COMMENT TO IGNORE
get_indexes_2: |-
client.getIndexes({ limit: 3 })
update_document: |-
// Code with blank line
updateDoc(doc)
// End
delete_document_1: |-
client.deleteDocument(1)
no_newline_at_end: |-
client.update({ id: 1 })
key_with_empty_sample: |-
# This should produce an empty string for the sample
complex_block: |-
// Some code
Indented line
# Indented comment
Last line
"#;
let samples = parse_code_samples_from_file(yaml);
assert_eq!(samples.len(), 7);
assert!(samples.contains_key("get_indexes_1"));
assert!(samples.contains_key("get_indexes_2"));
assert!(samples.contains_key("update_document"));
assert!(samples.contains_key("delete_document_1"));
assert!(samples.contains_key("no_newline_at_end"));
assert!(samples.contains_key("key_with_empty_sample"));
assert!(samples.contains_key("complex_block"));
// get_indexes_1 includes the indented comment
assert_eq!(samples["get_indexes_1"], "client.getIndexes()\n# I write something");
// get_indexes_2 is a single line
assert_eq!(samples["get_indexes_2"], "client.getIndexes({ limit: 3 })");
// update_document contains a blank line and some code
assert_eq!(samples["update_document"], "// Code with blank line\n\nupdateDoc(doc)\n// End");
// delete_document_1
assert_eq!(samples["delete_document_1"], "client.deleteDocument(1)");
// no_newline_at_end is explicitly a single line
assert_eq!(samples["no_newline_at_end"], "client.update({ id: 1 })");
// key_with_empty_sample should be empty string
assert_eq!(samples["key_with_empty_sample"], "");
// complex_block preserves indentation and comments
assert_eq!(
samples["complex_block"],
"// Some code\n Indented line\n # Indented comment\nLast line"
);
}
#[test]
fn test_clean_null_descriptions() {
let mut openapi = json!({
"tags": [
{
"name": "Test1",
"description": "null"
},
{
"name": "Test2",
"description": null
},
{
"name": "Test3",
"description": "Valid description"
},
{
"name": "Test4",
"description": "null",
"externalDocs": {
"url": "https://example.com",
"description": null
}
},
{
"name": "Test5",
"externalDocs": {
"url": "https://example.com",
"description": "null"
}
}
]
});
clean_null_descriptions(&mut openapi);
let tags = openapi["tags"].as_array().unwrap();
// Test1: description "null" should be removed
assert!(!tags[0].as_object().unwrap().contains_key("description"));
// Test2: description null should be removed
assert!(!tags[1].as_object().unwrap().contains_key("description"));
// Test3: valid description should remain
assert_eq!(tags[2]["description"], "Valid description");
// Test4: both tag description and externalDocs description should be removed
assert!(!tags[3].as_object().unwrap().contains_key("description"));
assert!(!tags[3]["externalDocs"].as_object().unwrap().contains_key("description"));
assert_eq!(tags[3]["externalDocs"]["url"], "https://example.com");
// Test5: externalDocs description "null" should be removed
assert!(!tags[4]["externalDocs"].as_object().unwrap().contains_key("description"));
assert_eq!(tags[4]["externalDocs"]["url"], "https://example.com");
}
}