Compare commits


2 Commits

Author       SHA1        Message                                                                        Date
ManyTheFish  42bbfebf70  Remove proximity database, forcing us to remove phrase search and splitwords  2023-10-03 16:58:26 +02:00
ManyTheFish  5637978fe4  Don't compute proximity database anymore                                       2023-10-03 15:34:01 +02:00
103 changed files with 851 additions and 1750 deletions

View File

@@ -7,17 +7,19 @@ assignees: ''
---
Related product team resources: [PRD]() (_internal only_)
Related product team resources: [roadmap card]() (_internal only_) and [PRD]() (_internal only_)
Related product discussion:
Related spec: WIP
## Motivation
<!---Copy/paste the information in PRD or briefly detail the product motivation. Ask product team if any hesitation.-->
<!---Copy/paste the information in the roadmap resources or briefly detail the product motivation. Ask product team if any hesitation.-->
## Usage
<!---Link to the public part of the PRD, or to the related product discussion for experimental features-->
<!---Write a quick description of the usage if the usage has already been defined-->
Refer to the final spec to know the details and the final decisions about the usage.
## TODO

View File

@@ -74,4 +74,4 @@ jobs:
echo "${{ steps.file.outputs.basename }}.json has just been pushed."
echo 'How to compare this benchmark with another one?'
echo ' - Check the available files with: ./benchmarks/scripts/list.sh'
echo " - Run the following command: ./benchmaks/scripts/compare.sh <file-to-compare-with> ${{ steps.file.outputs.basename }}.json"
echo " - Run the following command: ./benchmaks/scipts/compare.sh <file-to-compare-with> ${{ steps.file.outputs.basename }}.json"

View File

@@ -1,98 +0,0 @@
name: Benchmarks (PR)
on: issue_comment
permissions:
issues: write
env:
GH_TOKEN: ${{ secrets.MEILI_BOT_GH_PAT }}
jobs:
run-benchmarks-on-comment:
if: startsWith(github.event.comment.body, '/benchmark')
name: Run and upload benchmarks
runs-on: benchmarks
timeout-minutes: 4320 # 72h
steps:
- uses: actions-rs/toolchain@v1
with:
profile: minimal
toolchain: stable
override: true
- name: Check for Command
id: command
uses: xt0rted/slash-command-action@v2
with:
command: benchmark
reaction-type: "eyes"
repo-token: ${{ env.GH_TOKEN }}
- uses: xt0rted/pull-request-comment-branch@v2
id: comment-branch
with:
repo_token: ${{ env.GH_TOKEN }}
- uses: actions/checkout@v3
if: success()
with:
fetch-depth: 0 # fetch full history to be able to get main commit sha
ref: ${{ steps.comment-branch.outputs.head_ref }}
# Set variables
- name: Set current branch name
shell: bash
run: echo "name=$(git rev-parse --abbrev-ref HEAD)" >> $GITHUB_OUTPUT
id: current_branch
- name: Set normalized current branch name # Replace `/` by `_` in branch name to avoid issues when pushing to S3
shell: bash
run: echo "name=$(git rev-parse --abbrev-ref HEAD | tr '/' '_')" >> $GITHUB_OUTPUT
id: normalized_current_branch
- name: Set shorter commit SHA
shell: bash
run: echo "short=$(echo $GITHUB_SHA | cut -c1-8)" >> $GITHUB_OUTPUT
id: commit_sha
- name: Set file basename with format "dataset_branch_commitSHA"
shell: bash
run: echo "basename=$(echo ${{ steps.command.outputs.command-arguments }}_${{ steps.normalized_current_branch.outputs.name }}_${{ steps.commit_sha.outputs.short }})" >> $GITHUB_OUTPUT
id: file
# Run benchmarks
- name: Run benchmarks - Dataset ${{ steps.command.outputs.command-arguments }} - Branch ${{ steps.current_branch.outputs.name }} - Commit ${{ steps.commit_sha.outputs.short }}
run: |
cd benchmarks
cargo bench --bench ${{ steps.command.outputs.command-arguments }} -- --save-baseline ${{ steps.file.outputs.basename }}
# Generate critcmp files
- name: Install critcmp
uses: taiki-e/install-action@v2
with:
tool: critcmp
- name: Export cripcmp file
run: |
critcmp --export ${{ steps.file.outputs.basename }} > ${{ steps.file.outputs.basename }}.json
# Upload benchmarks
- name: Upload ${{ steps.file.outputs.basename }}.json to DO Spaces # DigitalOcean Spaces = S3
uses: BetaHuhn/do-spaces-action@v2
with:
access_key: ${{ secrets.DO_SPACES_ACCESS_KEY }}
secret_key: ${{ secrets.DO_SPACES_SECRET_KEY }}
space_name: ${{ secrets.DO_SPACES_SPACE_NAME }}
space_region: ${{ secrets.DO_SPACES_SPACE_REGION }}
source: ${{ steps.file.outputs.basename }}.json
out_dir: critcmp_results
# Compute the diff of the benchmarks and send a message on the GitHub PR
- name: Compute and send a message in the PR
env:
GITHUB_TOKEN: ${{ secrets.MEILI_BOT_GH_PAT }}
run: |
set -x
export base_ref=$(git merge-base origin/main ${{ steps.comment-branch.outputs.head_ref }} | head -c8)
export base_filename=$(echo ${{ steps.command.outputs.command-arguments }}_main_${base_ref}.json)
export bench_name=$(echo ${{ steps.command.outputs.command-arguments }})
echo "Here are your $bench_name benchmarks diff 👊" >> body.txt
echo '```' >> body.txt
./benchmarks/scripts/compare.sh $base_filename ${{ steps.file.outputs.basename }}.json >> body.txt
echo '```' >> body.txt
gh pr comment ${{ steps.current_branch.outputs.name }} --body-file body.txt

View File

@@ -50,7 +50,7 @@ jobs:
needs: check-version
steps:
- name: Create PR to Homebrew
uses: mislav/bump-homebrew-formula-action@v3
uses: mislav/bump-homebrew-formula-action@v2
with:
formula-name: meilisearch
formula-path: Formula/m/meilisearch.rb

View File

@@ -57,20 +57,20 @@ jobs:
echo "date=$commit_date" >> $GITHUB_OUTPUT
- name: Set up QEMU
uses: docker/setup-qemu-action@v3
uses: docker/setup-qemu-action@v2
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@v2
- name: Login to Docker Hub
uses: docker/login-action@v3
uses: docker/login-action@v2
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
- name: Docker meta
id: meta
uses: docker/metadata-action@v5
uses: docker/metadata-action@v4
with:
images: getmeili/meilisearch
# Prevent `latest` to be updated for each new tag pushed.
@@ -83,7 +83,7 @@ jobs:
type=raw,value=latest,enable=${{ steps.check-tag-format.outputs.stable == 'true' && steps.check-tag-format.outputs.latest == 'true' }}
- name: Build and push
uses: docker/build-push-action@v5
uses: docker/build-push-action@v4
with:
push: true
platforms: linux/amd64,linux/arm64

View File

@@ -160,7 +160,7 @@ jobs:
with:
repository: meilisearch/meilisearch-js
- name: Setup node
uses: actions/setup-node@v4
uses: actions/setup-node@v3
with:
cache: 'yarn'
- name: Install dependencies
@@ -318,7 +318,7 @@ jobs:
with:
repository: meilisearch/meilisearch-js-plugins
- name: Setup node
uses: actions/setup-node@v4
uses: actions/setup-node@v3
with:
cache: yarn
- name: Install dependencies

View File

@@ -43,7 +43,7 @@ jobs:
toolchain: nightly
override: true
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.7.1
uses: Swatinem/rust-cache@v2.6.2
- name: Run cargo check without any default features
uses: actions-rs/cargo@v1
with:
@@ -65,7 +65,7 @@ jobs:
steps:
- uses: actions/checkout@v3
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.7.1
uses: Swatinem/rust-cache@v2.6.2
- name: Run cargo check without any default features
uses: actions-rs/cargo@v1
with:
@@ -149,7 +149,7 @@ jobs:
toolchain: stable
override: true
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.7.1
uses: Swatinem/rust-cache@v2.6.2
- name: Run tests in debug
uses: actions-rs/cargo@v1
with:
@@ -168,7 +168,7 @@ jobs:
override: true
components: clippy
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.7.1
uses: Swatinem/rust-cache@v2.6.2
- name: Run cargo clippy
uses: actions-rs/cargo@v1
with:
@@ -187,7 +187,7 @@ jobs:
override: true
components: rustfmt
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.7.1
uses: Swatinem/rust-cache@v2.6.2
- name: Run cargo fmt
# Since we never ran the `build.rs` script in the benchmark directory we are missing one auto-generated import file.
# Since we want to trigger (and fail) this action as fast as possible, instead of building the benchmark crate

Cargo.lock (generated)
View File

@@ -231,9 +231,9 @@ dependencies = [
[[package]]
name = "addr2line"
version = "0.21.0"
version = "0.20.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a30b2e23b9e17a9f90641c7ab1549cd9b44f296d3ccbf309d2863cfe398a0cb"
checksum = "f4fa78e18c64fce05e902adecd7a5eed15a5e0a3439f7b0e169f0252214865e3"
dependencies = [
"gimli",
]
@@ -435,9 +435,9 @@ checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
[[package]]
name = "backtrace"
version = "0.3.69"
version = "0.3.68"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2089b7e3f35b9dd2d0ed921ead4f6d318c27680d4a5bd167b3ee120edb105837"
checksum = "4319208da049c43661739c5fade2ba182f09d1dc2299b32298d3a31692b17e12"
dependencies = [
"addr2line",
"cc",
@@ -468,7 +468,7 @@ checksum = "8c3c1a368f70d6cf7302d78f8f7093da241fb8e8807c05cc9e51a125895a6d5b"
[[package]]
name = "benchmarks"
version = "1.4.1"
version = "1.4.0"
dependencies = [
"anyhow",
"bytes",
@@ -1206,7 +1206,7 @@ dependencies = [
[[package]]
name = "dump"
version = "1.4.1"
version = "1.4.0"
dependencies = [
"anyhow",
"big_s",
@@ -1417,7 +1417,7 @@ dependencies = [
[[package]]
name = "file-store"
version = "1.4.1"
version = "1.4.0"
dependencies = [
"faux",
"tempfile",
@@ -1439,7 +1439,7 @@ dependencies = [
[[package]]
name = "filter-parser"
version = "1.4.1"
version = "1.4.0"
dependencies = [
"insta",
"nom",
@@ -1459,7 +1459,7 @@ dependencies = [
[[package]]
name = "flatten-serde-json"
version = "1.4.1"
version = "1.4.0"
dependencies = [
"criterion",
"serde_json",
@@ -1577,7 +1577,7 @@ dependencies = [
[[package]]
name = "fuzzers"
version = "1.4.1"
version = "1.4.0"
dependencies = [
"arbitrary",
"clap",
@@ -1638,9 +1638,9 @@ dependencies = [
[[package]]
name = "gimli"
version = "0.28.0"
version = "0.27.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6fb8d784f27acf97159b40fc4db5ecd8aa23b9ad5ef69cdd136d3bc80665f0c0"
checksum = "b6c80984affa11d98d1b88b66ac8853f143217b399d3c74116778ff8fdb4ed2e"
[[package]]
name = "git2"
@@ -1891,10 +1891,9 @@ dependencies = [
[[package]]
name = "index-scheduler"
version = "1.4.1"
version = "1.4.0"
dependencies = [
"anyhow",
"backtrace",
"big_s",
"bincode",
"crossbeam",
@@ -2089,7 +2088,7 @@ dependencies = [
[[package]]
name = "json-depth-checker"
version = "1.4.1"
version = "1.4.0"
dependencies = [
"criterion",
"serde_json",
@@ -2501,7 +2500,7 @@ checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771"
[[package]]
name = "meili-snap"
version = "1.4.1"
version = "1.4.0"
dependencies = [
"insta",
"md5",
@@ -2510,7 +2509,7 @@ dependencies = [
[[package]]
name = "meilisearch"
version = "1.4.1"
version = "1.4.0"
dependencies = [
"actix-cors",
"actix-http",
@@ -2565,6 +2564,7 @@ dependencies = [
"platform-dirs",
"prometheus",
"puffin",
"puffin_http",
"rand",
"rayon",
"regex",
@@ -2600,7 +2600,7 @@ dependencies = [
[[package]]
name = "meilisearch-auth"
version = "1.4.1"
version = "1.4.0"
dependencies = [
"base64 0.21.2",
"enum-iterator",
@@ -2619,7 +2619,7 @@ dependencies = [
[[package]]
name = "meilisearch-types"
version = "1.4.1"
version = "1.4.0"
dependencies = [
"actix-web",
"anyhow",
@@ -2673,7 +2673,7 @@ dependencies = [
[[package]]
name = "milli"
version = "1.4.1"
version = "1.4.0"
dependencies = [
"big_s",
"bimap",
@@ -2857,9 +2857,9 @@ dependencies = [
[[package]]
name = "object"
version = "0.32.1"
version = "0.31.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9cf5f9dd3933bd50a9e1f149ec995f39ae2c496d31fd772c1fd45ebc27e902b0"
checksum = "8bda667d9f2b5051b8833f59f3bf748b28ef54f850f4fcb389a252aa383866d1"
dependencies = [
"memchr",
]
@@ -2995,7 +2995,7 @@ checksum = "9b2a4787296e9989611394c33f193f676704af1686e70b8f8033ab5ba9a35a94"
[[package]]
name = "permissive-json-pointer"
version = "1.4.1"
version = "1.4.0"
dependencies = [
"big_s",
"serde_json",
@@ -3193,7 +3193,7 @@ dependencies = [
"byteorder",
"hex",
"lazy_static",
"rustix 0.36.16",
"rustix 0.36.15",
]
[[package]]
@@ -3236,6 +3236,18 @@ dependencies = [
"serde",
]
[[package]]
name = "puffin_http"
version = "0.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "13bffc600c35913d282ae1e96a6ffcdf36dc7a7cdb9310e0ba15914d258c8193"
dependencies = [
"anyhow",
"crossbeam-channel",
"log",
"puffin",
]
[[package]]
name = "quote"
version = "1.0.32"
@@ -3466,9 +3478,9 @@ dependencies = [
[[package]]
name = "rustix"
version = "0.36.16"
version = "0.36.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6da3636faa25820d8648e0e31c5d519bbb01f72fdf57131f0f5f7da5fed36eab"
checksum = "c37f1bd5ef1b5422177b7646cba67430579cfe2ace80f284fee876bca52ad941"
dependencies = [
"bitflags 1.3.2",
"errno",
@@ -3641,9 +3653,9 @@ dependencies = [
[[package]]
name = "serde_json"
version = "1.0.108"
version = "1.0.104"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3d1c7e3eac408d115102c4c24ad393e0821bb3a5df4d506a80f85f7a742a526b"
checksum = "076066c5f1078eac5b722a31827a8832fe108bed65dfa75e233c89f8206e976c"
dependencies = [
"indexmap 2.0.0",
"itoa",
@@ -4431,9 +4443,9 @@ dependencies = [
[[package]]
name = "webpki"
version = "0.22.2"
version = "0.22.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "07ecc0cd7cac091bf682ec5efa18b1cff79d617b84181f38b3951dbe135f607f"
checksum = "f0e74f82d49d545ad128049b7e88f6576df2da6b02e9ce565c6f533be576957e"
dependencies = [
"ring",
"untrusted",

View File

@@ -18,7 +18,7 @@ members = [
]
[workspace.package]
version = "1.4.1"
version = "1.4.0"
authors = ["Quentin de Quelen <quentin@dequelen.me>", "Clément Renault <clement@meilisearch.com>"]
description = "Meilisearch HTTP server"
homepage = "https://meilisearch.com"
@@ -28,7 +28,6 @@ license = "MIT"
[profile.release]
codegen-units = 1
debug = true
[profile.dev.package.flate2]
opt-level = 3

View File

@@ -1,14 +1,14 @@
# Profiling Meilisearch
Search engine technologies are complex pieces of software that require thorough profiling tools. We chose to use [Puffin](https://github.com/EmbarkStudios/puffin), which the Rust gaming industry uses extensively. You can export and import the profiling reports using the top bar's _File_ menu options [in Puffin Viewer](https://github.com/embarkstudios/puffin#ui).
Search engine technologies are complex pieces of software that require thorough profiling tools. We chose to use [Puffin](https://github.com/EmbarkStudios/puffin), which the Rust gaming industry uses extensively. You can export and import the profiling reports using the top bar's _File_ menu options.
![An example profiling with Puffin viewer](assets/profiling-example.png)
## Profiling the Indexing Process
When you enable [the `exportPuffinReports` experimental feature](https://www.meilisearch.com/docs/learn/experimental/overview) of Meilisearch, Puffin reports with the `.puffin` extension will be automatically exported to disk. When this option is enabled, the engine will automatically create a "frame" whenever it executes the `IndexScheduler::tick` method.
When you enable the `profile-with-puffin` feature of Meilisearch, a Puffin HTTP server will run on Meilisearch and listen on the default _0.0.0.0:8585_ address. This server will record a "frame" whenever it executes the `IndexScheduler::tick` method.
[Puffin Viewer](https://github.com/EmbarkStudios/puffin/tree/main/puffin_viewer) is used to analyze the reports. Those reports show areas where Meilisearch spent time during indexing.
Once your Meilisearch is running and awaits new indexation operations, you must [install and run the `puffin_viewer` tool](https://github.com/EmbarkStudios/puffin/tree/main/puffin_viewer) to see the profiling results. I advise you to run the viewer with the `RUST_LOG=puffin_http::client=debug` environment variable to see the client trying to connect to your server.
Another piece of advice on the Puffin viewer UI interface is to consider the _Merge children with same ID_ option. It can hide the exact actual timings at which events were sent. Please turn it off when you see strange gaps on the Flamegraph. It can help.
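For context, the profiling setup this documentation describes boils down to three pieces that also appear elsewhere in this diff: an optional `profile-with-puffin` Cargo feature that pulls in `puffin_http`, a `puffin_http::Server` started in `main`, and one profiler frame per `IndexScheduler::tick`. The sketch below is a minimal, self-contained illustration of that pattern, not Meilisearch's actual code: it assumes the `puffin`, `puffin_http`, and `anyhow` crates, and the `tick` function and three-iteration loop are placeholders.

// Minimal sketch of the puffin pattern restored by this diff (illustrative only).
fn tick() {
    // One profiler frame per scheduler iteration, mirroring IndexScheduler::tick.
    puffin::GlobalProfiler::lock().new_frame();
    // Record the time spent in this function inside the current frame.
    puffin::profile_function!();
    {
        // A named scope for a sub-step; the label is a placeholder.
        puffin::profile_scope!("apply_index_operation");
        // ... indexing work would happen here ...
    }
}

fn main() -> anyhow::Result<()> {
    // Serve profiling data so puffin_viewer can connect (default port 8585).
    #[cfg(feature = "profile-with-puffin")]
    let _server = puffin_http::Server::new(&format!("0.0.0.0:{}", puffin_http::DEFAULT_PORT))?;

    // Scopes stay a no-op unless the feature is enabled at compile time.
    puffin::set_scopes_on(cfg!(feature = "profile-with-puffin"));

    for _ in 0..3 {
        tick();
    }
    Ok(())
}

With the feature enabled at build time (for example `cargo run --release --features profile-with-puffin`), connecting `puffin_viewer` to port 8585 should then show one flame-graph frame per tick.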

View File

@@ -526,12 +526,12 @@ pub(crate) mod test {
assert!(indexes.is_empty());
// products
insta::assert_json_snapshot!(products.metadata(), @r###"
insta::assert_json_snapshot!(products.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
{
"uid": "products",
"primaryKey": "sku",
"createdAt": "2022-10-09T20:27:22.688964637Z",
"updatedAt": "2022-10-09T20:27:23.951017769Z"
"createdAt": "[now]",
"updatedAt": "[now]"
}
"###);
@@ -541,12 +541,12 @@ pub(crate) mod test {
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"548284a84de510f71e88e6cdea495cf5");
// movies
insta::assert_json_snapshot!(movies.metadata(), @r###"
insta::assert_json_snapshot!(movies.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
{
"uid": "movies",
"primaryKey": "id",
"createdAt": "2022-10-09T20:27:22.197788495Z",
"updatedAt": "2022-10-09T20:28:01.93111053Z"
"createdAt": "[now]",
"updatedAt": "[now]"
}
"###);
@@ -571,12 +571,12 @@ pub(crate) mod test {
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"d751713988987e9331980363e24189ce");
// spells
insta::assert_json_snapshot!(spells.metadata(), @r###"
insta::assert_json_snapshot!(spells.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
{
"uid": "dnd_spells",
"primaryKey": "index",
"createdAt": "2022-10-09T20:27:24.242683494Z",
"updatedAt": "2022-10-09T20:27:24.312809641Z"
"createdAt": "[now]",
"updatedAt": "[now]"
}
"###);
@@ -617,12 +617,12 @@ pub(crate) mod test {
assert!(indexes.is_empty());
// products
insta::assert_json_snapshot!(products.metadata(), @r###"
insta::assert_json_snapshot!(products.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
{
"uid": "products",
"primaryKey": "sku",
"createdAt": "2023-01-30T16:25:56.595257Z",
"updatedAt": "2023-01-30T16:25:58.70348Z"
"createdAt": "[now]",
"updatedAt": "[now]"
}
"###);
@@ -632,12 +632,12 @@ pub(crate) mod test {
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"548284a84de510f71e88e6cdea495cf5");
// movies
insta::assert_json_snapshot!(movies.metadata(), @r###"
insta::assert_json_snapshot!(movies.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
{
"uid": "movies",
"primaryKey": "id",
"createdAt": "2023-01-30T16:25:56.192178Z",
"updatedAt": "2023-01-30T16:25:56.455714Z"
"createdAt": "[now]",
"updatedAt": "[now]"
}
"###);
@@ -647,12 +647,12 @@ pub(crate) mod test {
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"0227598af846e574139ee0b80e03a720");
// spells
insta::assert_json_snapshot!(spells.metadata(), @r###"
insta::assert_json_snapshot!(spells.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
{
"uid": "dnd_spells",
"primaryKey": "index",
"createdAt": "2023-01-30T16:25:58.876405Z",
"updatedAt": "2023-01-30T16:25:59.079906Z"
"createdAt": "[now]",
"updatedAt": "[now]"
}
"###);

View File

@@ -46,7 +46,6 @@ pub type Checked = settings::Checked;
pub type Unchecked = settings::Unchecked;
pub type Task = updates::UpdateEntry;
pub type Kind = updates::UpdateMeta;
// everything related to the errors
pub type ResponseError = errors::ResponseError;
@@ -108,11 +107,8 @@ impl V2Reader {
pub fn indexes(&self) -> Result<impl Iterator<Item = Result<V2IndexReader>> + '_> {
Ok(self.index_uuid.iter().map(|index| -> Result<_> {
V2IndexReader::new(
index.uid.clone(),
&self.dump.path().join("indexes").join(format!("index-{}", index.uuid)),
index,
BufReader::new(
File::open(self.dump.path().join("updates").join("data.jsonl")).unwrap(),
),
)
}))
}
@@ -147,41 +143,16 @@ pub struct V2IndexReader {
}
impl V2IndexReader {
pub fn new(path: &Path, index_uuid: &IndexUuid, tasks: BufReader<File>) -> Result<Self> {
pub fn new(name: String, path: &Path) -> Result<Self> {
let meta = File::open(path.join("meta.json"))?;
let meta: DumpMeta = serde_json::from_reader(meta)?;
let mut created_at = None;
let mut updated_at = None;
for line in tasks.lines() {
let task: Task = serde_json::from_str(&line?)?;
if !(task.uuid == index_uuid.uuid && task.is_finished()) {
continue;
}
let new_created_at = match task.update.meta() {
Kind::DocumentsAddition { .. } | Kind::Settings(_) => task.update.finished_at(),
_ => None,
};
let new_updated_at = task.update.finished_at();
if created_at.is_none() || created_at > new_created_at {
created_at = new_created_at;
}
if updated_at.is_none() || updated_at < new_updated_at {
updated_at = new_updated_at;
}
}
let current_time = OffsetDateTime::now_utc();
let metadata = IndexMetadata {
uid: index_uuid.uid.clone(),
uid: name,
primary_key: meta.primary_key,
created_at: created_at.unwrap_or(current_time),
updated_at: updated_at.unwrap_or(current_time),
// FIXME: Iterate over the whole task queue to find the creation and last update date.
created_at: OffsetDateTime::now_utc(),
updated_at: OffsetDateTime::now_utc(),
};
let ret = V2IndexReader {
@@ -277,12 +248,12 @@ pub(crate) mod test {
assert!(indexes.is_empty());
// products
insta::assert_json_snapshot!(products.metadata(), @r###"
insta::assert_json_snapshot!(products.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
{
"uid": "products",
"primaryKey": "sku",
"createdAt": "2022-10-09T20:27:22.688964637Z",
"updatedAt": "2022-10-09T20:27:23.951017769Z"
"createdAt": "[now]",
"updatedAt": "[now]"
}
"###);
@@ -292,12 +263,12 @@ pub(crate) mod test {
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"548284a84de510f71e88e6cdea495cf5");
// movies
insta::assert_json_snapshot!(movies.metadata(), @r###"
insta::assert_json_snapshot!(movies.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
{
"uid": "movies",
"primaryKey": "id",
"createdAt": "2022-10-09T20:27:22.197788495Z",
"updatedAt": "2022-10-09T20:28:01.93111053Z"
"createdAt": "[now]",
"updatedAt": "[now]"
}
"###);
@@ -322,12 +293,12 @@ pub(crate) mod test {
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"d751713988987e9331980363e24189ce");
// spells
insta::assert_json_snapshot!(spells.metadata(), @r###"
insta::assert_json_snapshot!(spells.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
{
"uid": "dnd_spells",
"primaryKey": "index",
"createdAt": "2022-10-09T20:27:24.242683494Z",
"updatedAt": "2022-10-09T20:27:24.312809641Z"
"createdAt": "[now]",
"updatedAt": "[now]"
}
"###);
@@ -369,12 +340,12 @@ pub(crate) mod test {
assert!(indexes.is_empty());
// products
insta::assert_json_snapshot!(products.metadata(), @r###"
insta::assert_json_snapshot!(products.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
{
"uid": "products",
"primaryKey": "sku",
"createdAt": "2023-01-30T16:25:56.595257Z",
"updatedAt": "2023-01-30T16:25:58.70348Z"
"createdAt": "[now]",
"updatedAt": "[now]"
}
"###);
@@ -384,12 +355,12 @@ pub(crate) mod test {
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"548284a84de510f71e88e6cdea495cf5");
// movies
insta::assert_json_snapshot!(movies.metadata(), @r###"
insta::assert_json_snapshot!(movies.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
{
"uid": "movies",
"primaryKey": "id",
"createdAt": "2023-01-30T16:25:56.192178Z",
"updatedAt": "2023-01-30T16:25:56.455714Z"
"createdAt": "[now]",
"updatedAt": "[now]"
}
"###);
@@ -399,12 +370,12 @@ pub(crate) mod test {
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"0227598af846e574139ee0b80e03a720");
// spells
insta::assert_json_snapshot!(spells.metadata(), @r###"
insta::assert_json_snapshot!(spells.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
{
"uid": "dnd_spells",
"primaryKey": "index",
"createdAt": "2023-01-30T16:25:58.876405Z",
"updatedAt": "2023-01-30T16:25:59.079906Z"
"createdAt": "[now]",
"updatedAt": "[now]"
}
"###);

View File

@@ -227,14 +227,4 @@ impl UpdateStatus {
_ => None,
}
}
pub fn finished_at(&self) -> Option<OffsetDateTime> {
match self {
UpdateStatus::Processing(_) => None,
UpdateStatus::Enqueued(_) => None,
UpdateStatus::Processed(u) => Some(u.processed_at),
UpdateStatus::Aborted(_) => None,
UpdateStatus::Failed(u) => Some(u.failed_at),
}
}
}

View File

@@ -12,7 +12,6 @@ license.workspace = true
[dependencies]
anyhow = "1.0.70"
backtrace = "0.3.69"
bincode = "1.3.3"
csv = "1.2.1"
derive_builder = "0.12.0"

View File

@@ -19,7 +19,6 @@ one indexing operation.
use std::collections::{BTreeSet, HashSet};
use std::ffi::OsStr;
use std::fmt;
use std::fs::{self, File};
use std::io::BufWriter;
@@ -200,29 +199,6 @@ impl Batch {
}
}
impl fmt::Display for Batch {
/// A text used when we debug the profiling reports.
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let index_uid = self.index_uid();
let tasks = self.ids();
match self {
Batch::TaskCancelation { .. } => f.write_str("TaskCancelation")?,
Batch::TaskDeletion(_) => f.write_str("TaskDeletion")?,
Batch::SnapshotCreation(_) => f.write_str("SnapshotCreation")?,
Batch::Dump(_) => f.write_str("Dump")?,
Batch::IndexOperation { op, .. } => write!(f, "{op}")?,
Batch::IndexCreation { .. } => f.write_str("IndexCreation")?,
Batch::IndexUpdate { .. } => f.write_str("IndexUpdate")?,
Batch::IndexDeletion { .. } => f.write_str("IndexDeletion")?,
Batch::IndexSwap { .. } => f.write_str("IndexSwap")?,
};
match index_uid {
Some(name) => f.write_fmt(format_args!(" on {name:?} from tasks: {tasks:?}")),
None => f.write_fmt(format_args!(" from tasks: {tasks:?}")),
}
}
}
impl IndexOperation {
pub fn index_uid(&self) -> &str {
match self {
@@ -237,30 +213,6 @@ impl IndexOperation {
}
}
impl fmt::Display for IndexOperation {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
IndexOperation::DocumentOperation { .. } => {
f.write_str("IndexOperation::DocumentOperation")
}
IndexOperation::DocumentDeletion { .. } => {
f.write_str("IndexOperation::DocumentDeletion")
}
IndexOperation::IndexDocumentDeletionByFilter { .. } => {
f.write_str("IndexOperation::IndexDocumentDeletionByFilter")
}
IndexOperation::DocumentClear { .. } => f.write_str("IndexOperation::DocumentClear"),
IndexOperation::Settings { .. } => f.write_str("IndexOperation::Settings"),
IndexOperation::DocumentClearAndSetting { .. } => {
f.write_str("IndexOperation::DocumentClearAndSetting")
}
IndexOperation::SettingsAndDocumentOperation { .. } => {
f.write_str("IndexOperation::SettingsAndDocumentOperation")
}
}
}
}
impl IndexScheduler {
/// Convert an [`BatchKind`](crate::autobatcher::BatchKind) into a [`Batch`].
///
@@ -629,7 +581,7 @@ impl IndexScheduler {
self.breakpoint(crate::Breakpoint::InsideProcessBatch);
}
puffin::profile_function!(batch.to_string());
puffin::profile_function!(format!("{:?}", batch));
match batch {
Batch::TaskCancelation { mut task, previous_started_at, previous_processing_tasks } => {
@@ -825,10 +777,6 @@ impl IndexScheduler {
// 2. dump the tasks
let mut dump_tasks = dump.create_tasks_queue()?;
for ret in self.all_tasks.iter(&rtxn)? {
if self.must_stop_processing.get() {
return Err(Error::AbortedTask);
}
let (_, mut t) = ret?;
let status = t.status;
let content_file = t.content_uuid();
@@ -849,9 +797,6 @@ impl IndexScheduler {
// 2.1. Dump the `content_file` associated with the task if there is one and the task is not finished yet.
if let Some(content_file) = content_file {
if self.must_stop_processing.get() {
return Err(Error::AbortedTask);
}
if status == Status::Enqueued {
let content_file = self.file_store.get_update(content_file)?;
@@ -891,9 +836,6 @@ impl IndexScheduler {
// 3.1. Dump the documents
for ret in index.all_documents(&rtxn)? {
if self.must_stop_processing.get() {
return Err(Error::AbortedTask);
}
let (_id, doc) = ret?;
let document = milli::obkv_to_json(&all_fields, &fields_ids_map, doc)?;
index_dumper.push_document(&document)?;
@@ -906,16 +848,13 @@ impl IndexScheduler {
})?;
// 4. Dump experimental feature settings
let features = self.features().runtime_features();
let features = self.features()?.runtime_features();
dump.create_experimental_features(features)?;
let dump_uid = started_at.format(format_description!(
"[year repr:full][month repr:numerical][day padding:zero]-[hour padding:zero][minute padding:zero][second padding:zero][subsecond digits:3]"
)).unwrap();
if self.must_stop_processing.get() {
return Err(Error::AbortedTask);
}
let path = self.dumps_path.join(format!("{}.dump", dump_uid));
let file = File::create(path)?;
dump.persist_to(BufWriter::new(file))?;

View File

@@ -108,8 +108,6 @@ pub enum Error {
TaskDeletionWithEmptyQuery,
#[error("Query parameters to filter the tasks to cancel are missing. Available query parameters are: `uids`, `indexUids`, `statuses`, `types`, `canceledBy`, `beforeEnqueuedAt`, `afterEnqueuedAt`, `beforeStartedAt`, `afterStartedAt`, `beforeFinishedAt`, `afterFinishedAt`.")]
TaskCancelationWithEmptyQuery,
#[error("Aborted task")]
AbortedTask,
#[error(transparent)]
Dump(#[from] dump::Error),
@@ -117,13 +115,8 @@ pub enum Error {
Heed(#[from] heed::Error),
#[error(transparent)]
Milli(#[from] milli::Error),
#[error("An unexpected crash occurred when processing the task. {}", {
match .0 {
Some(report) => format!("Get /reports/{}", report),
None => "No report was saved.".into(),
}
})]
ProcessBatchPanicked(Option<uuid::Uuid>),
#[error("An unexpected crash occurred when processing the task.")]
ProcessBatchPanicked,
#[error(transparent)]
FileStore(#[from] file_store::Error),
#[error(transparent)]
@@ -182,11 +175,10 @@ impl Error {
| Error::TaskNotFound(_)
| Error::TaskDeletionWithEmptyQuery
| Error::TaskCancelationWithEmptyQuery
| Error::AbortedTask
| Error::Dump(_)
| Error::Heed(_)
| Error::Milli(_)
| Error::ProcessBatchPanicked(_)
| Error::ProcessBatchPanicked
| Error::FileStore(_)
| Error::IoError(_)
| Error::Persist(_)
@@ -229,7 +221,7 @@ impl ErrorCode for Error {
Error::NoSpaceLeftInTaskQueue => Code::NoSpaceLeftOnDevice,
Error::Dump(e) => e.error_code(),
Error::Milli(e) => e.error_code(),
Error::ProcessBatchPanicked(_) => Code::Internal,
Error::ProcessBatchPanicked => Code::Internal,
Error::Heed(e) => e.error_code(),
Error::HeedTransaction(e) => e.error_code(),
Error::FileStore(e) => e.error_code(),
@@ -244,9 +236,6 @@ impl ErrorCode for Error {
Error::TaskDatabaseUpdate(_) => Code::Internal,
Error::CreateBatch(_) => Code::Internal,
// This one should never be seen by the end user
Error::AbortedTask => Code::Internal,
#[cfg(test)]
Error::PlannedFailure => Code::Internal,
}

View File

@@ -1,8 +1,6 @@
use std::sync::{Arc, RwLock};
use meilisearch_types::features::{InstanceTogglableFeatures, RuntimeTogglableFeatures};
use meilisearch_types::heed::types::{SerdeJson, Str};
use meilisearch_types::heed::{Database, Env, RwTxn};
use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn};
use crate::error::FeatureNotEnabledError;
use crate::Result;
@@ -11,19 +9,20 @@ const EXPERIMENTAL_FEATURES: &str = "experimental-features";
#[derive(Clone)]
pub(crate) struct FeatureData {
persisted: Database<Str, SerdeJson<RuntimeTogglableFeatures>>,
runtime: Arc<RwLock<RuntimeTogglableFeatures>>,
runtime: Database<Str, SerdeJson<RuntimeTogglableFeatures>>,
instance: InstanceTogglableFeatures,
}
#[derive(Debug, Clone, Copy)]
pub struct RoFeatures {
runtime: RuntimeTogglableFeatures,
instance: InstanceTogglableFeatures,
}
impl RoFeatures {
fn new(data: &FeatureData) -> Self {
let runtime = data.runtime_features();
Self { runtime }
fn new(txn: RoTxn<'_>, data: &FeatureData) -> Result<Self> {
let runtime = data.runtime_features(txn)?;
Ok(Self { runtime, instance: data.instance })
}
pub fn runtime_features(&self) -> RuntimeTogglableFeatures {
@@ -44,13 +43,13 @@ impl RoFeatures {
}
pub fn check_metrics(&self) -> Result<()> {
if self.runtime.metrics {
if self.instance.metrics {
Ok(())
} else {
Err(FeatureNotEnabledError {
disabled_action: "Getting metrics",
feature: "metrics",
issue_link: "https://github.com/meilisearch/product/discussions/625",
issue_link: "https://github.com/meilisearch/meilisearch/discussions/3518",
}
.into())
}
@@ -68,36 +67,15 @@ impl RoFeatures {
.into())
}
}
pub fn check_puffin(&self) -> Result<()> {
if self.runtime.export_puffin_reports {
Ok(())
} else {
Err(FeatureNotEnabledError {
disabled_action: "Outputting Puffin reports to disk",
feature: "export puffin reports",
issue_link: "https://github.com/meilisearch/product/discussions/693",
}
.into())
}
}
}
impl FeatureData {
pub fn new(env: &Env, instance_features: InstanceTogglableFeatures) -> Result<Self> {
let mut wtxn = env.write_txn()?;
let runtime_features_db = env.create_database(&mut wtxn, Some(EXPERIMENTAL_FEATURES))?;
let runtime_features = env.create_database(&mut wtxn, Some(EXPERIMENTAL_FEATURES))?;
wtxn.commit()?;
let txn = env.read_txn()?;
let persisted_features: RuntimeTogglableFeatures =
runtime_features_db.get(&txn, EXPERIMENTAL_FEATURES)?.unwrap_or_default();
let runtime = Arc::new(RwLock::new(RuntimeTogglableFeatures {
metrics: instance_features.metrics || persisted_features.metrics,
..persisted_features
}));
Ok(Self { persisted: runtime_features_db, runtime })
Ok(Self { runtime: runtime_features, instance: instance_features })
}
pub fn put_runtime_features(
@@ -105,25 +83,16 @@ impl FeatureData {
mut wtxn: RwTxn,
features: RuntimeTogglableFeatures,
) -> Result<()> {
self.persisted.put(&mut wtxn, EXPERIMENTAL_FEATURES, &features)?;
self.runtime.put(&mut wtxn, EXPERIMENTAL_FEATURES, &features)?;
wtxn.commit()?;
// safe to unwrap, the lock will only fail if:
// 1. requested by the same thread concurrently -> it is called and released in methods that don't call each other
// 2. there's a panic while the thread is held -> it is only used for an assignment here.
let mut toggled_features = self.runtime.write().unwrap();
*toggled_features = features;
Ok(())
}
fn runtime_features(&self) -> RuntimeTogglableFeatures {
// sound to unwrap, the lock will only fail if:
// 1. requested by the same thread concurrently -> it is called and released in methods that don't call each other
// 2. there's a panic while the thread is held -> it is only used for copying the data here
*self.runtime.read().unwrap()
fn runtime_features(&self, txn: RoTxn) -> Result<RuntimeTogglableFeatures> {
Ok(self.runtime.get(&txn, EXPERIMENTAL_FEATURES)?.unwrap_or_default())
}
pub fn features(&self) -> RoFeatures {
RoFeatures::new(self)
pub fn features(&self, txn: RoTxn) -> Result<RoFeatures> {
RoFeatures::new(txn, self)
}
}

View File

@@ -30,7 +30,6 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
index_mapper,
features: _,
max_number_of_tasks: _,
puffin_frame: _,
wake_up: _,
dumps_path: _,
snapshots_path: _,
@@ -39,7 +38,6 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
test_breakpoint_sdr: _,
planned_failures: _,
run_loop_iteration: _,
panic_reader: _,
} = scheduler;
let rtxn = env.read_txn().unwrap();

View File

@@ -26,7 +26,6 @@ mod index_mapper;
#[cfg(test)]
mod insta_snapshot;
mod lru;
mod panic_hook;
mod utils;
mod uuid_codec;
@@ -34,7 +33,6 @@ pub type Result<T> = std::result::Result<T, Error>;
pub type TaskId = u32;
use std::collections::{BTreeMap, HashMap};
use std::fs::File;
use std::ops::{Bound, RangeBounds};
use std::path::{Path, PathBuf};
use std::sync::atomic::AtomicBool;
@@ -54,9 +52,6 @@ use meilisearch_types::milli::documents::DocumentsBatchBuilder;
use meilisearch_types::milli::update::IndexerConfig;
use meilisearch_types::milli::{self, CboRoaringBitmapCodec, Index, RoaringBitmapCodec, BEU32};
use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task};
use panic_hook::ReportReader;
pub use panic_hook::{Panic, Report, ReportRegistry};
use puffin::FrameView;
use roaring::RoaringBitmap;
use synchronoise::SignalEvent;
use time::format_description::well_known::Rfc3339;
@@ -319,9 +314,6 @@ pub struct IndexScheduler {
/// the finished tasks automatically.
pub(crate) max_number_of_tasks: usize,
/// A frame to output the indexation profiling files to disk.
pub(crate) puffin_frame: Arc<puffin::GlobalFrameView>,
/// The path used to create the dumps.
pub(crate) dumps_path: PathBuf,
@@ -334,8 +326,6 @@ pub struct IndexScheduler {
/// The path to the version file of Meilisearch.
pub(crate) version_file_path: PathBuf,
pub(crate) panic_reader: ReportReader,
// ================= test
// The next entry is dedicated to the tests.
/// Provide a way to set a breakpoint in multiple part of the scheduler.
@@ -374,7 +364,6 @@ impl IndexScheduler {
wake_up: self.wake_up.clone(),
autobatching_enabled: self.autobatching_enabled,
max_number_of_tasks: self.max_number_of_tasks,
puffin_frame: self.puffin_frame.clone(),
snapshots_path: self.snapshots_path.clone(),
dumps_path: self.dumps_path.clone(),
auth_path: self.auth_path.clone(),
@@ -386,7 +375,6 @@ impl IndexScheduler {
#[cfg(test)]
run_loop_iteration: self.run_loop_iteration.clone(),
features: self.features.clone(),
panic_reader: self.panic_reader.clone(),
}
}
}
@@ -444,12 +432,6 @@ impl IndexScheduler {
let finished_at = env.create_database(&mut wtxn, Some(db_name::FINISHED_AT))?;
wtxn.commit()?;
const MAX_REPORT_COUNT: usize = 20;
let panic_reader = panic_hook::ReportReader::install_panic_hook(
std::num::NonZeroUsize::new(MAX_REPORT_COUNT).unwrap(),
);
// allow unreachable_code to get rids of the warning in the case of a test build.
let this = Self {
must_stop_processing: MustStopProcessing::default(),
@@ -475,7 +457,6 @@ impl IndexScheduler {
env,
// we want to start the loop right away in case meilisearch was ctrl+Ced while processing things
wake_up: Arc::new(SignalEvent::auto(true)),
puffin_frame: Arc::new(puffin::GlobalFrameView::default()),
autobatching_enabled: options.autobatching_enabled,
max_number_of_tasks: options.max_number_of_tasks,
dumps_path: options.dumps_path,
@@ -490,7 +471,6 @@ impl IndexScheduler {
#[cfg(test)]
run_loop_iteration: Arc::new(RwLock::new(0)),
features,
panic_reader,
};
this.run();
@@ -592,46 +572,17 @@ impl IndexScheduler {
run.wake_up.wait();
loop {
let puffin_enabled = run.features().check_puffin().is_ok();
puffin::set_scopes_on(puffin_enabled);
puffin::GlobalProfiler::lock().new_frame();
match run.tick() {
Ok(TickOutcome::TickAgain(_)) => (),
Ok(TickOutcome::WaitForSignal) => run.wake_up.wait(),
Err(e) => {
log::error!("{e}");
log::error!("{}", e);
// Wait one second when an irrecoverable error occurs.
if !e.is_recoverable() {
std::thread::sleep(Duration::from_secs(1));
}
}
}
// Let's write the previous frame to disk but only if
// the user wanted to profile with puffin.
if puffin_enabled {
let mut frame_view = run.puffin_frame.lock();
if !frame_view.is_empty() {
let now = OffsetDateTime::now_utc();
let mut file = match File::create(format!("{}.puffin", now)) {
Ok(file) => file,
Err(e) => {
log::error!("{e}");
continue;
}
};
if let Err(e) = frame_view.save_to_writer(&mut file) {
log::error!("{e}");
}
if let Err(e) = file.sync_all() {
log::error!("{e}");
}
// We erase this frame view as it is no more useful. We want to
// measure the new frames now that we exported the previous ones.
*frame_view = FrameView::default();
}
}
}
})
.unwrap();
@@ -1111,6 +1062,8 @@ impl IndexScheduler {
self.breakpoint(Breakpoint::Start);
}
puffin::GlobalProfiler::lock().new_frame();
self.cleanup_task_queue()?;
let rtxn = self.env.read_txn().map_err(Error::HeedTransaction)?;
@@ -1143,10 +1096,7 @@ impl IndexScheduler {
.name(String::from("batch-operation"))
.spawn(move || cloned_index_scheduler.process_batch(batch))
.unwrap();
self.panic_reader
.join_thread(handle)
.unwrap_or_else(|maybe_report| Err(Error::ProcessBatchPanicked(maybe_report)))
handle.join().unwrap_or(Err(Error::ProcessBatchPanicked))
};
#[cfg(test)]
@@ -1183,8 +1133,7 @@ impl IndexScheduler {
// If we have an abortion error we must stop the tick here and re-schedule tasks.
Err(Error::Milli(milli::Error::InternalError(
milli::InternalError::AbortedIndexation,
)))
| Err(Error::AbortedTask) => {
))) => {
#[cfg(test)]
self.breakpoint(Breakpoint::AbortedIndexation);
wtxn.abort().map_err(Error::HeedTransaction)?;
@@ -1310,8 +1259,9 @@ impl IndexScheduler {
Ok(IndexStats { is_indexing, inner_stats: index_stats })
}
pub fn features(&self) -> RoFeatures {
self.features.features()
pub fn features(&self) -> Result<RoFeatures> {
let rtxn = self.read_txn()?;
self.features.features(rtxn)
}
pub fn put_runtime_features(&self, features: RuntimeTogglableFeatures) -> Result<()> {
@@ -1327,10 +1277,6 @@ impl IndexScheduler {
}
}
pub fn reports(&self) -> Arc<RwLock<ReportRegistry>> {
self.panic_reader.registry()
}
/// Blocks the thread until the test handle asks to progress to/through this breakpoint.
///
/// Two messages are sent through the channel for each breakpoint.
@@ -4344,26 +4290,4 @@ mod tests {
}
"###);
}
#[test]
fn cancel_processing_dump() {
let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]);
let dump_creation = KindWithContent::DumpCreation { keys: Vec::new(), instance_uid: None };
let dump_cancellation = KindWithContent::TaskCancelation {
query: "cancel dump".to_owned(),
tasks: RoaringBitmap::from_iter([0]),
};
let _ = index_scheduler.register(dump_creation).unwrap();
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_dump_register");
handle.advance_till([Start, BatchCreated, InsideProcessBatch]);
let _ = index_scheduler.register(dump_cancellation).unwrap();
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "cancel_registered");
snapshot!(format!("{:?}", handle.advance()), @"AbortedIndexation");
handle.advance_one_successful_batch();
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "cancel_processed");
}
}

View File

@@ -1,211 +0,0 @@
//! Panic hook designed to fetch a panic from a subthread and recover it on join.
use std::collections::VecDeque;
use std::num::NonZeroUsize;
use std::panic::PanicInfo;
use std::sync::{Arc, RwLock};
use std::thread::{JoinHandle, ThreadId};
use backtrace::Backtrace;
// Represents a panic in a shallowy structured fashion
pub struct Panic {
pub payload: Option<String>,
pub location: Option<String>,
pub thread_name: Option<String>,
pub thread_id: ThreadId,
pub backtrace: Backtrace,
}
/// A panic enriched with a unique id
#[derive(serde::Serialize)]
pub struct Report {
pub id: uuid::Uuid,
#[serde(serialize_with = "serialize_panic")]
pub panic: Panic,
}
fn serialize_panic<S>(panic: &Panic, s: S) -> std::result::Result<S::Ok, S::Error>
where
S: serde::Serializer,
{
use serde::Serialize;
panic.to_json().serialize(s)
}
impl Report {
pub fn new(panic: Panic) -> Self {
Self { id: uuid::Uuid::new_v4(), panic }
}
}
impl Panic {
pub fn to_json(&self) -> serde_json::Value {
json::panic_to_json(self)
}
}
mod json {
use backtrace::{Backtrace, BacktraceFrame, BacktraceSymbol};
use serde_json::{json, Value};
use super::Panic;
fn symbol_to_json(symbol: &BacktraceSymbol) -> Value {
let address = symbol.addr().map(|addr| format!("{:p}", addr));
let column = symbol.colno();
let line = symbol.lineno();
let function = symbol.name().map(|name| name.to_string());
let filename = symbol.filename();
json!({
"function": function,
"filename": filename,
"line": line,
"column": column,
"address": address,
})
}
fn frame_to_json(frame: &BacktraceFrame) -> Value {
let symbols: Vec<_> = frame.symbols().iter().map(symbol_to_json).collect();
match symbols.as_slice() {
[] => {
let address = format!("{:p}", frame.ip());
json!({"address": address})
}
[symbol] => json!(symbol),
symbols => json!(symbols),
}
}
fn backtrace_to_json(backtrace: &Backtrace) -> Value {
let frames: Vec<_> = backtrace.frames().iter().map(frame_to_json).collect();
json!(frames)
}
pub fn panic_to_json(panic: &Panic) -> Value {
let thread_id = format!("{:?}", panic.thread_id);
serde_json::json!({
"payload": panic.payload,
"location": panic.location,
"thread": {
"id": thread_id,
"name": panic.thread_name,
},
"backtrace": backtrace_to_json(&panic.backtrace),
})
}
}
struct ReportWriter(Arc<RwLock<ReportRegistry>>);
/// A FIFO queue of reports.
pub struct ReportRegistry {
reports: std::collections::VecDeque<Report>,
}
impl ReportRegistry {
pub fn new(capacity: NonZeroUsize) -> Self {
Self { reports: VecDeque::with_capacity(capacity.get()) }
}
pub fn push(&mut self, report: Report) -> Option<Report> {
let popped = if self.reports.len() == self.reports.capacity() {
self.reports.pop_back()
} else {
None
};
self.reports.push_front(report);
popped
}
pub fn iter(&self) -> impl Iterator<Item = &Report> {
self.reports.iter()
}
pub fn find(&self, report_id: uuid::Uuid) -> Option<&Report> {
self.iter().find(|report| report.id == report_id)
}
}
impl ReportWriter {
#[track_caller]
fn write_panic(&self, panic_info: &PanicInfo<'_>) {
let payload = panic_info
.payload()
.downcast_ref::<&str>()
.map(ToString::to_string)
.or_else(|| panic_info.payload().downcast_ref::<String>().cloned());
let location = panic_info.location().map(|loc| {
format!(
"{file}:{line}:{column}",
file = loc.file(),
line = loc.line(),
column = loc.column()
)
});
let thread_name = std::thread::current().name().map(ToString::to_string);
let thread_id = std::thread::current().id();
let backtrace = backtrace::Backtrace::new();
let panic = Panic { payload, location, thread_name, thread_id, backtrace };
let report = Report::new(panic);
log::error!(
"An unexpected panic occurred on thread {name} at {location}: {payload}. See report '{report}' for details.",
payload = report.panic.payload.as_deref().unwrap_or("Box<dyn Any>"),
name = report.panic.thread_name.as_deref().unwrap_or("<unnamed>"),
location = report.panic.location.as_deref().unwrap_or("<unknown>"),
report = report.id,
);
if let Ok(mut registry) = self.0.write() {
if let Some(old_report) = registry.push(report) {
log::trace!("Forgetting report {} to make space for new report.", old_report.id)
}
}
}
}
/// Reads the reports written in case of a panic.
#[derive(Clone)]
pub struct ReportReader(Arc<RwLock<ReportRegistry>>);
impl ReportReader {
/// Installs a new global panic hook, overriding any existing hook.
///
/// The hook writes any incoming panic in reports.
/// The reports can then be read by the returned [`ReportReader`].
pub fn install_panic_hook(capacity: NonZeroUsize) -> Self {
let registry = Arc::new(RwLock::new(ReportRegistry::new(capacity)));
let reader = ReportReader(registry.clone());
let writer = ReportWriter(registry.clone());
std::panic::set_hook(Box::new(move |panic_info| writer.write_panic(panic_info)));
reader
}
/// Join the thread corresponding to the passed handle, recovering either its value
/// or, in case the thread panicked, the id of the report corresponding to the panic.
///
/// The id can be used to read the report from the [`self.registry()`].
pub fn join_thread<T>(&self, thread: JoinHandle<T>) -> Result<T, Option<uuid::Uuid>> {
let thread_id = thread.thread().id();
thread.join().map_err(|_e| {
self.0
.read()
.unwrap()
.iter()
.find(|report| report.panic.thread_id == thread_id)
.map(|report| report.id)
})
}
/// Returns a registry that can be used to read the reports written during a panic.
pub fn registry(&self) -> Arc<RwLock<ReportRegistry>> {
self.0.clone()
}
}

View File

@@ -1,35 +0,0 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { dump_uid: None }, kind: DumpCreation { keys: [], instance_uid: None }}
----------------------------------------------------------------------
### Status:
enqueued [0,]
----------------------------------------------------------------------
### Kind:
"dumpCreation" [0,]
----------------------------------------------------------------------
### Index Tasks:
----------------------------------------------------------------------
### Index Mapper:
----------------------------------------------------------------------
### Canceled By:
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
----------------------------------------------------------------------
### Started At:
----------------------------------------------------------------------
### Finished At:
----------------------------------------------------------------------
### File Store:
----------------------------------------------------------------------

View File

@@ -1,45 +0,0 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: canceled, canceled_by: 1, details: { dump_uid: None }, kind: DumpCreation { keys: [], instance_uid: None }}
1 {uid: 1, status: succeeded, details: { matched_tasks: 1, canceled_tasks: Some(0), original_filter: "cancel dump" }, kind: TaskCancelation { query: "cancel dump", tasks: RoaringBitmap<[0]> }}
----------------------------------------------------------------------
### Status:
enqueued []
succeeded [1,]
canceled [0,]
----------------------------------------------------------------------
### Kind:
"taskCancelation" [1,]
"dumpCreation" [0,]
----------------------------------------------------------------------
### Index Tasks:
----------------------------------------------------------------------
### Index Mapper:
----------------------------------------------------------------------
### Canceled By:
1 [0,]
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Started At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Finished At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### File Store:
----------------------------------------------------------------------

View File

@@ -1,38 +0,0 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[0,]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { dump_uid: None }, kind: DumpCreation { keys: [], instance_uid: None }}
1 {uid: 1, status: enqueued, details: { matched_tasks: 1, canceled_tasks: None, original_filter: "cancel dump" }, kind: TaskCancelation { query: "cancel dump", tasks: RoaringBitmap<[0]> }}
----------------------------------------------------------------------
### Status:
enqueued [0,1,]
----------------------------------------------------------------------
### Kind:
"taskCancelation" [1,]
"dumpCreation" [0,]
----------------------------------------------------------------------
### Index Tasks:
----------------------------------------------------------------------
### Index Mapper:
----------------------------------------------------------------------
### Canceled By:
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Started At:
----------------------------------------------------------------------
### Finished At:
----------------------------------------------------------------------
### File Store:
----------------------------------------------------------------------

View File

@@ -88,6 +88,7 @@ pub trait ErrorCode {
}
}
#[allow(clippy::enum_variant_names)]
enum ErrorType {
Internal,
InvalidRequest,
@@ -297,7 +298,6 @@ MissingSwapIndexes , InvalidRequest , BAD_REQUEST ;
MissingTaskFilters , InvalidRequest , BAD_REQUEST ;
NoSpaceLeftOnDevice , System , UNPROCESSABLE_ENTITY;
PayloadTooLarge , InvalidRequest , PAYLOAD_TOO_LARGE ;
ReportNotFound , InvalidRequest , NOT_FOUND ;
TaskNotFound , InvalidRequest , NOT_FOUND ;
TooManyOpenFiles , System , UNPROCESSABLE_ENTITY ;
UnretrievableDocument , Internal , BAD_REQUEST ;

View File

@@ -5,8 +5,6 @@ use serde::{Deserialize, Serialize};
pub struct RuntimeTogglableFeatures {
pub score_details: bool,
pub vector_store: bool,
pub metrics: bool,
pub export_puffin_reports: bool,
}
#[derive(Default, Debug, Clone, Copy)]

View File

@@ -69,7 +69,8 @@ permissive-json-pointer = { path = "../permissive-json-pointer" }
pin-project-lite = "0.2.9"
platform-dirs = "0.3.0"
prometheus = { version = "0.13.3", features = ["process"] }
puffin = { version = "0.16.0", features = ["serialization"] }
puffin = "0.16.0"
puffin_http = { version = "0.13.0", optional = true }
rand = "0.8.5"
rayon = "1.7.0"
regex = "1.7.3"
@@ -134,6 +135,7 @@ zip = { version = "0.6.4", optional = true }
[features]
default = ["analytics", "meilisearch-types/all-tokenizations", "mini-dashboard"]
analytics = ["segment"]
profile-with-puffin = ["dep:puffin_http"]
mini-dashboard = [
"actix-web-static-files",
"static-files",

View File

@@ -51,8 +51,6 @@ pub enum MeilisearchHttpError {
DocumentFormat(#[from] DocumentFormatError),
#[error(transparent)]
Join(#[from] JoinError),
#[error("Report `{0}` not found. Either its id is incorrect, or it was deleted. To save on memory, only a limited amount of reports are kept.")]
ReportNotFound(uuid::Uuid),
}
impl ErrorCode for MeilisearchHttpError {
@@ -76,7 +74,6 @@ impl ErrorCode for MeilisearchHttpError {
MeilisearchHttpError::FileStore(_) => Code::Internal,
MeilisearchHttpError::DocumentFormat(e) => e.error_code(),
MeilisearchHttpError::Join(_) => Code::Internal,
MeilisearchHttpError::ReportNotFound(_) => Code::ReportNotFound,
}
}
}

View File

@@ -114,7 +114,10 @@ pub fn create_app(
.configure(routes::configure)
.configure(|s| dashboard(s, enable_dashboard));
let app = app.wrap(middleware::RouteMetrics);
let app = app.wrap(actix_web::middleware::Condition::new(
opt.experimental_enable_metrics,
middleware::RouteMetrics,
));
app.wrap(
Cors::default()
.send_wildcard()

View File

@@ -30,6 +30,10 @@ fn setup(opt: &Opt) -> anyhow::Result<()> {
async fn main() -> anyhow::Result<()> {
let (opt, config_read_from) = Opt::try_build()?;
#[cfg(feature = "profile-with-puffin")]
let _server = puffin_http::Server::new(&format!("0.0.0.0:{}", puffin_http::DEFAULT_PORT))?;
puffin::set_scopes_on(cfg!(feature = "profile-with-puffin"));
anyhow::ensure!(
!(cfg!(windows) && opt.experimental_reduce_indexing_memory_usage),
"The `experimental-reduce-indexing-memory-usage` flag is not supported on Windows"

View File

@@ -3,10 +3,8 @@
use std::future::{ready, Ready};
use actix_web::dev::{self, Service, ServiceRequest, ServiceResponse, Transform};
use actix_web::web::Data;
use actix_web::Error;
use futures_util::future::LocalBoxFuture;
use index_scheduler::IndexScheduler;
use prometheus::HistogramTimer;
pub struct RouteMetrics;
@@ -49,27 +47,19 @@ where
fn call(&self, req: ServiceRequest) -> Self::Future {
let mut histogram_timer: Option<HistogramTimer> = None;
// calling unwrap here is safe because index scheduler is added to app data while creating actix app.
// also, the tests will fail if this is not present.
let index_scheduler = req.app_data::<Data<IndexScheduler>>().unwrap();
let features = index_scheduler.features();
if features.check_metrics().is_ok() {
let request_path = req.path();
let is_registered_resource = req.resource_map().has_resource(request_path);
if is_registered_resource {
let request_method = req.method().to_string();
histogram_timer = Some(
crate::metrics::MEILISEARCH_HTTP_RESPONSE_TIME_SECONDS
.with_label_values(&[&request_method, request_path])
.start_timer(),
);
crate::metrics::MEILISEARCH_HTTP_REQUESTS_TOTAL
let request_path = req.path();
let is_registered_resource = req.resource_map().has_resource(request_path);
if is_registered_resource {
let request_method = req.method().to_string();
histogram_timer = Some(
crate::metrics::MEILISEARCH_HTTP_RESPONSE_TIME_SECONDS
.with_label_values(&[&request_method, request_path])
.inc();
}
};
.start_timer(),
);
crate::metrics::MEILISEARCH_HTTP_REQUESTS_TOTAL
.with_label_values(&[&request_method, request_path])
.inc();
}
let fut = self.service.call(req);

View File

@@ -29,12 +29,12 @@ async fn get_features(
>,
req: HttpRequest,
analytics: Data<dyn Analytics>,
) -> HttpResponse {
let features = index_scheduler.features();
) -> Result<HttpResponse, ResponseError> {
let features = index_scheduler.features()?;
analytics.publish("Experimental features Seen".to_string(), json!(null), Some(&req));
debug!("returns: {:?}", features.runtime_features());
HttpResponse::Ok().json(features.runtime_features())
Ok(HttpResponse::Ok().json(features.runtime_features()))
}
#[derive(Debug, Deserr)]
@@ -44,10 +44,6 @@ pub struct RuntimeTogglableFeatures {
pub score_details: Option<bool>,
#[deserr(default)]
pub vector_store: Option<bool>,
#[deserr(default)]
pub metrics: Option<bool>,
#[deserr(default)]
pub export_puffin_reports: Option<bool>,
}
async fn patch_features(
@@ -59,36 +55,26 @@ async fn patch_features(
req: HttpRequest,
analytics: Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
let features = index_scheduler.features();
let features = index_scheduler.features()?;
let old_features = features.runtime_features();
let new_features = meilisearch_types::features::RuntimeTogglableFeatures {
score_details: new_features.0.score_details.unwrap_or(old_features.score_details),
vector_store: new_features.0.vector_store.unwrap_or(old_features.vector_store),
metrics: new_features.0.metrics.unwrap_or(old_features.metrics),
export_puffin_reports: new_features
.0
.export_puffin_reports
.unwrap_or(old_features.export_puffin_reports),
};
// explicitly destructure for analytics rather than using the `Serialize` implementation, because
// it renames fields to camelCase, which we don't want for analytics.
// **Do not** ignore fields with `..` or `_` here, because we want to add them in the future.
let meilisearch_types::features::RuntimeTogglableFeatures {
score_details,
vector_store,
metrics,
export_puffin_reports,
} = new_features;
let meilisearch_types::features::RuntimeTogglableFeatures { score_details, vector_store } =
new_features;
analytics.publish(
"Experimental features Updated".to_string(),
json!({
"score_details": score_details,
"vector_store": vector_store,
"metrics": metrics,
"export_puffin_reports": export_puffin_reports,
}),
Some(&req),
);
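
The destructuring kept in this hunk is worth calling out: by naming every field and never using `..` or `_`, adding a field to the struct becomes a compile error at this spot, so the analytics payload cannot silently fall out of sync. A tiny illustration of the pattern (the struct is a stand-in, not the real Meilisearch type):

```rust
struct RuntimeTogglableFeatures {
    score_details: bool,
    vector_store: bool,
}

fn publish_analytics(features: RuntimeTogglableFeatures) {
    // No `..` and no `_`: if a third field is added to the struct above,
    // this pattern stops compiling and points straight at the omission.
    let RuntimeTogglableFeatures { score_details, vector_store } = features;
    println!("score_details={score_details} vector_store={vector_store}");
}

fn main() {
    publish_analytics(RuntimeTogglableFeatures { score_details: false, vector_store: true });
}
```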

View File

@@ -68,7 +68,7 @@ pub async fn search(
}
let index = index_scheduler.index(&index_uid)?;
let features = index_scheduler.features();
let features = index_scheduler.features()?;
let search_result = tokio::task::spawn_blocking(move || {
perform_facet_search(&index, search_query, facet_query, facet_name, features)
})

View File

@@ -157,7 +157,7 @@ pub async fn search_with_url_query(
let mut aggregate = SearchAggregator::from_query(&query, &req);
let index = index_scheduler.index(&index_uid)?;
let features = index_scheduler.features();
let features = index_scheduler.features()?;
let search_result =
tokio::task::spawn_blocking(move || perform_search(&index, query, features)).await?;
if let Ok(ref search_result) = search_result {
@@ -192,7 +192,7 @@ pub async fn search_with_post(
let index = index_scheduler.index(&index_uid)?;
let features = index_scheduler.features();
let features = index_scheduler.features()?;
let search_result =
tokio::task::spawn_blocking(move || perform_search(&index, query, features)).await?;
if let Ok(ref search_result) = search_result {

View File

@@ -19,7 +19,7 @@ pub async fn get_metrics(
index_scheduler: GuardedData<ActionPolicy<{ actions::METRICS_GET }>, Data<IndexScheduler>>,
auth_controller: Data<AuthController>,
) -> Result<HttpResponse, ResponseError> {
index_scheduler.features().check_metrics()?;
index_scheduler.features()?.check_metrics()?;
let auth_filters = index_scheduler.filters();
if !auth_filters.all_indexes_authorized() {
let mut error = ResponseError::from(AuthenticationError::InvalidToken);

View File

@@ -24,7 +24,6 @@ pub mod features;
pub mod indexes;
mod metrics;
mod multi_search;
mod reports;
mod snapshot;
mod swap_indexes;
pub mod tasks;
@@ -41,8 +40,7 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
.service(web::scope("/multi-search").configure(multi_search::configure))
.service(web::scope("/swap-indexes").configure(swap_indexes::configure))
.service(web::scope("/metrics").configure(metrics::configure))
.service(web::scope("/experimental-features").configure(features::configure))
.service(web::scope("/reports").configure(reports::configure));
.service(web::scope("/experimental-features").configure(features::configure));
}
#[derive(Debug, Serialize)]

View File

@@ -41,7 +41,7 @@ pub async fn multi_search_with_post(
let queries = params.into_inner().queries;
let mut multi_aggregate = MultiSearchAggregator::from_queries(&queries, &req);
let features = index_scheduler.features();
let features = index_scheduler.features()?;
// Explicitly expect a `(ResponseError, usize)` for the error type rather than `ResponseError` only,
// so that `?` doesn't work if it doesn't use `with_index`, ensuring that it is not forgotten in case of code

View File

@@ -1,39 +0,0 @@
use actix_web::web::{self, Data};
use actix_web::HttpResponse;
use index_scheduler::{IndexScheduler, Report};
use meilisearch_types::error::ResponseError;
use meilisearch_types::keys::actions;
use crate::extractors::authentication::policies::ActionPolicy;
use crate::extractors::authentication::GuardedData;
use crate::extractors::sequential_extractor::SeqHandler;
pub fn configure(cfg: &mut web::ServiceConfig) {
cfg.service(web::resource("").route(web::get().to(list_reports))).service(
web::scope("/{report_uid}")
.service(web::resource("").route(web::get().to(SeqHandler(get_report)))),
);
}
pub async fn list_reports(
index_scheduler: GuardedData<ActionPolicy<{ actions::SETTINGS_ALL }>, Data<IndexScheduler>>,
) -> Result<HttpResponse, ResponseError> {
let reports = &index_scheduler.reports();
let reports = &reports.read().unwrap();
let reports: Vec<&Report> = reports.iter().collect();
Ok(HttpResponse::Ok().json(reports))
}
pub async fn get_report(
index_scheduler: GuardedData<ActionPolicy<{ actions::SETTINGS_ALL }>, Data<IndexScheduler>>,
report_id: web::Path<uuid::Uuid>,
) -> Result<HttpResponse, ResponseError> {
let reports = &index_scheduler.reports();
let reports = &reports.read().unwrap();
let report = reports
.find(*report_id)
.ok_or(crate::error::MeilisearchHttpError::ReportNotFound(*report_id))?;
Ok(HttpResponse::Ok().json(report))
}

View File

@@ -2,12 +2,10 @@ use std::collections::{HashMap, HashSet};
use ::time::format_description::well_known::Rfc3339;
use maplit::{hashmap, hashset};
use meilisearch::Opt;
use once_cell::sync::Lazy;
use tempfile::TempDir;
use time::{Duration, OffsetDateTime};
use crate::common::{default_settings, Server, Value};
use crate::common::{Server, Value};
use crate::json;
pub static AUTHORIZATIONS: Lazy<HashMap<(&'static str, &'static str), HashSet<&'static str>>> =
@@ -197,9 +195,7 @@ async fn access_authorized_master_key() {
#[actix_rt::test]
async fn access_authorized_restricted_index() {
let dir = TempDir::new().unwrap();
let enable_metrics = Opt { experimental_enable_metrics: true, ..default_settings(dir.path()) };
let mut server = Server::new_auth_with_options(enable_metrics, dir).await;
let mut server = Server::new_auth().await;
for ((method, route), actions) in AUTHORIZATIONS.iter() {
for action in actions {
// create a new API key letting only the needed action.

View File

@@ -202,10 +202,6 @@ impl Server {
pub async fn set_features(&self, value: Value) -> (Value, StatusCode) {
self.service.patch("/experimental-features", value).await
}
pub async fn get_metrics(&self) -> (Value, StatusCode) {
self.service.get("/metrics").await
}
}
pub fn default_settings(dir: impl AsRef<Path>) -> Opt {
@@ -225,7 +221,7 @@ pub fn default_settings(dir: impl AsRef<Path>) -> Opt {
skip_index_budget: true,
..Parser::parse_from(None as Option<&str>)
},
experimental_enable_metrics: false,
experimental_enable_metrics: true,
..Parser::parse_from(None as Option<&str>)
}
}

View File

@@ -1,7 +1,4 @@
use meilisearch::Opt;
use tempfile::TempDir;
use crate::common::{default_settings, Server};
use crate::common::Server;
use crate::json;
/// Feature name to test against.
@@ -19,9 +16,7 @@ async fn experimental_features() {
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"scoreDetails": false,
"vectorStore": false,
"metrics": false,
"exportPuffinReports": false
"vectorStore": false
}
"###);
@@ -31,9 +26,7 @@ async fn experimental_features() {
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"scoreDetails": false,
"vectorStore": true,
"metrics": false,
"exportPuffinReports": false
"vectorStore": true
}
"###);
@@ -43,9 +36,7 @@ async fn experimental_features() {
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"scoreDetails": false,
"vectorStore": true,
"metrics": false,
"exportPuffinReports": false
"vectorStore": true
}
"###);
@@ -56,9 +47,7 @@ async fn experimental_features() {
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"scoreDetails": false,
"vectorStore": true,
"metrics": false,
"exportPuffinReports": false
"vectorStore": true
}
"###);
@@ -69,73 +58,11 @@ async fn experimental_features() {
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"scoreDetails": false,
"vectorStore": true,
"metrics": false,
"exportPuffinReports": false
"vectorStore": true
}
"###);
}
#[actix_rt::test]
async fn experimental_feature_metrics() {
// instance flag for metrics enables metrics at startup
let dir = TempDir::new().unwrap();
let enable_metrics = Opt { experimental_enable_metrics: true, ..default_settings(dir.path()) };
let server = Server::new_with_options(enable_metrics).await.unwrap();
let (response, code) = server.get_features().await;
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"scoreDetails": false,
"vectorStore": false,
"metrics": true,
"exportPuffinReports": false
}
"###);
let (response, code) = server.get_metrics().await;
meili_snap::snapshot!(code, @"200 OK");
// metrics are not returned in json format
// so the test server will return null
meili_snap::snapshot!(response, @"null");
// disabling metrics results in invalid request
let (response, code) = server.set_features(json!({"metrics": false})).await;
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(response["metrics"], @"false");
let (response, code) = server.get_metrics().await;
meili_snap::snapshot!(code, @"400 Bad Request");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"message": "Getting metrics requires enabling the `metrics` experimental feature. See https://github.com/meilisearch/product/discussions/625",
"code": "feature_not_enabled",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#feature_not_enabled"
}
"###);
// enabling metrics via HTTP results in valid request
let (response, code) = server.set_features(json!({"metrics": true})).await;
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(response["metrics"], @"true");
let (response, code) = server.get_metrics().await;
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(response, @"null");
// startup without flag respects persisted metrics value
let disable_metrics =
Opt { experimental_enable_metrics: false, ..default_settings(dir.path()) };
let server_no_flag = Server::new_with_options(disable_metrics).await.unwrap();
let (response, code) = server_no_flag.get_metrics().await;
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(response, @"null");
}
#[actix_rt::test]
async fn errors() {
let server = Server::new().await;
@@ -146,7 +73,7 @@ async fn errors() {
meili_snap::snapshot!(code, @"400 Bad Request");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"message": "Unknown field `NotAFeature`: expected one of `scoreDetails`, `vectorStore`, `metrics`, `exportPuffinReports`",
"message": "Unknown field `NotAFeature`: expected one of `scoreDetails`, `vectorStore`",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"

View File

@@ -1,63 +0,0 @@
use meili_snap::snapshot;
use once_cell::sync::Lazy;
use crate::common::{Server, Value};
use crate::json;
pub(self) static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
json!([
{"productId": 1, "shopId": 1},
{"productId": 2, "shopId": 1},
{"productId": 3, "shopId": 2},
{"productId": 4, "shopId": 2},
{"productId": 5, "shopId": 3},
{"productId": 6, "shopId": 3},
{"productId": 7, "shopId": 4},
{"productId": 8, "shopId": 4},
{"productId": 9, "shopId": 5},
{"productId": 10, "shopId": 5}
])
});
pub(self) static DOCUMENT_PRIMARY_KEY: &str = "productId";
pub(self) static DOCUMENT_DISTINCT_KEY: &str = "shopId";
/// testing: https://github.com/meilisearch/meilisearch/issues/4078
#[actix_rt::test]
async fn distinct_search_with_offset_no_ranking() {
let server = Server::new().await;
let index = server.index("test");
let documents = DOCUMENTS.clone();
index.add_documents(documents, Some(DOCUMENT_PRIMARY_KEY)).await;
index.update_distinct_attribute(json!(DOCUMENT_DISTINCT_KEY)).await;
index.wait_task(1).await;
fn get_hits(Value(response): Value) -> Vec<i64> {
let hits_array = response["hits"].as_array().unwrap();
hits_array.iter().map(|h| h[DOCUMENT_DISTINCT_KEY].as_i64().unwrap()).collect::<Vec<_>>()
}
let (response, code) = index.search_post(json!({"limit": 2, "offset": 0})).await;
let hits = get_hits(response);
snapshot!(code, @"200 OK");
snapshot!(hits.len(), @"2");
snapshot!(format!("{:?}", hits), @"[1, 2]");
let (response, code) = index.search_post(json!({"limit": 2, "offset": 2})).await;
let hits = get_hits(response);
snapshot!(code, @"200 OK");
snapshot!(hits.len(), @"2");
snapshot!(format!("{:?}", hits), @"[3, 4]");
let (response, code) = index.search_post(json!({"limit": 10, "offset": 4})).await;
let hits = get_hits(response);
snapshot!(code, @"200 OK");
snapshot!(hits.len(), @"1");
snapshot!(format!("{:?}", hits), @"[5]");
let (response, code) = index.search_post(json!({"limit": 10, "offset": 5})).await;
let hits = get_hits(response);
snapshot!(code, @"200 OK");
snapshot!(hits.len(), @"0");
}

View File

@@ -1,7 +1,6 @@
// This module contains all the tests concerning search. Each particular feature of the search
// should be tested in its own module to isolate tests and keep the tests readable.
mod distinct;
mod errors;
mod facet_search;
mod formatted;
@@ -817,7 +816,7 @@ async fn experimental_feature_score_details() {
},
"proximity": {
"order": 2,
"score": 0.75
"score": 0.875
},
"attribute": {
"order": 3,

View File

@@ -1,5 +1,4 @@
use std::fs::File;
use std::io::BufReader;
use std::{io, str};
use obkv::KvReader;
@@ -20,14 +19,14 @@ use crate::FieldId;
pub struct EnrichedDocumentsBatchReader<R> {
documents: DocumentsBatchReader<R>,
primary_key: String,
external_ids: grenad::ReaderCursor<BufReader<File>>,
external_ids: grenad::ReaderCursor<File>,
}
impl<R: io::Read + io::Seek> EnrichedDocumentsBatchReader<R> {
pub fn new(
documents: DocumentsBatchReader<R>,
primary_key: String,
external_ids: grenad::Reader<BufReader<File>>,
external_ids: grenad::Reader<File>,
) -> Result<Self, Error> {
if documents.documents_count() as u64 == external_ids.len() {
Ok(EnrichedDocumentsBatchReader {
@@ -76,7 +75,7 @@ pub struct EnrichedDocument<'a> {
pub struct EnrichedDocumentsBatchCursor<R> {
documents: DocumentsBatchCursor<R>,
primary_key: String,
external_ids: grenad::ReaderCursor<BufReader<File>>,
external_ids: grenad::ReaderCursor<File>,
}
impl<R> EnrichedDocumentsBatchCursor<R> {

View File

@@ -2,7 +2,7 @@ use std::cmp;
use crate::{relative_from_absolute_position, Position};
pub const MAX_DISTANCE: u32 = 4;
pub const MAX_DISTANCE: u32 = 8;
pub fn index_proximity(lhs: u32, rhs: u32) -> u32 {
if lhs <= rhs {
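
Changing `MAX_DISTANCE` from 4 to 8 is what the proximity `rank`/`max_rank` updates in the snapshot files further down appear to track. The new values are consistent with the maximum rank growing by `MAX_DISTANCE - 1` per adjacent pair of query terms; this bound is inferred from the snapshot diffs rather than quoted from milli, so treat it as a sketch:

```rust
// Inferred bound, checked against the snapshot diffs below
// (2 terms: 4 -> 8, 3 terms: 7 -> 15, 9 terms: 25 -> 57). Not milli's literal code.
fn max_proximity_rank(max_distance: u32, term_count: u32) -> u32 {
    (max_distance - 1) * term_count.saturating_sub(1) + 1
}

fn main() {
    assert_eq!(max_proximity_rank(4, 2), 4);
    assert_eq!(max_proximity_rank(8, 2), 8);
    assert_eq!(max_proximity_rank(4, 3), 7);
    assert_eq!(max_proximity_rank(8, 3), 15);
    assert_eq!(max_proximity_rank(4, 9), 25);
    assert_eq!(max_proximity_rank(8, 9), 57);
}
```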

View File

@@ -46,27 +46,18 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
if let Some(distinct_fid) = distinct_fid {
let mut excluded = RoaringBitmap::new();
let mut results = vec![];
let mut skip = 0;
for docid in universe.iter() {
if results.len() >= length {
if results.len() >= from + length {
break;
}
if excluded.contains(docid) {
continue;
}
distinct_single_docid(ctx.index, ctx.txn, distinct_fid, docid, &mut excluded)?;
skip += 1;
if skip <= from {
continue;
}
results.push(docid);
}
let mut all_candidates = universe - excluded;
all_candidates.extend(results.iter().copied());
return Ok(BucketSortOutput {
scores: vec![Default::default(); results.len()],
docids: results,
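
The two variants visible in this hunk differ in where the offset is applied when a distinct attribute is set: one stops after collecting `from + length` deduplicated hits and leaves the offset to be applied afterwards, the other counts distinct hits as it goes and only starts keeping them once `from` of them have been skipped. A self-contained sketch of the skip-as-you-go variant over plain data (hypothetical types, not milli's):

```rust
use std::collections::HashSet;

/// Keep at most `length` docids after skipping the first `from` *distinct* hits,
/// where hits sharing the same `key` count as a single distinct hit.
fn distinct_page(hits: &[(u32, u32)], from: usize, length: usize) -> Vec<u32> {
    let mut seen_keys = HashSet::new();
    let mut distinct_seen = 0;
    let mut results = Vec::new();
    for &(docid, key) in hits {
        if results.len() >= length {
            break;
        }
        if !seen_keys.insert(key) {
            continue; // same distinct value as an earlier, better-ranked hit
        }
        distinct_seen += 1;
        if distinct_seen <= from {
            continue; // still inside the offset window
        }
        results.push(docid);
    }
    results
}

fn main() {
    // (docid, shop_id): two products per shop, mirroring the removed distinct test data.
    let hits: Vec<(u32, u32)> = (1..=10).map(|i| (i, (i + 1) / 2)).collect();
    assert_eq!(distinct_page(&hits, 0, 2), vec![1, 3]);
    assert_eq!(distinct_page(&hits, 2, 2), vec![5, 7]);
    assert_eq!(distinct_page(&hits, 4, 10), vec![9]);
}
```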

View File

@@ -12,7 +12,8 @@ use super::Word;
use crate::heed_codec::{BytesDecodeOwned, StrBEU16Codec};
use crate::update::{merge_cbo_roaring_bitmaps, MergeFn};
use crate::{
CboRoaringBitmapCodec, CboRoaringBitmapLenCodec, Result, RoaringBitmapCodec, SearchContext,
CboRoaringBitmapCodec, CboRoaringBitmapLenCodec, Result, RoaringBitmapCodec,
RoaringBitmapLenCodec, SearchContext,
};
/// A cache storing pointers to values in the LMDB databases.
@@ -259,6 +260,7 @@ impl<'ctx> SearchContext<'ctx> {
word2: Interned<String>,
proximity: u8,
) -> Result<Option<RoaringBitmap>> {
unreachable!();
DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
self.txn,
(proximity, word1, word2),
@@ -278,6 +280,7 @@ impl<'ctx> SearchContext<'ctx> {
word2: Interned<String>,
proximity: u8,
) -> Result<Option<u64>> {
unreachable!();
DatabaseCache::get_value::<_, _, CboRoaringBitmapLenCodec>(
self.txn,
(proximity, word1, word2),
@@ -291,12 +294,23 @@ impl<'ctx> SearchContext<'ctx> {
)
}
pub fn get_db_word_docids_len(&mut self, word: Interned<String>) -> Result<Option<u64>> {
DatabaseCache::get_value::<_, _, RoaringBitmapLenCodec>(
self.txn,
word,
self.word_interner.get(word).as_str(),
&mut self.db_cache.word_docids,
self.index.word_docids.remap_data_type::<ByteSlice>(),
)
}
pub fn get_db_word_prefix_pair_proximity_docids(
&mut self,
word1: Interned<String>,
prefix2: Interned<String>,
proximity: u8,
) -> Result<Option<RoaringBitmap>> {
unreachable!();
DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
self.txn,
(proximity, word1, prefix2),
@@ -315,6 +329,7 @@ impl<'ctx> SearchContext<'ctx> {
right: Interned<String>,
proximity: u8,
) -> Result<Option<RoaringBitmap>> {
unreachable!();
DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
self.txn,
(proximity, left_prefix, right),

View File

@@ -295,11 +295,11 @@ fn get_ranking_rules_for_query_graph_search<'ctx>(
ranking_rules.push(Box::new(Typo::new(None)));
}
crate::Criterion::Proximity => {
if proximity {
continue;
}
proximity = true;
ranking_rules.push(Box::new(Proximity::new(None)));
// if proximity {
continue;
// }
// proximity = true;
// ranking_rules.push(Box::new(Proximity::new(None)));
}
crate::Criterion::Attribute => {
if attribute {

View File

@@ -265,11 +265,11 @@ pub fn partially_initialized_term_from_word(
}
fn find_split_words(ctx: &mut SearchContext, word: &str) -> Result<Option<Interned<Phrase>>> {
if let Some((l, r)) = split_best_frequency(ctx, word)? {
Ok(Some(ctx.phrase_interner.insert(Phrase { words: vec![Some(l), Some(r)] })))
} else {
Ok(None)
}
// if let Some((l, r)) = split_best_frequency(ctx, word)? {
// Ok(Some(ctx.phrase_interner.insert(Phrase { words: vec![Some(l), Some(r)] })))
// } else {
Ok(None)
// }
}
impl Interned<QueryTerm> {
@@ -416,11 +416,20 @@ fn split_best_frequency(
let left = ctx.word_interner.insert(left.to_owned());
let right = ctx.word_interner.insert(right.to_owned());
if let Some(frequency) = ctx.get_db_word_pair_proximity_docids_len(left, right, 1)? {
if let (Some(l_freq), Some(r_freq)) =
(ctx.get_db_word_docids_len(left)?, ctx.get_db_word_docids_len(right)?)
{
let frequency = l_freq.min(r_freq);
if best.map_or(true, |(old, _, _)| frequency > old) {
best = Some((frequency, left, right));
}
}
// if let Some(frequency) = ctx.get_db_word_pair_proximity_docids_len(left, right, 1)? {
// if best.map_or(true, |(old, _, _)| frequency > old) {
// best = Some((frequency, left, right));
// }
// }
}
Ok(best.map(|(_, left, right)| (left, right)))
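
Without the word-pair-proximity database, the hunk above approximates how common a split is by taking the rarer of the two halves, `min(freq(left), freq(right))`, instead of the exact pair count. A standalone sketch of that heuristic, with a plain `HashMap` standing in for the word-docids length lookup:

```rust
use std::collections::HashMap;

/// Pick the split point whose rarer half is the most frequent,
/// i.e. maximize min(freq(left), freq(right)).
fn split_best_frequency<'a>(
    frequencies: &HashMap<&str, u64>,
    word: &'a str,
) -> Option<(&'a str, &'a str)> {
    let mut best: Option<(u64, &'a str, &'a str)> = None;
    for (i, _) in word.char_indices().skip(1) {
        let (left, right) = word.split_at(i);
        if let (Some(l), Some(r)) = (frequencies.get(left), frequencies.get(right)) {
            let frequency = (*l).min(*r);
            if best.map_or(true, |(old, _, _)| frequency > old) {
                best = Some((frequency, left, right));
            }
        }
    }
    best.map(|(_, left, right)| (left, right))
}

fn main() {
    let frequencies = HashMap::from([("sun", 50_u64), ("flower", 30), ("sunflower", 5)]);
    assert_eq!(split_best_frequency(&frequencies, "sunflower"), Some(("sun", "flower")));
}
```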

View File

@@ -82,41 +82,41 @@ pub fn located_query_terms_from_tokens(
position = position.wrapping_add(7);
}
phrase = 'phrase: {
let phrase = phrase.take();
// phrase = 'phrase: {
// let phrase = phrase.take();
// If we have a hard separator inside a phrase, we immediately start a new phrase
let phrase = if separator_kind == SeparatorKind::Hard {
if let Some(phrase) = phrase {
if let Some(located_query_term) = phrase.build(ctx) {
located_terms.push(located_query_term)
}
Some(PhraseBuilder::empty())
} else {
None
}
} else {
phrase
};
// // If we have a hard separator inside a phrase, we immediately start a new phrase
// let phrase = if separator_kind == SeparatorKind::Hard {
// if let Some(phrase) = phrase {
// if let Some(located_query_term) = phrase.build(ctx) {
// located_terms.push(located_query_term)
// }
// Some(PhraseBuilder::empty())
// } else {
// None
// }
// } else {
// phrase
// };
// We close and start a new phrase depending on the number of double quotes
let mut quote_count = token.lemma().chars().filter(|&s| s == '"').count();
if quote_count == 0 {
break 'phrase phrase;
}
// // We close and start a new phrase depending on the number of double quotes
// let mut quote_count = token.lemma().chars().filter(|&s| s == '"').count();
// if quote_count == 0 {
// break 'phrase phrase;
// }
// Consume the closing quote and the phrase
if let Some(phrase) = phrase {
// Per the check above, quote_count > 0
quote_count -= 1;
if let Some(located_query_term) = phrase.build(ctx) {
located_terms.push(located_query_term)
}
}
// // Consume the closing quote and the phrase
// if let Some(phrase) = phrase {
// // Per the check above, quote_count > 0
// quote_count -= 1;
// if let Some(located_query_term) = phrase.build(ctx) {
// located_terms.push(located_query_term)
// }
// }
// Start new phrase if the token ends with an opening quote
(quote_count % 2 == 1).then_some(PhraseBuilder::empty())
};
// // Start new phrase if the token ends with an opening quote
// (quote_count % 2 == 1).then_some(PhraseBuilder::empty())
// };
}
_ => (),
}

View File

@@ -1,7 +1,6 @@
#![allow(clippy::too_many_arguments)]
use super::ProximityCondition;
use crate::proximity::MAX_DISTANCE;
use crate::search::new::interner::{DedupInterner, Interned};
use crate::search::new::query_term::LocatedQueryTermSubset;
use crate::search::new::SearchContext;
@@ -36,7 +35,7 @@ pub fn build_edges(
}
let mut conditions = vec![];
for cost in right_ngram_max..(((MAX_DISTANCE as usize) - 1) + right_ngram_max) {
for cost in right_ngram_max..(7 + right_ngram_max) {
conditions.push((
cost as u32,
conditions_interner.insert(ProximityCondition::Uninit {
@@ -48,7 +47,7 @@ pub fn build_edges(
}
conditions.push((
((MAX_DISTANCE - 1) + (right_ngram_max as u32)),
(7 + right_ngram_max) as u32,
conditions_interner.insert(ProximityCondition::Term { term: right_term.clone() }),
));

View File

@@ -273,7 +273,7 @@ fn test_proximity_simple() {
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("the quick brown fox jumps over the lazy dog");
let SearchResult { documents_ids, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9, 10, 4, 7, 6, 2, 3, 5, 1, 0]");
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9, 10, 4, 7, 6, 5, 2, 3, 0, 1]");
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
insta::assert_debug_snapshot!(texts, @r###"
[
@@ -282,11 +282,11 @@ fn test_proximity_simple() {
"\"the quickbrown fox jumps over the lazy dog\"",
"\"the really quick brown fox jumps over the lazy dog\"",
"\"the really quick brown fox jumps over the very lazy dog\"",
"\"brown quick fox jumps over the lazy dog\"",
"\"the quick brown fox jumps over the lazy. dog\"",
"\"dog the quick brown fox jumps over the lazy\"",
"\"brown quick fox jumps over the lazy dog\"",
"\"the. quick brown fox jumps over the lazy. dog\"",
"\"the very quick dark brown and smart fox did jump over the terribly lazy and small dog\"",
"\"the. quick brown fox jumps over the lazy. dog\"",
]
"###);
}
@@ -371,7 +371,7 @@ fn test_proximity_prefix_db() {
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("best s");
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 13, 9, 12, 6, 7, 8, 11, 15]");
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 13, 9, 12, 8, 6, 7, 11, 15]");
insta::assert_snapshot!(format!("{document_scores:#?}"));
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
@@ -382,9 +382,9 @@ fn test_proximity_prefix_db() {
"\"summer best\"",
"\"this is the best meal of summer\"",
"\"summer x best\"",
"\"this is the best meal of the summer\"",
"\"this is the best meal I have ever had in such a beautiful summer day\"",
"\"this is the best cooked meal of the summer\"",
"\"this is the best meal of the summer\"",
"\"summer x y best\"",
"\"this is the best meal I have ever had in such a beautiful winter day\"",
]
@@ -396,7 +396,7 @@ fn test_proximity_prefix_db() {
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("best su");
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 13, 9, 12, 6, 7, 8, 11, 15]");
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 13, 9, 12, 8, 11, 7, 6, 15]");
insta::assert_snapshot!(format!("{document_scores:#?}"));
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
@@ -406,10 +406,10 @@ fn test_proximity_prefix_db() {
"\"summer best\"",
"\"this is the best meal of summer\"",
"\"summer x best\"",
"\"this is the best meal I have ever had in such a beautiful summer day\"",
"\"this is the best cooked meal of the summer\"",
"\"this is the best meal of the summer\"",
"\"summer x y best\"",
"\"this is the best cooked meal of the summer\"",
"\"this is the best meal I have ever had in such a beautiful summer day\"",
"\"this is the best meal I have ever had in such a beautiful winter day\"",
]
"###);
@@ -447,7 +447,7 @@ fn test_proximity_prefix_db() {
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("best wint");
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[19, 22, 18, 21, 15, 16, 17, 20]");
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[19, 22, 18, 21, 17, 20, 16, 15]");
insta::assert_snapshot!(format!("{document_scores:#?}"));
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
@@ -457,10 +457,10 @@ fn test_proximity_prefix_db() {
"\"winter best\"",
"\"this is the best meal of winter\"",
"\"winter x best\"",
"\"this is the best meal I have ever had in such a beautiful winter day\"",
"\"this is the best cooked meal of the winter\"",
"\"this is the best meal of the winter\"",
"\"winter x y best\"",
"\"this is the best cooked meal of the winter\"",
"\"this is the best meal I have ever had in such a beautiful winter day\"",
]
"###);
@@ -471,7 +471,7 @@ fn test_proximity_prefix_db() {
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("best wi");
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[19, 22, 18, 21, 15, 16, 17, 20]");
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[19, 22, 18, 21, 17, 15, 16, 20]");
insta::assert_snapshot!(format!("{document_scores:#?}"));
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
@@ -481,9 +481,9 @@ fn test_proximity_prefix_db() {
"\"winter best\"",
"\"this is the best meal of winter\"",
"\"winter x best\"",
"\"this is the best meal of the winter\"",
"\"this is the best meal I have ever had in such a beautiful winter day\"",
"\"this is the best cooked meal of the winter\"",
"\"this is the best meal of the winter\"",
"\"winter x y best\"",
]
"###);

View File

@@ -68,8 +68,8 @@ fn test_trap_basic() {
[
Proximity(
Rank {
rank: 4,
max_rank: 4,
rank: 8,
max_rank: 8,
},
),
Typo(
@@ -82,8 +82,8 @@ fn test_trap_basic() {
[
Proximity(
Rank {
rank: 4,
max_rank: 4,
rank: 8,
max_rank: 8,
},
),
Typo(

View File

@@ -23,8 +23,8 @@ expression: "format!(\"{document_ids_scores:#?}\")"
),
Proximity(
Rank {
rank: 9,
max_rank: 25,
rank: 35,
max_rank: 57,
},
),
],
@@ -49,8 +49,8 @@ expression: "format!(\"{document_ids_scores:#?}\")"
),
Proximity(
Rank {
rank: 9,
max_rank: 25,
rank: 35,
max_rank: 57,
},
),
],
@@ -75,8 +75,8 @@ expression: "format!(\"{document_ids_scores:#?}\")"
),
Proximity(
Rank {
rank: 9,
max_rank: 25,
rank: 35,
max_rank: 57,
},
),
],

View File

@@ -23,8 +23,8 @@ expression: "format!(\"{document_ids_scores:#?}\")"
),
Proximity(
Rank {
rank: 25,
max_rank: 25,
rank: 57,
max_rank: 57,
},
),
],
@@ -49,8 +49,8 @@ expression: "format!(\"{document_ids_scores:#?}\")"
),
Proximity(
Rank {
rank: 24,
max_rank: 25,
rank: 56,
max_rank: 57,
},
),
],
@@ -75,8 +75,8 @@ expression: "format!(\"{document_ids_scores:#?}\")"
),
Proximity(
Rank {
rank: 9,
max_rank: 25,
rank: 35,
max_rank: 57,
},
),
],
@@ -101,8 +101,8 @@ expression: "format!(\"{document_ids_scores:#?}\")"
),
Proximity(
Rank {
rank: 10,
max_rank: 10,
rank: 22,
max_rank: 22,
},
),
],
@@ -127,8 +127,8 @@ expression: "format!(\"{document_ids_scores:#?}\")"
),
Proximity(
Rank {
rank: 10,
max_rank: 10,
rank: 22,
max_rank: 22,
},
),
],
@@ -153,8 +153,8 @@ expression: "format!(\"{document_ids_scores:#?}\")"
),
Proximity(
Rank {
rank: 10,
max_rank: 10,
rank: 22,
max_rank: 22,
},
),
],
@@ -179,8 +179,8 @@ expression: "format!(\"{document_ids_scores:#?}\")"
),
Proximity(
Rank {
rank: 9,
max_rank: 10,
rank: 21,
max_rank: 22,
},
),
],
@@ -205,8 +205,8 @@ expression: "format!(\"{document_ids_scores:#?}\")"
),
Proximity(
Rank {
rank: 5,
max_rank: 10,
rank: 17,
max_rank: 22,
},
),
],
@@ -231,8 +231,8 @@ expression: "format!(\"{document_ids_scores:#?}\")"
),
Proximity(
Rank {
rank: 5,
max_rank: 10,
rank: 17,
max_rank: 22,
},
),
],

View File

@@ -3,35 +3,59 @@ source: milli/src/search/new/tests/proximity.rs
expression: "format!(\"{document_scores:#?}\")"
---
[
[
Proximity(
Rank {
rank: 8,
max_rank: 8,
},
),
],
[
Proximity(
Rank {
rank: 7,
max_rank: 8,
},
),
],
[
Proximity(
Rank {
rank: 6,
max_rank: 8,
},
),
],
[
Proximity(
Rank {
rank: 6,
max_rank: 8,
},
),
],
[
Proximity(
Rank {
rank: 5,
max_rank: 8,
},
),
],
[
Proximity(
Rank {
rank: 5,
max_rank: 8,
},
),
],
[
Proximity(
Rank {
rank: 4,
max_rank: 4,
},
),
],
[
Proximity(
Rank {
rank: 3,
max_rank: 4,
},
),
],
[
Proximity(
Rank {
rank: 2,
max_rank: 4,
},
),
],
[
Proximity(
Rank {
rank: 2,
max_rank: 4,
max_rank: 8,
},
),
],
@@ -39,31 +63,7 @@ expression: "format!(\"{document_scores:#?}\")"
Proximity(
Rank {
rank: 1,
max_rank: 4,
},
),
],
[
Proximity(
Rank {
rank: 1,
max_rank: 4,
},
),
],
[
Proximity(
Rank {
rank: 1,
max_rank: 4,
},
),
],
[
Proximity(
Rank {
rank: 1,
max_rank: 4,
max_rank: 8,
},
),
],

View File

@@ -6,32 +6,40 @@ expression: "format!(\"{document_scores:#?}\")"
[
Proximity(
Rank {
rank: 4,
max_rank: 4,
rank: 8,
max_rank: 8,
},
),
],
[
Proximity(
Rank {
rank: 3,
max_rank: 4,
rank: 7,
max_rank: 8,
},
),
],
[
Proximity(
Rank {
rank: 2,
max_rank: 4,
rank: 6,
max_rank: 8,
},
),
],
[
Proximity(
Rank {
rank: 2,
max_rank: 4,
rank: 6,
max_rank: 8,
},
),
],
[
Proximity(
Rank {
rank: 5,
max_rank: 8,
},
),
],
@@ -39,7 +47,7 @@ expression: "format!(\"{document_scores:#?}\")"
Proximity(
Rank {
rank: 1,
max_rank: 4,
max_rank: 8,
},
),
],
@@ -47,7 +55,7 @@ expression: "format!(\"{document_scores:#?}\")"
Proximity(
Rank {
rank: 1,
max_rank: 4,
max_rank: 8,
},
),
],
@@ -55,15 +63,7 @@ expression: "format!(\"{document_scores:#?}\")"
Proximity(
Rank {
rank: 1,
max_rank: 4,
},
),
],
[
Proximity(
Rank {
rank: 1,
max_rank: 4,
max_rank: 8,
},
),
],

View File

@@ -6,32 +6,40 @@ expression: "format!(\"{document_scores:#?}\")"
[
Proximity(
Rank {
rank: 4,
max_rank: 4,
rank: 8,
max_rank: 8,
},
),
],
[
Proximity(
Rank {
rank: 3,
max_rank: 4,
rank: 7,
max_rank: 8,
},
),
],
[
Proximity(
Rank {
rank: 2,
max_rank: 4,
rank: 6,
max_rank: 8,
},
),
],
[
Proximity(
Rank {
rank: 2,
max_rank: 4,
rank: 6,
max_rank: 8,
},
),
],
[
Proximity(
Rank {
rank: 5,
max_rank: 8,
},
),
],
@@ -39,7 +47,7 @@ expression: "format!(\"{document_scores:#?}\")"
Proximity(
Rank {
rank: 1,
max_rank: 4,
max_rank: 8,
},
),
],
@@ -47,7 +55,7 @@ expression: "format!(\"{document_scores:#?}\")"
Proximity(
Rank {
rank: 1,
max_rank: 4,
max_rank: 8,
},
),
],
@@ -55,7 +63,7 @@ expression: "format!(\"{document_scores:#?}\")"
Proximity(
Rank {
rank: 1,
max_rank: 4,
max_rank: 8,
},
),
],
@@ -63,15 +71,7 @@ expression: "format!(\"{document_scores:#?}\")"
Proximity(
Rank {
rank: 1,
max_rank: 4,
},
),
],
[
Proximity(
Rank {
rank: 1,
max_rank: 4,
max_rank: 8,
},
),
],

View File

@@ -3,35 +3,59 @@ source: milli/src/search/new/tests/proximity.rs
expression: "format!(\"{document_scores:#?}\")"
---
[
[
Proximity(
Rank {
rank: 8,
max_rank: 8,
},
),
],
[
Proximity(
Rank {
rank: 7,
max_rank: 8,
},
),
],
[
Proximity(
Rank {
rank: 6,
max_rank: 8,
},
),
],
[
Proximity(
Rank {
rank: 6,
max_rank: 8,
},
),
],
[
Proximity(
Rank {
rank: 5,
max_rank: 8,
},
),
],
[
Proximity(
Rank {
rank: 5,
max_rank: 8,
},
),
],
[
Proximity(
Rank {
rank: 4,
max_rank: 4,
},
),
],
[
Proximity(
Rank {
rank: 3,
max_rank: 4,
},
),
],
[
Proximity(
Rank {
rank: 2,
max_rank: 4,
},
),
],
[
Proximity(
Rank {
rank: 2,
max_rank: 4,
max_rank: 8,
},
),
],
@@ -39,7 +63,7 @@ expression: "format!(\"{document_scores:#?}\")"
Proximity(
Rank {
rank: 1,
max_rank: 4,
max_rank: 8,
},
),
],
@@ -47,31 +71,7 @@ expression: "format!(\"{document_scores:#?}\")"
Proximity(
Rank {
rank: 1,
max_rank: 4,
},
),
],
[
Proximity(
Rank {
rank: 1,
max_rank: 4,
},
),
],
[
Proximity(
Rank {
rank: 1,
max_rank: 4,
},
),
],
[
Proximity(
Rank {
rank: 1,
max_rank: 4,
max_rank: 8,
},
),
],

View File

@@ -7,7 +7,7 @@ expression: "format!(\"{document_scores:#?}\")"
Proximity(
Rank {
rank: 1,
max_rank: 4,
max_rank: 8,
},
),
],
@@ -15,7 +15,7 @@ expression: "format!(\"{document_scores:#?}\")"
Proximity(
Rank {
rank: 1,
max_rank: 4,
max_rank: 8,
},
),
],
@@ -23,7 +23,7 @@ expression: "format!(\"{document_scores:#?}\")"
Proximity(
Rank {
rank: 1,
max_rank: 4,
max_rank: 8,
},
),
],
@@ -31,7 +31,7 @@ expression: "format!(\"{document_scores:#?}\")"
Proximity(
Rank {
rank: 1,
max_rank: 4,
max_rank: 8,
},
),
],
@@ -39,7 +39,7 @@ expression: "format!(\"{document_scores:#?}\")"
Proximity(
Rank {
rank: 1,
max_rank: 4,
max_rank: 8,
},
),
],
@@ -47,7 +47,7 @@ expression: "format!(\"{document_scores:#?}\")"
Proximity(
Rank {
rank: 1,
max_rank: 4,
max_rank: 8,
},
),
],
@@ -55,7 +55,7 @@ expression: "format!(\"{document_scores:#?}\")"
Proximity(
Rank {
rank: 1,
max_rank: 4,
max_rank: 8,
},
),
],
@@ -63,7 +63,7 @@ expression: "format!(\"{document_scores:#?}\")"
Proximity(
Rank {
rank: 1,
max_rank: 4,
max_rank: 8,
},
),
],

View File

@@ -6,24 +6,24 @@ expression: "format!(\"{document_scores:#?}\")"
[
Proximity(
Rank {
rank: 4,
max_rank: 4,
rank: 8,
max_rank: 8,
},
),
],
[
Proximity(
Rank {
rank: 4,
max_rank: 4,
rank: 8,
max_rank: 8,
},
),
],
[
Proximity(
Rank {
rank: 4,
max_rank: 4,
rank: 8,
max_rank: 8,
},
),
],
@@ -31,7 +31,7 @@ expression: "format!(\"{document_scores:#?}\")"
Proximity(
Rank {
rank: 1,
max_rank: 4,
max_rank: 8,
},
),
],
@@ -39,7 +39,7 @@ expression: "format!(\"{document_scores:#?}\")"
Proximity(
Rank {
rank: 1,
max_rank: 4,
max_rank: 8,
},
),
],

View File

@@ -6,16 +6,16 @@ expression: "format!(\"{document_scores:#?}\")"
[
Proximity(
Rank {
rank: 4,
max_rank: 4,
rank: 8,
max_rank: 8,
},
),
],
[
Proximity(
Rank {
rank: 4,
max_rank: 4,
rank: 8,
max_rank: 8,
},
),
],
@@ -23,7 +23,7 @@ expression: "format!(\"{document_scores:#?}\")"
Proximity(
Rank {
rank: 1,
max_rank: 4,
max_rank: 8,
},
),
],

View File

@@ -6,16 +6,16 @@ expression: "format!(\"{document_scores:#?}\")"
[
Proximity(
Rank {
rank: 4,
max_rank: 4,
rank: 8,
max_rank: 8,
},
),
],
[
Proximity(
Rank {
rank: 4,
max_rank: 4,
rank: 8,
max_rank: 8,
},
),
],
@@ -23,7 +23,7 @@ expression: "format!(\"{document_scores:#?}\")"
Proximity(
Rank {
rank: 1,
max_rank: 4,
max_rank: 8,
},
),
],

View File

@@ -12,8 +12,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 4,
max_rank: 4,
rank: 8,
max_rank: 8,
},
),
],
@@ -26,8 +26,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 1,
max_rank: 4,
rank: 5,
max_rank: 8,
},
),
],
@@ -40,8 +40,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 4,
max_rank: 4,
rank: 8,
max_rank: 8,
},
),
],
@@ -54,8 +54,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 3,
max_rank: 4,
rank: 7,
max_rank: 8,
},
),
],

View File

@@ -12,8 +12,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 7,
max_rank: 7,
rank: 15,
max_rank: 15,
},
),
],
@@ -26,8 +26,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 4,
max_rank: 7,
rank: 8,
max_rank: 15,
},
),
],

View File

@@ -12,8 +12,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 22,
max_rank: 22,
rank: 50,
max_rank: 50,
},
),
],
@@ -26,8 +26,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 22,
max_rank: 22,
rank: 50,
max_rank: 50,
},
),
],
@@ -40,8 +40,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 21,
max_rank: 22,
rank: 49,
max_rank: 50,
},
),
],
@@ -54,8 +54,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 21,
max_rank: 22,
rank: 49,
max_rank: 50,
},
),
],
@@ -68,8 +68,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 20,
max_rank: 22,
rank: 48,
max_rank: 50,
},
),
],
@@ -82,8 +82,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 17,
max_rank: 22,
rank: 41,
max_rank: 50,
},
),
],
@@ -96,8 +96,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 16,
max_rank: 22,
rank: 40,
max_rank: 50,
},
),
],
@@ -110,8 +110,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 19,
max_rank: 19,
rank: 43,
max_rank: 43,
},
),
],
@@ -124,8 +124,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 16,
max_rank: 16,
rank: 36,
max_rank: 36,
},
),
],
@@ -138,8 +138,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 13,
max_rank: 16,
rank: 31,
max_rank: 36,
},
),
],
@@ -152,8 +152,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 10,
max_rank: 10,
rank: 22,
max_rank: 22,
},
),
],
@@ -166,8 +166,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 7,
max_rank: 7,
rank: 15,
max_rank: 15,
},
),
],
@@ -180,8 +180,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 7,
max_rank: 7,
rank: 15,
max_rank: 15,
},
),
],
@@ -194,8 +194,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 7,
max_rank: 7,
rank: 15,
max_rank: 15,
},
),
],
@@ -208,8 +208,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 4,
max_rank: 4,
rank: 8,
max_rank: 8,
},
),
],

View File

@@ -12,8 +12,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 19,
max_rank: 19,
rank: 43,
max_rank: 43,
},
),
],
@@ -26,8 +26,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 19,
max_rank: 19,
rank: 43,
max_rank: 43,
},
),
],
@@ -40,8 +40,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 18,
max_rank: 19,
rank: 42,
max_rank: 43,
},
),
],
@@ -54,8 +54,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 18,
max_rank: 19,
rank: 42,
max_rank: 43,
},
),
],
@@ -68,8 +68,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 17,
max_rank: 19,
rank: 41,
max_rank: 43,
},
),
],
@@ -82,8 +82,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 14,
max_rank: 19,
rank: 34,
max_rank: 43,
},
),
],
@@ -96,8 +96,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 13,
max_rank: 19,
rank: 33,
max_rank: 43,
},
),
],
@@ -110,8 +110,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 16,
max_rank: 16,
rank: 36,
max_rank: 36,
},
),
],
@@ -124,8 +124,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 13,
max_rank: 13,
rank: 29,
max_rank: 29,
},
),
],
@@ -138,8 +138,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 10,
max_rank: 13,
rank: 24,
max_rank: 29,
},
),
],
@@ -152,8 +152,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 7,
max_rank: 7,
rank: 15,
max_rank: 15,
},
),
],

View File

@@ -12,8 +12,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 25,
max_rank: 25,
rank: 57,
max_rank: 57,
},
),
],
@@ -26,8 +26,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 25,
max_rank: 25,
rank: 57,
max_rank: 57,
},
),
],
@@ -40,8 +40,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 24,
max_rank: 25,
rank: 56,
max_rank: 57,
},
),
],
@@ -54,8 +54,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 24,
max_rank: 25,
rank: 56,
max_rank: 57,
},
),
],
@@ -68,8 +68,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 23,
max_rank: 25,
rank: 55,
max_rank: 57,
},
),
],
@@ -82,8 +82,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 22,
max_rank: 25,
rank: 54,
max_rank: 57,
},
),
],
@@ -96,8 +96,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 21,
max_rank: 25,
rank: 53,
max_rank: 57,
},
),
],
@@ -110,8 +110,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 20,
max_rank: 25,
rank: 52,
max_rank: 57,
},
),
],
@@ -124,8 +124,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 20,
max_rank: 25,
rank: 51,
max_rank: 57,
},
),
],
@@ -138,8 +138,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 19,
max_rank: 25,
rank: 48,
max_rank: 57,
},
),
],
@@ -152,8 +152,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 19,
max_rank: 25,
rank: 47,
max_rank: 57,
},
),
],
@@ -167,7 +167,7 @@ expression: "format!(\"{document_scores:#?}\")"
Proximity(
Rank {
rank: 1,
max_rank: 25,
max_rank: 57,
},
),
],
@@ -178,6 +178,62 @@ expression: "format!(\"{document_scores:#?}\")"
max_matching_words: 9,
},
),
Proximity(
Rank {
rank: 50,
max_rank: 50,
},
),
],
[
Words(
Words {
matching_words: 7,
max_matching_words: 9,
},
),
Proximity(
Rank {
rank: 43,
max_rank: 43,
},
),
],
[
Words(
Words {
matching_words: 7,
max_matching_words: 9,
},
),
Proximity(
Rank {
rank: 38,
max_rank: 43,
},
),
],
[
Words(
Words {
matching_words: 5,
max_matching_words: 9,
},
),
Proximity(
Rank {
rank: 29,
max_rank: 29,
},
),
],
[
Words(
Words {
matching_words: 4,
max_matching_words: 9,
},
),
Proximity(
Rank {
rank: 22,
@@ -188,42 +244,14 @@ expression: "format!(\"{document_scores:#?}\")"
[
Words(
Words {
matching_words: 7,
matching_words: 4,
max_matching_words: 9,
},
),
Proximity(
Rank {
rank: 19,
max_rank: 19,
},
),
],
[
Words(
Words {
matching_words: 7,
max_matching_words: 9,
},
),
Proximity(
Rank {
rank: 16,
max_rank: 19,
},
),
],
[
Words(
Words {
matching_words: 5,
max_matching_words: 9,
},
),
Proximity(
Rank {
rank: 13,
max_rank: 13,
rank: 22,
max_rank: 22,
},
),
],
@@ -236,36 +264,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 10,
max_rank: 10,
},
),
],
[
Words(
Words {
matching_words: 4,
max_matching_words: 9,
},
),
Proximity(
Rank {
rank: 10,
max_rank: 10,
},
),
],
[
Words(
Words {
matching_words: 4,
max_matching_words: 9,
},
),
Proximity(
Rank {
rank: 10,
max_rank: 10,
rank: 22,
max_rank: 22,
},
),
],
@@ -278,8 +278,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 7,
max_rank: 7,
rank: 15,
max_rank: 15,
},
),
],

View File

@@ -12,8 +12,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 25,
max_rank: 25,
rank: 57,
max_rank: 57,
},
),
],
@@ -26,8 +26,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 24,
max_rank: 25,
rank: 56,
max_rank: 57,
},
),
],
@@ -40,8 +40,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 23,
max_rank: 25,
rank: 55,
max_rank: 57,
},
),
],
@@ -54,8 +54,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 22,
max_rank: 25,
rank: 54,
max_rank: 57,
},
),
],
@@ -68,8 +68,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 22,
max_rank: 25,
rank: 54,
max_rank: 57,
},
),
],
@@ -82,8 +82,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 22,
max_rank: 25,
rank: 54,
max_rank: 57,
},
),
],
@@ -96,8 +96,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 21,
max_rank: 25,
rank: 53,
max_rank: 57,
},
),
],
@@ -110,8 +110,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 21,
max_rank: 25,
rank: 53,
max_rank: 57,
},
),
],
@@ -124,8 +124,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 20,
max_rank: 25,
rank: 52,
max_rank: 57,
},
),
],
@@ -138,8 +138,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 18,
max_rank: 25,
rank: 47,
max_rank: 57,
},
),
],
@@ -152,8 +152,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 18,
max_rank: 25,
rank: 45,
max_rank: 57,
},
),
],
@@ -167,7 +167,7 @@ expression: "format!(\"{document_scores:#?}\")"
Proximity(
Rank {
rank: 1,
max_rank: 25,
max_rank: 57,
},
),
],
@@ -178,6 +178,62 @@ expression: "format!(\"{document_scores:#?}\")"
max_matching_words: 9,
},
),
Proximity(
Rank {
rank: 47,
max_rank: 50,
},
),
],
[
Words(
Words {
matching_words: 7,
max_matching_words: 9,
},
),
Proximity(
Rank {
rank: 40,
max_rank: 43,
},
),
],
[
Words(
Words {
matching_words: 7,
max_matching_words: 9,
},
),
Proximity(
Rank {
rank: 35,
max_rank: 43,
},
),
],
[
Words(
Words {
matching_words: 5,
max_matching_words: 9,
},
),
Proximity(
Rank {
rank: 26,
max_rank: 29,
},
),
],
[
Words(
Words {
matching_words: 4,
max_matching_words: 9,
},
),
Proximity(
Rank {
rank: 19,
@@ -188,42 +244,14 @@ expression: "format!(\"{document_scores:#?}\")"
[
Words(
Words {
matching_words: 7,
matching_words: 4,
max_matching_words: 9,
},
),
Proximity(
Rank {
rank: 16,
max_rank: 19,
},
),
],
[
Words(
Words {
matching_words: 7,
max_matching_words: 9,
},
),
Proximity(
Rank {
rank: 13,
max_rank: 19,
},
),
],
[
Words(
Words {
matching_words: 5,
max_matching_words: 9,
},
),
Proximity(
Rank {
rank: 10,
max_rank: 13,
rank: 19,
max_rank: 22,
},
),
],
@@ -236,36 +264,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 7,
max_rank: 10,
},
),
],
[
Words(
Words {
matching_words: 4,
max_matching_words: 9,
},
),
Proximity(
Rank {
rank: 7,
max_rank: 10,
},
),
],
[
Words(
Words {
matching_words: 4,
max_matching_words: 9,
},
),
Proximity(
Rank {
rank: 7,
max_rank: 10,
rank: 19,
max_rank: 22,
},
),
],
@@ -278,8 +278,8 @@ expression: "format!(\"{document_scores:#?}\")"
),
Proximity(
Rank {
rank: 5,
max_rank: 7,
rank: 13,
max_rank: 15,
},
),
],

View File

@@ -6,88 +6,88 @@ expression: "format!(\"{document_scores:#?}\")"
[
Proximity(
Rank {
rank: 25,
max_rank: 25,
rank: 57,
max_rank: 57,
},
),
],
[
Proximity(
Rank {
rank: 25,
max_rank: 25,
rank: 57,
max_rank: 57,
},
),
],
[
Proximity(
Rank {
rank: 24,
max_rank: 25,
rank: 56,
max_rank: 57,
},
),
],
[
Proximity(
Rank {
rank: 24,
max_rank: 25,
rank: 56,
max_rank: 57,
},
),
],
[
Proximity(
Rank {
rank: 23,
max_rank: 25,
rank: 55,
max_rank: 57,
},
),
],
[
Proximity(
Rank {
rank: 22,
max_rank: 25,
rank: 54,
max_rank: 57,
},
),
],
[
Proximity(
Rank {
rank: 21,
max_rank: 25,
rank: 53,
max_rank: 57,
},
),
],
[
Proximity(
Rank {
rank: 20,
max_rank: 25,
rank: 52,
max_rank: 57,
},
),
],
[
Proximity(
Rank {
rank: 20,
max_rank: 25,
rank: 51,
max_rank: 57,
},
),
],
[
Proximity(
Rank {
rank: 19,
max_rank: 25,
rank: 48,
max_rank: 57,
},
),
],
[
Proximity(
Rank {
rank: 19,
max_rank: 25,
rank: 47,
max_rank: 57,
},
),
],
@@ -95,7 +95,7 @@ expression: "format!(\"{document_scores:#?}\")"
Proximity(
Rank {
rank: 1,
max_rank: 25,
max_rank: 57,
},
),
],

View File

@@ -259,8 +259,8 @@ fn test_ignore_stop_words() {
),
Proximity(
Rank {
rank: 3,
max_rank: 4,
rank: 7,
max_rank: 8,
},
),
Fid(
@@ -411,8 +411,8 @@ fn test_stop_words_in_phrase() {
),
Proximity(
Rank {
rank: 2,
max_rank: 4,
rank: 6,
max_rank: 8,
},
),
Fid(

View File

@@ -277,7 +277,7 @@ fn test_words_proximity_tms_last_simple() {
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
// 7 is better than 6 because of the proximity between "the" and its surrounding terms
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9, 21, 14, 17, 13, 10, 18, 16, 19, 15, 20, 22, 8, 7, 6, 5, 4, 11, 12, 3]");
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9, 21, 14, 17, 13, 10, 18, 19, 20, 16, 15, 22, 8, 7, 6, 5, 4, 11, 12, 3]");
insta::assert_snapshot!(format!("{document_scores:#?}"));
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
insta::assert_debug_snapshot!(texts, @r###"
@@ -289,10 +289,10 @@ fn test_words_proximity_tms_last_simple() {
"\"the mighty and quick brown fox jumps over the lazy dog\"",
"\"the brown quick fox jumps over the lazy dog\"",
"\"the brown quick fox jumps over the really lazy dog\"",
"\"this quick brown and scary fox jumps over the lazy dog\"",
"\"the brown quick fox immediately jumps over the really lazy dog\"",
"\"this quick brown and very scary fox jumps over the lazy dog\"",
"\"the brown quick fox immediately jumps over the really lazy blue dog\"",
"\"this quick brown and scary fox jumps over the lazy dog\"",
"\"this quick brown and very scary fox jumps over the lazy dog\"",
"\"the, quick, brown, fox, jumps, over, the, lazy, dog\"",
"\"the quick brown fox jumps over the lazy\"",
"\"the quick brown fox jumps over the\"",
@@ -312,7 +312,7 @@ fn test_words_proximity_tms_last_simple() {
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
// 10 is better than 9 because of the proximity between "quick" and "brown"
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 18, 19, 9, 20, 21, 14, 17, 13, 15, 16, 22, 8, 7, 6, 5, 4, 11, 12, 3]");
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 18, 19, 9, 20, 21, 14, 17, 13, 16, 15, 22, 8, 7, 6, 5, 4, 11, 12, 3]");
insta::assert_snapshot!(format!("{document_scores:#?}"));
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
insta::assert_debug_snapshot!(texts, @r###"
@@ -326,8 +326,8 @@ fn test_words_proximity_tms_last_simple() {
"\"the great quick brown fox jumps over the lazy dog\"",
"\"the quick brown fox jumps over the really lazy dog\"",
"\"the mighty and quick brown fox jumps over the lazy dog\"",
"\"this quick brown and very scary fox jumps over the lazy dog\"",
"\"this quick brown and scary fox jumps over the lazy dog\"",
"\"this quick brown and very scary fox jumps over the lazy dog\"",
"\"the, quick, brown, fox, jumps, over, the, lazy, dog\"",
"\"the quick brown fox jumps over the lazy\"",
"\"the quick brown fox jumps over the\"",
@@ -427,7 +427,7 @@ fn test_words_tms_all() {
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9, 21, 14, 17, 13, 10, 18, 16, 19, 15, 20, 22]");
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9, 21, 14, 17, 13, 10, 18, 19, 20, 16, 15, 22]");
insta::assert_snapshot!(format!("{document_scores:#?}"));
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
insta::assert_debug_snapshot!(texts, @r###"
@@ -439,10 +439,10 @@ fn test_words_tms_all() {
"\"the mighty and quick brown fox jumps over the lazy dog\"",
"\"the brown quick fox jumps over the lazy dog\"",
"\"the brown quick fox jumps over the really lazy dog\"",
"\"this quick brown and scary fox jumps over the lazy dog\"",
"\"the brown quick fox immediately jumps over the really lazy dog\"",
"\"this quick brown and very scary fox jumps over the lazy dog\"",
"\"the brown quick fox immediately jumps over the really lazy blue dog\"",
"\"this quick brown and scary fox jumps over the lazy dog\"",
"\"this quick brown and very scary fox jumps over the lazy dog\"",
"\"the, quick, brown, fox, jumps, over, the, lazy, dog\"",
]
"###);

View File

@@ -108,17 +108,15 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
self.delete_document(docid);
Some(docid)
}
pub fn execute(self) -> Result<DocumentDeletionResult> {
puffin::profile_function!();
let DetailedDocumentDeletionResult { deleted_documents, remaining_documents } =
self.execute_inner()?;
Ok(DocumentDeletionResult { deleted_documents, remaining_documents })
}
pub(crate) fn execute_inner(mut self) -> Result<DetailedDocumentDeletionResult> {
puffin::profile_function!();
self.index.set_updated_at(self.wtxn, &OffsetDateTime::now_utc())?;
// We retrieve the current documents ids that are in the database.
@@ -478,8 +476,6 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
C: for<'a> BytesDecode<'a, DItem = RoaringBitmap>
+ for<'a> BytesEncode<'a, EItem = RoaringBitmap>,
{
puffin::profile_function!();
while let Some(result) = iter.next() {
let (bytes, mut docids) = result?;
let previous_len = docids.len();
@@ -502,8 +498,6 @@ fn remove_from_word_prefix_docids(
db: &Database<Str, RoaringBitmapCodec>,
to_remove: &RoaringBitmap,
) -> Result<fst::Set<Vec<u8>>> {
puffin::profile_function!();
let mut prefixes_to_delete = fst::SetBuilder::memory();
// We iterate over the word prefix docids database and remove the deleted documents ids
@@ -534,8 +528,6 @@ fn remove_from_word_docids(
words_to_keep: &mut BTreeSet<String>,
words_to_remove: &mut BTreeSet<String>,
) -> Result<()> {
puffin::profile_function!();
// We create an iterator to be able to get the content and delete the word docids.
// It's faster to acquire a cursor to get and delete or put, as we avoid traversing
// the LMDB B-Tree two times but only once.
@@ -567,8 +559,6 @@ fn remove_docids_from_field_id_docid_facet_value(
field_id: FieldId,
to_remove: &RoaringBitmap,
) -> heed::Result<HashSet<Vec<u8>>> {
puffin::profile_function!();
let db = match facet_type {
FacetType::String => {
index.field_id_docid_facet_strings.remap_types::<ByteSlice, DecodeIgnore>()
@@ -604,8 +594,6 @@ fn remove_docids_from_facet_id_docids<'a, C>(
where
C: heed::BytesDecode<'a> + heed::BytesEncode<'a>,
{
puffin::profile_function!();
let mut iter = db.remap_key_type::<ByteSlice>().iter_mut(wtxn)?;
while let Some(result) = iter.next() {
let (bytes, mut docids) = result?;

View File

@@ -1,6 +1,5 @@
use std::borrow::Cow;
use std::fs::File;
use std::io::BufReader;
use grenad::CompressionType;
use heed::types::ByteSlice;
@@ -31,7 +30,7 @@ pub struct FacetsUpdateBulk<'i> {
facet_type: FacetType,
field_ids: Vec<FieldId>,
// None if level 0 does not need to be updated
new_data: Option<grenad::Reader<BufReader<File>>>,
new_data: Option<grenad::Reader<File>>,
}
impl<'i> FacetsUpdateBulk<'i> {
@@ -39,7 +38,7 @@ impl<'i> FacetsUpdateBulk<'i> {
index: &'i Index,
field_ids: Vec<FieldId>,
facet_type: FacetType,
new_data: grenad::Reader<BufReader<File>>,
new_data: grenad::Reader<File>,
group_size: u8,
min_level_size: u8,
) -> FacetsUpdateBulk<'i> {
@@ -188,7 +187,7 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
&self,
field_id: FieldId,
txn: &RoTxn,
) -> Result<(Vec<grenad::Reader<BufReader<File>>>, RoaringBitmap)> {
) -> Result<(Vec<grenad::Reader<File>>, RoaringBitmap)> {
let mut all_docids = RoaringBitmap::new();
let subwriters = self.compute_higher_levels(txn, field_id, 32, &mut |bitmaps, _| {
for bitmap in bitmaps {
@@ -260,7 +259,7 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
field_id: u16,
level: u8,
handle_group: &mut dyn FnMut(&[RoaringBitmap], &'t [u8]) -> Result<()>,
) -> Result<Vec<grenad::Reader<BufReader<File>>>> {
) -> Result<Vec<grenad::Reader<File>>> {
if level == 0 {
self.read_level_0(rtxn, field_id, handle_group)?;
// Level 0 is already in the database

View File

@@ -1,6 +1,5 @@
use std::collections::HashMap;
use std::fs::File;
use std::io::BufReader;
use heed::types::{ByteSlice, DecodeIgnore};
use heed::{BytesDecode, Error, RoTxn, RwTxn};
@@ -35,14 +34,14 @@ pub struct FacetsUpdateIncremental<'i> {
index: &'i Index,
inner: FacetsUpdateIncrementalInner,
facet_type: FacetType,
new_data: grenad::Reader<BufReader<File>>,
new_data: grenad::Reader<File>,
}
impl<'i> FacetsUpdateIncremental<'i> {
pub fn new(
index: &'i Index,
facet_type: FacetType,
new_data: grenad::Reader<BufReader<File>>,
new_data: grenad::Reader<File>,
group_size: u8,
min_level_size: u8,
max_group_size: u8,

View File

@@ -78,7 +78,6 @@ pub const FACET_MIN_LEVEL_SIZE: u8 = 5;
use std::collections::BTreeSet;
use std::fs::File;
use std::io::BufReader;
use std::iter::FromIterator;
use charabia::normalizer::{Normalize, NormalizerOption};
@@ -109,17 +108,13 @@ pub struct FacetsUpdate<'i> {
index: &'i Index,
database: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
facet_type: FacetType,
new_data: grenad::Reader<BufReader<File>>,
new_data: grenad::Reader<File>,
group_size: u8,
max_group_size: u8,
min_level_size: u8,
}
impl<'i> FacetsUpdate<'i> {
pub fn new(
index: &'i Index,
facet_type: FacetType,
new_data: grenad::Reader<BufReader<File>>,
) -> Self {
pub fn new(index: &'i Index, facet_type: FacetType, new_data: grenad::Reader<File>) -> Self {
let database = match facet_type {
FacetType::String => index
.facet_id_string_docids

View File

@@ -1,4 +1,4 @@
use std::io::{BufWriter, Read, Seek};
use std::io::{Read, Seek};
use std::result::Result as StdResult;
use std::{fmt, iter};
@@ -35,7 +35,7 @@ pub fn enrich_documents_batch<R: Read + Seek>(
let (mut cursor, mut documents_batch_index) = reader.into_cursor_and_fields_index();
let mut external_ids = tempfile::tempfile().map(BufWriter::new).map(grenad::Writer::new)?;
let mut external_ids = tempfile::tempfile().map(grenad::Writer::new)?;
let mut uuid_buffer = [0; uuid::fmt::Hyphenated::LENGTH];
// The primary key *field id* that has already been set for this index or the one

View File

@@ -1,7 +1,6 @@
use std::collections::{HashMap, HashSet};
use std::convert::TryInto;
use std::fs::File;
use std::io::BufReader;
use std::{io, mem, str};
use charabia::{Language, Script, SeparatorKind, Token, TokenKind, Tokenizer, TokenizerBuilder};
@@ -32,7 +31,7 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
allowed_separators: Option<&[&str]>,
dictionary: Option<&[&str]>,
max_positions_per_attributes: Option<u32>,
) -> Result<(RoaringBitmap, grenad::Reader<BufReader<File>>, ScriptLanguageDocidsMap)> {
) -> Result<(RoaringBitmap, grenad::Reader<File>, ScriptLanguageDocidsMap)> {
puffin::profile_function!();
let max_positions_per_attributes = max_positions_per_attributes

View File

@@ -1,5 +1,5 @@
use std::fs::File;
use std::io::{self, BufReader};
use std::io;
use heed::{BytesDecode, BytesEncode};
@@ -19,7 +19,7 @@ use crate::Result;
pub fn extract_facet_number_docids<R: io::Read + io::Seek>(
docid_fid_facet_number: grenad::Reader<R>,
indexer: GrenadParameters,
) -> Result<grenad::Reader<BufReader<File>>> {
) -> Result<grenad::Reader<File>> {
puffin::profile_function!();
let max_memory = indexer.max_memory_by_thread();

View File

@@ -1,5 +1,5 @@
use std::fs::File;
use std::io::{self, BufReader};
use std::io;
use heed::BytesEncode;
@@ -17,7 +17,7 @@ use crate::{FieldId, Result, MAX_FACET_VALUE_LENGTH};
pub fn extract_facet_string_docids<R: io::Read + io::Seek>(
docid_fid_facet_string: grenad::Reader<R>,
indexer: GrenadParameters,
) -> Result<grenad::Reader<BufReader<File>>> {
) -> Result<grenad::Reader<File>> {
puffin::profile_function!();
let max_memory = indexer.max_memory_by_thread();

View File

@@ -1,7 +1,7 @@
use std::collections::{BTreeMap, HashSet};
use std::convert::TryInto;
use std::fs::File;
use std::io::{self, BufReader};
use std::io;
use std::mem::size_of;
use heed::zerocopy::AsBytes;
@@ -17,11 +17,11 @@ use crate::{CboRoaringBitmapCodec, DocumentId, FieldId, Result, BEU32, MAX_FACET
/// The extracted facet values stored in grenad files by type.
pub struct ExtractedFacetValues {
pub docid_fid_facet_numbers_chunk: grenad::Reader<BufReader<File>>,
pub docid_fid_facet_strings_chunk: grenad::Reader<BufReader<File>>,
pub fid_facet_is_null_docids_chunk: grenad::Reader<BufReader<File>>,
pub fid_facet_is_empty_docids_chunk: grenad::Reader<BufReader<File>>,
pub fid_facet_exists_docids_chunk: grenad::Reader<BufReader<File>>,
pub docid_fid_facet_numbers_chunk: grenad::Reader<File>,
pub docid_fid_facet_strings_chunk: grenad::Reader<File>,
pub fid_facet_is_null_docids_chunk: grenad::Reader<File>,
pub fid_facet_is_empty_docids_chunk: grenad::Reader<File>,
pub fid_facet_exists_docids_chunk: grenad::Reader<File>,
}
/// Extracts the facet values of each faceted field of each document.

View File

@@ -1,6 +1,6 @@
use std::collections::HashMap;
use std::fs::File;
use std::io::{self, BufReader};
use std::io;
use grenad::Sorter;
@@ -21,7 +21,7 @@ use crate::{relative_from_absolute_position, DocumentId, FieldId, Result};
pub fn extract_fid_word_count_docids<R: io::Read + io::Seek>(
docid_word_positions: grenad::Reader<R>,
indexer: GrenadParameters,
) -> Result<grenad::Reader<BufReader<File>>> {
) -> Result<grenad::Reader<File>> {
puffin::profile_function!();
let max_memory = indexer.max_memory_by_thread();

View File

@@ -1,5 +1,5 @@
use std::fs::File;
use std::io::{self, BufReader};
use std::io;
use concat_arrays::concat_arrays;
use serde_json::Value;
@@ -18,7 +18,7 @@ pub fn extract_geo_points<R: io::Read + io::Seek>(
indexer: GrenadParameters,
primary_key_id: FieldId,
(lat_fid, lng_fid): (FieldId, FieldId),
) -> Result<grenad::Reader<BufReader<File>>> {
) -> Result<grenad::Reader<File>> {
puffin::profile_function!();
let mut writer = create_writer(

View File

@@ -1,6 +1,6 @@
use std::convert::TryFrom;
use std::fs::File;
use std::io::{self, BufReader};
use std::io;
use bytemuck::cast_slice;
use serde_json::{from_slice, Value};
@@ -18,7 +18,7 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
indexer: GrenadParameters,
primary_key_id: FieldId,
vectors_fid: FieldId,
) -> Result<grenad::Reader<BufReader<File>>> {
) -> Result<grenad::Reader<File>> {
puffin::profile_function!();
let mut writer = create_writer(

View File

@@ -1,6 +1,6 @@
use std::collections::HashSet;
use std::fs::File;
use std::io::{self, BufReader};
use std::io;
use std::iter::FromIterator;
use roaring::RoaringBitmap;
@@ -26,7 +26,7 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(
docid_word_positions: grenad::Reader<R>,
indexer: GrenadParameters,
exact_attributes: &HashSet<FieldId>,
) -> Result<(grenad::Reader<BufReader<File>>, grenad::Reader<BufReader<File>>)> {
) -> Result<(grenad::Reader<File>, grenad::Reader<File>)> {
puffin::profile_function!();
let max_memory = indexer.max_memory_by_thread();

View File

@@ -1,5 +1,5 @@
use std::fs::File;
use std::io::{self, BufReader};
use std::io;
use super::helpers::{
create_sorter, merge_cbo_roaring_bitmaps, read_u32_ne_bytes, sorter_into_reader,
@@ -14,7 +14,7 @@ use crate::{relative_from_absolute_position, DocumentId, Result};
pub fn extract_word_fid_docids<R: io::Read + io::Seek>(
docid_word_positions: grenad::Reader<R>,
indexer: GrenadParameters,
) -> Result<grenad::Reader<BufReader<File>>> {
) -> Result<grenad::Reader<File>> {
puffin::profile_function!();
let max_memory = indexer.max_memory_by_thread();

View File

@@ -1,7 +1,6 @@
use std::cmp::Ordering;
use std::collections::{BinaryHeap, HashMap};
use std::fs::File;
use std::io::BufReader;
use std::{cmp, io, mem, str, vec};
use super::helpers::{
@@ -21,7 +20,7 @@ use crate::{DocumentId, Result};
pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
docid_word_positions: grenad::Reader<R>,
indexer: GrenadParameters,
) -> Result<grenad::Reader<BufReader<File>>> {
) -> Result<grenad::Reader<File>> {
puffin::profile_function!();
let max_memory = indexer.max_memory_by_thread();

View File

@@ -1,5 +1,5 @@
use std::fs::File;
use std::io::{self, BufReader};
use std::io;
use super::helpers::{
create_sorter, merge_cbo_roaring_bitmaps, read_u32_ne_bytes, sorter_into_reader,
@@ -17,7 +17,7 @@ use crate::{bucketed_position, relative_from_absolute_position, DocumentId, Resu
pub fn extract_word_position_docids<R: io::Read + io::Seek>(
docid_word_positions: grenad::Reader<R>,
indexer: GrenadParameters,
) -> Result<grenad::Reader<BufReader<File>>> {
) -> Result<grenad::Reader<File>> {
puffin::profile_function!();
let max_memory = indexer.max_memory_by_thread();

View File

@@ -12,7 +12,6 @@ mod extract_word_position_docids;
use std::collections::HashSet;
use std::fs::File;
use std::io::BufReader;
use crossbeam_channel::Sender;
use log::debug;
@@ -40,8 +39,8 @@ use crate::{FieldId, Result};
/// Send data in grenad file over provided Sender.
#[allow(clippy::too_many_arguments)]
pub(crate) fn data_from_obkv_documents(
original_obkv_chunks: impl Iterator<Item = Result<grenad::Reader<BufReader<File>>>> + Send,
flattened_obkv_chunks: impl Iterator<Item = Result<grenad::Reader<BufReader<File>>>> + Send,
original_obkv_chunks: impl Iterator<Item = Result<grenad::Reader<File>>> + Send,
flattened_obkv_chunks: impl Iterator<Item = Result<grenad::Reader<File>>> + Send,
indexer: GrenadParameters,
lmdb_writer_sx: Sender<Result<TypedChunk>>,
searchable_fields: Option<HashSet<FieldId>>,
@@ -153,17 +152,17 @@ pub(crate) fn data_from_obkv_documents(
});
}
spawn_extraction_task::<_, _, Vec<grenad::Reader<BufReader<File>>>>(
docid_word_positions_chunks.clone(),
indexer,
lmdb_writer_sx.clone(),
extract_word_pair_proximity_docids,
merge_cbo_roaring_bitmaps,
TypedChunk::WordPairProximityDocids,
"word-pair-proximity-docids",
);
// spawn_extraction_task::<_, _, Vec<grenad::Reader<File>>>(
// docid_word_positions_chunks.clone(),
// indexer,
// lmdb_writer_sx.clone(),
// extract_word_pair_proximity_docids,
// merge_cbo_roaring_bitmaps,
// TypedChunk::WordPairProximityDocids,
// "word-pair-proximity-docids",
// );
spawn_extraction_task::<_, _, Vec<grenad::Reader<BufReader<File>>>>(
spawn_extraction_task::<_, _, Vec<grenad::Reader<File>>>(
docid_word_positions_chunks.clone(),
indexer,
lmdb_writer_sx.clone(),
@@ -173,11 +172,7 @@ pub(crate) fn data_from_obkv_documents(
"field-id-wordcount-docids",
);
spawn_extraction_task::<
_,
_,
Vec<(grenad::Reader<BufReader<File>>, grenad::Reader<BufReader<File>>)>,
>(
spawn_extraction_task::<_, _, Vec<(grenad::Reader<File>, grenad::Reader<File>)>>(
docid_word_positions_chunks.clone(),
indexer,
lmdb_writer_sx.clone(),
@@ -190,7 +185,7 @@ pub(crate) fn data_from_obkv_documents(
"word-docids",
);
spawn_extraction_task::<_, _, Vec<grenad::Reader<BufReader<File>>>>(
spawn_extraction_task::<_, _, Vec<grenad::Reader<File>>>(
docid_word_positions_chunks.clone(),
indexer,
lmdb_writer_sx.clone(),
@@ -199,7 +194,7 @@ pub(crate) fn data_from_obkv_documents(
TypedChunk::WordPositionDocids,
"word-position-docids",
);
spawn_extraction_task::<_, _, Vec<grenad::Reader<BufReader<File>>>>(
spawn_extraction_task::<_, _, Vec<grenad::Reader<File>>>(
docid_word_positions_chunks,
indexer,
lmdb_writer_sx.clone(),
@@ -209,7 +204,7 @@ pub(crate) fn data_from_obkv_documents(
"word-fid-docids",
);
spawn_extraction_task::<_, _, Vec<grenad::Reader<BufReader<File>>>>(
spawn_extraction_task::<_, _, Vec<grenad::Reader<File>>>(
docid_fid_facet_strings_chunks,
indexer,
lmdb_writer_sx.clone(),
@@ -219,7 +214,7 @@ pub(crate) fn data_from_obkv_documents(
"field-id-facet-string-docids",
);
spawn_extraction_task::<_, _, Vec<grenad::Reader<BufReader<File>>>>(
spawn_extraction_task::<_, _, Vec<grenad::Reader<File>>>(
docid_fid_facet_numbers_chunks,
indexer,
lmdb_writer_sx,
@@ -274,7 +269,7 @@ fn spawn_extraction_task<FE, FS, M>(
/// Extract chunked data and send it into lmdb_writer_sx sender:
/// - documents
fn send_original_documents_data(
original_documents_chunk: Result<grenad::Reader<BufReader<File>>>,
original_documents_chunk: Result<grenad::Reader<File>>,
indexer: GrenadParameters,
lmdb_writer_sx: Sender<Result<TypedChunk>>,
vectors_field_id: Option<FieldId>,
@@ -316,7 +311,7 @@ fn send_original_documents_data(
#[allow(clippy::too_many_arguments)]
#[allow(clippy::type_complexity)]
fn send_and_extract_flattened_documents_data(
flattened_documents_chunk: Result<grenad::Reader<BufReader<File>>>,
flattened_documents_chunk: Result<grenad::Reader<File>>,
indexer: GrenadParameters,
lmdb_writer_sx: Sender<Result<TypedChunk>>,
searchable_fields: &Option<HashSet<FieldId>>,
@@ -333,10 +328,7 @@ fn send_and_extract_flattened_documents_data(
grenad::Reader<CursorClonableMmap>,
(
grenad::Reader<CursorClonableMmap>,
(
grenad::Reader<BufReader<File>>,
(grenad::Reader<BufReader<File>>, grenad::Reader<BufReader<File>>),
),
(grenad::Reader<File>, (grenad::Reader<File>, grenad::Reader<File>)),
),
),
)> {

View File

@@ -1,6 +1,6 @@
use std::borrow::Cow;
use std::fs::File;
use std::io::{self, BufReader, BufWriter, Seek};
use std::io::{self, Seek};
use std::time::Instant;
use grenad::{CompressionType, Sorter};
@@ -17,13 +17,13 @@ pub fn create_writer<R: io::Write>(
typ: grenad::CompressionType,
level: Option<u32>,
file: R,
) -> grenad::Writer<BufWriter<R>> {
) -> grenad::Writer<R> {
let mut builder = grenad::Writer::builder();
builder.compression_type(typ);
if let Some(level) = level {
builder.compression_level(level);
}
builder.build(BufWriter::new(file))
builder.build(file)
}
pub fn create_sorter(
@@ -53,7 +53,7 @@ pub fn create_sorter(
pub fn sorter_into_reader(
sorter: grenad::Sorter<MergeFn>,
indexer: GrenadParameters,
) -> Result<grenad::Reader<BufReader<File>>> {
) -> Result<grenad::Reader<File>> {
let mut writer = create_writer(
indexer.chunk_compression_type,
indexer.chunk_compression_level,
@@ -64,18 +64,16 @@ pub fn sorter_into_reader(
writer_into_reader(writer)
}
pub fn writer_into_reader(
writer: grenad::Writer<BufWriter<File>>,
) -> Result<grenad::Reader<BufReader<File>>> {
let mut file = writer.into_inner()?.into_inner().map_err(|err| err.into_error())?;
pub fn writer_into_reader(writer: grenad::Writer<File>) -> Result<grenad::Reader<File>> {
let mut file = writer.into_inner()?;
file.rewind()?;
grenad::Reader::new(BufReader::new(file)).map_err(Into::into)
grenad::Reader::new(file).map_err(Into::into)
}
pub unsafe fn as_cloneable_grenad(
reader: &grenad::Reader<BufReader<File>>,
reader: &grenad::Reader<File>,
) -> Result<grenad::Reader<CursorClonableMmap>> {
let file = reader.get_ref().get_ref();
let file = reader.get_ref();
let mmap = memmap2::Mmap::map(file)?;
let cursor = io::Cursor::new(ClonableMmap::from(mmap));
let reader = grenad::Reader::new(cursor)?;
@@ -91,8 +89,8 @@ where
fn merge(self, merge_fn: MergeFn, indexer: &GrenadParameters) -> Result<Self::Output>;
}
impl MergeableReader for Vec<grenad::Reader<BufReader<File>>> {
type Output = grenad::Reader<BufReader<File>>;
impl MergeableReader for Vec<grenad::Reader<File>> {
type Output = grenad::Reader<File>;
fn merge(self, merge_fn: MergeFn, params: &GrenadParameters) -> Result<Self::Output> {
let mut merger = MergerBuilder::new(merge_fn);
@@ -101,8 +99,8 @@ impl MergeableReader for Vec<grenad::Reader<BufReader<File>>> {
}
}
impl MergeableReader for Vec<(grenad::Reader<BufReader<File>>, grenad::Reader<BufReader<File>>)> {
type Output = (grenad::Reader<BufReader<File>>, grenad::Reader<BufReader<File>>);
impl MergeableReader for Vec<(grenad::Reader<File>, grenad::Reader<File>)> {
type Output = (grenad::Reader<File>, grenad::Reader<File>);
fn merge(self, merge_fn: MergeFn, params: &GrenadParameters) -> Result<Self::Output> {
let mut m1 = MergerBuilder::new(merge_fn);
@@ -127,7 +125,7 @@ impl<R: io::Read + io::Seek> MergerBuilder<R> {
Ok(())
}
fn finish(self, params: &GrenadParameters) -> Result<grenad::Reader<BufReader<File>>> {
fn finish(self, params: &GrenadParameters) -> Result<grenad::Reader<File>> {
let merger = self.0.build();
let mut writer = create_writer(
params.chunk_compression_type,
@@ -178,7 +176,7 @@ pub fn grenad_obkv_into_chunks<R: io::Read + io::Seek>(
reader: grenad::Reader<R>,
indexer: GrenadParameters,
documents_chunk_size: usize,
) -> Result<impl Iterator<Item = Result<grenad::Reader<BufReader<File>>>>> {
) -> Result<impl Iterator<Item = Result<grenad::Reader<File>>>> {
let mut continue_reading = true;
let mut cursor = reader.into_cursor()?;
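The hunks above switch `create_writer` and `writer_into_reader` from buffered wrappers to plain `File` handles. A minimal sketch of the resulting round-trip pattern, assuming the `grenad` and `tempfile` crates as already used elsewhere in this diff; the key/value literals are illustrative only:

```rust
use std::fs::File;
use std::io::Seek;

// Write a few entries with a grenad Writer built directly on a File, then
// rewind that same File and open it as a grenad Reader, with no
// BufWriter/BufReader in between (mirroring writer_into_reader above).
fn write_then_read() -> grenad::Reader<File> {
    let mut writer = grenad::Writer::new(tempfile::tempfile().unwrap());
    // grenad expects keys to be inserted in lexicographic order.
    writer.insert(b"first-key", b"value").unwrap();
    writer.insert(b"second-key", b"value").unwrap();
    let mut file = writer.into_inner().unwrap();
    file.rewind().unwrap();
    grenad::Reader::new(file).unwrap()
}
```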

View File

@@ -659,10 +659,8 @@ impl<'a, 'i> Transform<'a, 'i> {
new_documents_ids: self.new_documents_ids,
replaced_documents_ids: self.replaced_documents_ids,
documents_count: self.documents_count,
original_documents: original_documents.into_inner().map_err(|err| err.into_error())?,
flattened_documents: flattened_documents
.into_inner()
.map_err(|err| err.into_error())?,
original_documents,
flattened_documents,
})
}
@@ -781,10 +779,8 @@ impl<'a, 'i> Transform<'a, 'i> {
new_documents_ids: documents_ids,
replaced_documents_ids: RoaringBitmap::default(),
documents_count,
original_documents: original_documents.into_inner().map_err(|err| err.into_error())?,
flattened_documents: flattened_documents
.into_inner()
.map_err(|err| err.into_error())?,
original_documents,
flattened_documents,
};
let new_facets = output.compute_real_facets(wtxn, self.index)?;

View File

@@ -2,7 +2,7 @@ use std::borrow::Cow;
use std::collections::HashMap;
use std::convert::TryInto;
use std::fs::File;
use std::io::{self, BufReader};
use std::io;
use bytemuck::allocation::pod_collect_to_vec;
use charabia::{Language, Script};
@@ -27,22 +27,22 @@ pub(crate) enum TypedChunk {
FieldIdDocidFacetStrings(grenad::Reader<CursorClonableMmap>),
FieldIdDocidFacetNumbers(grenad::Reader<CursorClonableMmap>),
Documents(grenad::Reader<CursorClonableMmap>),
FieldIdWordcountDocids(grenad::Reader<BufReader<File>>),
FieldIdWordcountDocids(grenad::Reader<File>),
NewDocumentsIds(RoaringBitmap),
WordDocids {
word_docids_reader: grenad::Reader<BufReader<File>>,
exact_word_docids_reader: grenad::Reader<BufReader<File>>,
word_docids_reader: grenad::Reader<File>,
exact_word_docids_reader: grenad::Reader<File>,
},
WordPositionDocids(grenad::Reader<BufReader<File>>),
WordFidDocids(grenad::Reader<BufReader<File>>),
WordPairProximityDocids(grenad::Reader<BufReader<File>>),
FieldIdFacetStringDocids(grenad::Reader<BufReader<File>>),
FieldIdFacetNumberDocids(grenad::Reader<BufReader<File>>),
FieldIdFacetExistsDocids(grenad::Reader<BufReader<File>>),
FieldIdFacetIsNullDocids(grenad::Reader<BufReader<File>>),
FieldIdFacetIsEmptyDocids(grenad::Reader<BufReader<File>>),
GeoPoints(grenad::Reader<BufReader<File>>),
VectorPoints(grenad::Reader<BufReader<File>>),
WordPositionDocids(grenad::Reader<File>),
WordFidDocids(grenad::Reader<File>),
WordPairProximityDocids(grenad::Reader<File>),
FieldIdFacetStringDocids(grenad::Reader<File>),
FieldIdFacetNumberDocids(grenad::Reader<File>),
FieldIdFacetExistsDocids(grenad::Reader<File>),
FieldIdFacetIsNullDocids(grenad::Reader<File>),
FieldIdFacetIsEmptyDocids(grenad::Reader<File>),
GeoPoints(grenad::Reader<File>),
VectorPoints(grenad::Reader<File>),
ScriptLanguageDocids(HashMap<(Script, Language), RoaringBitmap>),
}

View File

@@ -1,6 +1,6 @@
use std::borrow::Cow;
use std::collections::HashSet;
use std::io::{BufReader, BufWriter};
use std::io::BufReader;
use grenad::CompressionType;
use heed::types::ByteSlice;
@@ -119,9 +119,9 @@ pub fn insert_into_database(
pub fn write_into_lmdb_database_without_merging(
wtxn: &mut heed::RwTxn,
database: heed::PolyDatabase,
writer: grenad::Writer<BufWriter<std::fs::File>>,
writer: grenad::Writer<std::fs::File>,
) -> Result<()> {
let file = writer.into_inner()?.into_inner().map_err(|err| err.into_error())?;
let file = writer.into_inner()?;
let reader = grenad::Reader::new(BufReader::new(file))?;
if database.is_empty(wtxn)? {
let mut out_iter = database.iter_mut::<_, ByteSlice, ByteSlice>(wtxn)?;

View File

@@ -20,4 +20,7 @@ source: milli/src/update/prefix_word_pairs/mod.rs
3 at a [100, ]
3 rings a [101, ]
3 the a [101, ]
4 at b [100, ]
4 at be [100, ]
4 bell a [101, ]

View File

@@ -30,4 +30,10 @@ source: milli/src/update/prefix_word_pairs/mod.rs
3 bell 5 [101, ]
3 rings am [101, ]
3 the at [101, ]
4 an house [100, ]
4 at beautiful [100, ]
4 bell am [101, ]
4 the 5 [101, ]
5 at house [100, ]
5 the am [101, ]

View File

@@ -28,4 +28,8 @@ source: milli/src/update/prefix_word_pairs/mod.rs
3 rings a [101, ]
3 rings am [101, ]
3 the a [101, ]
4 at b [100, ]
4 at be [100, ]
4 bell a [101, ]
4 bell am [101, ]

View File

@@ -7,4 +7,5 @@ source: milli/src/update/prefix_word_pairs/mod.rs
2 bell a [51, ]
3 rings a [51, ]
3 the a [51, ]
4 bell a [51, ]

View File

@@ -12,4 +12,5 @@ source: milli/src/update/prefix_word_pairs/mod.rs
3 at a [50, ]
3 rings a [51, ]
3 the a [51, ]
4 bell a [51, ]

View File

@@ -7,4 +7,5 @@ source: milli/src/update/prefix_word_pairs/mod.rs
2 bell a [51, ]
3 rings a [51, ]
3 the a [51, ]
4 bell a [51, ]

View File

@@ -12,4 +12,5 @@ source: milli/src/update/prefix_word_pairs/mod.rs
3 at a [50, ]
3 rings a [51, ]
3 the a [51, ]
4 bell a [51, ]

View File

@@ -12,4 +12,5 @@ source: milli/src/update/prefix_word_pairs/mod.rs
3 at a [50, ]
3 rings a [51, ]
3 the a [51, ]
4 bell a [51, ]

Some files were not shown because too many files have changed in this diff.