Merge pull request #5218 from meilisearch/upgrade-dependencies
Some checks failed
Indexing bench (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of indexing (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for geo (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for songs (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Waiting to run
Run the indexing fuzzer / Setup the action (push) Successful in 1h5m17s
Publish binaries to GitHub release / Publish binary for ${{ matrix.os }} (meilisearch, meilisearch-macos-amd64, macos-13) (push) Waiting to run
Publish binaries to GitHub release / Publish binary for macOS silicon (meilisearch-macos-apple-silicon, aarch64-apple-darwin) (push) Waiting to run
Test suite / Tests on ${{ matrix.os }} (macos-13) (push) Waiting to run
Look for flaky tests / flaky (push) Failing after 9s
Publish binaries to GitHub release / Check the version validity (push) Successful in 8s
Publish binaries to GitHub release / Publish binary for Linux (push) Failing after 8s
Publish binaries to GitHub release / Publish binary for aarch64 (meilisearch-linux-aarch64, aarch64-unknown-linux-gnu) (push) Failing after 9s
Publish binaries to GitHub release / Publish binary for ${{ matrix.os }} (meilisearch.exe, meilisearch-windows-amd64.exe, windows-2022) (push) Failing after 28s
Test suite / Tests on ubuntu-20.04 (push) Failing after 10s
Test suite / Tests on ${{ matrix.os }} (windows-2022) (push) Failing after 13s
Test suite / Tests almost all features (push) Failing after 8s
Test suite / Test disabled tokenization (push) Failing after 7s
Test suite / Run tests in debug (push) Failing after 9s
Test suite / Run Rustfmt (push) Successful in 2m26s
Test suite / Run Clippy (push) Successful in 5m40s

Upgrade dependencies
This commit is contained in:
Clément Renault
2025-01-09 11:46:44 +01:00
committed by GitHub
65 changed files with 1068 additions and 994 deletions

View File

@@ -18,7 +18,7 @@ jobs:
timeout-minutes: 180 # 3h timeout-minutes: 180 # 3h
steps: steps:
- uses: actions/checkout@v3 - uses: actions/checkout@v3
- uses: dtolnay/rust-toolchain@1.79 - uses: dtolnay/rust-toolchain@1.81
with: with:
profile: minimal profile: minimal

View File

@@ -66,7 +66,7 @@ jobs:
fetch-depth: 0 # fetch full history to be able to get main commit sha fetch-depth: 0 # fetch full history to be able to get main commit sha
ref: ${{ steps.comment-branch.outputs.head_ref }} ref: ${{ steps.comment-branch.outputs.head_ref }}
- uses: dtolnay/rust-toolchain@1.79 - uses: dtolnay/rust-toolchain@1.81
with: with:
profile: minimal profile: minimal

View File

@@ -12,7 +12,7 @@ jobs:
timeout-minutes: 180 # 3h timeout-minutes: 180 # 3h
steps: steps:
- uses: actions/checkout@v3 - uses: actions/checkout@v3
- uses: dtolnay/rust-toolchain@1.79 - uses: dtolnay/rust-toolchain@1.81
with: with:
profile: minimal profile: minimal

View File

@@ -18,7 +18,7 @@ jobs:
timeout-minutes: 4320 # 72h timeout-minutes: 4320 # 72h
steps: steps:
- uses: actions/checkout@v3 - uses: actions/checkout@v3
- uses: dtolnay/rust-toolchain@1.79 - uses: dtolnay/rust-toolchain@1.81
with: with:
profile: minimal profile: minimal

View File

@@ -44,7 +44,7 @@ jobs:
exit 1 exit 1
fi fi
- uses: dtolnay/rust-toolchain@1.79 - uses: dtolnay/rust-toolchain@1.81
with: with:
profile: minimal profile: minimal

View File

@@ -16,7 +16,7 @@ jobs:
timeout-minutes: 4320 # 72h timeout-minutes: 4320 # 72h
steps: steps:
- uses: actions/checkout@v3 - uses: actions/checkout@v3
- uses: dtolnay/rust-toolchain@1.79 - uses: dtolnay/rust-toolchain@1.81
with: with:
profile: minimal profile: minimal

View File

@@ -15,7 +15,7 @@ jobs:
runs-on: benchmarks runs-on: benchmarks
steps: steps:
- uses: actions/checkout@v3 - uses: actions/checkout@v3
- uses: dtolnay/rust-toolchain@1.79 - uses: dtolnay/rust-toolchain@1.81
with: with:
profile: minimal profile: minimal

View File

@@ -15,7 +15,7 @@ jobs:
runs-on: benchmarks runs-on: benchmarks
steps: steps:
- uses: actions/checkout@v3 - uses: actions/checkout@v3
- uses: dtolnay/rust-toolchain@1.79 - uses: dtolnay/rust-toolchain@1.81
with: with:
profile: minimal profile: minimal

View File

@@ -15,7 +15,7 @@ jobs:
runs-on: benchmarks runs-on: benchmarks
steps: steps:
- uses: actions/checkout@v3 - uses: actions/checkout@v3
- uses: dtolnay/rust-toolchain@1.79 - uses: dtolnay/rust-toolchain@1.81
with: with:
profile: minimal profile: minimal

View File

@@ -17,7 +17,7 @@ jobs:
run: | run: |
apt-get update && apt-get install -y curl apt-get update && apt-get install -y curl
apt-get install build-essential -y apt-get install build-essential -y
- uses: dtolnay/rust-toolchain@1.79 - uses: dtolnay/rust-toolchain@1.81
- name: Install cargo-flaky - name: Install cargo-flaky
run: cargo install cargo-flaky run: cargo install cargo-flaky
- name: Run cargo flaky in the dumps - name: Run cargo flaky in the dumps

View File

@@ -12,7 +12,7 @@ jobs:
timeout-minutes: 4320 # 72h timeout-minutes: 4320 # 72h
steps: steps:
- uses: actions/checkout@v3 - uses: actions/checkout@v3
- uses: dtolnay/rust-toolchain@1.79 - uses: dtolnay/rust-toolchain@1.81
with: with:
profile: minimal profile: minimal

View File

@@ -25,7 +25,7 @@ jobs:
run: | run: |
apt-get update && apt-get install -y curl apt-get update && apt-get install -y curl
apt-get install build-essential -y apt-get install build-essential -y
- uses: dtolnay/rust-toolchain@1.79 - uses: dtolnay/rust-toolchain@1.81
- name: Install cargo-deb - name: Install cargo-deb
run: cargo install cargo-deb run: cargo install cargo-deb
- uses: actions/checkout@v3 - uses: actions/checkout@v3

View File

@@ -45,7 +45,7 @@ jobs:
run: | run: |
apt-get update && apt-get install -y curl apt-get update && apt-get install -y curl
apt-get install build-essential -y apt-get install build-essential -y
- uses: dtolnay/rust-toolchain@1.79 - uses: dtolnay/rust-toolchain@1.81
- name: Build - name: Build
run: cargo build --release --locked run: cargo build --release --locked
# No need to upload binaries for dry run (cron) # No need to upload binaries for dry run (cron)
@@ -75,7 +75,7 @@ jobs:
asset_name: meilisearch-windows-amd64.exe asset_name: meilisearch-windows-amd64.exe
steps: steps:
- uses: actions/checkout@v3 - uses: actions/checkout@v3
- uses: dtolnay/rust-toolchain@1.79 - uses: dtolnay/rust-toolchain@1.81
- name: Build - name: Build
run: cargo build --release --locked run: cargo build --release --locked
# No need to upload binaries for dry run (cron) # No need to upload binaries for dry run (cron)
@@ -101,7 +101,7 @@ jobs:
- name: Checkout repository - name: Checkout repository
uses: actions/checkout@v3 uses: actions/checkout@v3
- name: Installing Rust toolchain - name: Installing Rust toolchain
uses: dtolnay/rust-toolchain@1.79 uses: dtolnay/rust-toolchain@1.81
with: with:
profile: minimal profile: minimal
target: ${{ matrix.target }} target: ${{ matrix.target }}
@@ -148,7 +148,7 @@ jobs:
add-apt-repository "deb [arch=$(dpkg --print-architecture)] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" add-apt-repository "deb [arch=$(dpkg --print-architecture)] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable"
apt-get update -y && apt-get install -y docker-ce apt-get update -y && apt-get install -y docker-ce
- name: Installing Rust toolchain - name: Installing Rust toolchain
uses: dtolnay/rust-toolchain@1.79 uses: dtolnay/rust-toolchain@1.81
with: with:
profile: minimal profile: minimal
target: ${{ matrix.target }} target: ${{ matrix.target }}

View File

@@ -31,7 +31,7 @@ jobs:
apt-get update && apt-get install -y curl apt-get update && apt-get install -y curl
apt-get install build-essential -y apt-get install build-essential -y
- name: Setup test with Rust stable - name: Setup test with Rust stable
uses: dtolnay/rust-toolchain@1.79 uses: dtolnay/rust-toolchain@1.81
- name: Cache dependencies - name: Cache dependencies
uses: Swatinem/rust-cache@v2.7.7 uses: Swatinem/rust-cache@v2.7.7
- name: Run cargo check without any default features - name: Run cargo check without any default features
@@ -56,7 +56,7 @@ jobs:
- uses: actions/checkout@v3 - uses: actions/checkout@v3
- name: Cache dependencies - name: Cache dependencies
uses: Swatinem/rust-cache@v2.7.7 uses: Swatinem/rust-cache@v2.7.7
- uses: dtolnay/rust-toolchain@1.79 - uses: dtolnay/rust-toolchain@1.81
- name: Run cargo check without any default features - name: Run cargo check without any default features
uses: actions-rs/cargo@v1 uses: actions-rs/cargo@v1
with: with:
@@ -81,7 +81,7 @@ jobs:
run: | run: |
apt-get update apt-get update
apt-get install --assume-yes build-essential curl apt-get install --assume-yes build-essential curl
- uses: dtolnay/rust-toolchain@1.79 - uses: dtolnay/rust-toolchain@1.81
- name: Run cargo build with almost all features - name: Run cargo build with almost all features
run: | run: |
cargo build --workspace --locked --release --features "$(cargo xtask list-features --exclude-feature cuda)" cargo build --workspace --locked --release --features "$(cargo xtask list-features --exclude-feature cuda)"
@@ -101,7 +101,7 @@ jobs:
run: | run: |
apt-get update apt-get update
apt-get install --assume-yes build-essential curl apt-get install --assume-yes build-essential curl
- uses: dtolnay/rust-toolchain@1.79 - uses: dtolnay/rust-toolchain@1.81
- name: Run cargo tree without default features and check lindera is not present - name: Run cargo tree without default features and check lindera is not present
run: | run: |
if cargo tree -f '{p} {f}' -e normal --no-default-features | grep -qz lindera; then if cargo tree -f '{p} {f}' -e normal --no-default-features | grep -qz lindera; then
@@ -125,7 +125,7 @@ jobs:
run: | run: |
apt-get update && apt-get install -y curl apt-get update && apt-get install -y curl
apt-get install build-essential -y apt-get install build-essential -y
- uses: dtolnay/rust-toolchain@1.79 - uses: dtolnay/rust-toolchain@1.81
- name: Cache dependencies - name: Cache dependencies
uses: Swatinem/rust-cache@v2.7.7 uses: Swatinem/rust-cache@v2.7.7
- name: Run tests in debug - name: Run tests in debug
@@ -139,7 +139,7 @@ jobs:
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:
- uses: actions/checkout@v3 - uses: actions/checkout@v3
- uses: dtolnay/rust-toolchain@1.79 - uses: dtolnay/rust-toolchain@1.81
with: with:
profile: minimal profile: minimal
components: clippy components: clippy
@@ -156,7 +156,7 @@ jobs:
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:
- uses: actions/checkout@v3 - uses: actions/checkout@v3
- uses: dtolnay/rust-toolchain@1.79 - uses: dtolnay/rust-toolchain@1.81
with: with:
profile: minimal profile: minimal
toolchain: nightly-2024-07-09 toolchain: nightly-2024-07-09

View File

@@ -18,7 +18,7 @@ jobs:
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:
- uses: actions/checkout@v3 - uses: actions/checkout@v3
- uses: dtolnay/rust-toolchain@1.79 - uses: dtolnay/rust-toolchain@1.81
with: with:
profile: minimal profile: minimal
- name: Install sd - name: Install sd

1
.gitignore vendored
View File

@@ -10,6 +10,7 @@
/dumps /dumps
/bench /bench
/_xtask_benchmark.ms /_xtask_benchmark.ms
/benchmarks
# Snapshots # Snapshots
## ... large ## ... large

1169
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -1,5 +1,5 @@
# Compile # Compile
FROM rust:1.79.0-alpine3.20 AS compiler FROM rust:1.81.0-alpine3.20 AS compiler
RUN apk add -q --no-cache build-base openssl-dev RUN apk add -q --no-cache build-base openssl-dev

View File

@@ -11,27 +11,27 @@ edition.workspace = true
license.workspace = true license.workspace = true
[dependencies] [dependencies]
anyhow = "1.0.86" anyhow = "1.0.95"
bumpalo = "3.16.0" bumpalo = "3.16.0"
csv = "1.3.0" csv = "1.3.1"
memmap2 = "0.9.5" memmap2 = "0.9.5"
milli = { path = "../milli" } milli = { path = "../milli" }
mimalloc = { version = "0.1.43", default-features = false } mimalloc = { version = "0.1.43", default-features = false }
serde_json = { version = "1.0.120", features = ["preserve_order"] } serde_json = { version = "1.0.135", features = ["preserve_order"] }
tempfile = "3.14.0" tempfile = "3.15.0"
[dev-dependencies] [dev-dependencies]
criterion = { version = "0.5.1", features = ["html_reports"] } criterion = { version = "0.5.1", features = ["html_reports"] }
rand = "0.8.5" rand = "0.8.5"
rand_chacha = "0.3.1" rand_chacha = "0.3.1"
roaring = "0.10.7" roaring = "0.10.10"
[build-dependencies] [build-dependencies]
anyhow = "1.0.86" anyhow = "1.0.95"
bytes = "1.6.0" bytes = "1.9.0"
convert_case = "0.6.0" convert_case = "0.6.0"
flate2 = "1.0.30" flate2 = "1.0.35"
reqwest = { version = "0.12.5", features = ["blocking", "rustls-tls"], default-features = false } reqwest = { version = "0.12.12", features = ["blocking", "rustls-tls"], default-features = false }
[features] [features]
default = ["milli/all-tokenizations"] default = ["milli/all-tokenizations"]

View File

@@ -11,8 +11,8 @@ license.workspace = true
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies] [dependencies]
time = { version = "0.3.36", features = ["parsing"] } time = { version = "0.3.37", features = ["parsing"] }
[build-dependencies] [build-dependencies]
anyhow = "1.0.86" anyhow = "1.0.95"
vergen-git2 = "1.0.0" vergen-git2 = "1.0.2"

View File

@@ -11,21 +11,21 @@ readme.workspace = true
license.workspace = true license.workspace = true
[dependencies] [dependencies]
anyhow = "1.0.86" anyhow = "1.0.95"
flate2 = "1.0.30" flate2 = "1.0.35"
http = "1.1.0" http = "1.2.0"
meilisearch-types = { path = "../meilisearch-types" } meilisearch-types = { path = "../meilisearch-types" }
once_cell = "1.19.0" once_cell = "1.20.2"
regex = "1.10.5" regex = "1.11.1"
roaring = { version = "0.10.7", features = ["serde"] } roaring = { version = "0.10.10", features = ["serde"] }
serde = { version = "1.0.204", features = ["derive"] } serde = { version = "1.0.217", features = ["derive"] }
serde_json = { version = "1.0.120", features = ["preserve_order"] } serde_json = { version = "1.0.135", features = ["preserve_order"] }
tar = "0.4.41" tar = "0.4.43"
tempfile = "3.10.1" tempfile = "3.15.0"
thiserror = "1.0.61" thiserror = "2.0.9"
time = { version = "0.3.36", features = ["serde-well-known", "formatting", "parsing", "macros"] } time = { version = "0.3.37", features = ["serde-well-known", "formatting", "parsing", "macros"] }
tracing = "0.1.40" tracing = "0.1.41"
uuid = { version = "1.10.0", features = ["serde", "v4"] } uuid = { version = "1.11.0", features = ["serde", "v4"] }
[dev-dependencies] [dev-dependencies]
big_s = "1.0.2" big_s = "1.0.2"

View File

@@ -11,7 +11,7 @@ edition.workspace = true
license.workspace = true license.workspace = true
[dependencies] [dependencies]
tempfile = "3.10.1" tempfile = "3.15.0"
thiserror = "1.0.61" thiserror = "2.0.9"
tracing = "0.1.40" tracing = "0.1.41"
uuid = { version = "1.10.0", features = ["serde", "v4"] } uuid = { version = "1.11.0", features = ["serde", "v4"] }

View File

@@ -17,4 +17,5 @@ nom_locate = "4.2.0"
unescaper = "0.1.5" unescaper = "0.1.5"
[dev-dependencies] [dev-dependencies]
insta = "1.39.0" # fixed version due to format breakages in v1.40
insta = "=1.39.0"

View File

@@ -11,12 +11,12 @@ edition.workspace = true
license.workspace = true license.workspace = true
[dependencies] [dependencies]
arbitrary = { version = "1.3.2", features = ["derive"] } arbitrary = { version = "1.4.1", features = ["derive"] }
bumpalo = "3.16.0" bumpalo = "3.16.0"
clap = { version = "4.5.9", features = ["derive"] } clap = { version = "4.5.24", features = ["derive"] }
either = "1.13.0" either = "1.13.0"
fastrand = "2.1.0" fastrand = "2.3.0"
milli = { path = "../milli" } milli = { path = "../milli" }
serde = { version = "1.0.204", features = ["derive"] } serde = { version = "1.0.217", features = ["derive"] }
serde_json = { version = "1.0.120", features = ["preserve_order"] } serde_json = { version = "1.0.135", features = ["preserve_order"] }
tempfile = "3.10.1" tempfile = "3.15.0"

View File

@@ -11,42 +11,43 @@ edition.workspace = true
license.workspace = true license.workspace = true
[dependencies] [dependencies]
anyhow = "1.0.86" anyhow = "1.0.95"
bincode = "1.3.3" bincode = "1.3.3"
bumpalo = "3.16.0" bumpalo = "3.16.0"
bumparaw-collections = "0.1.2" bumparaw-collections = "0.1.4"
convert_case = "0.6.0" convert_case = "0.6.0"
csv = "1.3.0" csv = "1.3.1"
derive_builder = "0.20.0" derive_builder = "0.20.2"
dump = { path = "../dump" } dump = { path = "../dump" }
enum-iterator = "2.1.0" enum-iterator = "2.1.0"
file-store = { path = "../file-store" } file-store = { path = "../file-store" }
flate2 = "1.0.30" flate2 = "1.0.35"
meilisearch-auth = { path = "../meilisearch-auth" } meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-types = { path = "../meilisearch-types" } meilisearch-types = { path = "../meilisearch-types" }
memmap2 = "0.9.4" memmap2 = "0.9.5"
page_size = "0.6.0" page_size = "0.6.0"
rayon = "1.10.0" rayon = "1.10.0"
roaring = { version = "0.10.7", features = ["serde"] } roaring = { version = "0.10.10", features = ["serde"] }
serde = { version = "1.0.204", features = ["derive"] } serde = { version = "1.0.217", features = ["derive"] }
serde_json = { version = "1.0.120", features = ["preserve_order"] } serde_json = { version = "1.0.135", features = ["preserve_order"] }
synchronoise = "1.0.1" synchronoise = "1.0.1"
tempfile = "3.10.1" tempfile = "3.15.0"
thiserror = "1.0.61" thiserror = "2.0.9"
time = { version = "0.3.36", features = [ time = { version = "0.3.37", features = [
"serde-well-known", "serde-well-known",
"formatting", "formatting",
"parsing", "parsing",
"macros", "macros",
] } ] }
tracing = "0.1.40" tracing = "0.1.41"
ureq = "2.10.0" ureq = "2.12.1"
uuid = { version = "1.10.0", features = ["serde", "v4"] } uuid = { version = "1.11.0", features = ["serde", "v4"] }
[dev-dependencies] [dev-dependencies]
arroy = "0.5.0" arroy = "0.5.0"
big_s = "1.0.2" big_s = "1.0.2"
crossbeam-channel = "0.5.13" crossbeam-channel = "0.5.14"
insta = { version = "1.39.0", features = ["json", "redactions"] } # fixed version due to format breakages in v1.40
insta = { version = "=1.39.0", features = ["json", "redactions"] }
maplit = "1.0.2" maplit = "1.0.2"
meili-snap = { path = "../meili-snap" } meili-snap = { path = "../meili-snap" }

View File

@@ -1,7 +1,7 @@
/*! /*!
This crate defines the index scheduler, which is responsible for: This crate defines the index scheduler, which is responsible for:
1. Keeping references to meilisearch's indexes and mapping them to their 1. Keeping references to meilisearch's indexes and mapping them to their
user-defined names. user-defined names.
2. Scheduling tasks given by the user and executing them, in batch if possible. 2. Scheduling tasks given by the user and executing them, in batch if possible.
When an `IndexScheduler` is created, a new thread containing a reference to the When an `IndexScheduler` is created, a new thread containing a reference to the
@@ -513,7 +513,7 @@ impl IndexScheduler {
/// the user. /// the user.
/// ///
/// 1. IndexSwap tasks are not publicly associated with any index, but they are associated /// 1. IndexSwap tasks are not publicly associated with any index, but they are associated
/// with many indexes internally. /// with many indexes internally.
/// 2. The user may not have the rights to access the tasks (internally) associated with all indexes. /// 2. The user may not have the rights to access the tasks (internally) associated with all indexes.
pub fn get_tasks_from_authorized_indexes( pub fn get_tasks_from_authorized_indexes(
&self, &self,
@@ -532,7 +532,7 @@ impl IndexScheduler {
/// the user. /// the user.
/// ///
/// 1. IndexSwap tasks are not publicly associated with any index, but they are associated /// 1. IndexSwap tasks are not publicly associated with any index, but they are associated
/// with many indexes internally. /// with many indexes internally.
/// 2. The user may not have the rights to access the tasks (internally) associated with all indexes. /// 2. The user may not have the rights to access the tasks (internally) associated with all indexes.
pub fn get_task_ids_from_authorized_indexes( pub fn get_task_ids_from_authorized_indexes(
&self, &self,
@@ -551,7 +551,7 @@ impl IndexScheduler {
/// the user. /// the user.
/// ///
/// 1. IndexSwap tasks are not publicly associated with any index, but they are associated /// 1. IndexSwap tasks are not publicly associated with any index, but they are associated
/// with many indexes internally. /// with many indexes internally.
/// 2. The user may not have the rights to access the tasks (internally) associated with all indexes. /// 2. The user may not have the rights to access the tasks (internally) associated with all indexes.
pub fn get_batches_from_authorized_indexes( pub fn get_batches_from_authorized_indexes(
&self, &self,
@@ -570,7 +570,7 @@ impl IndexScheduler {
/// the user. /// the user.
/// ///
/// 1. IndexSwap tasks are not publicly associated with any index, but they are associated /// 1. IndexSwap tasks are not publicly associated with any index, but they are associated
/// with many indexes internally. /// with many indexes internally.
/// 2. The user may not have the rights to access the tasks (internally) associated with all indexes. /// 2. The user may not have the rights to access the tasks (internally) associated with all indexes.
pub fn get_batch_ids_from_authorized_indexes( pub fn get_batch_ids_from_authorized_indexes(
&self, &self,

View File

@@ -444,7 +444,7 @@ impl Queue {
/// the user. /// the user.
/// ///
/// 1. IndexSwap tasks are not publicly associated with any index, but they are associated /// 1. IndexSwap tasks are not publicly associated with any index, but they are associated
/// with many indexes internally. /// with many indexes internally.
/// 2. The user may not have the rights to access the tasks (internally) associated with all indexes. /// 2. The user may not have the rights to access the tasks (internally) associated with all indexes.
pub(crate) fn get_batch_ids_from_authorized_indexes( pub(crate) fn get_batch_ids_from_authorized_indexes(
&self, &self,

View File

@@ -106,7 +106,7 @@ impl IndexScheduler {
progress.update_progress(DumpCreationProgress::DumpTheIndexes); progress.update_progress(DumpCreationProgress::DumpTheIndexes);
let nb_indexes = self.index_mapper.index_mapping.len(&rtxn)? as u32; let nb_indexes = self.index_mapper.index_mapping.len(&rtxn)? as u32;
let mut count = 0; let mut count = 0;
self.index_mapper.try_for_each_index(&rtxn, |uid, index| -> Result<()> { let () = self.index_mapper.try_for_each_index(&rtxn, |uid, index| -> Result<()> {
progress.update_progress(VariableNameStep::new(uid.to_string(), count, nb_indexes)); progress.update_progress(VariableNameStep::new(uid.to_string(), count, nb_indexes));
count += 1; count += 1;

View File

@@ -14,4 +14,4 @@ license.workspace = true
# fixed version due to format breakages in v1.40 # fixed version due to format breakages in v1.40
insta = { version = "=1.39.0", features = ["json", "redactions"] } insta = { version = "=1.39.0", features = ["json", "redactions"] }
md5 = "0.7.0" md5 = "0.7.0"
once_cell = "1.19" once_cell = "1.20"

View File

@@ -17,10 +17,10 @@ hmac = "0.12.1"
maplit = "1.0.2" maplit = "1.0.2"
meilisearch-types = { path = "../meilisearch-types" } meilisearch-types = { path = "../meilisearch-types" }
rand = "0.8.5" rand = "0.8.5"
roaring = { version = "0.10.7", features = ["serde"] } roaring = { version = "0.10.10", features = ["serde"] }
serde = { version = "1.0.204", features = ["derive"] } serde = { version = "1.0.217", features = ["derive"] }
serde_json = { version = "1.0.120", features = ["preserve_order"] } serde_json = { version = "1.0.135", features = ["preserve_order"] }
sha2 = "0.10.8" sha2 = "0.10.8"
thiserror = "1.0.61" thiserror = "2.0.9"
time = { version = "0.3.36", features = ["serde-well-known", "formatting", "parsing", "macros"] } time = { version = "0.3.37", features = ["serde-well-known", "formatting", "parsing", "macros"] }
uuid = { version = "1.10.0", features = ["serde", "v4"] } uuid = { version = "1.11.0", features = ["serde", "v4"] }

View File

@@ -11,40 +11,41 @@ edition.workspace = true
license.workspace = true license.workspace = true
[dependencies] [dependencies]
actix-web = { version = "4.8.0", default-features = false } actix-web = { version = "4.9.0", default-features = false }
anyhow = "1.0.86" anyhow = "1.0.95"
bumpalo = "3.16.0" bumpalo = "3.16.0"
convert_case = "0.6.0" convert_case = "0.6.0"
csv = "1.3.0" csv = "1.3.1"
deserr = { version = "0.6.2", features = ["actix-web"] } deserr = { version = "0.6.3", features = ["actix-web"] }
either = { version = "1.13.0", features = ["serde"] } either = { version = "1.13.0", features = ["serde"] }
enum-iterator = "2.1.0" enum-iterator = "2.1.0"
file-store = { path = "../file-store" } file-store = { path = "../file-store" }
flate2 = "1.0.30" flate2 = "1.0.35"
fst = "0.4.7" fst = "0.4.7"
memmap2 = "0.9.4" memmap2 = "0.9.5"
milli = { path = "../milli" } milli = { path = "../milli" }
bumparaw-collections = "0.1.2" bumparaw-collections = "0.1.4"
roaring = { version = "0.10.7", features = ["serde"] } roaring = { version = "0.10.10", features = ["serde"] }
rustc-hash = "2.1.0" rustc-hash = "2.1.0"
serde = { version = "1.0.204", features = ["derive"] } serde = { version = "1.0.217", features = ["derive"] }
serde-cs = "0.2.4" serde-cs = "0.2.4"
serde_json = "1.0.120" serde_json = "1.0.135"
tar = "0.4.41" tar = "0.4.43"
tempfile = "3.10.1" tempfile = "3.15.0"
thiserror = "1.0.61" thiserror = "2.0.9"
time = { version = "0.3.36", features = [ time = { version = "0.3.37", features = [
"serde-well-known", "serde-well-known",
"formatting", "formatting",
"parsing", "parsing",
"macros", "macros",
] } ] }
tokio = "1.38" tokio = "1.42"
utoipa = { version = "5.2.0", features = ["macros"] } utoipa = { version = "5.3.1", features = ["macros"] }
uuid = { version = "1.10.0", features = ["serde", "v4"] } uuid = { version = "1.11.0", features = ["serde", "v4"] }
[dev-dependencies] [dev-dependencies]
insta = "1.39.0" # fixed version due to format breakages in v1.40
insta = "=1.39.0"
meili-snap = { path = "../meili-snap" } meili-snap = { path = "../meili-snap" }
[features] [features]

View File

@@ -14,42 +14,42 @@ default-run = "meilisearch"
[dependencies] [dependencies]
actix-cors = "0.7.0" actix-cors = "0.7.0"
actix-http = { version = "3.8.0", default-features = false, features = [ actix-http = { version = "3.9.0", default-features = false, features = [
"compress-brotli", "compress-brotli",
"compress-gzip", "compress-gzip",
"rustls-0_23", "rustls-0_23",
] } ] }
actix-utils = "3.0.1" actix-utils = "3.0.1"
actix-web = { version = "4.8.0", default-features = false, features = [ actix-web = { version = "4.9.0", default-features = false, features = [
"macros", "macros",
"compress-brotli", "compress-brotli",
"compress-gzip", "compress-gzip",
"cookies", "cookies",
"rustls-0_23", "rustls-0_23",
] } ] }
anyhow = { version = "1.0.86", features = ["backtrace"] } anyhow = { version = "1.0.95", features = ["backtrace"] }
async-trait = "0.1.81" async-trait = "0.1.85"
bstr = "1.9.1" bstr = "1.11.3"
byte-unit = { version = "5.1.4", default-features = false, features = [ byte-unit = { version = "5.1.6", default-features = false, features = [
"std", "std",
"byte", "byte",
"serde", "serde",
] } ] }
bytes = "1.6.0" bytes = "1.9.0"
clap = { version = "4.5.9", features = ["derive", "env"] } clap = { version = "4.5.24", features = ["derive", "env"] }
crossbeam-channel = "0.5.13" crossbeam-channel = "0.5.14"
deserr = { version = "0.6.2", features = ["actix-web"] } deserr = { version = "0.6.3", features = ["actix-web"] }
dump = { path = "../dump" } dump = { path = "../dump" }
either = "1.13.0" either = "1.13.0"
file-store = { path = "../file-store" } file-store = { path = "../file-store" }
flate2 = "1.0.30" flate2 = "1.0.35"
fst = "0.4.7" fst = "0.4.7"
futures = "0.3.30" futures = "0.3.31"
futures-util = "0.3.30" futures-util = "0.3.31"
index-scheduler = { path = "../index-scheduler" } index-scheduler = { path = "../index-scheduler" }
indexmap = { version = "2.2.6", features = ["serde"] } indexmap = { version = "2.7.0", features = ["serde"] }
is-terminal = "0.4.12" is-terminal = "0.4.13"
itertools = "0.13.0" itertools = "0.14.0"
jsonwebtoken = "9.3.0" jsonwebtoken = "9.3.0"
lazy_static = "1.5.0" lazy_static = "1.5.0"
meilisearch-auth = { path = "../meilisearch-auth" } meilisearch-auth = { path = "../meilisearch-auth" }
@@ -58,80 +58,81 @@ mimalloc = { version = "0.1.43", default-features = false }
mime = "0.3.17" mime = "0.3.17"
num_cpus = "1.16.0" num_cpus = "1.16.0"
obkv = "0.3.0" obkv = "0.3.0"
once_cell = "1.19.0" once_cell = "1.20.2"
ordered-float = "4.2.1" ordered-float = "4.6.0"
parking_lot = "0.12.3" parking_lot = "0.12.3"
permissive-json-pointer = { path = "../permissive-json-pointer" } permissive-json-pointer = { path = "../permissive-json-pointer" }
pin-project-lite = "0.2.14" pin-project-lite = "0.2.16"
platform-dirs = "0.3.0" platform-dirs = "0.3.0"
prometheus = { version = "0.13.4", features = ["process"] } prometheus = { version = "0.13.4", features = ["process"] }
rand = "0.8.5" rand = "0.8.5"
rayon = "1.10.0" rayon = "1.10.0"
regex = "1.10.5" regex = "1.11.1"
reqwest = { version = "0.12.5", features = [ reqwest = { version = "0.12.12", features = [
"rustls-tls", "rustls-tls",
"json", "json",
], default-features = false } ], default-features = false }
rustls = { version = "0.23.11", features = ["ring"], default-features = false } rustls = { version = "0.23.20", features = ["ring"], default-features = false }
rustls-pki-types = { version = "1.7.0", features = ["alloc"] } rustls-pki-types = { version = "1.10.1", features = ["alloc"] }
rustls-pemfile = "2.1.2" rustls-pemfile = "2.2.0"
segment = { version = "0.2.4" } segment = { version = "0.2.5" }
serde = { version = "1.0.204", features = ["derive"] } serde = { version = "1.0.217", features = ["derive"] }
serde_json = { version = "1.0.120", features = ["preserve_order"] } serde_json = { version = "1.0.135", features = ["preserve_order"] }
sha2 = "0.10.8" sha2 = "0.10.8"
siphasher = "1.0.1" siphasher = "1.0.1"
slice-group-by = "0.3.1" slice-group-by = "0.3.1"
static-files = { version = "0.2.4", optional = true } static-files = { version = "0.2.4", optional = true }
sysinfo = "0.30.13" sysinfo = "0.33.1"
tar = "0.4.41" tar = "0.4.43"
tempfile = "3.10.1" tempfile = "3.15.0"
thiserror = "1.0.61" thiserror = "2.0.9"
time = { version = "0.3.36", features = [ time = { version = "0.3.37", features = [
"serde-well-known", "serde-well-known",
"formatting", "formatting",
"parsing", "parsing",
"macros", "macros",
] } ] }
tokio = { version = "1.38.0", features = ["full"] } tokio = { version = "1.42.0", features = ["full"] }
toml = "0.8.14" toml = "0.8.19"
uuid = { version = "1.10.0", features = ["serde", "v4"] } uuid = { version = "1.11.0", features = ["serde", "v4"] }
serde_urlencoded = "0.7.1" serde_urlencoded = "0.7.1"
termcolor = "1.4.1" termcolor = "1.4.1"
url = { version = "2.5.2", features = ["serde"] } url = { version = "2.5.4", features = ["serde"] }
tracing = "0.1.40" tracing = "0.1.41"
tracing-subscriber = { version = "0.3.18", features = ["json"] } tracing-subscriber = { version = "0.3.19", features = ["json"] }
tracing-trace = { version = "0.1.0", path = "../tracing-trace" } tracing-trace = { version = "0.1.0", path = "../tracing-trace" }
tracing-actix-web = "0.7.11" tracing-actix-web = "0.7.15"
build-info = { version = "1.7.0", path = "../build-info" } build-info = { version = "1.7.0", path = "../build-info" }
roaring = "0.10.7" roaring = "0.10.10"
mopa-maintained = "0.2.3" mopa-maintained = "0.2.3"
utoipa = { version = "5.2.0", features = ["actix_extras", "macros", "non_strict_integers", "preserve_order", "uuid", "time", "openapi_extensions"] } utoipa = { version = "5.3.1", features = ["actix_extras", "macros", "non_strict_integers", "preserve_order", "uuid", "time", "openapi_extensions"] }
utoipa-scalar = { version = "0.2.0", optional = true, features = ["actix-web"] } utoipa-scalar = { version = "0.2.1", optional = true, features = ["actix-web"] }
[dev-dependencies] [dev-dependencies]
actix-rt = "2.10.0" actix-rt = "2.10.0"
brotli = "6.0.0" brotli = "6.0.0"
insta = "1.39.0" # fixed version due to format breakages in v1.40
insta = "=1.39.0"
manifest-dir-macros = "0.1.18" manifest-dir-macros = "0.1.18"
maplit = "1.0.2" maplit = "1.0.2"
meili-snap = { path = "../meili-snap" } meili-snap = { path = "../meili-snap" }
temp-env = "0.3.6" temp-env = "0.3.6"
urlencoding = "2.1.3" urlencoding = "2.1.3"
wiremock = "0.6.0" wiremock = "0.6.2"
yaup = "0.3.1" yaup = "0.3.1"
[build-dependencies] [build-dependencies]
anyhow = { version = "1.0.86", optional = true } anyhow = { version = "1.0.95", optional = true }
cargo_toml = { version = "0.20.3", optional = true } cargo_toml = { version = "0.21.0", optional = true }
hex = { version = "0.4.3", optional = true } hex = { version = "0.4.3", optional = true }
reqwest = { version = "0.12.5", features = [ reqwest = { version = "0.12.12", features = [
"blocking", "blocking",
"rustls-tls", "rustls-tls",
], default-features = false, optional = true } ], default-features = false, optional = true }
sha-1 = { version = "0.10.1", optional = true } sha-1 = { version = "0.10.1", optional = true }
static-files = { version = "0.2.4", optional = true } static-files = { version = "0.2.4", optional = true }
tempfile = { version = "3.10.1", optional = true } tempfile = { version = "3.15.0", optional = true }
zip = { version = "2.1.3", optional = true } zip = { version = "2.2.2", optional = true }
[features] [features]
default = ["meilisearch-types/all-tokenizations", "mini-dashboard"] default = ["meilisearch-types/all-tokenizations", "mini-dashboard"]

View File

@@ -426,13 +426,9 @@ impl Segment {
&AuthFilter::default(), &AuthFilter::default(),
) { ) {
// Replace the version number with the prototype name if any. // Replace the version number with the prototype name if any.
let version = if let Some(prototype) = build_info::DescribeResult::from_build() let version = build_info::DescribeResult::from_build()
.and_then(|describe| describe.as_prototype()) .and_then(|describe| describe.as_prototype())
{ .unwrap_or(env!("CARGO_PKG_VERSION"));
prototype
} else {
env!("CARGO_PKG_VERSION")
};
let _ = self let _ = self
.batcher .batcher

View File

@@ -19,15 +19,15 @@ pub enum MeilisearchHttpError {
#[error("The Content-Type `{0}` does not support the use of a csv delimiter. The csv delimiter can only be used with the Content-Type `text/csv`.")] #[error("The Content-Type `{0}` does not support the use of a csv delimiter. The csv delimiter can only be used with the Content-Type `text/csv`.")]
CsvDelimiterWithWrongContentType(String), CsvDelimiterWithWrongContentType(String),
#[error( #[error(
"The Content-Type `{0}` is invalid. Accepted values for the Content-Type header are: {}", "The Content-Type `{}` is invalid. Accepted values for the Content-Type header are: {}",
.1.iter().map(|s| format!("`{}`", s)).collect::<Vec<_>>().join(", ") .0, .1.iter().map(|s| format!("`{}`", s)).collect::<Vec<_>>().join(", ")
)] )]
InvalidContentType(String, Vec<String>), InvalidContentType(String, Vec<String>),
#[error("Document `{0}` not found.")] #[error("Document `{0}` not found.")]
DocumentNotFound(String), DocumentNotFound(String),
#[error("Sending an empty filter is forbidden.")] #[error("Sending an empty filter is forbidden.")]
EmptyFilter, EmptyFilter,
#[error("Invalid syntax for the filter parameter: `expected {}, found: {1}`.", .0.join(", "))] #[error("Invalid syntax for the filter parameter: `expected {}, found: {}`.", .0.join(", "), .1)]
InvalidExpression(&'static [&'static str], Value), InvalidExpression(&'static [&'static str], Value),
#[error("Using `federationOptions` is not allowed in a non-federated search.\n - Hint: remove `federationOptions` from query #{0} or add `federation` to the request.")] #[error("Using `federationOptions` is not allowed in a non-federated search.\n - Hint: remove `federationOptions` from query #{0} or add `federation` to the request.")]
FederationOptionsInNonFederatedRequest(usize), FederationOptionsInNonFederatedRequest(usize),

View File

@@ -188,13 +188,13 @@ impl tracing_actix_web::RootSpanBuilder for AwebTracingLogger {
if let Some(error) = response.response().error() { if let Some(error) = response.response().error() {
// use the status code already constructed for the outgoing HTTP response // use the status code already constructed for the outgoing HTTP response
span.record("error", &tracing::field::display(error.as_response_error())); span.record("error", tracing::field::display(error.as_response_error()));
} }
} }
Err(error) => { Err(error) => {
let code: i32 = error.error_response().status().as_u16().into(); let code: i32 = error.error_response().status().as_u16().into();
span.record("status_code", code); span.record("status_code", code);
span.record("error", &tracing::field::display(error.as_response_error())); span.record("error", tracing::field::display(error.as_response_error()));
} }
}; };
} }

View File

@@ -760,8 +760,8 @@ impl MaxMemory {
/// Returns the total amount of bytes available or `None` if this system isn't supported. /// Returns the total amount of bytes available or `None` if this system isn't supported.
fn total_memory_bytes() -> Option<u64> { fn total_memory_bytes() -> Option<u64> {
if sysinfo::IS_SUPPORTED_SYSTEM { if sysinfo::IS_SUPPORTED_SYSTEM {
let memory_kind = RefreshKind::new().with_memory(MemoryRefreshKind::new().with_ram()); let mem_kind = RefreshKind::nothing().with_memory(MemoryRefreshKind::nothing().with_ram());
let mut system = System::new_with_specifics(memory_kind); let mut system = System::new_with_specifics(mem_kind);
system.refresh_memory(); system.refresh_memory();
Some(system.total_memory()) Some(system.total_memory())
} else { } else {

View File

@@ -1,7 +1,3 @@
use crate::extractors::authentication::policies::ActionPolicy;
use crate::extractors::authentication::{AuthenticationError, GuardedData};
use crate::routes::create_all_stats;
use crate::search_queue::SearchQueue;
use actix_web::http::header; use actix_web::http::header;
use actix_web::web::{self, Data}; use actix_web::web::{self, Data};
use actix_web::HttpResponse; use actix_web::HttpResponse;
@@ -14,6 +10,11 @@ use prometheus::{Encoder, TextEncoder};
use time::OffsetDateTime; use time::OffsetDateTime;
use utoipa::OpenApi; use utoipa::OpenApi;
use crate::extractors::authentication::policies::ActionPolicy;
use crate::extractors::authentication::{AuthenticationError, GuardedData};
use crate::routes::create_all_stats;
use crate::search_queue::SearchQueue;
#[derive(OpenApi)] #[derive(OpenApi)]
#[openapi(paths(get_metrics))] #[openapi(paths(get_metrics))]
pub struct MetricApi; pub struct MetricApi;

View File

@@ -545,5 +545,5 @@ pub async fn get_health(
index_scheduler.health().unwrap(); index_scheduler.health().unwrap();
auth_controller.health().unwrap(); auth_controller.health().unwrap();
Ok(HttpResponse::Ok().json(&HealthResponse::default())) Ok(HttpResponse::Ok().json(HealthResponse::default()))
} }

View File

@@ -73,8 +73,8 @@ async fn get_and_paginate_indexes() {
let server = Server::new().await; let server = Server::new().await;
const NB_INDEXES: usize = 50; const NB_INDEXES: usize = 50;
for i in 0..NB_INDEXES { for i in 0..NB_INDEXES {
server.index(&format!("test_{i:02}")).create(None).await; server.index(format!("test_{i:02}")).create(None).await;
server.index(&format!("test_{i:02}")).wait_task(i as u64).await; server.index(format!("test_{i:02}")).wait_task(i as u64).await;
} }
// basic // basic

View File

@@ -9,16 +9,16 @@ edition.workspace = true
license.workspace = true license.workspace = true
[dependencies] [dependencies]
anyhow = "1.0.86" anyhow = "1.0.95"
arroy_v04_to_v05 = { package = "arroy", git = "https://github.com/meilisearch/arroy/", tag = "DO-NOT-DELETE-upgrade-v04-to-v05" } arroy_v04_to_v05 = { package = "arroy", git = "https://github.com/meilisearch/arroy/", tag = "DO-NOT-DELETE-upgrade-v04-to-v05" }
clap = { version = "4.5.9", features = ["derive"] } clap = { version = "4.5.24", features = ["derive"] }
dump = { path = "../dump" } dump = { path = "../dump" }
file-store = { path = "../file-store" } file-store = { path = "../file-store" }
indexmap = {version = "2.7.0", features = ["serde"]} indexmap = {version = "2.7.0", features = ["serde"]}
meilisearch-auth = { path = "../meilisearch-auth" } meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-types = { path = "../meilisearch-types" } meilisearch-types = { path = "../meilisearch-types" }
serde = { version = "1.0.209", features = ["derive"] } serde = { version = "1.0.217", features = ["derive"] }
serde_json = {version = "1.0.133", features = ["preserve_order"]} serde_json = {version = "1.0.135", features = ["preserve_order"]}
tempfile = "3.14.0" tempfile = "3.15.0"
time = { version = "0.3.36", features = ["formatting", "parsing", "alloc"] } time = { version = "0.3.37", features = ["formatting", "parsing", "alloc"] }
uuid = { version = "1.10.0", features = ["v4"], default-features = false } uuid = { version = "1.11.0", features = ["v4"], default-features = false }

View File

@@ -15,68 +15,68 @@ license.workspace = true
big_s = "1.0.2" big_s = "1.0.2"
bimap = { version = "0.6.3", features = ["serde"] } bimap = { version = "0.6.3", features = ["serde"] }
bincode = "1.3.3" bincode = "1.3.3"
bstr = "1.9.1" bstr = "1.11.3"
bytemuck = { version = "1.18.0", features = ["extern_crate_alloc"] } bytemuck = { version = "1.21.0", features = ["extern_crate_alloc"] }
byteorder = "1.5.0" byteorder = "1.5.0"
charabia = { version = "0.9.2", default-features = false } charabia = { version = "0.9.2", default-features = false }
concat-arrays = "0.1.2" concat-arrays = "0.1.2"
crossbeam-channel = "0.5.13" crossbeam-channel = "0.5.14"
deserr = "0.6.2" deserr = "0.6.3"
either = { version = "1.13.0", features = ["serde"] } either = { version = "1.13.0", features = ["serde"] }
flatten-serde-json = { path = "../flatten-serde-json" } flatten-serde-json = { path = "../flatten-serde-json" }
fst = "0.4.7" fst = "0.4.7"
fxhash = "0.2.1" fxhash = "0.2.1"
geoutils = "0.5.1" geoutils = "0.5.1"
grenad = { version = "0.5.0", default-features = false, features = ["rayon", "tempfile"] } grenad = { version = "0.5.0", default-features = false, features = ["rayon", "tempfile"] }
heed = { version = "0.20.3", default-features = false, features = [ heed = { version = "0.20.5", default-features = false, features = [
"serde-json", "serde-json",
"serde-bincode", "serde-bincode",
"read-txn-no-tls", "read-txn-no-tls",
] } ] }
indexmap = { version = "2.2.6", features = ["serde"] } indexmap = { version = "2.7.0", features = ["serde"] }
json-depth-checker = { path = "../json-depth-checker" } json-depth-checker = { path = "../json-depth-checker" }
levenshtein_automata = { version = "0.2.1", features = ["fst_automaton"] } levenshtein_automata = { version = "0.2.1", features = ["fst_automaton"] }
memchr = "2.5.0" memchr = "2.7.4"
memmap2 = "0.9.4" memmap2 = "0.9.5"
obkv = "0.3.0" obkv = "0.3.0"
once_cell = "1.19.0" once_cell = "1.20.2"
ordered-float = "4.2.1" ordered-float = "4.6.0"
rayon = "1.10.0" rayon = "1.10.0"
roaring = { version = "0.10.7", features = ["serde"] } roaring = { version = "0.10.10", features = ["serde"] }
rstar = { version = "0.12.0", features = ["serde"] } rstar = { version = "0.12.2", features = ["serde"] }
serde = { version = "1.0.204", features = ["derive"] } serde = { version = "1.0.217", features = ["derive"] }
serde_json = { version = "1.0.120", features = ["preserve_order", "raw_value"] } serde_json = { version = "1.0.135", features = ["preserve_order", "raw_value"] }
slice-group-by = "0.3.1" slice-group-by = "0.3.1"
smallstr = { version = "0.3.0", features = ["serde"] } smallstr = { version = "0.3.0", features = ["serde"] }
smallvec = "1.13.2" smallvec = "1.13.2"
smartstring = "1.0.1" smartstring = "1.0.1"
tempfile = "3.10.1" tempfile = "3.15.0"
thiserror = "1.0.61" thiserror = "2.0.9"
time = { version = "0.3.36", features = [ time = { version = "0.3.37", features = [
"serde-well-known", "serde-well-known",
"formatting", "formatting",
"parsing", "parsing",
"macros", "macros",
] } ] }
uuid = { version = "1.10.0", features = ["v4"] } uuid = { version = "1.11.0", features = ["v4"] }
filter-parser = { path = "../filter-parser" } filter-parser = { path = "../filter-parser" }
# documents words self-join # documents words self-join
itertools = "0.13.0" itertools = "0.14.0"
csv = "1.3.0" csv = "1.3.1"
candle-core = { version = "0.6.0" } candle-core = { version = "0.8.2" }
candle-transformers = { version = "0.6.0" } candle-transformers = { version = "0.8.2" }
candle-nn = { version = "0.6.0" } candle-nn = { version = "0.8.2" }
tokenizers = { git = "https://github.com/huggingface/tokenizers.git", tag = "v0.15.2", version = "0.15.2", default-features = false, features = [ tokenizers = { git = "https://github.com/huggingface/tokenizers.git", tag = "v0.15.2", version = "0.15.2", default-features = false, features = [
"onig", "onig",
] } ] }
hf-hub = { git = "https://github.com/dureuill/hf-hub.git", branch = "rust_tls", default-features = false, features = [ hf-hub = { git = "https://github.com/dureuill/hf-hub.git", branch = "rust_tls", default-features = false, features = [
"online", "online",
] } ] }
tiktoken-rs = "0.5.9" tiktoken-rs = "0.6.0"
liquid = "0.26.6" liquid = "0.26.9"
rhai = { git = "https://github.com/rhaiscript/rhai", rev = "ef3df63121d27aacd838f366f2b83fd65f20a1e4", features = [ rhai = { git = "https://github.com/rhaiscript/rhai", rev = "ef3df63121d27aacd838f366f2b83fd65f20a1e4", features = [
"serde", "serde",
"no_module", "no_module",
@@ -86,25 +86,26 @@ rhai = { git = "https://github.com/rhaiscript/rhai", rev = "ef3df63121d27aacd838
] } ] }
arroy = "0.5.0" arroy = "0.5.0"
rand = "0.8.5" rand = "0.8.5"
tracing = "0.1.40" tracing = "0.1.41"
ureq = { version = "2.10.0", features = ["json"] } ureq = { version = "2.12.1", features = ["json"] }
url = "2.5.2" url = "2.5.4"
rayon-par-bridge = "0.1.0" rayon-par-bridge = "0.1.0"
hashbrown = "0.15.0" hashbrown = "0.15.2"
bumpalo = "3.16.0" bumpalo = "3.16.0"
bumparaw-collections = "0.1.2" bumparaw-collections = "0.1.4"
thread_local = "1.1.8" thread_local = "1.1.8"
allocator-api2 = "0.2.18" allocator-api2 = "0.2.21"
rustc-hash = "2.0.0" rustc-hash = "2.1.0"
uell = "0.1.0" uell = "0.1.0"
enum-iterator = "2.1.0" enum-iterator = "2.1.0"
bbqueue = { git = "https://github.com/meilisearch/bbqueue" } bbqueue = { git = "https://github.com/meilisearch/bbqueue" }
flume = { version = "0.11.1", default-features = false } flume = { version = "0.11.1", default-features = false }
utoipa = { version = "5.0.2", features = ["non_strict_integers", "preserve_order", "uuid", "time", "openapi_extensions"] } utoipa = { version = "5.3.1", features = ["non_strict_integers", "preserve_order", "uuid", "time", "openapi_extensions"] }
[dev-dependencies] [dev-dependencies]
mimalloc = { version = "0.1.43", default-features = false } mimalloc = { version = "0.1.43", default-features = false }
insta = "1.39.0" # fixed version due to format breakages in v1.40
insta = "=1.39.0"
maplit = "1.0.2" maplit = "1.0.2"
md5 = "0.7.0" md5 = "0.7.0"
meili-snap = { path = "../meili-snap" } meili-snap = { path = "../meili-snap" }

View File

@@ -134,7 +134,7 @@ and can not be more than 511 bytes.", .document_id.to_string()
InvalidVectorsEmbedderConf { document_id: String, error: String }, InvalidVectorsEmbedderConf { document_id: String, error: String },
#[error("{0}")] #[error("{0}")]
InvalidFilter(String), InvalidFilter(String),
#[error("Invalid type for filter subexpression: expected: {}, found: {1}.", .0.join(", "))] #[error("Invalid type for filter subexpression: expected: {}, found: {}.", .0.join(", "), .1)]
InvalidFilterExpression(&'static [&'static str], Value), InvalidFilterExpression(&'static [&'static str], Value),
#[error("Attribute `{}` is not sortable. {}", #[error("Attribute `{}` is not sortable. {}",
.field, .field,

View File

@@ -1,4 +1,3 @@
#![cfg_attr(all(test, fuzzing), feature(no_coverage))]
#![allow(clippy::type_complexity)] #![allow(clippy::type_complexity)]
#[cfg(not(windows))] #[cfg(not(windows))]

View File

@@ -132,12 +132,12 @@ impl<'t, 'b, 'bitmap> FacetRangeSearch<'t, 'b, 'bitmap> {
/// ///
/// 1. So long as the element's range is less than the left bound, we do nothing and keep iterating /// 1. So long as the element's range is less than the left bound, we do nothing and keep iterating
/// 2. If the element's range is fully contained by the bounds, then all of its docids are added to /// 2. If the element's range is fully contained by the bounds, then all of its docids are added to
/// the roaring bitmap. /// the roaring bitmap.
/// 3. If the element's range merely intersects the bounds, then we call the algorithm recursively /// 3. If the element's range merely intersects the bounds, then we call the algorithm recursively
/// on the children of the element from the level below. /// on the children of the element from the level below.
/// 4. If the element's range is greater than the right bound, we do nothing and stop iterating. /// 4. If the element's range is greater than the right bound, we do nothing and stop iterating.
/// Note that the right bound is found through either the `left_bound` of the *next* element, /// Note that the right bound is found through either the `left_bound` of the *next* element,
/// or from the `rightmost_bound` argument /// or from the `rightmost_bound` argument
/// ///
/// ## Arguments /// ## Arguments
/// - `level`: the level being visited /// - `level`: the level being visited

View File

@@ -18,10 +18,10 @@ pub struct DistinctOutput {
/// Return a [`DistinctOutput`] containing: /// Return a [`DistinctOutput`] containing:
/// - `remaining`: a set of docids built such that exactly one element from `candidates` /// - `remaining`: a set of docids built such that exactly one element from `candidates`
/// is kept for each distinct value inside the given field. If the field does not exist, it /// is kept for each distinct value inside the given field. If the field does not exist, it
/// is considered unique. /// is considered unique.
/// - `excluded`: the set of document ids that contain a value for the given field that occurs /// - `excluded`: the set of document ids that contain a value for the given field that occurs
/// in the given candidates. /// in the given candidates.
pub fn apply_distinct_rule( pub fn apply_distinct_rule(
ctx: &mut SearchContext<'_>, ctx: &mut SearchContext<'_>,
field_id: u16, field_id: u16,

View File

@@ -149,7 +149,7 @@ pub type WordId = u16;
/// A given token can partially match a query word for several reasons: /// A given token can partially match a query word for several reasons:
/// - split words /// - split words
/// - multi-word synonyms /// - multi-word synonyms
/// In these cases we need to match consecutively several tokens to consider that the match is full. /// In these cases we need to match consecutively several tokens to consider that the match is full.
#[derive(Debug, PartialEq)] #[derive(Debug, PartialEq)]
pub enum MatchType<'a> { pub enum MatchType<'a> {
Full { char_count: usize, byte_len: usize, ids: &'a RangeInclusive<WordId> }, Full { char_count: usize, byte_len: usize, ids: &'a RangeInclusive<WordId> },

View File

@@ -21,9 +21,9 @@ use crate::Result;
/// 1. `Start` : unique, represents the start of the query /// 1. `Start` : unique, represents the start of the query
/// 2. `End` : unique, represents the end of a query /// 2. `End` : unique, represents the end of a query
/// 3. `Deleted` : represents a node that was deleted. /// 3. `Deleted` : represents a node that was deleted.
/// All deleted nodes are unreachable from the start node. /// All deleted nodes are unreachable from the start node.
/// 4. `Term` is a regular node representing a word or combination of words /// 4. `Term` is a regular node representing a word or combination of words
/// from the user query. /// from the user query.
#[derive(Clone)] #[derive(Clone)]
pub struct QueryNode { pub struct QueryNode {
pub data: QueryNodeData, pub data: QueryNodeData,

View File

@@ -8,7 +8,7 @@ with them, they are "unconditional". These kinds of edges are used to "skip" a n
The algorithm uses a depth-first search. It benefits from two main optimisations: The algorithm uses a depth-first search. It benefits from two main optimisations:
- The list of all possible costs to go from any node to the END node is precomputed - The list of all possible costs to go from any node to the END node is precomputed
- The `DeadEndsCache` reduces the number of valid paths drastically, by making some edges - The `DeadEndsCache` reduces the number of valid paths drastically, by making some edges
untraversable depending on what other edges were selected. untraversable depending on what other edges were selected.
These two optimisations are meant to avoid traversing edges that wouldn't lead These two optimisations are meant to avoid traversing edges that wouldn't lead
to a valid path. In practically all cases, we avoid the exponential complexity to a valid path. In practically all cases, we avoid the exponential complexity
@@ -24,6 +24,7 @@ For example, the DeadEndsCache could say the following:
- if we take `g`, then `[f]` is also forbidden - if we take `g`, then `[f]` is also forbidden
- etc. - etc.
- etc. - etc.
As we traverse the graph, we also traverse the `DeadEndsCache` and keep a list of forbidden As we traverse the graph, we also traverse the `DeadEndsCache` and keep a list of forbidden
conditions in memory. Then, we know to avoid all edges which have a condition that is forbidden. conditions in memory. Then, we know to avoid all edges which have a condition that is forbidden.

View File

@@ -58,7 +58,7 @@ pub struct ComputedCondition {
/// 2. The cost of traversing this edge /// 2. The cost of traversing this edge
/// 3. The condition associated with it /// 3. The condition associated with it
/// 4. The list of nodes that have to be skipped /// 4. The list of nodes that have to be skipped
/// if this edge is traversed. /// if this edge is traversed.
#[derive(Clone)] #[derive(Clone)]
pub struct Edge<E> { pub struct Edge<E> {
pub source_node: Interned<QueryNode>, pub source_node: Interned<QueryNode>,

View File

@@ -14,7 +14,7 @@ This module tests the following properties about the exactness ranking rule:
3. those that contain the most exact words from the remaining query 3. those that contain the most exact words from the remaining query
- if it is followed by other graph-based ranking rules (`typo`, `proximity`, `attribute`). - if it is followed by other graph-based ranking rules (`typo`, `proximity`, `attribute`).
Then these rules will only work with Then these rules will only work with
1. the exact terms selected by `exactness` 1. the exact terms selected by `exactness`
2. the full query term otherwise 2. the full query term otherwise
*/ */

View File

@@ -4,15 +4,14 @@ This module tests the Proximity ranking rule:
1. A proximity of >7 always has the same cost. 1. A proximity of >7 always has the same cost.
2. Phrase terms can be in proximity to other terms via their start and end words, 2. Phrase terms can be in proximity to other terms via their start and end words,
but we need to make sure that the phrase exists in the document that meets this but we need to make sure that the phrase exists in the document that meets this
proximity condition. This is especially relevant with split words and synonyms. proximity condition. This is especially relevant with split words and synonyms.
3. An ngram has the same proximity cost as its component words being consecutive. 3. An ngram has the same proximity cost as its component words being consecutive.
e.g. `sunflower` equivalent to `sun flower`. e.g. `sunflower` equivalent to `sun flower`.
4. The prefix databases can be used to find the proximity between two words, but 4. The prefix databases can be used to find the proximity between two words, but
they store fewer proximities than the regular word proximity DB. they store fewer proximities than the regular word proximity DB.
*/ */
use std::collections::BTreeMap; use std::collections::BTreeMap;

View File

@@ -11,7 +11,7 @@ This module tests the following properties:
8. 2grams can have 1 typo if they are larger than `min_word_len_two_typos` 8. 2grams can have 1 typo if they are larger than `min_word_len_two_typos`
9. 3grams are not typo tolerant (but they can be split into two words) 9. 3grams are not typo tolerant (but they can be split into two words)
10. The `typo` ranking rule assumes the role of the `words` ranking rule implicitly 10. The `typo` ranking rule assumes the role of the `words` ranking rule implicitly
if `words` doesn't exist before it. if `words` doesn't exist before it.
11. The `typo` ranking rule places documents with the same number of typos in the same bucket 11. The `typo` ranking rule places documents with the same number of typos in the same bucket
12. Prefix tolerance costs nothing according to the typo ranking rule 12. Prefix tolerance costs nothing according to the typo ranking rule
13. Split words cost 1 typo according to the typo ranking rule 13. Split words cost 1 typo according to the typo ranking rule

View File

@@ -2,11 +2,11 @@
This module tests the following properties: This module tests the following properties:
1. The `last` term matching strategy starts removing terms from the query 1. The `last` term matching strategy starts removing terms from the query
starting from the end if no more results match it. starting from the end if no more results match it.
2. Phrases are never deleted by the `last` term matching strategy 2. Phrases are never deleted by the `last` term matching strategy
3. Duplicate words don't affect the ranking of a document according to the `words` ranking rule 3. Duplicate words don't affect the ranking of a document according to the `words` ranking rule
4. The proximity of the first and last word of a phrase to its adjacent terms is taken into 4. The proximity of the first and last word of a phrase to its adjacent terms is taken into
account by the proximity ranking rule. account by the proximity ranking rule.
5. Unclosed double quotes still make a phrase 5. Unclosed double quotes still make a phrase
6. The `all` term matching strategy does not remove any term from the query 6. The `all` term matching strategy does not remove any term from the query
7. The search is capable of returning no results if no documents match the query 7. The search is capable of returning no results if no documents match the query

View File

@@ -21,29 +21,30 @@ use crate::{CboRoaringBitmapCodec, Index, Result};
/// Enum used as a return value for the facet incremental indexing. /// Enum used as a return value for the facet incremental indexing.
/// ///
/// - `ModificationResult::InPlace` means that modifying the `facet_value` into the `level` did not have /// - `ModificationResult::InPlace` means that modifying the `facet_value` into the `level` did not have
/// an effect on the number of keys in that level. Therefore, it did not increase the number of children /// an effect on the number of keys in that level. Therefore, it did not increase the number of children
/// of the parent node. /// of the parent node.
/// ///
/// - `ModificationResult::Insert` means that modifying the `facet_value` into the `level` resulted /// - `ModificationResult::Insert` means that modifying the `facet_value` into the `level` resulted
/// in the addition of a new key in that level, and that therefore the number of children /// in the addition of a new key in that level, and that therefore the number of children
/// of the parent node should be incremented. /// of the parent node should be incremented.
/// ///
/// - `ModificationResult::Remove` means that modifying the `facet_value` into the `level` resulted in a change in the /// - `ModificationResult::Remove` means that modifying the `facet_value` into the `level` resulted in a change in the
/// number of keys in the level. For example, removing a document id from the facet value `3` could /// number of keys in the level. For example, removing a document id from the facet value `3` could
/// cause it to have no corresponding document in level 0 anymore, and therefore the key was deleted /// cause it to have no corresponding document in level 0 anymore, and therefore the key was deleted
/// entirely. In that case, `ModificationResult::Remove` is returned. The parent of the deleted key must /// entirely. In that case, `ModificationResult::Remove` is returned. The parent of the deleted key must
/// then adjust its group size. If its group size falls to 0, then it will need to be deleted as well. /// then adjust its group size. If its group size falls to 0, then it will need to be deleted as well.
/// ///
/// - `ModificationResult::Reduce/Expand` means that modifying the `facet_value` into the `level` resulted in a change in the /// - `ModificationResult::Reduce/Expand` means that modifying the `facet_value` into the `level` resulted in a change in the
/// bounds of the keys of the level. For example, removing a document id from the facet value /// bounds of the keys of the level. For example, removing a document id from the facet value
/// `3` might have caused the facet value `3` to have no corresponding document in level 0. Therefore, /// `3` might have caused the facet value `3` to have no corresponding document in level 0. Therefore,
/// in level 1, the key with the left bound `3` had to be changed to the next facet value (e.g. 4). /// in level 1, the key with the left bound `3` had to be changed to the next facet value (e.g. 4).
/// In that case `ModificationResult::Reduce` is returned. The parent of the reduced key may need to adjust /// In that case `ModificationResult::Reduce` is returned. The parent of the reduced key may need to adjust
/// its left bound as well. /// its left bound as well.
/// ///
/// - `ModificationResult::Nothing` means that modifying the `facet_value` didn't have any impact into the `level`. /// - `ModificationResult::Nothing` means that modifying the `facet_value` didn't have any impact into the `level`.
/// This case is reachable when a document id is removed from a sub-level node but is still present in another one. /// This case is reachable when a document id is removed from a sub-level node but is still present in another one.
/// For example, removing `2` from a document containing `2` and `3`, the document id will be removed from the `level 0` but should remain in the group node [1..4] in `level 1`. /// For example, removing `2` from a document containing `2` and `3`, the document id will be removed from the `level 0`
/// but should remain in the group node [1..4] in `level 1`.
enum ModificationResult { enum ModificationResult {
InPlace, InPlace,
Expand, Expand,
@@ -1059,208 +1060,3 @@ mod tests {
milli_snap!(format!("{index}"), "after_delete"); milli_snap!(format!("{index}"), "after_delete");
} }
} }
// fuzz tests
#[cfg(all(test, fuzzing))]
/**
Fuzz test for the incremental indexer.
The fuzz test uses fuzzcheck, a coverage-guided fuzzer.
See https://github.com/loiclec/fuzzcheck-rs and https://fuzzcheck.neocities.org
for more information.
It is only run when using the `cargo fuzzcheck` command line tool, which can be installed with:
```sh
cargo install cargo-fuzzcheck
```
To start the fuzz test, run (from the base folder or from milli/):
```sh
cargo fuzzcheck update::facet::incremental::fuzz::fuzz
```
and wait a couple minutes to make sure the code was thoroughly tested, then
hit `Ctrl-C` to stop the fuzzer. The corpus generated by the fuzzer is located in milli/fuzz.
To work on this module with rust-analyzer working properly, add the following to your .cargo/config.toml file:
```toml
[build]
rustflags = ["--cfg", "fuzzing"]
```
The fuzz test generates sequences of additions and deletions to the facet database and
ensures that:
1. its structure is still internally valid
2. its content is the same as a trivially correct implementation of the same database
*/
mod fuzz {
use std::collections::{BTreeMap, HashMap};
use std::iter::FromIterator;
use std::rc::Rc;
use fuzzcheck::mutators::integer::U8Mutator;
use fuzzcheck::mutators::integer_within_range::{U16WithinRangeMutator, U8WithinRangeMutator};
use fuzzcheck::mutators::vector::VecMutator;
use fuzzcheck::DefaultMutator;
use roaring::RoaringBitmap;
use tempfile::TempDir;
use super::*;
use crate::update::facet::test_helpers::FacetIndex;
/// A deliberately simple reference model of the facet database, used by the fuzz
/// test as the "trivially correct implementation" the real index is compared against.
///
/// `elements` maps a field id to an ordered map from key to the bitmap of values
/// accumulated for that key.
#[derive(Default)]
pub struct TrivialDatabase<T> {
pub elements: BTreeMap<u16, BTreeMap<T, RoaringBitmap>>,
}
impl<T> TrivialDatabase<T>
where
    T: Ord + Clone + Eq + std::fmt::Debug,
{
    /// Unions `new_values` into the bitmap stored under `new_key` for `field_id`.
    ///
    /// The per-field map and the per-key bitmap are created on demand. An empty
    /// `new_values` is a no-op, so this method never creates an empty entry.
    #[no_coverage]
    pub fn insert(&mut self, field_id: u16, new_key: &T, new_values: &RoaringBitmap) {
        if new_values.is_empty() {
            return;
        }
        let values_field_id = self.elements.entry(field_id).or_default();
        let values = values_field_id.entry(new_key.clone()).or_default();
        *values |= new_values;
    }

    /// Subtracts `values_to_remove` from the bitmap stored under `key` for `field_id`.
    ///
    /// A key whose bitmap becomes empty is removed, and a field whose key map
    /// becomes empty is removed as well. Unknown fields or keys are ignored.
    #[no_coverage]
    pub fn delete(&mut self, field_id: u16, key: &T, values_to_remove: &RoaringBitmap) {
        if let Some(values_field_id) = self.elements.get_mut(&field_id) {
            // `key` is already a `&T`: pass it as-is rather than `&key`, which would
            // make the lookup type `&T` instead of `T`.
            if let Some(values) = values_field_id.get_mut(key) {
                *values -= values_to_remove;
                if values.is_empty() {
                    values_field_id.remove(key);
                }
            }
            // Checked even when `key` was absent, exactly like the bitmap cleanup above,
            // so that a field never lingers with an empty key map.
            if values_field_id.is_empty() {
                self.elements.remove(&field_id);
            }
        }
    }
}
// One fuzzer-generated step. Each step carries the facet key, the index tuning
// parameters to apply before the step, the field id, and the kind of modification.
// The `field_mutator` attributes constrain the values the fuzzer generates.
#[derive(Clone, DefaultMutator, serde::Serialize, serde::Deserialize)]
struct Operation {
// Facet key targeted by the step. The mutator is built with the range `0 ..= 5`
// (presumably the allowed key length in bytes; confirm against fuzzcheck's `VecMutator`).
#[field_mutator(VecMutator<u8, U8Mutator> = { VecMutator::new(u8::default_mutator(), 0 ..= 5) })]
key: Vec<u8>,
// Passed to `index.set_group_size` before the step runs; mutator range `..32`.
#[field_mutator(U8WithinRangeMutator = { U8WithinRangeMutator::new(..32) })]
group_size: u8,
// Passed to `index.set_max_group_size` before the step runs; mutator range `..32`.
#[field_mutator(U8WithinRangeMutator = { U8WithinRangeMutator::new(..32) })]
max_group_size: u8,
// Passed to `index.set_min_level_size` before the step runs; mutator range `..32`.
#[field_mutator(U8WithinRangeMutator = { U8WithinRangeMutator::new(..32) })]
min_level_size: u8,
// Field id the step applies to; mutator range `..=3`.
#[field_mutator(U16WithinRangeMutator = { U16WithinRangeMutator::new(..=3) })]
field_id: u16,
// Whether the step inserts or deletes values.
kind: OperationKind,
}
// The action carried by an `Operation`, with the values it applies to.
// Both variants use a vector mutator configured with the range `0 ..= 10`.
#[derive(Clone, DefaultMutator, serde::Serialize, serde::Deserialize)]
enum OperationKind {
// Insert these values under the operation's key.
Insert(
#[field_mutator(VecMutator<u8, U8Mutator> = { VecMutator::new(U8Mutator::default(), 0 ..= 10) })]
Vec<u8>,
),
// Delete these values from every key they were previously inserted under.
Delete(
#[field_mutator(VecMutator<u8, U8Mutator> = { VecMutator::new(U8Mutator::default(), 0 ..= 10) })]
Vec<u8>,
),
}
/// Replays `operations` on both a real [`FacetIndex`] and a [`TrivialDatabase`], then
/// checks that the two agree and that the facet index is structurally valid.
///
/// Before each operation the index tuning parameters (`group_size`, `max_group_size`,
/// `min_level_size`) are overwritten with the ones carried by the operation.
/// All writes happen in a single write transaction which is aborted at the end, so
/// nothing is persisted in `tempdir`.
///
/// # Panics
///
/// Panics if the content of the index differs from the trivial database, or if any
/// database operation fails.
#[no_coverage]
fn compare_with_trivial_database(tempdir: Rc<TempDir>, operations: &[Operation]) {
    let index = FacetIndex::<BytesRefCodec>::open_from_tempdir(tempdir, 4, 8, 5); // dummy params, they'll be overwritten
    let mut txn = index.env.write_txn().unwrap();

    let mut trivial_db = TrivialDatabase::<Vec<u8>>::default();
    // For every value, all the keys it was ever inserted under (for any field id).
    let mut value_to_keys = HashMap::<u8, Vec<Vec<u8>>>::new();

    for Operation { key, group_size, max_group_size, min_level_size, field_id, kind } in
        operations
    {
        index.set_group_size(*group_size);
        index.set_max_group_size(*max_group_size);
        index.set_min_level_size(*min_level_size);
        match kind {
            OperationKind::Insert(values) => {
                let mut bitmap = RoaringBitmap::new();
                for value in values {
                    bitmap.insert(*value as u32);
                    value_to_keys.entry(*value).or_default().push(key.clone());
                }
                index.insert(&mut txn, *field_id, &key.as_slice(), &bitmap);
                trivial_db.insert(*field_id, key, &bitmap);
            }
            OperationKind::Delete(values) => {
                let values = RoaringBitmap::from_iter(values.iter().copied().map(|x| x as u32));
                // Group the values to delete by every key they were previously inserted
                // under. `value_to_keys` is not scoped by field id, so some of these keys
                // may not exist under `field_id`; both databases receive the same request.
                let mut values_per_key = HashMap::new();
                for value in values {
                    if let Some(keys) = value_to_keys.get(&(value as u8)) {
                        for key in keys {
                            let values: &mut RoaringBitmap =
                                values_per_key.entry(key).or_default();
                            values.insert(value);
                        }
                    }
                }
                for (key, values) in values_per_key {
                    index.delete(&mut txn, *field_id, &key.as_slice(), &values);
                    trivial_db.delete(*field_id, key, &values);
                }
            }
        }
    }

    // Single verification pass per field: compare the content, then check the structure.
    for (field_id, values_field_id) in trivial_db.elements.iter() {
        let level0iter = index
            .content
            .as_polymorph()
            .prefix_iter::<_, Bytes, FacetGroupValueCodec>(&txn, &field_id.to_be_bytes())
            .unwrap();

        // NOTE(review): `zip` stops at the shorter iterator, so this only compares as many
        // entries as the trivial database holds for the field; surplus entries in the
        // index are not detected by this loop.
        for ((key, values), group) in values_field_id.iter().zip(level0iter) {
            let (group_key, group_values) = group.unwrap();
            let group_key =
                FacetGroupKeyCodec::<BytesRefCodec>::bytes_decode(group_key).unwrap();
            assert_eq!(key, &group_key.left_bound);
            assert_eq!(values, &group_values.bitmap);
        }
        index.verify_structure_validity(&txn, *field_id);
    }

    txn.abort().unwrap();
}
/// Entry point of the fuzz test.
///
/// Runs fuzzcheck on [`compare_with_trivial_database`] with the default mutator for
/// `[Operation]` and a serde-based serializer, and fails if fuzzcheck reports a test failure.
#[test]
#[no_coverage]
fn fuzz() {
    // Functions whose name contains one of these substrings are rejected by the filter.
    const IGNORED_FUNCTIONS: [&str; 4] = ["serde", "tests::", "fuzz::", "display_bitmap"];

    let tempdir = Rc::new(TempDir::new().unwrap());
    let shared_dir = Rc::clone(&tempdir);

    let result = fuzzcheck::fuzz_test(move |operations: &[Operation]| {
        compare_with_trivial_database(Rc::clone(&shared_dir), operations)
    })
    .default_mutator()
    .serde_serializer()
    .default_sensor_and_pool_with_custom_filter(|file, function| {
        // Only keep functions defined in the incremental facet update file.
        let in_tested_file =
            file == std::path::Path::new("milli/src/update/facet/incremental.rs");
        in_tested_file && !IGNORED_FUNCTIONS.iter().any(|needle| function.contains(*needle))
    })
    .arguments_from_cargo_fuzzcheck()
    .launch();

    assert!(!result.found_test_failure);
}
}

View File

@@ -346,35 +346,6 @@ pub(crate) mod test_helpers {
for<'a> BoundCodec: for<'a> BoundCodec:
BytesEncode<'a> + BytesDecode<'a, DItem = <BoundCodec as BytesEncode<'a>>::EItem>, BytesEncode<'a> + BytesDecode<'a, DItem = <BoundCodec as BytesEncode<'a>>::EItem>,
{ {
/// Builds a [`FacetIndex`] whose heed environment is opened at the path of `tempdir`.
///
/// The three sizing parameters are normalised before being stored; `tempdir` is kept
/// inside the returned index.
///
/// # Panics
///
/// Panics if the environment or its unnamed database cannot be opened.
#[cfg(all(test, fuzzing))]
pub fn open_from_tempdir(
    tempdir: Rc<tempfile::TempDir>,
    group_size: u8,
    max_group_size: u8,
    min_level_size: u8,
) -> FacetIndex<BoundCodec> {
    // 2 <= group_size <= 16
    let group_size = group_size.clamp(2, 16);
    // At least 2*group_size, but never above 16 (the cap wins when both cannot hold).
    let max_group_size = max_group_size.max(group_size * 2).min(16);
    // 1 <= min_level_size <= 17
    let min_level_size = min_level_size.clamp(1, 17);

    let mut options = heed::EnvOpenOptions::new();
    let options = options.map_size(4096 * 4 * 10 * 1000);
    // NOTE(review): setting an environment flag is `unsafe` in this heed version; the
    // invariant that makes `MdbAlwaysFreePages` sound is not visible here and should be confirmed.
    unsafe {
        options.flag(heed::flags::Flags::MdbAlwaysFreePages);
    }
    let environment = options.open(tempdir.path()).unwrap();
    let database = environment.open_database(None).unwrap().unwrap();

    FacetIndex {
        content: database,
        group_size: Cell::new(group_size),
        max_group_size: Cell::new(max_group_size),
        min_level_size: Cell::new(min_level_size),
        _tempdir: tempdir,
        env: environment,
        _phantom: PhantomData,
    }
}
pub fn new( pub fn new(
group_size: u8, group_size: u8,
max_group_size: u8, max_group_size: u8,
@@ -402,26 +373,6 @@ pub(crate) mod test_helpers {
} }
} }
/// Overrides the group size of the index, clamped to the range `2..=64`.
#[cfg(all(test, fuzzing))]
pub fn set_group_size(&self, group_size: u8) {
    // 2 <= x <= 64
    let clamped = group_size.clamp(2, 64);
    self.group_size.set(clamped);
}
/// Overrides the maximum group size of the index, clamped to the range `4..=128`.
///
/// To keep `2 * group_size <= max_group_size`, the current group size is lowered to
/// `max_group_size / 2` when it exceeds it. Since `max_group_size >= 4`, the group size
/// never drops below 2.
#[cfg(all(test, fuzzing))]
pub fn set_max_group_size(&self, max_group_size: u8) {
    // 4 <= x <= 128
    let max_group_size = max_group_size.clamp(4, 128);
    self.max_group_size.set(max_group_size);

    // 2*group_size <= max_group_size: shrink the group size if it is now too large.
    let largest_group_size = max_group_size / 2;
    if self.group_size.get() > largest_group_size {
        self.group_size.set(largest_group_size);
    }
}
/// Overrides the minimum level size of the index; values below 1 are raised to 1.
#[cfg(all(test, fuzzing))]
pub fn set_min_level_size(&self, min_level_size: u8) {
    // 1 <= x (no upper bound)
    let at_least_one = min_level_size.max(1);
    self.min_level_size.set(at_least_one);
}
pub fn insert<'a>( pub fn insert<'a>(
&self, &self,
wtxn: &'a mut RwTxn<'_>, wtxn: &'a mut RwTxn<'_>,

View File

@@ -2093,33 +2093,6 @@ mod tests {
index.add_documents(doc1).unwrap(); index.add_documents(doc1).unwrap();
} }
/// Indexes documents written in several scripts and checks which document ids are
/// recorded under the `(Cj, Jpn)` and `(Cj, Cmn)` script/language pairs.
#[cfg(feature = "default")]
#[test]
fn store_detected_script_and_language_per_document_during_indexing() {
    use charabia::{Language, Script};

    let index = TempIndex::new();
    index
        .add_documents(documents!([
            { "id": 1, "title": "The quick (\"brown\") fox can't jump 32.3 feet, right? Brr, it's 29.3°F!" },
            { "id": 2, "title": "人人生而自由﹐在尊嚴和權利上一律平等。他們賦有理性和良心﹐並應以兄弟關係的精神互相對待。" },
            { "id": 3, "title": "הַשּׁוּעָל הַמָּהִיר (״הַחוּם״) לֹא יָכוֹל לִקְפֹּץ 9.94 מֶטְרִים, נָכוֹן? ברר, 1.5°C- בַּחוּץ!" },
            { "id": 4, "title": "関西国際空港限定トートバッグ すもももももももものうち" },
            { "id": 5, "title": "ภาษาไทยง่ายนิดเดียว" },
            { "id": 6, "title": "The quick 在尊嚴和權利上一律平等。" },
        ]))
        .unwrap();

    let rtxn = index.read_txn().unwrap();
    // Looks up the document ids stored for one (script, language) pair.
    let docids_for = |script_language: (Script, Language)| {
        index.script_language_documents_ids(&rtxn, &script_language).unwrap().unwrap()
    };

    let cj_jpn_docs = docids_for((Script::Cj, Language::Jpn));
    let cj_cmn_docs = docids_for((Script::Cj, Language::Cmn));

    let expected_cj_jpn_docids = [3].iter().collect();
    assert_eq!(cj_jpn_docs, expected_cj_jpn_docids);

    let expected_cj_cmn_docids = [1, 5].iter().collect();
    assert_eq!(cj_cmn_docs, expected_cj_cmn_docids);
}
#[test] #[test]
fn add_and_delete_documents_in_single_transform() { fn add_and_delete_documents_in_single_transform() {
let mut index = TempIndex::new(); let mut index = TempIndex::new();

View File

@@ -29,9 +29,9 @@ use std::cell::RefCell;
/// - An example of a type that verifies (1) and (2) is [`std::rc::Rc`] (when `T` is `Send` and `Sync`). /// - An example of a type that verifies (1) and (2) is [`std::rc::Rc`] (when `T` is `Send` and `Sync`).
/// - An example of a type that doesn't verify (1) is thread-local data. /// - An example of a type that doesn't verify (1) is thread-local data.
/// - An example of a type that doesn't verify (2) is [`std::sync::MutexGuard`]: a lot of mutex implementations require that /// - An example of a type that doesn't verify (2) is [`std::sync::MutexGuard`]: a lot of mutex implementations require that
/// a lock is returned to the operating system on the same thread that initially locked the mutex, failing to uphold this /// a lock is returned to the operating system on the same thread that initially locked the mutex, failing to uphold this
/// invariant will cause Undefined Behavior /// invariant will cause Undefined Behavior
/// (see last § in [the nomicon](https://doc.rust-lang.org/nomicon/send-and-sync.html)). /// (see last § in [the nomicon](https://doc.rust-lang.org/nomicon/send-and-sync.html)).
/// ///
/// It is **always safe** to implement this trait on a type that is `Send`, but no placeholder impl is provided due to limitations in /// It is **always safe** to implement this trait on a type that is `Send`, but no placeholder impl is provided due to limitations in
/// coherency. Use the [`FullySend`] wrapper in this situation. /// coherency. Use the [`FullySend`] wrapper in this situation.
@@ -86,7 +86,7 @@ impl<T: MostlySend> MostlySendWrapper<T> {
/// # Safety /// # Safety
/// ///
/// 1. `T` is [`MostlySend`], so by its safety contract it can be accessed by any thread and all of its operations are available /// 1. `T` is [`MostlySend`], so by its safety contract it can be accessed by any thread and all of its operations are available
/// from any thread. /// from any thread.
/// 2. (P1) of `MostlySendWrapper::new` forces the user to never access the value from multiple threads concurrently. /// 2. (P1) of `MostlySendWrapper::new` forces the user to never access the value from multiple threads concurrently.
unsafe impl<T: MostlySend> Send for MostlySendWrapper<T> {} unsafe impl<T: MostlySend> Send for MostlySendWrapper<T> {}

View File

@@ -86,9 +86,9 @@ pub enum EmbedErrorKind {
}, },
option_info(.0.as_deref(), "server replied with "))] option_info(.0.as_deref(), "server replied with "))]
RestBadRequest(Option<String>, ConfigurationSource), RestBadRequest(Option<String>, ConfigurationSource),
#[error("received internal error HTTP {0} from embedding server{}", option_info(.1.as_deref(), "server replied with "))] #[error("received internal error HTTP {} from embedding server{}", .0, option_info(.1.as_deref(), "server replied with "))]
RestInternalServerError(u16, Option<String>), RestInternalServerError(u16, Option<String>),
#[error("received unexpected HTTP {0} from embedding server{}", option_info(.1.as_deref(), "server replied with "))] #[error("received unexpected HTTP {} from embedding server{}", .0, option_info(.1.as_deref(), "server replied with "))]
RestOtherStatusCode(u16, Option<String>), RestOtherStatusCode(u16, Option<String>),
#[error("could not reach embedding server:\n - {0}")] #[error("could not reach embedding server:\n - {0}")]
RestNetwork(ureq::Transport), RestNetwork(ureq::Transport),

View File

@@ -163,8 +163,10 @@ impl Embedder {
let token_ids = Tensor::stack(&token_ids, 0).map_err(EmbedError::tensor_shape)?; let token_ids = Tensor::stack(&token_ids, 0).map_err(EmbedError::tensor_shape)?;
let token_type_ids = token_ids.zeros_like().map_err(EmbedError::tensor_shape)?; let token_type_ids = token_ids.zeros_like().map_err(EmbedError::tensor_shape)?;
let embeddings = let embeddings = self
self.model.forward(&token_ids, &token_type_ids).map_err(EmbedError::model_forward)?; .model
.forward(&token_ids, &token_type_ids, None)
.map_err(EmbedError::model_forward)?;
// Apply some avg-pooling by taking the mean embedding value for all tokens (including padding) // Apply some avg-pooling by taking the mean embedding value for all tokens (including padding)
let (_n_sentence, n_tokens, _hidden_size) = let (_n_sentence, n_tokens, _hidden_size) =
@@ -185,8 +187,10 @@ impl Embedder {
Tensor::new(token_ids, &self.model.device).map_err(EmbedError::tensor_shape)?; Tensor::new(token_ids, &self.model.device).map_err(EmbedError::tensor_shape)?;
let token_ids = Tensor::stack(&[token_ids], 0).map_err(EmbedError::tensor_shape)?; let token_ids = Tensor::stack(&[token_ids], 0).map_err(EmbedError::tensor_shape)?;
let token_type_ids = token_ids.zeros_like().map_err(EmbedError::tensor_shape)?; let token_type_ids = token_ids.zeros_like().map_err(EmbedError::tensor_shape)?;
let embeddings = let embeddings = self
self.model.forward(&token_ids, &token_type_ids).map_err(EmbedError::model_forward)?; .model
.forward(&token_ids, &token_type_ids, None)
.map_err(EmbedError::model_forward)?;
// Apply some avg-pooling by taking the mean embedding value for all tokens (including padding) // Apply some avg-pooling by taking the mean embedding value for all tokens (including padding)
let (_n_sentence, n_tokens, _hidden_size) = let (_n_sentence, n_tokens, _hidden_size) =

View File

@@ -1,3 +1,4 @@
use std::fmt;
use std::time::Instant; use std::time::Instant;
use ordered_float::OrderedFloat; use ordered_float::OrderedFloat;
@@ -168,7 +169,6 @@ fn infer_api_key() -> String {
.unwrap_or_default() .unwrap_or_default()
} }
#[derive(Debug)]
pub struct Embedder { pub struct Embedder {
tokenizer: tiktoken_rs::CoreBPE, tokenizer: tiktoken_rs::CoreBPE,
rest_embedder: RestEmbedder, rest_embedder: RestEmbedder,
@@ -302,3 +302,13 @@ impl Embedder {
self.options.distribution() self.options.distribution()
} }
} }
/// Hand-written `Debug` implementation: the tokenizer is rendered as the fixed
/// placeholder `"CoreBPE"`, while `rest_embedder` and `options` use their own `Debug` output.
impl fmt::Debug for Embedder {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let mut builder = f.debug_struct("Embedder");
        builder.field("tokenizer", &"CoreBPE");
        builder.field("rest_embedder", &self.rest_embedder);
        builder.field("options", &self.options);
        builder.finish()
    }
}

View File

@@ -175,7 +175,7 @@ impl Embedder {
pub fn embed_tokens( pub fn embed_tokens(
&self, &self,
tokens: &[usize], tokens: &[u32],
deadline: Option<Instant>, deadline: Option<Instant>,
) -> Result<Embedding, EmbedError> { ) -> Result<Embedding, EmbedError> {
let mut embeddings = embed(&self.data, tokens, 1, Some(self.dimensions), deadline)?; let mut embeddings = embed(&self.data, tokens, 1, Some(self.dimensions), deadline)?;

View File

@@ -8,17 +8,17 @@ edition = "2021"
[dependencies] [dependencies]
color-spantrace = "0.2.1" color-spantrace = "0.2.1"
fxprof-processed-profile = "0.7.0" fxprof-processed-profile = "0.7.0"
serde = { version = "1.0.204", features = ["derive"] } serde = { version = "1.0.217", features = ["derive"] }
serde_json = "1.0.120" serde_json = "1.0.135"
tracing = "0.1.40" tracing = "0.1.41"
tracing-error = "0.2.0" tracing-error = "0.2.1"
tracing-subscriber = "0.3.18" tracing-subscriber = "0.3.19"
byte-unit = { version = "5.1.4", default-features = false, features = [ byte-unit = { version = "5.1.6", default-features = false, features = [
"std", "std",
"byte", "byte",
"serde", "serde",
] } ] }
tokio = { version = "1.38.0", features = ["sync"] } tokio = { version = "1.42.0", features = ["sync"] }
[target.'cfg(any(target_os = "linux", target_os = "macos"))'.dependencies] [target.'cfg(any(target_os = "linux", target_os = "macos"))'.dependencies]
libproc = "0.14.8" libproc = "0.14.10"

View File

@@ -11,34 +11,34 @@ license.workspace = true
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies] [dependencies]
anyhow = "1.0.86" anyhow = "1.0.95"
build-info = { version = "1.7.0", path = "../build-info" } build-info = { version = "1.7.0", path = "../build-info" }
cargo_metadata = "0.18.1" cargo_metadata = "0.19.1"
clap = { version = "4.5.9", features = ["derive"] } clap = { version = "4.5.24", features = ["derive"] }
futures-core = "0.3.30" futures-core = "0.3.31"
futures-util = "0.3.30" futures-util = "0.3.31"
reqwest = { version = "0.12.5", features = [ reqwest = { version = "0.12.12", features = [
"stream", "stream",
"json", "json",
"rustls-tls", "rustls-tls",
], default-features = false } ], default-features = false }
serde = { version = "1.0.204", features = ["derive"] } serde = { version = "1.0.217", features = ["derive"] }
serde_json = "1.0.120" serde_json = "1.0.135"
sha2 = "0.10.8" sha2 = "0.10.8"
sysinfo = "0.30.13" sysinfo = "0.33.1"
time = { version = "0.3.36", features = [ time = { version = "0.3.37", features = [
"serde", "serde",
"serde-human-readable", "serde-human-readable",
"macros", "macros",
] } ] }
tokio = { version = "1.38.0", features = [ tokio = { version = "1.42.0", features = [
"rt", "rt",
"net", "net",
"time", "time",
"process", "process",
"signal", "signal",
] } ] }
tracing = "0.1.40" tracing = "0.1.41"
tracing-subscriber = "0.3.18" tracing-subscriber = "0.3.19"
tracing-trace = { version = "0.1.0", path = "../tracing-trace" } tracing-trace = { version = "0.1.0", path = "../tracing-trace" }
uuid = { version = "1.10.0", features = ["v7", "serde"] } uuid = { version = "1.11.0", features = ["v7", "serde"] }

View File

@@ -27,8 +27,7 @@ impl Environment {
let unknown_string = String::from("Unknown"); let unknown_string = String::from("Unknown");
let mut system = System::new(); let mut system = System::new();
system.refresh_cpu(); system.refresh_cpu_all();
system.refresh_cpu_frequency();
system.refresh_memory(); system.refresh_memory();
let (cpu, frequency) = match system.cpus().first() { let (cpu, frequency) = match system.cpus().first() {
@@ -50,9 +49,7 @@ impl Environment {
if let Some(os) = System::os_version() { if let Some(os) = System::os_version() {
software.push(VersionInfo { name: os, version: String::from("kernel-release") }); software.push(VersionInfo { name: os, version: String::from("kernel-release") });
} }
if let Some(arch) = System::cpu_arch() { software.push(VersionInfo { name: System::cpu_arch(), version: String::from("arch") });
software.push(VersionInfo { name: arch, version: String::from("arch") });
}
Self { Self {
hostname: System::host_name(), hostname: System::host_name(),

View File

@@ -1,3 +1,3 @@
[toolchain] [toolchain]
channel = "1.79.0" channel = "1.81.0"
components = ["clippy"] components = ["clippy"]