Compare commits

..

62 Commits

Author SHA1 Message Date
Louis Dureuil
4df6535043 fmt 2025-12-03 23:00:32 +01:00
Louis Dureuil
ae60f9663d Chore: remove some duplicated lambdas to ease compile time 2025-12-03 22:58:13 +01:00
Louis Dureuil
99b3d0bb5c Switch to migration-oriented dumpless upgrade 2025-12-03 22:58:13 +01:00
Louis Dureuil
dc6351520d Update snapshots 2025-12-03 22:58:13 +01:00
Louis Dureuil
25f21765ba Make snapshots independent on the version 2025-12-03 22:56:16 +01:00
Clément Renault
b566458aa2 Merge pull request #6027 from meilisearch/release-v1.28.2
Bring back changes from v1.28.2
2025-12-03 17:46:44 +00:00
Clément Renault
ae4344e359 Merge pull request #6004 from meilisearch/default-experimental-vector-store
Make Hannoy the default vector store
2025-12-03 17:16:46 +00:00
Kerollmops
b6cb384650 Fix settings tests 2025-12-03 17:52:52 +01:00
Clément Renault
2c3e3d856c Make hannoy the default vector store when creating an index 2025-12-03 17:52:52 +01:00
Kerollmops
e9350f033d Limit the number of retrieved task to one 2025-12-03 17:43:48 +01:00
Kerollmops
54c92fd6c0 Update the snapshots 2025-12-03 17:43:48 +01:00
Kerollmops
4f4df83a51 Bump the version to v1.28.2 2025-12-03 17:43:48 +01:00
Clément Renault
a51021cab7 Merge pull request #6026 from meilisearch/free-space
Fix the CI issues
2025-12-03 16:18:41 +00:00
Louis Dureuil
e33f4fdeae Attempt to eschew containers for ubuntu 2025-12-03 16:28:19 +01:00
Louis Dureuil
e407bca196 use feature as cache key 2025-12-03 16:24:48 +01:00
Louis Dureuil
cd24ea11b4 correctly clean space + remove test in debug 2025-12-03 16:12:08 +01:00
Louis Dureuil
ba578e7ab5 Fix ollama test following update on their side 2025-12-03 15:48:30 +01:00
Louis Dureuil
05a74d1e68 remove non-existing rust-toolchain action arguments 2025-12-03 15:37:51 +01:00
Louis Dureuil
41d61deb97 Make runners/containers more uniform 2025-12-03 15:34:57 +01:00
Louis Dureuil
bba292b01a Run ollama test on 22.04 2025-12-03 15:21:02 +01:00
Louis Dureuil
96923dff33 adjust test suite 2025-12-03 15:01:58 +01:00
Louis Dureuil
8f9c9305da set back the cache 2025-12-03 14:10:18 +01:00
Louis Dureuil
a9f309e1d1 Remove macos and windows from PRs 2025-12-03 13:54:02 +01:00
Louis Dureuil
e456a9acd8 Add the disk freeing to all ubuntu-22.04 jobs 2025-12-03 11:51:42 +01:00
Louis Dureuil
9b7d29466c Attempt to earn some free space... 2025-12-03 11:41:00 +01:00
Clément Renault
b0ef14b6f0 Merge pull request #5983 from meilisearch/new-searchable-settings-indexer
Support the searchable and exact attributes in the new Settings Indexer
2025-12-02 11:03:36 +00:00
Clément Renault
36febe2068 Merge pull request #6021 from meilisearch/skip-macos-windows-in-merge-queue
Skip the macOS and Windows CI in the merge queue
2025-12-02 08:29:06 +00:00
Kerollmops
6f14a6ec18 Skip the macOS and Windows CI in the merge queue 2025-12-01 16:59:55 +01:00
Clément Renault
1a45b19e7e Merge pull request #6020 from meilisearch/fix-release-ci-enterprise
Fix release CI after enterprise merge
2025-12-01 15:12:00 +00:00
Kerollmops
bd7525b166 Update the snapshots 2025-12-01 15:26:00 +01:00
Kerollmops
359757d939 Bump patch version 2025-12-01 15:25:56 +01:00
Paul de Nonancourt
1c6eea596c fix: Only trigger Cloud CI for enterprise edition 2025-12-01 15:08:23 +01:00
Paul de Nonancourt
693b6f483e fix: Update binary path for target x86_64 meilisearch release 2025-12-01 15:07:55 +01:00
Many the fish
818a4aa6d9 Merge pull request #6016 from EclipseAditya/fix-sort-on-empty-attribute-5998
Fix sort on /documents endpoint when field has no values
2025-12-01 13:50:05 +00:00
EclipseAditya
4737e1a2a5 Fix rustfmt formatting issues 2025-11-30 06:02:02 +00:00
EclipseAditya
36522e951b Fix sort on /documents endpoint when field has no values 2025-11-28 15:22:57 +00:00
Kerollmops
fce046d84d Fix non-detected searchable attribute 2025-11-28 11:29:31 +01:00
Kerollmops
3fc507bb44 Introduce a test for when a new nested field becomes searchable 2025-11-28 11:29:31 +01:00
Kerollmops
fdbcd033fb Clean up the CI 2025-11-28 11:29:31 +01:00
Clément Renault
aaab49baca Fix a bug and improve code quality
Co-authored-by: Many the fish <many@meilisearch.com>
2025-11-28 11:29:31 +01:00
Kerollmops
0d0d6e8099 Update the proximity precision for the settings delta 2025-11-28 11:29:31 +01:00
Clément Renault
c1e351c92b Show available space 2025-11-28 11:29:31 +01:00
Clément Renault
67cab4cc9d Trigger the new settings indexer when changing the proximity precision 2025-11-28 11:29:31 +01:00
Clément Renault
f30a37b0fe Clear old word prefix fid docids entries when removing searchable fields 2025-11-28 11:29:31 +01:00
Clément Renault
a78a9f80dd Introduce the word pair proximity extractor 2025-11-28 11:29:31 +01:00
Clément Renault
439fee5434 Move the has_searchable_children function to the appropriate module 2025-11-28 11:29:31 +01:00
Clément Renault
9e858590e0 Rename the function to extract document words when a setting changes
Co-authored-By: Maxime Legendre <maxime@meilisearch.com>
2025-11-28 11:29:31 +01:00
Clément Renault
29eebd5f93 Merge the logic of the function detecting searchable children fields 2025-11-28 11:29:31 +01:00
Clément Renault
07da6edbdf Fix a bug when nested fields appear
Co-authored-by: Many the fish <many@meilisearch.com>
2025-11-28 11:29:31 +01:00
Clément Renault
22b83042e6 Add some comments
Co-authored-by: Many the fish <many@meilisearch.com>
2025-11-28 11:29:31 +01:00
Clément Renault
52ab13906a Fix a test trying to change settings with a wtxn 2025-11-28 11:29:31 +01:00
Clément Renault
29bec8efd4 Make sure the embedders supports changing searchables 2025-11-28 11:29:31 +01:00
Clément Renault
6947a8990b Make sure we don't crash on unreferenced fields 2025-11-28 11:29:31 +01:00
Clément Renault
fbb2bb0c73 Make clippy happy 2025-11-28 11:29:31 +01:00
Clément Renault
15918f53a9 Introduce new progress steps when deleting fid-based entries 2025-11-28 11:29:30 +01:00
Clément Renault
d7f5f3a0a3 Delete entries from fid-based databases when searchables are deleted 2025-11-28 11:29:30 +01:00
Clément Renault
1afbf35f27 Support exact attributes in the settings delta 2025-11-28 11:29:30 +01:00
Clément Renault
d7675233d5 Call the post processing in the new settings indexer 2025-11-28 11:29:30 +01:00
Clément Renault
c63c1ac32b Support exact attributes in the field metadata 2025-11-28 11:29:30 +01:00
Clément Renault
6171dcde0d Call the new searchable extractor 2025-11-28 11:29:30 +01:00
Clément Renault
04bc134324 Introduce the new searchable extractor 2025-11-28 11:29:30 +01:00
Clément Renault
8ff39d927d Enable the new settings indexer when the searchable or exact are updates 2025-11-28 11:29:30 +01:00
58 changed files with 1933 additions and 776 deletions

View File

@@ -67,8 +67,6 @@ jobs:
ref: ${{ steps.comment-branch.outputs.head_ref }}
- uses: dtolnay/rust-toolchain@1.89
with:
profile: minimal
- name: Run benchmarks on PR ${{ github.event.issue.id }}
run: |

View File

@@ -13,8 +13,6 @@ jobs:
steps:
- uses: actions/checkout@v5
- uses: dtolnay/rust-toolchain@1.89
with:
profile: minimal
# Run benchmarks
- name: Run benchmarks - Dataset ${BENCH_NAME} - Branch main - Commit ${{ github.sha }}

View File

@@ -13,6 +13,12 @@ jobs:
image: ubuntu:22.04
steps:
- uses: actions/checkout@v5
- name: Clean space as per https://github.com/actions/virtual-environments/issues/709
run: |
sudo rm -rf "/opt/ghc" || true
sudo rm -rf "/usr/share/dotnet" || true
sudo rm -rf "/usr/local/lib/android" || true
sudo rm -rf "/usr/local/share/boost" || true
- name: Install needed dependencies
run: |
apt-get update && apt-get install -y curl

View File

@@ -13,8 +13,6 @@ jobs:
steps:
- uses: actions/checkout@v5
- uses: dtolnay/rust-toolchain@1.89
with:
profile: minimal
# Run benchmarks
- name: Run the fuzzer

View File

@@ -25,6 +25,12 @@ jobs:
run: |
apt-get update && apt-get install -y curl
apt-get install build-essential -y
- name: Clean space as per https://github.com/actions/virtual-environments/issues/709
run: |
sudo rm -rf "/opt/ghc" || true
sudo rm -rf "/usr/share/dotnet" || true
sudo rm -rf "/usr/local/lib/android" || true
sudo rm -rf "/usr/local/share/boost" || true
- uses: dtolnay/rust-toolchain@1.89
- name: Install cargo-deb
run: cargo install cargo-deb

View File

@@ -211,7 +211,7 @@ jobs:
# /!\ Don't touch this without checking with Cloud team
- name: Send CI information to Cloud team
# Do not send if nightly build (i.e. 'schedule' or 'workflow_dispatch' event)
if: github.event_name == 'push'
if: ${{ (github.event_name == 'push') && (matrix.edition == 'enterprise') }}
uses: peter-evans/repository-dispatch@v3
with:
token: ${{ secrets.MEILI_BOT_GH_PAT }}

View File

@@ -65,7 +65,7 @@ jobs:
extra-args: ""
- release: linux-amd64
os: ubuntu-22.04
binary_path: release/meilisearch
binary_path: x86_64-unknown-linux-gnu/release/meilisearch
asset_name: linux-amd64
extra-args: "--target x86_64-unknown-linux-gnu"
- release: linux-aarch64

View File

@@ -19,31 +19,36 @@ jobs:
runs-on: ${{ matrix.runner }}
strategy:
matrix:
runner: [ubuntu-24.04, ubuntu-24.04-arm]
runner: [ubuntu-22.04, ubuntu-22.04-arm]
features: ["", "--features enterprise"]
container:
# Use ubuntu-22.04 to compile with glibc 2.35
image: ubuntu:22.04
steps:
- uses: actions/checkout@v5
- name: Install needed dependencies
- name: check free space before
run: df -h
- name: Clean space as per https://github.com/actions/virtual-environments/issues/709
run: |
apt-get update && apt-get install -y curl
apt-get install build-essential -y
sudo rm -rf "/opt/ghc" || true
sudo rm -rf "/usr/share/dotnet" || true
sudo rm -rf "/usr/local/lib/android" || true
sudo rm -rf "/usr/local/share/boost" || true
- name: check free space after
run: df -h
- name: Setup test with Rust stable
uses: dtolnay/rust-toolchain@1.89
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.8.0
- name: Run cargo check without any default features
with:
key: ${{ matrix.features }}
- name: Run cargo build without any default features
uses: actions-rs/cargo@v1
with:
command: build
args: --locked --release --no-default-features --all
args: --locked --no-default-features --all
- name: Run cargo test
uses: actions-rs/cargo@v1
with:
command: test
args: --locked --release --all ${{ matrix.features }}
args: --locked --all ${{ matrix.features }}
test-others:
name: Tests on ${{ matrix.os }}
@@ -53,53 +58,56 @@ jobs:
matrix:
os: [macos-14, windows-2022]
features: ["", "--features enterprise"]
if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
steps:
- uses: actions/checkout@v5
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.8.0
- uses: dtolnay/rust-toolchain@1.89
- name: Run cargo check without any default features
- name: Run cargo build without any default features
uses: actions-rs/cargo@v1
with:
command: build
args: --locked --release --no-default-features --all
args: --locked --no-default-features --all
- name: Run cargo test
uses: actions-rs/cargo@v1
with:
command: test
args: --locked --release --all ${{ matrix.features }}
args: --locked --all ${{ matrix.features }}
test-all-features:
name: Tests almost all features
runs-on: ubuntu-latest
container:
# Use ubuntu-22.04 to compile with glibc 2.35
image: ubuntu:22.04
runs-on: ubuntu-22.04
if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
steps:
- uses: actions/checkout@v5
- name: Install needed dependencies
- name: Clean space as per https://github.com/actions/virtual-environments/issues/709
run: |
apt-get update
apt-get install --assume-yes build-essential curl
sudo rm -rf "/opt/ghc" || true
sudo rm -rf "/usr/share/dotnet" || true
sudo rm -rf "/usr/local/lib/android" || true
sudo rm -rf "/usr/local/share/boost" || true
- uses: dtolnay/rust-toolchain@1.89
- name: Run cargo build with almost all features
run: |
cargo build --workspace --locked --release --features "$(cargo xtask list-features --exclude-feature cuda,test-ollama)"
cargo build --workspace --locked --features "$(cargo xtask list-features --exclude-feature cuda,test-ollama)"
- name: Run cargo test with almost all features
run: |
cargo test --workspace --locked --release --features "$(cargo xtask list-features --exclude-feature cuda,test-ollama)"
cargo test --workspace --locked --features "$(cargo xtask list-features --exclude-feature cuda,test-ollama)"
ollama-ubuntu:
name: Test with Ollama
runs-on: ubuntu-latest
strategy:
matrix:
features: ["", "--features enterprise"]
runs-on: ubuntu-22.04
env:
MEILI_TEST_OLLAMA_SERVER: "http://localhost:11434"
steps:
- uses: actions/checkout@v5
- name: Clean space as per https://github.com/actions/virtual-environments/issues/709
run: |
sudo rm -rf "/opt/ghc" || true
sudo rm -rf "/usr/share/dotnet" || true
sudo rm -rf "/usr/local/lib/android" || true
sudo rm -rf "/usr/local/share/boost" || true
- name: Install Ollama
run: |
curl -fsSL https://ollama.com/install.sh | sudo -E sh
@@ -123,20 +131,20 @@ jobs:
uses: actions-rs/cargo@v1
with:
command: test
args: --locked --release --all --features test-ollama ollama ${{ matrix.features }}
args: --locked -p meilisearch --features test-ollama ollama
test-disabled-tokenization:
name: Test disabled tokenization
runs-on: ubuntu-latest
container:
image: ubuntu:22.04
runs-on: ubuntu-22.04
if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
steps:
- uses: actions/checkout@v5
- name: Install needed dependencies
- name: Clean space as per https://github.com/actions/virtual-environments/issues/709
run: |
apt-get update
apt-get install --assume-yes build-essential curl
sudo rm -rf "/opt/ghc" || true
sudo rm -rf "/usr/share/dotnet" || true
sudo rm -rf "/usr/local/lib/android" || true
sudo rm -rf "/usr/local/share/boost" || true
- uses: dtolnay/rust-toolchain@1.89
- name: Run cargo tree without default features and check lindera is not present
run: |
@@ -148,35 +156,42 @@ jobs:
run: |
cargo tree -f '{p} {f}' -e normal | grep lindera -qz
# We run tests in debug also, to make sure that the debug_assertions are hit
test-debug:
name: Run tests in debug
build:
name: Build in release
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@v5
- name: Clean space as per https://github.com/actions/virtual-environments/issues/709
run: |
sudo rm -rf "/opt/ghc" || true
sudo rm -rf "/usr/share/dotnet" || true
sudo rm -rf "/usr/local/lib/android" || true
sudo rm -rf "/usr/local/share/boost" || true
- uses: dtolnay/rust-toolchain@1.89
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.8.0
- name: Run cargo build in release
uses: actions-rs/cargo@v1
with:
command: build
args: --all-targets --release
clippy:
name: Run Clippy
runs-on: ubuntu-22.04
strategy:
matrix:
features: ["", "--features enterprise"]
steps:
- uses: actions/checkout@v5
- uses: dtolnay/rust-toolchain@1.89
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.8.0
- name: Run tests in debug
uses: actions-rs/cargo@v1
with:
command: test
args: --locked --all ${{ matrix.features }}
clippy:
name: Run Clippy
runs-on: ubuntu-latest
strategy:
matrix:
features: ["", "--features enterprise"]
steps:
- uses: actions/checkout@v5
- name: Clean space as per https://github.com/actions/virtual-environments/issues/709
run: |
sudo rm -rf "/opt/ghc" || true
sudo rm -rf "/usr/share/dotnet" || true
sudo rm -rf "/usr/local/lib/android" || true
sudo rm -rf "/usr/local/share/boost" || true
- uses: dtolnay/rust-toolchain@1.89
with:
profile: minimal
components: clippy
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.8.0
@@ -188,14 +203,17 @@ jobs:
fmt:
name: Run Rustfmt
runs-on: ubuntu-latest
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@v5
- name: Clean space as per https://github.com/actions/virtual-environments/issues/709
run: |
sudo rm -rf "/opt/ghc" || true
sudo rm -rf "/usr/share/dotnet" || true
sudo rm -rf "/usr/local/lib/android" || true
sudo rm -rf "/usr/local/share/boost" || true
- uses: dtolnay/rust-toolchain@1.89
with:
profile: minimal
toolchain: nightly-2024-07-09
override: true
components: rustfmt
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.8.0

View File

@@ -18,9 +18,13 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v5
- name: Clean space as per https://github.com/actions/virtual-environments/issues/709
run: |
sudo rm -rf "/opt/ghc" || true
sudo rm -rf "/usr/share/dotnet" || true
sudo rm -rf "/usr/local/lib/android" || true
sudo rm -rf "/usr/local/share/boost" || true
- uses: dtolnay/rust-toolchain@1.89
with:
profile: minimal
- name: Install sd
run: cargo install sd
- name: Update Cargo.toml file

Cargo.lock (generated file, 34 changed lines)
View File

@@ -580,7 +580,7 @@ source = "git+https://github.com/meilisearch/bbqueue#e8af4a4bccc8eb36b2b0442c4a9
[[package]]
name = "benchmarks"
version = "1.28.0"
version = "1.28.2"
dependencies = [
"anyhow",
"bumpalo",
@@ -790,7 +790,7 @@ dependencies = [
[[package]]
name = "build-info"
version = "1.28.0"
version = "1.28.2"
dependencies = [
"anyhow",
"time",
@@ -1786,7 +1786,7 @@ dependencies = [
[[package]]
name = "dump"
version = "1.28.0"
version = "1.28.2"
dependencies = [
"anyhow",
"big_s",
@@ -2018,7 +2018,7 @@ checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be"
[[package]]
name = "file-store"
version = "1.28.0"
version = "1.28.2"
dependencies = [
"tempfile",
"thiserror 2.0.17",
@@ -2040,7 +2040,7 @@ dependencies = [
[[package]]
name = "filter-parser"
version = "1.28.0"
version = "1.28.2"
dependencies = [
"insta",
"levenshtein_automata",
@@ -2068,7 +2068,7 @@ dependencies = [
[[package]]
name = "flatten-serde-json"
version = "1.28.0"
version = "1.28.2"
dependencies = [
"criterion",
"serde_json",
@@ -2231,7 +2231,7 @@ dependencies = [
[[package]]
name = "fuzzers"
version = "1.28.0"
version = "1.28.2"
dependencies = [
"arbitrary",
"bumpalo",
@@ -3198,7 +3198,7 @@ dependencies = [
[[package]]
name = "index-scheduler"
version = "1.28.0"
version = "1.28.2"
dependencies = [
"anyhow",
"backoff",
@@ -3460,7 +3460,7 @@ dependencies = [
[[package]]
name = "json-depth-checker"
version = "1.28.0"
version = "1.28.2"
dependencies = [
"criterion",
"serde_json",
@@ -3974,7 +3974,7 @@ checksum = "ae960838283323069879657ca3de837e9f7bbb4c7bf6ea7f1b290d5e9476d2e0"
[[package]]
name = "meili-snap"
version = "1.28.0"
version = "1.28.2"
dependencies = [
"insta",
"md5 0.8.0",
@@ -3985,7 +3985,7 @@ dependencies = [
[[package]]
name = "meilisearch"
version = "1.28.0"
version = "1.28.2"
dependencies = [
"actix-cors",
"actix-http",
@@ -4083,7 +4083,7 @@ dependencies = [
[[package]]
name = "meilisearch-auth"
version = "1.28.0"
version = "1.28.2"
dependencies = [
"base64 0.22.1",
"enum-iterator",
@@ -4102,7 +4102,7 @@ dependencies = [
[[package]]
name = "meilisearch-types"
version = "1.28.0"
version = "1.28.2"
dependencies = [
"actix-web",
"anyhow",
@@ -4137,7 +4137,7 @@ dependencies = [
[[package]]
name = "meilitool"
version = "1.28.0"
version = "1.28.2"
dependencies = [
"anyhow",
"clap",
@@ -4171,7 +4171,7 @@ dependencies = [
[[package]]
name = "milli"
version = "1.28.0"
version = "1.28.2"
dependencies = [
"arroy",
"bbqueue",
@@ -4750,7 +4750,7 @@ checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220"
[[package]]
name = "permissive-json-pointer"
version = "1.28.0"
version = "1.28.2"
dependencies = [
"big_s",
"serde_json",
@@ -7783,7 +7783,7 @@ dependencies = [
[[package]]
name = "xtask"
version = "1.28.0"
version = "1.28.2"
dependencies = [
"anyhow",
"build-info",

View File

@@ -23,7 +23,7 @@ members = [
]
[workspace.package]
version = "1.28.0"
version = "1.28.2"
authors = [
"Quentin de Quelen <quentin@dequelen.me>",
"Clément Renault <clement@meilisearch.com>",

View File

@@ -21,6 +21,10 @@ use roaring::RoaringBitmap;
#[global_allocator]
static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;
fn no_cancel() -> bool {
false
}
const BENCHMARK_ITERATION: usize = 10;
fn setup_dir(path: impl AsRef<Path>) {
@@ -65,7 +69,7 @@ fn setup_settings<'t>(
let sortable_fields = sortable_fields.iter().map(|s| s.to_string()).collect();
builder.set_sortable_fields(sortable_fields);
builder.execute(&|| false, &Progress::default(), Default::default()).unwrap();
builder.execute(&no_cancel, &Progress::default(), Default::default()).unwrap();
}
fn setup_index_with_settings(
@@ -152,7 +156,7 @@ fn indexing_songs_default(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -168,7 +172,7 @@ fn indexing_songs_default(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -220,7 +224,7 @@ fn reindexing_songs_default(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -236,7 +240,7 @@ fn reindexing_songs_default(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -266,7 +270,7 @@ fn reindexing_songs_default(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -282,7 +286,7 @@ fn reindexing_songs_default(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -336,7 +340,7 @@ fn deleting_songs_in_batches_default(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -352,7 +356,7 @@ fn deleting_songs_in_batches_default(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -414,7 +418,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -430,7 +434,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -460,7 +464,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -476,7 +480,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -502,7 +506,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -518,7 +522,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -571,7 +575,7 @@ fn indexing_songs_without_faceted_numbers(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -587,7 +591,7 @@ fn indexing_songs_without_faceted_numbers(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -639,7 +643,7 @@ fn indexing_songs_without_faceted_fields(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -655,7 +659,7 @@ fn indexing_songs_without_faceted_fields(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -707,7 +711,7 @@ fn indexing_wiki(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -723,7 +727,7 @@ fn indexing_wiki(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -774,7 +778,7 @@ fn reindexing_wiki(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -790,7 +794,7 @@ fn reindexing_wiki(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -820,7 +824,7 @@ fn reindexing_wiki(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -836,7 +840,7 @@ fn reindexing_wiki(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -889,7 +893,7 @@ fn deleting_wiki_in_batches_default(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -905,7 +909,7 @@ fn deleting_wiki_in_batches_default(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -967,7 +971,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -983,7 +987,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -1014,7 +1018,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -1030,7 +1034,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -1057,7 +1061,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -1073,7 +1077,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -1125,7 +1129,7 @@ fn indexing_movies_default(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -1141,7 +1145,7 @@ fn indexing_movies_default(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -1192,7 +1196,7 @@ fn reindexing_movies_default(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -1208,7 +1212,7 @@ fn reindexing_movies_default(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -1238,7 +1242,7 @@ fn reindexing_movies_default(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -1254,7 +1258,7 @@ fn reindexing_movies_default(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -1307,7 +1311,7 @@ fn deleting_movies_in_batches_default(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -1323,7 +1327,7 @@ fn deleting_movies_in_batches_default(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -1372,7 +1376,7 @@ fn delete_documents_from_ids(index: Index, document_ids_to_delete: Vec<RoaringBi
Some(primary_key),
&document_changes,
RuntimeEmbedders::default(),
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -1422,7 +1426,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -1438,7 +1442,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -1468,7 +1472,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -1484,7 +1488,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -1510,7 +1514,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -1526,7 +1530,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -1601,7 +1605,7 @@ fn indexing_nested_movies_default(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -1617,7 +1621,7 @@ fn indexing_nested_movies_default(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -1693,7 +1697,7 @@ fn deleting_nested_movies_in_batches_default(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -1709,7 +1713,7 @@ fn deleting_nested_movies_in_batches_default(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -1777,7 +1781,7 @@ fn indexing_nested_movies_without_faceted_fields(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -1793,7 +1797,7 @@ fn indexing_nested_movies_without_faceted_fields(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -1845,7 +1849,7 @@ fn indexing_geo(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -1861,7 +1865,7 @@ fn indexing_geo(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -1912,7 +1916,7 @@ fn reindexing_geo(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -1928,7 +1932,7 @@ fn reindexing_geo(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -1958,7 +1962,7 @@ fn reindexing_geo(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -1974,7 +1978,7 @@ fn reindexing_geo(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -2027,7 +2031,7 @@ fn deleting_geo_in_batches_default(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -2043,7 +2047,7 @@ fn deleting_geo_in_batches_default(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
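The change running through this benchmark file replaces every inline `&|| false` cancellation callback with a reference to the single `no_cancel` function (the "Chore: remove some duplicated lambdas to ease compile time" commit). A minimal, self-contained sketch of why that helps, with a hypothetical `run_step` standing in for the indexing entry points: every closure literal has its own anonymous type, so a generic callee is monomorphized once per literal, while a single named fn item is one type shared by every call site.

fn no_cancel() -> bool {
    false
}

// Hypothetical stand-in for an indexing entry point that takes a cancellation check.
fn run_step<F: Fn() -> bool>(should_cancel: &F) {
    if should_cancel() {
        return;
    }
    // ... do the actual work ...
}

fn main() {
    run_step(&|| false);  // closure type A: one monomorphized copy of `run_step`
    run_step(&|| false);  // closure type B: a second, identical copy
    run_step(&no_cancel); // fn item type: a single copy, shared by every call site
    run_step(&no_cancel); // no additional copy generated
}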

View File

@@ -6,7 +6,7 @@ use meilisearch_types::heed::types::{SerdeBincode, SerdeJson, Str};
use meilisearch_types::heed::{Database, RoTxn};
use meilisearch_types::milli::{CboRoaringBitmapCodec, RoaringBitmapCodec, BEU32};
use meilisearch_types::tasks::{Details, Kind, Status, Task};
use meilisearch_types::versioning;
use meilisearch_types::versioning::{self, VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH};
use roaring::RoaringBitmap;
use crate::index_mapper::IndexMapper;
@@ -320,7 +320,11 @@ fn snapshot_details(d: &Details) -> String {
format!("{{ url: {url:?}, api_key: {api_key:?}, payload_size: {payload_size:?}, indexes: {indexes:?} }}")
}
Details::UpgradeDatabase { from, to } => {
format!("{{ from: {from:?}, to: {to:?} }}")
if to == &(VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH) {
format!("{{ from: {from:?}, to: [current version] }}")
} else {
format!("{{ from: {from:?}, to: {to:?} }}")
}
}
Details::IndexCompaction { index_uid, pre_compaction_size, post_compaction_size } => {
format!("{{ index_uid: {index_uid:?}, pre_compaction_size: {pre_compaction_size:?}, post_compaction_size: {post_compaction_size:?} }}")
@@ -400,7 +404,21 @@ pub fn snapshot_batch(batch: &Batch) -> String {
snap.push('{');
snap.push_str(&format!("uid: {uid}, "));
snap.push_str(&format!("details: {}, ", serde_json::to_string(details).unwrap()));
let details = if let Some(upgrade_to) = &details.upgrade_to {
if upgrade_to.as_str()
== format!("v{VERSION_MAJOR}.{VERSION_MINOR}.{VERSION_PATCH}").as_str()
{
let mut details = details.clone();
details.upgrade_to = Some("[current version]".into());
serde_json::to_string(&details).unwrap()
} else {
serde_json::to_string(details).unwrap()
}
} else {
serde_json::to_string(details).unwrap()
};
snap.push_str(&format!("details: {details}, "));
snap.push_str(&format!("stats: {}, ", serde_json::to_string(&stats).unwrap()));
if !embedder_stats.skip_serializing() {
snap.push_str(&format!(
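Both hunks above serve the "Make snapshots independent on the version" commit: whenever a task or batch detail carries the binary's own version, it is rewritten to a stable placeholder before being snapshotted, so the snapshot files no longer need to be regenerated on every release. The same idea as a free-standing helper, in a minimal sketch (the helper name is hypothetical; the constants are the ones imported above):

use meilisearch_types::versioning::{VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH};

// Replace the binary's own version with a stable placeholder so that
// snapshot files survive version bumps untouched.
fn redact_current_version(snapshot: &str) -> String {
    let current = format!("v{VERSION_MAJOR}.{VERSION_MINOR}.{VERSION_PATCH}");
    snapshot.replace(&current, "[current version]")
}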

View File

@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 28, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: [current version] }, kind: UpgradeDatabase { from: (1, 12, 0) }}
1 {uid: 1, batch_uid: 1, status: succeeded, details: { primary_key: Some("mouse"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
2 {uid: 2, batch_uid: 2, status: succeeded, details: { primary_key: Some("bone"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
3 {uid: 3, batch_uid: 3, status: failed, error: ResponseError { code: 200, message: "Index `doggo` already exists.", error_code: "index_already_exists", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_already_exists" }, details: { primary_key: Some("bone"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
@@ -57,7 +57,7 @@ girafo: { number_of_documents: 0, field_distribution: {} }
[timestamp] [4,]
----------------------------------------------------------------------
### All Batches:
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.28.0"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"[current version]"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
1 {uid: 1, details: {"primaryKey":"mouse"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"catto":1}}, stop reason: "created batch containing only task with id 1 of type `indexCreation` that cannot be batched with any other task.", }
2 {uid: 2, details: {"primaryKey":"bone"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"doggo":1}}, stop reason: "created batch containing only task with id 2 of type `indexCreation` that cannot be batched with any other task.", }
3 {uid: 3, details: {"primaryKey":"bone"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"indexCreation":1},"indexUids":{"doggo":1}}, stop reason: "created batch containing only task with id 3 of type `indexCreation` that cannot be batched with any other task.", }

View File

@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 28, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: [current version] }, kind: UpgradeDatabase { from: (1, 12, 0) }}
----------------------------------------------------------------------
### Status:
enqueued [0,]

View File

@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 28, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: [current version] }, kind: UpgradeDatabase { from: (1, 12, 0) }}
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
----------------------------------------------------------------------
### Status:

View File

@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 28, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: [current version] }, kind: UpgradeDatabase { from: (1, 12, 0) }}
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
----------------------------------------------------------------------
### Status:
@@ -37,7 +37,7 @@ catto [1,]
[timestamp] [0,]
----------------------------------------------------------------------
### All Batches:
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.28.0"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"[current version]"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
----------------------------------------------------------------------
### Batch to tasks mapping:
0 [0,]

View File

@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 28, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: [current version] }, kind: UpgradeDatabase { from: (1, 12, 0) }}
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
2 {uid: 2, status: enqueued, details: { primary_key: Some("bone"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
----------------------------------------------------------------------
@@ -40,7 +40,7 @@ doggo [2,]
[timestamp] [0,]
----------------------------------------------------------------------
### All Batches:
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.28.0"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"[current version]"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
----------------------------------------------------------------------
### Batch to tasks mapping:
0 [0,]

View File

@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 28, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: [current version] }, kind: UpgradeDatabase { from: (1, 12, 0) }}
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
2 {uid: 2, status: enqueued, details: { primary_key: Some("bone"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
3 {uid: 3, status: enqueued, details: { primary_key: Some("bone"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
@@ -43,7 +43,7 @@ doggo [2,3,]
[timestamp] [0,]
----------------------------------------------------------------------
### All Batches:
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.28.0"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"[current version]"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
----------------------------------------------------------------------
### Batch to tasks mapping:
0 [0,]

View File

@@ -1,7 +1,7 @@
use anyhow::bail;
use meilisearch_types::heed::{Env, RwTxn, WithoutTls};
use meilisearch_types::tasks::{Details, KindWithContent, Status, Task};
use meilisearch_types::versioning::{VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH};
use meilisearch_types::versioning;
use time::OffsetDateTime;
use tracing::info;
@@ -9,83 +9,82 @@ use crate::queue::TaskQueue;
use crate::versioning::Versioning;
trait UpgradeIndexScheduler {
fn upgrade(
&self,
env: &Env<WithoutTls>,
wtxn: &mut RwTxn,
original: (u32, u32, u32),
) -> anyhow::Result<()>;
fn target_version(&self) -> (u32, u32, u32);
fn upgrade(&self, env: &Env<WithoutTls>, wtxn: &mut RwTxn) -> anyhow::Result<()>;
/// Whether the migration should be applied, depending on the initial version of the index scheduler before
/// any migration was applied
fn must_upgrade(&self, initial_version: (u32, u32, u32)) -> bool;
/// A progress-centric description of the migration
fn description(&self) -> &'static str;
}
/// Upgrade the index scheduler to the binary version.
///
/// # Warning
///
/// The current implementation uses a single wtxn to the index scheduler for the whole duration of the upgrade.
/// If migrations start taking a long time, it might prevent tasks from being registered.
/// If this issue manifests, then it can be mitigated by adding a `fn target_version` to `UpgradeIndexScheduler`,
/// to be able to write intermediate versions and drop the wtxn between applying migrations.
pub fn upgrade_index_scheduler(
env: &Env<WithoutTls>,
versioning: &Versioning,
from: (u32, u32, u32),
to: (u32, u32, u32),
initial_version: (u32, u32, u32),
) -> anyhow::Result<()> {
let current_major = to.0;
let current_minor = to.1;
let current_patch = to.2;
let target_major: u32 = versioning::VERSION_MAJOR;
let target_minor: u32 = versioning::VERSION_MINOR;
let target_patch: u32 = versioning::VERSION_PATCH;
let target_version = (target_major, target_minor, target_patch);
let upgrade_functions: &[&dyn UpgradeIndexScheduler] = &[
// This is the last upgrade function, it will be called when the index is up to date.
// any other upgrade function should be added before this one.
&ToCurrentNoOp {},
];
let start = match from {
(1, 12, _) => 0,
(1, 13, _) => 0,
(1, 14, _) => 0,
(1, 15, _) => 0,
(1, 16, _) => 0,
(1, 17, _) => 0,
(1, 18, _) => 0,
(1, 19, _) => 0,
(1, 20, _) => 0,
(1, 21, _) => 0,
(1, 22, _) => 0,
(1, 23, _) => 0,
(1, 24, _) => 0,
(1, 25, _) => 0,
(1, 26, _) => 0,
(1, 27, _) => 0,
(1, 28, _) => 0,
(major, minor, patch) => {
if major > current_major
|| (major == current_major && minor > current_minor)
|| (major == current_major && minor == current_minor && patch > current_patch)
{
bail!(
"Database version {major}.{minor}.{patch} is higher than the Meilisearch version {current_major}.{current_minor}.{current_patch}. Downgrade is not supported",
);
} else if major < 1 || (major == current_major && minor < 12) {
bail!(
"Database version {major}.{minor}.{patch} is too old for the experimental dumpless upgrade feature. Please generate a dump using the v{major}.{minor}.{patch} and import it in the v{current_major}.{current_minor}.{current_patch}",
);
} else {
bail!("Unknown database version: v{major}.{minor}.{patch}");
}
}
};
info!("Upgrading the task queue");
let mut local_from = from;
for upgrade in upgrade_functions[start..].iter() {
let target = upgrade.target_version();
info!(
"Upgrading from v{}.{}.{} to v{}.{}.{}",
local_from.0, local_from.1, local_from.2, target.0, target.1, target.2
);
let mut wtxn = env.write_txn()?;
upgrade.upgrade(env, &mut wtxn, local_from)?;
versioning.set_version(&mut wtxn, target)?;
wtxn.commit()?;
local_from = target;
if initial_version == target_version {
return Ok(());
}
let upgrade_functions: &[&dyn UpgradeIndexScheduler] = &[
// List all upgrade functions to apply in order here.
];
let (initial_major, initial_minor, initial_patch) = initial_version;
if initial_version > target_version {
bail!(
"Database version {initial_major}.{initial_minor}.{initial_patch} is higher than the Meilisearch version {target_major}.{target_minor}.{target_patch}. Downgrade is not supported",
);
}
if initial_version < (1, 12, 0) {
bail!(
"Database version {initial_major}.{initial_minor}.{initial_patch} is too old for the experimental dumpless upgrade feature. Please generate a dump using the v{initial_major}.{initial_minor}.{initial_patch} and import it in the v{target_major}.{target_minor}.{target_patch}",
);
}
info!("Upgrading the task queue");
let mut wtxn = env.write_txn()?;
let migration_count = upgrade_functions.len();
for (migration_index, upgrade) in upgrade_functions.iter().enumerate() {
if upgrade.must_upgrade(initial_version) {
info!(
"[{migration_index}/{migration_count}]Applying migration: {}",
upgrade.description()
);
upgrade.upgrade(env, &mut wtxn)?;
info!(
"[{}/{migration_count}]Migration applied: {}",
migration_index + 1,
upgrade.description()
)
} else {
info!(
"[{migration_index}/{migration_count}]Skipping unnecessary migration: {}",
upgrade.description()
)
}
}
versioning.set_version(&mut wtxn, target_version)?;
info!("Task queue upgraded, spawning the upgrade database task");
let queue = TaskQueue::new(env, &mut wtxn)?;
let uid = queue.next_task_id(&wtxn)?;
queue.register(
@@ -98,9 +97,9 @@ pub fn upgrade_index_scheduler(
finished_at: None,
error: None,
canceled_by: None,
details: Some(Details::UpgradeDatabase { from, to }),
details: Some(Details::UpgradeDatabase { from: initial_version, to: target_version }),
status: Status::Enqueued,
kind: KindWithContent::UpgradeDatabase { from },
kind: KindWithContent::UpgradeDatabase { from: initial_version },
network: None,
custom_metadata: None,
},
@@ -109,21 +108,3 @@ pub fn upgrade_index_scheduler(
Ok(())
}
#[allow(non_camel_case_types)]
struct ToCurrentNoOp {}
impl UpgradeIndexScheduler for ToCurrentNoOp {
fn upgrade(
&self,
_env: &Env<WithoutTls>,
_wtxn: &mut RwTxn,
_original: (u32, u32, u32),
) -> anyhow::Result<()> {
Ok(())
}
fn target_version(&self) -> (u32, u32, u32) {
(VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH)
}
}
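With `target_version` gone, a migration now only declares whether it applies to a given initial version, how to describe itself, and how to run inside the shared write transaction. A sketch of what a future migration could look like against the trait shown above; `RenameSomeKey`, its 1.29.0 cutoff, and its description are invented for illustration, and the sketch assumes it sits next to the trait in this same module:

use meilisearch_types::heed::{Env, RwTxn, WithoutTls};

// Hypothetical migration: only databases created before an assumed v1.29.0 need it.
struct RenameSomeKey;

impl UpgradeIndexScheduler for RenameSomeKey {
    fn upgrade(&self, _env: &Env<WithoutTls>, _wtxn: &mut RwTxn) -> anyhow::Result<()> {
        // The real body would mutate the task queue through `_wtxn`.
        Ok(())
    }

    fn must_upgrade(&self, initial_version: (u32, u32, u32)) -> bool {
        initial_version < (1, 29, 0)
    }

    fn description(&self) -> &'static str {
        "rename a key in the task queue"
    }
}

It would then be listed, in order, in the `upgrade_functions` slice so that `must_upgrade` decides at runtime whether it is applied or skipped.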

View File

@@ -64,14 +64,7 @@ impl Versioning {
};
wtxn.commit()?;
let bin_major: u32 = versioning::VERSION_MAJOR;
let bin_minor: u32 = versioning::VERSION_MINOR;
let bin_patch: u32 = versioning::VERSION_PATCH;
let to = (bin_major, bin_minor, bin_patch);
if from != to {
upgrade_index_scheduler(env, &this, from, to)?;
}
upgrade_index_scheduler(env, &this, from)?;
// Once we reach this point it means the upgrade process, if there was one is entirely finished
// we can safely say we reached the latest version of the index scheduler

View File

@@ -183,7 +183,11 @@ pub async fn get_metrics(
crate::metrics::MEILISEARCH_LAST_FINISHED_BATCHES_PROGRESS_TRACE_MS.reset();
let (batches, _total) = index_scheduler.get_batches_from_authorized_indexes(
// Fetch the finished batches...
&Query { statuses: Some(vec![Status::Succeeded, Status::Failed]), ..Query::default() },
&Query {
statuses: Some(vec![Status::Succeeded, Status::Failed]),
limit: Some(1),
..Query::default()
},
auth_filters,
)?;
// ...and get the last batch only.

View File

@@ -1453,3 +1453,152 @@ async fn test_fetch_documents_pagination_with_sorting() {
]
"###);
}
// <https://github.com/meilisearch/meilisearch/issues/5998>
#[actix_rt::test]
async fn get_document_sort_field_not_in_any_document() {
let server = Server::new_shared();
let index = server.unique_index();
let (task, _code) = index.create(None).await;
server.wait_task(task.uid()).await.succeeded();
let (task, _code) = index.update_settings_sortable_attributes(json!(["created_at"])).await;
server.wait_task(task.uid()).await.succeeded();
let documents = json!([
{ "id": 1, "name": "Document 1" },
{ "id": 2, "name": "Document 2" }
]);
let (task, _code) = index.add_documents(documents, None).await;
server.wait_task(task.uid()).await.succeeded();
let (response, code) = index
.fetch_documents(json!({
"sort": ["created_at:asc"]
}))
.await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response), @r###"
{
"results": [
{
"id": 1,
"name": "Document 1"
},
{
"id": 2,
"name": "Document 2"
}
],
"offset": 0,
"limit": 20,
"total": 2
}
"###);
}
#[actix_rt::test]
async fn get_document_sort_includes_docs_without_field() {
let server = Server::new_shared();
let index = server.unique_index();
let (task, _code) = index.create(None).await;
server.wait_task(task.uid()).await.succeeded();
let (task, _code) = index.update_settings_sortable_attributes(json!(["created_at"])).await;
server.wait_task(task.uid()).await.succeeded();
let documents = json!([
{ "id": 1, "name": "Doc without created_at" },
{ "id": 2, "name": "Doc with created_at", "created_at": "2025-01-15" },
{ "id": 3, "name": "Another doc without created_at" },
{ "id": 4, "name": "Another doc with created_at", "created_at": "2025-01-10" }
]);
let (task, _code) = index.add_documents(documents, None).await;
server.wait_task(task.uid()).await.succeeded();
let (response, code) = index
.fetch_documents(json!({
"sort": ["created_at:asc"]
}))
.await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response), @r###"
{
"results": [
{
"id": 4,
"name": "Another doc with created_at",
"created_at": "2025-01-10"
},
{
"id": 2,
"name": "Doc with created_at",
"created_at": "2025-01-15"
},
{
"id": 1,
"name": "Doc without created_at"
},
{
"id": 3,
"name": "Another doc without created_at"
}
],
"offset": 0,
"limit": 20,
"total": 4
}
"###);
}
#[actix_rt::test]
async fn get_document_sort_desc_includes_docs_without_field() {
let server = Server::new_shared();
let index = server.unique_index();
let (task, _code) = index.create(None).await;
server.wait_task(task.uid()).await.succeeded();
let (task, _code) = index.update_settings_sortable_attributes(json!(["priority"])).await;
server.wait_task(task.uid()).await.succeeded();
let documents = json!([
{ "id": 1, "name": "Low priority", "priority": 1 },
{ "id": 2, "name": "No priority" },
{ "id": 3, "name": "High priority", "priority": 10 }
]);
let (task, _code) = index.add_documents(documents, None).await;
server.wait_task(task.uid()).await.succeeded();
let (response, code) = index
.fetch_documents(json!({
"sort": ["priority:desc"]
}))
.await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response), @r###"
{
"results": [
{
"id": 3,
"name": "High priority",
"priority": 10
},
{
"id": 1,
"name": "Low priority",
"priority": 1
},
{
"id": 2,
"name": "No priority"
}
],
"offset": 0,
"limit": 20,
"total": 3
}
"###);
}

View File

@@ -197,7 +197,7 @@ test_setting_routes!(
{
setting: vector_store,
update_verb: patch,
default_value: null
default_value: "experimental"
},
);

View File

@@ -2,6 +2,7 @@ mod chat;
mod distinct;
mod errors;
mod get_settings;
mod parent_seachable_fields;
mod prefix_search_settings;
mod proximity_settings;
mod tokenizer_customization;

View File

@@ -0,0 +1,114 @@
use meili_snap::{json_string, snapshot};
use once_cell::sync::Lazy;
use crate::common::Server;
use crate::json;
static DOCUMENTS: Lazy<crate::common::Value> = Lazy::new(|| {
json!([
{
"id": 1,
"meta": {
"title": "Soup of the day",
"description": "many the fish",
}
},
{
"id": 2,
"meta": {
"title": "Soup of day",
"description": "many the lazy fish",
}
},
{
"id": 3,
"meta": {
"title": "the Soup of day",
"description": "many the fish",
}
},
])
});
#[actix_rt::test]
async fn nested_field_becomes_searchable() {
let server = Server::new_shared();
let index = server.unique_index();
let (task, _status_code) = index.add_documents(DOCUMENTS.clone(), None).await;
server.wait_task(task.uid()).await.succeeded();
let (response, code) = index
.update_settings(json!({
"searchableAttributes": ["meta.title"]
}))
.await;
assert_eq!("202", code.as_str(), "{response:?}");
server.wait_task(response.uid()).await.succeeded();
// We expect no documents when searching for
// a nested non-searchable field
index
.search(json!({"q": "many fish"}), |response, code| {
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @r###"[]"###);
})
.await;
let (response, code) = index
.update_settings(json!({
"searchableAttributes": ["meta.title", "meta.description"]
}))
.await;
assert_eq!("202", code.as_str(), "{response:?}");
server.wait_task(response.uid()).await.succeeded();
// We expect all the documents when the nested field becomes searchable
index
.search(json!({"q": "many fish"}), |response, code| {
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @r###"
[
{
"id": 1,
"meta": {
"title": "Soup of the day",
"description": "many the fish"
}
},
{
"id": 3,
"meta": {
"title": "the Soup of day",
"description": "many the fish"
}
},
{
"id": 2,
"meta": {
"title": "Soup of day",
"description": "many the lazy fish"
}
}
]
"###);
})
.await;
let (response, code) = index
.update_settings(json!({
"searchableAttributes": ["meta.title"]
}))
.await;
assert_eq!("202", code.as_str(), "{response:?}");
server.wait_task(response.uid()).await.succeeded();
// We expect no documents when searching for
// a nested non-searchable field
index
.search(json!({"q": "many fish"}), |response, code| {
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @r###"[]"###);
})
.await;
}

View File

@@ -42,8 +42,16 @@ async fn version_too_old() {
std::fs::create_dir_all(&db_path).unwrap();
std::fs::write(db_path.join("VERSION"), "1.11.9999").unwrap();
let options = Opt { experimental_dumpless_upgrade: true, ..default_settings };
let err = Server::new_with_options(options).await.map(|_| ()).unwrap_err();
snapshot!(err, @"Database version 1.11.9999 is too old for the experimental dumpless upgrade feature. Please generate a dump using the v1.11.9999 and import it in the v1.28.0");
let err = Server::new_with_options(options).await.map(|_| ()).unwrap_err().to_string();
let major = meilisearch_types::versioning::VERSION_MAJOR;
let minor = meilisearch_types::versioning::VERSION_MINOR;
let patch = meilisearch_types::versioning::VERSION_PATCH;
let current_version = format!("{major}.{minor}.{patch}");
let err = err.replace(&current_version, "[current version]");
snapshot!(err, @"Database version 1.11.9999 is too old for the experimental dumpless upgrade feature. Please generate a dump using the v1.11.9999 and import it in the v[current version]");
}
#[actix_rt::test]
@@ -54,11 +62,21 @@ async fn version_requires_downgrade() {
std::fs::create_dir_all(&db_path).unwrap();
let major = meilisearch_types::versioning::VERSION_MAJOR;
let minor = meilisearch_types::versioning::VERSION_MINOR;
let patch = meilisearch_types::versioning::VERSION_PATCH + 1;
std::fs::write(db_path.join("VERSION"), format!("{major}.{minor}.{patch}")).unwrap();
let mut patch = meilisearch_types::versioning::VERSION_PATCH;
let current_version = format!("{major}.{minor}.{patch}");
patch += 1;
let future_version = format!("{major}.{minor}.{patch}");
std::fs::write(db_path.join("VERSION"), &future_version).unwrap();
let options = Opt { experimental_dumpless_upgrade: true, ..default_settings };
let err = Server::new_with_options(options).await.map(|_| ()).unwrap_err();
snapshot!(err, @"Database version 1.28.1 is higher than the Meilisearch version 1.28.0. Downgrade is not supported");
let err = err.to_string();
let err = err.replace(&current_version, "[current version]");
let err = err.replace(&future_version, "[future version]");
snapshot!(err, @"Database version [future version] is higher than the Meilisearch version [current version]. Downgrade is not supported");
}
#[actix_rt::test]

View File

@@ -8,7 +8,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"progress": null,
"details": {
"upgradeFrom": "v1.12.0",
"upgradeTo": "v1.28.0"
"upgradeTo": "[current version]"
},
"stats": {
"totalNbTasks": 1,

View File

@@ -8,7 +8,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"progress": null,
"details": {
"upgradeFrom": "v1.12.0",
"upgradeTo": "v1.28.0"
"upgradeTo": "[current version]"
},
"stats": {
"totalNbTasks": 1,

View File

@@ -8,7 +8,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"progress": null,
"details": {
"upgradeFrom": "v1.12.0",
"upgradeTo": "v1.28.0"
"upgradeTo": "[current version]"
},
"stats": {
"totalNbTasks": 1,

View File

@@ -12,7 +12,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"canceledBy": null,
"details": {
"upgradeFrom": "v1.12.0",
"upgradeTo": "v1.28.0"
"upgradeTo": "[current version]"
},
"error": null,
"duration": "[duration]",

View File

@@ -12,7 +12,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"canceledBy": null,
"details": {
"upgradeFrom": "v1.12.0",
"upgradeTo": "v1.28.0"
"upgradeTo": "[current version]"
},
"error": null,
"duration": "[duration]",

View File

@@ -12,7 +12,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"canceledBy": null,
"details": {
"upgradeFrom": "v1.12.0",
"upgradeTo": "v1.28.0"
"upgradeTo": "[current version]"
},
"error": null,
"duration": "[duration]",

View File

@@ -8,7 +8,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"progress": null,
"details": {
"upgradeFrom": "v1.12.0",
"upgradeTo": "v1.28.0"
"upgradeTo": "[current version]"
},
"stats": {
"totalNbTasks": 1,

View File

@@ -12,7 +12,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"canceledBy": null,
"details": {
"upgradeFrom": "v1.12.0",
"upgradeTo": "v1.28.0"
"upgradeTo": "[current version]"
},
"error": null,
"duration": "[duration]",

View File

@@ -166,55 +166,55 @@ async fn check_the_index_scheduler(server: &Server) {
// We rewrite the first task for all calls because it may be the upgrade database with unknown dates and duration.
// The other tasks should NOT change
let (tasks, _) = server.tasks_filter("limit=1000").await;
snapshot!(json_string!(tasks, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]" }), name: "the_whole_task_queue_once_everything_has_been_processed");
snapshot!(json_string!(tasks, { ".results[0].details.upgradeTo" => "[current version]", ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]" }), name: "the_whole_task_queue_once_everything_has_been_processed");
let (batches, _) = server.batches_filter("limit=1000").await;
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "the_whole_batch_queue_once_everything_has_been_processed");
snapshot!(json_string!(batches, { ".results[0].details.upgradeTo" => "[current version]", ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "the_whole_batch_queue_once_everything_has_been_processed");
// Tests all the tasks query parameters
let (tasks, _) = server.tasks_filter("uids=10").await;
snapshot!(json_string!(tasks, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]" }), name: "tasks_filter_uids_equal_10");
snapshot!(json_string!(tasks, { ".results[0].details.upgradeTo" => "[current version]", ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]" }), name: "tasks_filter_uids_equal_10");
let (tasks, _) = server.tasks_filter("batchUids=10").await;
snapshot!(json_string!(tasks, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]" }), name: "tasks_filter_batchUids_equal_10");
snapshot!(json_string!(tasks, { ".results[0].details.upgradeTo" => "[current version]", ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]" }), name: "tasks_filter_batchUids_equal_10");
let (tasks, _) = server.tasks_filter("statuses=canceled").await;
snapshot!(json_string!(tasks, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]" }), name: "tasks_filter_statuses_equal_canceled");
snapshot!(json_string!(tasks, { ".results[0].details.upgradeTo" => "[current version]", ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]" }), name: "tasks_filter_statuses_equal_canceled");
// types has already been tested above to retrieve the upgrade database
let (tasks, _) = server.tasks_filter("canceledBy=19").await;
snapshot!(json_string!(tasks, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]" }), name: "tasks_filter_canceledBy_equal_19");
snapshot!(json_string!(tasks, { ".results[0].details.upgradeTo" => "[current version]", ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]" }), name: "tasks_filter_canceledBy_equal_19");
let (tasks, _) = server.tasks_filter("beforeEnqueuedAt=2025-01-16T16:47:41Z").await;
snapshot!(json_string!(tasks, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]" }), name: "tasks_filter_beforeEnqueuedAt_equal_2025-01-16T16_47_41");
snapshot!(json_string!(tasks, { ".results[0].details.upgradeTo" => "[current version]", ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]" }), name: "tasks_filter_beforeEnqueuedAt_equal_2025-01-16T16_47_41");
let (tasks, _) = server.tasks_filter("afterEnqueuedAt=2025-01-16T16:47:41Z").await;
snapshot!(json_string!(tasks, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]" }), name: "tasks_filter_afterEnqueuedAt_equal_2025-01-16T16_47_41");
snapshot!(json_string!(tasks, { ".results[0].details.upgradeTo" => "[current version]", ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]" }), name: "tasks_filter_afterEnqueuedAt_equal_2025-01-16T16_47_41");
let (tasks, _) = server.tasks_filter("beforeStartedAt=2025-01-16T16:47:41Z").await;
snapshot!(json_string!(tasks, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]" }), name: "tasks_filter_beforeStartedAt_equal_2025-01-16T16_47_41");
snapshot!(json_string!(tasks, { ".results[0].details.upgradeTo" => "[current version]", ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]" }), name: "tasks_filter_beforeStartedAt_equal_2025-01-16T16_47_41");
let (tasks, _) = server.tasks_filter("afterStartedAt=2025-01-16T16:47:41Z").await;
snapshot!(json_string!(tasks, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]" }), name: "tasks_filter_afterStartedAt_equal_2025-01-16T16_47_41");
snapshot!(json_string!(tasks, { ".results[0].details.upgradeTo" => "[current version]", ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]" }), name: "tasks_filter_afterStartedAt_equal_2025-01-16T16_47_41");
let (tasks, _) = server.tasks_filter("beforeFinishedAt=2025-01-16T16:47:41Z").await;
snapshot!(json_string!(tasks, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]" }), name: "tasks_filter_beforeFinishedAt_equal_2025-01-16T16_47_41");
snapshot!(json_string!(tasks, { ".results[0].details.upgradeTo" => "[current version]", ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]" }), name: "tasks_filter_beforeFinishedAt_equal_2025-01-16T16_47_41");
let (tasks, _) = server.tasks_filter("afterFinishedAt=2025-01-16T16:47:41Z").await;
snapshot!(json_string!(tasks, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]" }), name: "tasks_filter_afterFinishedAt_equal_2025-01-16T16_47_41");
snapshot!(json_string!(tasks, { ".results[0].details.upgradeTo" => "[current version]", ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]" }), name: "tasks_filter_afterFinishedAt_equal_2025-01-16T16_47_41");
// Tests all the batches query parameters
let (batches, _) = server.batches_filter("uids=10").await;
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_uids_equal_10");
snapshot!(json_string!(batches, { ".results[0].details.upgradeTo" => "[current version]", ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_uids_equal_10");
let (batches, _) = server.batches_filter("batchUids=10").await;
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_batchUids_equal_10");
snapshot!(json_string!(batches, { ".results[0].details.upgradeTo" => "[current version]", ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_batchUids_equal_10");
let (batches, _) = server.batches_filter("statuses=canceled").await;
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_statuses_equal_canceled");
snapshot!(json_string!(batches, { ".results[0].details.upgradeTo" => "[current version]", ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_statuses_equal_canceled");
// types has already been tested above to retrieve the upgrade database
let (batches, _) = server.batches_filter("canceledBy=19").await;
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_canceledBy_equal_19");
snapshot!(json_string!(batches, { ".results[0].details.upgradeTo" => "[current version]", ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_canceledBy_equal_19");
let (batches, _) = server.batches_filter("beforeEnqueuedAt=2025-01-16T16:47:41Z").await;
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_beforeEnqueuedAt_equal_2025-01-16T16_47_41");
snapshot!(json_string!(batches, { ".results[0].details.upgradeTo" => "[current version]", ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_beforeEnqueuedAt_equal_2025-01-16T16_47_41");
let (batches, _) = server.batches_filter("afterEnqueuedAt=2025-01-16T16:47:41Z").await;
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_afterEnqueuedAt_equal_2025-01-16T16_47_41");
snapshot!(json_string!(batches, { ".results[0].details.upgradeTo" => "[current version]", ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_afterEnqueuedAt_equal_2025-01-16T16_47_41");
let (batches, _) = server.batches_filter("beforeStartedAt=2025-01-16T16:47:41Z").await;
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_beforeStartedAt_equal_2025-01-16T16_47_41");
snapshot!(json_string!(batches, { ".results[0].details.upgradeTo" => "[current version]", ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_beforeStartedAt_equal_2025-01-16T16_47_41");
let (batches, _) = server.batches_filter("afterStartedAt=2025-01-16T16:47:41Z").await;
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_afterStartedAt_equal_2025-01-16T16_47_41");
snapshot!(json_string!(batches, { ".results[0].details.upgradeTo" => "[current version]", ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_afterStartedAt_equal_2025-01-16T16_47_41");
let (batches, _) = server.batches_filter("beforeFinishedAt=2025-01-16T16:47:41Z").await;
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_beforeFinishedAt_equal_2025-01-16T16_47_41");
snapshot!(json_string!(batches, { ".results[0].details.upgradeTo" => "[current version]", ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_beforeFinishedAt_equal_2025-01-16T16_47_41");
let (batches, _) = server.batches_filter("afterFinishedAt=2025-01-16T16:47:41Z").await;
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_afterFinishedAt_equal_2025-01-16T16_47_41");
snapshot!(json_string!(batches, { ".results[0].details.upgradeTo" => "[current version]", ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_afterFinishedAt_equal_2025-01-16T16_47_41");
let (stats, _) = server.stats().await;
assert_json_snapshot!(stats, {

View File

@@ -104,8 +104,8 @@ async fn binary_quantize_before_sending_documents() {
"manual": {
"embeddings": [
[
-1.0,
-1.0,
0.0,
0.0,
1.0
]
],
@@ -122,7 +122,7 @@ async fn binary_quantize_before_sending_documents() {
[
1.0,
1.0,
-1.0
0.0
]
],
"regenerate": false
@@ -191,8 +191,8 @@ async fn binary_quantize_after_sending_documents() {
"manual": {
"embeddings": [
[
-1.0,
-1.0,
0.0,
0.0,
1.0
]
],
@@ -209,7 +209,7 @@ async fn binary_quantize_after_sending_documents() {
[
1.0,
1.0,
-1.0
0.0
]
],
"regenerate": false

View File

@@ -500,13 +500,6 @@ async fn test_both_apis() {
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @r###"
[
{
"id": 0,
"name": "kefir",
"gender": "M",
"birthyear": 2023,
"breed": "Patou"
},
{
"id": 2,
"name": "Vénus",
@@ -527,6 +520,13 @@ async fn test_both_apis() {
"gender": "M",
"birthyear": 1995,
"breed": "Labrador Retriever"
},
{
"id": 0,
"name": "kefir",
"gender": "M",
"birthyear": 2023,
"breed": "Patou"
}
]
"###);
@@ -540,13 +540,6 @@ async fn test_both_apis() {
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @r###"
[
{
"id": 0,
"name": "kefir",
"gender": "M",
"birthyear": 2023,
"breed": "Patou"
},
{
"id": 2,
"name": "Vénus",
@@ -567,6 +560,13 @@ async fn test_both_apis() {
"gender": "M",
"birthyear": 1995,
"breed": "Labrador Retriever"
},
{
"id": 0,
"name": "kefir",
"gender": "M",
"birthyear": 2023,
"breed": "Patou"
}
]
"###);
@@ -581,18 +581,11 @@ async fn test_both_apis() {
snapshot!(json_string!(response["hits"]), @r###"
[
{
"id": 0,
"name": "kefir",
"id": 1,
"name": "Intel",
"gender": "M",
"birthyear": 2023,
"breed": "Patou"
},
{
"id": 3,
"name": "Max",
"gender": "M",
"birthyear": 1995,
"breed": "Labrador Retriever"
"birthyear": 2011,
"breed": "Beagle"
},
{
"id": 2,
@@ -602,11 +595,18 @@ async fn test_both_apis() {
"breed": "Jack Russel Terrier"
},
{
"id": 1,
"name": "Intel",
"id": 3,
"name": "Max",
"gender": "M",
"birthyear": 2011,
"breed": "Beagle"
"birthyear": 1995,
"breed": "Labrador Retriever"
},
{
"id": 0,
"name": "kefir",
"gender": "M",
"birthyear": 2023,
"breed": "Patou"
}
]
"###);
@@ -621,18 +621,11 @@ async fn test_both_apis() {
snapshot!(json_string!(response["hits"]), @r###"
[
{
"id": 0,
"name": "kefir",
"id": 1,
"name": "Intel",
"gender": "M",
"birthyear": 2023,
"breed": "Patou"
},
{
"id": 3,
"name": "Max",
"gender": "M",
"birthyear": 1995,
"breed": "Labrador Retriever"
"birthyear": 2011,
"breed": "Beagle"
},
{
"id": 2,
@@ -642,11 +635,18 @@ async fn test_both_apis() {
"breed": "Jack Russel Terrier"
},
{
"id": 1,
"name": "Intel",
"id": 3,
"name": "Max",
"gender": "M",
"birthyear": 2011,
"breed": "Beagle"
"birthyear": 1995,
"breed": "Labrador Retriever"
},
{
"id": 0,
"name": "kefir",
"gender": "M",
"birthyear": 2023,
"breed": "Patou"
}
]
"###);
@@ -661,18 +661,11 @@ async fn test_both_apis() {
snapshot!(json_string!(response["hits"]), @r###"
[
{
"id": 0,
"name": "kefir",
"id": 1,
"name": "Intel",
"gender": "M",
"birthyear": 2023,
"breed": "Patou"
},
{
"id": 3,
"name": "Max",
"gender": "M",
"birthyear": 1995,
"breed": "Labrador Retriever"
"birthyear": 2011,
"breed": "Beagle"
},
{
"id": 2,
@@ -682,11 +675,18 @@ async fn test_both_apis() {
"breed": "Jack Russel Terrier"
},
{
"id": 1,
"name": "Intel",
"id": 3,
"name": "Max",
"gender": "M",
"birthyear": 2011,
"breed": "Beagle"
"birthyear": 1995,
"breed": "Labrador Retriever"
},
{
"id": 0,
"name": "kefir",
"gender": "M",
"birthyear": 2023,
"breed": "Patou"
}
]
"###);
@@ -701,18 +701,11 @@ async fn test_both_apis() {
snapshot!(json_string!(response["hits"]), @r###"
[
{
"id": 0,
"name": "kefir",
"id": 1,
"name": "Intel",
"gender": "M",
"birthyear": 2023,
"breed": "Patou"
},
{
"id": 3,
"name": "Max",
"gender": "M",
"birthyear": 1995,
"breed": "Labrador Retriever"
"birthyear": 2011,
"breed": "Beagle"
},
{
"id": 2,
@@ -722,11 +715,18 @@ async fn test_both_apis() {
"breed": "Jack Russel Terrier"
},
{
"id": 1,
"name": "Intel",
"id": 3,
"name": "Max",
"gender": "M",
"birthyear": 2011,
"breed": "Beagle"
"birthyear": 1995,
"breed": "Labrador Retriever"
},
{
"id": 0,
"name": "kefir",
"gender": "M",
"birthyear": 2023,
"breed": "Patou"
}
]
"###);

View File

@@ -240,15 +240,25 @@ impl<'ctx> SortedDocumentsIteratorBuilder<'ctx> {
) -> crate::Result<SortedDocumentsIterator<'ctx>> {
let size = candidates.len() as usize;
// Get documents that have this facet field
let faceted_candidates = index.exists_faceted_documents_ids(rtxn, field_id)?;
// Documents that don't have this facet field should be returned at the end
let not_faceted_candidates = &candidates - &faceted_candidates;
// Only sort candidates that have the facet field
let faceted_candidates = candidates & faceted_candidates;
let mut not_faceted_candidates = Some(not_faceted_candidates);
// Perform the sort on the first field
let (number_iter, string_iter) = if ascending {
let number_iter = ascending_facet_sort(rtxn, number_db, field_id, candidates.clone())?;
let string_iter = ascending_facet_sort(rtxn, string_db, field_id, candidates)?;
let number_iter =
ascending_facet_sort(rtxn, number_db, field_id, faceted_candidates.clone())?;
let string_iter = ascending_facet_sort(rtxn, string_db, field_id, faceted_candidates)?;
(itertools::Either::Left(number_iter), itertools::Either::Left(string_iter))
} else {
let number_iter = descending_facet_sort(rtxn, number_db, field_id, candidates.clone())?;
let string_iter = descending_facet_sort(rtxn, string_db, field_id, candidates)?;
let number_iter =
descending_facet_sort(rtxn, number_db, field_id, faceted_candidates.clone())?;
let string_iter = descending_facet_sort(rtxn, string_db, field_id, faceted_candidates)?;
(itertools::Either::Right(number_iter), itertools::Either::Right(string_iter))
};
@@ -256,17 +266,37 @@ impl<'ctx> SortedDocumentsIteratorBuilder<'ctx> {
// Create builders for the next level of the tree
let number_iter = number_iter.map(|r| r.map(|(d, _)| d));
let string_iter = string_iter.map(|r| r.map(|(d, _)| d));
let next_children = number_iter.chain(string_iter).map(move |r| {
Ok(SortedDocumentsIteratorBuilder {
index,
rtxn,
number_db,
string_db,
fields: next_fields,
candidates: r?,
geo_candidates,
// Chain faceted documents with non-faceted documents at the end
let next_children = number_iter
.chain(string_iter)
.map(move |r| {
Ok(SortedDocumentsIteratorBuilder {
index,
rtxn,
number_db,
string_db,
fields: next_fields,
candidates: r?,
geo_candidates,
})
})
});
.chain(std::iter::from_fn(move || {
// Once all faceted candidates have been processed, return the non-faceted ones
if let Some(not_faceted) = not_faceted_candidates.take() {
if !not_faceted.is_empty() {
return Some(Ok(SortedDocumentsIteratorBuilder {
index,
rtxn,
number_db,
string_db,
fields: next_fields,
candidates: not_faceted,
geo_candidates,
}));
}
}
None
}));
Ok(SortedDocumentsIterator::Branch {
current_child: None,
@@ -398,10 +428,14 @@ pub fn recursive_sort<'ctx>(
};
if let Some((field, ascending)) = field {
if is_faceted(&field, &sortable_fields) {
// The field may be in sortable_fields but not in fields_ids_map if no document
// has ever contained this field. In that case, we just skip this sort criterion
// since there are no values to sort by. Documents will be returned in their
// default order for this field.
if let Some(field_id) = fields_ids_map.id(&field) {
fields.push(AscDescId::Facet { field_id, ascending });
continue;
}
continue;
}
return Err(UserError::InvalidDocumentSortableAttribute {
field: field.to_string(),

View File

@@ -18,6 +18,8 @@ use crate::{
pub struct Metadata {
/// The weight as defined in the FieldidsWeightsMap of the searchable attribute if it is searchable.
pub searchable: Option<Weight>,
/// The field is part of the exact attributes.
pub exact: bool,
/// The field is part of the sortable attributes.
pub sortable: bool,
/// The field is defined as the distinct attribute.
@@ -209,6 +211,7 @@ impl Metadata {
#[derive(Debug, Clone)]
pub struct MetadataBuilder {
searchable_attributes: Option<Vec<String>>,
exact_searchable_attributes: Vec<String>,
filterable_attributes: Vec<FilterableAttributesRule>,
sortable_attributes: HashSet<String>,
localized_attributes: Option<Vec<LocalizedAttributesRule>>,
@@ -220,15 +223,18 @@ impl MetadataBuilder {
pub fn from_index(index: &Index, rtxn: &RoTxn) -> Result<Self> {
let searchable_attributes = index
.user_defined_searchable_fields(rtxn)?
.map(|fields| fields.into_iter().map(|s| s.to_string()).collect());
.map(|fields| fields.into_iter().map(String::from).collect());
let exact_searchable_attributes =
index.exact_attributes(rtxn)?.into_iter().map(String::from).collect();
let filterable_attributes = index.filterable_attributes_rules(rtxn)?;
let sortable_attributes = index.sortable_fields(rtxn)?;
let localized_attributes = index.localized_attributes_rules(rtxn)?;
let distinct_attribute = index.distinct_field(rtxn)?.map(|s| s.to_string());
let distinct_attribute = index.distinct_field(rtxn)?.map(String::from);
let asc_desc_attributes = index.asc_desc_fields(rtxn)?;
Ok(Self::new(
searchable_attributes,
exact_searchable_attributes,
filterable_attributes,
sortable_attributes,
localized_attributes,
@@ -242,6 +248,7 @@ impl MetadataBuilder {
/// This is used for testing, prefer using `MetadataBuilder::from_index` instead.
pub fn new(
searchable_attributes: Option<Vec<String>>,
exact_searchable_attributes: Vec<String>,
filterable_attributes: Vec<FilterableAttributesRule>,
sortable_attributes: HashSet<String>,
localized_attributes: Option<Vec<LocalizedAttributesRule>>,
@@ -256,6 +263,7 @@ impl MetadataBuilder {
Self {
searchable_attributes,
exact_searchable_attributes,
filterable_attributes,
sortable_attributes,
localized_attributes,
@@ -269,6 +277,7 @@ impl MetadataBuilder {
// Vectors fields are not searchable, filterable, distinct or asc_desc
return Metadata {
searchable: None,
exact: false,
sortable: false,
distinct: false,
asc_desc: false,
@@ -296,6 +305,7 @@ impl MetadataBuilder {
// Geo fields are not searchable, distinct or asc_desc
return Metadata {
searchable: None,
exact: false,
sortable,
distinct: false,
asc_desc: false,
@@ -309,6 +319,7 @@ impl MetadataBuilder {
debug_assert!(!sortable, "geojson fields should not be sortable");
return Metadata {
searchable: None,
exact: false,
sortable,
distinct: false,
asc_desc: false,
@@ -329,6 +340,8 @@ impl MetadataBuilder {
None => Some(0),
};
let exact = self.exact_searchable_attributes.iter().any(|attr| is_faceted_by(field, attr));
let distinct =
self.distinct_attribute.as_ref().is_some_and(|distinct_field| field == distinct_field);
let asc_desc = self.asc_desc_attributes.contains(field);
@@ -343,6 +356,7 @@ impl MetadataBuilder {
Metadata {
searchable,
exact,
sortable,
distinct,
asc_desc,

View File

@@ -281,6 +281,9 @@ impl Index {
&mut wtxn,
(constants::VERSION_MAJOR, constants::VERSION_MINOR, constants::VERSION_PATCH),
)?;
// Databases from before v1.29 defaulted to arroy, so we store the backend
// explicitly here because the new default for freshly created indexes is hannoy.
this.put_vector_store(&mut wtxn, VectorStoreBackend::Hannoy)?;
}
wtxn.commit()?;
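
The comment above leans on the convention that an index with no stored backend is treated as an arroy index, which is why new indexes must write hannoy explicitly. A minimal sketch of that convention, assuming a simplified re-definition of `VectorStoreBackend` and an illustrative `effective_backend` helper (the real value is written through `put_vector_store` above):

#[derive(Debug, Clone, Copy, PartialEq)]
enum VectorStoreBackend {
    Arroy,
    Hannoy,
}

fn effective_backend(stored: Option<VectorStoreBackend>) -> VectorStoreBackend {
    // Databases created before v1.29 never wrote this key, so an absent value
    // is read as arroy; indexes created from now on record hannoy explicitly.
    stored.unwrap_or(VectorStoreBackend::Arroy)
}

fn main() {
    assert_eq!(effective_backend(None), VectorStoreBackend::Arroy);
    assert_eq!(
        effective_backend(Some(VectorStoreBackend::Hannoy)),
        VectorStoreBackend::Hannoy
    );
}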

View File

@@ -806,6 +806,10 @@ mod tests {
use crate::vector::db::IndexEmbeddingConfig;
use crate::{all_obkv_to_json, db_snap, Filter, FilterableAttributesRule, Search, UserError};
fn no_cancel() -> bool {
false
}
#[test]
fn simple_document_replacement() {
let index = TempIndex::new();
@@ -1985,7 +1989,7 @@ mod tests {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -2038,7 +2042,7 @@ mod tests {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -2057,7 +2061,7 @@ mod tests {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -2127,7 +2131,7 @@ mod tests {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -2146,7 +2150,7 @@ mod tests {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -2317,7 +2321,7 @@ mod tests {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -2333,7 +2337,7 @@ mod tests {
primary_key,
&document_changes,
embedders,
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -2381,7 +2385,7 @@ mod tests {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -2397,7 +2401,7 @@ mod tests {
primary_key,
&document_changes,
embedders,
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -2436,7 +2440,7 @@ mod tests {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -2452,7 +2456,7 @@ mod tests {
primary_key,
&document_changes,
embedders,
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -2490,7 +2494,7 @@ mod tests {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -2506,7 +2510,7 @@ mod tests {
primary_key,
&document_changes,
embedders,
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -2546,7 +2550,7 @@ mod tests {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -2562,7 +2566,7 @@ mod tests {
primary_key,
&document_changes,
embedders,
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -2607,7 +2611,7 @@ mod tests {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -2623,7 +2627,7 @@ mod tests {
primary_key,
&document_changes,
embedders,
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -2661,7 +2665,7 @@ mod tests {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -2677,7 +2681,7 @@ mod tests {
primary_key,
&document_changes,
embedders,
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -2715,7 +2719,7 @@ mod tests {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -2731,7 +2735,7 @@ mod tests {
primary_key,
&document_changes,
embedders,
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -2927,7 +2931,7 @@ mod tests {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -2943,7 +2947,7 @@ mod tests {
primary_key,
&document_changes,
embedders,
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -2988,7 +2992,7 @@ mod tests {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -3004,7 +3008,7 @@ mod tests {
primary_key,
&document_changes,
embedders,
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)
@@ -3046,7 +3050,7 @@ mod tests {
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
&no_cancel,
Progress::default(),
None,
)
@@ -3062,7 +3066,7 @@ mod tests {
primary_key,
&document_changes,
embedders,
&|| false,
&no_cancel,
&Progress::default(),
&Default::default(),
)

View File

@@ -8,17 +8,26 @@ use bumpalo::Bump;
use super::match_searchable_field;
use super::tokenize_document::{tokenizer_builder, DocumentTokenizer};
use crate::fields_ids_map::metadata::Metadata;
use crate::update::new::document::DocumentContext;
use crate::update::new::extract::cache::BalancedCaches;
use crate::update::new::extract::perm_json_p::contained_in;
use crate::update::new::extract::searchable::has_searchable_children;
use crate::update::new::indexer::document_changes::{
extract, DocumentChanges, Extractor, IndexingContext,
};
use crate::update::new::indexer::settings_changes::{
settings_change_extract, DocumentsIndentifiers, SettingsChangeExtractor,
};
use crate::update::new::ref_cell_ext::RefCellExt as _;
use crate::update::new::steps::IndexingStep;
use crate::update::new::thread_local::{FullySend, MostlySend, ThreadLocal};
use crate::update::new::DocumentChange;
use crate::{bucketed_position, DocumentId, FieldId, Result, MAX_POSITION_PER_ATTRIBUTE};
use crate::update::new::{DocumentChange, DocumentIdentifiers};
use crate::update::settings::SettingsDelta;
use crate::{
bucketed_position, DocumentId, FieldId, PatternMatch, Result, UserError,
MAX_POSITION_PER_ATTRIBUTE,
};
const MAX_COUNTED_WORDS: usize = 30;
@@ -34,6 +43,15 @@ pub struct WordDocidsBalancedCaches<'extractor> {
unsafe impl MostlySend for WordDocidsBalancedCaches<'_> {}
/// Whether to extract or skip fields during word extraction.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum FieldDbExtraction {
/// Extract the word and put it in to the fid-based databases.
Extract,
/// Do not store the word in the fid-based databases.
Skip,
}
impl<'extractor> WordDocidsBalancedCaches<'extractor> {
pub fn new_in(buckets: usize, max_memory: Option<usize>, alloc: &'extractor Bump) -> Self {
Self {
@@ -47,12 +65,14 @@ impl<'extractor> WordDocidsBalancedCaches<'extractor> {
}
}
#[allow(clippy::too_many_arguments)]
fn insert_add_u32(
&mut self,
field_id: FieldId,
position: u16,
word: &str,
exact: bool,
field_db_extraction: FieldDbExtraction,
docid: u32,
bump: &Bump,
) -> Result<()> {
@@ -66,11 +86,13 @@ impl<'extractor> WordDocidsBalancedCaches<'extractor> {
let buffer_size = word_bytes.len() + 1 + size_of::<FieldId>();
let mut buffer = BumpVec::with_capacity_in(buffer_size, bump);
buffer.clear();
buffer.extend_from_slice(word_bytes);
buffer.push(0);
buffer.extend_from_slice(&field_id.to_be_bytes());
self.word_fid_docids.insert_add_u32(&buffer, docid)?;
if field_db_extraction == FieldDbExtraction::Extract {
buffer.clear();
buffer.extend_from_slice(word_bytes);
buffer.push(0);
buffer.extend_from_slice(&field_id.to_be_bytes());
self.word_fid_docids.insert_add_u32(&buffer, docid)?;
}
let position = bucketed_position(position);
buffer.clear();
@@ -83,21 +105,26 @@ impl<'extractor> WordDocidsBalancedCaches<'extractor> {
self.flush_fid_word_count(&mut buffer)?;
}
self.fid_word_count
.entry(field_id)
.and_modify(|(_current_count, new_count)| *new_count.get_or_insert(0) += 1)
.or_insert((None, Some(1)));
if field_db_extraction == FieldDbExtraction::Extract {
self.fid_word_count
.entry(field_id)
.and_modify(|(_current_count, new_count)| *new_count.get_or_insert(0) += 1)
.or_insert((None, Some(1)));
}
self.current_docid = Some(docid);
Ok(())
}
#[allow(clippy::too_many_arguments)]
fn insert_del_u32(
&mut self,
field_id: FieldId,
position: u16,
word: &str,
exact: bool,
field_db_extraction: FieldDbExtraction,
docid: u32,
bump: &Bump,
) -> Result<()> {
@@ -111,11 +138,13 @@ impl<'extractor> WordDocidsBalancedCaches<'extractor> {
let buffer_size = word_bytes.len() + 1 + size_of::<FieldId>();
let mut buffer = BumpVec::with_capacity_in(buffer_size, bump);
buffer.clear();
buffer.extend_from_slice(word_bytes);
buffer.push(0);
buffer.extend_from_slice(&field_id.to_be_bytes());
self.word_fid_docids.insert_del_u32(&buffer, docid)?;
if field_db_extraction == FieldDbExtraction::Extract {
buffer.clear();
buffer.extend_from_slice(word_bytes);
buffer.push(0);
buffer.extend_from_slice(&field_id.to_be_bytes());
self.word_fid_docids.insert_del_u32(&buffer, docid)?;
}
let position = bucketed_position(position);
buffer.clear();
@@ -128,10 +157,12 @@ impl<'extractor> WordDocidsBalancedCaches<'extractor> {
self.flush_fid_word_count(&mut buffer)?;
}
self.fid_word_count
.entry(field_id)
.and_modify(|(current_count, _new_count)| *current_count.get_or_insert(0) += 1)
.or_insert((Some(1), None));
if field_db_extraction == FieldDbExtraction::Extract {
self.fid_word_count
.entry(field_id)
.and_modify(|(current_count, _new_count)| *current_count.get_or_insert(0) += 1)
.or_insert((Some(1), None));
}
self.current_docid = Some(docid);
@@ -325,6 +356,24 @@ impl WordDocidsExtractors {
exact_attributes.iter().any(|attr| contained_in(fname, attr))
|| disabled_typos_terms.is_exact(word)
};
let mut should_tokenize = |field_name: &str| {
let Some((field_id, meta)) = new_fields_ids_map.id_with_metadata_or_insert(field_name)
else {
return Err(UserError::AttributeLimitReached.into());
};
let pattern_match = if meta.is_searchable() {
PatternMatch::Match
} else {
// TODO: this should match on the field_name using the `match_field_legacy` function,
// but for legacy reasons we iterate over all the fields to fill the field_id_map.
PatternMatch::Parent
};
Ok((field_id, pattern_match))
};
match document_change {
DocumentChange::Deletion(inner) => {
let mut token_fn = |fname: &str, fid, pos, word: &str| {
@@ -333,13 +382,14 @@ impl WordDocidsExtractors {
pos,
word,
is_exact(fname, word),
FieldDbExtraction::Extract,
inner.docid(),
doc_alloc,
)
};
document_tokenizer.tokenize_document(
inner.current(rtxn, index, context.db_fields_ids_map)?,
new_fields_ids_map,
&mut should_tokenize,
&mut token_fn,
)?;
}
@@ -361,13 +411,14 @@ impl WordDocidsExtractors {
pos,
word,
is_exact(fname, word),
FieldDbExtraction::Extract,
inner.docid(),
doc_alloc,
)
};
document_tokenizer.tokenize_document(
inner.current(rtxn, index, context.db_fields_ids_map)?,
new_fields_ids_map,
&mut should_tokenize,
&mut token_fn,
)?;
@@ -377,13 +428,14 @@ impl WordDocidsExtractors {
pos,
word,
is_exact(fname, word),
FieldDbExtraction::Extract,
inner.docid(),
doc_alloc,
)
};
document_tokenizer.tokenize_document(
inner.merged(rtxn, index, context.db_fields_ids_map)?,
new_fields_ids_map,
&mut should_tokenize,
&mut token_fn,
)?;
}
@@ -394,13 +446,14 @@ impl WordDocidsExtractors {
pos,
word,
is_exact(fname, word),
FieldDbExtraction::Extract,
inner.docid(),
doc_alloc,
)
};
document_tokenizer.tokenize_document(
inner.inserted(),
new_fields_ids_map,
&mut should_tokenize,
&mut token_fn,
)?;
}
@@ -411,3 +464,292 @@ impl WordDocidsExtractors {
cached_sorter.flush_fid_word_count(&mut buffer)
}
}
pub struct WordDocidsSettingsExtractorsData<'a, SD> {
tokenizer: DocumentTokenizer<'a>,
max_memory_by_thread: Option<usize>,
buckets: usize,
settings_delta: &'a SD,
}
impl<'extractor, SD: SettingsDelta + Sync> SettingsChangeExtractor<'extractor>
for WordDocidsSettingsExtractorsData<'_, SD>
{
type Data = RefCell<Option<WordDocidsBalancedCaches<'extractor>>>;
fn init_data<'doc>(&'doc self, extractor_alloc: &'extractor Bump) -> crate::Result<Self::Data> {
Ok(RefCell::new(Some(WordDocidsBalancedCaches::new_in(
self.buckets,
self.max_memory_by_thread,
extractor_alloc,
))))
}
fn process<'doc>(
&'doc self,
documents: impl Iterator<Item = crate::Result<DocumentIdentifiers<'doc>>>,
context: &'doc DocumentContext<Self::Data>,
) -> crate::Result<()> {
for document in documents {
let document = document?;
SettingsChangeWordDocidsExtractors::extract_document_from_settings_change(
document,
context,
&self.tokenizer,
self.settings_delta,
)?;
}
Ok(())
}
}
pub struct SettingsChangeWordDocidsExtractors;
impl SettingsChangeWordDocidsExtractors {
pub fn run_extraction<'fid, 'indexer, 'index, 'extractor, SD, MSP>(
settings_delta: &SD,
documents: &'indexer DocumentsIndentifiers<'indexer>,
indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP>,
extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>,
step: IndexingStep,
) -> Result<WordDocidsCaches<'extractor>>
where
SD: SettingsDelta + Sync,
MSP: Fn() -> bool + Sync,
{
// Warning: this is duplicated code from extract_word_pair_proximity_docids.rs
// TODO we need to read the new AND old settings to support changing global parameters
let rtxn = indexing_context.index.read_txn()?;
let stop_words = indexing_context.index.stop_words(&rtxn)?;
let allowed_separators = indexing_context.index.allowed_separators(&rtxn)?;
let allowed_separators: Option<Vec<_>> =
allowed_separators.as_ref().map(|s| s.iter().map(String::as_str).collect());
let dictionary = indexing_context.index.dictionary(&rtxn)?;
let dictionary: Option<Vec<_>> =
dictionary.as_ref().map(|s| s.iter().map(String::as_str).collect());
let mut builder = tokenizer_builder(
stop_words.as_ref(),
allowed_separators.as_deref(),
dictionary.as_deref(),
);
let tokenizer = builder.build();
let localized_attributes_rules =
indexing_context.index.localized_attributes_rules(&rtxn)?.unwrap_or_default();
let document_tokenizer = DocumentTokenizer {
tokenizer: &tokenizer,
localized_attributes_rules: &localized_attributes_rules,
max_positions_per_attributes: MAX_POSITION_PER_ATTRIBUTE,
};
let extractor_data = WordDocidsSettingsExtractorsData {
tokenizer: document_tokenizer,
max_memory_by_thread: indexing_context.grenad_parameters.max_memory_by_thread(),
buckets: rayon::current_num_threads(),
settings_delta,
};
let datastore = ThreadLocal::new();
{
let span = tracing::debug_span!(target: "indexing::documents::extract", "vectors");
let _entered = span.enter();
settings_change_extract(
documents,
&extractor_data,
indexing_context,
extractor_allocs,
&datastore,
step,
)?;
}
let mut merger = WordDocidsCaches::new();
for cache in datastore.into_iter().flat_map(RefCell::into_inner) {
merger.push(cache)?;
}
Ok(merger)
}
/// Extracts document words from a settings change.
fn extract_document_from_settings_change<SD: SettingsDelta>(
document: DocumentIdentifiers<'_>,
context: &DocumentContext<RefCell<Option<WordDocidsBalancedCaches>>>,
document_tokenizer: &DocumentTokenizer,
settings_delta: &SD,
) -> Result<()> {
let mut cached_sorter_ref = context.data.borrow_mut_or_yield();
let cached_sorter = cached_sorter_ref.as_mut().unwrap();
let doc_alloc = &context.doc_alloc;
let new_fields_ids_map = settings_delta.new_fields_ids_map();
let old_fields_ids_map = context.index.fields_ids_map_with_metadata(&context.rtxn)?;
let old_searchable = settings_delta.old_searchable_attributes().as_ref();
let new_searchable = settings_delta.new_searchable_attributes().as_ref();
let current_document = document.current(
&context.rtxn,
context.index,
old_fields_ids_map.as_fields_ids_map(),
)?;
#[derive(Debug, Clone, Copy, PartialEq)]
enum ActionToOperate {
ReindexAllFields,
// TODO improve by listing field prefixes
IndexAddedFields,
SkipDocument,
}
let mut action = ActionToOperate::SkipDocument;
// Here we do a preliminary pass over the fields to determine which action to take.
// This pass never returns PatternMatch::Match, so no field content is actually
// tokenized.
document_tokenizer.tokenize_document(
current_document,
&mut |field_name| {
let fid = new_fields_ids_map.id(field_name).expect("All fields IDs must exist");
// If the document must be reindexed, early return NoMatch to stop the scanning process.
if action == ActionToOperate::ReindexAllFields {
return Ok((fid, PatternMatch::NoMatch));
}
let old_field_metadata = old_fields_ids_map.metadata(fid).unwrap();
let new_field_metadata = new_fields_ids_map.metadata(fid).unwrap();
action = match (old_field_metadata, new_field_metadata) {
// At least one field is added or removed from the exact fields => ReindexAllFields
(Metadata { exact: old_exact, .. }, Metadata { exact: new_exact, .. })
if old_exact != new_exact =>
{
ActionToOperate::ReindexAllFields
}
// At least one field is removed from the searchable fields => ReindexAllFields
(Metadata { searchable: Some(_), .. }, Metadata { searchable: None, .. }) => {
ActionToOperate::ReindexAllFields
}
// At least one field is added in the searchable fields => IndexAddedFields
(Metadata { searchable: None, .. }, Metadata { searchable: Some(_), .. }) => {
// We can safely overwrite the action, because we early return when action is ReindexAllFields.
ActionToOperate::IndexAddedFields
}
_ => action,
};
Ok((fid, PatternMatch::Parent))
},
&mut |_, _, _, _| Ok(()),
)?;
// Early return when we don't need to index the document
if action == ActionToOperate::SkipDocument {
return Ok(());
}
let mut should_tokenize = |field_name: &str| {
let field_id = new_fields_ids_map.id(field_name).expect("All fields IDs must exist");
let old_field_metadata = old_fields_ids_map.metadata(field_id).unwrap();
let new_field_metadata = new_fields_ids_map.metadata(field_id).unwrap();
let pattern_match = match action {
ActionToOperate::ReindexAllFields => {
if old_field_metadata.is_searchable() || new_field_metadata.is_searchable() {
PatternMatch::Match
// If any old or new field is searchable then we need to iterate over all fields
// else if any field matches we need to iterate over all fields
} else if has_searchable_children(
field_name,
old_searchable.zip(new_searchable).map(|(old, new)| old.iter().chain(new)),
) {
PatternMatch::Parent
} else {
PatternMatch::NoMatch
}
}
ActionToOperate::IndexAddedFields => {
// Was not searchable but now is
if !old_field_metadata.is_searchable() && new_field_metadata.is_searchable() {
PatternMatch::Match
// If the field is now a parent of a searchable field
} else if has_searchable_children(field_name, new_searchable) {
PatternMatch::Parent
} else {
PatternMatch::NoMatch
}
}
ActionToOperate::SkipDocument => unreachable!(),
};
Ok((field_id, pattern_match))
};
let old_disabled_typos_terms = settings_delta.old_disabled_typos_terms();
let new_disabled_typos_terms = settings_delta.new_disabled_typos_terms();
let mut token_fn = |_field_name: &str, field_id, pos, word: &str| {
let old_field_metadata = old_fields_ids_map.metadata(field_id).unwrap();
let new_field_metadata = new_fields_ids_map.metadata(field_id).unwrap();
match (old_field_metadata, new_field_metadata) {
(
Metadata { searchable: Some(_), exact: old_exact, .. },
Metadata { searchable: None, .. },
) => cached_sorter.insert_del_u32(
field_id,
pos,
word,
old_exact || old_disabled_typos_terms.is_exact(word),
// We deleted the field globally
FieldDbExtraction::Skip,
document.docid(),
doc_alloc,
),
(
Metadata { searchable: None, .. },
Metadata { searchable: Some(_), exact: new_exact, .. },
) => cached_sorter.insert_add_u32(
field_id,
pos,
word,
new_exact || new_disabled_typos_terms.is_exact(word),
FieldDbExtraction::Extract,
document.docid(),
doc_alloc,
),
(Metadata { searchable: None, .. }, Metadata { searchable: None, .. }) => {
unreachable!()
}
(Metadata { exact: old_exact, .. }, Metadata { exact: new_exact, .. }) => {
cached_sorter.insert_del_u32(
field_id,
pos,
word,
old_exact || old_disabled_typos_terms.is_exact(word),
// The field has already been extracted
FieldDbExtraction::Skip,
document.docid(),
doc_alloc,
)?;
cached_sorter.insert_add_u32(
field_id,
pos,
word,
new_exact || new_disabled_typos_terms.is_exact(word),
// The field has already been extracted
FieldDbExtraction::Skip,
document.docid(),
doc_alloc,
)
}
}
};
// TODO we must tokenize twice when we change global parameters like stop words,
// the language settings, dictionary, separators, non-separators...
document_tokenizer.tokenize_document(
current_document,
&mut should_tokenize,
&mut token_fn,
)?;
Ok(())
}
}
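
Stripped of the tokenizer plumbing, the preliminary pass above reduces to a per-document decision based on how each field's metadata changed between the old and new settings. A self-contained sketch of that decision, assuming simplified `FieldState` flags in place of the real `Metadata` (all names here are illustrative, not the Meilisearch API):

#[derive(Debug, Clone, Copy, PartialEq)]
enum Action {
    ReindexAllFields,
    IndexAddedFields,
    SkipDocument,
}

#[derive(Clone, Copy)]
struct FieldState {
    searchable: bool,
    exact: bool,
}

// One (old, new) pair per field of the document.
fn decide_action(fields: &[(FieldState, FieldState)]) -> Action {
    let mut action = Action::SkipDocument;
    for (old, new) in fields.iter().copied() {
        action = match (old, new) {
            // The exact flag changed: everything must be re-extracted.
            (FieldState { exact: o, .. }, FieldState { exact: n, .. }) if o != n => {
                return Action::ReindexAllFields
            }
            // A field stopped being searchable: its words must be removed.
            (FieldState { searchable: true, .. }, FieldState { searchable: false, .. }) => {
                return Action::ReindexAllFields
            }
            // A field became searchable: only the added fields need indexing.
            (FieldState { searchable: false, .. }, FieldState { searchable: true, .. }) => {
                Action::IndexAddedFields
            }
            _ => action,
        };
    }
    action
}

fn main() {
    let unchanged = FieldState { searchable: true, exact: false };
    let added =
        (FieldState { searchable: false, exact: false }, FieldState { searchable: true, exact: false });
    assert_eq!(decide_action(&[(unchanged, unchanged)]), Action::SkipDocument);
    assert_eq!(decide_action(&[(unchanged, unchanged), added]), Action::IndexAddedFields);
}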

View File

@@ -6,17 +6,24 @@ use bumpalo::Bump;
use super::match_searchable_field;
use super::tokenize_document::{tokenizer_builder, DocumentTokenizer};
use crate::fields_ids_map::metadata::Metadata;
use crate::proximity::ProximityPrecision::*;
use crate::proximity::{index_proximity, MAX_DISTANCE};
use crate::update::new::document::{Document, DocumentContext};
use crate::update::new::extract::cache::BalancedCaches;
use crate::update::new::indexer::document_changes::{
extract, DocumentChanges, Extractor, IndexingContext,
};
use crate::update::new::indexer::settings_change_extract;
use crate::update::new::indexer::settings_changes::{
DocumentsIndentifiers, SettingsChangeExtractor,
};
use crate::update::new::ref_cell_ext::RefCellExt as _;
use crate::update::new::steps::IndexingStep;
use crate::update::new::thread_local::{FullySend, ThreadLocal};
use crate::update::new::DocumentChange;
use crate::{FieldId, GlobalFieldsIdsMap, Result, MAX_POSITION_PER_ATTRIBUTE};
use crate::update::new::{DocumentChange, DocumentIdentifiers};
use crate::update::settings::SettingsDelta;
use crate::{FieldId, PatternMatch, Result, UserError, MAX_POSITION_PER_ATTRIBUTE};
pub struct WordPairProximityDocidsExtractorData<'a> {
tokenizer: DocumentTokenizer<'a>,
@@ -116,7 +123,7 @@ impl WordPairProximityDocidsExtractor {
// and to store the docids of the documents that have a number of words in a given field
// equal to or under than MAX_COUNTED_WORDS.
fn extract_document_change(
context: &DocumentContext<RefCell<BalancedCaches>>,
context: &DocumentContext<RefCell<BalancedCaches<'_>>>,
document_tokenizer: &DocumentTokenizer,
searchable_attributes: Option<&[&str]>,
document_change: DocumentChange,
@@ -147,8 +154,12 @@ impl WordPairProximityDocidsExtractor {
process_document_tokens(
document,
document_tokenizer,
new_fields_ids_map,
&mut word_positions,
&mut |field_name| {
new_fields_ids_map
.id_with_metadata_or_insert(field_name)
.ok_or(UserError::AttributeLimitReached.into())
},
&mut |(w1, w2), prox| {
del_word_pair_proximity.push(((w1, w2), prox));
},
@@ -170,8 +181,12 @@ impl WordPairProximityDocidsExtractor {
process_document_tokens(
document,
document_tokenizer,
new_fields_ids_map,
&mut word_positions,
&mut |field_name| {
new_fields_ids_map
.id_with_metadata_or_insert(field_name)
.ok_or(UserError::AttributeLimitReached.into())
},
&mut |(w1, w2), prox| {
del_word_pair_proximity.push(((w1, w2), prox));
},
@@ -180,8 +195,12 @@ impl WordPairProximityDocidsExtractor {
process_document_tokens(
document,
document_tokenizer,
new_fields_ids_map,
&mut word_positions,
&mut |field_name| {
new_fields_ids_map
.id_with_metadata_or_insert(field_name)
.ok_or(UserError::AttributeLimitReached.into())
},
&mut |(w1, w2), prox| {
add_word_pair_proximity.push(((w1, w2), prox));
},
@@ -192,8 +211,12 @@ impl WordPairProximityDocidsExtractor {
process_document_tokens(
document,
document_tokenizer,
new_fields_ids_map,
&mut word_positions,
&mut |field_name| {
new_fields_ids_map
.id_with_metadata_or_insert(field_name)
.ok_or(UserError::AttributeLimitReached.into())
},
&mut |(w1, w2), prox| {
add_word_pair_proximity.push(((w1, w2), prox));
},
@@ -257,8 +280,8 @@ fn drain_word_positions(
fn process_document_tokens<'doc>(
document: impl Document<'doc>,
document_tokenizer: &DocumentTokenizer,
fields_ids_map: &mut GlobalFieldsIdsMap,
word_positions: &mut VecDeque<(Rc<str>, u16)>,
field_id_and_metadata: &mut impl FnMut(&str) -> Result<(FieldId, Metadata)>,
word_pair_proximity: &mut impl FnMut((Rc<str>, Rc<str>), u8),
) -> Result<()> {
let mut field_id = None;
@@ -279,8 +302,248 @@ fn process_document_tokens<'doc>(
word_positions.push_back((Rc::from(word), pos));
Ok(())
};
document_tokenizer.tokenize_document(document, fields_ids_map, &mut token_fn)?;
let mut should_tokenize = |field_name: &str| {
let (field_id, meta) = field_id_and_metadata(field_name)?;
let pattern_match = if meta.is_searchable() {
PatternMatch::Match
} else {
// TODO: this should match on the field_name using the `match_field_legacy` function,
// but for legacy reasons we iterate over all the fields to fill the field_id_map.
PatternMatch::Parent
};
Ok((field_id, pattern_match))
};
document_tokenizer.tokenize_document(document, &mut should_tokenize, &mut token_fn)?;
drain_word_positions(word_positions, word_pair_proximity);
Ok(())
}
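// Illustration: a minimal, self-contained sketch (with stand-in `PatternMatch` and `FieldId`
// types; the real ones live in this crate) of the contract the new `should_tokenize`
// callback follows: `Match` means "tokenize this field", `Parent` means "keep walking nested
// values without tokenizing this level", and `NoMatch` means "skip the field entirely".
// The `searchable` list and the id counter below are assumptions for illustration only.
#[allow(dead_code)]
#[derive(Debug, Clone, Copy, PartialEq)]
enum PatternMatch { Match, Parent, NoMatch }
type FieldId = u16;

fn decide(field_name: &str, searchable: Option<&[&str]>, next_id: &mut FieldId) -> (FieldId, PatternMatch) {
    let id = *next_id;
    *next_id += 1;
    let selection = match searchable {
        // No explicit list: every field is searchable.
        None => PatternMatch::Match,
        Some(fields) if fields.contains(&field_name) => PatternMatch::Match,
        // Not searchable itself, but keep descending so nested searchable fields are reached.
        Some(_) => PatternMatch::Parent,
    };
    (id, selection)
}

fn main() {
    let searchable = ["title"];
    let mut next_id: FieldId = 0;
    assert_eq!(decide("title", Some(searchable.as_slice()), &mut next_id).1, PatternMatch::Match);
    assert_eq!(decide("release_date", Some(searchable.as_slice()), &mut next_id).1, PatternMatch::Parent);
    assert_eq!(decide("anything", None, &mut next_id).1, PatternMatch::Match);
}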
pub struct WordPairProximityDocidsSettingsExtractorsData<'a, SD> {
tokenizer: DocumentTokenizer<'a>,
max_memory_by_thread: Option<usize>,
buckets: usize,
settings_delta: &'a SD,
}
impl<'extractor, SD: SettingsDelta + Sync> SettingsChangeExtractor<'extractor>
for WordPairProximityDocidsSettingsExtractorsData<'_, SD>
{
type Data = RefCell<BalancedCaches<'extractor>>;
fn init_data<'doc>(&'doc self, extractor_alloc: &'extractor Bump) -> crate::Result<Self::Data> {
Ok(RefCell::new(BalancedCaches::new_in(
self.buckets,
self.max_memory_by_thread,
extractor_alloc,
)))
}
fn process<'doc>(
&'doc self,
documents: impl Iterator<Item = crate::Result<DocumentIdentifiers<'doc>>>,
context: &'doc DocumentContext<Self::Data>,
) -> crate::Result<()> {
for document in documents {
let document = document?;
SettingsChangeWordPairProximityDocidsExtractors::extract_document_from_settings_change(
document,
context,
&self.tokenizer,
self.settings_delta,
)?;
}
Ok(())
}
}
pub struct SettingsChangeWordPairProximityDocidsExtractors;
impl SettingsChangeWordPairProximityDocidsExtractors {
pub fn run_extraction<'fid, 'indexer, 'index, 'extractor, SD, MSP>(
settings_delta: &SD,
documents: &'indexer DocumentsIndentifiers<'indexer>,
indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP>,
extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>,
step: IndexingStep,
) -> Result<Vec<BalancedCaches<'extractor>>>
where
SD: SettingsDelta + Sync,
MSP: Fn() -> bool + Sync,
{
// Warning: this is duplicated code from extract_word_docids.rs
let rtxn = indexing_context.index.read_txn()?;
let stop_words = indexing_context.index.stop_words(&rtxn)?;
let allowed_separators = indexing_context.index.allowed_separators(&rtxn)?;
let allowed_separators: Option<Vec<_>> =
allowed_separators.as_ref().map(|s| s.iter().map(String::as_str).collect());
let dictionary = indexing_context.index.dictionary(&rtxn)?;
let dictionary: Option<Vec<_>> =
dictionary.as_ref().map(|s| s.iter().map(String::as_str).collect());
let mut builder = tokenizer_builder(
stop_words.as_ref(),
allowed_separators.as_deref(),
dictionary.as_deref(),
);
let tokenizer = builder.build();
let localized_attributes_rules =
indexing_context.index.localized_attributes_rules(&rtxn)?.unwrap_or_default();
let document_tokenizer = DocumentTokenizer {
tokenizer: &tokenizer,
localized_attributes_rules: &localized_attributes_rules,
max_positions_per_attributes: MAX_POSITION_PER_ATTRIBUTE,
};
let extractor_data = WordPairProximityDocidsSettingsExtractorsData {
tokenizer: document_tokenizer,
max_memory_by_thread: indexing_context.grenad_parameters.max_memory_by_thread(),
buckets: rayon::current_num_threads(),
settings_delta,
};
let datastore = ThreadLocal::new();
{
let span = tracing::trace_span!(target: "indexing::documents::extract", "word_pair_proximity_docids_extraction");
let _entered = span.enter();
settings_change_extract(
documents,
&extractor_data,
indexing_context,
extractor_allocs,
&datastore,
step,
)?;
}
Ok(datastore.into_iter().map(RefCell::into_inner).collect())
}
/// Extracts the word pair proximities of a document impacted by a settings change.
fn extract_document_from_settings_change<SD: SettingsDelta>(
document: DocumentIdentifiers<'_>,
context: &DocumentContext<RefCell<BalancedCaches<'_>>>,
document_tokenizer: &DocumentTokenizer,
settings_delta: &SD,
) -> Result<()> {
let mut cached_sorter = context.data.borrow_mut_or_yield();
let doc_alloc = &context.doc_alloc;
let new_fields_ids_map = settings_delta.new_fields_ids_map();
let old_fields_ids_map = settings_delta.old_fields_ids_map();
let old_proximity_precision = *settings_delta.old_proximity_precision();
let new_proximity_precision = *settings_delta.new_proximity_precision();
let current_document = document.current(
&context.rtxn,
context.index,
old_fields_ids_map.as_fields_ids_map(),
)?;
#[derive(Debug, Clone, Copy, PartialEq)]
enum ActionToOperate {
ReindexAllFields,
SkipDocument,
}
// TODO prefix_fid delete_old_fid_based_databases
let mut action = match (old_proximity_precision, new_proximity_precision) {
(ByAttribute, ByWord) => ActionToOperate::ReindexAllFields,
(_, _) => ActionToOperate::SkipDocument,
};
// Here we do a preliminary check to determine the action to take.
// This check doesn't trigger the tokenizer as we never return
// PatternMatch::Match.
if action != ActionToOperate::ReindexAllFields {
document_tokenizer.tokenize_document(
current_document,
&mut |field_name| {
let fid = new_fields_ids_map.id(field_name).expect("All field ids must exist");
// If the document must be reindexed, early return NoMatch to stop the scanning process.
if action == ActionToOperate::ReindexAllFields {
return Ok((fid, PatternMatch::NoMatch));
}
let old_field_metadata = old_fields_ids_map.metadata(fid).unwrap();
let new_field_metadata = new_fields_ids_map.metadata(fid).unwrap();
action = match (old_field_metadata, new_field_metadata) {
// At least one field is removed or added from the searchable fields
(
Metadata { searchable: Some(_), .. },
Metadata { searchable: None, .. },
)
| (
Metadata { searchable: None, .. },
Metadata { searchable: Some(_), .. },
) => ActionToOperate::ReindexAllFields,
_ => action,
};
Ok((fid, PatternMatch::Parent))
},
&mut |_, _, _, _| Ok(()),
)?;
}
// Early return when we don't need to index the document
if action == ActionToOperate::SkipDocument {
return Ok(());
}
let mut del_word_pair_proximity = bumpalo::collections::Vec::new_in(doc_alloc);
let mut add_word_pair_proximity = bumpalo::collections::Vec::new_in(doc_alloc);
// This is a VecDeque and will stay small, so it can remain on the heap for now.
let mut word_positions: VecDeque<(Rc<str>, u16)> =
VecDeque::with_capacity(MAX_DISTANCE as usize);
process_document_tokens(
current_document,
// TODO Tokenize must be based on old settings
document_tokenizer,
&mut word_positions,
&mut |field_name| {
Ok(old_fields_ids_map.id_with_metadata(field_name).expect("All fields must exist"))
},
&mut |(w1, w2), prox| {
del_word_pair_proximity.push(((w1, w2), prox));
},
)?;
process_document_tokens(
current_document,
// TODO Tokenize must be based on new settings
document_tokenizer,
&mut word_positions,
&mut |field_name| {
Ok(new_fields_ids_map.id_with_metadata(field_name).expect("All fields must exist"))
},
&mut |(w1, w2), prox| {
add_word_pair_proximity.push(((w1, w2), prox));
},
)?;
let mut key_buffer = bumpalo::collections::Vec::new_in(doc_alloc);
del_word_pair_proximity.sort_unstable();
del_word_pair_proximity.dedup_by(|(k1, _), (k2, _)| k1 == k2);
for ((w1, w2), prox) in del_word_pair_proximity.iter() {
let key = build_key(*prox, w1, w2, &mut key_buffer);
cached_sorter.insert_del_u32(key, document.docid())?;
}
add_word_pair_proximity.sort_unstable();
add_word_pair_proximity.dedup_by(|(k1, _), (k2, _)| k1 == k2);
for ((w1, w2), prox) in add_word_pair_proximity.iter() {
let key = build_key(*prox, w1, w2, &mut key_buffer);
cached_sorter.insert_add_u32(key, document.docid())?;
}
Ok(())
}
}
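// Illustration: a condensed, self-contained sketch of the per-document decision made above
// when settings change (stand-in enums; the real types live in the crate). The document is
// fully reindexed when the proximity precision switches from ByAttribute to ByWord, or when
// at least one of its fields enters or leaves the searchable set; otherwise it is skipped.
#[derive(Debug, Clone, Copy, PartialEq)]
enum ProximityPrecision { ByWord, ByAttribute }

#[derive(Debug, Clone, Copy, PartialEq)]
enum ActionToOperate { ReindexAllFields, SkipDocument }

fn decide(
    old: ProximityPrecision,
    new: ProximityPrecision,
    some_field_changed_searchability: bool,
) -> ActionToOperate {
    match (old, new) {
        (ProximityPrecision::ByAttribute, ProximityPrecision::ByWord) => {
            ActionToOperate::ReindexAllFields
        }
        _ if some_field_changed_searchability => ActionToOperate::ReindexAllFields,
        _ => ActionToOperate::SkipDocument,
    }
}

fn main() {
    use ActionToOperate::*;
    use ProximityPrecision::*;
    assert_eq!(decide(ByAttribute, ByWord, false), ReindexAllFields);
    assert_eq!(decide(ByWord, ByWord, true), ReindexAllFields);
    assert_eq!(decide(ByWord, ByWord, false), SkipDocument);
}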

View File

@@ -2,8 +2,12 @@ mod extract_word_docids;
mod extract_word_pair_proximity_docids;
mod tokenize_document;
pub use extract_word_docids::{WordDocidsCaches, WordDocidsExtractors};
pub use extract_word_pair_proximity_docids::WordPairProximityDocidsExtractor;
pub use extract_word_docids::{
SettingsChangeWordDocidsExtractors, WordDocidsCaches, WordDocidsExtractors,
};
pub use extract_word_pair_proximity_docids::{
SettingsChangeWordPairProximityDocidsExtractors, WordPairProximityDocidsExtractor,
};
use crate::attribute_patterns::{match_field_legacy, PatternMatch};
@@ -27,3 +31,17 @@ pub fn match_searchable_field(
selection
}
/// Returns `true` if the provided `field_name` is a parent of at least one of the fields contained in `searchable`,
/// or if `searchable` is `None`.
fn has_searchable_children<I, A>(field_name: &str, searchable: Option<I>) -> bool
where
I: IntoIterator<Item = A>,
A: AsRef<str>,
{
searchable.is_none_or(|fields| {
fields
.into_iter()
.any(|attr| match_field_legacy(attr.as_ref(), field_name) == PatternMatch::Parent)
})
}
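// Illustration: a rough, standalone sketch of what `has_searchable_children` answers, using a
// simplified dot-path matcher as a stand-in for `match_field_legacy` (an assumption made for
// the example only): does `field_name` sit above at least one configured searchable
// attribute, or is every field searchable (`searchable == None`)?
fn is_parent_of(attr: &str, field_name: &str) -> bool {
    // Assumption: nested fields are addressed with dot-separated paths.
    attr.strip_prefix(field_name).is_some_and(|rest| rest.starts_with('.'))
}

fn has_searchable_children(field_name: &str, searchable: Option<&[&str]>) -> bool {
    searchable.is_none_or(|fields| fields.iter().any(|attr| is_parent_of(attr, field_name)))
}

fn main() {
    assert!(has_searchable_children("address", Some(["address.city"].as_slice())));
    assert!(!has_searchable_children("title", Some(["address.city"].as_slice())));
    assert!(has_searchable_children("anything", None));
}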

View File

@@ -8,10 +8,7 @@ use crate::update::new::document::Document;
use crate::update::new::extract::perm_json_p::{
seek_leaf_values_in_array, seek_leaf_values_in_object, Depth,
};
use crate::{
FieldId, GlobalFieldsIdsMap, InternalError, LocalizedAttributesRule, Result, UserError,
MAX_WORD_LENGTH,
};
use crate::{FieldId, InternalError, LocalizedAttributesRule, Result, MAX_WORD_LENGTH};
// todo: should be crate::proximity::MAX_DISTANCE but it has been forgotten
const MAX_DISTANCE: u32 = 8;
@@ -26,26 +23,25 @@ impl DocumentTokenizer<'_> {
pub fn tokenize_document<'doc>(
&self,
document: impl Document<'doc>,
field_id_map: &mut GlobalFieldsIdsMap,
should_tokenize: &mut impl FnMut(&str) -> Result<(FieldId, PatternMatch)>,
token_fn: &mut impl FnMut(&str, FieldId, u16, &str) -> Result<()>,
) -> Result<()> {
let mut field_position = HashMap::new();
let mut tokenize_field = |field_name: &str, _depth, value: &Value| {
let Some((field_id, meta)) = field_id_map.id_with_metadata_or_insert(field_name) else {
return Err(UserError::AttributeLimitReached.into());
};
if meta.is_searchable() {
self.tokenize_field(field_id, field_name, value, token_fn, &mut field_position)?;
}
// todo: should be a match on the field_name using `match_field_legacy` function,
// but for legacy reasons we iterate over all the fields to fill the field_id_map.
Ok(PatternMatch::Match)
};
for entry in document.iter_top_level_fields() {
let (field_name, value) = entry?;
if let (_, PatternMatch::NoMatch) = should_tokenize(field_name)? {
continue;
}
let mut tokenize_field = |field_name: &str, _depth, value: &Value| {
let (fid, pattern_match) = should_tokenize(field_name)?;
if pattern_match == PatternMatch::Match {
self.tokenize_field(fid, field_name, value, token_fn, &mut field_position)?;
}
Ok(pattern_match)
};
// parse json.
match serde_json::to_value(value).map_err(InternalError::SerdeJson)? {
Value::Object(object) => seek_leaf_values_in_object(
@@ -192,7 +188,7 @@ mod test {
use super::*;
use crate::fields_ids_map::metadata::{FieldIdMapWithMetadata, MetadataBuilder};
use crate::update::new::document::{DocumentFromVersions, Versions};
use crate::FieldsIdsMap;
use crate::{FieldsIdsMap, GlobalFieldsIdsMap, UserError};
#[test]
fn test_tokenize_document() {
@@ -231,6 +227,7 @@ mod test {
Default::default(),
Default::default(),
Default::default(),
Default::default(),
None,
None,
Default::default(),
@@ -251,15 +248,19 @@ mod test {
let document = Versions::single(document);
let document = DocumentFromVersions::new(&document);
let mut should_tokenize = |field_name: &str| {
let Some(field_id) = global_fields_ids_map.id_or_insert(field_name) else {
return Err(UserError::AttributeLimitReached.into());
};
Ok((field_id, PatternMatch::Match))
};
document_tokenizer
.tokenize_document(
document,
&mut global_fields_ids_map,
&mut |_fname, fid, pos, word| {
words.insert([fid, pos], word.to_string());
Ok(())
},
)
.tokenize_document(document, &mut should_tokenize, &mut |_fname, fid, pos, word| {
words.insert([fid, pos], word.to_string());
Ok(())
})
.unwrap();
snapshot!(format!("{:#?}", words), @r###"

View File

@@ -1,5 +1,6 @@
use std::cell::RefCell;
use std::fmt::Debug;
use std::sync::RwLock;
use bumpalo::collections::Vec as BVec;
use bumpalo::Bump;
@@ -27,7 +28,10 @@ use crate::vector::extractor::{
use crate::vector::session::{EmbedSession, Input, Metadata, OnEmbed};
use crate::vector::settings::ReindexAction;
use crate::vector::{Embedding, RuntimeEmbedder, RuntimeEmbedders, RuntimeFragment};
use crate::{DocumentId, FieldDistribution, InternalError, Result, ThreadPoolNoAbort, UserError};
use crate::{
DocumentId, FieldDistribution, GlobalFieldsIdsMap, InternalError, Result, ThreadPoolNoAbort,
UserError,
};
pub struct EmbeddingExtractor<'a, 'b> {
embedders: &'a RuntimeEmbedders,
@@ -321,6 +325,15 @@ impl<'extractor, SD: SettingsDelta + Sync> SettingsChangeExtractor<'extractor>
let old_embedders = self.settings_delta.old_embedders();
let unused_vectors_distribution = UnusedVectorsDistributionBump::new_in(&context.doc_alloc);
// We get a reference to the new and old fields ids maps but
// note that those are local versions where updates to them
// will not be reflected in the database. It's not an issue
// because new settings do not generate new fields.
let new_fields_ids_map = RwLock::new(self.settings_delta.new_fields_ids_map().clone());
let new_fields_ids_map = RefCell::new(GlobalFieldsIdsMap::new(&new_fields_ids_map));
let old_fields_ids_map = RwLock::new(self.settings_delta.old_fields_ids_map().clone());
let old_fields_ids_map = RefCell::new(GlobalFieldsIdsMap::new(&old_fields_ids_map));
let mut all_chunks = BVec::with_capacity_in(embedders.len(), &context.doc_alloc);
let embedder_configs = context.index.embedding_configs();
for (embedder_name, action) in self.settings_delta.embedder_actions().iter() {
@@ -396,6 +409,7 @@ impl<'extractor, SD: SettingsDelta + Sync> SettingsChangeExtractor<'extractor>
if !must_regenerate {
continue;
}
// we need to regenerate the prompts for the document
chunks.settings_change_autogenerated(
document.docid(),
@@ -406,7 +420,8 @@ impl<'extractor, SD: SettingsDelta + Sync> SettingsChangeExtractor<'extractor>
context.db_fields_ids_map,
)?,
self.settings_delta,
context.new_fields_ids_map,
&old_fields_ids_map,
&new_fields_ids_map,
&unused_vectors_distribution,
old_is_user_provided,
fragments_changed,
@@ -442,7 +457,8 @@ impl<'extractor, SD: SettingsDelta + Sync> SettingsChangeExtractor<'extractor>
context.db_fields_ids_map,
)?,
self.settings_delta,
context.new_fields_ids_map,
&old_fields_ids_map,
&new_fields_ids_map,
&unused_vectors_distribution,
old_is_user_provided,
true,
@@ -638,7 +654,8 @@ impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {
external_docid: &'a str,
document: D,
settings_delta: &SD,
fields_ids_map: &'a RefCell<crate::GlobalFieldsIdsMap>,
old_fields_ids_map: &'a RefCell<GlobalFieldsIdsMap<'a>>,
new_fields_ids_map: &'a RefCell<GlobalFieldsIdsMap<'a>>,
unused_vectors_distribution: &UnusedVectorsDistributionBump<'a>,
old_is_user_provided: bool,
full_reindex: bool,
@@ -733,10 +750,17 @@ impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {
old_embedder.as_ref().map(|old_embedder| &old_embedder.document_template)
};
let extractor =
DocumentTemplateExtractor::new(document_template, doc_alloc, fields_ids_map);
let extractor = DocumentTemplateExtractor::new(
document_template,
doc_alloc,
new_fields_ids_map,
);
let old_extractor = old_document_template.map(|old_document_template| {
DocumentTemplateExtractor::new(old_document_template, doc_alloc, fields_ids_map)
DocumentTemplateExtractor::new(
old_document_template,
doc_alloc,
old_fields_ids_map,
)
});
let metadata =
Metadata { docid, external_docid, extractor_id: extractor.extractor_id() };

View File

@@ -372,11 +372,10 @@ where
SD: SettingsDelta + Sync,
{
// Create the list of document ids to extract
let rtxn = indexing_context.index.read_txn()?;
let all_document_ids =
indexing_context.index.documents_ids(&rtxn)?.into_iter().collect::<Vec<_>>();
let primary_key =
primary_key_from_db(indexing_context.index, &rtxn, &indexing_context.db_fields_ids_map)?;
let index = indexing_context.index;
let rtxn = index.read_txn()?;
let all_document_ids = index.documents_ids(&rtxn)?.into_iter().collect::<Vec<_>>();
let primary_key = primary_key_from_db(index, &rtxn, &indexing_context.db_fields_ids_map)?;
let documents = DocumentsIndentifiers::new(&all_document_ids, primary_key);
let span =
@@ -391,6 +390,133 @@ where
extractor_allocs,
)?;
{
let WordDocidsCaches {
word_docids,
word_fid_docids,
exact_word_docids,
word_position_docids,
fid_word_count_docids,
} = {
let span = tracing::trace_span!(target: "indexing::documents::extract", "word_docids");
let _entered = span.enter();
SettingsChangeWordDocidsExtractors::run_extraction(
settings_delta,
&documents,
indexing_context,
extractor_allocs,
IndexingStep::ExtractingWords,
)?
};
indexing_context.progress.update_progress(IndexingStep::MergingWordCaches);
{
let span = tracing::trace_span!(target: "indexing::documents::merge", "word_docids");
let _entered = span.enter();
indexing_context.progress.update_progress(MergingWordCache::WordDocids);
merge_and_send_docids(
word_docids,
index.word_docids.remap_types(),
index,
extractor_sender.docids::<WordDocids>(),
&indexing_context.must_stop_processing,
)?;
}
{
let span =
tracing::trace_span!(target: "indexing::documents::merge", "word_fid_docids");
let _entered = span.enter();
indexing_context.progress.update_progress(MergingWordCache::WordFieldIdDocids);
merge_and_send_docids(
word_fid_docids,
index.word_fid_docids.remap_types(),
index,
extractor_sender.docids::<WordFidDocids>(),
&indexing_context.must_stop_processing,
)?;
}
{
let span =
tracing::trace_span!(target: "indexing::documents::merge", "exact_word_docids");
let _entered = span.enter();
indexing_context.progress.update_progress(MergingWordCache::ExactWordDocids);
merge_and_send_docids(
exact_word_docids,
index.exact_word_docids.remap_types(),
index,
extractor_sender.docids::<ExactWordDocids>(),
&indexing_context.must_stop_processing,
)?;
}
{
let span =
tracing::trace_span!(target: "indexing::documents::merge", "word_position_docids");
let _entered = span.enter();
indexing_context.progress.update_progress(MergingWordCache::WordPositionDocids);
merge_and_send_docids(
word_position_docids,
index.word_position_docids.remap_types(),
index,
extractor_sender.docids::<WordPositionDocids>(),
&indexing_context.must_stop_processing,
)?;
}
{
let span =
tracing::trace_span!(target: "indexing::documents::merge", "fid_word_count_docids");
let _entered = span.enter();
indexing_context.progress.update_progress(MergingWordCache::FieldIdWordCountDocids);
merge_and_send_docids(
fid_word_count_docids,
index.field_id_word_count_docids.remap_types(),
index,
extractor_sender.docids::<FidWordCountDocids>(),
&indexing_context.must_stop_processing,
)?;
}
}
// Run the proximity extraction only if the precision is ByWord.
let new_proximity_precision = settings_delta.new_proximity_precision();
if *new_proximity_precision == ProximityPrecision::ByWord {
let caches = {
let span = tracing::trace_span!(target: "indexing::documents::extract", "word_pair_proximity_docids");
let _entered = span.enter();
SettingsChangeWordPairProximityDocidsExtractors::run_extraction(
settings_delta,
&documents,
indexing_context,
extractor_allocs,
IndexingStep::ExtractingWordProximity,
)?
};
{
let span = tracing::trace_span!(target: "indexing::documents::merge", "word_pair_proximity_docids");
let _entered = span.enter();
indexing_context.progress.update_progress(IndexingStep::MergingWordProximity);
merge_and_send_docids(
caches,
index.word_pair_proximity_docids.remap_types(),
index,
extractor_sender.docids::<WordPairProximityDocids>(),
&indexing_context.must_stop_processing,
)?;
}
}
'vectors: {
if settings_delta.embedder_actions().is_empty() {
break 'vectors;

View File

@@ -1,4 +1,4 @@
use std::collections::BTreeMap;
use std::collections::{BTreeMap, BTreeSet};
use std::sync::atomic::AtomicBool;
use std::sync::{Arc, Once, RwLock};
use std::thread::{self, Builder};
@@ -8,9 +8,11 @@ use document_changes::{DocumentChanges, IndexingContext};
pub use document_deletion::DocumentDeletion;
pub use document_operation::{DocumentOperation, PayloadStats};
use hashbrown::HashMap;
use heed::{RoTxn, RwTxn};
use heed::types::DecodeIgnore;
use heed::{BytesDecode, Database, RoTxn, RwTxn};
pub use partial_dump::PartialDump;
pub use post_processing::recompute_word_fst_from_word_docids_database;
pub use settings_changes::settings_change_extract;
pub use update_by_function::UpdateByFunction;
pub use write::ChannelCongestion;
use write::{build_vectors, update_index, write_to_db};
@@ -20,12 +22,18 @@ use super::steps::IndexingStep;
use super::thread_local::ThreadLocal;
use crate::documents::PrimaryKey;
use crate::fields_ids_map::metadata::{FieldIdMapWithMetadata, MetadataBuilder};
use crate::heed_codec::StrBEU16Codec;
use crate::progress::{EmbedderStats, Progress};
use crate::proximity::ProximityPrecision;
use crate::update::new::steps::SettingsIndexerStep;
use crate::update::new::FacetFieldIdsDelta;
use crate::update::settings::SettingsDelta;
use crate::update::GrenadParameters;
use crate::vector::settings::{EmbedderAction, RemoveFragments, WriteBackToDocuments};
use crate::vector::{Embedder, RuntimeEmbedders, VectorStore};
use crate::{FieldsIdsMap, GlobalFieldsIdsMap, Index, InternalError, Result, ThreadPoolNoAbort};
use crate::{
Error, FieldsIdsMap, GlobalFieldsIdsMap, Index, InternalError, Result, ThreadPoolNoAbort,
};
#[cfg(not(feature = "enterprise"))]
pub mod community_edition;
@@ -242,6 +250,20 @@ where
SD: SettingsDelta + Sync,
{
delete_old_embedders_and_fragments(wtxn, index, settings_delta)?;
delete_old_fid_based_databases(wtxn, index, settings_delta, must_stop_processing, progress)?;
// Clear the word_pair_proximity_docids database when switching from ByWord to ByAttribute
let old_proximity_precision = settings_delta.old_proximity_precision();
let new_proximity_precision = settings_delta.new_proximity_precision();
if *old_proximity_precision == ProximityPrecision::ByWord
&& *new_proximity_precision == ProximityPrecision::ByAttribute
{
index.word_pair_proximity_docids.clear(wtxn)?;
}
// TODO delete useless searchable databases
// - Clear fid_prefix_* in the post processing
// - clear the prefix + fid_prefix if setting `PrefixSearch` is enabled
let mut bbbuffers = Vec::new();
let finished_extraction = AtomicBool::new(false);
@@ -300,6 +322,8 @@ where
.unwrap()
})?;
let global_fields_ids_map = GlobalFieldsIdsMap::new(&new_fields_ids_map);
let new_embedders = settings_delta.new_embedders();
let embedder_actions = settings_delta.embedder_actions();
let index_embedder_category_ids = settings_delta.new_embedder_category_id();
@@ -334,6 +358,18 @@ where
})
.unwrap()?;
pool.install(|| {
// WARN: when implementing the facets, don't forget this
let facet_field_ids_delta = FacetFieldIdsDelta::new(0, 0);
post_processing::post_process(
indexing_context,
wtxn,
global_fields_ids_map,
facet_field_ids_delta,
)
})
.unwrap()?;
indexing_context.progress.update_progress(IndexingStep::BuildingGeoJson);
index.cellulite.build(
wtxn,
@@ -463,6 +499,106 @@ where
Ok(())
}
/// Deletes the entries referring to the provided
/// fids from the fid-based databases.
fn delete_old_fid_based_databases<SD, MSP>(
wtxn: &mut RwTxn<'_>,
index: &Index,
settings_delta: &SD,
must_stop_processing: &MSP,
progress: &Progress,
) -> Result<()>
where
SD: SettingsDelta + Sync,
MSP: Fn() -> bool + Sync,
{
let fids_to_delete: Option<BTreeSet<_>> = {
let rtxn = index.read_txn()?;
let fields_ids_map = index.fields_ids_map(&rtxn)?;
let old_searchable_attributes = settings_delta.old_searchable_attributes().as_ref();
let new_searchable_attributes = settings_delta.new_searchable_attributes().as_ref();
old_searchable_attributes.zip(new_searchable_attributes).map(|(old, new)| {
old.iter()
// Ignore the field if it is not searchable anymore
// or if it was never referenced in any document
.filter_map(|name| if new.contains(name) { None } else { fields_ids_map.id(name) })
.collect()
})
};
let Some(fids_to_delete) = fids_to_delete else {
return Ok(());
};
progress.update_progress(SettingsIndexerStep::DeletingOldWordFidDocids);
delete_old_word_fid_docids(wtxn, index.word_fid_docids, must_stop_processing, &fids_to_delete)?;
progress.update_progress(SettingsIndexerStep::DeletingOldFidWordCountDocids);
delete_old_fid_word_count_docids(wtxn, index, must_stop_processing, &fids_to_delete)?;
progress.update_progress(SettingsIndexerStep::DeletingOldWordPrefixFidDocids);
delete_old_word_fid_docids(
wtxn,
index.word_prefix_fid_docids,
must_stop_processing,
&fids_to_delete,
)?;
Ok(())
}
fn delete_old_word_fid_docids<'txn, MSP, DC>(
wtxn: &mut RwTxn<'txn>,
database: Database<StrBEU16Codec, DC>,
must_stop_processing: &MSP,
fids_to_delete: &BTreeSet<u16>,
) -> Result<(), Error>
where
MSP: Fn() -> bool + Sync,
DC: BytesDecode<'txn>,
{
let mut iter = database.iter_mut(wtxn)?.remap_data_type::<DecodeIgnore>();
while let Some(((_word, fid), ())) = iter.next().transpose()? {
// TODO should I call it that often?
if must_stop_processing() {
return Err(Error::InternalError(InternalError::AbortedIndexation));
}
if fids_to_delete.contains(&fid) {
// safety: We don't keep any references to the data.
unsafe { iter.del_current()? };
}
}
Ok(())
}
fn delete_old_fid_word_count_docids<MSP>(
wtxn: &mut RwTxn<'_>,
index: &Index,
must_stop_processing: &MSP,
fids_to_delete: &BTreeSet<u16>,
) -> Result<(), Error>
where
MSP: Fn() -> bool + Sync,
{
let db = index.field_id_word_count_docids.remap_data_type::<DecodeIgnore>();
for &fid_to_delete in fids_to_delete {
if must_stop_processing() {
return Err(Error::InternalError(InternalError::AbortedIndexation));
}
let mut iter = db.prefix_iter_mut(wtxn, &(fid_to_delete, 0))?;
while let Some(((fid, _word_count), ())) = iter.next().transpose()? {
debug_assert_eq!(fid, fid_to_delete);
// safety: We don't keep any references to the data.
unsafe { iter.del_current()? };
}
}
Ok(())
}
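// Illustration: a standalone sketch of how `fids_to_delete` is derived above, using plain std
// collections (the field names are illustrative). A field id is purged when the field was
// explicitly searchable before the settings change, is no longer in the new searchable list,
// and is known to the fields-ids map. When either list is the `*` wildcard (`None`), nothing
// is purged here.
use std::collections::{BTreeMap, BTreeSet};

fn fids_to_delete(
    old: Option<&[&str]>,
    new: Option<&[&str]>,
    fields_ids_map: &BTreeMap<&str, u16>,
) -> Option<BTreeSet<u16>> {
    old.zip(new).map(|(old, new)| {
        old.iter()
            .filter(|name| !new.contains(name))
            .filter_map(|name| fields_ids_map.get(*name).copied())
            .collect()
    })
}

fn main() {
    let map = BTreeMap::from([("title", 0u16), ("overview", 1), ("genres", 2)]);
    let fids = fids_to_delete(
        Some(["title", "overview"].as_slice()),
        Some(["title"].as_slice()),
        &map,
    );
    assert_eq!(fids, Some(BTreeSet::from([1u16])));
    assert_eq!(fids_to_delete(None, Some(["title"].as_slice()), &map), None);
}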
fn indexer_memory_settings(
current_num_threads: usize,
grenad_parameters: GrenadParameters,

View File

@@ -28,6 +28,9 @@ make_enum_progress! {
ChangingVectorStore,
UsingStableIndexer,
UsingExperimentalIndexer,
DeletingOldWordFidDocids,
DeletingOldFidWordCountDocids,
DeletingOldWordPrefixFidDocids,
}
}

View File

@@ -1589,33 +1589,33 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
// only use the new indexer when only the embedder possibly changed
if let Self {
searchable_fields: Setting::NotSet,
searchable_fields: _,
displayed_fields: Setting::NotSet,
filterable_fields: Setting::NotSet,
sortable_fields: Setting::NotSet,
criteria: Setting::NotSet,
stop_words: Setting::NotSet,
non_separator_tokens: Setting::NotSet,
separator_tokens: Setting::NotSet,
dictionary: Setting::NotSet,
stop_words: Setting::NotSet, // TODO (require force reindexing of searchables)
non_separator_tokens: Setting::NotSet, // TODO (require force reindexing of searchables)
separator_tokens: Setting::NotSet, // TODO (require force reindexing of searchables)
dictionary: Setting::NotSet, // TODO (require force reindexing of searchables)
distinct_field: Setting::NotSet,
synonyms: Setting::NotSet,
primary_key: Setting::NotSet,
authorize_typos: Setting::NotSet,
min_word_len_two_typos: Setting::NotSet,
min_word_len_one_typo: Setting::NotSet,
exact_words: Setting::NotSet,
exact_attributes: Setting::NotSet,
exact_words: Setting::NotSet, // TODO (require force reindexing of searchables)
exact_attributes: _,
max_values_per_facet: Setting::NotSet,
sort_facet_values_by: Setting::NotSet,
pagination_max_total_hits: Setting::NotSet,
proximity_precision: Setting::NotSet,
proximity_precision: _,
embedder_settings: _,
search_cutoff: Setting::NotSet,
localized_attributes_rules: Setting::NotSet,
prefix_search: Setting::NotSet,
localized_attributes_rules: Setting::NotSet, // TODO to start with
prefix_search: Setting::NotSet, // TODO continue with this
facet_search: Setting::NotSet,
disable_on_numbers: Setting::NotSet,
disable_on_numbers: Setting::NotSet, // TODO (require force reindexing of searchables)
chat: Setting::NotSet,
vector_store: Setting::NotSet,
wtxn: _,
@@ -1632,10 +1632,12 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
// Update index settings
let embedding_config_updates = self.update_embedding_configs()?;
self.update_user_defined_searchable_attributes()?;
self.update_exact_attributes()?;
self.update_proximity_precision()?;
let mut new_inner_settings =
InnerIndexSettings::from_index(self.index, self.wtxn, None)?;
new_inner_settings.recompute_searchables(self.wtxn, self.index)?;
// Note that we don't need to update the searchables here,
// as it will be done after the settings update.
let new_inner_settings = InnerIndexSettings::from_index(self.index, self.wtxn, None)?;
let primary_key_id = self
.index
@@ -2062,9 +2064,12 @@ impl InnerIndexSettings {
let sortable_fields = index.sortable_fields(rtxn)?;
let asc_desc_fields = index.asc_desc_fields(rtxn)?;
let distinct_field = index.distinct_field(rtxn)?.map(|f| f.to_string());
let user_defined_searchable_attributes = index
.user_defined_searchable_fields(rtxn)?
.map(|fields| fields.into_iter().map(|f| f.to_string()).collect());
let user_defined_searchable_attributes = match index.user_defined_searchable_fields(rtxn)? {
Some(fields) if fields.contains(&"*") => None,
Some(fields) => Some(fields.into_iter().map(|f| f.to_string()).collect()),
None => None,
};
let builder = MetadataBuilder::from_index(index, rtxn)?;
let fields_ids_map = FieldIdMapWithMetadata::new(fields_ids_map, builder);
let disabled_typos_terms = index.disabled_typos_terms(rtxn)?;
@@ -2578,8 +2583,20 @@ fn deserialize_sub_embedder(
/// Implement this trait for the settings delta type.
/// This is used in the new settings update flow and will allow to easily replace the old settings delta type: `InnerIndexSettingsDiff`.
pub trait SettingsDelta {
fn new_embedders(&self) -> &RuntimeEmbedders;
fn old_fields_ids_map(&self) -> &FieldIdMapWithMetadata;
fn new_fields_ids_map(&self) -> &FieldIdMapWithMetadata;
fn old_searchable_attributes(&self) -> &Option<Vec<String>>;
fn new_searchable_attributes(&self) -> &Option<Vec<String>>;
fn old_disabled_typos_terms(&self) -> &DisabledTyposTerms;
fn new_disabled_typos_terms(&self) -> &DisabledTyposTerms;
fn old_proximity_precision(&self) -> &ProximityPrecision;
fn new_proximity_precision(&self) -> &ProximityPrecision;
fn old_embedders(&self) -> &RuntimeEmbedders;
fn new_embedders(&self) -> &RuntimeEmbedders;
fn new_embedder_category_id(&self) -> &HashMap<String, u8>;
fn embedder_actions(&self) -> &BTreeMap<String, EmbedderAction>;
fn try_for_each_fragment_diff<F, E>(
@@ -2589,7 +2606,6 @@ pub trait SettingsDelta {
) -> std::result::Result<(), E>
where
F: FnMut(FragmentDiff) -> std::result::Result<(), E>;
fn new_fields_ids_map(&self) -> &FieldIdMapWithMetadata;
}
pub struct FragmentDiff<'a> {
@@ -2598,26 +2614,47 @@ pub struct FragmentDiff<'a> {
}
impl SettingsDelta for InnerIndexSettingsDiff {
fn new_embedders(&self) -> &RuntimeEmbedders {
&self.new.runtime_embedders
fn old_fields_ids_map(&self) -> &FieldIdMapWithMetadata {
&self.old.fields_ids_map
}
fn new_fields_ids_map(&self) -> &FieldIdMapWithMetadata {
&self.new.fields_ids_map
}
fn old_searchable_attributes(&self) -> &Option<Vec<String>> {
&self.old.user_defined_searchable_attributes
}
fn new_searchable_attributes(&self) -> &Option<Vec<String>> {
&self.new.user_defined_searchable_attributes
}
fn old_disabled_typos_terms(&self) -> &DisabledTyposTerms {
&self.old.disabled_typos_terms
}
fn new_disabled_typos_terms(&self) -> &DisabledTyposTerms {
&self.new.disabled_typos_terms
}
fn old_proximity_precision(&self) -> &ProximityPrecision {
&self.old.proximity_precision
}
fn new_proximity_precision(&self) -> &ProximityPrecision {
&self.new.proximity_precision
}
fn old_embedders(&self) -> &RuntimeEmbedders {
&self.old.runtime_embedders
}
fn new_embedders(&self) -> &RuntimeEmbedders {
&self.new.runtime_embedders
}
fn new_embedder_category_id(&self) -> &HashMap<String, u8> {
&self.new.embedder_category_id
}
fn embedder_actions(&self) -> &BTreeMap<String, EmbedderAction> {
&self.embedding_config_updates
}
fn new_fields_ids_map(&self) -> &FieldIdMapWithMetadata {
&self.new.fields_ids_map
}
fn try_for_each_fragment_diff<F, E>(
&self,
embedder_name: &str,

View File

@@ -14,28 +14,21 @@ fn set_and_reset_searchable_fields() {
let index = TempIndex::new();
// First we send 3 documents with ids from 1 to 3.
let mut wtxn = index.write_txn().unwrap();
index
.add_documents_using_wtxn(
&mut wtxn,
documents!([
{ "id": 1, "name": "kevin", "age": 23 },
{ "id": 2, "name": "kevina", "age": 21},
{ "id": 3, "name": "benoit", "age": 34 }
]),
)
.add_documents(documents!([
{ "id": 1, "name": "kevin", "age": 23 },
{ "id": 2, "name": "kevina", "age": 21},
{ "id": 3, "name": "benoit", "age": 34 }
]))
.unwrap();
// We change the searchable fields to be the "name" field only.
index
.update_settings_using_wtxn(&mut wtxn, |settings| {
.update_settings(|settings| {
settings.set_searchable_fields(vec!["name".into()]);
})
.unwrap();
wtxn.commit().unwrap();
db_snap!(index, fields_ids_map, @r###"
0 id |
1 name |

View File

@@ -5,103 +5,36 @@ mod v1_15;
mod v1_16;
use heed::RwTxn;
use v1_12::{V1_12_3_To_V1_13_0, V1_12_To_V1_12_3};
use v1_13::{V1_13_0_To_V1_13_1, V1_13_1_To_Latest_V1_13};
use v1_14::Latest_V1_13_To_Latest_V1_14;
use v1_15::Latest_V1_14_To_Latest_V1_15;
use v1_16::Latest_V1_15_To_V1_16_0;
use v1_12::{FixFieldDistribution, RecomputeStats};
use v1_13::AddNewStats;
use v1_14::UpgradeArroyVersion;
use v1_15::RecomputeWordFst;
use v1_16::SwitchToMultimodal;
use crate::constants::{VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH};
use crate::progress::{Progress, VariableNameStep};
use crate::{Index, InternalError, Result};
trait UpgradeIndex {
/// Returns `true` if `upgrade` should be called when the index started with version `initial_version`.
fn must_upgrade(&self, initial_version: (u32, u32, u32)) -> bool;
/// Returns `true` if the index scheduler must regenerate its cached stats.
fn upgrade(
&self,
wtxn: &mut RwTxn,
index: &Index,
original: (u32, u32, u32),
progress: Progress,
) -> Result<bool>;
fn target_version(&self) -> (u32, u32, u32);
fn upgrade(&self, wtxn: &mut RwTxn, index: &Index, progress: Progress) -> Result<bool>;
/// Description of the upgrade for progress display purposes.
fn description(&self) -> &'static str;
}
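// Illustration: a hypothetical migration written against the trait shape above, with stand-in
// types for `RwTxn`, `Index`, `Progress`, and `Result` so the sketch compiles on its own.
// The version threshold and the migration itself are assumptions for the example only:
// `must_upgrade` is decided from the *initial* index version, and bumping the stored version
// happens once, in the caller, after all applicable migrations have run.
struct RwTxn;     // stand-in
struct Index;     // stand-in
#[derive(Clone)]
struct Progress;  // stand-in
type Result<T> = std::result::Result<T, String>;

trait UpgradeIndex {
    /// Returns `true` if `upgrade` should be called when the index started with `initial_version`.
    fn must_upgrade(&self, initial_version: (u32, u32, u32)) -> bool;
    /// Returns `true` if the index scheduler must regenerate its cached stats.
    fn upgrade(&self, wtxn: &mut RwTxn, index: &Index, progress: Progress) -> Result<bool>;
    /// Description of the upgrade for progress display purposes.
    fn description(&self) -> &'static str;
}

// Hypothetical migration, for illustration only.
struct RecomputeExampleCache;

impl UpgradeIndex for RecomputeExampleCache {
    fn must_upgrade(&self, initial_version: (u32, u32, u32)) -> bool {
        // Only indexes created before the (assumed) fix need it; tuples compare lexicographically.
        initial_version < (1, 29, 0)
    }

    fn upgrade(&self, _wtxn: &mut RwTxn, _index: &Index, _progress: Progress) -> Result<bool> {
        // ... the actual migration work would go here ...
        Ok(true) // ask the scheduler to regenerate its cached stats
    }

    fn description(&self) -> &'static str {
        "Recomputing the example cache (hypothetical)"
    }
}

fn main() {
    let migration = RecomputeExampleCache;
    assert!(migration.must_upgrade((1, 28, 2)));
    assert!(!migration.must_upgrade((1, 29, 0)));
    assert_eq!(migration.description(), "Recomputing the example cache (hypothetical)");
    let _ = migration.upgrade(&mut RwTxn, &Index, Progress);
}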
const UPGRADE_FUNCTIONS: &[&dyn UpgradeIndex] = &[
&V1_12_To_V1_12_3 {},
&V1_12_3_To_V1_13_0 {},
&V1_13_0_To_V1_13_1 {},
&V1_13_1_To_Latest_V1_13 {},
&Latest_V1_13_To_Latest_V1_14 {},
&Latest_V1_14_To_Latest_V1_15 {},
&Latest_V1_15_To_V1_16_0 {},
&ToTargetNoOp { target: (1, 18, 0) },
&ToTargetNoOp { target: (1, 19, 0) },
&ToTargetNoOp { target: (1, 20, 0) },
&ToTargetNoOp { target: (1, 21, 0) },
&ToTargetNoOp { target: (1, 22, 0) },
&ToTargetNoOp { target: (1, 23, 0) },
&ToTargetNoOp { target: (1, 24, 0) },
&ToTargetNoOp { target: (1, 25, 0) },
&ToTargetNoOp { target: (1, 26, 0) },
&ToTargetNoOp { target: (1, 27, 0) },
&ToTargetNoOp { target: (1, 28, 0) },
// This is the last upgrade function, it will be called when the index is up to date.
// any other upgrade function should be added before this one.
&ToCurrentNoOp {},
&FixFieldDistribution {},
&RecomputeStats {},
&AddNewStats {},
&UpgradeArroyVersion {},
&RecomputeWordFst {},
&SwitchToMultimodal {},
];
/// Causes a compile-time error if the argument is not in range of `0..UPGRADE_FUNCTIONS.len()`
macro_rules! function_index {
($start:expr) => {{
const _CHECK_INDEX: () = {
if $start >= $crate::update::upgrade::UPGRADE_FUNCTIONS.len() {
panic!("upgrade functions out of range")
}
};
$start
}};
}
const fn start(from: (u32, u32, u32)) -> Option<usize> {
let start = match from {
(1, 12, 0..=2) => function_index!(0),
(1, 12, 3..) => function_index!(1),
(1, 13, 0) => function_index!(2),
(1, 13, _) => function_index!(4),
(1, 14, _) => function_index!(5),
// We must handle the current version in the match because in case of a failure some indexes may have been upgraded but not others.
(1, 15, _) => function_index!(6),
(1, 16, _) | (1, 17, _) => function_index!(7),
(1, 18, _) => function_index!(8),
(1, 19, _) => function_index!(9),
(1, 20, _) => function_index!(10),
(1, 21, _) => function_index!(11),
(1, 22, _) => function_index!(12),
(1, 23, _) => function_index!(13),
(1, 24, _) => function_index!(14),
(1, 25, _) => function_index!(15),
(1, 26, _) => function_index!(16),
(1, 27, _) => function_index!(17),
(1, 28, _) => function_index!(18),
// We deliberately don't add a placeholder with (VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH) here to force manually
// considering dumpless upgrade.
(_major, _minor, _patch) => return None,
};
Some(start)
}
/// Causes a compile-time error if the latest package cannot be upgraded.
///
/// This serves as a reminder to consider the proper dumpless upgrade implementation when changing the package version.
const _CHECK_PACKAGE_CAN_UPGRADE: () = {
if start((VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH)).is_none() {
panic!("cannot upgrade from latest package version")
}
};
/// Return true if the cached stats of the index must be regenerated
pub fn upgrade<MSP>(
wtxn: &mut RwTxn,
@@ -113,79 +46,34 @@ pub fn upgrade<MSP>(
where
MSP: Fn() -> bool + Sync,
{
let from = index.get_version(wtxn)?.unwrap_or(db_version);
let upgrade_functions = UPGRADE_FUNCTIONS;
let start =
start(from).ok_or_else(|| InternalError::CannotUpgradeToVersion(from.0, from.1, from.2))?;
let initial_version = index.get_version(wtxn)?.unwrap_or(db_version);
enum UpgradeVersion {}
let upgrade_path = &UPGRADE_FUNCTIONS[start..];
let mut current_version = from;
let mut regenerate_stats = false;
for (i, upgrade) in upgrade_path.iter().enumerate() {
for (i, upgrade) in upgrade_functions.iter().enumerate() {
if (must_stop_processing)() {
return Err(crate::Error::InternalError(InternalError::AbortedIndexation));
}
let target = upgrade.target_version();
progress.update_progress(VariableNameStep::<UpgradeVersion>::new(
format!(
"Upgrading from v{}.{}.{} to v{}.{}.{}",
current_version.0,
current_version.1,
current_version.2,
target.0,
target.1,
target.2
),
i as u32,
upgrade_path.len() as u32,
));
regenerate_stats |= upgrade.upgrade(wtxn, index, from, progress.clone())?;
index.put_version(wtxn, target)?;
current_version = target;
if upgrade.must_upgrade(initial_version) {
regenerate_stats |= upgrade.upgrade(wtxn, index, progress.clone())?;
progress.update_progress(VariableNameStep::<UpgradeVersion>::new(
upgrade.description(),
i as u32,
upgrade_functions.len() as u32,
));
} else {
progress.update_progress(VariableNameStep::<UpgradeVersion>::new(
"Skipping migration that must not be applied",
i as u32,
upgrade_functions.len() as u32,
));
}
}
index.put_version(wtxn, (VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH))?;
Ok(regenerate_stats)
}
#[allow(non_camel_case_types)]
struct ToCurrentNoOp {}
impl UpgradeIndex for ToCurrentNoOp {
fn upgrade(
&self,
_wtxn: &mut RwTxn,
_index: &Index,
_original: (u32, u32, u32),
_progress: Progress,
) -> Result<bool> {
Ok(false)
}
fn target_version(&self) -> (u32, u32, u32) {
(VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH)
}
}
/// Perform no operation during the upgrade except changing to the specified target version.
#[allow(non_camel_case_types)]
struct ToTargetNoOp {
pub target: (u32, u32, u32),
}
impl UpgradeIndex for ToTargetNoOp {
fn upgrade(
&self,
_wtxn: &mut RwTxn,
_index: &Index,
_original: (u32, u32, u32),
_progress: Progress,
) -> Result<bool> {
Ok(false)
}
fn target_version(&self) -> (u32, u32, u32) {
self.target
}
}

View File

@@ -4,17 +4,10 @@ use super::UpgradeIndex;
use crate::progress::Progress;
use crate::{make_enum_progress, Index, Result};
#[allow(non_camel_case_types)]
pub(super) struct V1_12_To_V1_12_3 {}
pub(super) struct FixFieldDistribution {}
impl UpgradeIndex for V1_12_To_V1_12_3 {
fn upgrade(
&self,
wtxn: &mut RwTxn,
index: &Index,
_original: (u32, u32, u32),
progress: Progress,
) -> Result<bool> {
impl UpgradeIndex for FixFieldDistribution {
fn upgrade(&self, wtxn: &mut RwTxn, index: &Index, progress: Progress) -> Result<bool> {
make_enum_progress! {
enum FieldDistribution {
RebuildingFieldDistribution,
@@ -25,27 +18,28 @@ impl UpgradeIndex for V1_12_To_V1_12_3 {
Ok(true)
}
fn target_version(&self) -> (u32, u32, u32) {
(1, 12, 3)
fn must_upgrade(&self, initial_version: (u32, u32, u32)) -> bool {
initial_version < (1, 12, 3)
}
fn description(&self) -> &'static str {
"Recomputing field distribution which was wrong before v1.12.3"
}
}
#[allow(non_camel_case_types)]
pub(super) struct V1_12_3_To_V1_13_0 {}
pub(super) struct RecomputeStats {}
impl UpgradeIndex for V1_12_3_To_V1_13_0 {
fn upgrade(
&self,
_wtxn: &mut RwTxn,
_index: &Index,
_original: (u32, u32, u32),
_progress: Progress,
) -> Result<bool> {
impl UpgradeIndex for RecomputeStats {
fn upgrade(&self, _wtxn: &mut RwTxn, _index: &Index, _progress: Progress) -> Result<bool> {
// recompute the indexes stats
Ok(true)
}
fn target_version(&self) -> (u32, u32, u32) {
(1, 13, 0)
fn must_upgrade(&self, initial_version: (u32, u32, u32)) -> bool {
initial_version < (1, 13, 0)
}
fn description(&self) -> &'static str {
"Recomputing stats"
}
}

View File

@@ -5,17 +5,10 @@ use crate::database_stats::DatabaseStats;
use crate::progress::Progress;
use crate::{make_enum_progress, Index, Result};
#[allow(non_camel_case_types)]
pub(super) struct V1_13_0_To_V1_13_1();
pub(super) struct AddNewStats();
impl UpgradeIndex for V1_13_0_To_V1_13_1 {
fn upgrade(
&self,
wtxn: &mut RwTxn,
index: &Index,
_original: (u32, u32, u32),
progress: Progress,
) -> Result<bool> {
impl UpgradeIndex for AddNewStats {
fn upgrade(&self, wtxn: &mut RwTxn, index: &Index, progress: Progress) -> Result<bool> {
make_enum_progress! {
enum DocumentsStats {
CreatingDocumentsStats,
@@ -30,26 +23,11 @@ impl UpgradeIndex for V1_13_0_To_V1_13_1 {
Ok(true)
}
fn target_version(&self) -> (u32, u32, u32) {
(1, 13, 1)
}
}
#[allow(non_camel_case_types)]
pub(super) struct V1_13_1_To_Latest_V1_13();
impl UpgradeIndex for V1_13_1_To_Latest_V1_13 {
fn upgrade(
&self,
_wtxn: &mut RwTxn,
_index: &Index,
_original: (u32, u32, u32),
_progress: Progress,
) -> Result<bool> {
Ok(false)
}
fn target_version(&self) -> (u32, u32, u32) {
(1, 13, 3)
fn must_upgrade(&self, initial_version: (u32, u32, u32)) -> bool {
initial_version < (1, 13, 1)
}
fn description(&self) -> &'static str {
"Computing newly introduced document stats"
}
}

View File

@@ -5,17 +5,10 @@ use super::UpgradeIndex;
use crate::progress::Progress;
use crate::{make_enum_progress, Index, Result};
#[allow(non_camel_case_types)]
pub(super) struct Latest_V1_13_To_Latest_V1_14();
pub(super) struct UpgradeArroyVersion();
impl UpgradeIndex for Latest_V1_13_To_Latest_V1_14 {
fn upgrade(
&self,
wtxn: &mut RwTxn,
index: &Index,
_original: (u32, u32, u32),
progress: Progress,
) -> Result<bool> {
impl UpgradeIndex for UpgradeArroyVersion {
fn upgrade(&self, wtxn: &mut RwTxn, index: &Index, progress: Progress) -> Result<bool> {
make_enum_progress! {
enum VectorStore {
UpdateInternalVersions,
@@ -35,7 +28,11 @@ impl UpgradeIndex for Latest_V1_13_To_Latest_V1_14 {
Ok(false)
}
fn target_version(&self) -> (u32, u32, u32) {
(1, 14, 0)
fn must_upgrade(&self, initial_version: (u32, u32, u32)) -> bool {
initial_version < (1, 14, 0)
}
fn description(&self) -> &'static str {
"Updating vector store with an internal version"
}
}

View File

@@ -7,25 +7,21 @@ use crate::progress::Progress;
use crate::update::new::indexer::recompute_word_fst_from_word_docids_database;
use crate::{Index, Result};
#[allow(non_camel_case_types)]
pub(super) struct Latest_V1_14_To_Latest_V1_15();
pub(super) struct RecomputeWordFst();
impl UpgradeIndex for Latest_V1_14_To_Latest_V1_15 {
fn upgrade(
&self,
wtxn: &mut RwTxn,
index: &Index,
_original: (u32, u32, u32),
progress: Progress,
) -> Result<bool> {
impl UpgradeIndex for RecomputeWordFst {
fn upgrade(&self, wtxn: &mut RwTxn, index: &Index, progress: Progress) -> Result<bool> {
// Recompute the word FST from the word docids database.
recompute_word_fst_from_word_docids_database(index, wtxn, &progress)?;
Ok(false)
}
fn must_upgrade(&self, initial_version: (u32, u32, u32)) -> bool {
initial_version < (1, 15, 0)
}
fn target_version(&self) -> (u32, u32, u32) {
(1, 15, 0)
fn description(&self) -> &'static str {
"Recomputing word FST from word docids database as it was wrong before v1.15.0"
}
}

View File

@@ -6,17 +6,10 @@ use crate::progress::Progress;
use crate::vector::db::{EmbedderInfo, EmbeddingStatus};
use crate::{Index, InternalError, Result};
#[allow(non_camel_case_types)]
pub(super) struct Latest_V1_15_To_V1_16_0();
pub(super) struct SwitchToMultimodal();
impl UpgradeIndex for Latest_V1_15_To_V1_16_0 {
fn upgrade(
&self,
wtxn: &mut RwTxn,
index: &Index,
_original: (u32, u32, u32),
_progress: Progress,
) -> Result<bool> {
impl UpgradeIndex for SwitchToMultimodal {
fn upgrade(&self, wtxn: &mut RwTxn, index: &Index, _progress: Progress) -> Result<bool> {
let v1_15_indexing_configs = index
.main
.remap_types::<Str, SerdeJson<Vec<super::v1_15::IndexEmbeddingConfig>>>()
@@ -41,8 +34,11 @@ impl UpgradeIndex for Latest_V1_15_To_V1_16_0 {
Ok(false)
}
fn must_upgrade(&self, initial_version: (u32, u32, u32)) -> bool {
initial_version < (1, 16, 0)
}
fn target_version(&self) -> (u32, u32, u32) {
(1, 16, 0)
fn description(&self) -> &'static str {
"Migrating the database for multimodal support"
}
}