Mirror of https://github.com/meilisearch/meilisearch.git (synced 2025-07-22 06:11:01 +00:00)

Compare commits: 87 commits
Comparing: snapshot-o... and post-updat...
Commits (SHA1):

dfca4b6219, 8d9eb2a7c4, 44586e089d, c8b7822d0d, 446b9c142c, e755e25847, 81419935f2, 51acd7a381,
3ec5b9d488, 55adbac2dd, fd7fbfa9eb, 3a93f88ba6, 7c1c4f9c26, 1f5412003d, 5da92a3d53, c4a8b84dc0,
ffe3faeca7, 0f07cfed14, 326a728434, e4733dcd42, a500fa053c, 61db56f785, 235556d699, a3a1065c16,
b025f1bcf1, 707d106a24, 97d6726291, 82fa571ef7, 5d453e6049, 9e7d7beb4a, a225ab2637, 94b43001db,
796a325972, 1db550ec7f, a10efedd2f, 55ec96d31a, 4249630791, 418fa47963, 0656a0d515, e36a8c50b9,
08ff135ad6, f729864466, 94ea263bef, 0e475cb5e6, 62de70b73c, 7707fb18dd, bb2e9419d3, cf68713145,
811143cbe9, c670e9a39b, 65f1b13475, db7ce03763, 7ed9adde29, 9ce7ccfbe7, 3deb1ef78f, 5820d822c8,
637bea0370, fd079c6757, 182e5d5632, 82aee6a9af, fca947219f, fb7ae9f97f, cd421fea1e, 1ad4235beb,
de6c7e551e, c0fe70c5f0, a09d08c7b6, 2e6aa63efc, f9807ba32e, 8c8cc59a6c, f540a69ac3, 7df2bdfb15,
71f7456748, c98b313d03, 69678ed8e1, bf144a94d8, b0b1888ef9, 6ec1d2b712, cbdf80893d, e2156ddfc7,
49dd50dab2, 13a88d6131, d9875b782d, cb16baab18, d3e4b2dfe7, d3cd5ea689, 3ed43f9097
.github/workflows/check-valid-milestone.yml (vendored, 2 changes)

@@ -17,7 +17,7 @@ jobs:
 uses: actions/checkout@v3
 - name: Validate PR milestone
-uses: actions/github-script@v6
+uses: actions/github-script@v7
 with:
 github-token: ${{ secrets.GITHUB_TOKEN }}
 script: |
.github/workflows/db-change-comments.yml (vendored, 2 changes)

@@ -44,7 +44,7 @@ jobs:
 if: github.event.label.name == 'db change'
 steps:
 - name: Add comment
-uses: actions/github-script@v6
+uses: actions/github-script@v7
 with:
 github-token: ${{ secrets.GITHUB_TOKEN }}
 script: |
.github/workflows/db-change-missing.yml (vendored, 2 changes)

@@ -12,7 +12,7 @@ jobs:
 runs-on: ubuntu-latest
 steps:
 - name: Checkout code
-uses: actions/checkout@v2
+uses: actions/checkout@v3
 - name: Check db change labels
 id: check_labels
 run: |
.github/workflows/sdks-tests.yml (vendored, 28 changes)

@@ -22,7 +22,7 @@ jobs:
 outputs:
 docker-image: ${{ steps.define-image.outputs.docker-image }}
 steps:
-- uses: actions/checkout@v4
+- uses: actions/checkout@v3
 - name: Define the Docker image we need to use
 id: define-image
 run: |
@@ -46,7 +46,7 @@ jobs:
 MEILISEARCH_VERSION: ${{ needs.define-docker-image.outputs.docker-image }}

 steps:
-- uses: actions/checkout@v4
+- uses: actions/checkout@v3
 with:
 repository: meilisearch/meilisearch-dotnet
 - name: Setup .NET Core
@@ -75,7 +75,7 @@ jobs:
 ports:
 - '7700:7700'
 steps:
-- uses: actions/checkout@v4
+- uses: actions/checkout@v3
 with:
 repository: meilisearch/meilisearch-dart
 - uses: dart-lang/setup-dart@v1
@@ -103,7 +103,7 @@ jobs:
 uses: actions/setup-go@v5
 with:
 go-version: stable
-- uses: actions/checkout@v4
+- uses: actions/checkout@v3
 with:
 repository: meilisearch/meilisearch-go
 - name: Get dependencies
@@ -129,7 +129,7 @@ jobs:
 ports:
 - '7700:7700'
 steps:
-- uses: actions/checkout@v4
+- uses: actions/checkout@v3
 with:
 repository: meilisearch/meilisearch-java
 - name: Set up Java
@@ -156,7 +156,7 @@ jobs:
 ports:
 - '7700:7700'
 steps:
-- uses: actions/checkout@v4
+- uses: actions/checkout@v3
 with:
 repository: meilisearch/meilisearch-js
 - name: Setup node
@@ -191,7 +191,7 @@ jobs:
 ports:
 - '7700:7700'
 steps:
-- uses: actions/checkout@v4
+- uses: actions/checkout@v3
 with:
 repository: meilisearch/meilisearch-php
 - name: Install PHP
@@ -220,7 +220,7 @@ jobs:
 ports:
 - '7700:7700'
 steps:
-- uses: actions/checkout@v4
+- uses: actions/checkout@v3
 with:
 repository: meilisearch/meilisearch-python
 - name: Set up Python
@@ -245,7 +245,7 @@ jobs:
 ports:
 - '7700:7700'
 steps:
-- uses: actions/checkout@v4
+- uses: actions/checkout@v3
 with:
 repository: meilisearch/meilisearch-ruby
 - name: Set up Ruby 3
@@ -270,7 +270,7 @@ jobs:
 ports:
 - '7700:7700'
 steps:
-- uses: actions/checkout@v4
+- uses: actions/checkout@v3
 with:
 repository: meilisearch/meilisearch-rust
 - name: Build
@@ -291,7 +291,7 @@ jobs:
 ports:
 - '7700:7700'
 steps:
-- uses: actions/checkout@v4
+- uses: actions/checkout@v3
 with:
 repository: meilisearch/meilisearch-swift
 - name: Run tests
@@ -314,7 +314,7 @@ jobs:
 ports:
 - '7700:7700'
 steps:
-- uses: actions/checkout@v4
+- uses: actions/checkout@v3
 with:
 repository: meilisearch/meilisearch-js-plugins
 - name: Setup node
@@ -345,7 +345,7 @@ jobs:
 ports:
 - '7700:7700'
 steps:
-- uses: actions/checkout@v4
+- uses: actions/checkout@v3
 with:
 repository: meilisearch/meilisearch-rails
 - name: Set up Ruby 3
@@ -369,7 +369,7 @@ jobs:
 ports:
 - '7700:7700'
 steps:
-- uses: actions/checkout@v4
+- uses: actions/checkout@v3
 with:
 repository: meilisearch/meilisearch-symfony
 - name: Install PHP
.github/workflows/test-suite.yml (vendored, 14 changes)

@@ -21,7 +21,7 @@ jobs:
 # Use ubuntu-22.04 to compile with glibc 2.35
 image: ubuntu:22.04
 steps:
-- uses: actions/checkout@v4
+- uses: actions/checkout@v3
 - name: Install needed dependencies
 run: |
 apt-get update && apt-get install -y curl
@@ -29,7 +29,7 @@ jobs:
 - name: Setup test with Rust stable
 uses: dtolnay/rust-toolchain@1.85
 - name: Cache dependencies
-uses: Swatinem/rust-cache@v2.7.7
+uses: Swatinem/rust-cache@v2.7.8
 - name: Run cargo check without any default features
 uses: actions-rs/cargo@v1
 with:
@@ -51,7 +51,7 @@ jobs:
 steps:
 - uses: actions/checkout@v3
 - name: Cache dependencies
-uses: Swatinem/rust-cache@v2.7.7
+uses: Swatinem/rust-cache@v2.7.8
 - uses: dtolnay/rust-toolchain@1.85
 - name: Run cargo check without any default features
 uses: actions-rs/cargo@v1
@@ -91,7 +91,7 @@ jobs:
 env:
 MEILI_TEST_OLLAMA_SERVER: "http://localhost:11434"
 steps:
-- uses: actions/checkout@v1
+- uses: actions/checkout@v3
 - name: Install Ollama
 run: |
 curl -fsSL https://ollama.com/install.sh | sudo -E sh
@@ -155,7 +155,7 @@ jobs:
 apt-get install build-essential -y
 - uses: dtolnay/rust-toolchain@1.85
 - name: Cache dependencies
-uses: Swatinem/rust-cache@v2.7.7
+uses: Swatinem/rust-cache@v2.7.8
 - name: Run tests in debug
 uses: actions-rs/cargo@v1
 with:
@@ -172,7 +172,7 @@ jobs:
 profile: minimal
 components: clippy
 - name: Cache dependencies
-uses: Swatinem/rust-cache@v2.7.7
+uses: Swatinem/rust-cache@v2.7.8
 - name: Run cargo clippy
 uses: actions-rs/cargo@v1
 with:
@@ -191,7 +191,7 @@ jobs:
 override: true
 components: rustfmt
 - name: Cache dependencies
-uses: Swatinem/rust-cache@v2.7.7
+uses: Swatinem/rust-cache@v2.7.8
 - name: Run cargo fmt
 # Since we never ran the `build.rs` script in the benchmark directory we are missing one auto-generated import file.
 # Since we want to trigger (and fail) this action as fast as possible, instead of building the benchmark crate
Cargo.lock (generated, 146 changes)

@@ -258,7 +258,7 @@ version = "0.7.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "891477e0c6a8957309ee5c45a6368af3ae14bb510732d2684ffa19af310920f9"
 dependencies = [
-"getrandom",
+"getrandom 0.2.15",
 "once_cell",
 "version_check",
 ]
@@ -271,7 +271,7 @@ checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011"
 dependencies = [
 "cfg-if",
 "const-random",
-"getrandom",
+"getrandom 0.2.15",
 "once_cell",
 "version_check",
 "zerocopy",
@@ -790,22 +790,20 @@ dependencies = [

 [[package]]
 name = "bzip2"
-version = "0.4.4"
+version = "0.5.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bdb116a6ef3f6c3698828873ad02c3014b3c85cadb88496095628e3ef1e347f8"
+checksum = "49ecfb22d906f800d4fe833b6282cf4dc1c298f5057ca0b5445e5c209735ca47"
 dependencies = [
 "bzip2-sys",
 "libc",
 ]

 [[package]]
 name = "bzip2-sys"
-version = "0.1.11+1.0.8"
+version = "0.1.13+1.0.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "736a955f3fa7875102d57c82b8cac37ec45224a07fd32d58f9f7a186b6cd4cdc"
+checksum = "225bff33b2141874fe80d71e07d6eec4f85c5c216453dd96388240f96e1acc14"
 dependencies = [
 "cc",
 "libc",
 "pkg-config",
 ]

@@ -1143,7 +1141,7 @@ version = "0.1.16"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "f9d839f2a20b0aee515dc581a6172f2321f96cab76c1a38a4c584a194955390e"
 dependencies = [
-"getrandom",
+"getrandom 0.2.15",
 "once_cell",
 "tiny-keccak",
 ]
@@ -1257,9 +1255,9 @@ dependencies = [

 [[package]]
 name = "crossbeam-channel"
-version = "0.5.14"
+version = "0.5.15"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "06ba6d68e24814cb8de6bb986db8222d3a027d15872cabc0d18817bc3c0e4471"
+checksum = "82b8f8f868b36967f9606790d1903570de9ceaf870a7bf9fbbd3016d636a2cb2"
 dependencies = [
 "crossbeam-utils",
 ]
@@ -2216,10 +2214,24 @@ dependencies = [
 "cfg-if",
 "js-sys",
 "libc",
-"wasi",
+"wasi 0.11.0+wasi-snapshot-preview1",
 "wasm-bindgen",
 ]

+[[package]]
+name = "getrandom"
+version = "0.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "43a49c392881ce6d5c3b8cb70f98717b7c07aabbdff06687b9030dbfbe2725f8"
+dependencies = [
+"cfg-if",
+"js-sys",
+"libc",
+"wasi 0.13.3+wasi-0.2.2",
+"wasm-bindgen",
+"windows-targets 0.52.6",
+]
+
 [[package]]
 name = "gimli"
 version = "0.27.3"
@@ -2733,6 +2745,7 @@ dependencies = [
 "bincode",
 "bumpalo",
 "bumparaw-collections",
+"byte-unit",
 "convert_case 0.6.0",
 "crossbeam-channel",
 "csv",
@@ -2741,6 +2754,7 @@ dependencies = [
 "enum-iterator",
 "file-store",
 "flate2",
+"indexmap",
 "insta",
 "maplit",
 "meili-snap",
@@ -2923,10 +2937,11 @@ dependencies = [

 [[package]]
 name = "js-sys"
-version = "0.3.69"
+version = "0.3.77"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "29c15563dc2726973df627357ce0c9ddddbea194836909d655df6a75d2cf296d"
+checksum = "1cfaf33c695fc6e08064efbc1f72ec937429614f25eef83af942d0e227c3a28f"
 dependencies = [
+"once_cell",
 "wasm-bindgen",
 ]

@@ -3518,6 +3533,17 @@ dependencies = [
 "crc",
 ]

+[[package]]
+name = "lzma-sys"
+version = "0.1.20"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5fda04ab3764e6cde78b9974eec4f779acaba7c4e84b36eca3cf77c581b85d27"
+dependencies = [
+"cc",
+"libc",
+"pkg-config",
+]
+
 [[package]]
 name = "macro_rules_attribute"
 version = "0.2.0"
@@ -3656,7 +3682,7 @@ dependencies = [
 "uuid",
 "wiremock",
 "yaup",
-"zip 2.2.2",
+"zip 2.3.0",
 ]

 [[package]]
@@ -3882,7 +3908,7 @@ checksum = "a4a650543ca06a924e8b371db273b2756685faae30f8487da1b56505a8f78b0c"
 dependencies = [
 "libc",
 "log",
-"wasi",
+"wasi 0.11.0+wasi-snapshot-preview1",
 "windows-sys 0.48.0",
 ]

@@ -3893,7 +3919,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "2886843bf800fba2e3377cff24abf6379b4c4d5c6681eaf9ea5b0d15090450bd"
 dependencies = [
 "libc",
-"wasi",
+"wasi 0.11.0+wasi-snapshot-preview1",
 "windows-sys 0.52.0",
 ]

@@ -4670,7 +4696,7 @@ version = "0.6.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
 dependencies = [
-"getrandom",
+"getrandom 0.2.15",
 ]

 [[package]]
@@ -4762,7 +4788,7 @@ version = "0.4.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "b033d837a7cf162d7993aded9304e30a83213c648b6e389db233191f891e5c2b"
 dependencies = [
-"getrandom",
+"getrandom 0.2.15",
 "redox_syscall 0.2.16",
 "thiserror 1.0.69",
 ]
@@ -4886,13 +4912,13 @@ dependencies = [

 [[package]]
 name = "ring"
-version = "0.17.13"
+version = "0.17.14"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "70ac5d832aa16abd7d1def883a8545280c20a60f523a370aa3a9617c2b8550ee"
+checksum = "a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7"
 dependencies = [
 "cc",
 "cfg-if",
-"getrandom",
+"getrandom 0.2.15",
 "libc",
 "untrusted",
 "windows-sys 0.52.0",
@@ -5576,7 +5602,7 @@ checksum = "9a8a559c81686f576e8cd0290cd2a24a2a9ad80c98b3478856500fcbd7acd704"
 dependencies = [
 "cfg-if",
 "fastrand",
-"getrandom",
+"getrandom 0.2.15",
 "once_cell",
 "rustix",
 "windows-sys 0.52.0",
@@ -5751,7 +5777,7 @@ dependencies = [
 "aho-corasick",
 "derive_builder 0.12.0",
 "esaxx-rs",
-"getrandom",
+"getrandom 0.2.15",
 "itertools 0.12.1",
 "lazy_static",
 "log",
@@ -5775,9 +5801,9 @@ dependencies = [

 [[package]]
 name = "tokio"
-version = "1.42.0"
+version = "1.43.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5cec9b21b0450273377fc97bd4c33a8acffc8c996c987a7c5b319a0083707551"
+checksum = "492a604e2fd7f814268a378409e6c92b5525d747d10db9a229723f55a417958c"
 dependencies = [
 "backtrace",
 "bytes",
@@ -5793,9 +5819,9 @@ dependencies = [

 [[package]]
 name = "tokio-macros"
-version = "2.4.0"
+version = "2.5.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "693d596312e88961bc67d7f1f97af8a70227d9f90c31bba5806eec004978d752"
+checksum = "6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8"
 dependencies = [
 "proc-macro2",
 "quote",
@@ -6238,7 +6264,7 @@ version = "1.11.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "f8c5f0a0af699448548ad1a2fbf920fb4bee257eae39953ba95cb84891a0446a"
 dependencies = [
-"getrandom",
+"getrandom 0.2.15",
 "serde",
 ]

@@ -6335,24 +6361,34 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"

 [[package]]
-name = "wasm-bindgen"
-version = "0.2.92"
+name = "wasi"
+version = "0.13.3+wasi-0.2.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4be2531df63900aeb2bca0daaaddec08491ee64ceecbee5076636a3b026795a8"
+checksum = "26816d2e1a4a36a2940b96c5296ce403917633dff8f3440e9b236ed6f6bacad2"
+dependencies = [
+"wit-bindgen-rt",
+]
+
+[[package]]
+name = "wasm-bindgen"
+version = "0.2.100"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1edc8929d7499fc4e8f0be2262a241556cfc54a0bea223790e71446f2aab1ef5"
 dependencies = [
 "cfg-if",
 "once_cell",
 "rustversion",
 "wasm-bindgen-macro",
 ]

 [[package]]
 name = "wasm-bindgen-backend"
-version = "0.2.92"
+version = "0.2.100"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "614d787b966d3989fa7bb98a654e369c762374fd3213d212cfc0251257e747da"
+checksum = "2f0a0651a5c2bc21487bde11ee802ccaf4c51935d0d3d42a6101f98161700bc6"
 dependencies = [
 "bumpalo",
 "log",
 "once_cell",
 "proc-macro2",
 "quote",
 "syn 2.0.87",
@@ -6373,9 +6409,9 @@ dependencies = [

 [[package]]
 name = "wasm-bindgen-macro"
-version = "0.2.92"
+version = "0.2.100"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a1f8823de937b71b9460c0c34e25f3da88250760bec0ebac694b49997550d726"
+checksum = "7fe63fc6d09ed3792bd0897b314f53de8e16568c2b3f7982f468c0bf9bd0b407"
 dependencies = [
 "quote",
 "wasm-bindgen-macro-support",
@@ -6383,9 +6419,9 @@ dependencies = [

 [[package]]
 name = "wasm-bindgen-macro-support"
-version = "0.2.92"
+version = "0.2.100"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7"
+checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de"
 dependencies = [
 "proc-macro2",
 "quote",
@@ -6396,9 +6432,12 @@ dependencies = [

 [[package]]
 name = "wasm-bindgen-shared"
-version = "0.2.92"
+version = "0.2.100"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "af190c94f2773fdb3729c55b007a722abb5384da03bc0986df4c289bf5567e96"
+checksum = "1a05d73b933a847d6cccdda8f838a22ff101ad9bf93e33684f39c1f5f0eece3d"
+dependencies = [
+"unicode-ident",
+]

 [[package]]
 name = "wasm-streams"
@@ -6803,6 +6842,15 @@ dependencies = [
 "url",
 ]

+[[package]]
+name = "wit-bindgen-rt"
+version = "0.33.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3268f3d866458b787f390cf61f4bbb563b922d091359f9608842999eaee3943c"
+dependencies = [
+"bitflags 2.9.0",
+]
+
 [[package]]
 name = "write16"
 version = "1.0.0"
@@ -6858,6 +6906,15 @@ dependencies = [
 "uuid",
 ]

+[[package]]
+name = "xz2"
+version = "0.1.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "388c44dc09d76f1536602ead6d325eb532f5c122f17782bd57fb47baeeb767e2"
+dependencies = [
+"lzma-sys",
+]
+
 [[package]]
 name = "yada"
 version = "0.5.1"
@@ -6999,9 +7056,9 @@ dependencies = [

 [[package]]
 name = "zip"
-version = "2.2.2"
+version = "2.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ae9c1ea7b3a5e1f4b922ff856a129881167511563dc219869afe3787fc0c1a45"
+checksum = "84e9a772a54b54236b9b744aaaf8d7be01b4d6e99725523cb82cb32d1c81b1d7"
 dependencies = [
 "aes",
 "arbitrary",
@@ -7012,15 +7069,16 @@ dependencies = [
 "deflate64",
 "displaydoc",
 "flate2",
+"getrandom 0.3.1",
 "hmac",
 "indexmap",
 "lzma-rs",
 "memchr",
 "pbkdf2",
 "rand",
 "sha1",
 "thiserror 2.0.9",
 "time",
+"xz2",
 "zeroize",
 "zopfli",
 "zstd",
@@ -305,6 +305,7 @@ pub(crate) mod test {
 localized_attributes: Setting::NotSet,
 facet_search: Setting::NotSet,
 prefix_search: Setting::NotSet,
+execute_after_update: Setting::NotSet,
 _kind: std::marker::PhantomData,
 };
 settings.check()
@@ -326,6 +327,7 @@ pub(crate) mod test {
 index_uids: maplit::btreemap! { "doggo".to_string() => 1 },
 progress_trace: Default::default(),
 write_channel_congestion: None,
+internal_database_sizes: Default::default(),
 },
 enqueued_at: Some(BatchEnqueuedAt {
 earliest: datetime!(2022-11-11 0:00 UTC),
@@ -397,6 +397,7 @@ impl<T> From<v5::Settings<T>> for v6::Settings<v6::Unchecked> {
 search_cutoff_ms: v6::Setting::NotSet,
 facet_search: v6::Setting::NotSet,
 prefix_search: v6::Setting::NotSet,
+execute_after_update: v6::Setting::NotSet,
 _kind: std::marker::PhantomData,
 }
 }
@@ -13,6 +13,7 @@ license.workspace = true
 [dependencies]
 anyhow = "1.0.95"
 bincode = "1.3.3"
+byte-unit = "5.1.6"
 bumpalo = "3.16.0"
 bumparaw-collections = "0.1.4"
 convert_case = "0.6.0"
@@ -22,6 +23,7 @@ dump = { path = "../dump" }
 enum-iterator = "2.1.0"
 file-store = { path = "../file-store" }
 flate2 = "1.0.35"
+indexmap = "2.7.0"
 meilisearch-auth = { path = "../meilisearch-auth" }
 meilisearch-types = { path = "../meilisearch-types" }
 memmap2 = "0.9.5"
@@ -45,7 +47,7 @@ uuid = { version = "1.11.0", features = ["serde", "v4"] }

 [dev-dependencies]
 big_s = "1.0.2"
-crossbeam-channel = "0.5.14"
+crossbeam-channel = "0.5.15"
 # fixed version due to format breakages in v1.40
 insta = { version = "=1.39.0", features = ["json", "redactions"] }
 maplit = "1.0.2"
@@ -344,6 +344,7 @@ pub fn snapshot_batch(batch: &Batch) -> String {
 let Batch { uid, details, stats, started_at, finished_at, progress: _, enqueued_at } = batch;
 let stats = BatchStats {
 progress_trace: Default::default(),
+internal_database_sizes: Default::default(),
 write_channel_congestion: None,
 ..stats.clone()
 };
@@ -625,8 +625,8 @@ impl IndexScheduler {
 task_id: Option<TaskId>,
 dry_run: bool,
 ) -> Result<Task> {
-// if the task doesn't delete anything and 50% of the task queue is full, we must refuse to enqueue the incomming task
-if !matches!(&kind, KindWithContent::TaskDeletion { tasks, .. } if !tasks.is_empty())
+// if the task doesn't delete or cancel anything and 40% of the task queue is full, we must refuse to enqueue the incoming task
+if !matches!(&kind, KindWithContent::TaskDeletion { tasks, .. } | KindWithContent::TaskCancelation { tasks, .. } if !tasks.is_empty())
 && (self.env.non_free_pages_size()? * 100) / self.env.info().map_size as u64 > 40
 {
 return Err(Error::NoSpaceLeftInTaskQueue);
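As a reading aid for the hunk above, here is a minimal self-contained sketch of the same admission rule, written against stand-in types (QueueStats, TaskKind and the helper are illustrative assumptions, not Meilisearch APIs): a write is refused once more than 40% of the task database is in use, unless the task is a deletion or cancelation that actually targets tasks and can therefore free space.

```rust
/// Illustrative stand-ins; the real scheduler reads these figures from its LMDB environment.
struct QueueStats {
    non_free_pages_size: u64, // bytes currently used by the task database
    map_size: u64,            // total capacity of the task database
}

enum TaskKind {
    TaskDeletion { tasks: Vec<u32> },
    TaskCancelation { tasks: Vec<u32> },
    Other,
}

/// Returns true when the task may be enqueued.
fn may_enqueue(kind: &TaskKind, stats: &QueueStats) -> bool {
    // Deletions and cancelations that actually target tasks are always allowed,
    // because they are the only way to reclaim space in a saturated queue.
    let frees_space = matches!(
        kind,
        TaskKind::TaskDeletion { tasks } | TaskKind::TaskCancelation { tasks } if !tasks.is_empty()
    );
    let usage_percent = stats.non_free_pages_size * 100 / stats.map_size;
    frees_space || usage_percent <= 40
}

fn main() {
    let stats = QueueStats { non_free_pages_size: 450, map_size: 1000 }; // 45% full
    assert!(!may_enqueue(&TaskKind::Other, &stats));
    assert!(may_enqueue(&TaskKind::TaskCancelation { tasks: vec![1] }, &stats));
}
```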
@@ -64,6 +64,13 @@ make_enum_progress! {
 }
 }

+make_enum_progress! {
+pub enum FinalizingIndexStep {
+Committing,
+ComputingStats,
+}
+}
+
 make_enum_progress! {
 pub enum TaskCancelationProgress {
 RetrievingTasks,
@@ -292,8 +292,6 @@ impl Queue {
 return Ok(task);
 }

-// Get rid of the mutability.
-let task = task;
 self.tasks.register(wtxn, &task)?;

 Ok(task)
@@ -364,7 +364,7 @@ fn test_task_queue_is_full() {
 // we won't be able to test this error in an integration test thus as a best effort test I still ensure the error return the expected error code
 snapshot!(format!("{:?}", result.error_code()), @"NoSpaceLeftOnDevice");

-// Even the task deletion that doesn't delete anything shouldn't be accepted
+// Even the task deletion and cancelation that don't delete anything should be refused
 let result = index_scheduler
 .register(
 KindWithContent::TaskDeletion { query: S("test"), tasks: RoaringBitmap::new() },
@@ -373,10 +373,39 @@
 )
 .unwrap_err();
 snapshot!(result, @"Meilisearch cannot receive write operations because the limit of the task database has been reached. Please delete tasks to continue performing write operations.");
+let result = index_scheduler
+.register(
+KindWithContent::TaskCancelation { query: S("test"), tasks: RoaringBitmap::new() },
+None,
+false,
+)
+.unwrap_err();
+snapshot!(result, @"Meilisearch cannot receive write operations because the limit of the task database has been reached. Please delete tasks to continue performing write operations.");

 // we won't be able to test this error in an integration test thus as a best effort test I still ensure the error return the expected error code
 snapshot!(format!("{:?}", result.error_code()), @"NoSpaceLeftOnDevice");

-// But a task deletion that delete something should works
+// But a task cancelation that cancel something should work
 index_scheduler
 .register(
+KindWithContent::TaskCancelation { query: S("test"), tasks: (0..100).collect() },
+None,
+false,
+)
+.unwrap();
+handle.advance_one_successful_batch();
+
+// But we should still be forbidden from enqueuing new tasks
+let result = index_scheduler
+.register(
+KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None },
+None,
+false,
+)
+.unwrap_err();
+snapshot!(result, @"Meilisearch cannot receive write operations because the limit of the task database has been reached. Please delete tasks to continue performing write operations.");
+
+// And a task deletion that delete something should works
+index_scheduler
+.register(
 KindWithContent::TaskDeletion { query: S("test"), tasks: (0..100).collect() },
@@ -1,4 +1,5 @@
 use std::fmt;
+use std::io::ErrorKind;

 use meilisearch_types::heed::RoTxn;
 use meilisearch_types::milli::update::IndexDocumentsMethod;
@@ -535,7 +536,11 @@ impl IndexScheduler {
 .and_then(|task| task.ok_or(Error::CorruptedTaskQueue))?;

 if let Some(uuid) = task.content_uuid() {
-let content_size = self.queue.file_store.compute_size(uuid)?;
+let content_size = match self.queue.file_store.compute_size(uuid) {
+Ok(content_size) => content_size,
+Err(file_store::Error::IoError(err)) if err.kind() == ErrorKind::NotFound => 0,
+Err(otherwise) => return Err(otherwise.into()),
+};
 total_size = total_size.saturating_add(content_size);
 }
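The hunk above makes the cancelation path tolerate update files that have already disappeared from the file store instead of failing the whole batch. A small sketch of the same pattern using only the standard library (the path and helper name are made up for illustration):

```rust
use std::io::{self, ErrorKind};

/// Treats a missing file as an empty one instead of propagating the error.
fn size_or_zero(path: &std::path::Path) -> io::Result<u64> {
    match std::fs::metadata(path) {
        Ok(meta) => Ok(meta.len()),
        Err(err) if err.kind() == ErrorKind::NotFound => Ok(0),
        Err(err) => Err(err),
    }
}

fn main() -> io::Result<()> {
    // A path that (very likely) does not exist: counted as zero bytes, not an error.
    let missing = std::path::Path::new("/nonexistent/update_file");
    let total: u64 = 0u64.saturating_add(size_or_zero(missing)?);
    println!("total content size: {total}");
    Ok(())
}
```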
@@ -20,10 +20,12 @@ use std::path::PathBuf;
 use std::sync::atomic::{AtomicBool, AtomicU32, Ordering};
 use std::sync::Arc;

+use convert_case::{Case, Casing as _};
 use meilisearch_types::error::ResponseError;
 use meilisearch_types::heed::{Env, WithoutTls};
 use meilisearch_types::milli;
 use meilisearch_types::tasks::Status;
+use process_batch::ProcessBatchInfo;
 use rayon::current_num_threads;
 use rayon::iter::{IntoParallelIterator, ParallelIterator};
 use roaring::RoaringBitmap;
@@ -223,16 +225,16 @@ impl IndexScheduler {
 let mut stop_scheduler_forever = false;
 let mut wtxn = self.env.write_txn().map_err(Error::HeedTransaction)?;
 let mut canceled = RoaringBitmap::new();
-let mut congestion = None;
+let mut process_batch_info = ProcessBatchInfo::default();

 match res {
-Ok((tasks, cong)) => {
+Ok((tasks, info)) => {
 #[cfg(test)]
 self.breakpoint(crate::test_utils::Breakpoint::ProcessBatchSucceeded);

 let (task_progress, task_progress_obj) = AtomicTaskStep::new(tasks.len() as u32);
 progress.update_progress(task_progress_obj);
-congestion = cong;
+process_batch_info = info;
 let mut success = 0;
 let mut failure = 0;
 let mut canceled_by = None;
@@ -350,6 +352,9 @@ impl IndexScheduler {
 // We must re-add the canceled task so they're part of the same batch.
 ids |= canceled;

+let ProcessBatchInfo { congestion, pre_commit_dabases_sizes, post_commit_dabases_sizes } =
+process_batch_info;
+
 processing_batch.stats.progress_trace =
 progress.accumulated_durations().into_iter().map(|(k, v)| (k, v.into())).collect();
 processing_batch.stats.write_channel_congestion = congestion.map(|congestion| {
@@ -359,6 +364,33 @@ impl IndexScheduler {
 congestion_info.insert("blocking_ratio".into(), congestion.congestion_ratio().into());
 congestion_info
 });
+processing_batch.stats.internal_database_sizes = pre_commit_dabases_sizes
+.iter()
+.flat_map(|(dbname, pre_size)| {
+post_commit_dabases_sizes
+.get(dbname)
+.map(|post_size| {
+use byte_unit::{Byte, UnitType::Binary};
+use std::cmp::Ordering::{Equal, Greater, Less};
+
+let post = Byte::from_u64(*post_size as u64).get_appropriate_unit(Binary);
+let diff_size = post_size.abs_diff(*pre_size) as u64;
+let diff = Byte::from_u64(diff_size).get_appropriate_unit(Binary);
+let sign = match post_size.cmp(pre_size) {
+Equal => return None,
+Greater => "+",
+Less => "-",
+};
+
+Some((
+dbname.to_case(Case::Camel),
+format!("{post:#.2} ({sign}{diff:#.2})").into(),
+))
+})
+.into_iter()
+.flatten()
+})
+.collect();

 if let Some(congestion) = congestion {
 tracing::debug!(
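The new internal_database_sizes entries above store, per database, the post-commit size plus a signed human-readable delta. Below is a rough standalone sketch of that formatting, using the same byte_unit 5.x calls the diff relies on; the sample sizes are invented and the helper name is mine, not Meilisearch's.

```rust
use byte_unit::{Byte, UnitType::Binary};
use std::cmp::Ordering::{Equal, Greater, Less};

/// Formats "post-size (+/-delta)" the way the scheduler stores it in the batch stats,
/// returning None when the size did not change.
fn format_size_change(pre: u64, post: u64) -> Option<String> {
    let sign = match post.cmp(&pre) {
        Equal => return None,
        Greater => "+",
        Less => "-",
    };
    let post_h = Byte::from_u64(post).get_appropriate_unit(Binary);
    let diff_h = Byte::from_u64(post.abs_diff(pre)).get_appropriate_unit(Binary);
    Some(format!("{post_h:#.2} ({sign}{diff_h:#.2})"))
}

fn main() {
    // Made-up sizes: a database that grew from 4 MiB to 6 MiB.
    let line = format_size_change(4 * 1024 * 1024, 6 * 1024 * 1024);
    println!("{line:?}"); // prints something like: Some("6.00 MiB (+2.00 MiB)")

    // Unchanged databases are dropped from the report entirely.
    assert!(format_size_change(1024, 1024).is_none());
}
```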
@@ -12,7 +12,7 @@ use roaring::RoaringBitmap;

 use super::create_batch::Batch;
 use crate::processing::{
-AtomicBatchStep, AtomicTaskStep, CreateIndexProgress, DeleteIndexProgress,
+AtomicBatchStep, AtomicTaskStep, CreateIndexProgress, DeleteIndexProgress, FinalizingIndexStep,
 InnerSwappingTwoIndexes, SwappingTheIndexes, TaskCancelationProgress, TaskDeletionProgress,
 UpdateIndexProgress,
 };
@@ -22,6 +22,16 @@ use crate::utils::{
 };
 use crate::{Error, IndexScheduler, Result, TaskId};

+#[derive(Debug, Default)]
+pub struct ProcessBatchInfo {
+/// The write channel congestion. None when unavailable: settings update.
+pub congestion: Option<ChannelCongestion>,
+/// The sizes of the different databases before starting the indexation.
+pub pre_commit_dabases_sizes: indexmap::IndexMap<&'static str, usize>,
+/// The sizes of the different databases after commiting the indexation.
+pub post_commit_dabases_sizes: indexmap::IndexMap<&'static str, usize>,
+}
+
 impl IndexScheduler {
 /// Apply the operation associated with the given batch.
 ///
@@ -35,7 +45,7 @@ impl IndexScheduler {
 batch: Batch,
 current_batch: &mut ProcessingBatch,
 progress: Progress,
-) -> Result<(Vec<Task>, Option<ChannelCongestion>)> {
+) -> Result<(Vec<Task>, ProcessBatchInfo)> {
 #[cfg(test)]
 {
 self.maybe_fail(crate::test_utils::FailureLocation::InsideProcessBatch)?;
@@ -76,7 +86,7 @@ impl IndexScheduler {

 canceled_tasks.push(task);

-Ok((canceled_tasks, None))
+Ok((canceled_tasks, ProcessBatchInfo::default()))
 }
 Batch::TaskDeletions(mut tasks) => {
 // 1. Retrieve the tasks that matched the query at enqueue-time.
@@ -115,14 +125,14 @@ impl IndexScheduler {
 _ => unreachable!(),
 }
 }
-Ok((tasks, None))
-}
-Batch::SnapshotCreation(tasks) => {
-self.process_snapshot(progress, tasks).map(|tasks| (tasks, None))
-}
-Batch::Dump(task) => {
-self.process_dump_creation(progress, task).map(|tasks| (tasks, None))
+Ok((tasks, ProcessBatchInfo::default()))
 }
+Batch::SnapshotCreation(tasks) => self
+.process_snapshot(progress, tasks)
+.map(|tasks| (tasks, ProcessBatchInfo::default())),
+Batch::Dump(task) => self
+.process_dump_creation(progress, task)
+.map(|tasks| (tasks, ProcessBatchInfo::default())),
 Batch::IndexOperation { op, must_create_index } => {
 let index_uid = op.index_uid().to_string();
 let index = if must_create_index {
@@ -139,10 +149,12 @@ impl IndexScheduler {
 .set_currently_updating_index(Some((index_uid.clone(), index.clone())));

 let mut index_wtxn = index.write_txn()?;
+let pre_commit_dabases_sizes = index.database_sizes(&index_wtxn)?;
 let (tasks, congestion) =
-self.apply_index_operation(&mut index_wtxn, &index, op, progress)?;
+self.apply_index_operation(&mut index_wtxn, &index, op, &progress)?;

 {
+progress.update_progress(FinalizingIndexStep::Committing);
 let span = tracing::trace_span!(target: "indexing::scheduler", "commit");
 let _entered = span.enter();

@@ -153,12 +165,15 @@ impl IndexScheduler {
 // stats of the index. Since the tasks have already been processed and
 // this is a non-critical operation. If it fails, we should not fail
 // the entire batch.
+let mut post_commit_dabases_sizes = None;
 let res = || -> Result<()> {
+progress.update_progress(FinalizingIndexStep::ComputingStats);
 let index_rtxn = index.read_txn()?;
 let stats = crate::index_mapper::IndexStats::new(&index, &index_rtxn)
 .map_err(|e| Error::from_milli(e, Some(index_uid.to_string())))?;
 let mut wtxn = self.env.write_txn()?;
 self.index_mapper.store_stats_of(&mut wtxn, &index_uid, &stats)?;
+post_commit_dabases_sizes = Some(index.database_sizes(&index_rtxn)?);
 wtxn.commit()?;
 Ok(())
 }();
@@ -171,7 +186,16 @@ impl IndexScheduler {
 ),
 }

-Ok((tasks, congestion))
+let info = ProcessBatchInfo {
+congestion,
+// In case we fail to the get post-commit sizes we decide
+// that nothing changed and use the pre-commit sizes.
+post_commit_dabases_sizes: post_commit_dabases_sizes
+.unwrap_or_else(|| pre_commit_dabases_sizes.clone()),
+pre_commit_dabases_sizes,
+};
+
+Ok((tasks, info))
 }
 Batch::IndexCreation { index_uid, primary_key, task } => {
 progress.update_progress(CreateIndexProgress::CreatingTheIndex);
@@ -239,7 +263,7 @@ impl IndexScheduler {
 ),
 }

-Ok((vec![task], None))
+Ok((vec![task], ProcessBatchInfo::default()))
 }
 Batch::IndexDeletion { index_uid, index_has_been_created, mut tasks } => {
 progress.update_progress(DeleteIndexProgress::DeletingTheIndex);
@@ -273,7 +297,9 @@ impl IndexScheduler {
 };
 }

-Ok((tasks, None))
+// Here we could also show that all the internal database sizes goes to 0
+// but it would mean opening the index and that's costly.
+Ok((tasks, ProcessBatchInfo::default()))
 }
 Batch::IndexSwap { mut task } => {
 progress.update_progress(SwappingTheIndexes::EnsuringCorrectnessOfTheSwap);
@@ -321,7 +347,7 @@ impl IndexScheduler {
 }
 wtxn.commit()?;
 task.status = Status::Succeeded;
-Ok((vec![task], None))
+Ok((vec![task], ProcessBatchInfo::default()))
 }
 Batch::UpgradeDatabase { mut tasks } => {
 let KindWithContent::UpgradeDatabase { from } = tasks.last().unwrap().kind else {
@@ -351,7 +377,7 @@ impl IndexScheduler {
 task.error = None;
 }

-Ok((tasks, None))
+Ok((tasks, ProcessBatchInfo::default()))
 }
 }
 }
@@ -32,7 +32,7 @@ impl IndexScheduler {
 index_wtxn: &mut RwTxn<'i>,
 index: &'i Index,
 operation: IndexOperation,
-progress: Progress,
+progress: &Progress,
 ) -> Result<(Vec<Task>, Option<ChannelCongestion>)> {
 let indexer_alloc = Bump::new();
 let started_processing_at = std::time::Instant::now();
@@ -186,7 +186,7 @@ impl IndexScheduler {
 &document_changes,
 embedders,
 &|| must_stop_processing.get(),
-&progress,
+progress,
 )
 .map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?,
 );
@@ -307,7 +307,7 @@ impl IndexScheduler {
 &document_changes,
 embedders,
 &|| must_stop_processing.get(),
-&progress,
+progress,
 )
 .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?,
 );
@@ -465,7 +465,7 @@ impl IndexScheduler {
 &document_changes,
 embedders,
 &|| must_stop_processing.get(),
-&progress,
+progress,
 )
 .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?,
 );
@@ -520,7 +520,7 @@ impl IndexScheduler {
 index_uid: index_uid.clone(),
 tasks: cleared_tasks,
 },
-progress.clone(),
+progress,
 )?;

 let (settings_tasks, _congestion) = self.apply_index_operation(
@@ -41,7 +41,7 @@ impl IndexScheduler {
 progress.update_progress(SnapshotCreationProgress::SnapshotTheIndexScheduler);
 let dst = temp_snapshot_dir.path().join("tasks");
 fs::create_dir_all(&dst)?;
-self.env.copy_to_path(dst.join("data.mdb"), CompactionOption::Enabled)?;
+self.env.copy_to_path(dst.join("data.mdb"), CompactionOption::Disabled)?;

 // 2.2 Create a read transaction on the index-scheduler
 let rtxn = self.env.read_txn()?;
@@ -80,7 +80,7 @@ impl IndexScheduler {
 let dst = temp_snapshot_dir.path().join("indexes").join(uuid.to_string());
 fs::create_dir_all(&dst)?;
 index
-.copy_to_path(dst.join("data.mdb"), CompactionOption::Enabled)
+.copy_to_path(dst.join("data.mdb"), CompactionOption::Disabled)
 .map_err(|e| Error::from_milli(e, Some(name.to_string())))?;
 }

@@ -90,7 +90,7 @@ impl IndexScheduler {
 progress.update_progress(SnapshotCreationProgress::SnapshotTheApiKeys);
 let dst = temp_snapshot_dir.path().join("auth");
 fs::create_dir_all(&dst)?;
-self.scheduler.auth_env.copy_to_path(dst.join("data.mdb"), CompactionOption::Enabled)?;
+self.scheduler.auth_env.copy_to_path(dst.join("data.mdb"), CompactionOption::Disabled)?;

 // 5. Copy and tarball the flat snapshot
 progress.update_progress(SnapshotCreationProgress::CreateTheTarball);
@@ -39,7 +39,7 @@ time = { version = "0.3.37", features = [
 "parsing",
 "macros",
 ] }
-tokio = "1.42"
+tokio = "1.43"
 utoipa = { version = "5.3.1", features = ["macros"] }
 uuid = { version = "1.11.0", features = ["serde", "v4"] }

@@ -64,4 +64,6 @@ pub struct BatchStats {
 pub progress_trace: serde_json::Map<String, serde_json::Value>,
 #[serde(default, skip_serializing_if = "Option::is_none")]
 pub write_channel_congestion: Option<serde_json::Map<String, serde_json::Value>>,
+#[serde(default, skip_serializing_if = "serde_json::Map::is_empty")]
+pub internal_database_sizes: serde_json::Map<String, serde_json::Value>,
 }
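The two serde attributes above keep the new field out of the JSON payload until it actually carries data, so existing consumers of the batch stats see no change. A minimal sketch of that behaviour with a stand-in struct (this assumes the serde derive feature plus serde_json; the real BatchStats lives in meilisearch-types and also camel-cases its keys):

```rust
use serde::{Deserialize, Serialize};

#[derive(Serialize, Deserialize, Default)]
struct Stats {
    // Omitted from the output while the map is empty.
    #[serde(default, skip_serializing_if = "serde_json::Map::is_empty")]
    internal_database_sizes: serde_json::Map<String, serde_json::Value>,
    // Omitted while None, same idea as above.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    write_channel_congestion: Option<serde_json::Map<String, serde_json::Value>>,
}

fn main() {
    // Empty stats serialize to "{}": both fields are skipped.
    println!("{}", serde_json::to_string(&Stats::default()).unwrap());

    // Once populated, the field shows up in the serialized batch.
    let mut sizes = serde_json::Map::new();
    sizes.insert("wordDocids".into(), serde_json::json!("3.00 MiB (+1.00 MiB)"));
    let stats = Stats { internal_database_sizes: sizes, ..Default::default() };
    println!("{}", serde_json::to_string(&stats).unwrap());
}
```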
@@ -312,6 +312,7 @@ InvalidSettingsDisplayedAttributes , InvalidRequest , BAD_REQUEST ;
 InvalidSettingsDistinctAttribute , InvalidRequest , BAD_REQUEST ;
 InvalidSettingsProximityPrecision , InvalidRequest , BAD_REQUEST ;
 InvalidSettingsFacetSearch , InvalidRequest , BAD_REQUEST ;
+InvalidSettingsexecuteAfterUpdate , InvalidRequest , BAD_REQUEST ;
 InvalidSettingsPrefixSearch , InvalidRequest , BAD_REQUEST ;
 InvalidSettingsFaceting , InvalidRequest , BAD_REQUEST ;
 InvalidSettingsFilterableAttributes , InvalidRequest , BAD_REQUEST ;
@@ -454,7 +455,10 @@ impl ErrorCode for milli::Error {
 }
 UserError::CriterionError(_) => Code::InvalidSettingsRankingRules,
 UserError::InvalidGeoField { .. } => Code::InvalidDocumentGeoField,
-UserError::InvalidVectorDimensions { .. } => Code::InvalidVectorDimensions,
+UserError::InvalidVectorDimensions { .. }
+| UserError::InvalidIndexingVectorDimensions { .. } => {
+Code::InvalidVectorDimensions
+}
 UserError::InvalidVectorsMapType { .. }
 | UserError::InvalidVectorsEmbedderConf { .. } => Code::InvalidVectorsType,
 UserError::TooManyVectors(_, _) => Code::TooManyVectors,
@@ -289,6 +289,12 @@ pub struct Settings<T> {
 #[schema(value_type = Option<PrefixSearchSettings>, example = json!("Hemlo"))]
 pub prefix_search: Setting<PrefixSearchSettings>,

+/// Function to execute after an update
+#[serde(default, skip_serializing_if = "Setting::is_not_set")]
+#[deserr(default, error = DeserrJsonError<InvalidSettingsexecuteAfterUpdate>)]
+#[schema(value_type = Option<String>, example = json!("doc.likes += 1"))]
+pub execute_after_update: Setting<String>,
+
 #[serde(skip)]
 #[deserr(skip)]
 pub _kind: PhantomData<T>,
@@ -354,6 +360,7 @@ impl Settings<Checked> {
 localized_attributes: Setting::Reset,
 facet_search: Setting::Reset,
 prefix_search: Setting::Reset,
+execute_after_update: Setting::Reset,
 _kind: PhantomData,
 }
 }
@@ -380,6 +387,7 @@ impl Settings<Checked> {
 localized_attributes: localized_attributes_rules,
 facet_search,
 prefix_search,
+execute_after_update,
 _kind,
 } = self;

@@ -404,6 +412,7 @@ impl Settings<Checked> {
 localized_attributes: localized_attributes_rules,
 facet_search,
 prefix_search,
+execute_after_update,
 _kind: PhantomData,
 }
 }
@@ -454,6 +463,7 @@ impl Settings<Unchecked> {
 localized_attributes: self.localized_attributes,
 facet_search: self.facet_search,
 prefix_search: self.prefix_search,
+execute_after_update: self.execute_after_update,
 _kind: PhantomData,
 }
 }
@@ -530,6 +540,10 @@ impl Settings<Unchecked> {
 },
 prefix_search: other.prefix_search.or(self.prefix_search),
 facet_search: other.facet_search.or(self.facet_search),
+execute_after_update: other
+.execute_after_update
+.clone()
+.or(self.execute_after_update.clone()),
 _kind: PhantomData,
 }
 }
@@ -568,6 +582,7 @@ pub fn apply_settings_to_builder(
 localized_attributes: localized_attributes_rules,
 facet_search,
 prefix_search,
+execute_after_update,
 _kind,
 } = settings;

@@ -772,6 +787,14 @@ pub fn apply_settings_to_builder(
 Setting::Reset => builder.reset_facet_search(),
 Setting::NotSet => (),
 }
+
+match execute_after_update {
+Setting::Set(execute_after_update) => {
+builder.set_execute_after_update(execute_after_update.clone())
+}
+Setting::Reset => builder.reset_execute_after_update(),
+Setting::NotSet => (),
+}
 }

 pub enum SecretPolicy {
@@ -867,14 +890,11 @@ pub fn settings(
 })
 .collect();
 let embedders = Setting::Set(embedders);
-
 let search_cutoff_ms = index.search_cutoff(rtxn)?;
-
 let localized_attributes_rules = index.localized_attributes_rules(rtxn)?;
-
 let prefix_search = index.prefix_search(rtxn)?.map(PrefixSearchSettings::from);
-
 let facet_search = index.facet_search(rtxn)?;
+let execute_after_update = index.execute_after_update(rtxn)?;

 let mut settings = Settings {
 displayed_attributes: match displayed_attributes {
@@ -914,6 +934,10 @@ pub fn settings(
 },
 prefix_search: Setting::Set(prefix_search.unwrap_or_default()),
 facet_search: Setting::Set(facet_search),
+execute_after_update: match execute_after_update {
+Some(function) => Setting::Set(function.to_string()),
+None => Setting::NotSet,
+},
 _kind: PhantomData,
 };

@@ -1141,6 +1165,7 @@ pub(crate) mod test {
 search_cutoff_ms: Setting::NotSet,
 facet_search: Setting::NotSet,
 prefix_search: Setting::NotSet,
+execute_after_update: Setting::NotSet,
 _kind: PhantomData::<Unchecked>,
 };

@@ -1172,6 +1197,7 @@ pub(crate) mod test {
 search_cutoff_ms: Setting::NotSet,
 facet_search: Setting::NotSet,
 prefix_search: Setting::NotSet,
+execute_after_update: Setting::NotSet,
 _kind: PhantomData::<Unchecked>,
 };
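The merge hunk above relies on Setting::or, so a value carried by the incoming update wins over the stored one while NotSet falls back to it. A reduced sketch of that semantics with a stand-in Setting enum (an assumption for illustration, not the meilisearch_types definition):

```rust
/// Reduced stand-in for the Setting<T> used throughout the settings module.
#[derive(Clone, Debug, PartialEq)]
enum Setting<T> {
    Set(T),
    Reset,
    NotSet,
}

impl<T> Setting<T> {
    /// Keeps `self` unless it is NotSet, in which case the previous value is used.
    fn or(self, other: Self) -> Self {
        match self {
            Setting::NotSet => other,
            set_or_reset => set_or_reset,
        }
    }
}

fn main() {
    // An update that does not mention executeAfterUpdate keeps the stored function...
    let merged = Setting::NotSet.or(Setting::Set("doc.likes += 1".to_string()));
    assert_eq!(merged, Setting::Set("doc.likes += 1".to_string()));
    // ...while an explicit Reset in the update clears it.
    let cleared = Setting::<String>::Reset.or(Setting::Set("doc.likes += 1".to_string()));
    assert_eq!(cleared, Setting::Reset);
}
```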
@@ -30,14 +30,10 @@ actix-web = { version = "4.9.0", default-features = false, features = [
 anyhow = { version = "1.0.95", features = ["backtrace"] }
 async-trait = "0.1.85"
 bstr = "1.11.3"
-byte-unit = { version = "5.1.6", default-features = false, features = [
-"std",
-"byte",
-"serde",
-] }
+byte-unit = { version = "5.1.6", features = ["serde"] }
 bytes = "1.9.0"
 clap = { version = "4.5.24", features = ["derive", "env"] }
-crossbeam-channel = "0.5.14"
+crossbeam-channel = "0.5.15"
 deserr = { version = "0.6.3", features = ["actix-web"] }
 dump = { path = "../dump" }
 either = "1.13.0"
@@ -92,7 +88,7 @@ time = { version = "0.3.37", features = [
 "parsing",
 "macros",
 ] }
-tokio = { version = "1.42.0", features = ["full"] }
+tokio = { version = "1.43.1", features = ["full"] }
 toml = "0.8.19"
 uuid = { version = "1.11.0", features = ["serde", "v4"] }
 serde_urlencoded = "0.7.1"
@@ -140,7 +136,7 @@ reqwest = { version = "0.12.12", features = [
 sha-1 = { version = "0.10.1", optional = true }
 static-files = { version = "0.2.4", optional = true }
 tempfile = { version = "3.15.0", optional = true }
-zip = { version = "2.2.2", optional = true }
+zip = { version = "2.3.0", optional = true }

 [features]
 default = ["meilisearch-types/all-tokenizations", "mini-dashboard"]
@@ -170,5 +166,5 @@ german = ["meilisearch-types/german"]
 turkish = ["meilisearch-types/turkish"]

 [package.metadata.mini-dashboard]
-assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.18/build.zip"
-sha1 = "b408a30dcb6e20cddb0c153c23385bcac4c8e912"
+assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.19/build.zip"
+sha1 = "7974430d5277c97f67cf6e95eec6faaac2788834"
@@ -329,7 +329,8 @@ impl Infos {
 http_addr: http_addr != default_http_addr(),
 http_payload_size_limit,
 experimental_max_number_of_batched_tasks,
-experimental_limit_batched_tasks_total_size,
+experimental_limit_batched_tasks_total_size:
+experimental_limit_batched_tasks_total_size.into(),
 task_queue_webhook: task_webhook_url.is_some(),
 task_webhook_authorization_header: task_webhook_authorization_header.is_some(),
 log_level: log_level.to_string(),
@@ -228,7 +228,7 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc<IndexScheduler>, Arc<
 cleanup_enabled: !opt.experimental_replication_parameters,
 max_number_of_tasks: 1_000_000,
 max_number_of_batched_tasks: opt.experimental_max_number_of_batched_tasks,
-batched_tasks_size_limit: opt.experimental_limit_batched_tasks_total_size,
+batched_tasks_size_limit: opt.experimental_limit_batched_tasks_total_size.into(),
 index_growth_amount: byte_unit::Byte::from_str("10GiB").unwrap().as_u64() as usize,
 index_count: DEFAULT_INDEX_COUNT,
 instance_features: opt.to_instance_features(),
@@ -445,7 +445,7 @@ pub struct Opt {
 /// see: <https://github.com/orgs/meilisearch/discussions/801>
 #[clap(long, env = MEILI_EXPERIMENTAL_LIMIT_BATCHED_TASKS_TOTAL_SIZE, default_value_t = default_limit_batched_tasks_total_size())]
 #[serde(default = "default_limit_batched_tasks_total_size")]
-pub experimental_limit_batched_tasks_total_size: u64,
+pub experimental_limit_batched_tasks_total_size: Byte,

 /// Enables experimental caching of search query embeddings. The value represents the maximal number of entries in the cache of each
 /// distinct embedder.
@@ -968,8 +968,8 @@ fn default_limit_batched_tasks() -> usize {
 usize::MAX
 }

-fn default_limit_batched_tasks_total_size() -> u64 {
-u64::MAX
+fn default_limit_batched_tasks_total_size() -> Byte {
+Byte::from_u64(u64::MAX)
 }

 fn default_embedding_cache_entries() -> usize {
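With the option typed as byte_unit::Byte, the size limit can be given as a human-readable string and converted back to a raw byte count where the scheduler needs it. A small sketch of that parsing, assuming byte_unit 5.x as used elsewhere in this diff; the snippet is illustrative and not Meilisearch's actual Opt definition.

```rust
use byte_unit::Byte;
use std::str::FromStr;

fn main() {
    // Values like the one behind MEILI_EXPERIMENTAL_LIMIT_BATCHED_TASKS_TOTAL_SIZE can now
    // be written as human-readable sizes instead of a raw number of bytes.
    let limit = Byte::from_str("10GiB").expect("byte_unit understands binary suffixes");
    assert_eq!(limit.as_u64(), 10 * 1024 * 1024 * 1024);

    // A bare integer should still parse and is interpreted as a plain byte count.
    let raw = Byte::from_str("1048576").unwrap();
    assert_eq!(raw.as_u64(), 1024 * 1024);

    // The default stays "no limit": the whole u64 range.
    let unlimited = Byte::from_u64(u64::MAX);
    assert_eq!(unlimited.as_u64(), u64::MAX);
}
```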
@@ -518,7 +518,7 @@ impl From<index_scheduler::IndexStats> for IndexStats {
 .inner_stats
 .number_of_documents
 .unwrap_or(stats.inner_stats.documents_database_stats.number_of_entries()),
-raw_document_db_size: stats.inner_stats.documents_database_stats.total_value_size(),
+raw_document_db_size: stats.inner_stats.documents_database_stats.total_size(),
 avg_document_size: stats.inner_stats.documents_database_stats.average_value_size(),
 is_indexing: stats.is_indexing,
 number_of_embeddings: stats.inner_stats.number_of_embeddings,
@@ -497,6 +497,17 @@ make_setting_routes!(
 camelcase_attr: "facetSearch",
 analytics: FacetSearchAnalytics
 },
+{
+route: "/execute-after-update",
+update_verb: put,
+value_type: String,
+err_type: meilisearch_types::deserr::DeserrJsonError<
+meilisearch_types::error::deserr_codes::InvalidSettingsexecuteAfterUpdate,
+>,
+attr: execute_after_update,
+camelcase_attr: "executeAfterUpdate",
+analytics: ExecuteAfterUpdateAnalytics
+},
 {
 route: "/prefix-search",
 update_verb: put,
@@ -596,6 +607,9 @@ pub async fn update_all(
 new_settings.non_separator_tokens.as_ref().set(),
 ),
 facet_search: FacetSearchAnalytics::new(new_settings.facet_search.as_ref().set()),
+execute_after_update: ExecuteAfterUpdateAnalytics::new(
+new_settings.execute_after_update.as_ref().set(),
+),
 prefix_search: PrefixSearchAnalytics::new(new_settings.prefix_search.as_ref().set()),
 },
 &req,
@@ -39,6 +39,7 @@ pub struct SettingsAnalytics {
 pub non_separator_tokens: NonSeparatorTokensAnalytics,
 pub facet_search: FacetSearchAnalytics,
 pub prefix_search: PrefixSearchAnalytics,
+pub execute_after_update: ExecuteAfterUpdateAnalytics,
 }

 impl Aggregate for SettingsAnalytics {
@@ -194,6 +195,9 @@ impl Aggregate for SettingsAnalytics {
 set: new.facet_search.set | self.facet_search.set,
 value: new.facet_search.value.or(self.facet_search.value),
 },
+execute_after_update: ExecuteAfterUpdateAnalytics {
+set: new.execute_after_update.set | self.execute_after_update.set,
+},
 prefix_search: PrefixSearchAnalytics {
 set: new.prefix_search.set | self.prefix_search.set,
 value: new.prefix_search.value.or(self.prefix_search.value),
@@ -659,6 +663,21 @@ impl FacetSearchAnalytics {
 }
 }

+#[derive(Serialize, Default)]
+pub struct ExecuteAfterUpdateAnalytics {
+pub set: bool,
+}
+
+impl ExecuteAfterUpdateAnalytics {
+pub fn new(distinct: Option<&String>) -> Self {
+Self { set: distinct.is_some() }
+}
+
+pub fn into_settings(self) -> SettingsAnalytics {
+SettingsAnalytics { execute_after_update: self, ..Default::default() }
+}
+}
+
 #[derive(Serialize, Default)]
 pub struct PrefixSearchAnalytics {
 pub set: bool,
@@ -281,7 +281,8 @@ async fn test_summarized_document_addition_or_update() {
 ".startedAt" => "[date]",
 ".finishedAt" => "[date]",
 ".stats.progressTrace" => "[progressTrace]",
-".stats.writeChannelCongestion" => "[writeChannelCongestion]"
+".stats.writeChannelCongestion" => "[writeChannelCongestion]",
+".stats.internalDatabaseSizes" => "[internalDatabaseSizes]"
 },
 @r###"
 {
@@ -303,7 +304,8 @@ async fn test_summarized_document_addition_or_update() {
 "test": 1
 },
 "progressTrace": "[progressTrace]",
-"writeChannelCongestion": "[writeChannelCongestion]"
+"writeChannelCongestion": "[writeChannelCongestion]",
+"internalDatabaseSizes": "[internalDatabaseSizes]"
 },
 "duration": "[duration]",
 "startedAt": "[date]",
@@ -322,7 +324,8 @@ async fn test_summarized_document_addition_or_update() {
 ".startedAt" => "[date]",
 ".finishedAt" => "[date]",
 ".stats.progressTrace" => "[progressTrace]",
-".stats.writeChannelCongestion" => "[writeChannelCongestion]"
+".stats.writeChannelCongestion" => "[writeChannelCongestion]",
+".stats.internalDatabaseSizes" => "[internalDatabaseSizes]"
 },
 @r###"
 {
@@ -407,7 +410,8 @@ async fn test_summarized_delete_documents_by_batch() {
 ".startedAt" => "[date]",
 ".finishedAt" => "[date]",
 ".stats.progressTrace" => "[progressTrace]",
-".stats.writeChannelCongestion" => "[writeChannelCongestion]"
+".stats.writeChannelCongestion" => "[writeChannelCongestion]",
+".stats.internalDatabaseSizes" => "[internalDatabaseSizes]"
 },
 @r###"
 {
@@ -495,7 +499,8 @@ async fn test_summarized_delete_documents_by_filter() {
 ".startedAt" => "[date]",
 ".finishedAt" => "[date]",
 ".stats.progressTrace" => "[progressTrace]",
-".stats.writeChannelCongestion" => "[writeChannelCongestion]"
+".stats.writeChannelCongestion" => "[writeChannelCongestion]",
+".stats.internalDatabaseSizes" => "[internalDatabaseSizes]"
 },
 @r###"
 {
@@ -537,7 +542,8 @@ async fn test_summarized_delete_documents_by_filter() {
 ".startedAt" => "[date]",
 ".finishedAt" => "[date]",
 ".stats.progressTrace" => "[progressTrace]",
-".stats.writeChannelCongestion" => "[writeChannelCongestion]"
+".stats.writeChannelCongestion" => "[writeChannelCongestion]",
+".stats.internalDatabaseSizes" => "[internalDatabaseSizes]"
 },
 @r#"
 {
@@ -623,7 +629,8 @@ async fn test_summarized_delete_document_by_id() {
 ".startedAt" => "[date]",
 ".finishedAt" => "[date]",
 ".stats.progressTrace" => "[progressTrace]",
-".stats.writeChannelCongestion" => "[writeChannelCongestion]"
+".stats.writeChannelCongestion" => "[writeChannelCongestion]",
+".stats.internalDatabaseSizes" => "[internalDatabaseSizes]"
 },
 @r#"
 {
@@ -679,7 +686,8 @@ async fn test_summarized_settings_update() {
 ".startedAt" => "[date]",
 ".finishedAt" => "[date]",
 ".stats.progressTrace" => "[progressTrace]",
-".stats.writeChannelCongestion" => "[writeChannelCongestion]"
+".stats.writeChannelCongestion" => "[writeChannelCongestion]",
+".stats.internalDatabaseSizes" => "[internalDatabaseSizes]"
 },
 @r###"
 {
@ -1777,7 +1777,7 @@ async fn add_documents_with_geo_field() {
|
||||
},
|
||||
{
|
||||
"id": "4",
|
||||
"_geo": { "lat": "1", "lng": "1" },
|
||||
"_geo": { "lat": "2", "lng": "2" },
|
||||
},
|
||||
]);
|
||||
|
||||
@ -1828,8 +1828,8 @@ async fn add_documents_with_geo_field() {
|
||||
{
|
||||
"id": "4",
|
||||
"_geo": {
|
||||
"lat": "1",
|
||||
"lng": "1"
|
||||
"lat": "2",
|
||||
"lng": "2"
|
||||
}
|
||||
}
|
||||
],
|
||||
@ -1848,14 +1848,6 @@ async fn add_documents_with_geo_field() {
|
||||
@r###"
|
||||
{
|
||||
"hits": [
|
||||
{
|
||||
"id": "4",
|
||||
"_geo": {
|
||||
"lat": "1",
|
||||
"lng": "1"
|
||||
},
|
||||
"_geoDistance": 5522018
|
||||
},
|
||||
{
|
||||
"id": "3",
|
||||
"_geo": {
|
||||
@ -1864,6 +1856,14 @@ async fn add_documents_with_geo_field() {
|
||||
},
|
||||
"_geoDistance": 5522018
|
||||
},
|
||||
{
|
||||
"id": "4",
|
||||
"_geo": {
|
||||
"lat": "2",
|
||||
"lng": "2"
|
||||
},
|
||||
"_geoDistance": 5408322
|
||||
},
|
||||
{
|
||||
"id": "1"
|
||||
},
|
||||
@ -1897,11 +1897,11 @@ async fn update_documents_with_geo_field() {
|
||||
},
|
||||
{
|
||||
"id": "3",
|
||||
"_geo": { "lat": 1, "lng": 1 },
|
||||
"_geo": { "lat": 3, "lng": 0 },
|
||||
},
|
||||
{
|
||||
"id": "4",
|
||||
"_geo": { "lat": "1", "lng": "1" },
|
||||
"_geo": { "lat": "4", "lng": "0" },
|
||||
},
|
||||
]);
|
||||
|
||||
@ -1928,9 +1928,7 @@ async fn update_documents_with_geo_field() {
|
||||
}
|
||||
"###);
|
||||
|
||||
let (response, code) = index
|
||||
.search_post(json!({"sort": ["_geoPoint(50.629973371633746,3.0569447399419567):desc"]}))
|
||||
.await;
|
||||
let (response, code) = index.search_post(json!({"sort": ["_geoPoint(10,0):asc"]})).await;
|
||||
snapshot!(code, @"200 OK");
|
||||
// we are expecting docs 4 and 3 first as they have geo
|
||||
snapshot!(json_string!(response, { ".processingTimeMs" => "[time]" }),
|
||||
@ -1940,18 +1938,18 @@ async fn update_documents_with_geo_field() {
|
||||
{
|
||||
"id": "4",
|
||||
"_geo": {
|
||||
"lat": "1",
|
||||
"lng": "1"
|
||||
"lat": "4",
|
||||
"lng": "0"
|
||||
},
|
||||
"_geoDistance": 5522018
|
||||
"_geoDistance": 667170
|
||||
},
|
||||
{
|
||||
"id": "3",
|
||||
"_geo": {
|
||||
"lat": 1,
|
||||
"lng": 1
|
||||
"lat": 3,
|
||||
"lng": 0
|
||||
},
|
||||
"_geoDistance": 5522018
|
||||
"_geoDistance": 778364
|
||||
},
|
||||
{
|
||||
"id": "1"
|
||||
@ -1969,10 +1967,13 @@ async fn update_documents_with_geo_field() {
|
||||
}
|
||||
"###);
|
||||
|
||||
let updated_documents = json!([{
|
||||
let updated_documents = json!([
|
||||
{
|
||||
"id": "3",
|
||||
"doggo": "kefir",
|
||||
}]);
|
||||
"_geo": { "lat": 5, "lng": 0 },
|
||||
}
|
||||
]);
|
||||
let (task, _status_code) = index.update_documents(updated_documents, None).await;
|
||||
let response = index.wait_task(task.uid()).await;
|
||||
snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }),
|
||||
@ -2012,16 +2013,16 @@ async fn update_documents_with_geo_field() {
|
||||
{
|
||||
"id": "3",
|
||||
"_geo": {
|
||||
"lat": 1,
|
||||
"lng": 1
|
||||
"lat": 5,
|
||||
"lng": 0
|
||||
},
|
||||
"doggo": "kefir"
|
||||
},
|
||||
{
|
||||
"id": "4",
|
||||
"_geo": {
|
||||
"lat": "1",
|
||||
"lng": "1"
|
||||
"lat": "4",
|
||||
"lng": "0"
|
||||
}
|
||||
}
|
||||
],
|
||||
@ -2031,31 +2032,29 @@ async fn update_documents_with_geo_field() {
|
||||
}
|
||||
"###);
|
||||
|
||||
let (response, code) = index
|
||||
.search_post(json!({"sort": ["_geoPoint(50.629973371633746,3.0569447399419567):desc"]}))
|
||||
.await;
|
||||
let (response, code) = index.search_post(json!({"sort": ["_geoPoint(10,0):asc"]})).await;
|
||||
snapshot!(code, @"200 OK");
|
||||
// the search response should not have changed: we are expecting docs 4 and 3 first as they have geo
|
||||
snapshot!(json_string!(response, { ".processingTimeMs" => "[time]" }),
|
||||
@r###"
|
||||
{
|
||||
"hits": [
|
||||
{
|
||||
"id": "4",
|
||||
"_geo": {
|
||||
"lat": "1",
|
||||
"lng": "1"
|
||||
},
|
||||
"_geoDistance": 5522018
|
||||
},
|
||||
{
|
||||
"id": "3",
|
||||
"_geo": {
|
||||
"lat": 1,
|
||||
"lng": 1
|
||||
"lat": 5,
|
||||
"lng": 0
|
||||
},
|
||||
"doggo": "kefir",
|
||||
"_geoDistance": 5522018
|
||||
"_geoDistance": 555975
|
||||
},
|
||||
{
|
||||
"id": "4",
|
||||
"_geo": {
|
||||
"lat": "4",
|
||||
"lng": "0"
|
||||
},
|
||||
"_geoDistance": 667170
|
||||
},
|
||||
{
|
||||
"id": "1"
|
||||
|
@ -157,11 +157,14 @@ async fn delete_document_by_filter() {
|
||||
index.wait_task(task.uid()).await.succeeded();
|
||||
|
||||
let (stats, _) = index.stats().await;
|
||||
snapshot!(json_string!(stats), @r###"
|
||||
snapshot!(json_string!(stats, {
|
||||
".rawDocumentDbSize" => "[size]",
|
||||
".avgDocumentSize" => "[size]",
|
||||
}), @r###"
|
||||
{
|
||||
"numberOfDocuments": 4,
|
||||
"rawDocumentDbSize": 42,
|
||||
"avgDocumentSize": 10,
|
||||
"rawDocumentDbSize": "[size]",
|
||||
"avgDocumentSize": "[size]",
|
||||
"isIndexing": false,
|
||||
"numberOfEmbeddings": 0,
|
||||
"numberOfEmbeddedDocuments": 0,
|
||||
@ -208,11 +211,14 @@ async fn delete_document_by_filter() {
|
||||
"###);
|
||||
|
||||
let (stats, _) = index.stats().await;
|
||||
snapshot!(json_string!(stats), @r###"
|
||||
snapshot!(json_string!(stats, {
|
||||
".rawDocumentDbSize" => "[size]",
|
||||
".avgDocumentSize" => "[size]",
|
||||
}), @r###"
|
||||
{
|
||||
"numberOfDocuments": 2,
|
||||
"rawDocumentDbSize": 16,
|
||||
"avgDocumentSize": 8,
|
||||
"rawDocumentDbSize": "[size]",
|
||||
"avgDocumentSize": "[size]",
|
||||
"isIndexing": false,
|
||||
"numberOfEmbeddings": 0,
|
||||
"numberOfEmbeddedDocuments": 0,
|
||||
@ -278,11 +284,14 @@ async fn delete_document_by_filter() {
|
||||
"###);
|
||||
|
||||
let (stats, _) = index.stats().await;
|
||||
snapshot!(json_string!(stats), @r###"
|
||||
snapshot!(json_string!(stats, {
|
||||
".rawDocumentDbSize" => "[size]",
|
||||
".avgDocumentSize" => "[size]",
|
||||
}), @r###"
|
||||
{
|
||||
"numberOfDocuments": 1,
|
||||
"rawDocumentDbSize": 12,
|
||||
"avgDocumentSize": 12,
|
||||
"rawDocumentDbSize": "[size]",
|
||||
"avgDocumentSize": "[size]",
|
||||
"isIndexing": false,
|
||||
"numberOfEmbeddings": 0,
|
||||
"numberOfEmbeddedDocuments": 0,
|
||||
|
@ -28,12 +28,15 @@ async fn import_dump_v1_movie_raw() {
|
||||
let (stats, code) = index.stats().await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(
|
||||
json_string!(stats),
|
||||
json_string!(stats, {
|
||||
".rawDocumentDbSize" => "[size]",
|
||||
".avgDocumentSize" => "[size]",
|
||||
}),
|
||||
@r###"
|
||||
{
|
||||
"numberOfDocuments": 53,
|
||||
"rawDocumentDbSize": 21965,
|
||||
"avgDocumentSize": 414,
|
||||
"rawDocumentDbSize": "[size]",
|
||||
"avgDocumentSize": "[size]",
|
||||
"isIndexing": false,
|
||||
"numberOfEmbeddings": 0,
|
||||
"numberOfEmbeddedDocuments": 0,
|
||||
@ -185,12 +188,15 @@ async fn import_dump_v1_movie_with_settings() {
|
||||
let (stats, code) = index.stats().await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(
|
||||
json_string!(stats),
|
||||
json_string!(stats, {
|
||||
".rawDocumentDbSize" => "[size]",
|
||||
".avgDocumentSize" => "[size]",
|
||||
}),
|
||||
@r###"
|
||||
{
|
||||
"numberOfDocuments": 53,
|
||||
"rawDocumentDbSize": 21965,
|
||||
"avgDocumentSize": 414,
|
||||
"rawDocumentDbSize": "[size]",
|
||||
"avgDocumentSize": "[size]",
|
||||
"isIndexing": false,
|
||||
"numberOfEmbeddings": 0,
|
||||
"numberOfEmbeddedDocuments": 0,
|
||||
@ -355,12 +361,15 @@ async fn import_dump_v1_rubygems_with_settings() {
|
||||
let (stats, code) = index.stats().await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(
|
||||
json_string!(stats),
|
||||
json_string!(stats, {
|
||||
".rawDocumentDbSize" => "[size]",
|
||||
".avgDocumentSize" => "[size]",
|
||||
}),
|
||||
@r###"
|
||||
{
|
||||
"numberOfDocuments": 53,
|
||||
"rawDocumentDbSize": 8606,
|
||||
"avgDocumentSize": 162,
|
||||
"rawDocumentDbSize": "[size]",
|
||||
"avgDocumentSize": "[size]",
|
||||
"isIndexing": false,
|
||||
"numberOfEmbeddings": 0,
|
||||
"numberOfEmbeddedDocuments": 0,
|
||||
@ -522,12 +531,15 @@ async fn import_dump_v2_movie_raw() {
|
||||
let (stats, code) = index.stats().await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(
|
||||
json_string!(stats),
|
||||
json_string!(stats, {
|
||||
".rawDocumentDbSize" => "[size]",
|
||||
".avgDocumentSize" => "[size]",
|
||||
}),
|
||||
@r###"
|
||||
{
|
||||
"numberOfDocuments": 53,
|
||||
"rawDocumentDbSize": 21965,
|
||||
"avgDocumentSize": 414,
|
||||
"rawDocumentDbSize": "[size]",
|
||||
"avgDocumentSize": "[size]",
|
||||
"isIndexing": false,
|
||||
"numberOfEmbeddings": 0,
|
||||
"numberOfEmbeddedDocuments": 0,
|
||||
@ -679,12 +691,15 @@ async fn import_dump_v2_movie_with_settings() {
|
||||
let (stats, code) = index.stats().await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(
|
||||
json_string!(stats),
|
||||
json_string!(stats, {
|
||||
".rawDocumentDbSize" => "[size]",
|
||||
".avgDocumentSize" => "[size]",
|
||||
}),
|
||||
@r###"
|
||||
{
|
||||
"numberOfDocuments": 53,
|
||||
"rawDocumentDbSize": 21965,
|
||||
"avgDocumentSize": 414,
|
||||
"rawDocumentDbSize": "[size]",
|
||||
"avgDocumentSize": "[size]",
|
||||
"isIndexing": false,
|
||||
"numberOfEmbeddings": 0,
|
||||
"numberOfEmbeddedDocuments": 0,
|
||||
@ -846,12 +861,15 @@ async fn import_dump_v2_rubygems_with_settings() {
|
||||
let (stats, code) = index.stats().await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(
|
||||
json_string!(stats),
|
||||
json_string!(stats, {
|
||||
".rawDocumentDbSize" => "[size]",
|
||||
".avgDocumentSize" => "[size]",
|
||||
}),
|
||||
@r###"
|
||||
{
|
||||
"numberOfDocuments": 53,
|
||||
"rawDocumentDbSize": 8606,
|
||||
"avgDocumentSize": 162,
|
||||
"rawDocumentDbSize": "[size]",
|
||||
"avgDocumentSize": "[size]",
|
||||
"isIndexing": false,
|
||||
"numberOfEmbeddings": 0,
|
||||
"numberOfEmbeddedDocuments": 0,
|
||||
@ -1010,12 +1028,15 @@ async fn import_dump_v3_movie_raw() {
|
||||
let (stats, code) = index.stats().await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(
|
||||
json_string!(stats),
|
||||
json_string!(stats, {
|
||||
".rawDocumentDbSize" => "[size]",
|
||||
".avgDocumentSize" => "[size]",
|
||||
}),
|
||||
@r###"
|
||||
{
|
||||
"numberOfDocuments": 53,
|
||||
"rawDocumentDbSize": 21965,
|
||||
"avgDocumentSize": 414,
|
||||
"rawDocumentDbSize": "[size]",
|
||||
"avgDocumentSize": "[size]",
|
||||
"isIndexing": false,
|
||||
"numberOfEmbeddings": 0,
|
||||
"numberOfEmbeddedDocuments": 0,
|
||||
@ -1167,12 +1188,15 @@ async fn import_dump_v3_movie_with_settings() {
|
||||
let (stats, code) = index.stats().await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(
|
||||
json_string!(stats),
|
||||
json_string!(stats, {
|
||||
".rawDocumentDbSize" => "[size]",
|
||||
".avgDocumentSize" => "[size]",
|
||||
}),
|
||||
@r###"
|
||||
{
|
||||
"numberOfDocuments": 53,
|
||||
"rawDocumentDbSize": 21965,
|
||||
"avgDocumentSize": 414,
|
||||
"rawDocumentDbSize": "[size]",
|
||||
"avgDocumentSize": "[size]",
|
||||
"isIndexing": false,
|
||||
"numberOfEmbeddings": 0,
|
||||
"numberOfEmbeddedDocuments": 0,
|
||||
@ -1334,12 +1358,15 @@ async fn import_dump_v3_rubygems_with_settings() {
|
||||
let (stats, code) = index.stats().await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(
|
||||
json_string!(stats),
|
||||
json_string!(stats, {
|
||||
".rawDocumentDbSize" => "[size]",
|
||||
".avgDocumentSize" => "[size]",
|
||||
}),
|
||||
@r###"
|
||||
{
|
||||
"numberOfDocuments": 53,
|
||||
"rawDocumentDbSize": 8606,
|
||||
"avgDocumentSize": 162,
|
||||
"rawDocumentDbSize": "[size]",
|
||||
"avgDocumentSize": "[size]",
|
||||
"isIndexing": false,
|
||||
"numberOfEmbeddings": 0,
|
||||
"numberOfEmbeddedDocuments": 0,
|
||||
@ -1498,12 +1525,15 @@ async fn import_dump_v4_movie_raw() {
|
||||
let (stats, code) = index.stats().await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(
|
||||
json_string!(stats),
|
||||
json_string!(stats, {
|
||||
".rawDocumentDbSize" => "[size]",
|
||||
".avgDocumentSize" => "[size]",
|
||||
}),
|
||||
@r###"
|
||||
{
|
||||
"numberOfDocuments": 53,
|
||||
"rawDocumentDbSize": 21965,
|
||||
"avgDocumentSize": 414,
|
||||
"rawDocumentDbSize": "[size]",
|
||||
"avgDocumentSize": "[size]",
|
||||
"isIndexing": false,
|
||||
"numberOfEmbeddings": 0,
|
||||
"numberOfEmbeddedDocuments": 0,
|
||||
@ -1655,12 +1685,15 @@ async fn import_dump_v4_movie_with_settings() {
|
||||
let (stats, code) = index.stats().await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(
|
||||
json_string!(stats),
|
||||
json_string!(stats, {
|
||||
".rawDocumentDbSize" => "[size]",
|
||||
".avgDocumentSize" => "[size]",
|
||||
}),
|
||||
@r###"
|
||||
{
|
||||
"numberOfDocuments": 53,
|
||||
"rawDocumentDbSize": 21965,
|
||||
"avgDocumentSize": 414,
|
||||
"rawDocumentDbSize": "[size]",
|
||||
"avgDocumentSize": "[size]",
|
||||
"isIndexing": false,
|
||||
"numberOfEmbeddings": 0,
|
||||
"numberOfEmbeddedDocuments": 0,
|
||||
@ -1822,12 +1855,15 @@ async fn import_dump_v4_rubygems_with_settings() {
|
||||
let (stats, code) = index.stats().await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(
|
||||
json_string!(stats),
|
||||
json_string!(stats, {
|
||||
".rawDocumentDbSize" => "[size]",
|
||||
".avgDocumentSize" => "[size]",
|
||||
}),
|
||||
@r###"
|
||||
{
|
||||
"numberOfDocuments": 53,
|
||||
"rawDocumentDbSize": 8606,
|
||||
"avgDocumentSize": 162,
|
||||
"rawDocumentDbSize": "[size]",
|
||||
"avgDocumentSize": "[size]",
|
||||
"isIndexing": false,
|
||||
"numberOfEmbeddings": 0,
|
||||
"numberOfEmbeddedDocuments": 0,
|
||||
@ -1994,11 +2030,14 @@ async fn import_dump_v5() {
|
||||
|
||||
let (stats, code) = index1.stats().await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(stats), @r###"
|
||||
snapshot!(json_string!(stats, {
|
||||
".rawDocumentDbSize" => "[size]",
|
||||
".avgDocumentSize" => "[size]",
|
||||
}), @r###"
|
||||
{
|
||||
"numberOfDocuments": 10,
|
||||
"rawDocumentDbSize": 6782,
|
||||
"avgDocumentSize": 678,
|
||||
"rawDocumentDbSize": "[size]",
|
||||
"avgDocumentSize": "[size]",
|
||||
"isIndexing": false,
|
||||
"numberOfEmbeddings": 0,
|
||||
"numberOfEmbeddedDocuments": 0,
|
||||
@ -2031,12 +2070,15 @@ async fn import_dump_v5() {
|
||||
let (stats, code) = index2.stats().await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(
|
||||
json_string!(stats),
|
||||
json_string!(stats, {
|
||||
".rawDocumentDbSize" => "[size]",
|
||||
".avgDocumentSize" => "[size]",
|
||||
}),
|
||||
@r###"
|
||||
{
|
||||
"numberOfDocuments": 10,
|
||||
"rawDocumentDbSize": 6782,
|
||||
"avgDocumentSize": 678,
|
||||
"rawDocumentDbSize": "[size]",
|
||||
"avgDocumentSize": "[size]",
|
||||
"isIndexing": false,
|
||||
"numberOfEmbeddings": 0,
|
||||
"numberOfEmbeddedDocuments": 0,
|
||||
@ -2237,6 +2279,7 @@ async fn import_dump_v6_containing_batches_and_enqueued_tasks() {
|
||||
".results[0].duration" => "[date]",
|
||||
".results[0].stats.progressTrace" => "[progressTrace]",
|
||||
".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]",
|
||||
".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]",
|
||||
}), name: "batches");
|
||||
|
||||
let (indexes, code) = server.list_indexes(None, None).await;
|
||||
|
@ -1783,6 +1783,146 @@ async fn test_nested_fields() {
|
||||
.await;
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn test_typo_settings() {
|
||||
let documents = json!([
|
||||
{
|
||||
"id": 0,
|
||||
"title": "The zeroth document",
|
||||
},
|
||||
{
|
||||
"id": 1,
|
||||
"title": "The first document",
|
||||
"nested": {
|
||||
"object": "field",
|
||||
"machin": "bidule",
|
||||
},
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"title": "The second document",
|
||||
"nested": [
|
||||
"array",
|
||||
{
|
||||
"object": "field",
|
||||
},
|
||||
{
|
||||
"prout": "truc",
|
||||
"machin": "lol",
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"title": "The third document",
|
||||
"nested": "I lied",
|
||||
},
|
||||
]);
|
||||
|
||||
test_settings_documents_indexing_swapping_and_search(
|
||||
&documents,
|
||||
&json!({
|
||||
"searchableAttributes": ["title", "nested.object", "nested.machin"],
|
||||
"typoTolerance": {
|
||||
"enabled": true,
|
||||
"disableOnAttributes": ["title"]
|
||||
}
|
||||
}),
|
||||
&json!({"q": "document"}),
|
||||
|response, code| {
|
||||
assert_eq!(code, 200, "{}", response);
|
||||
snapshot!(json_string!(response["hits"]), @r###"
|
||||
[
|
||||
{
|
||||
"id": 0,
|
||||
"title": "The zeroth document"
|
||||
},
|
||||
{
|
||||
"id": 1,
|
||||
"title": "The first document",
|
||||
"nested": {
|
||||
"object": "field",
|
||||
"machin": "bidule"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"title": "The second document",
|
||||
"nested": [
|
||||
"array",
|
||||
{
|
||||
"object": "field"
|
||||
},
|
||||
{
|
||||
"prout": "truc",
|
||||
"machin": "lol"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"title": "The third document",
|
||||
"nested": "I lied"
|
||||
}
|
||||
]
|
||||
"###);
|
||||
},
|
||||
)
|
||||
.await;
|
||||
|
||||
// Test prefix search
|
||||
test_settings_documents_indexing_swapping_and_search(
|
||||
&documents,
|
||||
&json!({
|
||||
"searchableAttributes": ["title", "nested.object", "nested.machin"],
|
||||
"typoTolerance": {
|
||||
"enabled": true,
|
||||
"disableOnAttributes": ["title"]
|
||||
}
|
||||
}),
|
||||
&json!({"q": "docume"}),
|
||||
|response, code| {
|
||||
assert_eq!(code, 200, "{}", response);
|
||||
snapshot!(json_string!(response["hits"]), @r###"
|
||||
[
|
||||
{
|
||||
"id": 0,
|
||||
"title": "The zeroth document"
|
||||
},
|
||||
{
|
||||
"id": 1,
|
||||
"title": "The first document",
|
||||
"nested": {
|
||||
"object": "field",
|
||||
"machin": "bidule"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"title": "The second document",
|
||||
"nested": [
|
||||
"array",
|
||||
{
|
||||
"object": "field"
|
||||
},
|
||||
{
|
||||
"prout": "truc",
|
||||
"machin": "lol"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"title": "The third document",
|
||||
"nested": "I lied"
|
||||
}
|
||||
]
|
||||
"###);
|
||||
},
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
/// Modifying facets with different casing should work correctly
|
||||
#[actix_rt::test]
|
||||
async fn change_facet_casing() {
|
||||
|
@ -110,11 +110,14 @@ async fn add_remove_embeddings() {
|
||||
index.wait_task(response.uid()).await.succeeded();
|
||||
|
||||
let (stats, _code) = index.stats().await;
|
||||
snapshot!(json_string!(stats), @r###"
|
||||
snapshot!(json_string!(stats, {
|
||||
".rawDocumentDbSize" => "[size]",
|
||||
".avgDocumentSize" => "[size]",
|
||||
}), @r###"
|
||||
{
|
||||
"numberOfDocuments": 2,
|
||||
"rawDocumentDbSize": 27,
|
||||
"avgDocumentSize": 13,
|
||||
"rawDocumentDbSize": "[size]",
|
||||
"avgDocumentSize": "[size]",
|
||||
"isIndexing": false,
|
||||
"numberOfEmbeddings": 5,
|
||||
"numberOfEmbeddedDocuments": 2,
|
||||
@ -135,11 +138,14 @@ async fn add_remove_embeddings() {
|
||||
index.wait_task(response.uid()).await.succeeded();
|
||||
|
||||
let (stats, _code) = index.stats().await;
|
||||
snapshot!(json_string!(stats), @r###"
|
||||
snapshot!(json_string!(stats, {
|
||||
".rawDocumentDbSize" => "[size]",
|
||||
".avgDocumentSize" => "[size]",
|
||||
}), @r###"
|
||||
{
|
||||
"numberOfDocuments": 2,
|
||||
"rawDocumentDbSize": 27,
|
||||
"avgDocumentSize": 13,
|
||||
"rawDocumentDbSize": "[size]",
|
||||
"avgDocumentSize": "[size]",
|
||||
"isIndexing": false,
|
||||
"numberOfEmbeddings": 3,
|
||||
"numberOfEmbeddedDocuments": 2,
|
||||
@ -160,11 +166,14 @@ async fn add_remove_embeddings() {
|
||||
index.wait_task(response.uid()).await.succeeded();
|
||||
|
||||
let (stats, _code) = index.stats().await;
|
||||
snapshot!(json_string!(stats), @r###"
|
||||
snapshot!(json_string!(stats, {
|
||||
".rawDocumentDbSize" => "[size]",
|
||||
".avgDocumentSize" => "[size]",
|
||||
}), @r###"
|
||||
{
|
||||
"numberOfDocuments": 2,
|
||||
"rawDocumentDbSize": 27,
|
||||
"avgDocumentSize": 13,
|
||||
"rawDocumentDbSize": "[size]",
|
||||
"avgDocumentSize": "[size]",
|
||||
"isIndexing": false,
|
||||
"numberOfEmbeddings": 2,
|
||||
"numberOfEmbeddedDocuments": 2,
|
||||
@ -186,11 +195,14 @@ async fn add_remove_embeddings() {
|
||||
index.wait_task(response.uid()).await.succeeded();
|
||||
|
||||
let (stats, _code) = index.stats().await;
|
||||
snapshot!(json_string!(stats), @r###"
|
||||
snapshot!(json_string!(stats, {
|
||||
".rawDocumentDbSize" => "[size]",
|
||||
".avgDocumentSize" => "[size]",
|
||||
}), @r###"
|
||||
{
|
||||
"numberOfDocuments": 2,
|
||||
"rawDocumentDbSize": 27,
|
||||
"avgDocumentSize": 13,
|
||||
"rawDocumentDbSize": "[size]",
|
||||
"avgDocumentSize": "[size]",
|
||||
"isIndexing": false,
|
||||
"numberOfEmbeddings": 2,
|
||||
"numberOfEmbeddedDocuments": 1,
|
||||
@ -236,11 +248,14 @@ async fn add_remove_embedded_documents() {
|
||||
index.wait_task(response.uid()).await.succeeded();
|
||||
|
||||
let (stats, _code) = index.stats().await;
|
||||
snapshot!(json_string!(stats), @r###"
|
||||
snapshot!(json_string!(stats, {
|
||||
".rawDocumentDbSize" => "[size]",
|
||||
".avgDocumentSize" => "[size]",
|
||||
}), @r###"
|
||||
{
|
||||
"numberOfDocuments": 2,
|
||||
"rawDocumentDbSize": 27,
|
||||
"avgDocumentSize": 13,
|
||||
"rawDocumentDbSize": "[size]",
|
||||
"avgDocumentSize": "[size]",
|
||||
"isIndexing": false,
|
||||
"numberOfEmbeddings": 5,
|
||||
"numberOfEmbeddedDocuments": 2,
|
||||
@ -257,11 +272,14 @@ async fn add_remove_embedded_documents() {
|
||||
index.wait_task(response.uid()).await.succeeded();
|
||||
|
||||
let (stats, _code) = index.stats().await;
|
||||
snapshot!(json_string!(stats), @r###"
|
||||
snapshot!(json_string!(stats, {
|
||||
".rawDocumentDbSize" => "[size]",
|
||||
".avgDocumentSize" => "[size]",
|
||||
}), @r###"
|
||||
{
|
||||
"numberOfDocuments": 1,
|
||||
"rawDocumentDbSize": 13,
|
||||
"avgDocumentSize": 13,
|
||||
"rawDocumentDbSize": "[size]",
|
||||
"avgDocumentSize": "[size]",
|
||||
"isIndexing": false,
|
||||
"numberOfEmbeddings": 3,
|
||||
"numberOfEmbeddedDocuments": 1,
|
||||
@ -290,11 +308,14 @@ async fn update_embedder_settings() {
|
||||
index.wait_task(response.uid()).await.succeeded();
|
||||
|
||||
let (stats, _code) = index.stats().await;
|
||||
snapshot!(json_string!(stats), @r###"
|
||||
snapshot!(json_string!(stats, {
|
||||
".rawDocumentDbSize" => "[size]",
|
||||
".avgDocumentSize" => "[size]",
|
||||
}), @r###"
|
||||
{
|
||||
"numberOfDocuments": 2,
|
||||
"rawDocumentDbSize": 108,
|
||||
"avgDocumentSize": 54,
|
||||
"rawDocumentDbSize": "[size]",
|
||||
"avgDocumentSize": "[size]",
|
||||
"isIndexing": false,
|
||||
"numberOfEmbeddings": 0,
|
||||
"numberOfEmbeddedDocuments": 0,
|
||||
@ -326,11 +347,14 @@ async fn update_embedder_settings() {
|
||||
server.wait_task(response.uid()).await.succeeded();
|
||||
|
||||
let (stats, _code) = index.stats().await;
|
||||
snapshot!(json_string!(stats), @r###"
|
||||
snapshot!(json_string!(stats, {
|
||||
".rawDocumentDbSize" => "[size]",
|
||||
".avgDocumentSize" => "[size]",
|
||||
}), @r###"
|
||||
{
|
||||
"numberOfDocuments": 2,
|
||||
"rawDocumentDbSize": 108,
|
||||
"avgDocumentSize": 54,
|
||||
"rawDocumentDbSize": "[size]",
|
||||
"avgDocumentSize": "[size]",
|
||||
"isIndexing": false,
|
||||
"numberOfEmbeddings": 3,
|
||||
"numberOfEmbeddedDocuments": 2,
|
||||
|
@ -133,7 +133,9 @@ async fn check_the_index_scheduler(server: &Server) {
|
||||
let (stats, _) = server.stats().await;
|
||||
assert_json_snapshot!(stats, {
|
||||
".databaseSize" => "[bytes]",
|
||||
".usedDatabaseSize" => "[bytes]"
|
||||
".usedDatabaseSize" => "[bytes]",
|
||||
".indexes.kefir.rawDocumentDbSize" => "[bytes]",
|
||||
".indexes.kefir.avgDocumentSize" => "[bytes]",
|
||||
},
|
||||
@r###"
|
||||
{
|
||||
@ -143,8 +145,8 @@ async fn check_the_index_scheduler(server: &Server) {
|
||||
"indexes": {
|
||||
"kefir": {
|
||||
"numberOfDocuments": 1,
|
||||
"rawDocumentDbSize": 109,
|
||||
"avgDocumentSize": 109,
|
||||
"rawDocumentDbSize": "[bytes]",
|
||||
"avgDocumentSize": "[bytes]",
|
||||
"isIndexing": false,
|
||||
"numberOfEmbeddings": 0,
|
||||
"numberOfEmbeddedDocuments": 0,
|
||||
@ -193,31 +195,33 @@ async fn check_the_index_scheduler(server: &Server) {
|
||||
|
||||
// Tests all the batches query parameters
|
||||
let (batches, _) = server.batches_filter("uids=10").await;
|
||||
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_uids_equal_10");
|
||||
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_uids_equal_10");
|
||||
let (batches, _) = server.batches_filter("batchUids=10").await;
|
||||
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_batchUids_equal_10");
|
||||
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_batchUids_equal_10");
|
||||
let (batches, _) = server.batches_filter("statuses=canceled").await;
|
||||
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_statuses_equal_canceled");
|
||||
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_statuses_equal_canceled");
|
||||
// types has already been tested above to retrieve the upgrade database
|
||||
let (batches, _) = server.batches_filter("canceledBy=19").await;
|
||||
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_canceledBy_equal_19");
|
||||
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_canceledBy_equal_19");
|
||||
let (batches, _) = server.batches_filter("beforeEnqueuedAt=2025-01-16T16:47:41Z").await;
|
||||
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_beforeEnqueuedAt_equal_2025-01-16T16_47_41");
|
||||
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_beforeEnqueuedAt_equal_2025-01-16T16_47_41");
|
||||
let (batches, _) = server.batches_filter("afterEnqueuedAt=2025-01-16T16:47:41Z").await;
|
||||
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_afterEnqueuedAt_equal_2025-01-16T16_47_41");
|
||||
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_afterEnqueuedAt_equal_2025-01-16T16_47_41");
|
||||
let (batches, _) = server.batches_filter("beforeStartedAt=2025-01-16T16:47:41Z").await;
|
||||
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_beforeStartedAt_equal_2025-01-16T16_47_41");
|
||||
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_beforeStartedAt_equal_2025-01-16T16_47_41");
|
||||
let (batches, _) = server.batches_filter("afterStartedAt=2025-01-16T16:47:41Z").await;
|
||||
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_afterStartedAt_equal_2025-01-16T16_47_41");
|
||||
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_afterStartedAt_equal_2025-01-16T16_47_41");
|
||||
let (batches, _) = server.batches_filter("beforeFinishedAt=2025-01-16T16:47:41Z").await;
|
||||
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_beforeFinishedAt_equal_2025-01-16T16_47_41");
|
||||
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_beforeFinishedAt_equal_2025-01-16T16_47_41");
|
||||
let (batches, _) = server.batches_filter("afterFinishedAt=2025-01-16T16:47:41Z").await;
|
||||
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_afterFinishedAt_equal_2025-01-16T16_47_41");
|
||||
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_afterFinishedAt_equal_2025-01-16T16_47_41");
|
||||
|
||||
let (stats, _) = server.stats().await;
|
||||
assert_json_snapshot!(stats, {
|
||||
".databaseSize" => "[bytes]",
|
||||
".usedDatabaseSize" => "[bytes]"
|
||||
".usedDatabaseSize" => "[bytes]",
|
||||
".indexes.kefir.rawDocumentDbSize" => "[bytes]",
|
||||
".indexes.kefir.avgDocumentSize" => "[bytes]",
|
||||
},
|
||||
@r###"
|
||||
{
|
||||
@ -227,8 +231,8 @@ async fn check_the_index_scheduler(server: &Server) {
|
||||
"indexes": {
|
||||
"kefir": {
|
||||
"numberOfDocuments": 1,
|
||||
"rawDocumentDbSize": 109,
|
||||
"avgDocumentSize": 109,
|
||||
"rawDocumentDbSize": "[bytes]",
|
||||
"avgDocumentSize": "[bytes]",
|
||||
"isIndexing": false,
|
||||
"numberOfEmbeddings": 0,
|
||||
"numberOfEmbeddedDocuments": 0,
|
||||
@ -245,11 +249,14 @@ async fn check_the_index_scheduler(server: &Server) {
|
||||
"###);
|
||||
let index = server.index("kefir");
|
||||
let (stats, _) = index.stats().await;
|
||||
snapshot!(stats, @r###"
|
||||
snapshot!(json_string!(stats, {
|
||||
".rawDocumentDbSize" => "[bytes]",
|
||||
".avgDocumentSize" => "[bytes]",
|
||||
}), @r###"
|
||||
{
|
||||
"numberOfDocuments": 1,
|
||||
"rawDocumentDbSize": 109,
|
||||
"avgDocumentSize": 109,
|
||||
"rawDocumentDbSize": "[bytes]",
|
||||
"avgDocumentSize": "[bytes]",
|
||||
"isIndexing": false,
|
||||
"numberOfEmbeddings": 0,
|
||||
"numberOfEmbeddedDocuments": 0,
|
||||
|
@ -188,7 +188,7 @@ async fn user_provide_mismatched_embedding_dimension() {
|
||||
let (value, code) = index.add_documents(documents, None).await;
|
||||
snapshot!(code, @"202 Accepted");
|
||||
let task = index.wait_task(value.uid()).await;
|
||||
snapshot!(task, @r#"
|
||||
snapshot!(task, @r###"
|
||||
{
|
||||
"uid": "[uid]",
|
||||
"batchUid": "[batch_uid]",
|
||||
@ -201,7 +201,7 @@ async fn user_provide_mismatched_embedding_dimension() {
|
||||
"indexedDocuments": 0
|
||||
},
|
||||
"error": {
|
||||
"message": "Index `doggo`: Invalid vector dimensions: expected: `3`, found: `2`.",
|
||||
"message": "Index `doggo`: Invalid vector dimensions in document with id `0` in `._vectors.manual`.\n - note: embedding #0 has dimensions 2\n - note: embedder `manual` requires 3",
|
||||
"code": "invalid_vector_dimensions",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_vector_dimensions"
|
||||
@ -211,46 +211,36 @@ async fn user_provide_mismatched_embedding_dimension() {
|
||||
"startedAt": "[date]",
|
||||
"finishedAt": "[date]"
|
||||
}
|
||||
"#);
|
||||
"###);
|
||||
|
||||
// FIXME: /!\ The case where the number of embeddings is a divisor of `dimensions` would still pass
|
||||
let new_document = json!([
|
||||
{"id": 0, "name": "kefir", "_vectors": { "manual": [[0, 0], [1, 1], [2, 2]] }},
|
||||
]);
|
||||
let (response, code) = index.add_documents(new_document, None).await;
|
||||
snapshot!(code, @"202 Accepted");
|
||||
index.wait_task(response.uid()).await.succeeded();
|
||||
let (documents, _code) = index
|
||||
.get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() })
|
||||
.await;
|
||||
snapshot!(json_string!(documents), @r###"
|
||||
let task = index.wait_task(response.uid()).await;
|
||||
snapshot!(task, @r###"
|
||||
{
|
||||
"results": [
|
||||
{
|
||||
"id": 0,
|
||||
"name": "kefir",
|
||||
"_vectors": {
|
||||
"manual": {
|
||||
"embeddings": [
|
||||
[
|
||||
0.0,
|
||||
0.0,
|
||||
1.0
|
||||
],
|
||||
[
|
||||
1.0,
|
||||
2.0,
|
||||
2.0
|
||||
]
|
||||
],
|
||||
"regenerate": false
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
"offset": 0,
|
||||
"limit": 20,
|
||||
"total": 1
|
||||
"uid": "[uid]",
|
||||
"batchUid": "[batch_uid]",
|
||||
"indexUid": "doggo",
|
||||
"status": "failed",
|
||||
"type": "documentAdditionOrUpdate",
|
||||
"canceledBy": null,
|
||||
"details": {
|
||||
"receivedDocuments": 1,
|
||||
"indexedDocuments": 0
|
||||
},
|
||||
"error": {
|
||||
"message": "Index `doggo`: Invalid vector dimensions in document with id `0` in `._vectors.manual`.\n - note: embedding #0 has dimensions 2\n - note: embedder `manual` requires 3",
|
||||
"code": "invalid_vector_dimensions",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_vector_dimensions"
|
||||
},
|
||||
"duration": "[duration]",
|
||||
"enqueuedAt": "[date]",
|
||||
"startedAt": "[date]",
|
||||
"finishedAt": "[date]"
|
||||
}
|
||||
"###);
|
||||
}
|
||||
|
@ -21,7 +21,7 @@ byteorder = "1.5.0"
|
||||
charabia = { version = "0.9.3", default-features = false }
|
||||
concat-arrays = "0.1.2"
|
||||
convert_case = "0.6.0"
|
||||
crossbeam-channel = "0.5.14"
|
||||
crossbeam-channel = "0.5.15"
|
||||
deserr = "0.6.3"
|
||||
either = { version = "1.13.0", features = ["serde"] }
|
||||
flatten-serde-json = { path = "../flatten-serde-json" }
|
||||
|
@ -1,8 +1,13 @@
|
||||
use heed::types::Bytes;
|
||||
use std::mem;
|
||||
|
||||
use heed::Database;
|
||||
use heed::DatabaseStat;
|
||||
use heed::RoTxn;
|
||||
use heed::Unspecified;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::BEU32;
|
||||
|
||||
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
/// The stats of a database.
|
||||
@ -20,58 +25,24 @@ impl DatabaseStats {
|
||||
///
|
||||
/// This function iterates over the whole database and computes the stats.
|
||||
/// It is not efficient and should be cached somewhere.
|
||||
pub(crate) fn new(database: Database<Bytes, Bytes>, rtxn: &RoTxn<'_>) -> heed::Result<Self> {
|
||||
let mut database_stats =
|
||||
Self { number_of_entries: 0, total_key_size: 0, total_value_size: 0 };
|
||||
pub(crate) fn new(
|
||||
database: Database<BEU32, Unspecified>,
|
||||
rtxn: &RoTxn<'_>,
|
||||
) -> heed::Result<Self> {
|
||||
let DatabaseStat { page_size, depth: _, branch_pages, leaf_pages, overflow_pages, entries } =
|
||||
database.stat(rtxn)?;
|
||||
|
||||
let mut iter = database.iter(rtxn)?;
|
||||
while let Some((key, value)) = iter.next().transpose()? {
|
||||
let key_size = key.len() as u64;
|
||||
let value_size = value.len() as u64;
|
||||
database_stats.total_key_size += key_size;
|
||||
database_stats.total_value_size += value_size;
|
||||
}
|
||||
// We take the total size of all the pages (branch, leaf, and overflow); the overflow pages contain the values and nothing else.
|
||||
let total_size = (branch_pages + leaf_pages + overflow_pages) * page_size as usize;
|
||||
// We compute an estimated size for the keys.
|
||||
let total_key_size = entries * (mem::size_of::<u32>() + 4);
|
||||
let total_value_size = total_size - total_key_size;
|
||||
|
||||
database_stats.number_of_entries = database.len(rtxn)?;
|
||||
|
||||
Ok(database_stats)
|
||||
}
|
||||
|
||||
/// Recomputes the stats of the database and returns the new stats.
|
||||
///
|
||||
/// This function is used to update the stats of the database when some keys are modified.
|
||||
/// It is more efficient than the `new` function because it does not iterate over the whole database but only the modified keys comparing the before and after states.
|
||||
pub(crate) fn recompute<I, K>(
|
||||
mut stats: Self,
|
||||
database: Database<Bytes, Bytes>,
|
||||
before_rtxn: &RoTxn<'_>,
|
||||
after_rtxn: &RoTxn<'_>,
|
||||
modified_keys: I,
|
||||
) -> heed::Result<Self>
|
||||
where
|
||||
I: IntoIterator<Item = K>,
|
||||
K: AsRef<[u8]>,
|
||||
{
|
||||
for key in modified_keys {
|
||||
let key = key.as_ref();
|
||||
if let Some(value) = database.get(after_rtxn, key)? {
|
||||
let key_size = key.len() as u64;
|
||||
let value_size = value.len() as u64;
|
||||
stats.total_key_size = stats.total_key_size.saturating_add(key_size);
|
||||
stats.total_value_size = stats.total_value_size.saturating_add(value_size);
|
||||
}
|
||||
|
||||
if let Some(value) = database.get(before_rtxn, key)? {
|
||||
let key_size = key.len() as u64;
|
||||
let value_size = value.len() as u64;
|
||||
stats.total_key_size = stats.total_key_size.saturating_sub(key_size);
|
||||
stats.total_value_size = stats.total_value_size.saturating_sub(value_size);
|
||||
}
|
||||
}
|
||||
|
||||
stats.number_of_entries = database.len(after_rtxn)?;
|
||||
|
||||
Ok(stats)
|
||||
Ok(Self {
|
||||
number_of_entries: entries as u64,
|
||||
total_key_size: total_key_size as u64,
|
||||
total_value_size: total_value_size as u64,
|
||||
})
|
||||
}
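
The rewritten `new()` above no longer walks every entry (the incremental `recompute` helper is dropped in the same change): it estimates sizes from LMDB's page statistics instead. Roughly, the database footprint is its page count times the page size, keys are assumed to be `BEU32` document ids at about 4 bytes plus a small per-entry overhead, and whatever remains is attributed to the values. A runnable sketch of that arithmetic with made-up page statistics (the numbers are hypothetical, not taken from a real index):

```rust
fn main() {
    // Hypothetical LMDB statistics, shaped like heed's `DatabaseStat` fields.
    let (page_size, branch_pages, leaf_pages, overflow_pages, entries) =
        (4096usize, 1usize, 10usize, 2usize, 1000usize);

    // Every page kind counts towards the on-disk footprint of this database.
    let total_size = (branch_pages + leaf_pages + overflow_pages) * page_size;
    // Keys are big-endian u32 document ids: ~4 bytes each plus a small overhead.
    let total_key_size = entries * (std::mem::size_of::<u32>() + 4);
    // The remainder is attributed to the values.
    let total_value_size = total_size - total_key_size;

    assert_eq!(total_size, 53_248); // 13 pages * 4096 B
    assert_eq!(total_key_size, 8_000); // 1000 entries * 8 B
    assert_eq!(total_value_size, 45_248);
}
```

Because these figures are estimates rather than exact byte counts, the HTTP tests earlier in this diff now redact `rawDocumentDbSize` and `avgDocumentSize` (and the new `internalDatabaseSizes`) in their snapshots instead of pinning exact values.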
|
||||
|
||||
pub fn average_key_size(&self) -> u64 {
|
||||
@ -86,6 +57,10 @@ impl DatabaseStats {
|
||||
self.number_of_entries
|
||||
}
|
||||
|
||||
pub fn total_size(&self) -> u64 {
|
||||
self.total_key_size + self.total_value_size
|
||||
}
|
||||
|
||||
pub fn total_key_size(&self) -> u64 {
|
||||
self.total_key_size
|
||||
}
|
||||
|
@ -154,6 +154,14 @@ and can not be more than 511 bytes.", .document_id.to_string()
|
||||
InvalidGeoField(#[from] Box<GeoError>),
|
||||
#[error("Invalid vector dimensions: expected: `{}`, found: `{}`.", .expected, .found)]
|
||||
InvalidVectorDimensions { expected: usize, found: usize },
|
||||
#[error("Invalid vector dimensions in document with id `{document_id}` in `._vectors.{embedder_name}`.\n - note: embedding #{embedding_index} has dimensions {found}\n - note: embedder `{embedder_name}` requires {expected}")]
|
||||
InvalidIndexingVectorDimensions {
|
||||
embedder_name: String,
|
||||
document_id: String,
|
||||
embedding_index: usize,
|
||||
expected: usize,
|
||||
found: usize,
|
||||
},
|
||||
#[error("The `_vectors` field in the document with id: `{document_id}` is not an object. Was expecting an object with a key for each embedder with manually provided vectors, but instead got `{value}`")]
|
||||
InvalidVectorsMapType { document_id: String, value: Value },
|
||||
#[error("Bad embedder configuration in the document with id: `{document_id}`. {error}")]
|
||||
|
@ -3,8 +3,9 @@ use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
|
||||
use std::fs::File;
|
||||
use std::path::Path;
|
||||
|
||||
use heed::{types::*, WithoutTls};
|
||||
use heed::{types::*, DatabaseStat, WithoutTls};
|
||||
use heed::{CompactionOption, Database, RoTxn, RwTxn, Unspecified};
|
||||
use indexmap::IndexMap;
|
||||
use roaring::RoaringBitmap;
|
||||
use rstar::RTree;
|
||||
use serde::{Deserialize, Serialize};
|
||||
@ -75,6 +76,7 @@ pub mod main_key {
|
||||
pub const SEARCH_CUTOFF: &str = "search_cutoff";
|
||||
pub const LOCALIZED_ATTRIBUTES_RULES: &str = "localized_attributes_rules";
|
||||
pub const FACET_SEARCH: &str = "facet_search";
|
||||
pub const EXECUTE_AFTER_UPDATE: &str = "execute-after-update";
|
||||
pub const PREFIX_SEARCH: &str = "prefix_search";
|
||||
pub const DOCUMENTS_STATS: &str = "documents_stats";
|
||||
}
|
||||
@ -410,38 +412,6 @@ impl Index {
|
||||
Ok(count.unwrap_or_default())
|
||||
}
|
||||
|
||||
/// Updates the stats of the documents database based on the previous stats and the modified docids.
|
||||
pub fn update_documents_stats(
|
||||
&self,
|
||||
wtxn: &mut RwTxn<'_>,
|
||||
modified_docids: roaring::RoaringBitmap,
|
||||
) -> Result<()> {
|
||||
let before_rtxn = self.read_txn()?;
|
||||
let document_stats = match self.documents_stats(&before_rtxn)? {
|
||||
Some(before_stats) => DatabaseStats::recompute(
|
||||
before_stats,
|
||||
self.documents.remap_types(),
|
||||
&before_rtxn,
|
||||
wtxn,
|
||||
modified_docids.iter().map(|docid| docid.to_be_bytes()),
|
||||
)?,
|
||||
None => {
|
||||
// This should never happen when there are already documents in the index, the documents stats should be present.
|
||||
// If it happens, it means that the index was not properly initialized/upgraded.
|
||||
debug_assert_eq!(
|
||||
self.documents.len(&before_rtxn)?,
|
||||
0,
|
||||
"The documents stats should be present when there are documents in the index"
|
||||
);
|
||||
tracing::warn!("No documents stats found, creating new ones");
|
||||
DatabaseStats::new(self.documents.remap_types(), &*wtxn)?
|
||||
}
|
||||
};
|
||||
|
||||
self.put_documents_stats(wtxn, document_stats)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Writes the stats of the documents database.
|
||||
pub fn put_documents_stats(
|
||||
&self,
|
||||
@ -1654,6 +1624,22 @@ impl Index {
|
||||
self.main.remap_key_type::<Str>().delete(txn, main_key::FACET_SEARCH)
|
||||
}
|
||||
|
||||
pub fn execute_after_update<'t>(&self, txn: &'t RoTxn<'_>) -> heed::Result<Option<&'t str>> {
|
||||
self.main.remap_types::<Str, Str>().get(txn, main_key::EXECUTE_AFTER_UPDATE)
|
||||
}
|
||||
|
||||
pub(crate) fn put_execute_after_update(
|
||||
&self,
|
||||
txn: &mut RwTxn<'_>,
|
||||
val: &str,
|
||||
) -> heed::Result<()> {
|
||||
self.main.remap_types::<Str, Str>().put(txn, main_key::EXECUTE_AFTER_UPDATE, &val)
|
||||
}
|
||||
|
||||
pub(crate) fn delete_execute_after_update(&self, txn: &mut RwTxn<'_>) -> heed::Result<bool> {
|
||||
self.main.remap_key_type::<Str>().delete(txn, main_key::EXECUTE_AFTER_UPDATE)
|
||||
}
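
A minimal sketch of how these accessors could be exercised from inside the crate (the setter and deleter are `pub(crate)`), assuming an already-open `Index`; the command string and function name are placeholders, not part of this diff:

```rust
// Crate-internal sketch; `index` is an open milli `Index`.
fn roundtrip_execute_after_update(index: &Index) -> heed::Result<()> {
    let mut wtxn = index.write_txn()?;
    index.put_execute_after_update(&mut wtxn, "refresh-downstream-cache")?;
    wtxn.commit()?;

    let rtxn = index.read_txn()?;
    // `None` means the `execute-after-update` main key was never written or has been deleted.
    if let Some(cmd) = index.execute_after_update(&rtxn)? {
        println!("execute after update: {cmd}");
    }
    Ok(())
}
```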
|
||||
|
||||
pub fn localized_attributes_rules(
|
||||
&self,
|
||||
rtxn: &RoTxn<'_>,
|
||||
@ -1755,6 +1741,122 @@ impl Index {
|
||||
}
|
||||
Ok(stats)
|
||||
}
|
||||
|
||||
/// Check if the word is indexed in the index.
|
||||
///
|
||||
/// This function checks if the word is indexed in the index by looking at the word_docids and exact_word_docids.
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `rtxn`: The read transaction.
|
||||
/// * `word`: The word to check.
|
||||
pub fn contains_word(&self, rtxn: &RoTxn<'_>, word: &str) -> Result<bool> {
|
||||
Ok(self.word_docids.remap_data_type::<DecodeIgnore>().get(rtxn, word)?.is_some()
|
||||
|| self.exact_word_docids.remap_data_type::<DecodeIgnore>().get(rtxn, word)?.is_some())
|
||||
}
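
A small usage sketch, assuming an open `Index` and using the same crate `Result` alias as the signature above (the word is illustrative):

```rust
// Sketch: true if the word reached either the word or the exact-word docids database.
fn kefir_is_indexed(index: &Index) -> Result<bool> {
    let rtxn = index.read_txn()?;
    index.contains_word(&rtxn, "kefir")
}
```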
|
||||
|
||||
/// Returns the sizes in bytes of each of the index database at the given rtxn.
|
||||
pub fn database_sizes(&self, rtxn: &RoTxn<'_>) -> heed::Result<IndexMap<&'static str, usize>> {
|
||||
let Self {
|
||||
env: _,
|
||||
main,
|
||||
external_documents_ids,
|
||||
word_docids,
|
||||
exact_word_docids,
|
||||
word_prefix_docids,
|
||||
exact_word_prefix_docids,
|
||||
word_pair_proximity_docids,
|
||||
word_position_docids,
|
||||
word_fid_docids,
|
||||
word_prefix_position_docids,
|
||||
word_prefix_fid_docids,
|
||||
field_id_word_count_docids,
|
||||
facet_id_f64_docids,
|
||||
facet_id_string_docids,
|
||||
facet_id_normalized_string_strings,
|
||||
facet_id_string_fst,
|
||||
facet_id_exists_docids,
|
||||
facet_id_is_null_docids,
|
||||
facet_id_is_empty_docids,
|
||||
field_id_docid_facet_f64s,
|
||||
field_id_docid_facet_strings,
|
||||
vector_arroy,
|
||||
embedder_category_id,
|
||||
documents,
|
||||
} = self;
|
||||
|
||||
fn compute_size(stats: DatabaseStat) -> usize {
|
||||
let DatabaseStat {
|
||||
page_size,
|
||||
depth: _,
|
||||
branch_pages,
|
||||
leaf_pages,
|
||||
overflow_pages,
|
||||
entries: _,
|
||||
} = stats;
|
||||
|
||||
(branch_pages + leaf_pages + overflow_pages) * page_size as usize
|
||||
}
|
||||
|
||||
let mut sizes = IndexMap::new();
|
||||
sizes.insert("main", main.stat(rtxn).map(compute_size)?);
|
||||
sizes
|
||||
.insert("external_documents_ids", external_documents_ids.stat(rtxn).map(compute_size)?);
|
||||
sizes.insert("word_docids", word_docids.stat(rtxn).map(compute_size)?);
|
||||
sizes.insert("exact_word_docids", exact_word_docids.stat(rtxn).map(compute_size)?);
|
||||
sizes.insert("word_prefix_docids", word_prefix_docids.stat(rtxn).map(compute_size)?);
|
||||
sizes.insert(
|
||||
"exact_word_prefix_docids",
|
||||
exact_word_prefix_docids.stat(rtxn).map(compute_size)?,
|
||||
);
|
||||
sizes.insert(
|
||||
"word_pair_proximity_docids",
|
||||
word_pair_proximity_docids.stat(rtxn).map(compute_size)?,
|
||||
);
|
||||
sizes.insert("word_position_docids", word_position_docids.stat(rtxn).map(compute_size)?);
|
||||
sizes.insert("word_fid_docids", word_fid_docids.stat(rtxn).map(compute_size)?);
|
||||
sizes.insert(
|
||||
"word_prefix_position_docids",
|
||||
word_prefix_position_docids.stat(rtxn).map(compute_size)?,
|
||||
);
|
||||
sizes
|
||||
.insert("word_prefix_fid_docids", word_prefix_fid_docids.stat(rtxn).map(compute_size)?);
|
||||
sizes.insert(
|
||||
"field_id_word_count_docids",
|
||||
field_id_word_count_docids.stat(rtxn).map(compute_size)?,
|
||||
);
|
||||
sizes.insert("facet_id_f64_docids", facet_id_f64_docids.stat(rtxn).map(compute_size)?);
|
||||
sizes
|
||||
.insert("facet_id_string_docids", facet_id_string_docids.stat(rtxn).map(compute_size)?);
|
||||
sizes.insert(
|
||||
"facet_id_normalized_string_strings",
|
||||
facet_id_normalized_string_strings.stat(rtxn).map(compute_size)?,
|
||||
);
|
||||
sizes.insert("facet_id_string_fst", facet_id_string_fst.stat(rtxn).map(compute_size)?);
|
||||
sizes
|
||||
.insert("facet_id_exists_docids", facet_id_exists_docids.stat(rtxn).map(compute_size)?);
|
||||
sizes.insert(
|
||||
"facet_id_is_null_docids",
|
||||
facet_id_is_null_docids.stat(rtxn).map(compute_size)?,
|
||||
);
|
||||
sizes.insert(
|
||||
"facet_id_is_empty_docids",
|
||||
facet_id_is_empty_docids.stat(rtxn).map(compute_size)?,
|
||||
);
|
||||
sizes.insert(
|
||||
"field_id_docid_facet_f64s",
|
||||
field_id_docid_facet_f64s.stat(rtxn).map(compute_size)?,
|
||||
);
|
||||
sizes.insert(
|
||||
"field_id_docid_facet_strings",
|
||||
field_id_docid_facet_strings.stat(rtxn).map(compute_size)?,
|
||||
);
|
||||
sizes.insert("vector_arroy", vector_arroy.stat(rtxn).map(compute_size)?);
|
||||
sizes.insert("embedder_category_id", embedder_category_id.stat(rtxn).map(compute_size)?);
|
||||
sizes.insert("documents", documents.stat(rtxn).map(compute_size)?);
|
||||
|
||||
Ok(sizes)
|
||||
}
|
||||
}
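
These per-database figures are presumably what feeds the new `internalDatabaseSizes` batch statistic redacted in the test snapshots above. A hedged sketch of consuming the returned map, assuming an open `Index`:

```rust
// Sketch: print the per-database footprint of an index and its total.
fn print_database_sizes(index: &Index) -> heed::Result<()> {
    let rtxn = index.read_txn()?;
    let sizes = index.database_sizes(&rtxn)?;
    let total: usize = sizes.values().copied().sum();
    for (name, size) in &sizes {
        println!("{name}: {size} B");
    }
    println!("total: {total} B");
    Ok(())
}
```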
|
||||
|
||||
#[derive(Debug, Deserialize, Serialize)]
|
||||
|
@ -193,6 +193,16 @@ macro_rules! make_atomic_progress {
|
||||
make_atomic_progress!(Document alias AtomicDocumentStep => "document");
|
||||
make_atomic_progress!(Payload alias AtomicPayloadStep => "payload");
|
||||
|
||||
make_enum_progress! {
|
||||
pub enum MergingWordCache {
|
||||
WordDocids,
|
||||
WordFieldIdDocids,
|
||||
ExactWordDocids,
|
||||
WordPositionDocids,
|
||||
FieldIdWordCountDocids,
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Clone, ToSchema)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
#[schema(rename_all = "camelCase")]
|
||||
|
@ -164,7 +164,7 @@ impl Search<'_> {
|
||||
sort_criteria: self.sort_criteria.clone(),
|
||||
distinct: self.distinct.clone(),
|
||||
searchable_attributes: self.searchable_attributes,
|
||||
geo_strategy: self.geo_strategy,
|
||||
geo_param: self.geo_param,
|
||||
terms_matching_strategy: self.terms_matching_strategy,
|
||||
scoring_strategy: ScoringStrategy::Detailed,
|
||||
words_limit: self.words_limit,
|
||||
|
@ -45,7 +45,7 @@ pub struct Search<'a> {
|
||||
sort_criteria: Option<Vec<AscDesc>>,
|
||||
distinct: Option<String>,
|
||||
searchable_attributes: Option<&'a [String]>,
|
||||
geo_strategy: new::GeoSortStrategy,
|
||||
geo_param: new::GeoSortParameter,
|
||||
terms_matching_strategy: TermsMatchingStrategy,
|
||||
scoring_strategy: ScoringStrategy,
|
||||
words_limit: usize,
|
||||
@ -68,7 +68,7 @@ impl<'a> Search<'a> {
|
||||
sort_criteria: None,
|
||||
distinct: None,
|
||||
searchable_attributes: None,
|
||||
geo_strategy: new::GeoSortStrategy::default(),
|
||||
geo_param: new::GeoSortParameter::default(),
|
||||
terms_matching_strategy: TermsMatchingStrategy::default(),
|
||||
scoring_strategy: Default::default(),
|
||||
exhaustive_number_hits: false,
|
||||
@ -145,7 +145,13 @@ impl<'a> Search<'a> {
|
||||
|
||||
#[cfg(test)]
|
||||
pub fn geo_sort_strategy(&mut self, strategy: new::GeoSortStrategy) -> &mut Search<'a> {
|
||||
self.geo_strategy = strategy;
|
||||
self.geo_param.strategy = strategy;
|
||||
self
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub fn geo_max_bucket_size(&mut self, max_size: u64) -> &mut Search<'a> {
|
||||
self.geo_param.max_bucket_size = max_size;
|
||||
self
|
||||
}
|
||||
|
||||
@ -232,7 +238,7 @@ impl<'a> Search<'a> {
|
||||
universe,
|
||||
&self.sort_criteria,
|
||||
&self.distinct,
|
||||
self.geo_strategy,
|
||||
self.geo_param,
|
||||
self.offset,
|
||||
self.limit,
|
||||
embedder_name,
|
||||
@ -251,7 +257,7 @@ impl<'a> Search<'a> {
|
||||
universe,
|
||||
&self.sort_criteria,
|
||||
&self.distinct,
|
||||
self.geo_strategy,
|
||||
self.geo_param,
|
||||
self.offset,
|
||||
self.limit,
|
||||
Some(self.words_limit),
|
||||
@ -290,7 +296,7 @@ impl fmt::Debug for Search<'_> {
|
||||
sort_criteria,
|
||||
distinct,
|
||||
searchable_attributes,
|
||||
geo_strategy: _,
|
||||
geo_param: _,
|
||||
terms_matching_strategy,
|
||||
scoring_strategy,
|
||||
words_limit,
|
||||
|
@@ -173,16 +173,18 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
            ranking_rule_scores.push(ScoreDetails::Skipped);

            // remove candidates from the universe without adding them to result if their score is below the threshold
            if let Some(ranking_score_threshold) = ranking_score_threshold {
            let is_below_threshold =
                ranking_score_threshold.is_some_and(|ranking_score_threshold| {
                    let current_score = ScoreDetails::global_score(ranking_rule_scores.iter());
                    if current_score < ranking_score_threshold {
                        all_candidates -= bucket | &ranking_rule_universes[cur_ranking_rule_index];
                        back!();
                        continue;
                    }
                }
                    current_score < ranking_score_threshold
                });

            if is_below_threshold {
                all_candidates -= &bucket;
                all_candidates -= &ranking_rule_universes[cur_ranking_rule_index];
            } else {
                maybe_add_to_results!(bucket);
            }

            ranking_rule_scores.pop();

@@ -237,23 +239,24 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
        );

        // remove candidates from the universe without adding them to result if their score is below the threshold
        if let Some(ranking_score_threshold) = ranking_score_threshold {
        let is_below_threshold = ranking_score_threshold.is_some_and(|ranking_score_threshold| {
            let current_score = ScoreDetails::global_score(ranking_rule_scores.iter());
            if current_score < ranking_score_threshold {
                all_candidates -=
                    next_bucket.candidates | &ranking_rule_universes[cur_ranking_rule_index];
                back!();
                continue;
            }
        }
            current_score < ranking_score_threshold
        });

        ranking_rule_universes[cur_ranking_rule_index] -= &next_bucket.candidates;

        if cur_ranking_rule_index == ranking_rules_len - 1
            || (scoring_strategy == ScoringStrategy::Skip && next_bucket.candidates.len() <= 1)
            || cur_offset + (next_bucket.candidates.len() as usize) < from
            || is_below_threshold
        {
            if is_below_threshold {
                all_candidates -= &next_bucket.candidates;
                all_candidates -= &ranking_rule_universes[cur_ranking_rule_index];
            } else {
                maybe_add_to_results!(next_bucket.candidates);
            }
            ranking_rule_scores.pop();
            continue;
        }

@@ -1,10 +1,8 @@
use std::collections::VecDeque;
use std::iter::FromIterator;

use heed::types::{Bytes, Unit};
use heed::{RoPrefix, RoTxn};
use roaring::RoaringBitmap;
use rstar::RTree;
use std::collections::VecDeque;

use super::facet_string_values;
use super::ranking_rules::{RankingRule, RankingRuleOutput, RankingRuleQueryTrait};
@@ -41,6 +39,21 @@ fn facet_number_values<'a>(
    Ok(iter)
}

#[derive(Debug, Clone, Copy)]
pub struct Parameter {
    // Define the strategy used by the geo sort
    pub strategy: Strategy,
    // Limit the number of docs in a single bucket to avoid unexpectedly large overhead
    pub max_bucket_size: u64,
    // Considering the errors of GPS and geographical calculations, distances less than distance_error_margin will be treated as equal
    pub distance_error_margin: f64,
}

impl Default for Parameter {
    fn default() -> Self {
        Self { strategy: Strategy::default(), max_bucket_size: 1000, distance_error_margin: 1.0 }
    }
}
/// Define the strategy used by the geo sort.
/// The parameter represents the cache size, and, in the case of the Dynamic strategy,
/// the point where we move from using the iterative strategy to the rtree.
@@ -84,15 +97,21 @@ pub struct GeoSort<Q: RankingRuleQueryTrait> {

    cached_sorted_docids: VecDeque<(u32, [f64; 2])>,
    geo_candidates: RoaringBitmap,

    // Limit the number of docs in a single bucket to avoid unexpectedly large overhead
    max_bucket_size: u64,
    // Considering the errors of GPS and geographical calculations, distances less than distance_error_margin will be treated as equal
    distance_error_margin: f64,
}

impl<Q: RankingRuleQueryTrait> GeoSort<Q> {
    pub fn new(
        strategy: Strategy,
        parameter: Parameter,
        geo_faceted_docids: RoaringBitmap,
        point: [f64; 2],
        ascending: bool,
    ) -> Result<Self> {
        let Parameter { strategy, max_bucket_size, distance_error_margin } = parameter;
        Ok(Self {
            query: None,
            strategy,
@@ -102,6 +121,8 @@ impl<Q: RankingRuleQueryTrait> GeoSort<Q> {
            field_ids: None,
            rtree: None,
            cached_sorted_docids: VecDeque::new(),
            max_bucket_size,
            distance_error_margin,
        })
    }

@@ -240,12 +261,12 @@ impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for GeoSort<Q> {
    fn next_bucket(
        &mut self,
        ctx: &mut SearchContext<'ctx>,
        logger: &mut dyn SearchLogger<Q>,
        _logger: &mut dyn SearchLogger<Q>,
        universe: &RoaringBitmap,
    ) -> Result<Option<RankingRuleOutput<Q>>> {
        let query = self.query.as_ref().unwrap().clone();

        let geo_candidates = &self.geo_candidates & universe;
        let mut geo_candidates = &self.geo_candidates & universe;

        if geo_candidates.is_empty() {
            return Ok(Some(RankingRuleOutput {
@@ -267,24 +288,102 @@ impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for GeoSort<Q> {
                cache.pop_back()
            }
        };
        while let Some((id, point)) = next(&mut self.cached_sorted_docids) {
        let put_back = |cache: &mut VecDeque<_>, x: _| {
            if ascending {
                cache.push_front(x)
            } else {
                cache.push_back(x)
            }
        };

        let mut current_bucket = RoaringBitmap::new();
        // current_distance stores the first point and distance in current bucket
        let mut current_distance: Option<([f64; 2], f64)> = None;
        loop {
            // The loop will only exit when we have found all points with equal distance or have exhausted the candidates.
            if let Some((id, point)) = next(&mut self.cached_sorted_docids) {
                if geo_candidates.contains(id) {
                    let distance = distance_between_two_points(&self.point, &point);
                    if let Some((point0, bucket_distance)) = current_distance.as_ref() {
                        if (bucket_distance - distance).abs() > self.distance_error_margin {
                            // different distance, point belongs to next bucket
                            put_back(&mut self.cached_sorted_docids, (id, point));
                            return Ok(Some(RankingRuleOutput {
                                query,
                                candidates: RoaringBitmap::from_iter([id]),
                                candidates: current_bucket,
                                score: ScoreDetails::GeoSort(score_details::GeoSort {
                                    target_point: self.point,
                                    ascending: self.ascending,
                                    value: Some(point),
                                    value: Some(point0.to_owned()),
                                }),
                            }));
                        } else {
                            // same distance, point belongs to current bucket
                            current_bucket.insert(id);
                            // remove from candidates to prevent it from being added to the cache again
                            geo_candidates.remove(id);
                            // current bucket size reaches limit, force return
                            if current_bucket.len() == self.max_bucket_size {
                                return Ok(Some(RankingRuleOutput {
                                    query,
                                    candidates: current_bucket,
                                    score: ScoreDetails::GeoSort(score_details::GeoSort {
                                        target_point: self.point,
                                        ascending: self.ascending,
                                        value: Some(point0.to_owned()),
                                    }),
                                }));
                            }
                        }

                        // if we got out of this loop it means we've exhausted our cache.
                        // we need to refill it and run the function again.
                    } else {
                        // first doc in current bucket
                        current_distance = Some((point, distance));
                        current_bucket.insert(id);
                        geo_candidates.remove(id);
                        // current bucket size reaches limit, force return
                        if current_bucket.len() == self.max_bucket_size {
                            return Ok(Some(RankingRuleOutput {
                                query,
                                candidates: current_bucket,
                                score: ScoreDetails::GeoSort(score_details::GeoSort {
                                    target_point: self.point,
                                    ascending: self.ascending,
                                    value: Some(point.to_owned()),
                                }),
                            }));
                        }
                    }
                }
            } else {
                // cache exhausted, we need to refill it
                self.fill_buffer(ctx, &geo_candidates)?;
                self.next_bucket(ctx, logger, universe)

                if self.cached_sorted_docids.is_empty() {
                    // candidates exhausted, exit
                    if let Some((point0, _)) = current_distance.as_ref() {
                        return Ok(Some(RankingRuleOutput {
                            query,
                            candidates: current_bucket,
                            score: ScoreDetails::GeoSort(score_details::GeoSort {
                                target_point: self.point,
                                ascending: self.ascending,
                                value: Some(point0.to_owned()),
                            }),
                        }));
                    } else {
                        return Ok(Some(RankingRuleOutput {
                            query,
                            candidates: universe.clone(),
                            score: ScoreDetails::GeoSort(score_details::GeoSort {
                                target_point: self.point,
                                ascending: self.ascending,
                                value: None,
                            }),
                        }));
                    }
                }
            }
        }
    }

    #[tracing::instrument(level = "trace", skip_all, target = "search::geo_sort")]

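The rewritten `next_bucket` above groups documents whose distances to the target differ by at most `distance_error_margin` into one bucket, and caps a bucket at `max_bucket_size`. A self-contained sketch of that grouping logic on plain data (the Euclidean helper below is only a stand-in for the crate's `distance_between_two_points`):

use std::collections::VecDeque;

// Euclidean distance as a stand-in for the geographical distance used in the diff.
fn distance(a: &[f64; 2], b: &[f64; 2]) -> f64 {
    ((a[0] - b[0]).powi(2) + (a[1] - b[1]).powi(2)).sqrt()
}

// Pop docids off a distance-sorted queue and return one bucket: every id whose
// distance to `target` is within `margin` of the first id's distance, stopping
// early once `max_bucket_size` ids have been collected.
fn next_bucket(
    cache: &mut VecDeque<(u32, [f64; 2])>,
    target: [f64; 2],
    margin: f64,
    max_bucket_size: usize,
) -> Vec<u32> {
    let mut bucket = Vec::new();
    let mut first_distance: Option<f64> = None;
    while let Some((id, point)) = cache.pop_front() {
        let d = distance(&target, &point);
        match first_distance {
            None => first_distance = Some(d),
            Some(d0) if (d0 - d).abs() > margin => {
                // Different distance: this id belongs to the next bucket, put it back.
                cache.push_front((id, point));
                break;
            }
            Some(_) => {}
        }
        bucket.push(id);
        if bucket.len() == max_bucket_size {
            break; // bucket full, force return like the code above
        }
    }
    bucket
}

fn main() {
    let mut cache = VecDeque::from(vec![(6, [2.0, 2.0]), (7, [2.0, 2.0]), (12, [5.0, 5.0])]);
    assert_eq!(next_bucket(&mut cache, [0.0, 0.0], 1.0, 1000), vec![6, 7]);
    assert_eq!(next_bucket(&mut cache, [0.0, 0.0], 1.0, 1000), vec![12]);
}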
@@ -513,7 +513,7 @@ mod tests {
            universe,
            &None,
            &None,
            crate::search::new::GeoSortStrategy::default(),
            crate::search::new::GeoSortParameter::default(),
            0,
            100,
            Some(10),

@ -45,6 +45,7 @@ use sort::Sort;
|
||||
|
||||
use self::distinct::facet_string_values;
|
||||
use self::geo_sort::GeoSort;
|
||||
pub use self::geo_sort::Parameter as GeoSortParameter;
|
||||
pub use self::geo_sort::Strategy as GeoSortStrategy;
|
||||
use self::graph_based_ranking_rule::Words;
|
||||
use self::interner::Interned;
|
||||
@ -274,7 +275,7 @@ fn resolve_negative_phrases(
|
||||
fn get_ranking_rules_for_placeholder_search<'ctx>(
|
||||
ctx: &SearchContext<'ctx>,
|
||||
sort_criteria: &Option<Vec<AscDesc>>,
|
||||
geo_strategy: geo_sort::Strategy,
|
||||
geo_param: geo_sort::Parameter,
|
||||
) -> Result<Vec<BoxRankingRule<'ctx, PlaceholderQuery>>> {
|
||||
let mut sort = false;
|
||||
let mut sorted_fields = HashSet::new();
|
||||
@ -299,7 +300,7 @@ fn get_ranking_rules_for_placeholder_search<'ctx>(
|
||||
&mut ranking_rules,
|
||||
&mut sorted_fields,
|
||||
&mut geo_sorted,
|
||||
geo_strategy,
|
||||
geo_param,
|
||||
)?;
|
||||
sort = true;
|
||||
}
|
||||
@ -326,7 +327,7 @@ fn get_ranking_rules_for_placeholder_search<'ctx>(
|
||||
fn get_ranking_rules_for_vector<'ctx>(
|
||||
ctx: &SearchContext<'ctx>,
|
||||
sort_criteria: &Option<Vec<AscDesc>>,
|
||||
geo_strategy: geo_sort::Strategy,
|
||||
geo_param: geo_sort::Parameter,
|
||||
limit_plus_offset: usize,
|
||||
target: &[f32],
|
||||
embedder_name: &str,
|
||||
@ -375,7 +376,7 @@ fn get_ranking_rules_for_vector<'ctx>(
|
||||
&mut ranking_rules,
|
||||
&mut sorted_fields,
|
||||
&mut geo_sorted,
|
||||
geo_strategy,
|
||||
geo_param,
|
||||
)?;
|
||||
sort = true;
|
||||
}
|
||||
@ -403,7 +404,7 @@ fn get_ranking_rules_for_vector<'ctx>(
|
||||
fn get_ranking_rules_for_query_graph_search<'ctx>(
|
||||
ctx: &SearchContext<'ctx>,
|
||||
sort_criteria: &Option<Vec<AscDesc>>,
|
||||
geo_strategy: geo_sort::Strategy,
|
||||
geo_param: geo_sort::Parameter,
|
||||
terms_matching_strategy: TermsMatchingStrategy,
|
||||
) -> Result<Vec<BoxRankingRule<'ctx, QueryGraph>>> {
|
||||
// query graph search
|
||||
@ -477,7 +478,7 @@ fn get_ranking_rules_for_query_graph_search<'ctx>(
|
||||
&mut ranking_rules,
|
||||
&mut sorted_fields,
|
||||
&mut geo_sorted,
|
||||
geo_strategy,
|
||||
geo_param,
|
||||
)?;
|
||||
sort = true;
|
||||
}
|
||||
@ -514,7 +515,7 @@ fn resolve_sort_criteria<'ctx, Query: RankingRuleQueryTrait>(
|
||||
ranking_rules: &mut Vec<BoxRankingRule<'ctx, Query>>,
|
||||
sorted_fields: &mut HashSet<String>,
|
||||
geo_sorted: &mut bool,
|
||||
geo_strategy: geo_sort::Strategy,
|
||||
geo_param: geo_sort::Parameter,
|
||||
) -> Result<()> {
|
||||
let sort_criteria = sort_criteria.clone().unwrap_or_default();
|
||||
ranking_rules.reserve(sort_criteria.len());
|
||||
@ -540,7 +541,7 @@ fn resolve_sort_criteria<'ctx, Query: RankingRuleQueryTrait>(
|
||||
}
|
||||
let geo_faceted_docids = ctx.index.geo_faceted_documents_ids(ctx.txn)?;
|
||||
ranking_rules.push(Box::new(GeoSort::new(
|
||||
geo_strategy,
|
||||
geo_param,
|
||||
geo_faceted_docids,
|
||||
point,
|
||||
true,
|
||||
@ -552,7 +553,7 @@ fn resolve_sort_criteria<'ctx, Query: RankingRuleQueryTrait>(
|
||||
}
|
||||
let geo_faceted_docids = ctx.index.geo_faceted_documents_ids(ctx.txn)?;
|
||||
ranking_rules.push(Box::new(GeoSort::new(
|
||||
geo_strategy,
|
||||
geo_param,
|
||||
geo_faceted_docids,
|
||||
point,
|
||||
false,
|
||||
@ -584,7 +585,7 @@ pub fn execute_vector_search(
|
||||
universe: RoaringBitmap,
|
||||
sort_criteria: &Option<Vec<AscDesc>>,
|
||||
distinct: &Option<String>,
|
||||
geo_strategy: geo_sort::Strategy,
|
||||
geo_param: geo_sort::Parameter,
|
||||
from: usize,
|
||||
length: usize,
|
||||
embedder_name: &str,
|
||||
@ -600,7 +601,7 @@ pub fn execute_vector_search(
|
||||
let ranking_rules = get_ranking_rules_for_vector(
|
||||
ctx,
|
||||
sort_criteria,
|
||||
geo_strategy,
|
||||
geo_param,
|
||||
from + length,
|
||||
vector,
|
||||
embedder_name,
|
||||
@ -647,7 +648,7 @@ pub fn execute_search(
|
||||
mut universe: RoaringBitmap,
|
||||
sort_criteria: &Option<Vec<AscDesc>>,
|
||||
distinct: &Option<String>,
|
||||
geo_strategy: geo_sort::Strategy,
|
||||
geo_param: geo_sort::Parameter,
|
||||
from: usize,
|
||||
length: usize,
|
||||
words_limit: Option<usize>,
|
||||
@ -761,7 +762,7 @@ pub fn execute_search(
|
||||
let ranking_rules = get_ranking_rules_for_query_graph_search(
|
||||
ctx,
|
||||
sort_criteria,
|
||||
geo_strategy,
|
||||
geo_param,
|
||||
terms_matching_strategy,
|
||||
)?;
|
||||
|
||||
@ -783,7 +784,7 @@ pub fn execute_search(
|
||||
)?
|
||||
} else {
|
||||
let ranking_rules =
|
||||
get_ranking_rules_for_placeholder_search(ctx, sort_criteria, geo_strategy)?;
|
||||
get_ranking_rules_for_placeholder_search(ctx, sort_criteria, geo_param)?;
|
||||
bucket_sort(
|
||||
ctx,
|
||||
ranking_rules,
|
||||
|
@ -1,10 +1,12 @@
|
||||
use std::borrow::Cow;
|
||||
use std::cmp::Ordering;
|
||||
use std::collections::BTreeSet;
|
||||
use std::ops::ControlFlow;
|
||||
|
||||
use fst::automaton::Str;
|
||||
use fst::{Automaton, IntoStreamer, Streamer};
|
||||
use fst::{IntoStreamer, Streamer};
|
||||
use heed::types::DecodeIgnore;
|
||||
use itertools::{merge_join_by, EitherOrBoth};
|
||||
|
||||
use super::{OneTypoTerm, Phrase, QueryTerm, ZeroTypoTerm};
|
||||
use crate::search::fst_utils::{Complement, Intersection, StartsWith, Union};
|
||||
@ -16,16 +18,10 @@ use crate::{Result, MAX_WORD_LENGTH};
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
|
||||
pub enum NumberOfTypos {
|
||||
Zero,
|
||||
One,
|
||||
Two,
|
||||
}
|
||||
|
||||
pub enum ZeroOrOneTypo {
|
||||
Zero,
|
||||
One,
|
||||
}
|
||||
|
||||
impl Interned<QueryTerm> {
|
||||
pub fn compute_fully_if_needed(self, ctx: &mut SearchContext<'_>) -> Result<()> {
|
||||
let s = ctx.term_interner.get_mut(self);
|
||||
@ -47,19 +43,27 @@ impl Interned<QueryTerm> {
|
||||
}
|
||||
|
||||
fn find_zero_typo_prefix_derivations(
|
||||
ctx: &mut SearchContext<'_>,
|
||||
word_interned: Interned<String>,
|
||||
fst: fst::Set<Cow<'_, [u8]>>,
|
||||
word_interner: &mut DedupInterner<String>,
|
||||
mut visit: impl FnMut(Interned<String>) -> Result<ControlFlow<()>>,
|
||||
) -> Result<()> {
|
||||
let word = word_interner.get(word_interned).to_owned();
|
||||
let word = ctx.word_interner.get(word_interned).to_owned();
|
||||
let word = word.as_str();
|
||||
let prefix = Str::new(word).starts_with();
|
||||
let mut stream = fst.search(prefix).into_stream();
|
||||
|
||||
while let Some(derived_word) = stream.next() {
|
||||
let derived_word = std::str::from_utf8(derived_word)?.to_owned();
|
||||
let derived_word_interned = word_interner.insert(derived_word);
|
||||
let words =
|
||||
ctx.index.word_docids.remap_data_type::<DecodeIgnore>().prefix_iter(ctx.txn, word)?;
|
||||
let exact_words =
|
||||
ctx.index.exact_word_docids.remap_data_type::<DecodeIgnore>().prefix_iter(ctx.txn, word)?;
|
||||
|
||||
for eob in merge_join_by(words, exact_words, |lhs, rhs| match (lhs, rhs) {
|
||||
(Ok((word, _)), Ok((exact_word, _))) => word.cmp(exact_word),
|
||||
(Err(_), _) | (_, Err(_)) => Ordering::Equal,
|
||||
}) {
|
||||
match eob {
|
||||
EitherOrBoth::Both(kv, _) | EitherOrBoth::Left(kv) | EitherOrBoth::Right(kv) => {
|
||||
let (derived_word, _) = kv?;
|
||||
let derived_word = derived_word.to_string();
|
||||
let derived_word_interned = ctx.word_interner.insert(derived_word);
|
||||
if derived_word_interned != word_interned {
|
||||
let cf = visit(derived_word_interned)?;
|
||||
if cf.is_break() {
|
||||
@ -67,14 +71,17 @@ fn find_zero_typo_prefix_derivations(
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn find_zero_one_typo_derivations(
|
||||
fn find_one_typo_derivations(
|
||||
ctx: &mut SearchContext<'_>,
|
||||
word_interned: Interned<String>,
|
||||
is_prefix: bool,
|
||||
mut visit: impl FnMut(Interned<String>, ZeroOrOneTypo) -> Result<ControlFlow<()>>,
|
||||
mut visit: impl FnMut(Interned<String>) -> Result<ControlFlow<()>>,
|
||||
) -> Result<()> {
|
||||
let fst = ctx.get_words_fst()?;
|
||||
let word = ctx.word_interner.get(word_interned).to_owned();
|
||||
@ -89,16 +96,9 @@ fn find_zero_one_typo_derivations(
|
||||
let derived_word = ctx.word_interner.insert(derived_word.to_owned());
|
||||
let d = dfa.distance(state.1);
|
||||
match d.to_u8() {
|
||||
0 => {
|
||||
if derived_word != word_interned {
|
||||
let cf = visit(derived_word, ZeroOrOneTypo::Zero)?;
|
||||
if cf.is_break() {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
0 => (),
|
||||
1 => {
|
||||
let cf = visit(derived_word, ZeroOrOneTypo::One)?;
|
||||
let cf = visit(derived_word)?;
|
||||
if cf.is_break() {
|
||||
break;
|
||||
}
|
||||
@ -111,7 +111,7 @@ fn find_zero_one_typo_derivations(
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn find_zero_one_two_typo_derivations(
|
||||
fn find_one_two_typo_derivations(
|
||||
word_interned: Interned<String>,
|
||||
is_prefix: bool,
|
||||
fst: fst::Set<Cow<'_, [u8]>>,
|
||||
@ -144,14 +144,7 @@ fn find_zero_one_two_typo_derivations(
|
||||
// correct distance
|
||||
let d = second_dfa.distance((state.1).0);
|
||||
match d.to_u8() {
|
||||
0 => {
|
||||
if derived_word_interned != word_interned {
|
||||
let cf = visit(derived_word_interned, NumberOfTypos::Zero)?;
|
||||
if cf.is_break() {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
0 => (),
|
||||
1 => {
|
||||
let cf = visit(derived_word_interned, NumberOfTypos::One)?;
|
||||
if cf.is_break() {
|
||||
@ -194,8 +187,6 @@ pub fn partially_initialized_term_from_word(
|
||||
});
|
||||
}
|
||||
|
||||
let fst = ctx.index.words_fst(ctx.txn)?;
|
||||
|
||||
let use_prefix_db = is_prefix
|
||||
&& (ctx
|
||||
.index
|
||||
@ -215,24 +206,19 @@ pub fn partially_initialized_term_from_word(
|
||||
let mut zero_typo = None;
|
||||
let mut prefix_of = BTreeSet::new();
|
||||
|
||||
if fst.contains(word) || ctx.index.exact_word_docids.get(ctx.txn, word)?.is_some() {
|
||||
if ctx.index.contains_word(ctx.txn, word)? {
|
||||
zero_typo = Some(word_interned);
|
||||
}
|
||||
|
||||
if is_prefix && use_prefix_db.is_none() {
|
||||
find_zero_typo_prefix_derivations(
|
||||
word_interned,
|
||||
fst,
|
||||
&mut ctx.word_interner,
|
||||
|derived_word| {
|
||||
find_zero_typo_prefix_derivations(ctx, word_interned, |derived_word| {
|
||||
if prefix_of.len() < limits::MAX_PREFIX_COUNT {
|
||||
prefix_of.insert(derived_word);
|
||||
Ok(ControlFlow::Continue(()))
|
||||
} else {
|
||||
Ok(ControlFlow::Break(()))
|
||||
}
|
||||
},
|
||||
)?;
|
||||
})?;
|
||||
}
|
||||
let synonyms = ctx.index.synonyms(ctx.txn)?;
|
||||
let mut synonym_word_count = 0;
|
||||
@ -295,18 +281,13 @@ impl Interned<QueryTerm> {
|
||||
let mut one_typo_words = BTreeSet::new();
|
||||
|
||||
if *max_nbr_typos > 0 {
|
||||
find_zero_one_typo_derivations(ctx, original, is_prefix, |derived_word, nbr_typos| {
|
||||
match nbr_typos {
|
||||
ZeroOrOneTypo::Zero => {}
|
||||
ZeroOrOneTypo::One => {
|
||||
find_one_typo_derivations(ctx, original, is_prefix, |derived_word| {
|
||||
if one_typo_words.len() < limits::MAX_ONE_TYPO_COUNT {
|
||||
one_typo_words.insert(derived_word);
|
||||
} else {
|
||||
return Ok(ControlFlow::Break(()));
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(ControlFlow::Continue(()))
|
||||
} else {
|
||||
Ok(ControlFlow::Break(()))
|
||||
}
|
||||
})?;
|
||||
}
|
||||
|
||||
@ -357,7 +338,7 @@ impl Interned<QueryTerm> {
|
||||
let mut two_typo_words = BTreeSet::new();
|
||||
|
||||
if *max_nbr_typos > 0 {
|
||||
find_zero_one_two_typo_derivations(
|
||||
find_one_two_typo_derivations(
|
||||
*original,
|
||||
*is_prefix,
|
||||
ctx.index.words_fst(ctx.txn)?,
|
||||
@ -370,7 +351,6 @@ impl Interned<QueryTerm> {
|
||||
return Ok(ControlFlow::Break(()));
|
||||
}
|
||||
match nbr_typos {
|
||||
NumberOfTypos::Zero => {}
|
||||
NumberOfTypos::One => {
|
||||
if one_typo_words.len() < limits::MAX_ONE_TYPO_COUNT {
|
||||
one_typo_words.insert(derived_word);
|
||||
|
@@ -4,6 +4,7 @@ This module tests the `geo_sort` ranking rule

use big_s::S;
use heed::RoTxn;
use itertools::Itertools;
use maplit::hashset;

use crate::constants::RESERVED_GEO_FIELD_NAME;
@@ -18,7 +19,7 @@ fn create_index() -> TempIndex {
    index
        .update_settings(|s| {
            s.set_primary_key("id".to_owned());
            s.set_sortable_fields(hashset! { S(RESERVED_GEO_FIELD_NAME) });
            s.set_sortable_fields(hashset! { S(RESERVED_GEO_FIELD_NAME), S("score") });
            s.set_criteria(vec![Criterion::Words, Criterion::Sort]);
        })
        .unwrap();
@@ -95,6 +96,112 @@ fn test_geo_sort() {
    insta::assert_snapshot!(format!("{scores:#?}"));
}

#[test]
fn test_geo_sort_with_following_ranking_rules() {
    let index = create_index();

    index
        .add_documents(documents!([
            { "id": 1 }, { "id": 4 }, { "id": 3 }, { "id": 2 }, { "id": 5 },
            { "id": 6, RESERVED_GEO_FIELD_NAME: { "lat": 2, "lng": 2 }, "score": 10 },
            { "id": 7, RESERVED_GEO_FIELD_NAME: { "lat": 2, "lng": 2 }, "score": 9 },
            { "id": 8, RESERVED_GEO_FIELD_NAME: { "lat": 2, "lng": 2 }, "score": 8 },
            { "id": 9, RESERVED_GEO_FIELD_NAME: { "lat": 2, "lng": 2 }, "score": 7 },
            { "id": 10, RESERVED_GEO_FIELD_NAME: { "lat": 2, "lng": 2 }, "score": 6 },
            { "id": 11, RESERVED_GEO_FIELD_NAME: { "lat": 2, "lng": 2 }, "score": 5 },
            { "id": 12, RESERVED_GEO_FIELD_NAME: { "lat": 5, "lng": 5 }, "score": 10 },
            { "id": 13, RESERVED_GEO_FIELD_NAME: { "lat": 5, "lng": 5 }, "score": 9 },
            { "id": 14, RESERVED_GEO_FIELD_NAME: { "lat": 5, "lng": 5 }, "score": 8 },
            { "id": 15, RESERVED_GEO_FIELD_NAME: { "lat": 5, "lng": 5 }, "score": 7 },
        ]))
        .unwrap();

    let rtxn = index.read_txn().unwrap();

    let mut s = Search::new(&rtxn, &index);
    s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
    s.sort_criteria(vec![
        AscDesc::Asc(Member::Geo([0., 0.])),
        AscDesc::Desc(Member::Field("score".to_string())),
    ]);
    let (ids, scores) = execute_iterative_and_rtree_returns_the_same(&rtxn, &index, &mut s);
    insta::assert_snapshot!(format!("{ids:?}"), @"[6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 4, 3, 2, 5]");
    insta::assert_snapshot!(format!("{scores:#?}"));

    s.sort_criteria(vec![
        AscDesc::Desc(Member::Geo([0., 0.])),
        AscDesc::Desc(Member::Field("score".to_string())),
    ]);
    let (ids, scores) = execute_iterative_and_rtree_returns_the_same(&rtxn, &index, &mut s);
    insta::assert_snapshot!(format!("{ids:?}"), @"[12, 13, 14, 15, 6, 7, 8, 9, 10, 11, 1, 4, 3, 2, 5]");
    insta::assert_snapshot!(format!("{scores:#?}"));
}

#[test]
fn test_geo_sort_reached_max_bucket_size() {
    let index = create_index();

    index
        .add_documents(documents!([
            { "id": 1 }, { "id": 4 }, { "id": 3 }, { "id": 2 }, { "id": 5 },
            { "id": 6, RESERVED_GEO_FIELD_NAME: { "lat": 2, "lng": 2 }, "score": 10 },
            { "id": 7, RESERVED_GEO_FIELD_NAME: { "lat": 2, "lng": 2 }, "score": 9 },
            { "id": 8, RESERVED_GEO_FIELD_NAME: { "lat": 2, "lng": 2 }, "score": 8 },
            { "id": 9, RESERVED_GEO_FIELD_NAME: { "lat": 2, "lng": 2 }, "score": 7 },
            { "id": 10, RESERVED_GEO_FIELD_NAME: { "lat": 2, "lng": 2 }, "score": 6 },
            { "id": 11, RESERVED_GEO_FIELD_NAME: { "lat": 2, "lng": 2 }, "score": 5 },
            { "id": 12, RESERVED_GEO_FIELD_NAME: { "lat": 5, "lng": 5 }, "score": 10 },
            { "id": 13, RESERVED_GEO_FIELD_NAME: { "lat": 5, "lng": 5 }, "score": 9 },
            { "id": 14, RESERVED_GEO_FIELD_NAME: { "lat": 5, "lng": 5 }, "score": 8 },
            { "id": 15, RESERVED_GEO_FIELD_NAME: { "lat": 5, "lng": 5 }, "score": 7 },
        ]))
        .unwrap();

    let rtxn = index.read_txn().unwrap();

    let mut s = Search::new(&rtxn, &index);
    s.geo_max_bucket_size(2);
    s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
    s.sort_criteria(vec![
        AscDesc::Asc(Member::Geo([0., 0.])),
        AscDesc::Desc(Member::Field("score".to_string())),
    ]);

    /* We should not expect the results to obey the following ranking rules when the bucket size limit is reached,
     * nor should we expect the iterative and rtree strategies to give exactly the same order for the same bucket in this case. */
    s.geo_sort_strategy(GeoSortStrategy::AlwaysIterative(1000));
    let SearchResult { documents_ids, .. } = s.execute().unwrap();
    let iterative_ids = collect_field_values(&index, &rtxn, "id", &documents_ids);

    assert_eq!(iterative_ids.len(), 15);
    for id_str in &iterative_ids[0..6] {
        let id = id_str.parse::<u32>().unwrap();
        assert!((6..=11).contains(&id))
    }
    for id_str in &iterative_ids[6..10] {
        let id = id_str.parse::<u32>().unwrap();
        assert!((12..=15).contains(&id))
    }
    let no_geo_ids = iterative_ids[10..].iter().collect_vec();
    insta::assert_snapshot!(format!("{no_geo_ids:?}"), @r#"["1", "4", "3", "2", "5"]"#);

    s.geo_sort_strategy(GeoSortStrategy::AlwaysRtree(1000));
    let SearchResult { documents_ids, .. } = s.execute().unwrap();
    let rtree_ids = collect_field_values(&index, &rtxn, "id", &documents_ids);

    assert_eq!(rtree_ids.len(), 15);
    for id_str in &rtree_ids[0..6] {
        let id = id_str.parse::<u32>().unwrap();
        assert!((6..=11).contains(&id))
    }
    for id_str in &rtree_ids[6..10] {
        let id = id_str.parse::<u32>().unwrap();
        assert!((12..=15).contains(&id))
    }
    let no_geo_ids = rtree_ids[10..].iter().collect_vec();
    insta::assert_snapshot!(format!("{no_geo_ids:?}"), @r#"["1", "4", "3", "2", "5"]"#);
}

#[test]
fn test_geo_sort_around_the_edge_of_the_flat_earth() {
    let index = create_index();

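The two tests above rely on a composite ordering: ties on geo distance fall through to the `score` field in descending order, exactly what `[Asc(Member::Geo), Desc(Member::Field("score"))]` requests. A self-contained model of that expectation using a plain sort key, where integer "distance buckets" stand in for the real geo distances of documents 6..=15:

use std::cmp::Reverse;

fn main() {
    // (id, distance bucket, score) mirroring documents 6..=15 from the test above.
    let mut docs = vec![
        (12_u32, 2_u32, 10_i64),
        (6, 1, 10),
        (13, 2, 9),
        (7, 1, 9),
        (8, 1, 8),
        (14, 2, 8),
        (9, 1, 7),
        (15, 2, 7),
        (10, 1, 6),
        (11, 1, 5),
    ];
    // Geo distance ascending first, then score descending, like the test's sort criteria.
    docs.sort_by_key(|&(_, bucket, score)| (bucket, Reverse(score)));
    let ids: Vec<u32> = docs.iter().map(|&(id, _, _)| id).collect();
    assert_eq!(ids, vec![6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
}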
@ -0,0 +1,356 @@
|
||||
---
|
||||
source: crates/milli/src/search/new/tests/geo_sort.rs
|
||||
expression: "format!(\"{scores:#?}\")"
|
||||
---
|
||||
[
|
||||
[
|
||||
GeoSort(
|
||||
GeoSort {
|
||||
target_point: [
|
||||
0.0,
|
||||
0.0,
|
||||
],
|
||||
ascending: true,
|
||||
value: Some(
|
||||
[
|
||||
2.0,
|
||||
2.0,
|
||||
],
|
||||
),
|
||||
},
|
||||
),
|
||||
Sort(
|
||||
Sort {
|
||||
field_name: "score",
|
||||
ascending: false,
|
||||
redacted: false,
|
||||
value: Number(10.0),
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
GeoSort(
|
||||
GeoSort {
|
||||
target_point: [
|
||||
0.0,
|
||||
0.0,
|
||||
],
|
||||
ascending: true,
|
||||
value: Some(
|
||||
[
|
||||
2.0,
|
||||
2.0,
|
||||
],
|
||||
),
|
||||
},
|
||||
),
|
||||
Sort(
|
||||
Sort {
|
||||
field_name: "score",
|
||||
ascending: false,
|
||||
redacted: false,
|
||||
value: Number(9.0),
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
GeoSort(
|
||||
GeoSort {
|
||||
target_point: [
|
||||
0.0,
|
||||
0.0,
|
||||
],
|
||||
ascending: true,
|
||||
value: Some(
|
||||
[
|
||||
2.0,
|
||||
2.0,
|
||||
],
|
||||
),
|
||||
},
|
||||
),
|
||||
Sort(
|
||||
Sort {
|
||||
field_name: "score",
|
||||
ascending: false,
|
||||
redacted: false,
|
||||
value: Number(8.0),
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
GeoSort(
|
||||
GeoSort {
|
||||
target_point: [
|
||||
0.0,
|
||||
0.0,
|
||||
],
|
||||
ascending: true,
|
||||
value: Some(
|
||||
[
|
||||
2.0,
|
||||
2.0,
|
||||
],
|
||||
),
|
||||
},
|
||||
),
|
||||
Sort(
|
||||
Sort {
|
||||
field_name: "score",
|
||||
ascending: false,
|
||||
redacted: false,
|
||||
value: Number(7.0),
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
GeoSort(
|
||||
GeoSort {
|
||||
target_point: [
|
||||
0.0,
|
||||
0.0,
|
||||
],
|
||||
ascending: true,
|
||||
value: Some(
|
||||
[
|
||||
2.0,
|
||||
2.0,
|
||||
],
|
||||
),
|
||||
},
|
||||
),
|
||||
Sort(
|
||||
Sort {
|
||||
field_name: "score",
|
||||
ascending: false,
|
||||
redacted: false,
|
||||
value: Number(6.0),
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
GeoSort(
|
||||
GeoSort {
|
||||
target_point: [
|
||||
0.0,
|
||||
0.0,
|
||||
],
|
||||
ascending: true,
|
||||
value: Some(
|
||||
[
|
||||
2.0,
|
||||
2.0,
|
||||
],
|
||||
),
|
||||
},
|
||||
),
|
||||
Sort(
|
||||
Sort {
|
||||
field_name: "score",
|
||||
ascending: false,
|
||||
redacted: false,
|
||||
value: Number(5.0),
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
GeoSort(
|
||||
GeoSort {
|
||||
target_point: [
|
||||
0.0,
|
||||
0.0,
|
||||
],
|
||||
ascending: true,
|
||||
value: Some(
|
||||
[
|
||||
5.0,
|
||||
5.0,
|
||||
],
|
||||
),
|
||||
},
|
||||
),
|
||||
Sort(
|
||||
Sort {
|
||||
field_name: "score",
|
||||
ascending: false,
|
||||
redacted: false,
|
||||
value: Number(10.0),
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
GeoSort(
|
||||
GeoSort {
|
||||
target_point: [
|
||||
0.0,
|
||||
0.0,
|
||||
],
|
||||
ascending: true,
|
||||
value: Some(
|
||||
[
|
||||
5.0,
|
||||
5.0,
|
||||
],
|
||||
),
|
||||
},
|
||||
),
|
||||
Sort(
|
||||
Sort {
|
||||
field_name: "score",
|
||||
ascending: false,
|
||||
redacted: false,
|
||||
value: Number(9.0),
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
GeoSort(
|
||||
GeoSort {
|
||||
target_point: [
|
||||
0.0,
|
||||
0.0,
|
||||
],
|
||||
ascending: true,
|
||||
value: Some(
|
||||
[
|
||||
5.0,
|
||||
5.0,
|
||||
],
|
||||
),
|
||||
},
|
||||
),
|
||||
Sort(
|
||||
Sort {
|
||||
field_name: "score",
|
||||
ascending: false,
|
||||
redacted: false,
|
||||
value: Number(8.0),
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
GeoSort(
|
||||
GeoSort {
|
||||
target_point: [
|
||||
0.0,
|
||||
0.0,
|
||||
],
|
||||
ascending: true,
|
||||
value: Some(
|
||||
[
|
||||
5.0,
|
||||
5.0,
|
||||
],
|
||||
),
|
||||
},
|
||||
),
|
||||
Sort(
|
||||
Sort {
|
||||
field_name: "score",
|
||||
ascending: false,
|
||||
redacted: false,
|
||||
value: Number(7.0),
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
GeoSort(
|
||||
GeoSort {
|
||||
target_point: [
|
||||
0.0,
|
||||
0.0,
|
||||
],
|
||||
ascending: true,
|
||||
value: None,
|
||||
},
|
||||
),
|
||||
Sort(
|
||||
Sort {
|
||||
field_name: "score",
|
||||
ascending: false,
|
||||
redacted: false,
|
||||
value: Null,
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
GeoSort(
|
||||
GeoSort {
|
||||
target_point: [
|
||||
0.0,
|
||||
0.0,
|
||||
],
|
||||
ascending: true,
|
||||
value: None,
|
||||
},
|
||||
),
|
||||
Sort(
|
||||
Sort {
|
||||
field_name: "score",
|
||||
ascending: false,
|
||||
redacted: false,
|
||||
value: Null,
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
GeoSort(
|
||||
GeoSort {
|
||||
target_point: [
|
||||
0.0,
|
||||
0.0,
|
||||
],
|
||||
ascending: true,
|
||||
value: None,
|
||||
},
|
||||
),
|
||||
Sort(
|
||||
Sort {
|
||||
field_name: "score",
|
||||
ascending: false,
|
||||
redacted: false,
|
||||
value: Null,
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
GeoSort(
|
||||
GeoSort {
|
||||
target_point: [
|
||||
0.0,
|
||||
0.0,
|
||||
],
|
||||
ascending: true,
|
||||
value: None,
|
||||
},
|
||||
),
|
||||
Sort(
|
||||
Sort {
|
||||
field_name: "score",
|
||||
ascending: false,
|
||||
redacted: false,
|
||||
value: Null,
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
GeoSort(
|
||||
GeoSort {
|
||||
target_point: [
|
||||
0.0,
|
||||
0.0,
|
||||
],
|
||||
ascending: true,
|
||||
value: None,
|
||||
},
|
||||
),
|
||||
Sort(
|
||||
Sort {
|
||||
field_name: "score",
|
||||
ascending: false,
|
||||
redacted: false,
|
||||
value: Null,
|
||||
},
|
||||
),
|
||||
],
|
||||
]
|
@ -0,0 +1,356 @@
|
||||
---
|
||||
source: crates/milli/src/search/new/tests/geo_sort.rs
|
||||
expression: "format!(\"{scores:#?}\")"
|
||||
---
|
||||
[
|
||||
[
|
||||
GeoSort(
|
||||
GeoSort {
|
||||
target_point: [
|
||||
0.0,
|
||||
0.0,
|
||||
],
|
||||
ascending: false,
|
||||
value: Some(
|
||||
[
|
||||
5.0,
|
||||
5.0,
|
||||
],
|
||||
),
|
||||
},
|
||||
),
|
||||
Sort(
|
||||
Sort {
|
||||
field_name: "score",
|
||||
ascending: false,
|
||||
redacted: false,
|
||||
value: Number(10.0),
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
GeoSort(
|
||||
GeoSort {
|
||||
target_point: [
|
||||
0.0,
|
||||
0.0,
|
||||
],
|
||||
ascending: false,
|
||||
value: Some(
|
||||
[
|
||||
5.0,
|
||||
5.0,
|
||||
],
|
||||
),
|
||||
},
|
||||
),
|
||||
Sort(
|
||||
Sort {
|
||||
field_name: "score",
|
||||
ascending: false,
|
||||
redacted: false,
|
||||
value: Number(9.0),
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
GeoSort(
|
||||
GeoSort {
|
||||
target_point: [
|
||||
0.0,
|
||||
0.0,
|
||||
],
|
||||
ascending: false,
|
||||
value: Some(
|
||||
[
|
||||
5.0,
|
||||
5.0,
|
||||
],
|
||||
),
|
||||
},
|
||||
),
|
||||
Sort(
|
||||
Sort {
|
||||
field_name: "score",
|
||||
ascending: false,
|
||||
redacted: false,
|
||||
value: Number(8.0),
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
GeoSort(
|
||||
GeoSort {
|
||||
target_point: [
|
||||
0.0,
|
||||
0.0,
|
||||
],
|
||||
ascending: false,
|
||||
value: Some(
|
||||
[
|
||||
5.0,
|
||||
5.0,
|
||||
],
|
||||
),
|
||||
},
|
||||
),
|
||||
Sort(
|
||||
Sort {
|
||||
field_name: "score",
|
||||
ascending: false,
|
||||
redacted: false,
|
||||
value: Number(7.0),
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
GeoSort(
|
||||
GeoSort {
|
||||
target_point: [
|
||||
0.0,
|
||||
0.0,
|
||||
],
|
||||
ascending: false,
|
||||
value: Some(
|
||||
[
|
||||
2.0,
|
||||
2.0,
|
||||
],
|
||||
),
|
||||
},
|
||||
),
|
||||
Sort(
|
||||
Sort {
|
||||
field_name: "score",
|
||||
ascending: false,
|
||||
redacted: false,
|
||||
value: Number(10.0),
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
GeoSort(
|
||||
GeoSort {
|
||||
target_point: [
|
||||
0.0,
|
||||
0.0,
|
||||
],
|
||||
ascending: false,
|
||||
value: Some(
|
||||
[
|
||||
2.0,
|
||||
2.0,
|
||||
],
|
||||
),
|
||||
},
|
||||
),
|
||||
Sort(
|
||||
Sort {
|
||||
field_name: "score",
|
||||
ascending: false,
|
||||
redacted: false,
|
||||
value: Number(9.0),
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
GeoSort(
|
||||
GeoSort {
|
||||
target_point: [
|
||||
0.0,
|
||||
0.0,
|
||||
],
|
||||
ascending: false,
|
||||
value: Some(
|
||||
[
|
||||
2.0,
|
||||
2.0,
|
||||
],
|
||||
),
|
||||
},
|
||||
),
|
||||
Sort(
|
||||
Sort {
|
||||
field_name: "score",
|
||||
ascending: false,
|
||||
redacted: false,
|
||||
value: Number(8.0),
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
GeoSort(
|
||||
GeoSort {
|
||||
target_point: [
|
||||
0.0,
|
||||
0.0,
|
||||
],
|
||||
ascending: false,
|
||||
value: Some(
|
||||
[
|
||||
2.0,
|
||||
2.0,
|
||||
],
|
||||
),
|
||||
},
|
||||
),
|
||||
Sort(
|
||||
Sort {
|
||||
field_name: "score",
|
||||
ascending: false,
|
||||
redacted: false,
|
||||
value: Number(7.0),
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
GeoSort(
|
||||
GeoSort {
|
||||
target_point: [
|
||||
0.0,
|
||||
0.0,
|
||||
],
|
||||
ascending: false,
|
||||
value: Some(
|
||||
[
|
||||
2.0,
|
||||
2.0,
|
||||
],
|
||||
),
|
||||
},
|
||||
),
|
||||
Sort(
|
||||
Sort {
|
||||
field_name: "score",
|
||||
ascending: false,
|
||||
redacted: false,
|
||||
value: Number(6.0),
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
GeoSort(
|
||||
GeoSort {
|
||||
target_point: [
|
||||
0.0,
|
||||
0.0,
|
||||
],
|
||||
ascending: false,
|
||||
value: Some(
|
||||
[
|
||||
2.0,
|
||||
2.0,
|
||||
],
|
||||
),
|
||||
},
|
||||
),
|
||||
Sort(
|
||||
Sort {
|
||||
field_name: "score",
|
||||
ascending: false,
|
||||
redacted: false,
|
||||
value: Number(5.0),
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
GeoSort(
|
||||
GeoSort {
|
||||
target_point: [
|
||||
0.0,
|
||||
0.0,
|
||||
],
|
||||
ascending: false,
|
||||
value: None,
|
||||
},
|
||||
),
|
||||
Sort(
|
||||
Sort {
|
||||
field_name: "score",
|
||||
ascending: false,
|
||||
redacted: false,
|
||||
value: Null,
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
GeoSort(
|
||||
GeoSort {
|
||||
target_point: [
|
||||
0.0,
|
||||
0.0,
|
||||
],
|
||||
ascending: false,
|
||||
value: None,
|
||||
},
|
||||
),
|
||||
Sort(
|
||||
Sort {
|
||||
field_name: "score",
|
||||
ascending: false,
|
||||
redacted: false,
|
||||
value: Null,
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
GeoSort(
|
||||
GeoSort {
|
||||
target_point: [
|
||||
0.0,
|
||||
0.0,
|
||||
],
|
||||
ascending: false,
|
||||
value: None,
|
||||
},
|
||||
),
|
||||
Sort(
|
||||
Sort {
|
||||
field_name: "score",
|
||||
ascending: false,
|
||||
redacted: false,
|
||||
value: Null,
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
GeoSort(
|
||||
GeoSort {
|
||||
target_point: [
|
||||
0.0,
|
||||
0.0,
|
||||
],
|
||||
ascending: false,
|
||||
value: None,
|
||||
},
|
||||
),
|
||||
Sort(
|
||||
Sort {
|
||||
field_name: "score",
|
||||
ascending: false,
|
||||
redacted: false,
|
||||
value: Null,
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
GeoSort(
|
||||
GeoSort {
|
||||
target_point: [
|
||||
0.0,
|
||||
0.0,
|
||||
],
|
||||
ascending: false,
|
||||
value: None,
|
||||
},
|
||||
),
|
||||
Sort(
|
||||
Sort {
|
||||
field_name: "score",
|
||||
ascending: false,
|
||||
redacted: false,
|
||||
value: Null,
|
||||
},
|
||||
),
|
||||
],
|
||||
]
|
@ -28,6 +28,7 @@ pub use self::helpers::*;
|
||||
pub use self::transform::{Transform, TransformOutput};
|
||||
use super::facet::clear_facet_levels_based_on_settings_diff;
|
||||
use super::new::StdResult;
|
||||
use crate::database_stats::DatabaseStats;
|
||||
use crate::documents::{obkv_to_object, DocumentsBatchReader};
|
||||
use crate::error::{Error, InternalError};
|
||||
use crate::index::{PrefixSearch, PrefixSettings};
|
||||
@ -476,7 +477,8 @@ where
|
||||
|
||||
if !settings_diff.settings_update_only {
|
||||
// Update the stats of the documents database when there is a document update.
|
||||
self.index.update_documents_stats(self.wtxn, modified_docids)?;
|
||||
let stats = DatabaseStats::new(self.index.documents.remap_data_type(), self.wtxn)?;
|
||||
self.index.put_documents_stats(self.wtxn, stats)?;
|
||||
}
|
||||
// We write the field distribution into the main database
|
||||
self.index.put_field_distribution(self.wtxn, &field_distribution)?;
|
||||
|
@ -406,6 +406,71 @@ impl<'doc> Versions<'doc> {
|
||||
Ok(Some(Self::single(data)))
|
||||
}
|
||||
|
||||
pub fn multiple_with_edits(
|
||||
doc: Option<rhai::Map>,
|
||||
mut versions: impl Iterator<Item = Result<RawMap<'doc, FxBuildHasher>>>,
|
||||
engine: &rhai::Engine,
|
||||
edit_function: &rhai::AST,
|
||||
doc_alloc: &'doc bumpalo::Bump,
|
||||
) -> Result<Option<Option<Self>>> {
|
||||
let Some(data) = versions.next() else { return Ok(None) };
|
||||
|
||||
let mut doc = doc.unwrap_or_default();
|
||||
let mut data = data?;
|
||||
for version in versions {
|
||||
let version = version?;
|
||||
for (field, value) in version {
|
||||
data.insert(field, value);
|
||||
}
|
||||
|
||||
let mut scope = rhai::Scope::new();
|
||||
data.iter().for_each(|(k, v)| {
|
||||
doc.insert(k.into(), serde_json::from_str(v.get()).unwrap());
|
||||
});
|
||||
scope.push("doc", doc.clone());
|
||||
|
||||
let _ = engine.eval_ast_with_scope::<rhai::Dynamic>(&mut scope, edit_function).unwrap();
|
||||
data = RawMap::with_hasher_in(FxBuildHasher, doc_alloc);
|
||||
match scope.get_value::<rhai::Map>("doc") {
|
||||
Some(map) => {
|
||||
for (key, value) in map {
|
||||
let mut vec = bumpalo::collections::Vec::new_in(doc_alloc);
|
||||
serde_json::to_writer(&mut vec, &value).unwrap();
|
||||
let key = doc_alloc.alloc_str(key.as_str());
|
||||
let raw_value = serde_json::from_slice(vec.into_bump_slice()).unwrap();
|
||||
data.insert(key, raw_value);
|
||||
}
|
||||
}
|
||||
// In case the edit function deletes the document and it's not the last change
|
||||
// we simply set the document to an empty one and await the next change.
|
||||
None => (),
|
||||
}
|
||||
}
|
||||
|
||||
// We must also run the code after the last change
|
||||
let mut scope = rhai::Scope::new();
|
||||
data.iter().for_each(|(k, v)| {
|
||||
doc.insert(k.into(), serde_json::from_str(v.get()).unwrap());
|
||||
});
|
||||
scope.push("doc", doc);
|
||||
|
||||
let _ = engine.eval_ast_with_scope::<rhai::Dynamic>(&mut scope, edit_function).unwrap();
|
||||
data = RawMap::with_hasher_in(FxBuildHasher, doc_alloc);
|
||||
match scope.get_value::<rhai::Map>("doc") {
|
||||
Some(map) => {
|
||||
for (key, value) in map {
|
||||
let mut vec = bumpalo::collections::Vec::new_in(doc_alloc);
|
||||
serde_json::to_writer(&mut vec, &value).unwrap();
|
||||
let key = doc_alloc.alloc_str(key.as_str());
|
||||
let raw_value = serde_json::from_slice(vec.into_bump_slice()).unwrap();
|
||||
data.insert(key, raw_value);
|
||||
}
|
||||
Ok(Some(Some(Self::single(data))))
|
||||
}
|
||||
None => Ok(Some(None)),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn single(version: RawMap<'doc, FxBuildHasher>) -> Self {
|
||||
Self { data: version }
|
||||
}
|
||||
|
@ -1,5 +1,6 @@
|
||||
use bumpalo::Bump;
|
||||
use heed::RoTxn;
|
||||
use serde_json::Value;
|
||||
|
||||
use super::document::{
|
||||
Document as _, DocumentFromDb, DocumentFromVersions, MergedDocument, Versions,
|
||||
@ -10,7 +11,7 @@ use super::vector_document::{
|
||||
use crate::attribute_patterns::PatternMatch;
|
||||
use crate::documents::FieldIdMapper;
|
||||
use crate::vector::EmbeddingConfigs;
|
||||
use crate::{DocumentId, Index, Result};
|
||||
use crate::{DocumentId, Index, InternalError, Result};
|
||||
|
||||
pub enum DocumentChange<'doc> {
|
||||
Deletion(Deletion<'doc>),
|
||||
@ -243,6 +244,29 @@ impl<'doc> Update<'doc> {
|
||||
Ok(has_deleted_fields)
|
||||
}
|
||||
|
||||
/// Returns `true` if the geo fields have changed.
|
||||
pub fn has_changed_for_geo_fields<'t, Mapper: FieldIdMapper>(
|
||||
&self,
|
||||
rtxn: &'t RoTxn,
|
||||
index: &'t Index,
|
||||
mapper: &'t Mapper,
|
||||
) -> Result<bool> {
|
||||
let current = self.current(rtxn, index, mapper)?;
|
||||
let current_geo = current.geo_field()?;
|
||||
let updated_geo = self.only_changed_fields().geo_field()?;
|
||||
match (current_geo, updated_geo) {
|
||||
(Some(current_geo), Some(updated_geo)) => {
|
||||
let current: Value =
|
||||
serde_json::from_str(current_geo.get()).map_err(InternalError::SerdeJson)?;
|
||||
let updated: Value =
|
||||
serde_json::from_str(updated_geo.get()).map_err(InternalError::SerdeJson)?;
|
||||
Ok(current != updated)
|
||||
}
|
||||
(None, None) => Ok(false),
|
||||
_ => Ok(true),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn only_changed_vectors(
|
||||
&self,
|
||||
doc_alloc: &'doc Bump,
|
||||
|
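`has_changed_for_geo_fields` above decides whether the geo data needs re-extraction by comparing the current and updated values as parsed JSON, treating "added" or "removed" as a change. A self-contained model of that comparison (serde_json is the same crate the hunk uses; the sample payloads are invented):

use serde_json::Value;

fn geo_changed(current: Option<&str>, updated: Option<&str>) -> serde_json::Result<bool> {
    match (current, updated) {
        (Some(current), Some(updated)) => {
            let current: Value = serde_json::from_str(current)?;
            let updated: Value = serde_json::from_str(updated)?;
            Ok(current != updated)
        }
        // No geo data on either side: nothing to re-extract.
        (None, None) => Ok(false),
        // The field was added or removed: that counts as a change.
        _ => Ok(true),
    }
}

fn main() -> serde_json::Result<()> {
    assert!(!geo_changed(Some(r#"{"lat": 2, "lng": 2}"#), Some(r#"{"lat": 2, "lng": 2}"#))?);
    assert!(geo_changed(Some(r#"{"lat": 2, "lng": 2}"#), Some(r#"{"lat": 5, "lng": 5}"#))?);
    assert!(geo_changed(None, Some(r#"{"lat": 2, "lng": 2}"#))?);
    Ok(())
}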
@ -117,7 +117,7 @@ impl FacetedDocidsExtractor {
|
||||
},
|
||||
),
|
||||
DocumentChange::Update(inner) => {
|
||||
if !inner.has_changed_for_fields(
|
||||
let has_changed = inner.has_changed_for_fields(
|
||||
&mut |field_name| {
|
||||
match_faceted_field(
|
||||
field_name,
|
||||
@ -130,7 +130,10 @@ impl FacetedDocidsExtractor {
|
||||
rtxn,
|
||||
index,
|
||||
context.db_fields_ids_map,
|
||||
)? {
|
||||
)?;
|
||||
let has_changed_for_geo_fields =
|
||||
inner.has_changed_for_geo_fields(rtxn, index, context.db_fields_ids_map)?;
|
||||
if !has_changed && !has_changed_for_geo_fields {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
|
@ -121,6 +121,7 @@ impl<'extractor> Extractor<'extractor> for EmbeddingExtractor<'_, '_> {
|
||||
// do we have set embeddings?
|
||||
if let Some(embeddings) = new_vectors.embeddings {
|
||||
chunks.set_vectors(
|
||||
update.external_document_id(),
|
||||
update.docid(),
|
||||
embeddings
|
||||
.into_vec(&context.doc_alloc, embedder_name)
|
||||
@ -128,7 +129,7 @@ impl<'extractor> Extractor<'extractor> for EmbeddingExtractor<'_, '_> {
|
||||
document_id: update.external_document_id().to_string(),
|
||||
error: error.to_string(),
|
||||
})?,
|
||||
);
|
||||
)?;
|
||||
} else if new_vectors.regenerate {
|
||||
let new_rendered = prompt.render_document(
|
||||
update.external_document_id(),
|
||||
@ -209,6 +210,7 @@ impl<'extractor> Extractor<'extractor> for EmbeddingExtractor<'_, '_> {
|
||||
chunks.set_regenerate(insertion.docid(), new_vectors.regenerate);
|
||||
if let Some(embeddings) = new_vectors.embeddings {
|
||||
chunks.set_vectors(
|
||||
insertion.external_document_id(),
|
||||
insertion.docid(),
|
||||
embeddings
|
||||
.into_vec(&context.doc_alloc, embedder_name)
|
||||
@ -218,7 +220,7 @@ impl<'extractor> Extractor<'extractor> for EmbeddingExtractor<'_, '_> {
|
||||
.to_string(),
|
||||
error: error.to_string(),
|
||||
})?,
|
||||
);
|
||||
)?;
|
||||
} else if new_vectors.regenerate {
|
||||
let rendered = prompt.render_document(
|
||||
insertion.external_document_id(),
|
||||
@ -273,6 +275,7 @@ struct Chunks<'a, 'b, 'extractor> {
|
||||
embedder: &'a Embedder,
|
||||
embedder_id: u8,
|
||||
embedder_name: &'a str,
|
||||
dimensions: usize,
|
||||
prompt: &'a Prompt,
|
||||
possible_embedding_mistakes: &'a PossibleEmbeddingMistakes,
|
||||
user_provided: &'a RefCell<EmbeddingExtractorData<'extractor>>,
|
||||
@ -297,6 +300,7 @@ impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {
|
||||
let capacity = embedder.prompt_count_in_chunk_hint() * embedder.chunk_count_hint();
|
||||
let texts = BVec::with_capacity_in(capacity, doc_alloc);
|
||||
let ids = BVec::with_capacity_in(capacity, doc_alloc);
|
||||
let dimensions = embedder.dimensions();
|
||||
Self {
|
||||
texts,
|
||||
ids,
|
||||
@ -309,6 +313,7 @@ impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {
|
||||
embedder_name,
|
||||
user_provided,
|
||||
has_manual_generation: None,
|
||||
dimensions,
|
||||
}
|
||||
}
|
||||
|
||||
@ -490,7 +495,25 @@ impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {
|
||||
}
|
||||
}
|
||||
|
||||
fn set_vectors(&self, docid: DocumentId, embeddings: Vec<Embedding>) {
|
||||
fn set_vectors(
|
||||
&self,
|
||||
external_docid: &'a str,
|
||||
docid: DocumentId,
|
||||
embeddings: Vec<Embedding>,
|
||||
) -> Result<()> {
|
||||
for (embedding_index, embedding) in embeddings.iter().enumerate() {
|
||||
if embedding.len() != self.dimensions {
|
||||
return Err(UserError::InvalidIndexingVectorDimensions {
|
||||
expected: self.dimensions,
|
||||
found: embedding.len(),
|
||||
embedder_name: self.embedder_name.to_string(),
|
||||
document_id: external_docid.to_string(),
|
||||
embedding_index,
|
||||
}
|
||||
.into());
|
||||
}
|
||||
}
|
||||
self.sender.set_vectors(docid, self.embedder_id, embeddings).unwrap();
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
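The widened `set_vectors` above rejects user-provided embeddings whose length does not match the embedder's configured dimensions, and reports which embedding of which document is wrong. A standalone sketch of that validation, with a plain `String` error standing in for `UserError::InvalidIndexingVectorDimensions`:

fn check_dimensions(
    embedder_name: &str,
    external_docid: &str,
    expected: usize,
    embeddings: &[Vec<f32>],
) -> Result<(), String> {
    for (embedding_index, embedding) in embeddings.iter().enumerate() {
        if embedding.len() != expected {
            return Err(format!(
                "invalid dimensions for embedder `{embedder_name}` on document `{external_docid}`: \
                 expected {expected}, found {} (embedding #{embedding_index})",
                embedding.len(),
            ));
        }
    }
    Ok(())
}

fn main() {
    let ok = vec![vec![0.0_f32; 3], vec![1.0; 3]];
    let bad = vec![vec![0.0_f32; 3], vec![1.0; 2]];
    assert!(check_dimensions("default", "doc-1", 3, &ok).is_ok());
    assert!(check_dimensions("default", "doc-1", 3, &bad).is_err());
}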
@ -17,6 +17,7 @@ use super::guess_primary_key::retrieve_or_guess_primary_key;
|
||||
use crate::documents::PrimaryKey;
|
||||
use crate::progress::{AtomicPayloadStep, Progress};
|
||||
use crate::update::new::document::Versions;
|
||||
use crate::update::new::indexer::update_by_function::obkv_to_rhaimap;
|
||||
use crate::update::new::steps::IndexingStep;
|
||||
use crate::update::new::thread_local::MostlySend;
|
||||
use crate::update::new::{Deletion, Insertion, Update};
|
||||
@ -157,7 +158,16 @@ impl<'pl> DocumentOperation<'pl> {
|
||||
.sort_unstable_by_key(|(_, po)| first_update_pointer(&po.operations).unwrap_or(0));
|
||||
|
||||
let docids_version_offsets = docids_version_offsets.into_bump_slice();
|
||||
Ok((DocumentOperationChanges { docids_version_offsets }, operations_stats, primary_key))
|
||||
let engine = rhai::Engine::new();
|
||||
// Make sure to correctly set up the engine and remove all settings
|
||||
let ast = index.execute_after_update(rtxn)?.map(|f| engine.compile(f).unwrap());
|
||||
let fidmap = index.fields_ids_map(rtxn)?;
|
||||
|
||||
Ok((
|
||||
DocumentOperationChanges { docids_version_offsets, engine, ast, fidmap },
|
||||
operations_stats,
|
||||
primary_key,
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
@ -418,7 +428,15 @@ impl<'pl> DocumentChanges<'pl> for DocumentOperationChanges<'pl> {
|
||||
'pl: 'doc,
|
||||
{
|
||||
let (external_doc, payload_operations) = item;
|
||||
payload_operations.merge(external_doc, &context.doc_alloc)
|
||||
payload_operations.merge(
|
||||
&context.rtxn,
|
||||
context.index,
|
||||
&self.fidmap,
|
||||
&self.engine,
|
||||
self.ast.as_ref(),
|
||||
external_doc,
|
||||
&context.doc_alloc,
|
||||
)
|
||||
}
|
||||
|
||||
fn len(&self) -> usize {
|
||||
@ -427,6 +445,9 @@ impl<'pl> DocumentChanges<'pl> for DocumentOperationChanges<'pl> {
|
||||
}
|
||||
|
||||
pub struct DocumentOperationChanges<'pl> {
|
||||
engine: rhai::Engine,
|
||||
ast: Option<rhai::AST>,
|
||||
fidmap: FieldsIdsMap,
|
||||
docids_version_offsets: &'pl [(&'pl str, PayloadOperations<'pl>)],
|
||||
}
|
||||
|
||||
@ -489,10 +510,14 @@ impl<'pl> PayloadOperations<'pl> {
|
||||
}
|
||||
|
||||
/// Returns only the most recent version of a document based on the updates from the payloads.
|
||||
///
|
||||
/// This function is only meant to be used when doing a replacement and not an update.
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
fn merge<'doc>(
|
||||
&self,
|
||||
rtxn: &heed::RoTxn,
|
||||
index: &Index,
|
||||
fidmap: &FieldsIdsMap,
|
||||
engine: &rhai::Engine,
|
||||
ast: Option<&rhai::AST>,
|
||||
external_doc: &'doc str,
|
||||
doc_alloc: &'doc Bump,
|
||||
) -> Result<Option<DocumentChange<'doc>>>
|
||||
@ -556,9 +581,34 @@ impl<'pl> PayloadOperations<'pl> {
|
||||
Ok(document)
|
||||
});
|
||||
|
||||
let Some(versions) = Versions::multiple(versions)? else { return Ok(None) };
|
||||
let versions = match ast {
|
||||
Some(ast) => {
|
||||
let doc = index
|
||||
.documents
|
||||
.get(rtxn, &self.docid)?
|
||||
.map(|obkv| obkv_to_rhaimap(obkv, fidmap))
|
||||
.transpose()?;
|
||||
match Versions::multiple_with_edits(doc, versions, engine, ast, doc_alloc)?
|
||||
{
|
||||
Some(Some(versions)) => Some(versions),
|
||||
Some(None) if self.is_new => return Ok(None),
|
||||
Some(None) => {
|
||||
return Ok(Some(DocumentChange::Deletion(Deletion::create(
|
||||
self.docid,
|
||||
external_doc,
|
||||
))));
|
||||
}
|
||||
None => None,
|
||||
}
|
||||
}
|
||||
None => Versions::multiple(versions)?,
|
||||
};
|
||||
|
||||
if self.is_new {
|
||||
let Some(versions) = versions else {
|
||||
return Ok(None);
|
||||
};
|
||||
|
||||
if self.is_new || ast.is_some() {
|
||||
Ok(Some(DocumentChange::Insertion(Insertion::create(
|
||||
self.docid,
|
||||
external_doc,
|
||||
|
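The `merge` path above has to distinguish three outcomes from `Versions::multiple_with_edits`: a surviving (possibly edited) document, a document the script deleted, and no version to work on at all. A standalone model of that `Option<Option<_>>` dispatch, with a simplified enum standing in for `DocumentChange`:

#[derive(Debug, PartialEq)]
enum Change {
    Insertion(&'static str),
    Deletion(&'static str),
    Nothing,
}

// Some(Some(doc)): the edit script kept a document, so it becomes an insertion/update.
// Some(None): the script deleted it; emit a deletion unless the document never existed.
// None: there was no version to merge at all.
fn dispatch(outcome: Option<Option<&'static str>>, external_doc: &'static str, is_new: bool) -> Change {
    match outcome {
        Some(Some(_doc)) => Change::Insertion(external_doc),
        Some(None) if is_new => Change::Nothing,
        Some(None) => Change::Deletion(external_doc),
        None => Change::Nothing,
    }
}

fn main() {
    assert_eq!(dispatch(Some(Some("{}")), "doc-1", false), Change::Insertion("doc-1"));
    assert_eq!(dispatch(Some(None), "doc-1", false), Change::Deletion("doc-1"));
    assert_eq!(dispatch(Some(None), "doc-1", true), Change::Nothing);
    assert_eq!(dispatch(None, "doc-1", false), Change::Nothing);
}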
@ -13,6 +13,7 @@ use super::super::thread_local::{FullySend, ThreadLocal};
|
||||
use super::super::FacetFieldIdsDelta;
|
||||
use super::document_changes::{extract, DocumentChanges, IndexingContext};
|
||||
use crate::index::IndexEmbeddingConfig;
|
||||
use crate::progress::MergingWordCache;
|
||||
use crate::proximity::ProximityPrecision;
|
||||
use crate::update::new::extract::EmbeddingExtractor;
|
||||
use crate::update::new::merger::merge_and_send_rtree;
|
||||
@ -96,6 +97,7 @@ where
|
||||
{
|
||||
let span = tracing::trace_span!(target: "indexing::documents::merge", parent: &indexer_span, "faceted");
|
||||
let _entered = span.enter();
|
||||
indexing_context.progress.update_progress(IndexingStep::MergingFacetCaches);
|
||||
|
||||
facet_field_ids_delta = merge_and_send_facet_docids(
|
||||
caches,
|
||||
@ -117,7 +119,6 @@ where
|
||||
} = {
|
||||
let span = tracing::trace_span!(target: "indexing::documents::extract", "word_docids");
|
||||
let _entered = span.enter();
|
||||
|
||||
WordDocidsExtractors::run_extraction(
|
||||
document_changes,
|
||||
indexing_context,
|
||||
@ -126,9 +127,13 @@ where
|
||||
)?
|
||||
};
|
||||
|
||||
indexing_context.progress.update_progress(IndexingStep::MergingWordCaches);
|
||||
|
||||
{
|
||||
let span = tracing::trace_span!(target: "indexing::documents::merge", "word_docids");
|
||||
let _entered = span.enter();
|
||||
indexing_context.progress.update_progress(MergingWordCache::WordDocids);
|
||||
|
||||
merge_and_send_docids(
|
||||
word_docids,
|
||||
index.word_docids.remap_types(),
|
||||
@ -142,6 +147,8 @@ where
|
||||
let span =
|
||||
tracing::trace_span!(target: "indexing::documents::merge", "word_fid_docids");
|
||||
let _entered = span.enter();
|
||||
indexing_context.progress.update_progress(MergingWordCache::WordFieldIdDocids);
|
||||
|
||||
merge_and_send_docids(
|
||||
word_fid_docids,
|
||||
index.word_fid_docids.remap_types(),
|
||||
@ -155,6 +162,8 @@ where
|
||||
let span =
|
||||
tracing::trace_span!(target: "indexing::documents::merge", "exact_word_docids");
|
||||
let _entered = span.enter();
|
||||
indexing_context.progress.update_progress(MergingWordCache::ExactWordDocids);
|
||||
|
||||
merge_and_send_docids(
|
||||
exact_word_docids,
|
||||
index.exact_word_docids.remap_types(),
|
||||
@ -168,6 +177,8 @@ where
|
||||
let span =
|
||||
tracing::trace_span!(target: "indexing::documents::merge", "word_position_docids");
|
||||
let _entered = span.enter();
|
||||
indexing_context.progress.update_progress(MergingWordCache::WordPositionDocids);
|
||||
|
||||
merge_and_send_docids(
|
||||
word_position_docids,
|
||||
index.word_position_docids.remap_types(),
|
||||
@ -181,6 +192,8 @@ where
|
||||
let span =
|
||||
tracing::trace_span!(target: "indexing::documents::merge", "fid_word_count_docids");
|
||||
let _entered = span.enter();
|
||||
indexing_context.progress.update_progress(MergingWordCache::FieldIdWordCountDocids);
|
||||
|
||||
merge_and_send_docids(
|
||||
fid_word_count_docids,
|
||||
index.field_id_word_count_docids.remap_types(),
|
||||
@ -210,6 +223,7 @@ where
|
||||
{
|
||||
let span = tracing::trace_span!(target: "indexing::documents::merge", "word_pair_proximity_docids");
|
||||
let _entered = span.enter();
|
||||
indexing_context.progress.update_progress(IndexingStep::MergingWordProximity);
|
||||
|
||||
merge_and_send_docids(
|
||||
caches,
|
||||
|
@ -234,7 +234,6 @@ where
|
||||
embedders,
|
||||
field_distribution,
|
||||
document_ids,
|
||||
modified_docids,
|
||||
)?;
|
||||
|
||||
Ok(congestion)
|
||||
|
@ -7,12 +7,13 @@ use itertools::{merge_join_by, EitherOrBoth};
|
||||
use super::document_changes::IndexingContext;
|
||||
use crate::facet::FacetType;
|
||||
use crate::index::main_key::{WORDS_FST_KEY, WORDS_PREFIXES_FST_KEY};
|
||||
use crate::progress::Progress;
|
||||
use crate::update::del_add::DelAdd;
|
||||
use crate::update::facet::new_incremental::FacetsUpdateIncremental;
|
||||
use crate::update::facet::{FACET_GROUP_SIZE, FACET_MAX_GROUP_SIZE, FACET_MIN_LEVEL_SIZE};
use crate::update::new::facet_search_builder::FacetSearchBuilder;
use crate::update::new::merger::FacetFieldIdDelta;
use crate::update::new::steps::IndexingStep;
use crate::update::new::steps::{IndexingStep, PostProcessingFacets, PostProcessingWords};
use crate::update::new::word_fst_builder::{PrefixData, PrefixDelta, WordFstBuilder};
use crate::update::new::words_prefix_docids::{
compute_exact_word_prefix_docids, compute_word_prefix_docids, compute_word_prefix_fid_docids,
@ -33,11 +34,23 @@ where
{
let index = indexing_context.index;
indexing_context.progress.update_progress(IndexingStep::PostProcessingFacets);
compute_facet_level_database(index, wtxn, facet_field_ids_delta, &mut global_fields_ids_map)?;
compute_facet_search_database(index, wtxn, global_fields_ids_map)?;
compute_facet_level_database(
index,
wtxn,
facet_field_ids_delta,
&mut global_fields_ids_map,
indexing_context.progress,
)?;
compute_facet_search_database(index, wtxn, global_fields_ids_map, indexing_context.progress)?;
indexing_context.progress.update_progress(IndexingStep::PostProcessingWords);
if let Some(prefix_delta) = compute_word_fst(index, wtxn)? {
compute_prefix_database(index, wtxn, prefix_delta, indexing_context.grenad_parameters)?;
if let Some(prefix_delta) = compute_word_fst(index, wtxn, indexing_context.progress)? {
compute_prefix_database(
index,
wtxn,
prefix_delta,
indexing_context.grenad_parameters,
indexing_context.progress,
)?;
};
Ok(())
}
@ -48,21 +61,32 @@ fn compute_prefix_database(
wtxn: &mut RwTxn,
prefix_delta: PrefixDelta,
grenad_parameters: &GrenadParameters,
progress: &Progress,
) -> Result<()> {
let PrefixDelta { modified, deleted } = prefix_delta;
// Compute word prefix docids

progress.update_progress(PostProcessingWords::WordPrefixDocids);
compute_word_prefix_docids(wtxn, index, &modified, &deleted, grenad_parameters)?;
// Compute exact word prefix docids

progress.update_progress(PostProcessingWords::ExactWordPrefixDocids);
compute_exact_word_prefix_docids(wtxn, index, &modified, &deleted, grenad_parameters)?;
// Compute word prefix fid docids

progress.update_progress(PostProcessingWords::WordPrefixFieldIdDocids);
compute_word_prefix_fid_docids(wtxn, index, &modified, &deleted, grenad_parameters)?;
// Compute word prefix position docids

progress.update_progress(PostProcessingWords::WordPrefixPositionDocids);
compute_word_prefix_position_docids(wtxn, index, &modified, &deleted, grenad_parameters)
}

#[tracing::instrument(level = "trace", skip_all, target = "indexing")]
fn compute_word_fst(index: &Index, wtxn: &mut RwTxn) -> Result<Option<PrefixDelta>> {
fn compute_word_fst(
index: &Index,
wtxn: &mut RwTxn,
progress: &Progress,
) -> Result<Option<PrefixDelta>> {
let rtxn = index.read_txn()?;
progress.update_progress(PostProcessingWords::WordFst);

let words_fst = index.words_fst(&rtxn)?;
let mut word_fst_builder = WordFstBuilder::new(&words_fst)?;
let prefix_settings = index.prefix_settings(&rtxn)?;
@ -112,8 +136,10 @@ fn compute_facet_search_database(
index: &Index,
wtxn: &mut RwTxn,
global_fields_ids_map: GlobalFieldsIdsMap,
progress: &Progress,
) -> Result<()> {
let rtxn = index.read_txn()?;
progress.update_progress(PostProcessingFacets::FacetSearch);

// if the facet search is not enabled, we can skip the rest of the function
if !index.facet_search(wtxn)? {
@ -171,10 +197,16 @@ fn compute_facet_level_database(
wtxn: &mut RwTxn,
mut facet_field_ids_delta: FacetFieldIdsDelta,
global_fields_ids_map: &mut GlobalFieldsIdsMap,
progress: &Progress,
) -> Result<()> {
let rtxn = index.read_txn()?;

let filterable_attributes_rules = index.filterable_attributes_rules(&rtxn)?;
for (fid, delta) in facet_field_ids_delta.consume_facet_string_delta() {
let mut deltas: Vec<_> = facet_field_ids_delta.consume_facet_string_delta().collect();
// We move all bulks at the front and incrementals (others) at the end.
deltas.sort_by_key(|(_, delta)| if let FacetFieldIdDelta::Bulk = delta { 0 } else { 1 });

for (fid, delta) in deltas {
// skip field ids that should not be facet leveled
let Some(metadata) = global_fields_ids_map.metadata(fid) else {
continue;
@ -187,11 +219,13 @@ fn compute_facet_level_database(
let _entered = span.enter();
match delta {
FacetFieldIdDelta::Bulk => {
progress.update_progress(PostProcessingFacets::StringsBulk);
tracing::debug!(%fid, "bulk string facet processing");
FacetsUpdateBulk::new_not_updating_level_0(index, vec![fid], FacetType::String)
.execute(wtxn)?
}
FacetFieldIdDelta::Incremental(delta_data) => {
progress.update_progress(PostProcessingFacets::StringsIncremental);
tracing::debug!(%fid, len=%delta_data.len(), "incremental string facet processing");
FacetsUpdateIncremental::new(
index,
@ -207,16 +241,22 @@ fn compute_facet_level_database(
}
}

for (fid, delta) in facet_field_ids_delta.consume_facet_number_delta() {
let mut deltas: Vec<_> = facet_field_ids_delta.consume_facet_number_delta().collect();
// We move all bulks at the front and incrementals (others) at the end.
deltas.sort_by_key(|(_, delta)| if let FacetFieldIdDelta::Bulk = delta { 0 } else { 1 });

for (fid, delta) in deltas {
let span = tracing::trace_span!(target: "indexing::facet_field_ids", "number");
let _entered = span.enter();
match delta {
FacetFieldIdDelta::Bulk => {
progress.update_progress(PostProcessingFacets::NumbersBulk);
tracing::debug!(%fid, "bulk number facet processing");
FacetsUpdateBulk::new_not_updating_level_0(index, vec![fid], FacetType::Number)
.execute(wtxn)?
}
FacetFieldIdDelta::Incremental(delta_data) => {
progress.update_progress(PostProcessingFacets::NumbersIncremental);
tracing::debug!(%fid, len=%delta_data.len(), "incremental number facet processing");
FacetsUpdateIncremental::new(
index,

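In the facet hunks above, the string and number deltas are now collected into a Vec and sorted so that bulk updates run before incremental ones; Rust's `Vec::sort_by_key` is stable, so entries that share a key keep their original relative order. A minimal standalone sketch of that ordering trick, with a hypothetical `Delta` enum standing in for `FacetFieldIdDelta`:

// Hypothetical stand-in for FacetFieldIdDelta, used only to illustrate the sort.
#[derive(Debug)]
enum Delta {
    Bulk,
    Incremental(usize), // payload length, for illustration only
}

fn main() {
    let mut deltas = vec![
        (1u16, Delta::Incremental(3)),
        (2, Delta::Bulk),
        (3, Delta::Incremental(1)),
        (4, Delta::Bulk),
    ];
    // Bulk entries get key 0 and move to the front; the sort is stable, so the
    // relative order inside each group (2 before 4, 1 before 3) is preserved.
    deltas.sort_by_key(|(_, delta)| if let Delta::Bulk = delta { 0 } else { 1 });
    let fids: Vec<u16> = deltas.iter().map(|(fid, _)| *fid).collect();
    assert_eq!(fids, vec![2, 4, 1, 3]);
}
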
@ -189,7 +189,7 @@ impl<'index> DocumentChanges<'index> for UpdateByFunctionChanges<'index> {
}
}

fn obkv_to_rhaimap(obkv: &KvReaderFieldId, fields_ids_map: &FieldsIdsMap) -> Result<rhai::Map> {
pub fn obkv_to_rhaimap(obkv: &KvReaderFieldId, fields_ids_map: &FieldsIdsMap) -> Result<rhai::Map> {
let all_keys = obkv.iter().map(|(k, _v)| k).collect::<Vec<_>>();
let map: Result<rhai::Map> = all_keys
.iter()

@ -7,6 +7,7 @@ use rand::SeedableRng as _;
use time::OffsetDateTime;

use super::super::channel::*;
use crate::database_stats::DatabaseStats;
use crate::documents::PrimaryKey;
use crate::fields_ids_map::metadata::FieldIdMapWithMetadata;
use crate::index::IndexEmbeddingConfig;
@ -142,7 +143,6 @@ pub(super) fn update_index(
embedders: EmbeddingConfigs,
field_distribution: std::collections::BTreeMap<String, u64>,
document_ids: roaring::RoaringBitmap,
modified_docids: roaring::RoaringBitmap,
) -> Result<()> {
index.put_fields_ids_map(wtxn, new_fields_ids_map.as_fields_ids_map())?;
if let Some(new_primary_key) = new_primary_key {
@ -153,7 +153,8 @@ pub(super) fn update_index(
index.put_field_distribution(wtxn, &field_distribution)?;
index.put_documents_ids(wtxn, &document_ids)?;
index.set_updated_at(wtxn, &OffsetDateTime::now_utc())?;
index.update_documents_stats(wtxn, modified_docids)?;
let stats = DatabaseStats::new(index.documents.remap_data_type(), wtxn)?;
index.put_documents_stats(wtxn, stats)?;
Ok(())
}

@ -82,14 +82,8 @@ where
merge_caches_sorted(frozen, |key, DelAddRoaringBitmap { del, add }| {
let current = database.get(&rtxn, key)?;
match merge_cbo_bitmaps(current, del, add)? {
Operation::Write(bitmap) => {
docids_sender.write(key, &bitmap)?;
Ok(())
}
Operation::Delete => {
docids_sender.delete(key)?;
Ok(())
}
Operation::Write(bitmap) => docids_sender.write(key, &bitmap),
Operation::Delete => docids_sender.delete(key),
Operation::Ignore => Ok(()),
}
})
@ -130,7 +124,6 @@ pub fn merge_and_send_facet_docids<'extractor>(
Operation::Ignore => Ok(()),
}
})?;

Ok(facet_field_ids_delta)
})
.reduce(

@ -1,11 +1,6 @@
use std::borrow::Cow;
use crate::make_enum_progress;

use enum_iterator::Sequence;

use crate::progress::Step;

#[derive(Debug, Clone, Copy, PartialEq, Eq, Sequence)]
#[repr(u8)]
make_enum_progress! {
pub enum IndexingStep {
PreparingPayloads,
ExtractingDocuments,
@ -13,6 +8,9 @@ pub enum IndexingStep {
ExtractingWords,
ExtractingWordProximity,
ExtractingEmbeddings,
MergingFacetCaches,
MergingWordCaches,
MergingWordProximity,
WritingGeoPoints,
WaitingForDatabaseWrites,
WaitingForExtractors,
@ -21,32 +19,24 @@ pub enum IndexingStep {
PostProcessingWords,
Finalizing,
}

impl Step for IndexingStep {
fn name(&self) -> Cow<'static, str> {
match self {
IndexingStep::PreparingPayloads => "preparing update file",
IndexingStep::ExtractingDocuments => "extracting documents",
IndexingStep::ExtractingFacets => "extracting facets",
IndexingStep::ExtractingWords => "extracting words",
IndexingStep::ExtractingWordProximity => "extracting word proximity",
IndexingStep::ExtractingEmbeddings => "extracting embeddings",
IndexingStep::WritingGeoPoints => "writing geo points",
IndexingStep::WaitingForDatabaseWrites => "waiting for database writes",
IndexingStep::WaitingForExtractors => "waiting for extractors",
IndexingStep::WritingEmbeddingsToDatabase => "writing embeddings to database",
IndexingStep::PostProcessingFacets => "post-processing facets",
IndexingStep::PostProcessingWords => "post-processing words",
IndexingStep::Finalizing => "finalizing",
}
.into()
}

fn current(&self) -> u32 {
*self as u32
make_enum_progress! {
pub enum PostProcessingFacets {
StringsBulk,
StringsIncremental,
NumbersBulk,
NumbersIncremental,
FacetSearch,
}
}

fn total(&self) -> u32 {
Self::CARDINALITY as u32
make_enum_progress! {
pub enum PostProcessingWords {
WordFst,
WordPrefixDocids,
ExactWordPrefixDocids,
WordPrefixFieldIdDocids,
WordPrefixPositionDocids,
}
}

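The steps hunk above replaces a hand-written `Step` implementation with the `make_enum_progress!` macro; the deleted lines show the shape of what such a macro is expected to generate. A rough, dependency-free sketch of an equivalent expansion, with a simplified hypothetical `Step` trait and the variant count hardcoded rather than derived via `enum_iterator::Sequence`:

use std::borrow::Cow;

// Simplified stand-in for the crate's Step trait; the real definition lives in
// crate::progress and may differ.
trait Step {
    fn name(&self) -> Cow<'static, str>;
    fn current(&self) -> u32;
    fn total(&self) -> u32;
}

#[derive(Debug, Clone, Copy)]
#[repr(u8)]
enum PostProcessingWords {
    WordFst,
    WordPrefixDocids,
    ExactWordPrefixDocids,
    WordPrefixFieldIdDocids,
    WordPrefixPositionDocids,
}

impl Step for PostProcessingWords {
    fn name(&self) -> Cow<'static, str> {
        // Human-readable labels chosen for the sketch; the macro derives its own.
        match self {
            Self::WordFst => "word fst",
            Self::WordPrefixDocids => "word prefix docids",
            Self::ExactWordPrefixDocids => "exact word prefix docids",
            Self::WordPrefixFieldIdDocids => "word prefix field id docids",
            Self::WordPrefixPositionDocids => "word prefix position docids",
        }
        .into()
    }

    fn current(&self) -> u32 {
        *self as u32
    }

    fn total(&self) -> u32 {
        // The removed impl used Self::CARDINALITY from enum_iterator::Sequence;
        // hardcoded here to keep the sketch self-contained.
        5
    }
}

fn main() {
    let step = PostProcessingWords::ExactWordPrefixDocids;
    assert_eq!(step.current(), 2);
    assert_eq!(step.total(), 5);
    assert_eq!(step.name(), "exact word prefix docids");
}
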
@ -1,6 +1,7 @@
use std::cell::RefCell;
use std::collections::BTreeSet;
use std::io::{BufReader, BufWriter, Read, Seek, Write};
use std::num::NonZeroUsize;

use hashbrown::HashMap;
use heed::types::Bytes;
@ -217,7 +218,7 @@ impl WordPrefixIntegerDocids {
index.push(PrefixIntegerEntry {
prefix,
pos,
serialized_length: Some(buffer.len()),
serialized_length: NonZeroUsize::new(buffer.len()),
});
file.write_all(buffer)?;
}
@ -243,7 +244,7 @@ impl WordPrefixIntegerDocids {
key_buffer.extend_from_slice(&pos.to_be_bytes());
match serialized_length {
Some(serialized_length) => {
buffer.resize(serialized_length, 0);
buffer.resize(serialized_length.get(), 0);
file.read_exact(&mut buffer)?;
self.prefix_database.remap_data_type::<Bytes>().put(
wtxn,
@ -266,7 +267,7 @@ impl WordPrefixIntegerDocids {
struct PrefixIntegerEntry<'a> {
prefix: &'a str,
pos: u16,
serialized_length: Option<usize>,
serialized_length: Option<NonZeroUsize>,
}

/// TODO doc

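In the hunk above, `serialized_length` moves from `Option<usize>` to `Option<NonZeroUsize>`: `NonZeroUsize::new(buffer.len())` yields `None` for an empty buffer instead of `Some(0)`, and the standard niche optimization keeps `Option<NonZeroUsize>` the same size as a plain `usize`. A quick standalone check of both properties (illustration only, not project code):

use std::mem::size_of;
use std::num::NonZeroUsize;

fn main() {
    // A zero length maps to None instead of Some(0).
    assert_eq!(NonZeroUsize::new(0), None);
    assert_eq!(NonZeroUsize::new(8).map(NonZeroUsize::get), Some(8));

    // Thanks to the zero niche, the Option costs no extra space,
    // while Option<usize> needs a separate discriminant word.
    assert_eq!(size_of::<Option<NonZeroUsize>>(), size_of::<usize>());
    assert!(size_of::<Option<usize>>() > size_of::<usize>());
}
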
@ -183,6 +183,7 @@ pub struct Settings<'a, 't, 'i> {
localized_attributes_rules: Setting<Vec<LocalizedAttributesRule>>,
prefix_search: Setting<PrefixSearch>,
facet_search: Setting<bool>,
execute_after_update: Setting<String>,
}

impl<'a, 't, 'i> Settings<'a, 't, 'i> {
@ -220,6 +221,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
localized_attributes_rules: Setting::NotSet,
prefix_search: Setting::NotSet,
facet_search: Setting::NotSet,
execute_after_update: Setting::NotSet,
indexer_config,
}
}
@ -442,6 +444,14 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
self.facet_search = Setting::Reset;
}

pub fn set_execute_after_update(&mut self, value: String) {
self.execute_after_update = Setting::Set(value);
}

pub fn reset_execute_after_update(&mut self) {
self.execute_after_update = Setting::Reset;
}

#[tracing::instrument(
level = "trace"
skip(self, progress_callback, should_abort, settings_diff),
@ -994,6 +1004,18 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
Ok(changed)
}

fn update_execute_after_update(&mut self) -> Result<()> {
match self.execute_after_update.as_ref() {
Setting::Set(new) => {
self.index.put_execute_after_update(self.wtxn, &new).map_err(Into::into)
}
Setting::Reset => {
self.index.delete_execute_after_update(self.wtxn).map(drop).map_err(Into::into)
}
Setting::NotSet => Ok(()),
}
}

fn update_embedding_configs(&mut self) -> Result<BTreeMap<String, EmbedderAction>> {
match std::mem::take(&mut self.embedder_settings) {
Setting::Set(configs) => self.update_embedding_configs_set(configs),
@ -1245,6 +1267,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
self.update_proximity_precision()?;
self.update_prefix_search()?;
self.update_facet_search()?;
self.update_execute_after_update()?;
self.update_localized_attributes_rules()?;

let embedding_config_updates = self.update_embedding_configs()?;
@ -1331,8 +1354,21 @@ impl InnerIndexSettingsDiff {

let cache_exact_attributes = old_settings.exact_attributes != new_settings.exact_attributes;

let cache_user_defined_searchables = old_settings.user_defined_searchable_attributes
!= new_settings.user_defined_searchable_attributes;
// Check if any searchable field has been added or removed form the list,
// Changing the order should not be considered as a change for reindexing.
let cache_user_defined_searchables = match (
&old_settings.user_defined_searchable_attributes,
&new_settings.user_defined_searchable_attributes,
) {
(Some(old), Some(new)) => {
let old: BTreeSet<_> = old.iter().collect();
let new: BTreeSet<_> = new.iter().collect();

old != new
}
(None, None) => false,
_otherwise => true,
};

// if the user-defined searchables changed, then we need to reindex prompts.
if cache_user_defined_searchables {

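The last settings hunk compares the old and new user-defined searchable attributes as sets, so a pure reordering no longer counts as a change that forces a reindex, while adding or removing a field, or switching between `Some` and `None`, still does. A standalone sketch of the same comparison, using a hypothetical helper with simplified types:

use std::collections::BTreeSet;

// Hypothetical helper mirroring the cache_user_defined_searchables logic above.
fn searchables_changed(old: Option<&[&str]>, new: Option<&[&str]>) -> bool {
    match (old, new) {
        (Some(old), Some(new)) => {
            let old: BTreeSet<_> = old.iter().collect();
            let new: BTreeSet<_> = new.iter().collect();
            old != new
        }
        (None, None) => false,
        _otherwise => true,
    }
}

fn main() {
    // Reordering only: not a change.
    assert!(!searchables_changed(Some(&["title", "overview"]), Some(&["overview", "title"])));
    // Adding a field, or resetting the whole list, is a change.
    assert!(searchables_changed(Some(&["title"]), Some(&["title", "overview"])));
    assert!(searchables_changed(Some(&["title"]), None));
}
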
@ -896,6 +896,7 @@ fn test_correct_settings_init() {
localized_attributes_rules,
prefix_search,
facet_search,
execute_after_update,
} = settings;
assert!(matches!(searchable_fields, Setting::NotSet));
assert!(matches!(displayed_fields, Setting::NotSet));
@ -923,6 +924,7 @@ fn test_correct_settings_init() {
assert!(matches!(localized_attributes_rules, Setting::NotSet));
assert!(matches!(prefix_search, Setting::NotSet));
assert!(matches!(facet_search, Setting::NotSet));
assert!(matches!(execute_after_update, Setting::NotSet));
})
.unwrap();
}

@ -18,7 +18,7 @@ byte-unit = { version = "5.1.6", default-features = false, features = [
"byte",
"serde",
] }
tokio = { version = "1.42.0", features = ["sync"] }
tokio = { version = "1.43.1", features = ["sync"] }

[target.'cfg(any(target_os = "linux", target_os = "macos"))'.dependencies]
libproc = "0.14.10"

@ -31,7 +31,7 @@ time = { version = "0.3.37", features = [
"serde-human-readable",
"macros",
] }
tokio = { version = "1.42.0", features = [
tokio = { version = "1.43.1", features = [
"rt",
"net",
"time",