mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-11-28 17:00:32 +00:00
Compare commits
66 Commits
v1.3.0-rc.
...
nightly-te
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
7b7a27a3b2 | ||
|
|
29adfc2f68 | ||
|
|
604d533b31 | ||
|
|
3dda93d50f | ||
|
|
117146ec4e | ||
|
|
884b4d47b1 | ||
|
|
023cb0c2de | ||
|
|
f391039a6f | ||
|
|
fcdd20b533 | ||
|
|
b45c36cd71 | ||
|
|
151c31c18f | ||
|
|
a8ad0902d3 | ||
|
|
e917dbdebb | ||
|
|
ba919b6123 | ||
|
|
5b0157c6c6 | ||
|
|
3b9a87c790 | ||
|
|
3a3414270d | ||
|
|
d06e0905db | ||
|
|
939b2fc6fd | ||
|
|
fae61372be | ||
|
|
d8b47b689e | ||
|
|
be72be7c0d | ||
|
|
88559a2d54 | ||
|
|
59201a7852 | ||
|
|
9e3e69373e | ||
|
|
29ab54b259 | ||
|
|
86d8bb3a3e | ||
|
|
0e2a5951b4 | ||
|
|
691a536893 | ||
|
|
df528b41d8 | ||
|
|
2452ec55b4 | ||
|
|
54ae1b5a67 | ||
|
|
3070a20580 | ||
|
|
0497f93494 | ||
|
|
2dfbb6813a | ||
|
|
8f589a5cce | ||
|
|
0b8bbd8750 | ||
|
|
eef95de30e | ||
|
|
13a13a4862 | ||
|
|
d5ab750627 | ||
|
|
2afd10f96d | ||
|
|
e691c92ed5 | ||
|
|
2d2619bd90 | ||
|
|
516d2df862 | ||
|
|
c76b488ab1 | ||
|
|
d383afc82b | ||
|
|
f9d94c5845 | ||
|
|
928ab2f9b1 | ||
|
|
7c18a9375f | ||
|
|
05a311f9be | ||
|
|
9b1b9b409e | ||
|
|
7f555f23e8 | ||
|
|
a0bfc9f63a | ||
|
|
3155264381 | ||
|
|
42400c381e | ||
|
|
08c7dab528 | ||
|
|
8590687515 | ||
|
|
8f5d127b1e | ||
|
|
2b4160ebb9 | ||
|
|
8ba1c8f88f | ||
|
|
8e7edf8ea7 | ||
|
|
9daccdf7f0 | ||
|
|
437ee55c57 | ||
|
|
b1717865ea | ||
|
|
176f716292 | ||
|
|
40ad19ba9e |
2
.github/workflows/publish-apt-brew-pkg.yml
vendored
2
.github/workflows/publish-apt-brew-pkg.yml
vendored
@@ -35,7 +35,7 @@ jobs:
|
||||
- name: Build deb package
|
||||
run: cargo deb -p meilisearch -o target/debian/meilisearch.deb
|
||||
- name: Upload debian pkg to release
|
||||
uses: svenstaro/upload-release-action@2.6.1
|
||||
uses: svenstaro/upload-release-action@2.7.0
|
||||
with:
|
||||
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
|
||||
file: target/debian/meilisearch.deb
|
||||
|
||||
8
.github/workflows/publish-binaries.yml
vendored
8
.github/workflows/publish-binaries.yml
vendored
@@ -54,7 +54,7 @@ jobs:
|
||||
# No need to upload binaries for dry run (cron)
|
||||
- name: Upload binaries to release
|
||||
if: github.event_name == 'release'
|
||||
uses: svenstaro/upload-release-action@2.6.1
|
||||
uses: svenstaro/upload-release-action@2.7.0
|
||||
with:
|
||||
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
|
||||
file: target/release/meilisearch
|
||||
@@ -87,7 +87,7 @@ jobs:
|
||||
# No need to upload binaries for dry run (cron)
|
||||
- name: Upload binaries to release
|
||||
if: github.event_name == 'release'
|
||||
uses: svenstaro/upload-release-action@2.6.1
|
||||
uses: svenstaro/upload-release-action@2.7.0
|
||||
with:
|
||||
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
|
||||
file: target/release/${{ matrix.artifact_name }}
|
||||
@@ -121,7 +121,7 @@ jobs:
|
||||
- name: Upload the binary to release
|
||||
# No need to upload binaries for dry run (cron)
|
||||
if: github.event_name == 'release'
|
||||
uses: svenstaro/upload-release-action@2.6.1
|
||||
uses: svenstaro/upload-release-action@2.7.0
|
||||
with:
|
||||
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
|
||||
file: target/${{ matrix.target }}/release/meilisearch
|
||||
@@ -183,7 +183,7 @@ jobs:
|
||||
- name: Upload the binary to release
|
||||
# No need to upload binaries for dry run (cron)
|
||||
if: github.event_name == 'release'
|
||||
uses: svenstaro/upload-release-action@2.6.1
|
||||
uses: svenstaro/upload-release-action@2.7.0
|
||||
with:
|
||||
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
|
||||
file: target/${{ matrix.target }}/release/meilisearch
|
||||
|
||||
32
.github/workflows/test-suite.yml
vendored
32
.github/workflows/test-suite.yml
vendored
@@ -30,20 +30,20 @@ jobs:
|
||||
run: |
|
||||
apt-get update && apt-get install -y curl
|
||||
apt-get install build-essential -y
|
||||
- name: Run test with Rust stable
|
||||
- name: Setup test with Rust stable
|
||||
if: github.event_name != 'schedule'
|
||||
uses: actions-rs/toolchain@v1
|
||||
with:
|
||||
toolchain: stable
|
||||
override: true
|
||||
- name: Run test with Rust nightly
|
||||
if: github.event_name == 'schedule'
|
||||
- name: Setup test with Rust nightly
|
||||
if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
|
||||
uses: actions-rs/toolchain@v1
|
||||
with:
|
||||
toolchain: nightly
|
||||
override: true
|
||||
- name: Cache dependencies
|
||||
uses: Swatinem/rust-cache@v2.4.0
|
||||
uses: Swatinem/rust-cache@v2.5.1
|
||||
- name: Run cargo check without any default features
|
||||
uses: actions-rs/cargo@v1
|
||||
with:
|
||||
@@ -65,7 +65,7 @@ jobs:
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- name: Cache dependencies
|
||||
uses: Swatinem/rust-cache@v2.4.0
|
||||
uses: Swatinem/rust-cache@v2.5.1
|
||||
- name: Run cargo check without any default features
|
||||
uses: actions-rs/cargo@v1
|
||||
with:
|
||||
@@ -78,12 +78,12 @@ jobs:
|
||||
args: --locked --release --all
|
||||
|
||||
test-all-features:
|
||||
name: Tests all features on cron schedule only
|
||||
name: Tests all features
|
||||
runs-on: ubuntu-latest
|
||||
container:
|
||||
# Use ubuntu-18.04 to compile with glibc 2.27, which are the production expectations
|
||||
image: ubuntu:18.04
|
||||
if: github.event_name == 'schedule'
|
||||
if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- name: Install needed dependencies
|
||||
@@ -110,24 +110,24 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
container:
|
||||
image: ubuntu:18.04
|
||||
if: github.event_name == 'schedule'
|
||||
if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- name: Install needed dependencies
|
||||
run: |
|
||||
apt-get update
|
||||
apt-get install --assume-yes build-essential curl
|
||||
- uses: actions-rs/toolchain@v1
|
||||
with:
|
||||
toolchain: stable
|
||||
override: true
|
||||
- uses: actions-rs/toolchain@v1
|
||||
with:
|
||||
toolchain: stable
|
||||
override: true
|
||||
- name: Run cargo tree without default features and check lindera is not present
|
||||
run: |
|
||||
cargo tree -f '{p} {f}' -e normal --no-default-features | grep lindera -vqz
|
||||
- name: Run cargo tree with default features and check lindera is pressent
|
||||
run: |
|
||||
cargo tree -f '{p} {f}' -e normal | grep lindera -qz
|
||||
|
||||
|
||||
# We run tests in debug also, to make sure that the debug_assertions are hit
|
||||
test-debug:
|
||||
name: Run tests in debug
|
||||
@@ -146,7 +146,7 @@ jobs:
|
||||
toolchain: stable
|
||||
override: true
|
||||
- name: Cache dependencies
|
||||
uses: Swatinem/rust-cache@v2.4.0
|
||||
uses: Swatinem/rust-cache@v2.5.1
|
||||
- name: Run tests in debug
|
||||
uses: actions-rs/cargo@v1
|
||||
with:
|
||||
@@ -165,7 +165,7 @@ jobs:
|
||||
override: true
|
||||
components: clippy
|
||||
- name: Cache dependencies
|
||||
uses: Swatinem/rust-cache@v2.4.0
|
||||
uses: Swatinem/rust-cache@v2.5.1
|
||||
- name: Run cargo clippy
|
||||
uses: actions-rs/cargo@v1
|
||||
with:
|
||||
@@ -184,7 +184,7 @@ jobs:
|
||||
override: true
|
||||
components: rustfmt
|
||||
- name: Cache dependencies
|
||||
uses: Swatinem/rust-cache@v2.4.0
|
||||
uses: Swatinem/rust-cache@v2.5.1
|
||||
- name: Run cargo fmt
|
||||
# Since we never ran the `build.rs` script in the benchmark directory we are missing one auto-generated import file.
|
||||
# Since we want to trigger (and fail) this action as fast as possible, instead of building the benchmark crate
|
||||
|
||||
290
Cargo.lock
generated
290
Cargo.lock
generated
@@ -405,7 +405,7 @@ checksum = "16e62a023e7c117e27523144c5d2459f4397fcc3cab0085af8e2224f643a0193"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.18",
|
||||
"syn 2.0.28",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -416,7 +416,7 @@ checksum = "b9ccdd8f2a161be9bd5c023df56f1b2a0bd1d83872ae53b71a84a12c9bf6e842"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.18",
|
||||
"syn 2.0.28",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -603,7 +603,7 @@ checksum = "fdde5c9cd29ebd706ce1b35600920a33550e402fc998a2e53ad3b42c3c47a192"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.18",
|
||||
"syn 2.0.28",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -700,9 +700,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "charabia"
|
||||
version = "0.8.1"
|
||||
version = "0.8.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bb49850f555eb71aa6fc6d4d79420e81f4d89fa56e0e9c0f6d19aace2f56c554"
|
||||
checksum = "57aa1b4a8dda126c03ebf2f7e31d16cfc8781c2fe80dedd1a33459efc3e07578"
|
||||
dependencies = [
|
||||
"aho-corasick",
|
||||
"cow-utils",
|
||||
@@ -794,7 +794,7 @@ dependencies = [
|
||||
"heck",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.18",
|
||||
"syn 2.0.28",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -1021,9 +1021,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "csv"
|
||||
version = "1.2.1"
|
||||
version = "1.2.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0b015497079b9a9d69c02ad25de6c0a6edef051ea6360a327d0bd05802ef64ad"
|
||||
checksum = "626ae34994d3d8d668f4269922248239db4ae42d538b14c398b74a52208e8086"
|
||||
dependencies = [
|
||||
"csv-core",
|
||||
"itoa",
|
||||
@@ -1197,12 +1197,6 @@ dependencies = [
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "doc-comment"
|
||||
version = "0.3.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10"
|
||||
|
||||
[[package]]
|
||||
name = "dump"
|
||||
version = "1.3.0"
|
||||
@@ -1342,7 +1336,7 @@ checksum = "eecf8589574ce9b895052fa12d69af7a233f99e6107f5cb8dd1044f2a17bfdcb"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.18",
|
||||
"syn 2.0.28",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -1358,6 +1352,12 @@ dependencies = [
|
||||
"termcolor",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "equivalent"
|
||||
version = "1.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5"
|
||||
|
||||
[[package]]
|
||||
name = "errno"
|
||||
version = "0.3.1"
|
||||
@@ -1537,7 +1537,7 @@ checksum = "89ca545a94061b6365f2c7355b4b32bd20df3ff95f02da9329b34ccc3bd6ee72"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.18",
|
||||
"syn 2.0.28",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -1679,7 +1679,7 @@ dependencies = [
|
||||
"futures-sink",
|
||||
"futures-util",
|
||||
"http",
|
||||
"indexmap",
|
||||
"indexmap 1.9.3",
|
||||
"slab",
|
||||
"tokio",
|
||||
"tokio-util",
|
||||
@@ -1701,15 +1701,6 @@ dependencies = [
|
||||
"byteorder",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "hashbrown"
|
||||
version = "0.11.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ab5ef0d4909ef3724cc8cce6ccc8572c5c817592e9285f5464f8e86f8bd3726e"
|
||||
dependencies = [
|
||||
"ahash 0.7.6",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "hashbrown"
|
||||
version = "0.12.3"
|
||||
@@ -1719,6 +1710,12 @@ dependencies = [
|
||||
"ahash 0.7.6",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "hashbrown"
|
||||
version = "0.14.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2c6201b9ff9fd90a5a3bac2e56a830d0caa509576f0e503818ee82c181b3437a"
|
||||
|
||||
[[package]]
|
||||
name = "heapless"
|
||||
version = "0.7.16"
|
||||
@@ -1740,8 +1737,8 @@ checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8"
|
||||
|
||||
[[package]]
|
||||
name = "heed"
|
||||
version = "0.12.5"
|
||||
source = "git+https://github.com/meilisearch/heed?tag=v0.12.6#8c5b94225fc949c02bb7b900cc50ffaf6b584b1e"
|
||||
version = "0.12.7"
|
||||
source = "git+https://github.com/meilisearch/heed?tag=v0.12.7#061a5276b1f336f5f3302bee291e336041d88632"
|
||||
dependencies = [
|
||||
"byteorder",
|
||||
"heed-traits",
|
||||
@@ -1758,12 +1755,12 @@ dependencies = [
|
||||
[[package]]
|
||||
name = "heed-traits"
|
||||
version = "0.7.0"
|
||||
source = "git+https://github.com/meilisearch/heed?tag=v0.12.6#8c5b94225fc949c02bb7b900cc50ffaf6b584b1e"
|
||||
source = "git+https://github.com/meilisearch/heed?tag=v0.12.7#061a5276b1f336f5f3302bee291e336041d88632"
|
||||
|
||||
[[package]]
|
||||
name = "heed-types"
|
||||
version = "0.7.2"
|
||||
source = "git+https://github.com/meilisearch/heed?tag=v0.12.6#8c5b94225fc949c02bb7b900cc50ffaf6b584b1e"
|
||||
source = "git+https://github.com/meilisearch/heed?tag=v0.12.7#061a5276b1f336f5f3302bee291e336041d88632"
|
||||
dependencies = [
|
||||
"bincode",
|
||||
"heed-traits",
|
||||
@@ -1802,22 +1799,6 @@ dependencies = [
|
||||
"digest",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "hnsw"
|
||||
version = "0.11.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2b9740ebf8769ec4ad6762cc951ba18f39bba6dfbc2fbbe46285f7539af79752"
|
||||
dependencies = [
|
||||
"ahash 0.7.6",
|
||||
"hashbrown 0.11.2",
|
||||
"libm",
|
||||
"num-traits",
|
||||
"rand_core",
|
||||
"serde",
|
||||
"smallvec",
|
||||
"space",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "http"
|
||||
version = "0.2.9"
|
||||
@@ -1931,6 +1912,7 @@ dependencies = [
|
||||
"meilisearch-types",
|
||||
"nelson",
|
||||
"page_size 0.5.0",
|
||||
"puffin",
|
||||
"roaring",
|
||||
"serde",
|
||||
"serde_json",
|
||||
@@ -1952,6 +1934,16 @@ dependencies = [
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "indexmap"
|
||||
version = "2.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d5477fe2230a79769d8dc68e0eabf5437907c0457a5614a9e8dddb67f65eb65d"
|
||||
dependencies = [
|
||||
"equivalent",
|
||||
"hashbrown 0.14.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "inout"
|
||||
version = "0.1.3"
|
||||
@@ -1986,6 +1978,21 @@ dependencies = [
|
||||
"cfg-if",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "instant-distance"
|
||||
version = "0.6.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8c619cdaa30bb84088963968bee12a45ea5fbbf355f2c021bcd15589f5ca494a"
|
||||
dependencies = [
|
||||
"num_cpus",
|
||||
"ordered-float",
|
||||
"parking_lot",
|
||||
"rand",
|
||||
"rayon",
|
||||
"serde",
|
||||
"serde-big-array",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "io-lifetimes"
|
||||
version = "1.0.11"
|
||||
@@ -2016,12 +2023,12 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "is-terminal"
|
||||
version = "0.4.8"
|
||||
version = "0.4.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "24fddda5af7e54bf7da53067d6e802dbcc381d0a8eef629df528e3ebf68755cb"
|
||||
checksum = "cb0889898416213fab133e1d33a0e5858a48177452750691bde3666d0fdbaf8b"
|
||||
dependencies = [
|
||||
"hermit-abi 0.3.1",
|
||||
"rustix 0.38.2",
|
||||
"rustix 0.38.4",
|
||||
"windows-sys 0.48.0",
|
||||
]
|
||||
|
||||
@@ -2164,9 +2171,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lindera-cc-cedict-builder"
|
||||
version = "0.25.0"
|
||||
version = "0.27.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4c6bf79b29a90bcd22036e494d6cc9ac3abe9ab604b21f3258ba6dc1ce501801"
|
||||
checksum = "2d2e8f2ca97ddf952fe340642511b9c14b373cb2eef711d526bb8ef2ca0969b8"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"bincode",
|
||||
@@ -2183,9 +2190,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lindera-compress"
|
||||
version = "0.25.0"
|
||||
version = "0.27.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8f2e99e67736352bbb6ed1c273643975822505067ca32194b0981040bc50527a"
|
||||
checksum = "f72b460559bcbe8a9cee85ea4a5056133ed3abf373031191589236e656d65b59"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"flate2",
|
||||
@@ -2194,9 +2201,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lindera-core"
|
||||
version = "0.25.0"
|
||||
version = "0.27.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7c3935e966409156f22cb4b334b21b0dce84b7aa1cad62214b466489d249c8e5"
|
||||
checksum = "f586eb8a9393c32d5525e0e9336a3727bd1329674740097126f3b0bff8a1a1ea"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"bincode",
|
||||
@@ -2211,9 +2218,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lindera-decompress"
|
||||
version = "0.25.0"
|
||||
version = "0.27.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7476406abb63c49d7f59c88b9b868ee8d2981495ea7e2c3ad129902f9916b3c6"
|
||||
checksum = "1fb1facd8da698072fcc7338bd757730db53d59f313f44dd583fa03681dcc0e1"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"flate2",
|
||||
@@ -2222,9 +2229,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lindera-dictionary"
|
||||
version = "0.25.0"
|
||||
version = "0.27.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "808b7d2b3cabc25a4022526d484a4cfd1d5924dc76a26e0379707698841acef2"
|
||||
checksum = "ec7be7410b1da7017a8948986b87af67082f605e9a716f0989790d795d677f0c"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"bincode",
|
||||
@@ -2242,9 +2249,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lindera-ipadic-builder"
|
||||
version = "0.25.0"
|
||||
version = "0.27.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "31f373a280958c930e5ee4a1e4db3a0ee0542afaf02d3b5cacb8cab4e298648e"
|
||||
checksum = "705d07f8a45d04fd95149f7ad41a26d1f9e56c9c00402be6f9dd05e3d88b99c6"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"bincode",
|
||||
@@ -2263,9 +2270,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lindera-ipadic-neologd-builder"
|
||||
version = "0.25.0"
|
||||
version = "0.27.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "92eff98e9ed1a7a412b91709c2343457a04ef02fa0c27c27e3a5892f5591eae9"
|
||||
checksum = "633a93983ba13fba42328311a501091bd4a7aff0c94ae9eaa9d4733dd2b0468a"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"bincode",
|
||||
@@ -2284,9 +2291,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lindera-ko-dic"
|
||||
version = "0.25.0"
|
||||
version = "0.27.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "74c6d5bf7d8092bd6d10de7a5d74b70ea7cf234586235b0d6cdb903b05a6c9e2"
|
||||
checksum = "a428e0d316b6c86f51bd919479692bc41ad840dba266ebc044663970f431ea18"
|
||||
dependencies = [
|
||||
"bincode",
|
||||
"byteorder",
|
||||
@@ -2301,9 +2308,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lindera-ko-dic-builder"
|
||||
version = "0.25.0"
|
||||
version = "0.27.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f0a4add6d3c1e41ec9e2690d33e287d0223fb59a30ccee4980c23f31368cae1e"
|
||||
checksum = "2a5288704c6b8a069c0a1705c38758e836497698b50453373ab3d56c6f9a7ef8"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"bincode",
|
||||
@@ -2321,9 +2328,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lindera-tokenizer"
|
||||
version = "0.25.0"
|
||||
version = "0.27.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cb6a8acbd068019d1cdac7316f0dcb87f8e33ede2b13aa237f45114f9750afb8"
|
||||
checksum = "106ba439b2e87529d9bbedbb88d69f635baba1195c26502b308f55a85885fc81"
|
||||
dependencies = [
|
||||
"bincode",
|
||||
"byteorder",
|
||||
@@ -2336,9 +2343,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lindera-unidic"
|
||||
version = "0.25.0"
|
||||
version = "0.27.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "14abf0613d350b30d3b0406a33b1de8fa8d829f26516909421702174785991c8"
|
||||
checksum = "3399b6dcfe1701333451d184ff3c677f433b320153427b146360c9e4bd8cb816"
|
||||
dependencies = [
|
||||
"bincode",
|
||||
"byteorder",
|
||||
@@ -2353,9 +2360,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lindera-unidic-builder"
|
||||
version = "0.25.0"
|
||||
version = "0.27.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e204ed53d9bd63227d1e6a6c1f122ca039e00a8634ac32e7fb0281eeec8615c4"
|
||||
checksum = "b698227fdaeac32289173ab389b990d4eb00a40cbc9912020f69a0c491dabf55"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"bincode",
|
||||
@@ -2391,9 +2398,9 @@ checksum = "ef53942eb7bf7ff43a617b3e2c1c4a5ecf5944a7c1bc12d7ee39bbb15e5c1519"
|
||||
|
||||
[[package]]
|
||||
name = "linux-raw-sys"
|
||||
version = "0.4.3"
|
||||
version = "0.4.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "09fc20d2ca12cb9f044c93e3bd6d32d523e6e2ec3db4f7b2939cd99026ecd3f0"
|
||||
checksum = "57bcfdad1b858c2db7c38303a6d2ad4dfaf5eb53dfeb0910128b2c26d6158503"
|
||||
|
||||
[[package]]
|
||||
name = "lmdb-rkv-sys"
|
||||
@@ -2435,9 +2442,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "log"
|
||||
version = "0.4.18"
|
||||
version = "0.4.19"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "518ef76f2f87365916b142844c16d8fefd85039bc5699050210a7778ee1cd1de"
|
||||
checksum = "b06a4cde4c0f271a446782e3eff8de789548ce57dbc8eca9292c27f4a42004b4"
|
||||
|
||||
[[package]]
|
||||
name = "logging_timer"
|
||||
@@ -2461,6 +2468,12 @@ dependencies = [
|
||||
"syn 1.0.109",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "lz4_flex"
|
||||
version = "0.10.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8b8c72594ac26bfd34f2d99dfced2edfaddfe8a476e3ff2ca0eb293d925c4f83"
|
||||
|
||||
[[package]]
|
||||
name = "manifest-dir-macros"
|
||||
version = "0.1.17"
|
||||
@@ -2470,7 +2483,7 @@ dependencies = [
|
||||
"once_cell",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.18",
|
||||
"syn 2.0.28",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -2527,7 +2540,7 @@ dependencies = [
|
||||
"hex",
|
||||
"http",
|
||||
"index-scheduler",
|
||||
"indexmap",
|
||||
"indexmap 1.9.3",
|
||||
"insta",
|
||||
"is-terminal",
|
||||
"itertools",
|
||||
@@ -2550,6 +2563,8 @@ dependencies = [
|
||||
"pin-project-lite",
|
||||
"platform-dirs",
|
||||
"prometheus",
|
||||
"puffin",
|
||||
"puffin_http",
|
||||
"rand",
|
||||
"rayon",
|
||||
"regex",
|
||||
@@ -2679,9 +2694,9 @@ dependencies = [
|
||||
"geoutils",
|
||||
"grenad",
|
||||
"heed",
|
||||
"hnsw",
|
||||
"indexmap",
|
||||
"indexmap 1.9.3",
|
||||
"insta",
|
||||
"instant-distance",
|
||||
"itertools",
|
||||
"json-depth-checker",
|
||||
"levenshtein_automata",
|
||||
@@ -2694,6 +2709,7 @@ dependencies = [
|
||||
"obkv",
|
||||
"once_cell",
|
||||
"ordered-float",
|
||||
"puffin",
|
||||
"rand",
|
||||
"rand_pcg",
|
||||
"rayon",
|
||||
@@ -2705,7 +2721,6 @@ dependencies = [
|
||||
"smallstr",
|
||||
"smallvec",
|
||||
"smartstring",
|
||||
"space",
|
||||
"tempfile",
|
||||
"thiserror",
|
||||
"time",
|
||||
@@ -2866,9 +2881,9 @@ checksum = "f69e48cd7c8e5bb52a1da1287fdbfd877c32673176583ce664cd63b201aba385"
|
||||
|
||||
[[package]]
|
||||
name = "once_cell"
|
||||
version = "1.17.1"
|
||||
version = "1.18.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b7e5500299e16ebb147ae15a00a942af264cf3688f47923b8fc2cd5858f23ad3"
|
||||
checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d"
|
||||
|
||||
[[package]]
|
||||
name = "oorandom"
|
||||
@@ -3025,7 +3040,7 @@ dependencies = [
|
||||
"pest_meta",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.18",
|
||||
"syn 2.0.28",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -3170,9 +3185,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "proc-macro2"
|
||||
version = "1.0.59"
|
||||
version = "1.0.64"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6aeca18b86b413c660b781aa319e4e2648a3e6f9eadc9b47e9038e6fe9f3451b"
|
||||
checksum = "78803b62cbf1f46fde80d7c0e803111524b9877184cfe7c3033659490ac7a7da"
|
||||
dependencies = [
|
||||
"unicode-ident",
|
||||
]
|
||||
@@ -3214,10 +3229,39 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "106dd99e98437432fed6519dedecfade6a06a73bb7b2a1e019fdd2bee5778d94"
|
||||
|
||||
[[package]]
|
||||
name = "quote"
|
||||
version = "1.0.28"
|
||||
name = "puffin"
|
||||
version = "0.16.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1b9ab9c7eadfd8df19006f1cf1a4aed13540ed5cbc047010ece5826e10825488"
|
||||
checksum = "76425abd4e1a0ad4bd6995dd974b52f414fca9974171df8e3708b3e660d05a21"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"bincode",
|
||||
"byteorder",
|
||||
"cfg-if",
|
||||
"instant",
|
||||
"lz4_flex",
|
||||
"once_cell",
|
||||
"parking_lot",
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "puffin_http"
|
||||
version = "0.13.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "13bffc600c35913d282ae1e96a6ffcdf36dc7a7cdb9310e0ba15914d258c8193"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"crossbeam-channel",
|
||||
"log",
|
||||
"puffin",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "quote"
|
||||
version = "1.0.30"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5907a1b7c277254a8b15170f6e7c97cfa60ee7872a3217663bb81151e48184bb"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
]
|
||||
@@ -3465,14 +3509,14 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "rustix"
|
||||
version = "0.38.2"
|
||||
version = "0.38.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "aabcb0461ebd01d6b79945797c27f8529082226cb630a9865a71870ff63532a4"
|
||||
checksum = "0a962918ea88d644592894bc6dc55acc6c0956488adcebbfb6e273506b7fd6e5"
|
||||
dependencies = [
|
||||
"bitflags 2.3.3",
|
||||
"errno",
|
||||
"libc",
|
||||
"linux-raw-sys 0.4.3",
|
||||
"linux-raw-sys 0.4.5",
|
||||
"windows-sys 0.48.0",
|
||||
]
|
||||
|
||||
@@ -3578,13 +3622,22 @@ checksum = "bebd363326d05ec3e2f532ab7660680f3b02130d780c299bca73469d521bc0ed"
|
||||
|
||||
[[package]]
|
||||
name = "serde"
|
||||
version = "1.0.163"
|
||||
version = "1.0.180"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2113ab51b87a539ae008b5c6c02dc020ffa39afd2d83cffcb3f4eb2722cebec2"
|
||||
checksum = "0ea67f183f058fe88a4e3ec6e2788e003840893b91bac4559cabedd00863b3ed"
|
||||
dependencies = [
|
||||
"serde_derive",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde-big-array"
|
||||
version = "0.5.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "11fc7cc2c76d73e0f27ee52abbd64eec84d46f370c88371120433196934e4b7f"
|
||||
dependencies = [
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde-cs"
|
||||
version = "0.2.4"
|
||||
@@ -3596,22 +3649,22 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "serde_derive"
|
||||
version = "1.0.163"
|
||||
version = "1.0.180"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8c805777e3930c8883389c602315a24224bcc738b63905ef87cd1420353ea93e"
|
||||
checksum = "24e744d7782b686ab3b73267ef05697159cc0e5abbed3f47f9933165e5219036"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.18",
|
||||
"syn 2.0.28",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_json"
|
||||
version = "1.0.96"
|
||||
version = "1.0.104"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "057d394a50403bcac12672b2b18fb387ab6d289d957dab67dd201875391e52f1"
|
||||
checksum = "076066c5f1078eac5b722a31827a8832fe108bed65dfa75e233c89f8206e976c"
|
||||
dependencies = [
|
||||
"indexmap",
|
||||
"indexmap 2.0.0",
|
||||
"itoa",
|
||||
"ryu",
|
||||
"serde",
|
||||
@@ -3734,9 +3787,6 @@ name = "smallvec"
|
||||
version = "1.10.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a507befe795404456341dfab10cef66ead4c041f62b8b11bbb92bffe5d0953e0"
|
||||
dependencies = [
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "smartstring"
|
||||
@@ -3759,16 +3809,6 @@ dependencies = [
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "space"
|
||||
version = "0.17.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c5ab9701ae895386d13db622abf411989deff7109b13b46b6173bb4ce5c1d123"
|
||||
dependencies = [
|
||||
"doc-comment",
|
||||
"num-traits",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "spin"
|
||||
version = "0.5.2"
|
||||
@@ -3832,9 +3872,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "2.0.18"
|
||||
version = "2.0.28"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "32d41677bcbe24c20c52e7c70b0d8db04134c5d1066bf98662e2871ad200ea3e"
|
||||
checksum = "04361975b3f5e348b2189d8dc55bc942f278b2d482a6a0365de5bdd62d351567"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
@@ -3921,22 +3961,22 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "thiserror"
|
||||
version = "1.0.40"
|
||||
version = "1.0.44"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "978c9a314bd8dc99be594bc3c175faaa9794be04a5a5e153caba6915336cebac"
|
||||
checksum = "611040a08a0439f8248d1990b111c95baa9c704c805fa1f62104b39655fd7f90"
|
||||
dependencies = [
|
||||
"thiserror-impl",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "thiserror-impl"
|
||||
version = "1.0.40"
|
||||
version = "1.0.44"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f9456a42c5b0d803c8cd86e73dd7cc9edd429499f37a3550d286d5e86720569f"
|
||||
checksum = "090198534930841fab3a5d1bb637cde49e339654e606195f8d9c76eeb081dc96"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.18",
|
||||
"syn 2.0.28",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -4018,7 +4058,7 @@ checksum = "630bdcf245f78637c13ec01ffae6187cca34625e8c63150d424b59e55af2675e"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.18",
|
||||
"syn 2.0.28",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -4094,7 +4134,7 @@ version = "0.19.10"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2380d56e8670370eee6566b0bfd4265f65b3f432e8c6d85623f728d4fa31f739"
|
||||
dependencies = [
|
||||
"indexmap",
|
||||
"indexmap 1.9.3",
|
||||
"serde",
|
||||
"serde_spanned",
|
||||
"toml_datetime",
|
||||
@@ -4343,7 +4383,7 @@ dependencies = [
|
||||
"once_cell",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.18",
|
||||
"syn 2.0.28",
|
||||
"wasm-bindgen-shared",
|
||||
]
|
||||
|
||||
@@ -4377,7 +4417,7 @@ checksum = "e128beba882dd1eb6200e1dc92ae6c5dbaa4311aa7bb211ca035779e5efc39f8"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.18",
|
||||
"syn 2.0.28",
|
||||
"wasm-bindgen-backend",
|
||||
"wasm-bindgen-shared",
|
||||
]
|
||||
|
||||
19
PROFILING.md
Normal file
19
PROFILING.md
Normal file
@@ -0,0 +1,19 @@
|
||||
# Profiling Meilisearch
|
||||
|
||||
Search engine technologies are complex pieces of software that require thorough profiling tools. We chose to use [Puffin](https://github.com/EmbarkStudios/puffin), which the Rust gaming industry uses extensively. You can export and import the profiling reports using the top bar's _File_ menu options.
|
||||
|
||||

|
||||
|
||||
## Profiling the Indexing Process
|
||||
|
||||
When you enable the `profile-with-puffin` feature of Meilisearch, a Puffin HTTP server will run on Meilisearch and listen on the default _0.0.0.0:8585_ address. This server will record a "frame" whenever it executes the `IndexScheduler::tick` method.
|
||||
|
||||
Once your Meilisearch is running and awaits new indexation operations, you must [install and run the `puffin_viewer` tool](https://github.com/EmbarkStudios/puffin/tree/main/puffin_viewer) to see the profiling results. I advise you to run the viewer with the `RUST_LOG=puffin_http::client=debug` environment variable to see the client trying to connect to your server.
|
||||
|
||||
Another piece of advice on the Puffin viewer UI interface is to consider the _Merge children with same ID_ option. It can hide the exact actual timings at which events were sent. Please turn it off when you see strange gaps on the Flamegraph. It can help.
|
||||
|
||||
## Profiling the Search Process
|
||||
|
||||
We still need to take the time to profile the search side of the engine with Puffin. It would require time to profile the filtering phase, query parsing, creation, and execution. We could even profile the Actix HTTP server.
|
||||
|
||||
The only issue we see is the framing system. Puffin requires a global frame-based profiling phase, which collides with Meilisearch's ability to accept and answer multiple requests on different threads simultaneously.
|
||||
70
README.md
70
README.md
@@ -1,16 +1,20 @@
|
||||
<p align="center">
|
||||
<img src="assets/meilisearch-logo-light.svg?sanitize=true#gh-light-mode-only">
|
||||
<img src="assets/meilisearch-logo-dark.svg?sanitize=true#gh-dark-mode-only">
|
||||
<a href="https://www.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=logo#gh-light-mode-only" target="_blank">
|
||||
<img src="assets/meilisearch-logo-light.svg?sanitize=true#gh-light-mode-only">
|
||||
</a>
|
||||
<a href="https://www.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=logo#gh-dark-mode-only" target="_blank">
|
||||
<img src="assets/meilisearch-logo-dark.svg?sanitize=true#gh-dark-mode-only">
|
||||
</a>
|
||||
</p>
|
||||
|
||||
<h4 align="center">
|
||||
<a href="https://www.meilisearch.com">Website</a> |
|
||||
<a href="https://www.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=nav">Website</a> |
|
||||
<a href="https://roadmap.meilisearch.com/tabs/1-under-consideration">Roadmap</a> |
|
||||
<a href="https://www.meilisearch.com/pricing?utm_campaign=oss&utm_source=engine&utm_medium=meilisearch">Meilisearch Cloud</a> |
|
||||
<a href="https://blog.meilisearch.com">Blog</a> |
|
||||
<a href="https://www.meilisearch.com/docs">Documentation</a> |
|
||||
<a href="https://www.meilisearch.com/docs/faq">FAQ</a> |
|
||||
<a href="https://discord.meilisearch.com">Discord</a>
|
||||
<a href="https://www.meilisearch.com/pricing?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=nav">Meilisearch Cloud</a> |
|
||||
<a href="https://blog.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=nav">Blog</a> |
|
||||
<a href="https://www.meilisearch.com/docs?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=nav">Documentation</a> |
|
||||
<a href="https://www.meilisearch.com/docs/faq?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=nav">FAQ</a> |
|
||||
<a href="https://discord.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=nav">Discord</a>
|
||||
</h4>
|
||||
|
||||
<p align="center">
|
||||
@@ -24,72 +28,72 @@
|
||||
Meilisearch helps you shape a delightful search experience in a snap, offering features that work out-of-the-box to speed up your workflow.
|
||||
|
||||
<p align="center" name="demo">
|
||||
<a href="https://where2watch.meilisearch.com/#gh-light-mode-only" target="_blank">
|
||||
<a href="https://where2watch.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demo-gif#gh-light-mode-only" target="_blank">
|
||||
<img src="assets/demo-light.gif#gh-light-mode-only" alt="A bright colored application for finding movies screening near the user">
|
||||
</a>
|
||||
<a href="https://where2watch.meilisearch.com/#gh-dark-mode-only" target="_blank">
|
||||
<a href="https://where2watch.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demo-gif#gh-dark-mode-only" target="_blank">
|
||||
<img src="assets/demo-dark.gif#gh-dark-mode-only" alt="A dark colored application for finding movies screening near the user">
|
||||
</a>
|
||||
</p>
|
||||
|
||||
🔥 [**Try it!**](https://where2watch.meilisearch.com/) 🔥
|
||||
🔥 [**Try it!**](https://where2watch.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demo-link) 🔥
|
||||
|
||||
## ✨ Features
|
||||
|
||||
- **Search-as-you-type:** find search results in less than 50 milliseconds
|
||||
- **[Typo tolerance](https://www.meilisearch.com/docs/learn/getting_started/customizing_relevancy#typo-tolerance):** get relevant matches even when queries contain typos and misspellings
|
||||
- **[Filtering](https://www.meilisearch.com/docs/learn/fine_tuning_results/filtering) and [faceted search](https://www.meilisearch.com/docs/learn/fine_tuning_results/faceted_search):** enhance your user's search experience with custom filters and build a faceted search interface in a few lines of code
|
||||
- **[Sorting](https://www.meilisearch.com/docs/learn/fine_tuning_results/sorting):** sort results based on price, date, or pretty much anything else your users need
|
||||
- **[Synonym support](https://www.meilisearch.com/docs/learn/getting_started/customizing_relevancy#synonyms):** configure synonyms to include more relevant content in your search results
|
||||
- **[Geosearch](https://www.meilisearch.com/docs/learn/fine_tuning_results/geosearch):** filter and sort documents based on geographic data
|
||||
- **[Extensive language support](https://www.meilisearch.com/docs/learn/what_is_meilisearch/language):** search datasets in any language, with optimized support for Chinese, Japanese, Hebrew, and languages using the Latin alphabet
|
||||
- **[Security management](https://www.meilisearch.com/docs/learn/security/master_api_keys):** control which users can access what data with API keys that allow fine-grained permissions handling
|
||||
- **[Multi-Tenancy](https://www.meilisearch.com/docs/learn/security/tenant_tokens):** personalize search results for any number of application tenants
|
||||
- **[Typo tolerance](https://www.meilisearch.com/docs/learn/getting_started/customizing_relevancy?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features#typo-tolerance):** get relevant matches even when queries contain typos and misspellings
|
||||
- **[Filtering](https://www.meilisearch.com/docs/learn/fine_tuning_results/filtering?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features) and [faceted search](https://www.meilisearch.com/docs/learn/fine_tuning_results/faceted_search?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** enhance your user's search experience with custom filters and build a faceted search interface in a few lines of code
|
||||
- **[Sorting](https://www.meilisearch.com/docs/learn/fine_tuning_results/sorting?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** sort results based on price, date, or pretty much anything else your users need
|
||||
- **[Synonym support](https://www.meilisearch.com/docs/learn/getting_started/customizing_relevancy?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features#synonyms):** configure synonyms to include more relevant content in your search results
|
||||
- **[Geosearch](https://www.meilisearch.com/docs/learn/fine_tuning_results/geosearch?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** filter and sort documents based on geographic data
|
||||
- **[Extensive language support](https://www.meilisearch.com/docs/learn/what_is_meilisearch/language?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** search datasets in any language, with optimized support for Chinese, Japanese, Hebrew, and languages using the Latin alphabet
|
||||
- **[Security management](https://www.meilisearch.com/docs/learn/security/master_api_keys?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** control which users can access what data with API keys that allow fine-grained permissions handling
|
||||
- **[Multi-Tenancy](https://www.meilisearch.com/docs/learn/security/tenant_tokens?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** personalize search results for any number of application tenants
|
||||
- **Highly Customizable:** customize Meilisearch to your specific needs or use our out-of-the-box and hassle-free presets
|
||||
- **[RESTful API](https://www.meilisearch.com/docs/reference/api/overview):** integrate Meilisearch in your technical stack with our plugins and SDKs
|
||||
- **[RESTful API](https://www.meilisearch.com/docs/reference/api/overview?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** integrate Meilisearch in your technical stack with our plugins and SDKs
|
||||
- **Easy to install, deploy, and maintain**
|
||||
|
||||
## 📖 Documentation
|
||||
|
||||
You can consult Meilisearch's documentation at [https://www.meilisearch.com/docs](https://www.meilisearch.com/docs/).
|
||||
You can consult Meilisearch's documentation at [https://www.meilisearch.com/docs](https://www.meilisearch.com/docs/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=docs).
|
||||
|
||||
## 🚀 Getting started
|
||||
|
||||
For basic instructions on how to set up Meilisearch, add documents to an index, and search for documents, take a look at our [Quick Start](https://www.meilisearch.com/docs/learn/getting_started/quick_start) guide.
|
||||
For basic instructions on how to set up Meilisearch, add documents to an index, and search for documents, take a look at our [Quick Start](https://www.meilisearch.com/docs/learn/getting_started/quick_start?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=get-started) guide.
|
||||
|
||||
You may also want to check out [Meilisearch 101](https://www.meilisearch.com/docs/learn/getting_started/filtering_and_sorting) for an introduction to some of Meilisearch's most popular features.
|
||||
You may also want to check out [Meilisearch 101](https://www.meilisearch.com/docs/learn/getting_started/filtering_and_sorting?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=get-started) for an introduction to some of Meilisearch's most popular features.
|
||||
|
||||
## ⚡ Supercharge your Meilisearch experience
|
||||
|
||||
Say goodbye to server deployment and manual updates with [Meilisearch Cloud](https://www.meilisearch.com/pricing?utm_campaign=oss&utm_source=engine&utm_medium=meilisearch). No credit card required.
|
||||
Say goodbye to server deployment and manual updates with [Meilisearch Cloud](https://www.meilisearch.com/cloud?utm_campaign=oss&utm_source=github&utm_medium=meilisearch). No credit card required.
|
||||
|
||||
## 🧰 SDKs & integration tools
|
||||
|
||||
Install one of our SDKs in your project for seamless integration between Meilisearch and your favorite language or framework!
|
||||
|
||||
Take a look at the complete [Meilisearch integration list](https://www.meilisearch.com/docs/learn/what_is_meilisearch/sdks).
|
||||
Take a look at the complete [Meilisearch integration list](https://www.meilisearch.com/docs/learn/what_is_meilisearch/sdks?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=sdks-link).
|
||||
|
||||
[](https://www.meilisearch.com/docs/learn/what_is_meilisearch/sdks)
|
||||
[](https://www.meilisearch.com/docs/learn/what_is_meilisearch/sdks?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=sdks-logos)
|
||||
|
||||
## ⚙️ Advanced usage
|
||||
|
||||
Experienced users will want to keep our [API Reference](https://www.meilisearch.com/docs/reference/api/overview) close at hand.
|
||||
Experienced users will want to keep our [API Reference](https://www.meilisearch.com/docs/reference/api/overview?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=advanced) close at hand.
|
||||
|
||||
We also offer a wide range of dedicated guides to all Meilisearch features, such as [filtering](https://www.meilisearch.com/docs/learn/fine_tuning_results/filtering), [sorting](https://www.meilisearch.com/docs/learn/fine_tuning_results/sorting), [geosearch](https://www.meilisearch.com/docs/learn/fine_tuning_results/geosearch), [API keys](https://www.meilisearch.com/docs/learn/security/master_api_keys), and [tenant tokens](https://www.meilisearch.com/docs/learn/security/tenant_tokens).
|
||||
We also offer a wide range of dedicated guides to all Meilisearch features, such as [filtering](https://www.meilisearch.com/docs/learn/fine_tuning_results/filtering?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=advanced), [sorting](https://www.meilisearch.com/docs/learn/fine_tuning_results/sorting?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=advanced), [geosearch](https://www.meilisearch.com/docs/learn/fine_tuning_results/geosearch?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=advanced), [API keys](https://www.meilisearch.com/docs/learn/security/master_api_keys?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=advanced), and [tenant tokens](https://www.meilisearch.com/docs/learn/security/tenant_tokens?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=advanced).
|
||||
|
||||
Finally, for more in-depth information, refer to our articles explaining fundamental Meilisearch concepts such as [documents](https://www.meilisearch.com/docs/learn/core_concepts/documents) and [indexes](https://www.meilisearch.com/docs/learn/core_concepts/indexes).
|
||||
Finally, for more in-depth information, refer to our articles explaining fundamental Meilisearch concepts such as [documents](https://www.meilisearch.com/docs/learn/core_concepts/documents?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=advanced) and [indexes](https://www.meilisearch.com/docs/learn/core_concepts/indexes?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=advanced).
|
||||
|
||||
## 📊 Telemetry
|
||||
|
||||
Meilisearch collects **anonymized** data from users to help us improve our product. You can [deactivate this](https://www.meilisearch.com/docs/learn/what_is_meilisearch/telemetry#how-to-disable-data-collection) whenever you want.
|
||||
Meilisearch collects **anonymized** data from users to help us improve our product. You can [deactivate this](https://www.meilisearch.com/docs/learn/what_is_meilisearch/telemetry?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=telemetry#how-to-disable-data-collection) whenever you want.
|
||||
|
||||
To request deletion of collected data, please write to us at [privacy@meilisearch.com](mailto:privacy@meilisearch.com). Don't forget to include your `Instance UID` in the message, as this helps us quickly find and delete your data.
|
||||
To request deletion of collected data, please write to us at [privacy@meilisearch.com](mailto:privacy@meilisearch.com). Don't forget to include your `Instance UID` in the message, as this helps us quickly find and delete your data.
|
||||
|
||||
If you want to know more about the kind of data we collect and what we use it for, check the [telemetry section](https://www.meilisearch.com/docs/learn/what_is_meilisearch/telemetry) of our documentation.
|
||||
If you want to know more about the kind of data we collect and what we use it for, check the [telemetry section](https://www.meilisearch.com/docs/learn/what_is_meilisearch/telemetry?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=telemetry#how-to-disable-data-collection) of our documentation.
|
||||
|
||||
## 📫 Get in touch!
|
||||
|
||||
Meilisearch is a search engine created by [Meili](https://www.welcometothejungle.com/en/companies/meilisearch), a software development company based in France and with team members all over the world. Want to know more about us? [Check out our blog!](https://blog.meilisearch.com/)
|
||||
Meilisearch is a search engine created by [Meili](https://www.welcometothejungle.com/en/companies/meilisearch), a software development company based in France and with team members all over the world. Want to know more about us? [Check out our blog!](https://blog.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=contact)
|
||||
|
||||
🗞 [Subscribe to our newsletter](https://meilisearch.us2.list-manage.com/subscribe?u=27870f7b71c908a8b359599fb&id=79582d828e) if you don't want to miss any updates! We promise we won't clutter your mailbox: we only send one edition every two months.
|
||||
|
||||
|
||||
BIN
assets/profiling-example.png
Normal file
BIN
assets/profiling-example.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 1.2 MiB |
@@ -14,7 +14,7 @@ license.workspace = true
|
||||
anyhow = "1.0.70"
|
||||
csv = "1.2.1"
|
||||
milli = { path = "../milli" }
|
||||
mimalloc = { version = "0.1.36", default-features = false }
|
||||
mimalloc = { version = "0.1.37", default-features = false }
|
||||
serde_json = { version = "1.0.95", features = ["preserve_order"] }
|
||||
|
||||
[dev-dependencies]
|
||||
|
||||
@@ -472,6 +472,77 @@ pub fn parse_filter(input: Span) -> IResult<FilterCondition> {
|
||||
terminated(|input| parse_expression(input, 0), eof)(input)
|
||||
}
|
||||
|
||||
impl<'a> std::fmt::Display for FilterCondition<'a> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
FilterCondition::Not(filter) => {
|
||||
write!(f, "NOT ({filter})")
|
||||
}
|
||||
FilterCondition::Condition { fid, op } => {
|
||||
write!(f, "{fid} {op}")
|
||||
}
|
||||
FilterCondition::In { fid, els } => {
|
||||
write!(f, "{fid} IN[")?;
|
||||
for el in els {
|
||||
write!(f, "{el}, ")?;
|
||||
}
|
||||
write!(f, "]")
|
||||
}
|
||||
FilterCondition::Or(els) => {
|
||||
write!(f, "OR[")?;
|
||||
for el in els {
|
||||
write!(f, "{el}, ")?;
|
||||
}
|
||||
write!(f, "]")
|
||||
}
|
||||
FilterCondition::And(els) => {
|
||||
write!(f, "AND[")?;
|
||||
for el in els {
|
||||
write!(f, "{el}, ")?;
|
||||
}
|
||||
write!(f, "]")
|
||||
}
|
||||
FilterCondition::GeoLowerThan { point, radius } => {
|
||||
write!(f, "_geoRadius({}, {}, {})", point[0], point[1], radius)
|
||||
}
|
||||
FilterCondition::GeoBoundingBox {
|
||||
top_right_point: top_left_point,
|
||||
bottom_left_point: bottom_right_point,
|
||||
} => {
|
||||
write!(
|
||||
f,
|
||||
"_geoBoundingBox([{}, {}], [{}, {}])",
|
||||
top_left_point[0],
|
||||
top_left_point[1],
|
||||
bottom_right_point[0],
|
||||
bottom_right_point[1]
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
impl<'a> std::fmt::Display for Condition<'a> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Condition::GreaterThan(token) => write!(f, "> {token}"),
|
||||
Condition::GreaterThanOrEqual(token) => write!(f, ">= {token}"),
|
||||
Condition::Equal(token) => write!(f, "= {token}"),
|
||||
Condition::NotEqual(token) => write!(f, "!= {token}"),
|
||||
Condition::Null => write!(f, "IS NULL"),
|
||||
Condition::Empty => write!(f, "IS EMPTY"),
|
||||
Condition::Exists => write!(f, "EXISTS"),
|
||||
Condition::LowerThan(token) => write!(f, "< {token}"),
|
||||
Condition::LowerThanOrEqual(token) => write!(f, "<= {token}"),
|
||||
Condition::Between { from, to } => write!(f, "{from} TO {to}"),
|
||||
}
|
||||
}
|
||||
}
|
||||
impl<'a> std::fmt::Display for Token<'a> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "{{{}}}", self.value())
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub mod tests {
|
||||
use super::*;
|
||||
@@ -852,74 +923,3 @@ pub mod tests {
|
||||
assert_eq!(token.value(), s);
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> std::fmt::Display for FilterCondition<'a> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
FilterCondition::Not(filter) => {
|
||||
write!(f, "NOT ({filter})")
|
||||
}
|
||||
FilterCondition::Condition { fid, op } => {
|
||||
write!(f, "{fid} {op}")
|
||||
}
|
||||
FilterCondition::In { fid, els } => {
|
||||
write!(f, "{fid} IN[")?;
|
||||
for el in els {
|
||||
write!(f, "{el}, ")?;
|
||||
}
|
||||
write!(f, "]")
|
||||
}
|
||||
FilterCondition::Or(els) => {
|
||||
write!(f, "OR[")?;
|
||||
for el in els {
|
||||
write!(f, "{el}, ")?;
|
||||
}
|
||||
write!(f, "]")
|
||||
}
|
||||
FilterCondition::And(els) => {
|
||||
write!(f, "AND[")?;
|
||||
for el in els {
|
||||
write!(f, "{el}, ")?;
|
||||
}
|
||||
write!(f, "]")
|
||||
}
|
||||
FilterCondition::GeoLowerThan { point, radius } => {
|
||||
write!(f, "_geoRadius({}, {}, {})", point[0], point[1], radius)
|
||||
}
|
||||
FilterCondition::GeoBoundingBox {
|
||||
top_right_point: top_left_point,
|
||||
bottom_left_point: bottom_right_point,
|
||||
} => {
|
||||
write!(
|
||||
f,
|
||||
"_geoBoundingBox([{}, {}], [{}, {}])",
|
||||
top_left_point[0],
|
||||
top_left_point[1],
|
||||
bottom_right_point[0],
|
||||
bottom_right_point[1]
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
impl<'a> std::fmt::Display for Condition<'a> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Condition::GreaterThan(token) => write!(f, "> {token}"),
|
||||
Condition::GreaterThanOrEqual(token) => write!(f, ">= {token}"),
|
||||
Condition::Equal(token) => write!(f, "= {token}"),
|
||||
Condition::NotEqual(token) => write!(f, "!= {token}"),
|
||||
Condition::Null => write!(f, "IS NULL"),
|
||||
Condition::Empty => write!(f, "IS EMPTY"),
|
||||
Condition::Exists => write!(f, "EXISTS"),
|
||||
Condition::LowerThan(token) => write!(f, "< {token}"),
|
||||
Condition::LowerThanOrEqual(token) => write!(f, "<= {token}"),
|
||||
Condition::Between { from, to } => write!(f, "{from} TO {to}"),
|
||||
}
|
||||
}
|
||||
}
|
||||
impl<'a> std::fmt::Display for Token<'a> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "{{{}}}", self.value())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -22,6 +22,7 @@ log = "0.4.17"
|
||||
meilisearch-auth = { path = "../meilisearch-auth" }
|
||||
meilisearch-types = { path = "../meilisearch-types" }
|
||||
page_size = "0.5.0"
|
||||
puffin = "0.16.0"
|
||||
roaring = { version = "0.10.1", features = ["serde"] }
|
||||
serde = { version = "1.0.160", features = ["derive"] }
|
||||
serde_json = { version = "1.0.95", features = ["preserve_order"] }
|
||||
|
||||
@@ -471,6 +471,8 @@ impl IndexScheduler {
|
||||
#[cfg(test)]
|
||||
self.maybe_fail(crate::tests::FailureLocation::InsideCreateBatch)?;
|
||||
|
||||
puffin::profile_function!();
|
||||
|
||||
let enqueued = &self.get_status(rtxn, Status::Enqueued)?;
|
||||
let to_cancel = self.get_kind(rtxn, Kind::TaskCancelation)? & enqueued;
|
||||
|
||||
@@ -575,6 +577,9 @@ impl IndexScheduler {
|
||||
self.maybe_fail(crate::tests::FailureLocation::PanicInsideProcessBatch)?;
|
||||
self.breakpoint(crate::Breakpoint::InsideProcessBatch);
|
||||
}
|
||||
|
||||
puffin::profile_function!(format!("{:?}", batch));
|
||||
|
||||
match batch {
|
||||
Batch::TaskCancelation { mut task, previous_started_at, previous_processing_tasks } => {
|
||||
// 1. Retrieve the tasks that matched the query at enqueue-time.
|
||||
@@ -1111,6 +1116,8 @@ impl IndexScheduler {
|
||||
index: &'i Index,
|
||||
operation: IndexOperation,
|
||||
) -> Result<Vec<Task>> {
|
||||
puffin::profile_function!();
|
||||
|
||||
match operation {
|
||||
IndexOperation::DocumentClear { mut tasks, .. } => {
|
||||
let count = milli::update::ClearDocuments::new(index_wtxn, index).execute()?;
|
||||
|
||||
@@ -1053,6 +1053,8 @@ impl IndexScheduler {
|
||||
self.breakpoint(Breakpoint::Start);
|
||||
}
|
||||
|
||||
puffin::GlobalProfiler::lock().new_frame();
|
||||
|
||||
self.cleanup_task_queue()?;
|
||||
|
||||
let rtxn = self.env.read_txn().map_err(Error::HeedTransaction)?;
|
||||
|
||||
@@ -199,6 +199,30 @@ macro_rules! snapshot {
|
||||
};
|
||||
}
|
||||
|
||||
/// Create a string from the value by serializing it as Json, optionally
|
||||
/// redacting some parts of it.
|
||||
///
|
||||
/// The second argument to the macro can be an object expression for redaction.
|
||||
/// It's in the form { selector => replacement }. For more information about redactions
|
||||
/// refer to the redactions feature in the `insta` guide.
|
||||
#[macro_export]
|
||||
macro_rules! json_string {
|
||||
($value:expr, {$($k:expr => $v:expr),*$(,)?}) => {
|
||||
{
|
||||
let (_, snap) = meili_snap::insta::_prepare_snapshot_for_redaction!($value, {$($k => $v),*}, Json, File);
|
||||
snap
|
||||
}
|
||||
};
|
||||
($value:expr) => {{
|
||||
let value = meili_snap::insta::_macro_support::serialize_value(
|
||||
&$value,
|
||||
meili_snap::insta::_macro_support::SerializationFormat::Json,
|
||||
meili_snap::insta::_macro_support::SnapshotLocation::File
|
||||
);
|
||||
value
|
||||
}};
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use crate as meili_snap;
|
||||
@@ -250,27 +274,3 @@ mod tests {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a string from the value by serializing it as Json, optionally
|
||||
/// redacting some parts of it.
|
||||
///
|
||||
/// The second argument to the macro can be an object expression for redaction.
|
||||
/// It's in the form { selector => replacement }. For more information about redactions
|
||||
/// refer to the redactions feature in the `insta` guide.
|
||||
#[macro_export]
|
||||
macro_rules! json_string {
|
||||
($value:expr, {$($k:expr => $v:expr),*$(,)?}) => {
|
||||
{
|
||||
let (_, snap) = meili_snap::insta::_prepare_snapshot_for_redaction!($value, {$($k => $v),*}, Json, File);
|
||||
snap
|
||||
}
|
||||
};
|
||||
($value:expr) => {{
|
||||
let value = meili_snap::insta::_macro_support::serialize_value(
|
||||
&$value,
|
||||
meili_snap::insta::_macro_support::SerializationFormat::Json,
|
||||
meili_snap::insta::_macro_support::SnapshotLocation::File
|
||||
);
|
||||
value
|
||||
}};
|
||||
}
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
use std::borrow::Borrow;
|
||||
use std::fmt::{self, Debug, Display};
|
||||
use std::fs::File;
|
||||
use std::io::{self, Seek, Write};
|
||||
@@ -42,7 +41,7 @@ impl Display for DocumentFormatError {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
match self {
|
||||
Self::Io(e) => write!(f, "{e}"),
|
||||
Self::MalformedPayload(me, b) => match me.borrow() {
|
||||
Self::MalformedPayload(me, b) => match me {
|
||||
Error::Json(se) => {
|
||||
let mut message = match se.classify() {
|
||||
Category::Data => {
|
||||
|
||||
@@ -58,7 +58,7 @@ lazy_static = "1.4.0"
|
||||
log = "0.4.17"
|
||||
meilisearch-auth = { path = "../meilisearch-auth" }
|
||||
meilisearch-types = { path = "../meilisearch-types" }
|
||||
mimalloc = { version = "0.1.36", default-features = false }
|
||||
mimalloc = { version = "0.1.37", default-features = false }
|
||||
mime = "0.3.17"
|
||||
num_cpus = "1.15.0"
|
||||
obkv = "0.2.0"
|
||||
@@ -69,6 +69,8 @@ permissive-json-pointer = { path = "../permissive-json-pointer" }
|
||||
pin-project-lite = "0.2.9"
|
||||
platform-dirs = "0.3.0"
|
||||
prometheus = { version = "0.13.3", features = ["process"] }
|
||||
puffin = "0.16.0"
|
||||
puffin_http = { version = "0.13.0", optional = true }
|
||||
rand = "0.8.5"
|
||||
rayon = "1.7.0"
|
||||
regex = "1.7.3"
|
||||
@@ -133,7 +135,18 @@ zip = { version = "0.6.4", optional = true }
|
||||
[features]
|
||||
default = ["analytics", "meilisearch-types/all-tokenizations", "mini-dashboard"]
|
||||
analytics = ["segment"]
|
||||
mini-dashboard = ["actix-web-static-files", "static-files", "anyhow", "cargo_toml", "hex", "reqwest", "sha-1", "tempfile", "zip"]
|
||||
profile-with-puffin = ["dep:puffin_http"]
|
||||
mini-dashboard = [
|
||||
"actix-web-static-files",
|
||||
"static-files",
|
||||
"anyhow",
|
||||
"cargo_toml",
|
||||
"hex",
|
||||
"reqwest",
|
||||
"sha-1",
|
||||
"tempfile",
|
||||
"zip",
|
||||
]
|
||||
chinese = ["meilisearch-types/chinese"]
|
||||
hebrew = ["meilisearch-types/hebrew"]
|
||||
japanese = ["meilisearch-types/japanese"]
|
||||
@@ -141,5 +154,5 @@ thai = ["meilisearch-types/thai"]
|
||||
greek = ["meilisearch-types/greek"]
|
||||
|
||||
[package.metadata.mini-dashboard]
|
||||
assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.7/build.zip"
|
||||
sha1 = "28b45bf772c84f9a6e16bc1689b393bfce8da7d6"
|
||||
assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.11/build.zip"
|
||||
sha1 = "83cd44ed1e5f97ecb581dc9f958a63f4ccc982d9"
|
||||
|
||||
@@ -30,6 +30,10 @@ fn setup(opt: &Opt) -> anyhow::Result<()> {
|
||||
async fn main() -> anyhow::Result<()> {
|
||||
let (opt, config_read_from) = Opt::try_build()?;
|
||||
|
||||
#[cfg(feature = "profile-with-puffin")]
|
||||
let _server = puffin_http::Server::new(&format!("0.0.0.0:{}", puffin_http::DEFAULT_PORT))?;
|
||||
puffin::set_scopes_on(cfg!(feature = "profile-with-puffin"));
|
||||
|
||||
anyhow::ensure!(
|
||||
!(cfg!(windows) && opt.experimental_reduce_indexing_memory_usage),
|
||||
"The `experimental-reduce-indexing-memory-usage` flag is not supported on Windows"
|
||||
@@ -187,7 +191,7 @@ Anonymous telemetry:\t\"Enabled\""
|
||||
}
|
||||
|
||||
eprintln!();
|
||||
eprintln!("Check out Meilisearch Cloud!\thttps://cloud.meilisearch.com/login?utm_campaign=oss&utm_source=engine&utm_medium=cli");
|
||||
eprintln!("Check out Meilisearch Cloud!\thttps://www.meilisearch.com/cloud?utm_campaign=oss&utm_source=engine&utm_medium=cli");
|
||||
eprintln!("Documentation:\t\t\thttps://www.meilisearch.com/docs");
|
||||
eprintln!("Source code:\t\t\thttps://github.com/meilisearch/meilisearch");
|
||||
eprintln!("Discord:\t\t\thttps://discord.meilisearch.com");
|
||||
|
||||
@@ -284,9 +284,6 @@ pub fn create_all_stats(
|
||||
used_database_size += index_scheduler.used_size()?;
|
||||
database_size += auth_controller.size()?;
|
||||
used_database_size += auth_controller.used_size()?;
|
||||
let update_file_size = index_scheduler.compute_update_file_size()?;
|
||||
database_size += update_file_size;
|
||||
used_database_size += update_file_size;
|
||||
|
||||
let stats = Stats { database_size, used_database_size, last_update: last_task, indexes };
|
||||
Ok(stats)
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
use meili_snap::snapshot;
|
||||
use once_cell::sync::Lazy;
|
||||
use serde_json::{json, Value};
|
||||
|
||||
@@ -56,6 +57,54 @@ async fn simple_facet_search() {
|
||||
assert_eq!(response["facetHits"].as_array().unwrap().len(), 1);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn advanced_facet_search() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("test");
|
||||
|
||||
let documents = DOCUMENTS.clone();
|
||||
index.update_settings_filterable_attributes(json!(["genres"])).await;
|
||||
index.update_settings_typo_tolerance(json!({ "enabled": false })).await;
|
||||
index.add_documents(documents, None).await;
|
||||
index.wait_task(2).await;
|
||||
|
||||
let (response, code) =
|
||||
index.facet_search(json!({"facetName": "genres", "facetQuery": "adventre"})).await;
|
||||
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(response["facetHits"].as_array().unwrap().len(), @"0");
|
||||
|
||||
let (response, code) =
|
||||
index.facet_search(json!({"facetName": "genres", "facetQuery": "àdventure"})).await;
|
||||
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(response["facetHits"].as_array().unwrap().len(), @"1");
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn more_advanced_facet_search() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("test");
|
||||
|
||||
let documents = DOCUMENTS.clone();
|
||||
index.update_settings_filterable_attributes(json!(["genres"])).await;
|
||||
index.update_settings_typo_tolerance(json!({ "disableOnWords": ["adventre"] })).await;
|
||||
index.add_documents(documents, None).await;
|
||||
index.wait_task(2).await;
|
||||
|
||||
let (response, code) =
|
||||
index.facet_search(json!({"facetName": "genres", "facetQuery": "adventre"})).await;
|
||||
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(response["facetHits"].as_array().unwrap().len(), @"0");
|
||||
|
||||
let (response, code) =
|
||||
index.facet_search(json!({"facetName": "genres", "facetQuery": "adventure"})).await;
|
||||
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(response["facetHits"].as_array().unwrap().len(), @"1");
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn non_filterable_facet_search_error() {
|
||||
let server = Server::new().await;
|
||||
|
||||
62
meilisearch/tests/search/geo.rs
Normal file
62
meilisearch/tests/search/geo.rs
Normal file
@@ -0,0 +1,62 @@
|
||||
use once_cell::sync::Lazy;
|
||||
use serde_json::{json, Value};
|
||||
|
||||
use crate::common::Server;
|
||||
|
||||
pub(self) static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
|
||||
json!([
|
||||
{
|
||||
"id": 1,
|
||||
"name": "Taco Truck",
|
||||
"address": "444 Salsa Street, Burritoville",
|
||||
"type": "Mexican",
|
||||
"rating": 9,
|
||||
"_geo": {
|
||||
"lat": 34.0522,
|
||||
"lng": -118.2437
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"name": "La Bella Italia",
|
||||
"address": "456 Elm Street, Townsville",
|
||||
"type": "Italian",
|
||||
"rating": 9,
|
||||
"_geo": {
|
||||
"lat": "45.4777599",
|
||||
"lng": "9.1967508"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"name": "Crêpe Truck",
|
||||
"address": "2 Billig Avenue, Rouenville",
|
||||
"type": "French",
|
||||
"rating": 10
|
||||
}
|
||||
])
|
||||
});
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn geo_sort_with_geo_strings() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("test");
|
||||
|
||||
let documents = DOCUMENTS.clone();
|
||||
index.update_settings_filterable_attributes(json!(["_geo"])).await;
|
||||
index.update_settings_sortable_attributes(json!(["_geo"])).await;
|
||||
index.add_documents(documents, None).await;
|
||||
index.wait_task(2).await;
|
||||
|
||||
index
|
||||
.search(
|
||||
json!({
|
||||
"filter": "_geoRadius(45.472735, 9.184019, 10000)",
|
||||
"sort": ["_geoPoint(0.0, 0.0):asc"]
|
||||
}),
|
||||
|response, code| {
|
||||
assert_eq!(code, 200, "{}", response);
|
||||
},
|
||||
)
|
||||
.await;
|
||||
}
|
||||
@@ -4,6 +4,7 @@
|
||||
mod errors;
|
||||
mod facet_search;
|
||||
mod formatted;
|
||||
mod geo;
|
||||
mod multi;
|
||||
mod pagination;
|
||||
mod restrict_searchable;
|
||||
@@ -819,8 +820,8 @@ async fn experimental_feature_score_details() {
|
||||
},
|
||||
"attribute": {
|
||||
"order": 3,
|
||||
"attribute_ranking_order_score": 1.0,
|
||||
"query_word_distance_score": 0.8095238095238095,
|
||||
"attributeRankingOrderScore": 1.0,
|
||||
"queryWordDistanceScore": 0.8095238095238095,
|
||||
"score": 0.9365079365079364
|
||||
},
|
||||
"exactness": {
|
||||
|
||||
@@ -17,7 +17,7 @@ bincode = "1.3.3"
|
||||
bstr = "1.4.0"
|
||||
bytemuck = { version = "1.13.1", features = ["extern_crate_alloc"] }
|
||||
byteorder = "1.4.3"
|
||||
charabia = { version = "0.8.1", default-features = false }
|
||||
charabia = { version = "0.8.2", default-features = false }
|
||||
concat-arrays = "0.1.2"
|
||||
crossbeam-channel = "0.5.8"
|
||||
deserr = "0.5.0"
|
||||
@@ -29,12 +29,11 @@ geoutils = "0.5.1"
|
||||
grenad = { version = "0.4.4", default-features = false, features = [
|
||||
"tempfile",
|
||||
] }
|
||||
heed = { git = "https://github.com/meilisearch/heed", tag = "v0.12.6", default-features = false, features = [
|
||||
"lmdb",
|
||||
"sync-read-txn",
|
||||
heed = { git = "https://github.com/meilisearch/heed", tag = "v0.12.7", default-features = false, features = [
|
||||
"lmdb", "read-txn-no-tls"
|
||||
] }
|
||||
hnsw = { version = "0.11.0", features = ["serde1"] }
|
||||
indexmap = { version = "1.9.3", features = ["serde"] }
|
||||
instant-distance = { version = "0.6.1", features = ["with-serde"] }
|
||||
json-depth-checker = { path = "../json-depth-checker" }
|
||||
levenshtein_automata = { version = "0.2.1", features = ["fst_automaton"] }
|
||||
memmap2 = "0.5.10"
|
||||
@@ -48,7 +47,6 @@ rstar = { version = "0.10.0", features = ["serde"] }
|
||||
serde = { version = "1.0.160", features = ["derive"] }
|
||||
serde_json = { version = "1.0.95", features = ["preserve_order"] }
|
||||
slice-group-by = "0.3.0"
|
||||
space = "0.17.0"
|
||||
smallstr = { version = "0.3.0", features = ["serde"] }
|
||||
smallvec = "1.10.0"
|
||||
smartstring = "1.0.1"
|
||||
@@ -67,13 +65,16 @@ filter-parser = { path = "../filter-parser" }
|
||||
# documents words self-join
|
||||
itertools = "0.10.5"
|
||||
|
||||
# profiling
|
||||
puffin = "0.16.0"
|
||||
|
||||
# logging
|
||||
log = "0.4.17"
|
||||
logging_timer = "1.1.0"
|
||||
csv = "1.2.1"
|
||||
|
||||
[dev-dependencies]
|
||||
mimalloc = { version = "0.1.29", default-features = false }
|
||||
mimalloc = { version = "0.1.37", default-features = false }
|
||||
big_s = "1.0.2"
|
||||
insta = "1.29.0"
|
||||
maplit = "1.0.2"
|
||||
|
||||
@@ -1,20 +1,36 @@
|
||||
use std::ops;
|
||||
|
||||
use instant_distance::Point;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use space::Metric;
|
||||
|
||||
#[derive(Debug, Default, Clone, Copy, Serialize, Deserialize)]
|
||||
pub struct DotProduct;
|
||||
use crate::normalize_vector;
|
||||
|
||||
impl Metric<Vec<f32>> for DotProduct {
|
||||
type Unit = u32;
|
||||
#[derive(Debug, Default, Clone, Serialize, Deserialize)]
|
||||
pub struct NDotProductPoint(Vec<f32>);
|
||||
|
||||
// Following <https://docs.rs/space/0.17.0/space/trait.Metric.html>.
|
||||
//
|
||||
// Here is a playground that validate the ordering of the bit representation of floats in range 0.0..=1.0:
|
||||
// <https://play.rust-lang.org/?version=stable&mode=debug&edition=2021&gist=6c59e31a3cc5036b32edf51e8937b56e>
|
||||
fn distance(&self, a: &Vec<f32>, b: &Vec<f32>) -> Self::Unit {
|
||||
let dist = 1.0 - dot_product_similarity(a, b);
|
||||
impl NDotProductPoint {
|
||||
pub fn new(point: Vec<f32>) -> Self {
|
||||
NDotProductPoint(normalize_vector(point))
|
||||
}
|
||||
|
||||
pub fn into_inner(self) -> Vec<f32> {
|
||||
self.0
|
||||
}
|
||||
}
|
||||
|
||||
impl ops::Deref for NDotProductPoint {
|
||||
type Target = [f32];
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
self.0.as_slice()
|
||||
}
|
||||
}
|
||||
|
||||
impl Point for NDotProductPoint {
|
||||
fn distance(&self, other: &Self) -> f32 {
|
||||
let dist = 1.0 - dot_product_similarity(&self.0, &other.0);
|
||||
debug_assert!(!dist.is_nan());
|
||||
dist.to_bits()
|
||||
dist
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
27
milli/src/heed_codec/beu16_str_codec.rs
Normal file
27
milli/src/heed_codec/beu16_str_codec.rs
Normal file
@@ -0,0 +1,27 @@
|
||||
use std::borrow::Cow;
|
||||
use std::convert::TryInto;
|
||||
use std::str;
|
||||
|
||||
pub struct BEU16StrCodec;
|
||||
|
||||
impl<'a> heed::BytesDecode<'a> for BEU16StrCodec {
|
||||
type DItem = (u16, &'a str);
|
||||
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||
let (n_bytes, str_bytes) = bytes.split_at(2);
|
||||
let n = n_bytes.try_into().map(u16::from_be_bytes).ok()?;
|
||||
let s = str::from_utf8(str_bytes).ok()?;
|
||||
Some((n, s))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> heed::BytesEncode<'a> for BEU16StrCodec {
|
||||
type EItem = (u16, &'a str);
|
||||
|
||||
fn bytes_encode((n, s): &Self::EItem) -> Option<Cow<[u8]>> {
|
||||
let mut bytes = Vec::with_capacity(s.len() + 2);
|
||||
bytes.extend_from_slice(&n.to_be_bytes());
|
||||
bytes.extend_from_slice(s.as_bytes());
|
||||
Some(Cow::Owned(bytes))
|
||||
}
|
||||
}
|
||||
@@ -1,3 +1,4 @@
|
||||
mod beu16_str_codec;
|
||||
mod beu32_str_codec;
|
||||
mod byte_slice_ref;
|
||||
pub mod facet;
|
||||
@@ -14,6 +15,7 @@ mod str_str_u8_codec;
|
||||
pub use byte_slice_ref::ByteSliceRefCodec;
|
||||
pub use str_ref::StrRefCodec;
|
||||
|
||||
pub use self::beu16_str_codec::BEU16StrCodec;
|
||||
pub use self::beu32_str_codec::BEU32StrCodec;
|
||||
pub use self::field_id_word_count_codec::FieldIdWordCountCodec;
|
||||
pub use self::fst_set_codec::FstSetCodec;
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
use std::borrow::Cow;
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::collections::{BTreeSet, HashMap, HashSet};
|
||||
use std::fs::File;
|
||||
use std::mem::size_of;
|
||||
use std::path::Path;
|
||||
@@ -8,12 +8,11 @@ use charabia::{Language, Script};
|
||||
use heed::flags::Flags;
|
||||
use heed::types::*;
|
||||
use heed::{CompactionOption, Database, PolyDatabase, RoTxn, RwTxn};
|
||||
use rand_pcg::Pcg32;
|
||||
use roaring::RoaringBitmap;
|
||||
use rstar::RTree;
|
||||
use time::OffsetDateTime;
|
||||
|
||||
use crate::distance::DotProduct;
|
||||
use crate::distance::NDotProductPoint;
|
||||
use crate::error::{InternalError, UserError};
|
||||
use crate::facet::FacetType;
|
||||
use crate::fields_ids_map::FieldsIdsMap;
|
||||
@@ -21,7 +20,9 @@ use crate::heed_codec::facet::{
|
||||
FacetGroupKeyCodec, FacetGroupValueCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec,
|
||||
FieldIdCodec, OrderedF64Codec,
|
||||
};
|
||||
use crate::heed_codec::{FstSetCodec, ScriptLanguageCodec, StrBEU16Codec, StrRefCodec};
|
||||
use crate::heed_codec::{
|
||||
BEU16StrCodec, FstSetCodec, ScriptLanguageCodec, StrBEU16Codec, StrRefCodec,
|
||||
};
|
||||
use crate::readable_slices::ReadableSlices;
|
||||
use crate::{
|
||||
default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds,
|
||||
@@ -31,7 +32,7 @@ use crate::{
|
||||
};
|
||||
|
||||
/// The HNSW data-structure that we serialize, fill and search in.
|
||||
pub type Hnsw = hnsw::Hnsw<DotProduct, Vec<f32>, Pcg32, 12, 24>;
|
||||
pub type Hnsw = instant_distance::Hnsw<NDotProductPoint>;
|
||||
|
||||
pub const DEFAULT_MIN_WORD_LEN_ONE_TYPO: u8 = 5;
|
||||
pub const DEFAULT_MIN_WORD_LEN_TWO_TYPOS: u8 = 9;
|
||||
@@ -96,6 +97,7 @@ pub mod db_name {
|
||||
pub const FACET_ID_IS_NULL_DOCIDS: &str = "facet-id-is-null-docids";
|
||||
pub const FACET_ID_IS_EMPTY_DOCIDS: &str = "facet-id-is-empty-docids";
|
||||
pub const FACET_ID_STRING_DOCIDS: &str = "facet-id-string-docids";
|
||||
pub const FACET_ID_NORMALIZED_STRING_STRINGS: &str = "facet-id-normalized-string-strings";
|
||||
pub const FACET_ID_STRING_FST: &str = "facet-id-string-fst";
|
||||
pub const FIELD_ID_DOCID_FACET_F64S: &str = "field-id-docid-facet-f64s";
|
||||
pub const FIELD_ID_DOCID_FACET_STRINGS: &str = "field-id-docid-facet-strings";
|
||||
@@ -157,6 +159,8 @@ pub struct Index {
|
||||
pub facet_id_f64_docids: Database<FacetGroupKeyCodec<OrderedF64Codec>, FacetGroupValueCodec>,
|
||||
/// Maps the facet field id and ranges of strings with the docids that corresponds to them.
|
||||
pub facet_id_string_docids: Database<FacetGroupKeyCodec<StrRefCodec>, FacetGroupValueCodec>,
|
||||
/// Maps the facet field id of the normalized-for-search string facets with their original versions.
|
||||
pub facet_id_normalized_string_strings: Database<BEU16StrCodec, SerdeJson<BTreeSet<String>>>,
|
||||
/// Maps the facet field id of the string facets with an FST containing all the facets values.
|
||||
pub facet_id_string_fst: Database<OwnedType<BEU16>, FstSetCodec>,
|
||||
|
||||
@@ -181,7 +185,7 @@ impl Index {
|
||||
) -> Result<Index> {
|
||||
use db_name::*;
|
||||
|
||||
options.max_dbs(24);
|
||||
options.max_dbs(25);
|
||||
unsafe { options.flag(Flags::MdbAlwaysFreePages) };
|
||||
|
||||
let env = options.open(path)?;
|
||||
@@ -211,6 +215,8 @@ impl Index {
|
||||
let facet_id_f64_docids = env.create_database(&mut wtxn, Some(FACET_ID_F64_DOCIDS))?;
|
||||
let facet_id_string_docids =
|
||||
env.create_database(&mut wtxn, Some(FACET_ID_STRING_DOCIDS))?;
|
||||
let facet_id_normalized_string_strings =
|
||||
env.create_database(&mut wtxn, Some(FACET_ID_NORMALIZED_STRING_STRINGS))?;
|
||||
let facet_id_string_fst = env.create_database(&mut wtxn, Some(FACET_ID_STRING_FST))?;
|
||||
let facet_id_exists_docids =
|
||||
env.create_database(&mut wtxn, Some(FACET_ID_EXISTS_DOCIDS))?;
|
||||
@@ -246,6 +252,7 @@ impl Index {
|
||||
field_id_word_count_docids,
|
||||
facet_id_f64_docids,
|
||||
facet_id_string_docids,
|
||||
facet_id_normalized_string_strings,
|
||||
facet_id_string_fst,
|
||||
facet_id_exists_docids,
|
||||
facet_id_is_null_docids,
|
||||
|
||||
@@ -51,9 +51,10 @@ pub use self::error::{
|
||||
pub use self::external_documents_ids::ExternalDocumentsIds;
|
||||
pub use self::fields_ids_map::FieldsIdsMap;
|
||||
pub use self::heed_codec::{
|
||||
BEU32StrCodec, BoRoaringBitmapCodec, BoRoaringBitmapLenCodec, CboRoaringBitmapCodec,
|
||||
CboRoaringBitmapLenCodec, FieldIdWordCountCodec, ObkvCodec, RoaringBitmapCodec,
|
||||
RoaringBitmapLenCodec, StrBEU32Codec, U8StrStrCodec, UncheckedU8StrStrCodec,
|
||||
BEU16StrCodec, BEU32StrCodec, BoRoaringBitmapCodec, BoRoaringBitmapLenCodec,
|
||||
CboRoaringBitmapCodec, CboRoaringBitmapLenCodec, FieldIdWordCountCodec, ObkvCodec,
|
||||
RoaringBitmapCodec, RoaringBitmapLenCodec, StrBEU32Codec, U8StrStrCodec,
|
||||
UncheckedU8StrStrCodec,
|
||||
};
|
||||
pub use self::index::Index;
|
||||
pub use self::search::{
|
||||
|
||||
@@ -84,7 +84,7 @@ impl ScoreDetails {
|
||||
// For now, fid is a virtual rule always followed by the "position" rule
|
||||
let fid_details = serde_json::json!({
|
||||
"order": order,
|
||||
"attribute_ranking_order_score": fid.local_score(),
|
||||
"attributeRankingOrderScore": fid.local_score(),
|
||||
});
|
||||
details_map.insert("attribute".into(), fid_details);
|
||||
order += 1;
|
||||
@@ -102,7 +102,7 @@ impl ScoreDetails {
|
||||
};
|
||||
|
||||
attribute_details
|
||||
.insert("query_word_distance_score".into(), position.local_score().into());
|
||||
.insert("queryWordDistanceScore".into(), position.local_score().into());
|
||||
let score = Rank::global_score([fid_details, *position].iter().copied());
|
||||
attribute_details.insert("score".into(), score.into());
|
||||
|
||||
|
||||
@@ -1,5 +1,8 @@
|
||||
use std::fmt;
|
||||
use std::ops::ControlFlow;
|
||||
|
||||
use charabia::normalizer::NormalizerOption;
|
||||
use charabia::Normalize;
|
||||
use fst::automaton::{Automaton, Str};
|
||||
use fst::{IntoStreamer, Streamer};
|
||||
use levenshtein_automata::{LevenshteinAutomatonBuilder as LevBuilder, DFA};
|
||||
@@ -14,8 +17,8 @@ use crate::error::UserError;
|
||||
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupValue};
|
||||
use crate::score_details::{ScoreDetails, ScoringStrategy};
|
||||
use crate::{
|
||||
execute_search, normalize_facet, AscDesc, DefaultSearchLogger, DocumentId, FieldId, Index,
|
||||
Result, SearchContext, BEU16,
|
||||
execute_search, AscDesc, DefaultSearchLogger, DocumentId, FieldId, Index, Result,
|
||||
SearchContext, BEU16,
|
||||
};
|
||||
|
||||
// Building these factories is not free.
|
||||
@@ -301,29 +304,28 @@ impl<'a> SearchForFacetValues<'a> {
|
||||
|
||||
match self.query.as_ref() {
|
||||
Some(query) => {
|
||||
let query = normalize_facet(query);
|
||||
let query = query.as_str();
|
||||
let options = NormalizerOption { lossy: true, ..Default::default() };
|
||||
let query = query.normalize(&options);
|
||||
let query = query.as_ref();
|
||||
|
||||
let authorize_typos = self.search_query.index.authorize_typos(rtxn)?;
|
||||
let field_authorizes_typos =
|
||||
!self.search_query.index.exact_attributes_ids(rtxn)?.contains(&fid);
|
||||
|
||||
if authorize_typos && field_authorizes_typos {
|
||||
let mut results = vec![];
|
||||
|
||||
let exact_words_fst = self.search_query.index.exact_words(rtxn)?;
|
||||
if exact_words_fst.map_or(false, |fst| fst.contains(query)) {
|
||||
let key = FacetGroupKey { field_id: fid, level: 0, left_bound: query };
|
||||
if let Some(FacetGroupValue { bitmap, .. }) =
|
||||
index.facet_id_string_docids.get(rtxn, &key)?
|
||||
{
|
||||
let count = search_candidates.intersection_len(&bitmap);
|
||||
if count != 0 {
|
||||
let value = self
|
||||
.one_original_value_of(fid, query, bitmap.min().unwrap())?
|
||||
.unwrap_or_else(|| query.to_string());
|
||||
results.push(FacetValueHit { value, count });
|
||||
}
|
||||
let mut results = vec![];
|
||||
if fst.contains(query) {
|
||||
self.fetch_original_facets_using_normalized(
|
||||
fid,
|
||||
query,
|
||||
query,
|
||||
&search_candidates,
|
||||
&mut results,
|
||||
)?;
|
||||
}
|
||||
Ok(results)
|
||||
} else {
|
||||
let one_typo = self.search_query.index.min_word_len_one_typo(rtxn)?;
|
||||
let two_typos = self.search_query.index.min_word_len_two_typos(rtxn)?;
|
||||
@@ -338,60 +340,41 @@ impl<'a> SearchForFacetValues<'a> {
|
||||
};
|
||||
|
||||
let mut stream = fst.search(automaton).into_stream();
|
||||
let mut length = 0;
|
||||
let mut results = vec![];
|
||||
while let Some(facet_value) = stream.next() {
|
||||
let value = std::str::from_utf8(facet_value)?;
|
||||
let key = FacetGroupKey { field_id: fid, level: 0, left_bound: value };
|
||||
let docids = match index.facet_id_string_docids.get(rtxn, &key)? {
|
||||
Some(FacetGroupValue { bitmap, .. }) => bitmap,
|
||||
None => {
|
||||
error!(
|
||||
"the facet value is missing from the facet database: {key:?}"
|
||||
);
|
||||
continue;
|
||||
}
|
||||
};
|
||||
let count = search_candidates.intersection_len(&docids);
|
||||
if count != 0 {
|
||||
let value = self
|
||||
.one_original_value_of(fid, value, docids.min().unwrap())?
|
||||
.unwrap_or_else(|| query.to_string());
|
||||
results.push(FacetValueHit { value, count });
|
||||
length += 1;
|
||||
}
|
||||
if length >= MAX_NUMBER_OF_FACETS {
|
||||
if self
|
||||
.fetch_original_facets_using_normalized(
|
||||
fid,
|
||||
value,
|
||||
query,
|
||||
&search_candidates,
|
||||
&mut results,
|
||||
)?
|
||||
.is_break()
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(results)
|
||||
Ok(results)
|
||||
}
|
||||
} else {
|
||||
let automaton = Str::new(query).starts_with();
|
||||
let mut stream = fst.search(automaton).into_stream();
|
||||
let mut results = vec![];
|
||||
let mut length = 0;
|
||||
while let Some(facet_value) = stream.next() {
|
||||
let value = std::str::from_utf8(facet_value)?;
|
||||
let key = FacetGroupKey { field_id: fid, level: 0, left_bound: value };
|
||||
let docids = match index.facet_id_string_docids.get(rtxn, &key)? {
|
||||
Some(FacetGroupValue { bitmap, .. }) => bitmap,
|
||||
None => {
|
||||
error!(
|
||||
"the facet value is missing from the facet database: {key:?}"
|
||||
);
|
||||
continue;
|
||||
}
|
||||
};
|
||||
let count = search_candidates.intersection_len(&docids);
|
||||
if count != 0 {
|
||||
let value = self
|
||||
.one_original_value_of(fid, value, docids.min().unwrap())?
|
||||
.unwrap_or_else(|| query.to_string());
|
||||
results.push(FacetValueHit { value, count });
|
||||
length += 1;
|
||||
}
|
||||
if length >= MAX_NUMBER_OF_FACETS {
|
||||
if self
|
||||
.fetch_original_facets_using_normalized(
|
||||
fid,
|
||||
value,
|
||||
query,
|
||||
&search_candidates,
|
||||
&mut results,
|
||||
)?
|
||||
.is_break()
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
@@ -401,7 +384,6 @@ impl<'a> SearchForFacetValues<'a> {
|
||||
}
|
||||
None => {
|
||||
let mut results = vec![];
|
||||
let mut length = 0;
|
||||
let prefix = FacetGroupKey { field_id: fid, level: 0, left_bound: "" };
|
||||
for result in index.facet_id_string_docids.prefix_iter(rtxn, &prefix)? {
|
||||
let (FacetGroupKey { left_bound, .. }, FacetGroupValue { bitmap, .. }) =
|
||||
@@ -412,9 +394,8 @@ impl<'a> SearchForFacetValues<'a> {
|
||||
.one_original_value_of(fid, left_bound, bitmap.min().unwrap())?
|
||||
.unwrap_or_else(|| left_bound.to_string());
|
||||
results.push(FacetValueHit { value, count });
|
||||
length += 1;
|
||||
}
|
||||
if length >= MAX_NUMBER_OF_FACETS {
|
||||
if results.len() >= MAX_NUMBER_OF_FACETS {
|
||||
break;
|
||||
}
|
||||
}
|
||||
@@ -422,6 +403,50 @@ impl<'a> SearchForFacetValues<'a> {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn fetch_original_facets_using_normalized(
|
||||
&self,
|
||||
fid: FieldId,
|
||||
value: &str,
|
||||
query: &str,
|
||||
search_candidates: &RoaringBitmap,
|
||||
results: &mut Vec<FacetValueHit>,
|
||||
) -> Result<ControlFlow<()>> {
|
||||
let index = self.search_query.index;
|
||||
let rtxn = self.search_query.rtxn;
|
||||
|
||||
let database = index.facet_id_normalized_string_strings;
|
||||
let key = (fid, value);
|
||||
let original_strings = match database.get(rtxn, &key)? {
|
||||
Some(original_strings) => original_strings,
|
||||
None => {
|
||||
error!("the facet value is missing from the facet database: {key:?}");
|
||||
return Ok(ControlFlow::Continue(()));
|
||||
}
|
||||
};
|
||||
for original in original_strings {
|
||||
let key = FacetGroupKey { field_id: fid, level: 0, left_bound: original.as_str() };
|
||||
let docids = match index.facet_id_string_docids.get(rtxn, &key)? {
|
||||
Some(FacetGroupValue { bitmap, .. }) => bitmap,
|
||||
None => {
|
||||
error!("the facet value is missing from the facet database: {key:?}");
|
||||
return Ok(ControlFlow::Continue(()));
|
||||
}
|
||||
};
|
||||
let count = search_candidates.intersection_len(&docids);
|
||||
if count != 0 {
|
||||
let value = self
|
||||
.one_original_value_of(fid, &original, docids.min().unwrap())?
|
||||
.unwrap_or_else(|| query.to_string());
|
||||
results.push(FacetValueHit { value, count });
|
||||
}
|
||||
if results.len() >= MAX_NUMBER_OF_FACETS {
|
||||
return Ok(ControlFlow::Break(()));
|
||||
}
|
||||
}
|
||||
|
||||
Ok(ControlFlow::Continue(()))
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, serde::Serialize, PartialEq)]
|
||||
|
||||
@@ -100,7 +100,7 @@ fn facet_number_values<'a>(
|
||||
}
|
||||
|
||||
/// Return an iterator over each string value in the given field of the given document.
|
||||
fn facet_string_values<'a>(
|
||||
pub fn facet_string_values<'a>(
|
||||
docid: u32,
|
||||
field_id: u16,
|
||||
index: &Index,
|
||||
|
||||
@@ -6,6 +6,7 @@ use heed::{RoPrefix, RoTxn};
|
||||
use roaring::RoaringBitmap;
|
||||
use rstar::RTree;
|
||||
|
||||
use super::facet_string_values;
|
||||
use super::ranking_rules::{RankingRule, RankingRuleOutput, RankingRuleQueryTrait};
|
||||
use crate::heed_codec::facet::{FieldDocIdFacetCodec, OrderedF64Codec};
|
||||
use crate::score_details::{self, ScoreDetails};
|
||||
@@ -157,23 +158,7 @@ impl<Q: RankingRuleQueryTrait> GeoSort<Q> {
|
||||
let mut documents = self
|
||||
.geo_candidates
|
||||
.iter()
|
||||
.map(|id| -> Result<_> {
|
||||
Ok((
|
||||
id,
|
||||
[
|
||||
facet_number_values(id, lat, ctx.index, ctx.txn)?
|
||||
.next()
|
||||
.expect("A geo faceted document doesn't contain any lat")?
|
||||
.0
|
||||
.2,
|
||||
facet_number_values(id, lng, ctx.index, ctx.txn)?
|
||||
.next()
|
||||
.expect("A geo faceted document doesn't contain any lng")?
|
||||
.0
|
||||
.2,
|
||||
],
|
||||
))
|
||||
})
|
||||
.map(|id| -> Result<_> { Ok((id, geo_value(id, lat, lng, ctx.index, ctx.txn)?)) })
|
||||
.collect::<Result<Vec<(u32, [f64; 2])>>>()?;
|
||||
// computing the distance between two points is expensive thus we cache the result
|
||||
documents
|
||||
@@ -185,6 +170,37 @@ impl<Q: RankingRuleQueryTrait> GeoSort<Q> {
|
||||
}
|
||||
}
|
||||
|
||||
/// Extracts the lat and long values from a single document.
|
||||
///
|
||||
/// If it is not able to find it in the facet number index it will extract it
|
||||
/// from the facet string index and parse it as f64 (as the geo extraction behaves).
|
||||
fn geo_value(
|
||||
docid: u32,
|
||||
field_lat: u16,
|
||||
field_lng: u16,
|
||||
index: &Index,
|
||||
rtxn: &RoTxn,
|
||||
) -> Result<[f64; 2]> {
|
||||
let extract_geo = |geo_field: u16| -> Result<f64> {
|
||||
match facet_number_values(docid, geo_field, index, rtxn)?.next() {
|
||||
Some(Ok(((_, _, geo), ()))) => Ok(geo),
|
||||
Some(Err(e)) => Err(e.into()),
|
||||
None => match facet_string_values(docid, geo_field, index, rtxn)?.next() {
|
||||
Some(Ok((_, geo))) => {
|
||||
Ok(geo.parse::<f64>().expect("cannot parse geo field as f64"))
|
||||
}
|
||||
Some(Err(e)) => Err(e.into()),
|
||||
None => panic!("A geo faceted document doesn't contain any lat or lng"),
|
||||
},
|
||||
}
|
||||
};
|
||||
|
||||
let lat = extract_geo(field_lat)?;
|
||||
let lng = extract_geo(field_lng)?;
|
||||
|
||||
Ok([lat, lng])
|
||||
}
|
||||
|
||||
impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for GeoSort<Q> {
|
||||
fn id(&self) -> String {
|
||||
"geo_sort".to_owned()
|
||||
|
||||
@@ -28,7 +28,7 @@ use db_cache::DatabaseCache;
|
||||
use exact_attribute::ExactAttribute;
|
||||
use graph_based_ranking_rule::{Exactness, Fid, Position, Proximity, Typo};
|
||||
use heed::RoTxn;
|
||||
use hnsw::Searcher;
|
||||
use instant_distance::Search;
|
||||
use interner::{DedupInterner, Interner};
|
||||
pub use logger::visual::VisualSearchLogger;
|
||||
pub use logger::{DefaultSearchLogger, SearchLogger};
|
||||
@@ -40,18 +40,18 @@ use ranking_rules::{
|
||||
use resolve_query_graph::{compute_query_graph_docids, PhraseDocIdsCache};
|
||||
use roaring::RoaringBitmap;
|
||||
use sort::Sort;
|
||||
use space::Neighbor;
|
||||
|
||||
use self::distinct::facet_string_values;
|
||||
use self::geo_sort::GeoSort;
|
||||
pub use self::geo_sort::Strategy as GeoSortStrategy;
|
||||
use self::graph_based_ranking_rule::Words;
|
||||
use self::interner::Interned;
|
||||
use crate::distance::NDotProductPoint;
|
||||
use crate::error::FieldIdMapMissingEntry;
|
||||
use crate::score_details::{ScoreDetails, ScoringStrategy};
|
||||
use crate::search::new::distinct::apply_distinct_rule;
|
||||
use crate::{
|
||||
normalize_vector, AscDesc, DocumentId, Filter, Index, Member, Result, TermsMatchingStrategy,
|
||||
UserError, BEU32,
|
||||
AscDesc, DocumentId, Filter, Index, Member, Result, TermsMatchingStrategy, UserError, BEU32,
|
||||
};
|
||||
|
||||
/// A structure used throughout the execution of a search query.
|
||||
@@ -444,29 +444,31 @@ pub fn execute_search(
|
||||
check_sort_criteria(ctx, sort_criteria.as_ref())?;
|
||||
|
||||
if let Some(vector) = vector {
|
||||
let mut searcher = Searcher::new();
|
||||
let hnsw = ctx.index.vector_hnsw(ctx.txn)?.unwrap_or_default();
|
||||
let ef = hnsw.len().min(100);
|
||||
let mut dest = vec![Neighbor { index: 0, distance: 0 }; ef];
|
||||
let vector = normalize_vector(vector.clone());
|
||||
let neighbors = hnsw.nearest(&vector, ef, &mut searcher, &mut dest[..]);
|
||||
let mut search = Search::default();
|
||||
let docids = match ctx.index.vector_hnsw(ctx.txn)? {
|
||||
Some(hnsw) => {
|
||||
let vector = NDotProductPoint::new(vector.clone());
|
||||
let neighbors = hnsw.search(&vector, &mut search);
|
||||
|
||||
let mut docids = Vec::new();
|
||||
let mut uniq_docids = RoaringBitmap::new();
|
||||
for Neighbor { index, distance: _ } in neighbors.iter() {
|
||||
let index = BEU32::new(*index as u32);
|
||||
let docid = ctx.index.vector_id_docid.get(ctx.txn, &index)?.unwrap().get();
|
||||
if universe.contains(docid) && uniq_docids.insert(docid) {
|
||||
docids.push(docid);
|
||||
if docids.len() == (from + length) {
|
||||
break;
|
||||
let mut docids = Vec::new();
|
||||
let mut uniq_docids = RoaringBitmap::new();
|
||||
for instant_distance::Item { distance: _, pid, point: _ } in neighbors {
|
||||
let index = BEU32::new(pid.into_inner());
|
||||
let docid = ctx.index.vector_id_docid.get(ctx.txn, &index)?.unwrap().get();
|
||||
if universe.contains(docid) && uniq_docids.insert(docid) {
|
||||
docids.push(docid);
|
||||
if docids.len() == (from + length) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// return the nearest documents that are also part of the candidates
|
||||
// along with a dummy list of scores that are useless in this context.
|
||||
let docids: Vec<_> = docids.into_iter().skip(from).take(length).collect();
|
||||
// return the nearest documents that are also part of the candidates
|
||||
// along with a dummy list of scores that are useless in this context.
|
||||
docids.into_iter().skip(from).take(length).collect()
|
||||
}
|
||||
None => Vec::new(),
|
||||
};
|
||||
|
||||
return Ok(PartialSearchResult {
|
||||
candidates: universe,
|
||||
|
||||
@@ -15,6 +15,8 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> {
|
||||
}
|
||||
|
||||
pub fn execute(self) -> Result<u64> {
|
||||
puffin::profile_function!();
|
||||
|
||||
self.index.set_updated_at(self.wtxn, &OffsetDateTime::now_utc())?;
|
||||
let Index {
|
||||
env: _env,
|
||||
@@ -34,6 +36,7 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> {
|
||||
script_language_docids,
|
||||
facet_id_f64_docids,
|
||||
facet_id_string_docids,
|
||||
facet_id_normalized_string_strings,
|
||||
facet_id_string_fst,
|
||||
facet_id_exists_docids,
|
||||
facet_id_is_null_docids,
|
||||
@@ -92,6 +95,7 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> {
|
||||
word_prefix_fid_docids.clear(self.wtxn)?;
|
||||
script_language_docids.clear(self.wtxn)?;
|
||||
facet_id_f64_docids.clear(self.wtxn)?;
|
||||
facet_id_normalized_string_strings.clear(self.wtxn)?;
|
||||
facet_id_string_fst.clear(self.wtxn)?;
|
||||
facet_id_exists_docids.clear(self.wtxn)?;
|
||||
facet_id_is_null_docids.clear(self.wtxn)?;
|
||||
|
||||
@@ -4,10 +4,9 @@ use std::collections::{BTreeSet, HashMap, HashSet};
|
||||
use fst::IntoStreamer;
|
||||
use heed::types::{ByteSlice, DecodeIgnore, Str, UnalignedSlice};
|
||||
use heed::{BytesDecode, BytesEncode, Database, RwIter};
|
||||
use hnsw::Searcher;
|
||||
use instant_distance::PointId;
|
||||
use roaring::RoaringBitmap;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use space::KnnPoints;
|
||||
use time::OffsetDateTime;
|
||||
|
||||
use super::facet::delete::FacetsDelete;
|
||||
@@ -110,6 +109,8 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
||||
Some(docid)
|
||||
}
|
||||
pub fn execute(self) -> Result<DocumentDeletionResult> {
|
||||
puffin::profile_function!();
|
||||
|
||||
let DetailedDocumentDeletionResult { deleted_documents, remaining_documents } =
|
||||
self.execute_inner()?;
|
||||
|
||||
@@ -237,6 +238,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
||||
word_prefix_fid_docids,
|
||||
facet_id_f64_docids: _,
|
||||
facet_id_string_docids: _,
|
||||
facet_id_normalized_string_strings: _,
|
||||
facet_id_string_fst: _,
|
||||
field_id_docid_facet_f64s: _,
|
||||
field_id_docid_facet_strings: _,
|
||||
@@ -436,24 +438,24 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
||||
|
||||
// An ugly and slow way to remove the vectors from the HNSW
|
||||
// It basically reconstructs the HNSW from scratch without editing the current one.
|
||||
let current_hnsw = self.index.vector_hnsw(self.wtxn)?.unwrap_or_default();
|
||||
if !current_hnsw.is_empty() {
|
||||
let mut new_hnsw = Hnsw::default();
|
||||
let mut searcher = Searcher::new();
|
||||
let mut new_vector_id_docids = Vec::new();
|
||||
|
||||
if let Some(current_hnsw) = self.index.vector_hnsw(self.wtxn)? {
|
||||
let mut points = Vec::new();
|
||||
let mut docids = Vec::new();
|
||||
for result in vector_id_docid.iter(self.wtxn)? {
|
||||
let (vector_id, docid) = result?;
|
||||
if !self.to_delete_docids.contains(docid.get()) {
|
||||
let vector = current_hnsw.get_point(vector_id.get() as usize).clone();
|
||||
let vector_id = new_hnsw.insert(vector, &mut searcher);
|
||||
new_vector_id_docids.push((vector_id as u32, docid));
|
||||
let pid = PointId::from(vector_id.get());
|
||||
let vector = current_hnsw[pid].clone();
|
||||
points.push(vector);
|
||||
docids.push(docid);
|
||||
}
|
||||
}
|
||||
|
||||
let (new_hnsw, pids) = Hnsw::builder().build_hnsw(points);
|
||||
|
||||
vector_id_docid.clear(self.wtxn)?;
|
||||
for (vector_id, docid) in new_vector_id_docids {
|
||||
vector_id_docid.put(self.wtxn, &BEU32::new(vector_id), &docid)?;
|
||||
for (pid, docid) in pids.into_iter().zip(docids) {
|
||||
vector_id_docid.put(self.wtxn, &BEU32::new(pid.into_inner()), &docid)?;
|
||||
}
|
||||
self.index.put_vector_hnsw(self.wtxn, &new_hnsw)?;
|
||||
}
|
||||
|
||||
@@ -76,9 +76,14 @@ pub const FACET_MAX_GROUP_SIZE: u8 = 8;
|
||||
pub const FACET_GROUP_SIZE: u8 = 4;
|
||||
pub const FACET_MIN_LEVEL_SIZE: u8 = 5;
|
||||
|
||||
use std::collections::BTreeSet;
|
||||
use std::fs::File;
|
||||
use std::iter::FromIterator;
|
||||
|
||||
use heed::types::DecodeIgnore;
|
||||
use charabia::normalizer::{Normalize, NormalizerOption};
|
||||
use grenad::{CompressionType, SortAlgorithm};
|
||||
use heed::types::{ByteSlice, DecodeIgnore, SerdeJson};
|
||||
use heed::BytesEncode;
|
||||
use log::debug;
|
||||
use time::OffsetDateTime;
|
||||
|
||||
@@ -87,7 +92,9 @@ use super::FacetsUpdateBulk;
|
||||
use crate::facet::FacetType;
|
||||
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec};
|
||||
use crate::heed_codec::ByteSliceRefCodec;
|
||||
use crate::{Index, Result, BEU16};
|
||||
use crate::update::index_documents::create_sorter;
|
||||
use crate::update::merge_btreeset_string;
|
||||
use crate::{BEU16StrCodec, Index, Result, BEU16};
|
||||
|
||||
pub mod bulk;
|
||||
pub mod delete;
|
||||
@@ -159,26 +166,69 @@ impl<'i> FacetsUpdate<'i> {
|
||||
incremental_update.execute(wtxn)?;
|
||||
}
|
||||
|
||||
// We clear the list of normalized-for-search facets
|
||||
// and the previous FSTs to compute everything from scratch
|
||||
self.index.facet_id_normalized_string_strings.clear(wtxn)?;
|
||||
self.index.facet_id_string_fst.clear(wtxn)?;
|
||||
|
||||
// As we can't use the same write transaction to read and write in two different databases
|
||||
// we must create a temporary sorter that we will write into LMDB afterward.
|
||||
// As multiple unnormalized facet values can become the same normalized facet value
|
||||
// we must merge them together.
|
||||
let mut sorter = create_sorter(
|
||||
SortAlgorithm::Unstable,
|
||||
merge_btreeset_string,
|
||||
CompressionType::None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
);
|
||||
|
||||
// We iterate on the list of original, semi-normalized, facet values
|
||||
// and normalize them for search, inserting them in LMDB in any given order.
|
||||
let options = NormalizerOption { lossy: true, ..Default::default() };
|
||||
let database = self.index.facet_id_string_docids.remap_data_type::<DecodeIgnore>();
|
||||
for result in database.iter(wtxn)? {
|
||||
let (facet_group_key, ()) = result?;
|
||||
if let FacetGroupKey { field_id, level: 0, left_bound } = facet_group_key {
|
||||
let normalized_facet = left_bound.normalize(&options);
|
||||
let set = BTreeSet::from_iter(std::iter::once(left_bound));
|
||||
let key = (field_id, normalized_facet.as_ref());
|
||||
let key = BEU16StrCodec::bytes_encode(&key).ok_or(heed::Error::Encoding)?;
|
||||
let val = SerdeJson::bytes_encode(&set).ok_or(heed::Error::Encoding)?;
|
||||
sorter.insert(key, val)?;
|
||||
}
|
||||
}
|
||||
|
||||
// In this loop we don't need to take care of merging bitmaps
|
||||
// as the grenad sorter already merged them for us.
|
||||
let mut merger_iter = sorter.into_stream_merger_iter()?;
|
||||
while let Some((key_bytes, btreeset_bytes)) = merger_iter.next()? {
|
||||
self.index
|
||||
.facet_id_normalized_string_strings
|
||||
.remap_types::<ByteSlice, ByteSlice>()
|
||||
.put(wtxn, key_bytes, btreeset_bytes)?;
|
||||
}
|
||||
|
||||
// We compute one FST by string facet
|
||||
let mut text_fsts = vec![];
|
||||
let mut current_fst: Option<(u16, fst::SetBuilder<Vec<u8>>)> = None;
|
||||
let database = self.index.facet_id_string_docids.remap_data_type::<DecodeIgnore>();
|
||||
let database =
|
||||
self.index.facet_id_normalized_string_strings.remap_data_type::<DecodeIgnore>();
|
||||
for result in database.iter(wtxn)? {
|
||||
let (facet_group_key, _) = result?;
|
||||
if let FacetGroupKey { field_id, level: 0, left_bound } = facet_group_key {
|
||||
current_fst = match current_fst.take() {
|
||||
Some((fid, fst_builder)) if fid != field_id => {
|
||||
let fst = fst_builder.into_set();
|
||||
text_fsts.push((fid, fst));
|
||||
Some((field_id, fst::SetBuilder::memory()))
|
||||
}
|
||||
Some((field_id, fst_builder)) => Some((field_id, fst_builder)),
|
||||
None => Some((field_id, fst::SetBuilder::memory())),
|
||||
};
|
||||
|
||||
if let Some((_, fst_builder)) = current_fst.as_mut() {
|
||||
fst_builder.insert(left_bound)?;
|
||||
let ((field_id, normalized_facet), _) = result?;
|
||||
current_fst = match current_fst.take() {
|
||||
Some((fid, fst_builder)) if fid != field_id => {
|
||||
let fst = fst_builder.into_set();
|
||||
text_fsts.push((fid, fst));
|
||||
Some((field_id, fst::SetBuilder::memory()))
|
||||
}
|
||||
Some((field_id, fst_builder)) => Some((field_id, fst_builder)),
|
||||
None => Some((field_id, fst::SetBuilder::memory())),
|
||||
};
|
||||
|
||||
if let Some((_, fst_builder)) = current_fst.as_mut() {
|
||||
fst_builder.insert(normalized_facet)?;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -187,9 +237,6 @@ impl<'i> FacetsUpdate<'i> {
|
||||
text_fsts.push((field_id, fst));
|
||||
}
|
||||
|
||||
// We remove all of the previous FSTs that were in this database
|
||||
self.index.facet_id_string_fst.clear(wtxn)?;
|
||||
|
||||
// We write those FSTs in LMDB now
|
||||
for (field_id, fst) in text_fsts {
|
||||
self.index.facet_id_string_fst.put(wtxn, &BEU16::new(field_id), &fst)?;
|
||||
|
||||
@@ -31,6 +31,8 @@ pub fn enrich_documents_batch<R: Read + Seek>(
|
||||
autogenerate_docids: bool,
|
||||
reader: DocumentsBatchReader<R>,
|
||||
) -> Result<StdResult<EnrichedDocumentsBatchReader<R>, UserError>> {
|
||||
puffin::profile_function!();
|
||||
|
||||
let (mut cursor, mut documents_batch_index) = reader.into_cursor_and_fields_index();
|
||||
|
||||
let mut external_ids = tempfile::tempfile().map(grenad::Writer::new)?;
|
||||
|
||||
@@ -30,6 +30,8 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
|
||||
stop_words: Option<&fst::Set<&[u8]>>,
|
||||
max_positions_per_attributes: Option<u32>,
|
||||
) -> Result<(RoaringBitmap, grenad::Reader<File>, ScriptLanguageDocidsMap)> {
|
||||
puffin::profile_function!();
|
||||
|
||||
let max_positions_per_attributes = max_positions_per_attributes
|
||||
.map_or(MAX_POSITION_PER_ATTRIBUTE, |max| max.min(MAX_POSITION_PER_ATTRIBUTE));
|
||||
let max_memory = indexer.max_memory_by_thread();
|
||||
|
||||
@@ -20,6 +20,8 @@ pub fn extract_facet_number_docids<R: io::Read + io::Seek>(
|
||||
docid_fid_facet_number: grenad::Reader<R>,
|
||||
indexer: GrenadParameters,
|
||||
) -> Result<grenad::Reader<File>> {
|
||||
puffin::profile_function!();
|
||||
|
||||
let max_memory = indexer.max_memory_by_thread();
|
||||
|
||||
let mut facet_number_docids_sorter = create_sorter(
|
||||
|
||||
@@ -18,6 +18,8 @@ pub fn extract_facet_string_docids<R: io::Read + io::Seek>(
|
||||
docid_fid_facet_string: grenad::Reader<R>,
|
||||
indexer: GrenadParameters,
|
||||
) -> Result<grenad::Reader<File>> {
|
||||
puffin::profile_function!();
|
||||
|
||||
let max_memory = indexer.max_memory_by_thread();
|
||||
|
||||
let mut facet_string_docids_sorter = create_sorter(
|
||||
|
||||
@@ -34,6 +34,8 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
|
||||
indexer: GrenadParameters,
|
||||
faceted_fields: &HashSet<FieldId>,
|
||||
) -> Result<ExtractedFacetValues> {
|
||||
puffin::profile_function!();
|
||||
|
||||
let max_memory = indexer.max_memory_by_thread();
|
||||
|
||||
let mut fid_docid_facet_numbers_sorter = create_sorter(
|
||||
|
||||
@@ -22,6 +22,8 @@ pub fn extract_fid_word_count_docids<R: io::Read + io::Seek>(
|
||||
docid_word_positions: grenad::Reader<R>,
|
||||
indexer: GrenadParameters,
|
||||
) -> Result<grenad::Reader<File>> {
|
||||
puffin::profile_function!();
|
||||
|
||||
let max_memory = indexer.max_memory_by_thread();
|
||||
|
||||
let mut fid_word_count_docids_sorter = create_sorter(
|
||||
|
||||
@@ -19,6 +19,8 @@ pub fn extract_geo_points<R: io::Read + io::Seek>(
|
||||
primary_key_id: FieldId,
|
||||
(lat_fid, lng_fid): (FieldId, FieldId),
|
||||
) -> Result<grenad::Reader<File>> {
|
||||
puffin::profile_function!();
|
||||
|
||||
let mut writer = create_writer(
|
||||
indexer.chunk_compression_type,
|
||||
indexer.chunk_compression_level,
|
||||
|
||||
@@ -19,6 +19,8 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
|
||||
primary_key_id: FieldId,
|
||||
vectors_fid: FieldId,
|
||||
) -> Result<grenad::Reader<File>> {
|
||||
puffin::profile_function!();
|
||||
|
||||
let mut writer = create_writer(
|
||||
indexer.chunk_compression_type,
|
||||
indexer.chunk_compression_level,
|
||||
|
||||
@@ -27,6 +27,8 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(
|
||||
indexer: GrenadParameters,
|
||||
exact_attributes: &HashSet<FieldId>,
|
||||
) -> Result<(grenad::Reader<File>, grenad::Reader<File>)> {
|
||||
puffin::profile_function!();
|
||||
|
||||
let max_memory = indexer.max_memory_by_thread();
|
||||
|
||||
let mut word_docids_sorter = create_sorter(
|
||||
|
||||
@@ -15,6 +15,8 @@ pub fn extract_word_fid_docids<R: io::Read + io::Seek>(
|
||||
docid_word_positions: grenad::Reader<R>,
|
||||
indexer: GrenadParameters,
|
||||
) -> Result<grenad::Reader<File>> {
|
||||
puffin::profile_function!();
|
||||
|
||||
let max_memory = indexer.max_memory_by_thread();
|
||||
|
||||
let mut word_fid_docids_sorter = create_sorter(
|
||||
|
||||
@@ -21,6 +21,8 @@ pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
|
||||
docid_word_positions: grenad::Reader<R>,
|
||||
indexer: GrenadParameters,
|
||||
) -> Result<grenad::Reader<File>> {
|
||||
puffin::profile_function!();
|
||||
|
||||
let max_memory = indexer.max_memory_by_thread();
|
||||
|
||||
let mut word_pair_proximity_docids_sorter = create_sorter(
|
||||
|
||||
@@ -18,6 +18,8 @@ pub fn extract_word_position_docids<R: io::Read + io::Seek>(
|
||||
docid_word_positions: grenad::Reader<R>,
|
||||
indexer: GrenadParameters,
|
||||
) -> Result<grenad::Reader<File>> {
|
||||
puffin::profile_function!();
|
||||
|
||||
let max_memory = indexer.max_memory_by_thread();
|
||||
|
||||
let mut word_position_docids_sorter = create_sorter(
|
||||
|
||||
@@ -52,6 +52,8 @@ pub(crate) fn data_from_obkv_documents(
|
||||
max_positions_per_attributes: Option<u32>,
|
||||
exact_attributes: HashSet<FieldId>,
|
||||
) -> Result<()> {
|
||||
puffin::profile_function!();
|
||||
|
||||
original_obkv_chunks
|
||||
.par_bridge()
|
||||
.map(|original_documents_chunk| {
|
||||
@@ -238,11 +240,13 @@ fn spawn_extraction_task<FE, FS, M>(
|
||||
M::Output: Send,
|
||||
{
|
||||
rayon::spawn(move || {
|
||||
puffin::profile_scope!("extract_multiple_chunks", name);
|
||||
let chunks: Result<M> =
|
||||
chunks.into_par_iter().map(|chunk| extract_fn(chunk, indexer)).collect();
|
||||
rayon::spawn(move || match chunks {
|
||||
Ok(chunks) => {
|
||||
debug!("merge {} database", name);
|
||||
puffin::profile_scope!("merge_multiple_chunks", name);
|
||||
let reader = chunks.merge(merge_fn, &indexer);
|
||||
let _ = lmdb_writer_sx.send(reader.map(serialize_fn));
|
||||
}
|
||||
|
||||
@@ -214,6 +214,7 @@ pub fn sorter_into_lmdb_database(
|
||||
sorter: Sorter<MergeFn>,
|
||||
merge: MergeFn,
|
||||
) -> Result<()> {
|
||||
puffin::profile_function!();
|
||||
debug!("Writing MTBL sorter...");
|
||||
let before = Instant::now();
|
||||
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
use std::borrow::Cow;
|
||||
use std::collections::BTreeSet;
|
||||
use std::io;
|
||||
use std::result::Result as StdResult;
|
||||
|
||||
@@ -44,6 +45,27 @@ pub fn merge_roaring_bitmaps<'a>(_key: &[u8], values: &[Cow<'a, [u8]>]) -> Resul
|
||||
}
|
||||
}
|
||||
|
||||
pub fn merge_btreeset_string<'a>(_key: &[u8], values: &[Cow<'a, [u8]>]) -> Result<Cow<'a, [u8]>> {
|
||||
if values.len() == 1 {
|
||||
Ok(values[0].clone())
|
||||
} else {
|
||||
// TODO improve the perf by using a `#[borrow] Cow<str>`.
|
||||
let strings: BTreeSet<String> = values
|
||||
.iter()
|
||||
.map(AsRef::as_ref)
|
||||
.map(serde_json::from_slice::<BTreeSet<String>>)
|
||||
.map(StdResult::unwrap)
|
||||
.reduce(|mut current, new| {
|
||||
for x in new {
|
||||
current.insert(x);
|
||||
}
|
||||
current
|
||||
})
|
||||
.unwrap();
|
||||
Ok(Cow::Owned(serde_json::to_vec(&strings).unwrap()))
|
||||
}
|
||||
}
|
||||
|
||||
pub fn keep_first<'a>(_key: &[u8], values: &[Cow<'a, [u8]>]) -> Result<Cow<'a, [u8]>> {
|
||||
Ok(values[0].clone())
|
||||
}
|
||||
|
||||
@@ -13,9 +13,9 @@ pub use grenad_helpers::{
|
||||
GrenadParameters, MergeableReader,
|
||||
};
|
||||
pub use merge_functions::{
|
||||
concat_u32s_array, keep_first, keep_latest_obkv, merge_cbo_roaring_bitmaps,
|
||||
merge_obkvs_and_operations, merge_roaring_bitmaps, merge_two_obkvs, serialize_roaring_bitmap,
|
||||
MergeFn,
|
||||
concat_u32s_array, keep_first, keep_latest_obkv, merge_btreeset_string,
|
||||
merge_cbo_roaring_bitmaps, merge_obkvs_and_operations, merge_roaring_bitmaps, merge_two_obkvs,
|
||||
serialize_roaring_bitmap, MergeFn,
|
||||
};
|
||||
|
||||
use crate::MAX_WORD_LENGTH;
|
||||
|
||||
@@ -26,7 +26,7 @@ pub use self::enrich::{
|
||||
};
|
||||
pub use self::helpers::{
|
||||
as_cloneable_grenad, create_sorter, create_writer, fst_stream_into_hashset,
|
||||
fst_stream_into_vec, merge_cbo_roaring_bitmaps, merge_roaring_bitmaps,
|
||||
fst_stream_into_vec, merge_btreeset_string, merge_cbo_roaring_bitmaps, merge_roaring_bitmaps,
|
||||
sorter_into_lmdb_database, valid_lmdb_key, writer_into_reader, ClonableMmap, MergeFn,
|
||||
};
|
||||
use self::helpers::{grenad_obkv_into_chunks, GrenadParameters};
|
||||
@@ -137,6 +137,8 @@ where
|
||||
mut self,
|
||||
reader: DocumentsBatchReader<R>,
|
||||
) -> Result<(Self, StdResult<u64, UserError>)> {
|
||||
puffin::profile_function!();
|
||||
|
||||
// Early return when there is no document to add
|
||||
if reader.is_empty() {
|
||||
return Ok((self, Ok(0)));
|
||||
@@ -175,6 +177,8 @@ where
|
||||
mut self,
|
||||
to_delete: Vec<String>,
|
||||
) -> Result<(Self, StdResult<u64, UserError>)> {
|
||||
puffin::profile_function!();
|
||||
|
||||
// Early return when there is no document to add
|
||||
if to_delete.is_empty() {
|
||||
return Ok((self, Ok(0)));
|
||||
@@ -194,6 +198,8 @@ where
|
||||
|
||||
#[logging_timer::time("IndexDocuments::{}")]
|
||||
pub fn execute(mut self) -> Result<DocumentAdditionResult> {
|
||||
puffin::profile_function!();
|
||||
|
||||
if self.added_documents == 0 {
|
||||
let number_of_documents = self.index.number_of_documents(self.wtxn)?;
|
||||
return Ok(DocumentAdditionResult { indexed_documents: 0, number_of_documents });
|
||||
@@ -232,6 +238,8 @@ where
|
||||
FP: Fn(UpdateIndexingStep) + Sync,
|
||||
FA: Fn() -> bool + Sync,
|
||||
{
|
||||
puffin::profile_function!();
|
||||
|
||||
let TransformOutput {
|
||||
primary_key,
|
||||
fields_ids_map,
|
||||
@@ -322,6 +330,7 @@ where
|
||||
|
||||
// Run extraction pipeline in parallel.
|
||||
pool.install(|| {
|
||||
puffin::profile_scope!("extract_and_send_grenad_chunks");
|
||||
// split obkv file into several chunks
|
||||
let original_chunk_iter =
|
||||
grenad_obkv_into_chunks(original_documents, pool_params, documents_chunk_size);
|
||||
@@ -477,6 +486,8 @@ where
|
||||
FP: Fn(UpdateIndexingStep) + Sync,
|
||||
FA: Fn() -> bool + Sync,
|
||||
{
|
||||
puffin::profile_function!();
|
||||
|
||||
// Merged databases are already been indexed, we start from this count;
|
||||
let mut databases_seen = MERGED_DATABASE_COUNT;
|
||||
|
||||
@@ -511,26 +522,36 @@ where
|
||||
return Err(Error::InternalError(InternalError::AbortedIndexation));
|
||||
}
|
||||
|
||||
let current_prefix_fst = self.index.words_prefixes_fst(self.wtxn)?;
|
||||
let current_prefix_fst;
|
||||
let common_prefix_fst_words_tmp;
|
||||
let common_prefix_fst_words: Vec<_>;
|
||||
let new_prefix_fst_words;
|
||||
let del_prefix_fst_words;
|
||||
|
||||
// We retrieve the common words between the previous and new prefix word fst.
|
||||
let common_prefix_fst_words = fst_stream_into_vec(
|
||||
previous_words_prefixes_fst.op().add(¤t_prefix_fst).intersection(),
|
||||
);
|
||||
let common_prefix_fst_words: Vec<_> = common_prefix_fst_words
|
||||
.as_slice()
|
||||
.linear_group_by_key(|x| x.chars().next().unwrap())
|
||||
.collect();
|
||||
{
|
||||
puffin::profile_scope!("compute_prefix_diffs");
|
||||
|
||||
// We retrieve the newly added words between the previous and new prefix word fst.
|
||||
let new_prefix_fst_words = fst_stream_into_vec(
|
||||
current_prefix_fst.op().add(&previous_words_prefixes_fst).difference(),
|
||||
);
|
||||
current_prefix_fst = self.index.words_prefixes_fst(self.wtxn)?;
|
||||
|
||||
// We compute the set of prefixes that are no more part of the prefix fst.
|
||||
let del_prefix_fst_words = fst_stream_into_hashset(
|
||||
previous_words_prefixes_fst.op().add(¤t_prefix_fst).difference(),
|
||||
);
|
||||
// We retrieve the common words between the previous and new prefix word fst.
|
||||
common_prefix_fst_words_tmp = fst_stream_into_vec(
|
||||
previous_words_prefixes_fst.op().add(¤t_prefix_fst).intersection(),
|
||||
);
|
||||
common_prefix_fst_words = common_prefix_fst_words_tmp
|
||||
.as_slice()
|
||||
.linear_group_by_key(|x| x.chars().next().unwrap())
|
||||
.collect();
|
||||
|
||||
// We retrieve the newly added words between the previous and new prefix word fst.
|
||||
new_prefix_fst_words = fst_stream_into_vec(
|
||||
current_prefix_fst.op().add(&previous_words_prefixes_fst).difference(),
|
||||
);
|
||||
|
||||
// We compute the set of prefixes that are no more part of the prefix fst.
|
||||
del_prefix_fst_words = fst_stream_into_hashset(
|
||||
previous_words_prefixes_fst.op().add(¤t_prefix_fst).difference(),
|
||||
);
|
||||
}
|
||||
|
||||
databases_seen += 1;
|
||||
(self.progress)(UpdateIndexingStep::MergeDataIntoFinalDatabase {
|
||||
@@ -668,6 +689,8 @@ fn execute_word_prefix_docids(
|
||||
common_prefix_fst_words: &[&[String]],
|
||||
del_prefix_fst_words: &HashSet<Vec<u8>>,
|
||||
) -> Result<()> {
|
||||
puffin::profile_function!();
|
||||
|
||||
let cursor = reader.into_cursor()?;
|
||||
let mut builder = WordPrefixDocids::new(txn, word_docids_db, word_prefix_docids_db);
|
||||
builder.chunk_compression_type = indexer_config.chunk_compression_type;
|
||||
|
||||
@@ -558,6 +558,8 @@ impl<'a, 'i> Transform<'a, 'i> {
|
||||
where
|
||||
F: Fn(UpdateIndexingStep) + Sync,
|
||||
{
|
||||
puffin::profile_function!();
|
||||
|
||||
let primary_key = self
|
||||
.index
|
||||
.primary_key(wtxn)?
|
||||
|
||||
@@ -9,22 +9,19 @@ use charabia::{Language, Script};
|
||||
use grenad::MergerBuilder;
|
||||
use heed::types::ByteSlice;
|
||||
use heed::RwTxn;
|
||||
use hnsw::Searcher;
|
||||
use roaring::RoaringBitmap;
|
||||
use space::KnnPoints;
|
||||
|
||||
use super::helpers::{
|
||||
self, merge_ignore_values, serialize_roaring_bitmap, valid_lmdb_key, CursorClonableMmap,
|
||||
};
|
||||
use super::{ClonableMmap, MergeFn};
|
||||
use crate::distance::NDotProductPoint;
|
||||
use crate::error::UserError;
|
||||
use crate::facet::FacetType;
|
||||
use crate::index::Hnsw;
|
||||
use crate::update::facet::FacetsUpdate;
|
||||
use crate::update::index_documents::helpers::{as_cloneable_grenad, try_split_array_at};
|
||||
use crate::{
|
||||
lat_lng_to_xyz, normalize_vector, CboRoaringBitmapCodec, DocumentId, GeoPoint, Index, Result,
|
||||
BEU32,
|
||||
};
|
||||
use crate::{lat_lng_to_xyz, CboRoaringBitmapCodec, DocumentId, GeoPoint, Index, Result, BEU32};
|
||||
|
||||
pub(crate) enum TypedChunk {
|
||||
FieldIdDocidFacetStrings(grenad::Reader<CursorClonableMmap>),
|
||||
@@ -49,6 +46,66 @@ pub(crate) enum TypedChunk {
|
||||
ScriptLanguageDocids(HashMap<(Script, Language), RoaringBitmap>),
|
||||
}
|
||||
|
||||
impl TypedChunk {
|
||||
pub fn to_debug_string(&self) -> String {
|
||||
match self {
|
||||
TypedChunk::FieldIdDocidFacetStrings(grenad) => {
|
||||
format!("FieldIdDocidFacetStrings {{ number_of_entries: {} }}", grenad.len())
|
||||
}
|
||||
TypedChunk::FieldIdDocidFacetNumbers(grenad) => {
|
||||
format!("FieldIdDocidFacetNumbers {{ number_of_entries: {} }}", grenad.len())
|
||||
}
|
||||
TypedChunk::Documents(grenad) => {
|
||||
format!("Documents {{ number_of_entries: {} }}", grenad.len())
|
||||
}
|
||||
TypedChunk::FieldIdWordcountDocids(grenad) => {
|
||||
format!("FieldIdWordcountDocids {{ number_of_entries: {} }}", grenad.len())
|
||||
}
|
||||
TypedChunk::NewDocumentsIds(grenad) => {
|
||||
format!("NewDocumentsIds {{ number_of_entries: {} }}", grenad.len())
|
||||
}
|
||||
TypedChunk::WordDocids { word_docids_reader, exact_word_docids_reader } => format!(
|
||||
"WordDocids {{ word_docids_reader: {}, exact_word_docids_reader: {} }}",
|
||||
word_docids_reader.len(),
|
||||
exact_word_docids_reader.len()
|
||||
),
|
||||
TypedChunk::WordPositionDocids(grenad) => {
|
||||
format!("WordPositionDocids {{ number_of_entries: {} }}", grenad.len())
|
||||
}
|
||||
TypedChunk::WordFidDocids(grenad) => {
|
||||
format!("WordFidDocids {{ number_of_entries: {} }}", grenad.len())
|
||||
}
|
||||
TypedChunk::WordPairProximityDocids(grenad) => {
|
||||
format!("WordPairProximityDocids {{ number_of_entries: {} }}", grenad.len())
|
||||
}
|
||||
TypedChunk::FieldIdFacetStringDocids(grenad) => {
|
||||
format!("FieldIdFacetStringDocids {{ number_of_entries: {} }}", grenad.len())
|
||||
}
|
||||
TypedChunk::FieldIdFacetNumberDocids(grenad) => {
|
||||
format!("FieldIdFacetNumberDocids {{ number_of_entries: {} }}", grenad.len())
|
||||
}
|
||||
TypedChunk::FieldIdFacetExistsDocids(grenad) => {
|
||||
format!("FieldIdFacetExistsDocids {{ number_of_entries: {} }}", grenad.len())
|
||||
}
|
||||
TypedChunk::FieldIdFacetIsNullDocids(grenad) => {
|
||||
format!("FieldIdFacetIsNullDocids {{ number_of_entries: {} }}", grenad.len())
|
||||
}
|
||||
TypedChunk::FieldIdFacetIsEmptyDocids(grenad) => {
|
||||
format!("FieldIdFacetIsEmptyDocids {{ number_of_entries: {} }}", grenad.len())
|
||||
}
|
||||
TypedChunk::GeoPoints(grenad) => {
|
||||
format!("GeoPoints {{ number_of_entries: {} }}", grenad.len())
|
||||
}
|
||||
TypedChunk::VectorPoints(grenad) => {
|
||||
format!("VectorPoints {{ number_of_entries: {} }}", grenad.len())
|
||||
}
|
||||
TypedChunk::ScriptLanguageDocids(grenad) => {
|
||||
format!("ScriptLanguageDocids {{ number_of_entries: {} }}", grenad.len())
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Write typed chunk in the corresponding LMDB database of the provided index.
|
||||
/// Return new documents seen.
|
||||
pub(crate) fn write_typed_chunk_into_index(
|
||||
@@ -57,6 +114,8 @@ pub(crate) fn write_typed_chunk_into_index(
|
||||
wtxn: &mut RwTxn,
|
||||
index_is_empty: bool,
|
||||
) -> Result<(RoaringBitmap, bool)> {
|
||||
puffin::profile_function!(typed_chunk.to_debug_string());
|
||||
|
||||
let mut is_merged_database = false;
|
||||
match typed_chunk {
|
||||
TypedChunk::Documents(obkv_documents_iter) => {
|
||||
@@ -230,17 +289,20 @@ pub(crate) fn write_typed_chunk_into_index(
|
||||
index.put_geo_faceted_documents_ids(wtxn, &geo_faceted_docids)?;
|
||||
}
|
||||
TypedChunk::VectorPoints(vector_points) => {
|
||||
let mut hnsw = index.vector_hnsw(wtxn)?.unwrap_or_default();
|
||||
let mut searcher = Searcher::new();
|
||||
|
||||
let mut expected_dimensions = match index.vector_id_docid.iter(wtxn)?.next() {
|
||||
Some(result) => {
|
||||
let (vector_id, _) = result?;
|
||||
Some(hnsw.get_point(vector_id.get() as usize).len())
|
||||
}
|
||||
None => None,
|
||||
let (pids, mut points): (Vec<_>, Vec<_>) = match index.vector_hnsw(wtxn)? {
|
||||
Some(hnsw) => hnsw.iter().map(|(pid, point)| (pid, point.clone())).unzip(),
|
||||
None => Default::default(),
|
||||
};
|
||||
|
||||
// Convert the PointIds into DocumentIds
|
||||
let mut docids = Vec::new();
|
||||
for pid in pids {
|
||||
let docid =
|
||||
index.vector_id_docid.get(wtxn, &BEU32::new(pid.into_inner()))?.unwrap();
|
||||
docids.push(docid.get());
|
||||
}
|
||||
|
||||
let mut expected_dimensions = points.get(0).map(|p| p.len());
|
||||
let mut cursor = vector_points.into_cursor()?;
|
||||
while let Some((key, value)) = cursor.move_on_next()? {
|
||||
// convert the key back to a u32 (4 bytes)
|
||||
@@ -256,12 +318,26 @@ pub(crate) fn write_typed_chunk_into_index(
|
||||
return Err(UserError::InvalidVectorDimensions { expected, found })?;
|
||||
}
|
||||
|
||||
let vector = normalize_vector(vector);
|
||||
let vector_id = hnsw.insert(vector, &mut searcher) as u32;
|
||||
index.vector_id_docid.put(wtxn, &BEU32::new(vector_id), &BEU32::new(docid))?;
|
||||
points.push(NDotProductPoint::new(vector));
|
||||
docids.push(docid);
|
||||
}
|
||||
log::debug!("There are {} entries in the HNSW so far", hnsw.len());
|
||||
index.put_vector_hnsw(wtxn, &hnsw)?;
|
||||
|
||||
assert_eq!(docids.len(), points.len());
|
||||
|
||||
let hnsw_length = points.len();
|
||||
let (new_hnsw, pids) = Hnsw::builder().build_hnsw(points);
|
||||
|
||||
index.vector_id_docid.clear(wtxn)?;
|
||||
for (docid, pid) in docids.into_iter().zip(pids) {
|
||||
index.vector_id_docid.put(
|
||||
wtxn,
|
||||
&BEU32::new(pid.into_inner()),
|
||||
&BEU32::new(docid),
|
||||
)?;
|
||||
}
|
||||
|
||||
log::debug!("There are {} entries in the HNSW so far", hnsw_length);
|
||||
index.put_vector_hnsw(wtxn, &new_hnsw)?;
|
||||
}
|
||||
TypedChunk::ScriptLanguageDocids(hash_pair) => {
|
||||
let mut buffer = Vec::new();
|
||||
@@ -336,6 +412,8 @@ where
|
||||
FS: for<'a> Fn(&'a [u8], &'a mut Vec<u8>) -> Result<&'a [u8]>,
|
||||
FM: Fn(&[u8], &[u8], &mut Vec<u8>) -> Result<()>,
|
||||
{
|
||||
puffin::profile_function!(format!("number of entries: {}", data.len()));
|
||||
|
||||
let mut buffer = Vec::new();
|
||||
let database = database.remap_types::<ByteSlice, ByteSlice>();
|
||||
|
||||
@@ -378,6 +456,8 @@ where
|
||||
FS: for<'a> Fn(&'a [u8], &'a mut Vec<u8>) -> Result<&'a [u8]>,
|
||||
FM: Fn(&[u8], &[u8], &mut Vec<u8>) -> Result<()>,
|
||||
{
|
||||
puffin::profile_function!(format!("number of entries: {}", data.len()));
|
||||
|
||||
if !index_is_empty {
|
||||
return write_entries_into_database(
|
||||
data,
|
||||
|
||||
@@ -4,8 +4,9 @@ pub use self::delete_documents::{DeleteDocuments, DeletionStrategy, DocumentDele
|
||||
pub use self::facet::bulk::FacetsUpdateBulk;
|
||||
pub use self::facet::incremental::FacetsUpdateIncrementalInner;
|
||||
pub use self::index_documents::{
|
||||
merge_cbo_roaring_bitmaps, merge_roaring_bitmaps, DocumentAdditionResult, DocumentId,
|
||||
IndexDocuments, IndexDocumentsConfig, IndexDocumentsMethod, MergeFn,
|
||||
merge_btreeset_string, merge_cbo_roaring_bitmaps, merge_roaring_bitmaps,
|
||||
DocumentAdditionResult, DocumentId, IndexDocuments, IndexDocumentsConfig, IndexDocumentsMethod,
|
||||
MergeFn,
|
||||
};
|
||||
pub use self::indexer_config::IndexerConfig;
|
||||
pub use self::prefix_word_pairs::{
|
||||
|
||||
@@ -50,6 +50,8 @@ impl<'t, 'u, 'i> PrefixWordPairsProximityDocids<'t, 'u, 'i> {
|
||||
common_prefix_fst_words: &[&'a [String]],
|
||||
del_prefix_fst_words: &HashSet<Vec<u8>>,
|
||||
) -> Result<()> {
|
||||
puffin::profile_function!();
|
||||
|
||||
index_word_prefix_database(
|
||||
self.wtxn,
|
||||
self.index.word_pair_proximity_docids,
|
||||
|
||||
@@ -27,6 +27,8 @@ pub fn index_prefix_word_database(
|
||||
chunk_compression_type: CompressionType,
|
||||
chunk_compression_level: Option<u32>,
|
||||
) -> Result<()> {
|
||||
puffin::profile_function!();
|
||||
|
||||
let max_proximity = max_proximity - 1;
|
||||
debug!("Computing and writing the word prefix pair proximity docids into LMDB on disk...");
|
||||
|
||||
|
||||
@@ -191,6 +191,7 @@ pub fn index_word_prefix_database(
|
||||
chunk_compression_type: CompressionType,
|
||||
chunk_compression_level: Option<u32>,
|
||||
) -> Result<()> {
|
||||
puffin::profile_function!();
|
||||
debug!("Computing and writing the word prefix pair proximity docids into LMDB on disk...");
|
||||
|
||||
// Make a prefix trie from the common prefixes that are shorter than self.max_prefix_length
|
||||
|
||||
@@ -303,6 +303,8 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
||||
FP: Fn(UpdateIndexingStep) + Sync,
|
||||
FA: Fn() -> bool + Sync,
|
||||
{
|
||||
puffin::profile_function!();
|
||||
|
||||
let fields_ids_map = self.index.fields_ids_map(self.wtxn)?;
|
||||
// if the settings are set before any document update, we don't need to do anything, and
|
||||
// will set the primary key during the first document addition.
|
||||
|
||||
@@ -45,6 +45,8 @@ impl<'t, 'u, 'i> WordPrefixDocids<'t, 'u, 'i> {
|
||||
common_prefix_fst_words: &[&[String]],
|
||||
del_prefix_fst_words: &HashSet<Vec<u8>>,
|
||||
) -> Result<()> {
|
||||
puffin::profile_function!();
|
||||
|
||||
// It is forbidden to keep a mutable reference into the database
|
||||
// and write into it at the same time, therefore we write into another file.
|
||||
let mut prefix_docids_sorter = create_sorter(
|
||||
|
||||
@@ -50,6 +50,7 @@ impl<'t, 'u, 'i> WordPrefixIntegerDocids<'t, 'u, 'i> {
|
||||
common_prefix_fst_words: &[&[String]],
|
||||
del_prefix_fst_words: &HashSet<Vec<u8>>,
|
||||
) -> Result<()> {
|
||||
puffin::profile_function!();
|
||||
debug!("Computing and writing the word levels integers docids into LMDB on disk...");
|
||||
|
||||
let mut prefix_integer_docids_sorter = create_sorter(
|
||||
|
||||
@@ -42,6 +42,8 @@ impl<'t, 'u, 'i> WordsPrefixesFst<'t, 'u, 'i> {
|
||||
|
||||
#[logging_timer::time("WordsPrefixesFst::{}")]
|
||||
pub fn execute(self) -> Result<()> {
|
||||
puffin::profile_function!();
|
||||
|
||||
let words_fst = self.index.words_fst(self.wtxn)?;
|
||||
|
||||
let mut current_prefix = vec![SmallString32::new(); self.max_prefix_length];
|
||||
|
||||
Reference in New Issue
Block a user