mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-12-08 05:35:42 +00:00
Compare commits
255 Commits
prototype-
...
export-gre
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
0f589b9bcd | ||
|
|
82b43e9a7f | ||
|
|
15dafde21d | ||
|
|
290f6d15e7 | ||
|
|
c02d585f5b | ||
|
|
023c2d755f | ||
|
|
407ad753ed | ||
|
|
285aa15d2f | ||
|
|
bf43a3f60a | ||
|
|
2c88131bb1 | ||
|
|
35aa9d5904 | ||
|
|
cfb3e6b51f | ||
|
|
1502382316 | ||
|
|
ef994d84d0 | ||
|
|
1b74010e9e | ||
|
|
08af0e690c | ||
|
|
d71b77f18b | ||
|
|
c443ed7e3f | ||
|
|
db722d201a | ||
|
|
91eb67e981 | ||
|
|
902d700a24 | ||
|
|
f70a615ed9 | ||
|
|
7ff722b72e | ||
|
|
bcf7909bba | ||
|
|
ceb211c515 | ||
|
|
f3c34d5b8c | ||
|
|
4de2db6786 | ||
|
|
661baa716b | ||
|
|
02dcaf07db | ||
|
|
d78ada07b5 | ||
|
|
bc097d90cb | ||
|
|
b393823f36 | ||
|
|
e773dfa9ba | ||
|
|
f158e96fe7 | ||
|
|
e23ec4886d | ||
|
|
7793ba67a4 | ||
|
|
80774148fd | ||
|
|
bf5cea8b10 | ||
|
|
38e1c40f38 | ||
|
|
afc0585c1c | ||
|
|
0e7a411d4d | ||
|
|
0f327f2821 | ||
|
|
77254765e8 | ||
|
|
ce6e6ec2c5 | ||
|
|
91a8f74763 | ||
|
|
abaa72e2bf | ||
|
|
3c3a258a22 | ||
|
|
73e66d5a97 | ||
|
|
b8da117b9c | ||
|
|
5e52107474 | ||
|
|
bcf1c4dae5 | ||
|
|
50f84d43f5 | ||
|
|
f76cc0806e | ||
|
|
2f1abd2c03 | ||
|
|
dedc91e2cf | ||
|
|
a61d8c59ff | ||
|
|
6e23040464 | ||
|
|
8febbf64ce | ||
|
|
b141c82a04 | ||
|
|
cc79cd0b04 | ||
|
|
256538ccb9 | ||
|
|
ca8990394e | ||
|
|
83fb2949c3 | ||
|
|
6cf703387d | ||
|
|
771861599b | ||
|
|
7e47cea0c4 | ||
|
|
5d7061682e | ||
|
|
02e6c8a440 | ||
|
|
89401d097b | ||
|
|
72ebac1fbb | ||
|
|
a616a1d37b | ||
|
|
3e120619fa | ||
|
|
a1caac9bfb | ||
|
|
88d03c56ab | ||
|
|
32ee05ccef | ||
|
|
74c180267e | ||
|
|
517f5332d6 | ||
|
|
9ac5750096 | ||
|
|
7ae4013478 | ||
|
|
fb705116a6 | ||
|
|
053306c0e7 | ||
|
|
84235a63df | ||
|
|
29f8300ac7 | ||
|
|
05edd85d75 | ||
|
|
9eeb75d501 | ||
|
|
4792651462 | ||
|
|
58c3501b54 | ||
|
|
ff76d8f21a | ||
|
|
698ea5139d | ||
|
|
880e790bff | ||
|
|
fbf5f2a392 | ||
|
|
1555870088 | ||
|
|
9f8f3105d5 | ||
|
|
318843aacd | ||
|
|
6d111139b5 | ||
|
|
dff2707471 | ||
|
|
c57f7f7379 | ||
|
|
b968616a99 | ||
|
|
c1bf33a112 | ||
|
|
ddc2b7129a | ||
|
|
b6fc181993 | ||
|
|
388fce9e46 | ||
|
|
d35fe43fd5 | ||
|
|
f692021bfc | ||
|
|
1b90778bf5 | ||
|
|
66ae81a909 | ||
|
|
4aa4a15dc9 | ||
|
|
4b4e8ea2a4 | ||
|
|
84f49d76cd | ||
|
|
afb0e8eab9 | ||
|
|
b5b2333a05 | ||
|
|
40fa0b4df6 | ||
|
|
ab4d614599 | ||
|
|
262b20fdba | ||
|
|
9020606c45 | ||
|
|
0887186ecf | ||
|
|
7d190d8078 | ||
|
|
3b8a9597e2 | ||
|
|
f275554982 | ||
|
|
d997ea1f01 | ||
|
|
50e1d34c66 | ||
|
|
406531c991 | ||
|
|
01e2c3d6bb | ||
|
|
cfaa522d68 | ||
|
|
0c8d1644a6 | ||
|
|
5e0268d40e | ||
|
|
9f9ad4cc05 | ||
|
|
3ee7682fa7 | ||
|
|
7f125bfb12 | ||
|
|
5869ca7716 | ||
|
|
7a89abd2a0 | ||
|
|
d9d0419845 | ||
|
|
5dc8d9e9bf | ||
|
|
9e12a91afb | ||
|
|
8e016fbfeb | ||
|
|
1ccde9bf0b | ||
|
|
34e814f400 | ||
|
|
857cd09285 | ||
|
|
a6fa0b97ec | ||
|
|
552127021f | ||
|
|
38abfec611 | ||
|
|
84a5c304fc | ||
|
|
e93d36d5b9 | ||
|
|
95f8e21533 | ||
|
|
b4d7d80ad9 | ||
|
|
68f197624e | ||
|
|
b79b03d4e2 | ||
|
|
86270e6878 | ||
|
|
81b6128b29 | ||
|
|
5f5a486895 | ||
|
|
5f4fc6c955 | ||
|
|
1f5e8fc072 | ||
|
|
3f3462ab62 | ||
|
|
93363b0201 | ||
|
|
97bb1ff9e2 | ||
|
|
5ee1378856 | ||
|
|
e27b850b09 | ||
|
|
f75f22e026 | ||
|
|
6203f4acef | ||
|
|
12edc2c20a | ||
|
|
94b9f3b310 | ||
|
|
5204c0b60b | ||
|
|
e73cd692db | ||
|
|
29b453346b | ||
|
|
c4bb435374 | ||
|
|
da99a04eb3 | ||
|
|
54ae6951eb | ||
|
|
2bcff2ea46 | ||
|
|
1275e72e0b | ||
|
|
658ec6e0a4 | ||
|
|
43e822e802 | ||
|
|
ee54d3171e | ||
|
|
a0e713c4e7 | ||
|
|
d4cb0a885b | ||
|
|
f52dee2b3b | ||
|
|
0bf879fb88 | ||
|
|
6ff81de401 | ||
|
|
2e4c9651df | ||
|
|
ec9649c922 | ||
|
|
9123370e90 | ||
|
|
14b396d302 | ||
|
|
393216bf30 | ||
|
|
e249e4db7b | ||
|
|
de2ca7006e | ||
|
|
333ce12eb2 | ||
|
|
fb9db1eba6 | ||
|
|
fa2b96b9a5 | ||
|
|
19736cefe8 | ||
|
|
4fb25b8782 | ||
|
|
c83a33017e | ||
|
|
be72326c0a | ||
|
|
547379abb0 | ||
|
|
0b2fff27f2 | ||
|
|
3adbc2b942 | ||
|
|
fbea721378 | ||
|
|
391eb72137 | ||
|
|
d78ad51082 | ||
|
|
1956045a06 | ||
|
|
b2193e612f | ||
|
|
942d49314c | ||
|
|
9a846e82bc | ||
|
|
9df8cfc013 | ||
|
|
d868131bb7 | ||
|
|
248aaa6d45 | ||
|
|
50d6317ec0 | ||
|
|
b734bd9891 | ||
|
|
9800d5a103 | ||
|
|
7c4ed07617 | ||
|
|
3a99a555a2 | ||
|
|
9e1b458010 | ||
|
|
2aede03bc2 | ||
|
|
e741bc1c62 | ||
|
|
6425996e36 | ||
|
|
eb5cb91da2 | ||
|
|
87bba98bd8 | ||
|
|
217105b7da | ||
|
|
1b7c164a55 | ||
|
|
f3f3944469 | ||
|
|
93dcbf598d | ||
|
|
ac68f33194 | ||
|
|
9991152bbe | ||
|
|
a4536b1381 | ||
|
|
5b51cb04af | ||
|
|
3c1a14f1cd | ||
|
|
b8e4709dfa | ||
|
|
806e5b6899 | ||
|
|
61bd2fb7a9 | ||
|
|
e0cc775dc4 | ||
|
|
12940d79a9 | ||
|
|
922a640188 | ||
|
|
abbe131084 | ||
|
|
d4715e0c4d | ||
|
|
11e2a2c1aa | ||
|
|
65e49b7092 | ||
|
|
e56f160032 | ||
|
|
687d92f217 | ||
|
|
fb539f61fe | ||
|
|
cb4ebe163e | ||
|
|
dde3a04679 | ||
|
|
13c2c6c16b | ||
|
|
21bcf32109 | ||
|
|
35e1981488 | ||
|
|
e0f712b9d3 | ||
|
|
56571f762a | ||
|
|
005800634d | ||
|
|
976af4fa8f | ||
|
|
99fec27788 | ||
|
|
afa8f273a8 | ||
|
|
4b644f6bc0 | ||
|
|
7e259cb0d2 | ||
|
|
0fbc1511d7 | ||
|
|
c95d68e244 | ||
|
|
3b3fa38f27 | ||
|
|
d6c2ee15a9 | ||
|
|
dc07790133 |
2
.cargo/config.toml
Normal file
2
.cargo/config.toml
Normal file
@@ -0,0 +1,2 @@
|
||||
[alias]
|
||||
xtask = "run --package xtask --"
|
||||
17
.github/ISSUE_TEMPLATE/sprint_issue.md
vendored
17
.github/ISSUE_TEMPLATE/sprint_issue.md
vendored
@@ -27,6 +27,23 @@ Related spec: WIP
|
||||
- [ ] If prototype validated, merge changes into `main`
|
||||
- [ ] Update the spec
|
||||
|
||||
### Reminders when modifying the Setting API
|
||||
|
||||
<!--- Special steps to remind when adding a new index setting -->
|
||||
|
||||
- [ ] Ensure the new setting route is at least tested by the [`test_setting_routes` macro](https://github.com/meilisearch/meilisearch/blob/5204c0b60b384cbc79621b6b2176fca086069e8e/meilisearch/tests/settings/get_settings.rs#L276)
|
||||
- [ ] Ensure Analytics are fully implemented
|
||||
- [ ] `/settings/my-new-setting` configured in the [`make_setting_routes` macro](https://github.com/meilisearch/meilisearch/blob/5204c0b60b384cbc79621b6b2176fca086069e8e/meilisearch/src/routes/indexes/settings.rs#L141-L165)
|
||||
- [ ] global `/settings` route configured in the [`update_all` function](https://github.com/meilisearch/meilisearch/blob/5204c0b60b384cbc79621b6b2176fca086069e8e/meilisearch/src/routes/indexes/settings.rs#L655-L751)
|
||||
- [ ] Ensure the dump serializing is consistent with the `/settings` route serializing, e.g., enums case can be different (`camelCase` in route and `PascalCase` in the dump)
|
||||
|
||||
#### Special cases when adding a setting for an experimental feature
|
||||
|
||||
- [ ] ⚠️ API stability: The setting does not appear on the main settings route when the feature has never been enabled (e.g. mark it `Unset` when returned from the index in this situation. See [an example](https://github.com/meilisearch/meilisearch/blob/7a89abd2a025606a42f8b219e539117eb2eb029f/meilisearch-types/src/settings.rs#L608))
|
||||
- [ ] The setting cannot be set when the feature is disabled, either by the main settings route or the subroute (see [`validate_settings` function](https://github.com/meilisearch/meilisearch/blob/7a89abd2a025606a42f8b219e539117eb2eb029f/meilisearch/src/routes/indexes/settings.rs#L811))
|
||||
- [ ] If possible, the setting is reset when the feature is disabled (hard if it requires reindexing)
|
||||
|
||||
## Impacted teams
|
||||
|
||||
<!---Ping the related teams. Ask the engine manager if in doubt-->
|
||||
<!---@meilisearch/docs-team when there is any API change, e.g. settings addition-->
|
||||
|
||||
2
.github/workflows/publish-docker-images.yml
vendored
2
.github/workflows/publish-docker-images.yml
vendored
@@ -97,7 +97,7 @@ jobs:
|
||||
- name: Send CI information to Cloud team
|
||||
# Do not send if nightly build (i.e. 'schedule' or 'workflow_dispatch' event)
|
||||
if: github.event_name == 'push'
|
||||
uses: peter-evans/repository-dispatch@v2
|
||||
uses: peter-evans/repository-dispatch@v3
|
||||
with:
|
||||
token: ${{ secrets.MEILI_BOT_GH_PAT }}
|
||||
repository: meilisearch/meilisearch-cloud
|
||||
|
||||
38
.github/workflows/sdks-tests.yml
vendored
38
.github/workflows/sdks-tests.yml
vendored
@@ -22,7 +22,7 @@ jobs:
|
||||
outputs:
|
||||
docker-image: ${{ steps.define-image.outputs.docker-image }}
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/checkout@v4
|
||||
- name: Define the Docker image we need to use
|
||||
id: define-image
|
||||
run: |
|
||||
@@ -46,11 +46,11 @@ jobs:
|
||||
MEILISEARCH_VERSION: ${{ needs.define-docker-image.outputs.docker-image }}
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
repository: meilisearch/meilisearch-dotnet
|
||||
- name: Setup .NET Core
|
||||
uses: actions/setup-dotnet@v3
|
||||
uses: actions/setup-dotnet@v4
|
||||
with:
|
||||
dotnet-version: "6.0.x"
|
||||
- name: Install dependencies
|
||||
@@ -75,12 +75,12 @@ jobs:
|
||||
ports:
|
||||
- '7700:7700'
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
repository: meilisearch/meilisearch-dart
|
||||
- uses: dart-lang/setup-dart@v1
|
||||
with:
|
||||
sdk: 3.1.1
|
||||
sdk: 'latest'
|
||||
- name: Install dependencies
|
||||
run: dart pub get
|
||||
- name: Run integration tests
|
||||
@@ -100,10 +100,10 @@ jobs:
|
||||
- '7700:7700'
|
||||
steps:
|
||||
- name: Set up Go
|
||||
uses: actions/setup-go@v4
|
||||
uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version: stable
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
repository: meilisearch/meilisearch-go
|
||||
- name: Get dependencies
|
||||
@@ -129,11 +129,11 @@ jobs:
|
||||
ports:
|
||||
- '7700:7700'
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
repository: meilisearch/meilisearch-java
|
||||
- name: Set up Java
|
||||
uses: actions/setup-java@v3
|
||||
uses: actions/setup-java@v4
|
||||
with:
|
||||
java-version: 8
|
||||
distribution: 'zulu'
|
||||
@@ -156,7 +156,7 @@ jobs:
|
||||
ports:
|
||||
- '7700:7700'
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
repository: meilisearch/meilisearch-js
|
||||
- name: Setup node
|
||||
@@ -191,7 +191,7 @@ jobs:
|
||||
ports:
|
||||
- '7700:7700'
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
repository: meilisearch/meilisearch-php
|
||||
- name: Install PHP
|
||||
@@ -220,11 +220,11 @@ jobs:
|
||||
ports:
|
||||
- '7700:7700'
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
repository: meilisearch/meilisearch-python
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v4
|
||||
uses: actions/setup-python@v5
|
||||
- name: Install pipenv
|
||||
uses: dschep/install-pipenv-action@v1
|
||||
- name: Install dependencies
|
||||
@@ -245,7 +245,7 @@ jobs:
|
||||
ports:
|
||||
- '7700:7700'
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
repository: meilisearch/meilisearch-ruby
|
||||
- name: Set up Ruby 3
|
||||
@@ -270,7 +270,7 @@ jobs:
|
||||
ports:
|
||||
- '7700:7700'
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
repository: meilisearch/meilisearch-rust
|
||||
- name: Build
|
||||
@@ -291,7 +291,7 @@ jobs:
|
||||
ports:
|
||||
- '7700:7700'
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
repository: meilisearch/meilisearch-swift
|
||||
- name: Run tests
|
||||
@@ -314,7 +314,7 @@ jobs:
|
||||
ports:
|
||||
- '7700:7700'
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
repository: meilisearch/meilisearch-js-plugins
|
||||
- name: Setup node
|
||||
@@ -345,7 +345,7 @@ jobs:
|
||||
ports:
|
||||
- '7700:7700'
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
repository: meilisearch/meilisearch-rails
|
||||
- name: Set up Ruby 3
|
||||
@@ -369,7 +369,7 @@ jobs:
|
||||
ports:
|
||||
- '7700:7700'
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
repository: meilisearch/meilisearch-symfony
|
||||
- name: Install PHP
|
||||
|
||||
24
.github/workflows/test-suite.yml
vendored
24
.github/workflows/test-suite.yml
vendored
@@ -66,6 +66,10 @@ jobs:
|
||||
- uses: actions/checkout@v3
|
||||
- name: Cache dependencies
|
||||
uses: Swatinem/rust-cache@v2.7.1
|
||||
- uses: actions-rs/toolchain@v1
|
||||
with:
|
||||
toolchain: stable
|
||||
override: true
|
||||
- name: Run cargo check without any default features
|
||||
uses: actions-rs/cargo@v1
|
||||
with:
|
||||
@@ -78,7 +82,7 @@ jobs:
|
||||
args: --locked --release --all
|
||||
|
||||
test-all-features:
|
||||
name: Tests all features
|
||||
name: Tests almost all features
|
||||
runs-on: ubuntu-latest
|
||||
container:
|
||||
# Use ubuntu-18.04 to compile with glibc 2.27, which are the production expectations
|
||||
@@ -94,16 +98,12 @@ jobs:
|
||||
with:
|
||||
toolchain: stable
|
||||
override: true
|
||||
- name: Run cargo build with all features
|
||||
uses: actions-rs/cargo@v1
|
||||
with:
|
||||
command: build
|
||||
args: --workspace --locked --release --all-features
|
||||
- name: Run cargo test with all features
|
||||
uses: actions-rs/cargo@v1
|
||||
with:
|
||||
command: test
|
||||
args: --workspace --locked --release --all-features
|
||||
- name: Run cargo build with almost all features
|
||||
run: |
|
||||
cargo build --workspace --locked --release --features "$(cargo xtask list-features --exclude-feature cuda)"
|
||||
- name: Run cargo test with almost all features
|
||||
run: |
|
||||
cargo test --workspace --locked --release --features "$(cargo xtask list-features --exclude-feature cuda)"
|
||||
|
||||
test-disabled-tokenization:
|
||||
name: Test disabled tokenization
|
||||
@@ -164,7 +164,7 @@ jobs:
|
||||
- uses: actions-rs/toolchain@v1
|
||||
with:
|
||||
profile: minimal
|
||||
toolchain: 1.71.1
|
||||
toolchain: 1.75.0
|
||||
override: true
|
||||
components: clippy
|
||||
- name: Cache dependencies
|
||||
|
||||
@@ -75,6 +75,12 @@ If you get a "Too many open files" error you might want to increase the open fil
|
||||
ulimit -Sn 3000
|
||||
```
|
||||
|
||||
#### Build tools
|
||||
|
||||
Meilisearch follows the [cargo xtask](https://github.com/matklad/cargo-xtask) workflow to provide some build tools.
|
||||
|
||||
Run `cargo xtask --help` from the root of the repository to find out what is available.
|
||||
|
||||
## Git Guidelines
|
||||
|
||||
### Git Branches
|
||||
|
||||
2169
Cargo.lock
generated
2169
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
@@ -16,11 +16,16 @@ members = [
|
||||
"json-depth-checker",
|
||||
"benchmarks",
|
||||
"fuzzers",
|
||||
"tracing-trace",
|
||||
"xtask",
|
||||
]
|
||||
|
||||
[workspace.package]
|
||||
version = "1.5.0"
|
||||
authors = ["Quentin de Quelen <quentin@dequelen.me>", "Clément Renault <clement@meilisearch.com>"]
|
||||
version = "1.7.0"
|
||||
authors = [
|
||||
"Quentin de Quelen <quentin@dequelen.me>",
|
||||
"Clément Renault <clement@meilisearch.com>",
|
||||
]
|
||||
description = "Meilisearch HTTP server"
|
||||
homepage = "https://meilisearch.com"
|
||||
readme = "README.md"
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
# Compile
|
||||
FROM rust:alpine3.16 AS compiler
|
||||
FROM rust:1.75.0-alpine3.18 AS compiler
|
||||
|
||||
RUN apk add -q --update-cache --no-cache build-base openssl-dev
|
||||
|
||||
|
||||
2
LICENSE
2
LICENSE
@@ -1,6 +1,6 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2019-2022 Meili SAS
|
||||
Copyright (c) 2019-2024 Meili SAS
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
|
||||
10
README.md
10
README.md
@@ -41,10 +41,10 @@ Meilisearch helps you shape a delightful search experience in a snap, offering f
|
||||
## ✨ Features
|
||||
|
||||
- **Search-as-you-type:** find search results in less than 50 milliseconds
|
||||
- **[Typo tolerance](https://www.meilisearch.com/docs/learn/getting_started/customizing_relevancy?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features#typo-tolerance):** get relevant matches even when queries contain typos and misspellings
|
||||
- **[Filtering](https://www.meilisearch.com/docs/learn/fine_tuning_results/filtering?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features) and [faceted search](https://www.meilisearch.com/docs/learn/fine_tuning_results/faceted_search?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** enhance your user's search experience with custom filters and build a faceted search interface in a few lines of code
|
||||
- **[Typo tolerance](https://www.meilisearch.com/docs/learn/configuration/typo_tolerance?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** get relevant matches even when queries contain typos and misspellings
|
||||
- **[Filtering](https://www.meilisearch.com/docs/learn/fine_tuning_results/filtering?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features) and [faceted search](https://www.meilisearch.com/docs/learn/fine_tuning_results/faceted_search?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** enhance your users' search experience with custom filters and build a faceted search interface in a few lines of code
|
||||
- **[Sorting](https://www.meilisearch.com/docs/learn/fine_tuning_results/sorting?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** sort results based on price, date, or pretty much anything else your users need
|
||||
- **[Synonym support](https://www.meilisearch.com/docs/learn/getting_started/customizing_relevancy?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features#synonyms):** configure synonyms to include more relevant content in your search results
|
||||
- **[Synonym support](https://www.meilisearch.com/docs/learn/configuration/synonyms?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** configure synonyms to include more relevant content in your search results
|
||||
- **[Geosearch](https://www.meilisearch.com/docs/learn/fine_tuning_results/geosearch?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** filter and sort documents based on geographic data
|
||||
- **[Extensive language support](https://www.meilisearch.com/docs/learn/what_is_meilisearch/language?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** search datasets in any language, with optimized support for Chinese, Japanese, Hebrew, and languages using the Latin alphabet
|
||||
- **[Security management](https://www.meilisearch.com/docs/learn/security/master_api_keys?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** control which users can access what data with API keys that allow fine-grained permissions handling
|
||||
@@ -61,8 +61,6 @@ You can consult Meilisearch's documentation at [https://www.meilisearch.com/docs
|
||||
|
||||
For basic instructions on how to set up Meilisearch, add documents to an index, and search for documents, take a look at our [Quick Start](https://www.meilisearch.com/docs/learn/getting_started/quick_start?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=get-started) guide.
|
||||
|
||||
You may also want to check out [Meilisearch 101](https://www.meilisearch.com/docs/learn/getting_started/filtering_and_sorting?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=get-started) for an introduction to some of Meilisearch's most popular features.
|
||||
|
||||
## ⚡ Supercharge your Meilisearch experience
|
||||
|
||||
Say goodbye to server deployment and manual updates with [Meilisearch Cloud](https://www.meilisearch.com/cloud?utm_campaign=oss&utm_source=github&utm_medium=meilisearch). No credit card required.
|
||||
@@ -101,7 +99,7 @@ Meilisearch is a search engine created by [Meili](https://www.welcometothejungle
|
||||
|
||||
- For feature requests, please visit our [product repository](https://github.com/meilisearch/product/discussions)
|
||||
- Found a bug? Open an [issue](https://github.com/meilisearch/meilisearch/issues)!
|
||||
- Want to be part of our Discord community? [Join us!](https://discord.gg/meilisearch)
|
||||
- Want to be part of our Discord community? [Join us!](https://discord.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=contact)
|
||||
|
||||
Thank you for your support!
|
||||
|
||||
|
||||
@@ -106,7 +106,7 @@
|
||||
},
|
||||
"editorMode": "builder",
|
||||
"exemplar": true,
|
||||
"expr": "meilisearch_index_count{job=\"meilisearch\", instance=\"$instance\"}",
|
||||
"expr": "meilisearch_index_count{job=\"$job\", instance=\"$instance\"}",
|
||||
"interval": "",
|
||||
"legendFormat": "",
|
||||
"range": true,
|
||||
@@ -165,7 +165,7 @@
|
||||
"type": "prometheus"
|
||||
},
|
||||
"editorMode": "builder",
|
||||
"expr": "meilisearch_index_docs_count{job=\"meilisearch\", index=\"$Index\", instance=\"$instance\"}",
|
||||
"expr": "meilisearch_index_docs_count{job=\"$job\", index=\"$Index\", instance=\"$instance\"}",
|
||||
"hide": false,
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
@@ -228,7 +228,7 @@
|
||||
},
|
||||
"editorMode": "builder",
|
||||
"exemplar": true,
|
||||
"expr": "round(increase(meilisearch_http_requests_total{method=\"POST\", path=\"/indexes/$Index/search\", job=\"meilisearch\"}[1h]))",
|
||||
"expr": "round(increase(meilisearch_http_requests_total{method=\"POST\", path=\"/indexes/$Index/search\", job=\"$job\"}[1h]))",
|
||||
"interval": "",
|
||||
"legendFormat": "",
|
||||
"range": true,
|
||||
@@ -288,7 +288,7 @@
|
||||
},
|
||||
"editorMode": "builder",
|
||||
"exemplar": true,
|
||||
"expr": "round(increase(meilisearch_http_requests_total{method=\"POST\", path=\"/indexes/$Index/search\", job=\"meilisearch\"}[24h]))",
|
||||
"expr": "round(increase(meilisearch_http_requests_total{method=\"POST\", path=\"/indexes/$Index/search\", job=\"$job\"}[24h]))",
|
||||
"interval": "",
|
||||
"legendFormat": "",
|
||||
"range": true,
|
||||
@@ -348,7 +348,7 @@
|
||||
},
|
||||
"editorMode": "builder",
|
||||
"exemplar": true,
|
||||
"expr": "round(increase(meilisearch_http_requests_total{method=\"POST\", path=\"/indexes/$Index/search\", job=\"meilisearch\"}[30d]))",
|
||||
"expr": "round(increase(meilisearch_http_requests_total{method=\"POST\", path=\"/indexes/$Index/search\", job=\"$job\"}[30d]))",
|
||||
"interval": "",
|
||||
"legendFormat": "",
|
||||
"range": true,
|
||||
@@ -447,7 +447,7 @@
|
||||
},
|
||||
"editorMode": "builder",
|
||||
"exemplar": true,
|
||||
"expr": "meilisearch_db_size_bytes{job=\"meilisearch\", instance=\"$instance\"}",
|
||||
"expr": "meilisearch_db_size_bytes{job=\"$job\", instance=\"$instance\"}",
|
||||
"interval": "",
|
||||
"legendFormat": "Database size on disk",
|
||||
"range": true,
|
||||
@@ -458,7 +458,7 @@
|
||||
"type": "prometheus"
|
||||
},
|
||||
"editorMode": "builder",
|
||||
"expr": "meilisearch_used_db_size_bytes{job=\"meilisearch\", instance=\"$instance\"}",
|
||||
"expr": "meilisearch_used_db_size_bytes{job=\"$job\", instance=\"$instance\"}",
|
||||
"hide": false,
|
||||
"legendFormat": "Used bytes",
|
||||
"range": true,
|
||||
@@ -553,7 +553,7 @@
|
||||
},
|
||||
"editorMode": "builder",
|
||||
"exemplar": true,
|
||||
"expr": "rate(meilisearch_http_response_time_seconds_sum{instance=\"$instance\", job=\"meilisearch\"}[5m]) / rate(meilisearch_http_response_time_seconds_count[5m])",
|
||||
"expr": "rate(meilisearch_http_response_time_seconds_sum{instance=\"$instance\", job=\"$job\"}[5m]) / rate(meilisearch_http_response_time_seconds_count[5m])",
|
||||
"interval": "",
|
||||
"legendFormat": "{{method}} {{path}}",
|
||||
"range": true,
|
||||
@@ -646,7 +646,7 @@
|
||||
},
|
||||
"editorMode": "builder",
|
||||
"exemplar": true,
|
||||
"expr": "rate(meilisearch_http_requests_total{instance=\"$instance\", job=\"meilisearch\"}[5m])",
|
||||
"expr": "rate(meilisearch_http_requests_total{instance=\"$instance\", job=\"$job\"}[5m])",
|
||||
"interval": "",
|
||||
"legendFormat": "{{method}} {{path}}",
|
||||
"range": true,
|
||||
@@ -744,7 +744,7 @@
|
||||
},
|
||||
"editorMode": "builder",
|
||||
"exemplar": true,
|
||||
"expr": "sum by(le) (increase(meilisearch_http_response_time_seconds_bucket{path=\"/indexes/$Index/search\", instance=\"$instance\", job=\"meilisearch\"}[30s]))",
|
||||
"expr": "sum by(le) (increase(meilisearch_http_response_time_seconds_bucket{path=\"/indexes/$Index/search\", instance=\"$instance\", job=\"$job\"}[30s]))",
|
||||
"format": "heatmap",
|
||||
"interval": "",
|
||||
"legendFormat": "{{le}}",
|
||||
@@ -854,7 +854,7 @@
|
||||
},
|
||||
"editorMode": "builder",
|
||||
"exemplar": true,
|
||||
"expr": "meilisearch_nb_tasks{instance=\"$instance\", job=\"meilisearch\", kind=\"statuses\"}",
|
||||
"expr": "meilisearch_nb_tasks{instance=\"$instance\", job=\"$job\", kind=\"statuses\"}",
|
||||
"interval": "",
|
||||
"legendFormat": "{{value}} ",
|
||||
"range": true,
|
||||
@@ -947,7 +947,7 @@
|
||||
},
|
||||
"editorMode": "builder",
|
||||
"exemplar": true,
|
||||
"expr": "meilisearch_nb_tasks{instance=\"$instance\", job=\"meilisearch\", kind=\"types\"}",
|
||||
"expr": "meilisearch_nb_tasks{instance=\"$instance\", job=\"$job\", kind=\"types\"}",
|
||||
"interval": "",
|
||||
"legendFormat": "{{value}} ",
|
||||
"range": true,
|
||||
@@ -1040,7 +1040,7 @@
|
||||
},
|
||||
"editorMode": "builder",
|
||||
"exemplar": true,
|
||||
"expr": "meilisearch_nb_tasks{instance=\"$instance\", job=\"meilisearch\", kind=\"indexes\"}",
|
||||
"expr": "meilisearch_nb_tasks{instance=\"$instance\", job=\"$job\", kind=\"indexes\"}",
|
||||
"interval": "",
|
||||
"legendFormat": "{{value}} ",
|
||||
"range": true,
|
||||
@@ -1161,7 +1161,7 @@
|
||||
},
|
||||
"editorMode": "builder",
|
||||
"exemplar": true,
|
||||
"expr": "rate(process_cpu_seconds_total{job=\"meilisearch\", instance=\"$instance\"}[1m])",
|
||||
"expr": "rate(process_cpu_seconds_total{job=\"$job\", instance=\"$instance\"}[1m])",
|
||||
"interval": "",
|
||||
"legendFormat": "process",
|
||||
"range": true,
|
||||
@@ -1264,7 +1264,7 @@
|
||||
},
|
||||
"editorMode": "builder",
|
||||
"exemplar": true,
|
||||
"expr": "process_resident_memory_bytes{job=\"meilisearch\", instance=\"$instance\"} / 1024 / 1024",
|
||||
"expr": "process_resident_memory_bytes{job=\"$job\", instance=\"$instance\"} / 1024 / 1024",
|
||||
"interval": "",
|
||||
"legendFormat": "process",
|
||||
"range": true,
|
||||
@@ -1342,6 +1342,33 @@
|
||||
"skipUrlSync": false,
|
||||
"sort": 0,
|
||||
"type": "query"
|
||||
},
|
||||
{
|
||||
"current": {
|
||||
"selected": true,
|
||||
"text": "meilisearch",
|
||||
"value": "meilisearch"
|
||||
},
|
||||
"datasource": {
|
||||
"type": "prometheus"
|
||||
},
|
||||
"definition": "label_values(job)",
|
||||
"description": "Prometheus job_name from scrape config (default is meilisearch)",
|
||||
"hide": 0,
|
||||
"includeAll": false,
|
||||
"label": "Job",
|
||||
"multi": false,
|
||||
"name": "job",
|
||||
"options": [],
|
||||
"query": {
|
||||
"query": "label_values(job)",
|
||||
"refId": "StandardVariableQuery"
|
||||
},
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"skipUrlSync": false,
|
||||
"sort": 0,
|
||||
"type": "query"
|
||||
}
|
||||
]
|
||||
},
|
||||
|
||||
@@ -11,24 +11,24 @@ edition.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1.0.70"
|
||||
csv = "1.2.1"
|
||||
anyhow = "1.0.79"
|
||||
csv = "1.3.0"
|
||||
milli = { path = "../milli" }
|
||||
mimalloc = { version = "0.1.37", default-features = false }
|
||||
serde_json = { version = "1.0.95", features = ["preserve_order"] }
|
||||
mimalloc = { version = "0.1.39", default-features = false }
|
||||
serde_json = { version = "1.0.111", features = ["preserve_order"] }
|
||||
|
||||
[dev-dependencies]
|
||||
criterion = { version = "0.5.1", features = ["html_reports"] }
|
||||
rand = "0.8.5"
|
||||
rand_chacha = "0.3.1"
|
||||
roaring = "0.10.1"
|
||||
roaring = "0.10.2"
|
||||
|
||||
[build-dependencies]
|
||||
anyhow = "1.0.70"
|
||||
bytes = "1.4.0"
|
||||
anyhow = "1.0.79"
|
||||
bytes = "1.5.0"
|
||||
convert_case = "0.6.0"
|
||||
flate2 = "1.0.25"
|
||||
reqwest = { version = "0.11.16", features = ["blocking", "rustls-tls"], default-features = false }
|
||||
flate2 = "1.0.28"
|
||||
reqwest = { version = "0.11.23", features = ["blocking", "rustls-tls"], default-features = false }
|
||||
|
||||
[features]
|
||||
default = ["milli/all-tokenizations"]
|
||||
|
||||
@@ -129,3 +129,6 @@ experimental_enable_metrics = false
|
||||
|
||||
# Experimental RAM reduction during indexing, do not use in production, see: <https://github.com/meilisearch/product/discussions/652>
|
||||
experimental_reduce_indexing_memory_usage = false
|
||||
|
||||
# Experimentally reduces the maximum number of tasks that will be processed at once, see: <https://github.com/orgs/meilisearch/discussions/713>
|
||||
# experimental_max_number_of_batched_tasks = 100
|
||||
|
||||
@@ -11,22 +11,22 @@ readme.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1.0.70"
|
||||
flate2 = "1.0.25"
|
||||
http = "0.2.9"
|
||||
log = "0.4.17"
|
||||
anyhow = "1.0.79"
|
||||
flate2 = "1.0.28"
|
||||
http = "0.2.11"
|
||||
meilisearch-auth = { path = "../meilisearch-auth" }
|
||||
meilisearch-types = { path = "../meilisearch-types" }
|
||||
once_cell = "1.17.1"
|
||||
regex = "1.7.3"
|
||||
roaring = { version = "0.10.1", features = ["serde"] }
|
||||
serde = { version = "1.0.160", features = ["derive"] }
|
||||
serde_json = { version = "1.0.95", features = ["preserve_order"] }
|
||||
tar = "0.4.38"
|
||||
tempfile = "3.5.0"
|
||||
thiserror = "1.0.40"
|
||||
time = { version = "0.3.20", features = ["serde-well-known", "formatting", "parsing", "macros"] }
|
||||
uuid = { version = "1.3.1", features = ["serde", "v4"] }
|
||||
once_cell = "1.19.0"
|
||||
regex = "1.10.2"
|
||||
roaring = { version = "0.10.2", features = ["serde"] }
|
||||
serde = { version = "1.0.195", features = ["derive"] }
|
||||
serde_json = { version = "1.0.111", features = ["preserve_order"] }
|
||||
tar = "0.4.40"
|
||||
tempfile = "3.9.0"
|
||||
thiserror = "1.0.56"
|
||||
time = { version = "0.3.31", features = ["serde-well-known", "formatting", "parsing", "macros"] }
|
||||
tracing = "0.1.40"
|
||||
uuid = { version = "1.6.1", features = ["serde", "v4"] }
|
||||
|
||||
[dev-dependencies]
|
||||
big_s = "1.0.2"
|
||||
|
||||
@@ -276,6 +276,7 @@ pub(crate) mod test {
|
||||
),
|
||||
}),
|
||||
pagination: Setting::NotSet,
|
||||
embedders: Setting::NotSet,
|
||||
_kind: std::marker::PhantomData,
|
||||
};
|
||||
settings.check()
|
||||
|
||||
@@ -120,7 +120,7 @@ impl From<v1::settings::Settings> for v2::Settings<v2::Unchecked> {
|
||||
criterion.as_ref().map(ToString::to_string)
|
||||
}
|
||||
Err(()) => {
|
||||
log::warn!(
|
||||
tracing::warn!(
|
||||
"Could not import the following ranking rule: `{}`.",
|
||||
ranking_rule
|
||||
);
|
||||
@@ -152,11 +152,11 @@ impl From<v1::update::UpdateStatus> for Option<v2::updates::UpdateStatus> {
|
||||
use v2::updates::UpdateStatus as UpdateStatusV2;
|
||||
Some(match source {
|
||||
UpdateStatusV1::Enqueued { content } => {
|
||||
log::warn!(
|
||||
tracing::warn!(
|
||||
"Cannot import task {} (importing enqueued tasks from v1 dumps is unsupported)",
|
||||
content.update_id
|
||||
);
|
||||
log::warn!("Task will be skipped in the queue of imported tasks.");
|
||||
tracing::warn!("Task will be skipped in the queue of imported tasks.");
|
||||
|
||||
return None;
|
||||
}
|
||||
@@ -229,7 +229,7 @@ impl From<v1::update::UpdateType> for Option<v2::updates::UpdateMeta> {
|
||||
Some(match source {
|
||||
v1::update::UpdateType::ClearAll => v2::updates::UpdateMeta::ClearDocuments,
|
||||
v1::update::UpdateType::Customs => {
|
||||
log::warn!("Ignoring task with type 'Customs' that is no longer supported");
|
||||
tracing::warn!("Ignoring task with type 'Customs' that is no longer supported");
|
||||
return None;
|
||||
}
|
||||
v1::update::UpdateType::DocumentsAddition { .. } => {
|
||||
@@ -296,7 +296,7 @@ impl From<v1::settings::RankingRule> for Option<v2::settings::Criterion> {
|
||||
v1::settings::RankingRule::Proximity => Some(v2::settings::Criterion::Proximity),
|
||||
v1::settings::RankingRule::Attribute => Some(v2::settings::Criterion::Attribute),
|
||||
v1::settings::RankingRule::WordsPosition => {
|
||||
log::warn!("Removing the 'WordsPosition' ranking rule that is no longer supported, please check the resulting ranking rules of your indexes");
|
||||
tracing::warn!("Removing the 'WordsPosition' ranking rule that is no longer supported, please check the resulting ranking rules of your indexes");
|
||||
None
|
||||
}
|
||||
v1::settings::RankingRule::Exactness => Some(v2::settings::Criterion::Exactness),
|
||||
|
||||
@@ -146,8 +146,8 @@ impl From<v2::updates::UpdateStatus> for v3::updates::UpdateStatus {
|
||||
started_processing_at: processing.started_processing_at,
|
||||
}),
|
||||
Err(e) => {
|
||||
log::warn!("Error with task {}: {}", processing.from.update_id, e);
|
||||
log::warn!("Task will be marked as `Failed`.");
|
||||
tracing::warn!("Error with task {}: {}", processing.from.update_id, e);
|
||||
tracing::warn!("Task will be marked as `Failed`.");
|
||||
v3::updates::UpdateStatus::Failed(v3::updates::Failed {
|
||||
from: v3::updates::Processing {
|
||||
from: v3::updates::Enqueued {
|
||||
@@ -172,8 +172,8 @@ impl From<v2::updates::UpdateStatus> for v3::updates::UpdateStatus {
|
||||
enqueued_at: enqueued.enqueued_at,
|
||||
}),
|
||||
Err(e) => {
|
||||
log::warn!("Error with task {}: {}", enqueued.update_id, e);
|
||||
log::warn!("Task will be marked as `Failed`.");
|
||||
tracing::warn!("Error with task {}: {}", enqueued.update_id, e);
|
||||
tracing::warn!("Task will be marked as `Failed`.");
|
||||
v3::updates::UpdateStatus::Failed(v3::updates::Failed {
|
||||
from: v3::updates::Processing {
|
||||
from: v3::updates::Enqueued {
|
||||
@@ -353,7 +353,7 @@ impl From<String> for v3::Code {
|
||||
"malformed_payload" => v3::Code::MalformedPayload,
|
||||
"missing_payload" => v3::Code::MissingPayload,
|
||||
other => {
|
||||
log::warn!("Unknown error code {}", other);
|
||||
tracing::warn!("Unknown error code {}", other);
|
||||
v3::Code::UnretrievableErrorCode
|
||||
}
|
||||
}
|
||||
|
||||
@@ -76,20 +76,20 @@ impl CompatV3ToV4 {
|
||||
let index_uid = match index_uid {
|
||||
Some(uid) => uid,
|
||||
None => {
|
||||
log::warn!(
|
||||
tracing::warn!(
|
||||
"Error while importing the update {}.",
|
||||
task.update.id()
|
||||
);
|
||||
log::warn!(
|
||||
tracing::warn!(
|
||||
"The index associated to the uuid `{}` could not be retrieved.",
|
||||
task.uuid.to_string()
|
||||
);
|
||||
if task.update.is_finished() {
|
||||
// we're fucking with his history but not his data, that's ok-ish.
|
||||
log::warn!("The index-uuid will be set as `unknown`.");
|
||||
tracing::warn!("The index-uuid will be set as `unknown`.");
|
||||
String::from("unknown")
|
||||
} else {
|
||||
log::warn!("The task will be ignored.");
|
||||
tracing::warn!("The task will be ignored.");
|
||||
return None;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -305,7 +305,7 @@ impl From<v4::ResponseError> for v5::ResponseError {
|
||||
"invalid_api_key_expires_at" => v5::Code::InvalidApiKeyExpiresAt,
|
||||
"invalid_api_key_description" => v5::Code::InvalidApiKeyDescription,
|
||||
other => {
|
||||
log::warn!("Unknown error code {}", other);
|
||||
tracing::warn!("Unknown error code {}", other);
|
||||
v5::Code::UnretrievableErrorCode
|
||||
}
|
||||
};
|
||||
|
||||
@@ -304,7 +304,7 @@ impl From<v5::ResponseError> for v6::ResponseError {
|
||||
"immutable_field" => v6::Code::BadRequest,
|
||||
"api_key_already_exists" => v6::Code::ApiKeyAlreadyExists,
|
||||
other => {
|
||||
log::warn!("Unknown error code {}", other);
|
||||
tracing::warn!("Unknown error code {}", other);
|
||||
v6::Code::UnretrievableErrorCode
|
||||
}
|
||||
};
|
||||
@@ -329,7 +329,7 @@ impl<T> From<v5::Settings<T>> for v6::Settings<v6::Unchecked> {
|
||||
new_ranking_rules.push(new_rule);
|
||||
}
|
||||
Err(_) => {
|
||||
log::warn!("Error while importing settings. The ranking rule `{rule}` does not exist anymore.")
|
||||
tracing::warn!("Error while importing settings. The ranking rule `{rule}` does not exist anymore.")
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -378,6 +378,7 @@ impl<T> From<v5::Settings<T>> for v6::Settings<v6::Unchecked> {
|
||||
v5::Setting::Reset => v6::Setting::Reset,
|
||||
v5::Setting::NotSet => v6::Setting::NotSet,
|
||||
},
|
||||
embedders: v6::Setting::NotSet,
|
||||
_kind: std::marker::PhantomData,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
#[allow(clippy::enum_variant_names)]
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, Copy)]
|
||||
pub enum Code {
|
||||
// index related error
|
||||
|
||||
@@ -95,6 +95,7 @@ impl fmt::Display for ErrorType {
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(clippy::enum_variant_names)]
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, Copy)]
|
||||
pub enum Code {
|
||||
// index related error
|
||||
|
||||
@@ -31,6 +31,7 @@ impl ResponseError {
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(clippy::enum_variant_names)]
|
||||
#[derive(Deserialize, Debug, Clone, Copy)]
|
||||
#[cfg_attr(test, derive(serde::Serialize))]
|
||||
pub enum Code {
|
||||
|
||||
@@ -2,10 +2,10 @@ use std::fs::{self, File};
|
||||
use std::io::{BufRead, BufReader, ErrorKind};
|
||||
use std::path::Path;
|
||||
|
||||
use log::debug;
|
||||
pub use meilisearch_types::milli;
|
||||
use tempfile::TempDir;
|
||||
use time::OffsetDateTime;
|
||||
use tracing::debug;
|
||||
use uuid::Uuid;
|
||||
|
||||
use super::Document;
|
||||
|
||||
@@ -11,9 +11,9 @@ edition.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
tempfile = "3.5.0"
|
||||
thiserror = "1.0.40"
|
||||
uuid = { version = "1.3.1", features = ["serde", "v4"] }
|
||||
tempfile = "3.9.0"
|
||||
thiserror = "1.0.56"
|
||||
uuid = { version = "1.6.1", features = ["serde", "v4"] }
|
||||
|
||||
[dev-dependencies]
|
||||
faux = "0.1.9"
|
||||
faux = "0.1.10"
|
||||
|
||||
@@ -13,8 +13,8 @@ license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
nom = "7.1.3"
|
||||
nom_locate = "4.1.0"
|
||||
unescaper = "0.1.2"
|
||||
nom_locate = "4.2.0"
|
||||
unescaper = "0.1.3"
|
||||
|
||||
[dev-dependencies]
|
||||
insta = "1.29.0"
|
||||
insta = "1.34.0"
|
||||
|
||||
@@ -11,10 +11,10 @@ edition.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
arbitrary = { version = "1.3.0", features = ["derive"] }
|
||||
clap = { version = "4.3.0", features = ["derive"] }
|
||||
fastrand = "2.0.0"
|
||||
arbitrary = { version = "1.3.2", features = ["derive"] }
|
||||
clap = { version = "4.4.17", features = ["derive"] }
|
||||
fastrand = "2.0.1"
|
||||
milli = { path = "../milli" }
|
||||
serde = { version = "1.0.160", features = ["derive"] }
|
||||
serde_json = { version = "1.0.95", features = ["preserve_order"] }
|
||||
tempfile = "3.5.0"
|
||||
serde = { version = "1.0.195", features = ["derive"] }
|
||||
serde_json = { version = "1.0.111", features = ["preserve_order"] }
|
||||
tempfile = "3.9.0"
|
||||
|
||||
@@ -11,30 +11,36 @@ edition.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1.0.70"
|
||||
anyhow = "1.0.79"
|
||||
bincode = "1.3.3"
|
||||
csv = "1.2.1"
|
||||
csv = "1.3.0"
|
||||
derive_builder = "0.12.0"
|
||||
dump = { path = "../dump" }
|
||||
enum-iterator = "1.4.0"
|
||||
enum-iterator = "1.5.0"
|
||||
file-store = { path = "../file-store" }
|
||||
log = "0.4.17"
|
||||
flate2 = "1.0.28"
|
||||
meilisearch-auth = { path = "../meilisearch-auth" }
|
||||
meilisearch-types = { path = "../meilisearch-types" }
|
||||
page_size = "0.5.0"
|
||||
puffin = { version = "0.16.0", features = ["serialization"] }
|
||||
roaring = { version = "0.10.1", features = ["serde"] }
|
||||
serde = { version = "1.0.160", features = ["derive"] }
|
||||
serde_json = { version = "1.0.95", features = ["preserve_order"] }
|
||||
roaring = { version = "0.10.2", features = ["serde"] }
|
||||
serde = { version = "1.0.195", features = ["derive"] }
|
||||
serde_json = { version = "1.0.111", features = ["preserve_order"] }
|
||||
synchronoise = "1.0.1"
|
||||
tempfile = "3.5.0"
|
||||
thiserror = "1.0.40"
|
||||
time = { version = "0.3.20", features = ["serde-well-known", "formatting", "parsing", "macros"] }
|
||||
uuid = { version = "1.3.1", features = ["serde", "v4"] }
|
||||
tempfile = "3.9.0"
|
||||
thiserror = "1.0.56"
|
||||
time = { version = "0.3.31", features = [
|
||||
"serde-well-known",
|
||||
"formatting",
|
||||
"parsing",
|
||||
"macros",
|
||||
] }
|
||||
tracing = "0.1.40"
|
||||
ureq = "2.9.1"
|
||||
uuid = { version = "1.6.1", features = ["serde", "v4"] }
|
||||
|
||||
[dev-dependencies]
|
||||
big_s = "1.0.2"
|
||||
crossbeam = "0.8.2"
|
||||
insta = { version = "1.29.0", features = ["json", "redactions"] }
|
||||
crossbeam = "0.8.4"
|
||||
insta = { version = "1.34.0", features = ["json", "redactions"] }
|
||||
meili-snap = { path = "../meili-snap" }
|
||||
nelson = { git = "https://github.com/meilisearch/nelson.git", rev = "675f13885548fb415ead8fbb447e9e6d9314000a"}
|
||||
|
||||
@@ -24,7 +24,6 @@ use std::fs::{self, File};
|
||||
use std::io::BufWriter;
|
||||
|
||||
use dump::IndexMetadata;
|
||||
use log::{debug, error, info, trace};
|
||||
use meilisearch_types::error::Code;
|
||||
use meilisearch_types::heed::{RoTxn, RwTxn};
|
||||
use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader};
|
||||
@@ -60,7 +59,7 @@ pub(crate) enum Batch {
|
||||
/// The list of tasks that were processing when this task cancelation appeared.
|
||||
previous_processing_tasks: RoaringBitmap,
|
||||
},
|
||||
TaskDeletion(Task),
|
||||
TaskDeletions(Vec<Task>),
|
||||
SnapshotCreation(Vec<Task>),
|
||||
Dump(Task),
|
||||
IndexOperation {
|
||||
@@ -146,13 +145,12 @@ impl Batch {
|
||||
pub fn ids(&self) -> Vec<TaskId> {
|
||||
match self {
|
||||
Batch::TaskCancelation { task, .. }
|
||||
| Batch::TaskDeletion(task)
|
||||
| Batch::Dump(task)
|
||||
| Batch::IndexCreation { task, .. }
|
||||
| Batch::IndexUpdate { task, .. } => vec![task.uid],
|
||||
Batch::SnapshotCreation(tasks) | Batch::IndexDeletion { tasks, .. } => {
|
||||
tasks.iter().map(|task| task.uid).collect()
|
||||
}
|
||||
Batch::SnapshotCreation(tasks)
|
||||
| Batch::TaskDeletions(tasks)
|
||||
| Batch::IndexDeletion { tasks, .. } => tasks.iter().map(|task| task.uid).collect(),
|
||||
Batch::IndexOperation { op, .. } => match op {
|
||||
IndexOperation::DocumentOperation { tasks, .. }
|
||||
| IndexOperation::Settings { tasks, .. }
|
||||
@@ -180,7 +178,7 @@ impl Batch {
|
||||
use Batch::*;
|
||||
match self {
|
||||
TaskCancelation { .. }
|
||||
| TaskDeletion(_)
|
||||
| TaskDeletions(_)
|
||||
| SnapshotCreation(_)
|
||||
| Dump(_)
|
||||
| IndexSwap { .. } => None,
|
||||
@@ -199,7 +197,7 @@ impl fmt::Display for Batch {
|
||||
let tasks = self.ids();
|
||||
match self {
|
||||
Batch::TaskCancelation { .. } => f.write_str("TaskCancelation")?,
|
||||
Batch::TaskDeletion(_) => f.write_str("TaskDeletion")?,
|
||||
Batch::TaskDeletions(_) => f.write_str("TaskDeletion")?,
|
||||
Batch::SnapshotCreation(_) => f.write_str("SnapshotCreation")?,
|
||||
Batch::Dump(_) => f.write_str("Dump")?,
|
||||
Batch::IndexOperation { op, .. } => write!(f, "{op}")?,
|
||||
@@ -515,6 +513,7 @@ impl IndexScheduler {
|
||||
/// 3. We get the *next* snapshot to process.
|
||||
/// 4. We get the *next* dump to process.
|
||||
/// 5. We get the *next* tasks to process for a specific index.
|
||||
#[tracing::instrument(level = "trace", skip(self, rtxn), target = "indexing::scheduler")]
|
||||
pub(crate) fn create_next_batch(&self, rtxn: &RoTxn) -> Result<Option<Batch>> {
|
||||
#[cfg(test)]
|
||||
self.maybe_fail(crate::tests::FailureLocation::InsideCreateBatch)?;
|
||||
@@ -539,9 +538,9 @@ impl IndexScheduler {
|
||||
|
||||
// 2. we get the next task to delete
|
||||
let to_delete = self.get_kind(rtxn, Kind::TaskDeletion)? & enqueued;
|
||||
if let Some(task_id) = to_delete.min() {
|
||||
let task = self.get_task(rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?;
|
||||
return Ok(Some(Batch::TaskDeletion(task)));
|
||||
if !to_delete.is_empty() {
|
||||
let tasks = self.get_existing_tasks(rtxn, to_delete)?;
|
||||
return Ok(Some(Batch::TaskDeletions(tasks)));
|
||||
}
|
||||
|
||||
// 3. we batch the snapshot.
|
||||
@@ -584,7 +583,9 @@ impl IndexScheduler {
|
||||
let index_tasks = self.index_tasks(rtxn, index_name)? & enqueued;
|
||||
|
||||
// If autobatching is disabled we only take one task at a time.
|
||||
let tasks_limit = if self.autobatching_enabled { usize::MAX } else { 1 };
|
||||
// Otherwise, we take only a maximum of tasks to create batches.
|
||||
let tasks_limit =
|
||||
if self.autobatching_enabled { self.max_number_of_batched_tasks } else { 1 };
|
||||
|
||||
let enqueued = index_tasks
|
||||
.into_iter()
|
||||
@@ -618,6 +619,7 @@ impl IndexScheduler {
|
||||
/// The list of tasks that were processed. The metadata of each task in the returned
|
||||
/// list is updated accordingly, with the exception of the its date fields
|
||||
/// [`finished_at`](meilisearch_types::tasks::Task::finished_at) and [`started_at`](meilisearch_types::tasks::Task::started_at).
|
||||
#[tracing::instrument(level = "trace", skip(self, batch), target = "indexing::scheduler", fields(batch=batch.to_string()))]
|
||||
pub(crate) fn process_batch(&self, batch: Batch) -> Result<Vec<Task>> {
|
||||
#[cfg(test)]
|
||||
{
|
||||
@@ -667,9 +669,10 @@ impl IndexScheduler {
|
||||
Ok(()) => {
|
||||
for content_uuid in canceled_tasks_content_uuids {
|
||||
if let Err(error) = self.delete_update_file(content_uuid) {
|
||||
error!(
|
||||
"We failed deleting the content file indentified as {}: {}",
|
||||
content_uuid, error
|
||||
tracing::error!(
|
||||
file_content_uuid = %content_uuid,
|
||||
%error,
|
||||
"Failed deleting content file"
|
||||
)
|
||||
}
|
||||
}
|
||||
@@ -679,31 +682,43 @@ impl IndexScheduler {
|
||||
|
||||
Ok(vec![task])
|
||||
}
|
||||
Batch::TaskDeletion(mut task) => {
|
||||
Batch::TaskDeletions(mut tasks) => {
|
||||
// 1. Retrieve the tasks that matched the query at enqueue-time.
|
||||
let matched_tasks =
|
||||
let mut matched_tasks = RoaringBitmap::new();
|
||||
|
||||
for task in tasks.iter() {
|
||||
if let KindWithContent::TaskDeletion { tasks, query: _ } = &task.kind {
|
||||
tasks
|
||||
matched_tasks |= tasks;
|
||||
} else {
|
||||
unreachable!()
|
||||
}
|
||||
}
|
||||
|
||||
let mut wtxn = self.env.write_txn()?;
|
||||
let mut deleted_tasks = self.delete_matched_tasks(&mut wtxn, &matched_tasks)?;
|
||||
wtxn.commit()?;
|
||||
|
||||
for task in tasks.iter_mut() {
|
||||
task.status = Status::Succeeded;
|
||||
let KindWithContent::TaskDeletion { tasks, query: _ } = &task.kind else {
|
||||
unreachable!()
|
||||
};
|
||||
|
||||
let mut wtxn = self.env.write_txn()?;
|
||||
let deleted_tasks_count = self.delete_matched_tasks(&mut wtxn, matched_tasks)?;
|
||||
let deleted_tasks_count = deleted_tasks.intersection_len(tasks);
|
||||
deleted_tasks -= tasks;
|
||||
|
||||
task.status = Status::Succeeded;
|
||||
match &mut task.details {
|
||||
Some(Details::TaskDeletion {
|
||||
matched_tasks: _,
|
||||
deleted_tasks,
|
||||
original_filter: _,
|
||||
}) => {
|
||||
*deleted_tasks = Some(deleted_tasks_count);
|
||||
match &mut task.details {
|
||||
Some(Details::TaskDeletion {
|
||||
matched_tasks: _,
|
||||
deleted_tasks,
|
||||
original_filter: _,
|
||||
}) => {
|
||||
*deleted_tasks = Some(deleted_tasks_count);
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
wtxn.commit()?;
|
||||
Ok(vec![task])
|
||||
Ok(tasks)
|
||||
}
|
||||
Batch::SnapshotCreation(mut tasks) => {
|
||||
fs::create_dir_all(&self.snapshots_path)?;
|
||||
@@ -934,8 +949,8 @@ impl IndexScheduler {
|
||||
};
|
||||
|
||||
// the index operation can take a long time, so save this handle to make it available to the search for the duration of the tick
|
||||
*self.currently_updating_index.write().unwrap() =
|
||||
Some((index_uid.clone(), index.clone()));
|
||||
self.index_mapper
|
||||
.set_currently_updating_index(Some((index_uid.clone(), index.clone())));
|
||||
|
||||
let mut index_wtxn = index.write_txn()?;
|
||||
let tasks = self.apply_index_operation(&mut index_wtxn, &index, op)?;
|
||||
@@ -956,7 +971,10 @@ impl IndexScheduler {
|
||||
|
||||
match res {
|
||||
Ok(_) => (),
|
||||
Err(e) => error!("Could not write the stats of the index {}", e),
|
||||
Err(e) => tracing::error!(
|
||||
error = &e as &dyn std::error::Error,
|
||||
"Could not write the stats of the index"
|
||||
),
|
||||
}
|
||||
|
||||
Ok(tasks)
|
||||
@@ -984,7 +1002,7 @@ impl IndexScheduler {
|
||||
builder.set_primary_key(primary_key);
|
||||
let must_stop_processing = self.must_stop_processing.clone();
|
||||
builder.execute(
|
||||
|indexing_step| debug!("update: {:?}", indexing_step),
|
||||
|indexing_step| tracing::debug!(update = ?indexing_step),
|
||||
|| must_stop_processing.get(),
|
||||
)?;
|
||||
index_wtxn.commit()?;
|
||||
@@ -1011,7 +1029,10 @@ impl IndexScheduler {
|
||||
|
||||
match res {
|
||||
Ok(_) => (),
|
||||
Err(e) => error!("Could not write the stats of the index {}", e),
|
||||
Err(e) => tracing::error!(
|
||||
error = &e as &dyn std::error::Error,
|
||||
"Could not write the stats of the index"
|
||||
),
|
||||
}
|
||||
|
||||
Ok(vec![task])
|
||||
@@ -1130,6 +1151,11 @@ impl IndexScheduler {
|
||||
///
|
||||
/// ## Return
|
||||
/// The list of processed tasks.
|
||||
#[tracing::instrument(
|
||||
level = "trace",
|
||||
skip(self, index_wtxn, index),
|
||||
target = "indexing::scheduler"
|
||||
)]
|
||||
fn apply_index_operation<'i>(
|
||||
&self,
|
||||
index_wtxn: &mut RwTxn<'i>,
|
||||
@@ -1190,7 +1216,7 @@ impl IndexScheduler {
|
||||
milli::update::Settings::new(index_wtxn, index, indexer_config);
|
||||
builder.set_primary_key(primary_key);
|
||||
builder.execute(
|
||||
|indexing_step| debug!("update: {:?}", indexing_step),
|
||||
|indexing_step| tracing::debug!(update = ?indexing_step),
|
||||
|| must_stop_processing.clone().get(),
|
||||
)?;
|
||||
primary_key_has_been_set = true;
|
||||
@@ -1200,12 +1226,16 @@ impl IndexScheduler {
|
||||
|
||||
let config = IndexDocumentsConfig { update_method: method, ..Default::default() };
|
||||
|
||||
let embedder_configs = index.embedding_configs(index_wtxn)?;
|
||||
// TODO: consider Arc'ing the map too (we only need read access + we'll be cloning it multiple times, so really makes sense)
|
||||
let embedders = self.embedders(embedder_configs)?;
|
||||
|
||||
let mut builder = milli::update::IndexDocuments::new(
|
||||
index_wtxn,
|
||||
index,
|
||||
indexer_config,
|
||||
config,
|
||||
|indexing_step| trace!("update: {:?}", indexing_step),
|
||||
|indexing_step| tracing::trace!(?indexing_step, "Update"),
|
||||
|| must_stop_processing.get(),
|
||||
)?;
|
||||
|
||||
@@ -1218,6 +1248,8 @@ impl IndexScheduler {
|
||||
let (new_builder, user_result) = builder.add_documents(reader)?;
|
||||
builder = new_builder;
|
||||
|
||||
builder = builder.with_embedders(embedders.clone());
|
||||
|
||||
let received_documents =
|
||||
if let Some(Details::DocumentAdditionOrUpdate {
|
||||
received_documents,
|
||||
@@ -1275,7 +1307,7 @@ impl IndexScheduler {
|
||||
|
||||
if !tasks.iter().all(|res| res.error.is_some()) {
|
||||
let addition = builder.execute()?;
|
||||
info!("document addition done: {:?}", addition);
|
||||
tracing::info!(indexing_result = ?addition, "document indexing done");
|
||||
} else if primary_key_has_been_set {
|
||||
// Everything failed but we've set a primary key.
|
||||
// We need to remove it.
|
||||
@@ -1283,7 +1315,7 @@ impl IndexScheduler {
|
||||
milli::update::Settings::new(index_wtxn, index, indexer_config);
|
||||
builder.reset_primary_key();
|
||||
builder.execute(
|
||||
|indexing_step| trace!("update: {:?}", indexing_step),
|
||||
|indexing_step| tracing::trace!(update = ?indexing_step),
|
||||
|| must_stop_processing.clone().get(),
|
||||
)?;
|
||||
}
|
||||
@@ -1343,9 +1375,6 @@ impl IndexScheduler {
|
||||
|
||||
for (task, (_, settings)) in tasks.iter_mut().zip(settings) {
|
||||
let checked_settings = settings.clone().check();
|
||||
if checked_settings.proximity_precision.set().is_some() {
|
||||
self.features.features().check_proximity_precision()?;
|
||||
}
|
||||
task.details = Some(Details::SettingsUpdate { settings: Box::new(settings) });
|
||||
apply_settings_to_builder(&checked_settings, &mut builder);
|
||||
|
||||
@@ -1356,7 +1385,7 @@ impl IndexScheduler {
|
||||
|
||||
let must_stop_processing = self.must_stop_processing.clone();
|
||||
builder.execute(
|
||||
|indexing_step| debug!("update: {:?}", indexing_step),
|
||||
|indexing_step| tracing::debug!(update = ?indexing_step),
|
||||
|| must_stop_processing.get(),
|
||||
)?;
|
||||
|
||||
@@ -1430,7 +1459,11 @@ impl IndexScheduler {
|
||||
/// Delete each given task from all the databases (if it is deleteable).
|
||||
///
|
||||
/// Return the number of tasks that were actually deleted.
|
||||
fn delete_matched_tasks(&self, wtxn: &mut RwTxn, matched_tasks: &RoaringBitmap) -> Result<u64> {
|
||||
fn delete_matched_tasks(
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
matched_tasks: &RoaringBitmap,
|
||||
) -> Result<RoaringBitmap> {
|
||||
// 1. Remove from this list the tasks that we are not allowed to delete
|
||||
let enqueued_tasks = self.get_status(wtxn, Status::Enqueued)?;
|
||||
let processing_tasks = &self.processing_tasks.read().unwrap().processing.clone();
|
||||
@@ -1495,7 +1528,7 @@ impl IndexScheduler {
|
||||
}
|
||||
}
|
||||
|
||||
Ok(to_delete_tasks.len())
|
||||
Ok(to_delete_tasks)
|
||||
}
|
||||
|
||||
/// Cancel each given task from all the databases (if it is cancelable).
|
||||
@@ -1564,7 +1597,7 @@ fn delete_document_by_filter<'a>(
|
||||
index,
|
||||
indexer_config,
|
||||
config,
|
||||
|indexing_step| debug!("update: {:?}", indexing_step),
|
||||
|indexing_step| tracing::debug!(update = ?indexing_step),
|
||||
|| must_stop_processing.get(),
|
||||
)?;
|
||||
|
||||
|
||||
@@ -30,19 +30,6 @@ impl RoFeatures {
|
||||
self.runtime
|
||||
}
|
||||
|
||||
pub fn check_score_details(&self) -> Result<()> {
|
||||
if self.runtime.score_details {
|
||||
Ok(())
|
||||
} else {
|
||||
Err(FeatureNotEnabledError {
|
||||
disabled_action: "Computing score details",
|
||||
feature: "score details",
|
||||
issue_link: "https://github.com/meilisearch/product/discussions/674",
|
||||
}
|
||||
.into())
|
||||
}
|
||||
}
|
||||
|
||||
pub fn check_metrics(&self) -> Result<()> {
|
||||
if self.runtime.metrics {
|
||||
Ok(())
|
||||
@@ -56,12 +43,25 @@ impl RoFeatures {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn check_vector(&self) -> Result<()> {
|
||||
pub fn check_logs_route(&self) -> Result<()> {
|
||||
if self.runtime.logs_route {
|
||||
Ok(())
|
||||
} else {
|
||||
Err(FeatureNotEnabledError {
|
||||
disabled_action: "getting logs through the `/logs/stream` route",
|
||||
feature: "logs route",
|
||||
issue_link: "https://github.com/orgs/meilisearch/discussions/721",
|
||||
}
|
||||
.into())
|
||||
}
|
||||
}
|
||||
|
||||
pub fn check_vector(&self, disabled_action: &'static str) -> Result<()> {
|
||||
if self.runtime.vector_store {
|
||||
Ok(())
|
||||
} else {
|
||||
Err(FeatureNotEnabledError {
|
||||
disabled_action: "Passing `vector` as a query parameter",
|
||||
disabled_action,
|
||||
feature: "vector store",
|
||||
issue_link: "https://github.com/meilisearch/product/discussions/677",
|
||||
}
|
||||
@@ -81,19 +81,6 @@ impl RoFeatures {
|
||||
.into())
|
||||
}
|
||||
}
|
||||
|
||||
pub fn check_proximity_precision(&self) -> Result<()> {
|
||||
if self.runtime.proximity_precision {
|
||||
Ok(())
|
||||
} else {
|
||||
Err(FeatureNotEnabledError {
|
||||
disabled_action: "Using `proximityPrecision` index setting",
|
||||
feature: "proximity precision",
|
||||
issue_link: "https://github.com/orgs/meilisearch/discussions/710",
|
||||
}
|
||||
.into())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl FeatureData {
|
||||
@@ -107,6 +94,7 @@ impl FeatureData {
|
||||
runtime_features_db.get(&txn, EXPERIMENTAL_FEATURES)?.unwrap_or_default();
|
||||
let runtime = Arc::new(RwLock::new(RuntimeTogglableFeatures {
|
||||
metrics: instance_features.metrics || persisted_features.metrics,
|
||||
logs_route: instance_features.logs_route || persisted_features.logs_route,
|
||||
..persisted_features
|
||||
}));
|
||||
|
||||
|
||||
@@ -3,13 +3,13 @@ use std::sync::{Arc, RwLock};
|
||||
use std::time::Duration;
|
||||
use std::{fs, thread};
|
||||
|
||||
use log::error;
|
||||
use meilisearch_types::heed::types::{SerdeJson, Str};
|
||||
use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn};
|
||||
use meilisearch_types::milli::update::IndexerConfig;
|
||||
use meilisearch_types::milli::{FieldDistribution, Index};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use time::OffsetDateTime;
|
||||
use tracing::error;
|
||||
use uuid::Uuid;
|
||||
|
||||
use self::index_map::IndexMap;
|
||||
@@ -69,6 +69,10 @@ pub struct IndexMapper {
|
||||
/// Whether we open a meilisearch index with the MDB_WRITEMAP option or not.
|
||||
enable_mdb_writemap: bool,
|
||||
pub indexer_config: Arc<IndexerConfig>,
|
||||
|
||||
/// A few types of long running batches of tasks that act on a single index set this field
|
||||
/// so that a handle to the index is available from other threads (search) in an optimized manner.
|
||||
currently_updating_index: Arc<RwLock<Option<(String, Index)>>>,
|
||||
}
|
||||
|
||||
/// Whether the index is available for use or is forbidden to be inserted back in the index map
|
||||
@@ -151,6 +155,7 @@ impl IndexMapper {
|
||||
index_growth_amount,
|
||||
enable_mdb_writemap,
|
||||
indexer_config: Arc::new(indexer_config),
|
||||
currently_updating_index: Default::default(),
|
||||
})
|
||||
}
|
||||
|
||||
@@ -303,6 +308,14 @@ impl IndexMapper {
|
||||
|
||||
/// Return an index, may open it if it wasn't already opened.
|
||||
pub fn index(&self, rtxn: &RoTxn, name: &str) -> Result<Index> {
|
||||
if let Some((current_name, current_index)) =
|
||||
self.currently_updating_index.read().unwrap().as_ref()
|
||||
{
|
||||
if current_name == name {
|
||||
return Ok(current_index.clone());
|
||||
}
|
||||
}
|
||||
|
||||
let uuid = self
|
||||
.index_mapping
|
||||
.get(rtxn, name)?
|
||||
@@ -474,4 +487,8 @@ impl IndexMapper {
|
||||
pub fn indexer_config(&self) -> &IndexerConfig {
|
||||
&self.indexer_config
|
||||
}
|
||||
|
||||
pub fn set_currently_updating_index(&self, index: Option<(String, Index)>) {
|
||||
*self.currently_updating_index.write().unwrap() = index;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -30,16 +30,19 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
|
||||
index_mapper,
|
||||
features: _,
|
||||
max_number_of_tasks: _,
|
||||
max_number_of_batched_tasks: _,
|
||||
puffin_frame: _,
|
||||
wake_up: _,
|
||||
dumps_path: _,
|
||||
snapshots_path: _,
|
||||
auth_path: _,
|
||||
version_file_path: _,
|
||||
webhook_url: _,
|
||||
webhook_authorization_header: _,
|
||||
test_breakpoint_sdr: _,
|
||||
planned_failures: _,
|
||||
run_loop_iteration: _,
|
||||
currently_updating_index: _,
|
||||
embedders: _,
|
||||
} = scheduler;
|
||||
|
||||
let rtxn = env.read_txn().unwrap();
|
||||
|
||||
@@ -34,6 +34,7 @@ pub type TaskId = u32;
|
||||
|
||||
use std::collections::{BTreeMap, HashMap};
|
||||
use std::fs::File;
|
||||
use std::io::{self, BufReader, Read};
|
||||
use std::ops::{Bound, RangeBounds};
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::sync::atomic::AtomicBool;
|
||||
@@ -45,6 +46,8 @@ use dump::{KindDump, TaskDump, UpdateFile};
|
||||
pub use error::Error;
|
||||
pub use features::RoFeatures;
|
||||
use file_store::FileStore;
|
||||
use flate2::bufread::GzEncoder;
|
||||
use flate2::Compression;
|
||||
use meilisearch_types::error::ResponseError;
|
||||
use meilisearch_types::features::{InstanceTogglableFeatures, RuntimeTogglableFeatures};
|
||||
use meilisearch_types::heed::byteorder::BE;
|
||||
@@ -52,7 +55,9 @@ use meilisearch_types::heed::types::{SerdeBincode, SerdeJson, Str, I128};
|
||||
use meilisearch_types::heed::{self, Database, Env, PutFlags, RoTxn, RwTxn};
|
||||
use meilisearch_types::milli::documents::DocumentsBatchBuilder;
|
||||
use meilisearch_types::milli::update::IndexerConfig;
|
||||
use meilisearch_types::milli::vector::{Embedder, EmbedderOptions, EmbeddingConfigs};
|
||||
use meilisearch_types::milli::{self, CboRoaringBitmapCodec, Index, RoaringBitmapCodec, BEU32};
|
||||
use meilisearch_types::task_view::TaskView;
|
||||
use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task};
|
||||
use puffin::FrameView;
|
||||
use roaring::RoaringBitmap;
|
||||
@@ -169,8 +174,8 @@ impl ProcessingTasks {
|
||||
}
|
||||
|
||||
/// Set the processing tasks to an empty list
|
||||
fn stop_processing(&mut self) {
|
||||
self.processing = RoaringBitmap::new();
|
||||
fn stop_processing(&mut self) -> RoaringBitmap {
|
||||
std::mem::take(&mut self.processing)
|
||||
}
|
||||
|
||||
/// Returns `true` if there, at least, is one task that is currently processing that we must stop.
|
||||
@@ -240,6 +245,10 @@ pub struct IndexSchedulerOptions {
|
||||
pub snapshots_path: PathBuf,
|
||||
/// The path to the folder containing the dumps.
|
||||
pub dumps_path: PathBuf,
|
||||
/// The URL on which we must send the tasks statuses
|
||||
pub webhook_url: Option<String>,
|
||||
/// The value we will send into the Authorization HTTP header on the webhook URL
|
||||
pub webhook_authorization_header: Option<String>,
|
||||
/// The maximum size, in bytes, of the task index.
|
||||
pub task_db_size: usize,
|
||||
/// The size, in bytes, with which a meilisearch index is opened the first time of each meilisearch index.
|
||||
@@ -258,6 +267,9 @@ pub struct IndexSchedulerOptions {
|
||||
/// The maximum number of tasks stored in the task queue before starting
|
||||
/// to auto schedule task deletions.
|
||||
pub max_number_of_tasks: usize,
|
||||
/// If the autobatcher is allowed to automatically batch tasks
|
||||
/// it will only batch this defined number of tasks at once.
|
||||
pub max_number_of_batched_tasks: usize,
|
||||
/// The experimental features enabled for this instance.
|
||||
pub instance_features: InstanceTogglableFeatures,
|
||||
}
|
||||
@@ -316,6 +328,14 @@ pub struct IndexScheduler {
|
||||
/// the finished tasks automatically.
|
||||
pub(crate) max_number_of_tasks: usize,
|
||||
|
||||
/// The maximum number of tasks that will be batched together.
|
||||
pub(crate) max_number_of_batched_tasks: usize,
|
||||
|
||||
/// The webhook url we should send tasks to after processing every batches.
|
||||
pub(crate) webhook_url: Option<String>,
|
||||
/// The Authorization header to send to the webhook URL.
|
||||
pub(crate) webhook_authorization_header: Option<String>,
|
||||
|
||||
/// A frame to output the indexation profiling files to disk.
|
||||
pub(crate) puffin_frame: Arc<puffin::GlobalFrameView>,
|
||||
|
||||
@@ -331,9 +351,7 @@ pub struct IndexScheduler {
|
||||
/// The path to the version file of Meilisearch.
|
||||
pub(crate) version_file_path: PathBuf,
|
||||
|
||||
/// A few types of long running batches of tasks that act on a single index set this field
|
||||
/// so that a handle to the index is available from other threads (search) in an optimized manner.
|
||||
currently_updating_index: Arc<RwLock<Option<(String, Index)>>>,
|
||||
embedders: Arc<RwLock<HashMap<EmbedderOptions, Arc<Embedder>>>>,
|
||||
|
||||
// ================= test
|
||||
// The next entry is dedicated to the tests.
|
||||
@@ -373,12 +391,15 @@ impl IndexScheduler {
|
||||
wake_up: self.wake_up.clone(),
|
||||
autobatching_enabled: self.autobatching_enabled,
|
||||
max_number_of_tasks: self.max_number_of_tasks,
|
||||
max_number_of_batched_tasks: self.max_number_of_batched_tasks,
|
||||
puffin_frame: self.puffin_frame.clone(),
|
||||
snapshots_path: self.snapshots_path.clone(),
|
||||
dumps_path: self.dumps_path.clone(),
|
||||
auth_path: self.auth_path.clone(),
|
||||
version_file_path: self.version_file_path.clone(),
|
||||
currently_updating_index: self.currently_updating_index.clone(),
|
||||
webhook_url: self.webhook_url.clone(),
|
||||
webhook_authorization_header: self.webhook_authorization_header.clone(),
|
||||
embedders: self.embedders.clone(),
|
||||
#[cfg(test)]
|
||||
test_breakpoint_sdr: self.test_breakpoint_sdr.clone(),
|
||||
#[cfg(test)]
|
||||
@@ -471,11 +492,14 @@ impl IndexScheduler {
|
||||
puffin_frame: Arc::new(puffin::GlobalFrameView::default()),
|
||||
autobatching_enabled: options.autobatching_enabled,
|
||||
max_number_of_tasks: options.max_number_of_tasks,
|
||||
max_number_of_batched_tasks: options.max_number_of_batched_tasks,
|
||||
dumps_path: options.dumps_path,
|
||||
snapshots_path: options.snapshots_path,
|
||||
auth_path: options.auth_path,
|
||||
version_file_path: options.version_file_path,
|
||||
currently_updating_index: Arc::new(RwLock::new(None)),
|
||||
webhook_url: options.webhook_url,
|
||||
webhook_authorization_header: options.webhook_authorization_header,
|
||||
embedders: Default::default(),
|
||||
|
||||
#[cfg(test)]
|
||||
test_breakpoint_sdr,
|
||||
@@ -511,17 +535,17 @@ impl IndexScheduler {
|
||||
let budget = if Self::is_good_heed(tasks_path, DEFAULT_BUDGET) {
|
||||
DEFAULT_BUDGET
|
||||
} else {
|
||||
log::debug!("determining budget with dichotomic search");
|
||||
tracing::debug!("determining budget with dichotomic search");
|
||||
utils::dichotomic_search(DEFAULT_BUDGET / 2, |map_size| {
|
||||
Self::is_good_heed(tasks_path, map_size)
|
||||
})
|
||||
};
|
||||
|
||||
log::debug!("memmap budget: {budget}B");
|
||||
tracing::debug!("memmap budget: {budget}B");
|
||||
let mut budget = budget / 2;
|
||||
if task_db_size > (budget / 2) {
|
||||
task_db_size = clamp_to_page_size(budget * 2 / 5);
|
||||
log::debug!(
|
||||
tracing::debug!(
|
||||
"Decreasing max size of task DB to {task_db_size}B due to constrained memory space"
|
||||
);
|
||||
}
|
||||
@@ -531,13 +555,13 @@ impl IndexScheduler {
|
||||
let budget = budget;
|
||||
let task_db_size = task_db_size;
|
||||
|
||||
log::debug!("index budget: {budget}B");
|
||||
tracing::debug!("index budget: {budget}B");
|
||||
let mut index_count = budget / base_map_size;
|
||||
if index_count < 2 {
|
||||
// take a bit less than half of the budget to make sure we can always afford to open an index
|
||||
let map_size = (budget * 2) / 5;
|
||||
// single index of max budget
|
||||
log::debug!("1 index of {map_size}B can be opened simultaneously.");
|
||||
tracing::debug!("1 index of {map_size}B can be opened simultaneously.");
|
||||
return IndexBudget { map_size, index_count: 1, task_db_size };
|
||||
}
|
||||
// give us some space for an additional index when the cache is already full
|
||||
@@ -546,7 +570,7 @@ impl IndexScheduler {
|
||||
if index_count > max_index_count {
|
||||
index_count = max_index_count;
|
||||
}
|
||||
log::debug!("Up to {index_count} indexes of {base_map_size}B opened simultaneously.");
|
||||
tracing::debug!("Up to {index_count} indexes of {base_map_size}B opened simultaneously.");
|
||||
IndexBudget { map_size: base_map_size, index_count, task_db_size }
|
||||
}
|
||||
|
||||
@@ -593,7 +617,7 @@ impl IndexScheduler {
|
||||
Ok(TickOutcome::TickAgain(_)) => (),
|
||||
Ok(TickOutcome::WaitForSignal) => run.wake_up.wait(),
|
||||
Err(e) => {
|
||||
log::error!("{e}");
|
||||
tracing::error!("{e}");
|
||||
// Wait one second when an irrecoverable error occurs.
|
||||
if !e.is_recoverable() {
|
||||
std::thread::sleep(Duration::from_secs(1));
|
||||
@@ -610,15 +634,15 @@ impl IndexScheduler {
|
||||
let mut file = match File::create(format!("{}.puffin", now)) {
|
||||
Ok(file) => file,
|
||||
Err(e) => {
|
||||
log::error!("{e}");
|
||||
tracing::error!("{e}");
|
||||
continue;
|
||||
}
|
||||
};
|
||||
if let Err(e) = frame_view.save_to_writer(&mut file) {
|
||||
log::error!("{e}");
|
||||
tracing::error!("{e}");
|
||||
}
|
||||
if let Err(e) = file.sync_all() {
|
||||
log::error!("{e}");
|
||||
tracing::error!("{e}");
|
||||
}
|
||||
// We erase this frame view as it is no more useful. We want to
|
||||
// measure the new frames now that we exported the previous ones.
|
||||
@@ -658,13 +682,6 @@ impl IndexScheduler {
|
||||
/// If you need to fetch information from or perform an action on all indexes,
|
||||
/// see the `try_for_each_index` function.
|
||||
pub fn index(&self, name: &str) -> Result<Index> {
|
||||
if let Some((current_name, current_index)) =
|
||||
self.currently_updating_index.read().unwrap().as_ref()
|
||||
{
|
||||
if current_name == name {
|
||||
return Ok(current_index.clone());
|
||||
}
|
||||
}
|
||||
let rtxn = self.env.read_txn()?;
|
||||
self.index_mapper.index(&rtxn, name)
|
||||
}
|
||||
@@ -1145,7 +1162,7 @@ impl IndexScheduler {
|
||||
};
|
||||
|
||||
// Reset the currently updating index to relinquish the index handle
|
||||
*self.currently_updating_index.write().unwrap() = None;
|
||||
self.index_mapper.set_currently_updating_index(None);
|
||||
|
||||
#[cfg(test)]
|
||||
self.maybe_fail(tests::FailureLocation::AcquiringWtxn)?;
|
||||
@@ -1173,10 +1190,10 @@ impl IndexScheduler {
|
||||
self.update_task(&mut wtxn, &task)
|
||||
.map_err(|e| Error::TaskDatabaseUpdate(Box::new(e)))?;
|
||||
if let Err(e) = self.delete_persisted_task_data(&task) {
|
||||
log::error!("Failure to delete the content files associated with task {}. Error: {e}", task.uid);
|
||||
tracing::error!("Failure to delete the content files associated with task {}. Error: {e}", task.uid);
|
||||
}
|
||||
}
|
||||
log::info!("A batch of tasks was successfully completed.");
|
||||
tracing::info!("A batch of tasks was successfully completed.");
|
||||
}
|
||||
// If we have an abortion error we must stop the tick here and re-schedule tasks.
|
||||
Err(Error::Milli(milli::Error::InternalError(
|
||||
@@ -1230,7 +1247,7 @@ impl IndexScheduler {
|
||||
self.maybe_fail(tests::FailureLocation::UpdatingTaskAfterProcessBatchFailure)?;
|
||||
|
||||
if let Err(e) = self.delete_persisted_task_data(&task) {
|
||||
log::error!("Failure to delete the content files associated with task {}. Error: {e}", task.uid);
|
||||
tracing::error!("Failure to delete the content files associated with task {}. Error: {e}", task.uid);
|
||||
}
|
||||
self.update_task(&mut wtxn, &task)
|
||||
.map_err(|e| Error::TaskDatabaseUpdate(Box::new(e)))?;
|
||||
@@ -1238,19 +1255,99 @@ impl IndexScheduler {
|
||||
}
|
||||
}
|
||||
|
||||
self.processing_tasks.write().unwrap().stop_processing();
|
||||
let processed = self.processing_tasks.write().unwrap().stop_processing();
|
||||
|
||||
#[cfg(test)]
|
||||
self.maybe_fail(tests::FailureLocation::CommittingWtxn)?;
|
||||
|
||||
wtxn.commit().map_err(Error::HeedTransaction)?;
|
||||
|
||||
// We shouldn't crash the tick function if we can't send data to the webhook.
|
||||
let _ = self.notify_webhook(&processed);
|
||||
|
||||
#[cfg(test)]
|
||||
self.breakpoint(Breakpoint::AfterProcessing);
|
||||
|
||||
Ok(TickOutcome::TickAgain(processed_tasks))
|
||||
}
|
||||
|
||||
/// Once the tasks changes have been commited we must send all the tasks that were updated to our webhook if there is one.
|
||||
fn notify_webhook(&self, updated: &RoaringBitmap) -> Result<()> {
|
||||
if let Some(ref url) = self.webhook_url {
|
||||
struct TaskReader<'a, 'b> {
|
||||
rtxn: &'a RoTxn<'a>,
|
||||
index_scheduler: &'a IndexScheduler,
|
||||
tasks: &'b mut roaring::bitmap::Iter<'b>,
|
||||
buffer: Vec<u8>,
|
||||
written: usize,
|
||||
}
|
||||
|
||||
impl<'a, 'b> Read for TaskReader<'a, 'b> {
|
||||
fn read(&mut self, mut buf: &mut [u8]) -> std::io::Result<usize> {
|
||||
if self.buffer.is_empty() {
|
||||
match self.tasks.next() {
|
||||
None => return Ok(0),
|
||||
Some(task_id) => {
|
||||
let task = self
|
||||
.index_scheduler
|
||||
.get_task(self.rtxn, task_id)
|
||||
.map_err(|err| io::Error::new(io::ErrorKind::Other, err))?
|
||||
.ok_or_else(|| {
|
||||
io::Error::new(
|
||||
io::ErrorKind::Other,
|
||||
Error::CorruptedTaskQueue,
|
||||
)
|
||||
})?;
|
||||
|
||||
serde_json::to_writer(
|
||||
&mut self.buffer,
|
||||
&TaskView::from_task(&task),
|
||||
)?;
|
||||
self.buffer.push(b'\n');
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let mut to_write = &self.buffer[self.written..];
|
||||
let wrote = io::copy(&mut to_write, &mut buf)?;
|
||||
self.written += wrote as usize;
|
||||
|
||||
// we wrote everything and must refresh our buffer on the next call
|
||||
if self.written == self.buffer.len() {
|
||||
self.written = 0;
|
||||
self.buffer.clear();
|
||||
}
|
||||
|
||||
Ok(wrote as usize)
|
||||
}
|
||||
}
|
||||
|
||||
let rtxn = self.env.read_txn()?;
|
||||
|
||||
let task_reader = TaskReader {
|
||||
rtxn: &rtxn,
|
||||
index_scheduler: self,
|
||||
tasks: &mut updated.into_iter(),
|
||||
buffer: Vec::with_capacity(50), // on average a task is around ~100 bytes
|
||||
written: 0,
|
||||
};
|
||||
|
||||
// let reader = GzEncoder::new(BufReader::new(task_reader), Compression::default());
|
||||
let reader = GzEncoder::new(BufReader::new(task_reader), Compression::default());
|
||||
let request = ureq::post(url).set("Content-Encoding", "gzip");
|
||||
let request = match &self.webhook_authorization_header {
|
||||
Some(header) => request.set("Authorization", header),
|
||||
None => request,
|
||||
};
|
||||
|
||||
if let Err(e) = request.send(reader) {
|
||||
tracing::error!("While sending data to the webhook: {e}");
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Register a task to cleanup the task queue if needed
|
||||
fn cleanup_task_queue(&self) -> Result<()> {
|
||||
let rtxn = self.env.read_txn().map_err(Error::HeedTransaction)?;
|
||||
@@ -1270,12 +1367,12 @@ impl IndexScheduler {
|
||||
// /!\ the len must be at least 2 or else we might enter an infinite loop where we only delete
|
||||
// the deletion tasks we enqueued ourselves.
|
||||
if to_delete.len() < 2 {
|
||||
log::warn!("The task queue is almost full, but no task can be deleted yet.");
|
||||
tracing::warn!("The task queue is almost full, but no task can be deleted yet.");
|
||||
// the only thing we can do is hope that the user tasks are going to finish
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
log::info!(
|
||||
tracing::info!(
|
||||
"The task queue is almost full. Deleting the oldest {} finished tasks.",
|
||||
to_delete.len()
|
||||
);
|
||||
@@ -1325,6 +1422,40 @@ impl IndexScheduler {
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: consider using a type alias or a struct embedder/template
|
||||
pub fn embedders(
|
||||
&self,
|
||||
embedding_configs: Vec<(String, milli::vector::EmbeddingConfig)>,
|
||||
) -> Result<EmbeddingConfigs> {
|
||||
let res: Result<_> = embedding_configs
|
||||
.into_iter()
|
||||
.map(|(name, milli::vector::EmbeddingConfig { embedder_options, prompt })| {
|
||||
let prompt =
|
||||
Arc::new(prompt.try_into().map_err(meilisearch_types::milli::Error::from)?);
|
||||
// optimistically return existing embedder
|
||||
{
|
||||
let embedders = self.embedders.read().unwrap();
|
||||
if let Some(embedder) = embedders.get(&embedder_options) {
|
||||
return Ok((name, (embedder.clone(), prompt)));
|
||||
}
|
||||
}
|
||||
|
||||
// add missing embedder
|
||||
let embedder = Arc::new(
|
||||
Embedder::new(embedder_options.clone())
|
||||
.map_err(meilisearch_types::milli::vector::Error::from)
|
||||
.map_err(meilisearch_types::milli::Error::from)?,
|
||||
);
|
||||
{
|
||||
let mut embedders = self.embedders.write().unwrap();
|
||||
embedders.insert(embedder_options, embedder.clone());
|
||||
}
|
||||
Ok((name, (embedder, prompt)))
|
||||
})
|
||||
.collect();
|
||||
res.map(EmbeddingConfigs::new)
|
||||
}
|
||||
|
||||
/// Blocks the thread until the test handle asks to progress to/through this breakpoint.
|
||||
///
|
||||
/// Two messages are sent through the channel for each breakpoint.
|
||||
@@ -1630,6 +1761,8 @@ mod tests {
|
||||
indexes_path: tempdir.path().join("indexes"),
|
||||
snapshots_path: tempdir.path().join("snapshots"),
|
||||
dumps_path: tempdir.path().join("dumps"),
|
||||
webhook_url: None,
|
||||
webhook_authorization_header: None,
|
||||
task_db_size: 1000 * 1000, // 1 MB, we don't use MiB on purpose.
|
||||
index_base_map_size: 1000 * 1000, // 1 MB, we don't use MiB on purpose.
|
||||
enable_mdb_writemap: false,
|
||||
@@ -1638,6 +1771,7 @@ mod tests {
|
||||
indexer_config,
|
||||
autobatching_enabled: true,
|
||||
max_number_of_tasks: 1_000_000,
|
||||
max_number_of_batched_tasks: usize::MAX,
|
||||
instance_features: Default::default(),
|
||||
};
|
||||
configuration(&mut options);
|
||||
@@ -2110,10 +2244,7 @@ mod tests {
|
||||
.unwrap();
|
||||
index_scheduler.assert_internally_consistent();
|
||||
}
|
||||
for _ in 0..2 {
|
||||
handle.advance_one_successful_batch();
|
||||
index_scheduler.assert_internally_consistent();
|
||||
}
|
||||
handle.advance_one_successful_batch();
|
||||
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "task_deletion_processed");
|
||||
}
|
||||
|
||||
@@ -34,12 +34,10 @@ catto: { number_of_documents: 1, field_distribution: {"id": 1} }
|
||||
[timestamp] [3,]
|
||||
----------------------------------------------------------------------
|
||||
### Started At:
|
||||
[timestamp] [2,]
|
||||
[timestamp] [3,]
|
||||
[timestamp] [2,3,]
|
||||
----------------------------------------------------------------------
|
||||
### Finished At:
|
||||
[timestamp] [2,]
|
||||
[timestamp] [3,]
|
||||
[timestamp] [2,3,]
|
||||
----------------------------------------------------------------------
|
||||
### File Store:
|
||||
00000000-0000-0000-0000-000000000001
|
||||
|
||||
@@ -11,6 +11,6 @@ edition.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
insta = { version = "^1.29.0", features = ["json", "redactions"] }
|
||||
insta = { version = "^1.34.0", features = ["json", "redactions"] }
|
||||
md5 = "0.7.0"
|
||||
once_cell = "1.17"
|
||||
once_cell = "1.19"
|
||||
|
||||
@@ -11,16 +11,16 @@ edition.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
base64 = "0.21.0"
|
||||
enum-iterator = "1.4.0"
|
||||
base64 = "0.21.7"
|
||||
enum-iterator = "1.5.0"
|
||||
hmac = "0.12.1"
|
||||
maplit = "1.0.2"
|
||||
meilisearch-types = { path = "../meilisearch-types" }
|
||||
rand = "0.8.5"
|
||||
roaring = { version = "0.10.1", features = ["serde"] }
|
||||
serde = { version = "1.0.160", features = ["derive"] }
|
||||
serde_json = { version = "1.0.95", features = ["preserve_order"] }
|
||||
sha2 = "0.10.6"
|
||||
thiserror = "1.0.40"
|
||||
time = { version = "0.3.20", features = ["serde-well-known", "formatting", "parsing", "macros"] }
|
||||
uuid = { version = "1.3.1", features = ["serde", "v4"] }
|
||||
roaring = { version = "0.10.2", features = ["serde"] }
|
||||
serde = { version = "1.0.195", features = ["derive"] }
|
||||
serde_json = { version = "1.0.111", features = ["preserve_order"] }
|
||||
sha2 = "0.10.8"
|
||||
thiserror = "1.0.56"
|
||||
time = { version = "0.3.31", features = ["serde-well-known", "formatting", "parsing", "macros"] }
|
||||
uuid = { version = "1.6.1", features = ["serde", "v4"] }
|
||||
|
||||
@@ -11,31 +11,31 @@ edition.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
actix-web = { version = "4.3.1", default-features = false }
|
||||
anyhow = "1.0.70"
|
||||
actix-web = { version = "4.5.1", default-features = false }
|
||||
anyhow = "1.0.79"
|
||||
convert_case = "0.6.0"
|
||||
csv = "1.2.1"
|
||||
deserr = { version = "0.6.0", features = ["actix-web"] }
|
||||
either = { version = "1.8.1", features = ["serde"] }
|
||||
enum-iterator = "1.4.0"
|
||||
csv = "1.3.0"
|
||||
deserr = { version = "0.6.1", features = ["actix-web"] }
|
||||
either = { version = "1.9.0", features = ["serde"] }
|
||||
enum-iterator = "1.5.0"
|
||||
file-store = { path = "../file-store" }
|
||||
flate2 = "1.0.25"
|
||||
flate2 = "1.0.28"
|
||||
fst = "0.4.7"
|
||||
memmap2 = "0.7.1"
|
||||
milli = { path = "../milli" }
|
||||
roaring = { version = "0.10.1", features = ["serde"] }
|
||||
serde = { version = "1.0.160", features = ["derive"] }
|
||||
roaring = { version = "0.10.2", features = ["serde"] }
|
||||
serde = { version = "1.0.195", features = ["derive"] }
|
||||
serde-cs = "0.2.4"
|
||||
serde_json = "1.0.95"
|
||||
tar = "0.4.38"
|
||||
tempfile = "3.5.0"
|
||||
thiserror = "1.0.40"
|
||||
time = { version = "0.3.20", features = ["serde-well-known", "formatting", "parsing", "macros"] }
|
||||
tokio = "1.27"
|
||||
uuid = { version = "1.3.1", features = ["serde", "v4"] }
|
||||
serde_json = "1.0.111"
|
||||
tar = "0.4.40"
|
||||
tempfile = "3.9.0"
|
||||
thiserror = "1.0.56"
|
||||
time = { version = "0.3.31", features = ["serde-well-known", "formatting", "parsing", "macros"] }
|
||||
tokio = "1.35"
|
||||
uuid = { version = "1.6.1", features = ["serde", "v4"] }
|
||||
|
||||
[dev-dependencies]
|
||||
insta = "1.29.0"
|
||||
insta = "1.34.0"
|
||||
meili-snap = { path = "../meili-snap" }
|
||||
|
||||
[features]
|
||||
|
||||
@@ -188,3 +188,4 @@ merge_with_error_impl_take_error_message!(ParseOffsetDateTimeError);
|
||||
merge_with_error_impl_take_error_message!(ParseTaskKindError);
|
||||
merge_with_error_impl_take_error_message!(ParseTaskStatusError);
|
||||
merge_with_error_impl_take_error_message!(IndexUidFormatError);
|
||||
merge_with_error_impl_take_error_message!(InvalidSearchSemanticRatio);
|
||||
|
||||
@@ -222,6 +222,8 @@ InvalidVectorsType , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidDocumentId , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidDocumentLimit , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidDocumentOffset , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidEmbedder , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidHybridQuery , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidIndexLimit , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidIndexOffset , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidIndexPrimaryKey , InvalidRequest , BAD_REQUEST ;
|
||||
@@ -233,6 +235,7 @@ InvalidSearchAttributesToRetrieve , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSearchCropLength , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSearchCropMarker , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSearchFacets , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSearchSemanticRatio , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidFacetSearchFacetName , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSearchFilter , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSearchHighlightPostTag , InvalidRequest , BAD_REQUEST ;
|
||||
@@ -256,6 +259,7 @@ InvalidSettingsProximityPrecision , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSettingsFaceting , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSettingsFilterableAttributes , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSettingsPagination , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSettingsEmbedders , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSettingsRankingRules , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSettingsSearchableAttributes , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSettingsSortableAttributes , InvalidRequest , BAD_REQUEST ;
|
||||
@@ -295,15 +299,20 @@ MissingFacetSearchFacetName , InvalidRequest , BAD_REQUEST ;
|
||||
MissingIndexUid , InvalidRequest , BAD_REQUEST ;
|
||||
MissingMasterKey , Auth , UNAUTHORIZED ;
|
||||
MissingPayload , InvalidRequest , BAD_REQUEST ;
|
||||
MissingSearchHybrid , InvalidRequest , BAD_REQUEST ;
|
||||
MissingSwapIndexes , InvalidRequest , BAD_REQUEST ;
|
||||
MissingTaskFilters , InvalidRequest , BAD_REQUEST ;
|
||||
NoSpaceLeftOnDevice , System , UNPROCESSABLE_ENTITY;
|
||||
PayloadTooLarge , InvalidRequest , PAYLOAD_TOO_LARGE ;
|
||||
TaskNotFound , InvalidRequest , NOT_FOUND ;
|
||||
TooManyOpenFiles , System , UNPROCESSABLE_ENTITY ;
|
||||
TooManyVectors , InvalidRequest , BAD_REQUEST ;
|
||||
UnretrievableDocument , Internal , BAD_REQUEST ;
|
||||
UnretrievableErrorCode , InvalidRequest , BAD_REQUEST ;
|
||||
UnsupportedMediaType , InvalidRequest , UNSUPPORTED_MEDIA_TYPE
|
||||
UnsupportedMediaType , InvalidRequest , UNSUPPORTED_MEDIA_TYPE ;
|
||||
|
||||
// Experimental features
|
||||
VectorEmbeddingError , InvalidRequest , BAD_REQUEST
|
||||
}
|
||||
|
||||
impl ErrorCode for JoinError {
|
||||
@@ -336,6 +345,16 @@ impl ErrorCode for milli::Error {
|
||||
UserError::InvalidDocumentId { .. } | UserError::TooManyDocumentIds { .. } => {
|
||||
Code::InvalidDocumentId
|
||||
}
|
||||
UserError::MissingDocumentField(_) => Code::InvalidDocumentFields,
|
||||
UserError::InvalidFieldForSource { .. }
|
||||
| UserError::MissingFieldForSource { .. }
|
||||
| UserError::InvalidOpenAiModel { .. }
|
||||
| UserError::InvalidOpenAiModelDimensions { .. }
|
||||
| UserError::InvalidOpenAiModelDimensionsMax { .. }
|
||||
| UserError::InvalidSettingsDimensions { .. }
|
||||
| UserError::InvalidPrompt(_) => Code::InvalidSettingsEmbedders,
|
||||
UserError::TooManyEmbedders(_) => Code::InvalidSettingsEmbedders,
|
||||
UserError::InvalidPromptForEmbeddings(..) => Code::InvalidSettingsEmbedders,
|
||||
UserError::NoPrimaryKeyCandidateFound => Code::IndexPrimaryKeyNoCandidateFound,
|
||||
UserError::MultiplePrimaryKeyCandidatesFound { .. } => {
|
||||
Code::IndexPrimaryKeyMultipleCandidatesFound
|
||||
@@ -353,11 +372,15 @@ impl ErrorCode for milli::Error {
|
||||
UserError::CriterionError(_) => Code::InvalidSettingsRankingRules,
|
||||
UserError::InvalidGeoField { .. } => Code::InvalidDocumentGeoField,
|
||||
UserError::InvalidVectorDimensions { .. } => Code::InvalidVectorDimensions,
|
||||
UserError::InvalidVectorsMapType { .. } => Code::InvalidVectorsType,
|
||||
UserError::InvalidVectorsType { .. } => Code::InvalidVectorsType,
|
||||
UserError::TooManyVectors(_, _) => Code::TooManyVectors,
|
||||
UserError::SortError(_) => Code::InvalidSearchSort,
|
||||
UserError::InvalidMinTypoWordLenSetting(_, _) => {
|
||||
Code::InvalidSettingsTypoTolerance
|
||||
}
|
||||
UserError::InvalidEmbedder(_) => Code::InvalidEmbedder,
|
||||
UserError::VectorEmbeddingError(_) => Code::VectorEmbeddingError,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -445,6 +468,15 @@ impl fmt::Display for DeserrParseIntError {
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for deserr_codes::InvalidSearchSemanticRatio {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(
|
||||
f,
|
||||
"the value of `semanticRatio` is invalid, expected a float between `0.0` and `1.0`."
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
#[macro_export]
|
||||
macro_rules! internal_error {
|
||||
($target:ty : $($other:path), *) => {
|
||||
|
||||
@@ -3,14 +3,14 @@ use serde::{Deserialize, Serialize};
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, Copy, Default, PartialEq, Eq)]
|
||||
#[serde(rename_all = "camelCase", default)]
|
||||
pub struct RuntimeTogglableFeatures {
|
||||
pub score_details: bool,
|
||||
pub vector_store: bool,
|
||||
pub metrics: bool,
|
||||
pub logs_route: bool,
|
||||
pub export_puffin_reports: bool,
|
||||
pub proximity_precision: bool,
|
||||
}
|
||||
|
||||
#[derive(Default, Debug, Clone, Copy)]
|
||||
pub struct InstanceTogglableFeatures {
|
||||
pub metrics: bool,
|
||||
pub logs_route: bool,
|
||||
}
|
||||
|
||||
@@ -9,6 +9,7 @@ pub mod index_uid_pattern;
|
||||
pub mod keys;
|
||||
pub mod settings;
|
||||
pub mod star_or;
|
||||
pub mod task_view;
|
||||
pub mod tasks;
|
||||
pub mod versioning;
|
||||
pub use milli::{heed, Index};
|
||||
|
||||
@@ -199,6 +199,10 @@ pub struct Settings<T> {
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSettingsPagination>)]
|
||||
pub pagination: Setting<PaginationSettings>,
|
||||
|
||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSettingsEmbedders>)]
|
||||
pub embedders: Setting<BTreeMap<String, Setting<milli::vector::settings::EmbeddingSettings>>>,
|
||||
|
||||
#[serde(skip)]
|
||||
#[deserr(skip)]
|
||||
pub _kind: PhantomData<T>,
|
||||
@@ -222,6 +226,7 @@ impl Settings<Checked> {
|
||||
typo_tolerance: Setting::Reset,
|
||||
faceting: Setting::Reset,
|
||||
pagination: Setting::Reset,
|
||||
embedders: Setting::Reset,
|
||||
_kind: PhantomData,
|
||||
}
|
||||
}
|
||||
@@ -243,6 +248,7 @@ impl Settings<Checked> {
|
||||
typo_tolerance,
|
||||
faceting,
|
||||
pagination,
|
||||
embedders,
|
||||
..
|
||||
} = self;
|
||||
|
||||
@@ -262,6 +268,7 @@ impl Settings<Checked> {
|
||||
typo_tolerance,
|
||||
faceting,
|
||||
pagination,
|
||||
embedders,
|
||||
_kind: PhantomData,
|
||||
}
|
||||
}
|
||||
@@ -307,9 +314,25 @@ impl Settings<Unchecked> {
|
||||
typo_tolerance: self.typo_tolerance,
|
||||
faceting: self.faceting,
|
||||
pagination: self.pagination,
|
||||
embedders: self.embedders,
|
||||
_kind: PhantomData,
|
||||
}
|
||||
}
|
||||
|
||||
/// Validates these settings and returns them once checked.
///
/// Only the embedder settings are validated here; any error raised while
/// checking them is returned as is.
pub fn validate(self) -> Result<Self, milli::Error> {
    let validated = self.validate_embedding_settings()?;
    Ok(validated)
}
|
||||
|
||||
fn validate_embedding_settings(mut self) -> Result<Self, milli::Error> {
|
||||
let Setting::Set(mut configs) = self.embedders else { return Ok(self) };
|
||||
for (name, config) in configs.iter_mut() {
|
||||
let config_to_check = std::mem::take(config);
|
||||
let checked_config = milli::update::validate_embedding_settings(config_to_check, name)?;
|
||||
*config = checked_config
|
||||
}
|
||||
self.embedders = Setting::Set(configs);
|
||||
Ok(self)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
@@ -490,6 +513,12 @@ pub fn apply_settings_to_builder(
|
||||
Setting::Reset => builder.reset_pagination_max_total_hits(),
|
||||
Setting::NotSet => (),
|
||||
}
|
||||
|
||||
match settings.embedders.clone() {
|
||||
Setting::Set(value) => builder.set_embedder_settings(value),
|
||||
Setting::Reset => builder.reset_embedder_settings(),
|
||||
Setting::NotSet => (),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn settings(
|
||||
@@ -571,6 +600,13 @@ pub fn settings(
|
||||
),
|
||||
};
|
||||
|
||||
let embedders: BTreeMap<_, _> = index
|
||||
.embedding_configs(rtxn)?
|
||||
.into_iter()
|
||||
.map(|(name, config)| (name, Setting::Set(config.into())))
|
||||
.collect();
|
||||
let embedders = if embedders.is_empty() { Setting::NotSet } else { Setting::Set(embedders) };
|
||||
|
||||
Ok(Settings {
|
||||
displayed_attributes: match displayed_attributes {
|
||||
Some(attrs) => Setting::Set(attrs),
|
||||
@@ -591,14 +627,12 @@ pub fn settings(
|
||||
Some(field) => Setting::Set(field),
|
||||
None => Setting::Reset,
|
||||
},
|
||||
proximity_precision: match proximity_precision {
|
||||
Some(precision) => Setting::Set(precision),
|
||||
None => Setting::Reset,
|
||||
},
|
||||
proximity_precision: Setting::Set(proximity_precision.unwrap_or_default()),
|
||||
synonyms: Setting::Set(synonyms),
|
||||
typo_tolerance: Setting::Set(typo_tolerance),
|
||||
faceting: Setting::Set(faceting),
|
||||
pagination: Setting::Set(pagination),
|
||||
embedders,
|
||||
_kind: PhantomData,
|
||||
})
|
||||
}
|
||||
@@ -699,27 +733,28 @@ impl From<RankingRuleView> for Criterion {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserr, Serialize, Deserialize)]
|
||||
#[derive(Default, Debug, Clone, Copy, PartialEq, Eq, Deserr, Serialize, Deserialize)]
|
||||
#[serde(deny_unknown_fields, rename_all = "camelCase")]
|
||||
#[deserr(error = DeserrJsonError<InvalidSettingsProximityPrecision>, rename_all = camelCase, deny_unknown_fields)]
|
||||
pub enum ProximityPrecisionView {
|
||||
WordScale,
|
||||
AttributeScale,
|
||||
#[default]
|
||||
ByWord,
|
||||
ByAttribute,
|
||||
}
|
||||
|
||||
impl From<ProximityPrecision> for ProximityPrecisionView {
|
||||
fn from(value: ProximityPrecision) -> Self {
|
||||
match value {
|
||||
ProximityPrecision::WordScale => ProximityPrecisionView::WordScale,
|
||||
ProximityPrecision::AttributeScale => ProximityPrecisionView::AttributeScale,
|
||||
ProximityPrecision::ByWord => ProximityPrecisionView::ByWord,
|
||||
ProximityPrecision::ByAttribute => ProximityPrecisionView::ByAttribute,
|
||||
}
|
||||
}
|
||||
}
|
||||
impl From<ProximityPrecisionView> for ProximityPrecision {
|
||||
fn from(value: ProximityPrecisionView) -> Self {
|
||||
match value {
|
||||
ProximityPrecisionView::WordScale => ProximityPrecision::WordScale,
|
||||
ProximityPrecisionView::AttributeScale => ProximityPrecision::AttributeScale,
|
||||
ProximityPrecisionView::ByWord => ProximityPrecision::ByWord,
|
||||
ProximityPrecisionView::ByAttribute => ProximityPrecision::ByAttribute,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -747,6 +782,7 @@ pub(crate) mod test {
|
||||
typo_tolerance: Setting::NotSet,
|
||||
faceting: Setting::NotSet,
|
||||
pagination: Setting::NotSet,
|
||||
embedders: Setting::NotSet,
|
||||
_kind: PhantomData::<Unchecked>,
|
||||
};
|
||||
|
||||
@@ -772,6 +808,7 @@ pub(crate) mod test {
|
||||
typo_tolerance: Setting::NotSet,
|
||||
faceting: Setting::NotSet,
|
||||
pagination: Setting::NotSet,
|
||||
embedders: Setting::NotSet,
|
||||
_kind: PhantomData::<Unchecked>,
|
||||
};
|
||||
|
||||
|
||||
139
meilisearch-types/src/task_view.rs
Normal file
139
meilisearch-types/src/task_view.rs
Normal file
@@ -0,0 +1,139 @@
|
||||
use serde::Serialize;
|
||||
use time::{Duration, OffsetDateTime};
|
||||
|
||||
use crate::error::ResponseError;
|
||||
use crate::settings::{Settings, Unchecked};
|
||||
use crate::tasks::{serialize_duration, Details, IndexSwap, Kind, Status, Task, TaskId};
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct TaskView {
|
||||
pub uid: TaskId,
|
||||
#[serde(default)]
|
||||
pub index_uid: Option<String>,
|
||||
pub status: Status,
|
||||
#[serde(rename = "type")]
|
||||
pub kind: Kind,
|
||||
pub canceled_by: Option<TaskId>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub details: Option<DetailsView>,
|
||||
pub error: Option<ResponseError>,
|
||||
#[serde(serialize_with = "serialize_duration", default)]
|
||||
pub duration: Option<Duration>,
|
||||
#[serde(with = "time::serde::rfc3339")]
|
||||
pub enqueued_at: OffsetDateTime,
|
||||
#[serde(with = "time::serde::rfc3339::option", default)]
|
||||
pub started_at: Option<OffsetDateTime>,
|
||||
#[serde(with = "time::serde::rfc3339::option", default)]
|
||||
pub finished_at: Option<OffsetDateTime>,
|
||||
}
|
||||
|
||||
impl TaskView {
|
||||
pub fn from_task(task: &Task) -> TaskView {
|
||||
TaskView {
|
||||
uid: task.uid,
|
||||
index_uid: task.index_uid().map(ToOwned::to_owned),
|
||||
status: task.status,
|
||||
kind: task.kind.as_kind(),
|
||||
canceled_by: task.canceled_by,
|
||||
details: task.details.clone().map(DetailsView::from),
|
||||
error: task.error.clone(),
|
||||
duration: task.started_at.zip(task.finished_at).map(|(start, end)| end - start),
|
||||
enqueued_at: task.enqueued_at,
|
||||
started_at: task.started_at,
|
||||
finished_at: task.finished_at,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Default, Debug, PartialEq, Eq, Clone, Serialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct DetailsView {
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub received_documents: Option<u64>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub indexed_documents: Option<Option<u64>>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub primary_key: Option<Option<String>>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub provided_ids: Option<usize>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub deleted_documents: Option<Option<u64>>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub matched_tasks: Option<u64>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub canceled_tasks: Option<Option<u64>>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub deleted_tasks: Option<Option<u64>>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub original_filter: Option<Option<String>>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub dump_uid: Option<Option<String>>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
#[serde(flatten)]
|
||||
pub settings: Option<Box<Settings<Unchecked>>>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub swaps: Option<Vec<IndexSwap>>,
|
||||
}
|
||||
|
||||
impl From<Details> for DetailsView {
|
||||
fn from(details: Details) -> Self {
|
||||
match details {
|
||||
Details::DocumentAdditionOrUpdate { received_documents, indexed_documents } => {
|
||||
DetailsView {
|
||||
received_documents: Some(received_documents),
|
||||
indexed_documents: Some(indexed_documents),
|
||||
..DetailsView::default()
|
||||
}
|
||||
}
|
||||
Details::SettingsUpdate { settings } => {
|
||||
DetailsView { settings: Some(settings), ..DetailsView::default() }
|
||||
}
|
||||
Details::IndexInfo { primary_key } => {
|
||||
DetailsView { primary_key: Some(primary_key), ..DetailsView::default() }
|
||||
}
|
||||
Details::DocumentDeletion {
|
||||
provided_ids: received_document_ids,
|
||||
deleted_documents,
|
||||
} => DetailsView {
|
||||
provided_ids: Some(received_document_ids),
|
||||
deleted_documents: Some(deleted_documents),
|
||||
original_filter: Some(None),
|
||||
..DetailsView::default()
|
||||
},
|
||||
Details::DocumentDeletionByFilter { original_filter, deleted_documents } => {
|
||||
DetailsView {
|
||||
provided_ids: Some(0),
|
||||
original_filter: Some(Some(original_filter)),
|
||||
deleted_documents: Some(deleted_documents),
|
||||
..DetailsView::default()
|
||||
}
|
||||
}
|
||||
Details::ClearAll { deleted_documents } => {
|
||||
DetailsView { deleted_documents: Some(deleted_documents), ..DetailsView::default() }
|
||||
}
|
||||
Details::TaskCancelation { matched_tasks, canceled_tasks, original_filter } => {
|
||||
DetailsView {
|
||||
matched_tasks: Some(matched_tasks),
|
||||
canceled_tasks: Some(canceled_tasks),
|
||||
original_filter: Some(Some(original_filter)),
|
||||
..DetailsView::default()
|
||||
}
|
||||
}
|
||||
Details::TaskDeletion { matched_tasks, deleted_tasks, original_filter } => {
|
||||
DetailsView {
|
||||
matched_tasks: Some(matched_tasks),
|
||||
deleted_tasks: Some(deleted_tasks),
|
||||
original_filter: Some(Some(original_filter)),
|
||||
..DetailsView::default()
|
||||
}
|
||||
}
|
||||
Details::Dump { dump_uid } => {
|
||||
DetailsView { dump_uid: Some(dump_uid), ..DetailsView::default() }
|
||||
}
|
||||
Details::IndexSwap { swaps } => {
|
||||
DetailsView { swaps: Some(swaps), ..Default::default() }
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -13,123 +13,126 @@ license.workspace = true
|
||||
default-run = "meilisearch"
|
||||
|
||||
[dependencies]
|
||||
actix-cors = "0.6.4"
|
||||
actix-http = { version = "3.3.1", default-features = false, features = [
|
||||
actix-cors = "0.7.0"
|
||||
actix-http = { version = "3.6.0", default-features = false, features = [
|
||||
"compress-brotli",
|
||||
"compress-gzip",
|
||||
"rustls",
|
||||
"rustls-0_21",
|
||||
] }
|
||||
actix-utils = "3.0.1"
|
||||
actix-web = { version = "4.3.1", default-features = false, features = [
|
||||
actix-web = { version = "4.5.1", default-features = false, features = [
|
||||
"macros",
|
||||
"compress-brotli",
|
||||
"compress-gzip",
|
||||
"cookies",
|
||||
"rustls",
|
||||
"rustls-0_21",
|
||||
] }
|
||||
actix-web-static-files = { git = "https://github.com/kilork/actix-web-static-files.git", rev = "2d3b6160", optional = true }
|
||||
anyhow = { version = "1.0.70", features = ["backtrace"] }
|
||||
anyhow = { version = "1.0.79", features = ["backtrace"] }
|
||||
async-stream = "0.3.5"
|
||||
async-trait = "0.1.68"
|
||||
bstr = "1.4.0"
|
||||
async-trait = "0.1.77"
|
||||
bstr = "1.9.0"
|
||||
byte-unit = { version = "4.0.19", default-features = false, features = [
|
||||
"std",
|
||||
"serde",
|
||||
] }
|
||||
bytes = "1.4.0"
|
||||
clap = { version = "4.2.1", features = ["derive", "env"] }
|
||||
crossbeam-channel = "0.5.8"
|
||||
deserr = { version = "0.6.0", features = ["actix-web"] }
|
||||
bytes = "1.5.0"
|
||||
clap = { version = "4.4.17", features = ["derive", "env"] }
|
||||
crossbeam-channel = "0.5.11"
|
||||
deserr = { version = "0.6.1", features = ["actix-web"] }
|
||||
dump = { path = "../dump" }
|
||||
either = "1.8.1"
|
||||
env_logger = "0.10.0"
|
||||
either = "1.9.0"
|
||||
file-store = { path = "../file-store" }
|
||||
flate2 = "1.0.25"
|
||||
flate2 = "1.0.28"
|
||||
fst = "0.4.7"
|
||||
futures = "0.3.28"
|
||||
futures-util = "0.3.28"
|
||||
http = "0.2.9"
|
||||
futures = "0.3.30"
|
||||
futures-util = "0.3.30"
|
||||
http = "0.2.11"
|
||||
index-scheduler = { path = "../index-scheduler" }
|
||||
indexmap = { version = "2.0.0", features = ["serde"] }
|
||||
is-terminal = "0.4.8"
|
||||
indexmap = { version = "2.1.0", features = ["serde"] }
|
||||
is-terminal = "0.4.10"
|
||||
itertools = "0.11.0"
|
||||
jsonwebtoken = "8.3.0"
|
||||
jsonwebtoken = "9.2.0"
|
||||
lazy_static = "1.4.0"
|
||||
log = "0.4.17"
|
||||
meilisearch-auth = { path = "../meilisearch-auth" }
|
||||
meilisearch-types = { path = "../meilisearch-types" }
|
||||
mimalloc = { version = "0.1.37", default-features = false }
|
||||
mimalloc = { version = "0.1.39", default-features = false }
|
||||
mime = "0.3.17"
|
||||
num_cpus = "1.15.0"
|
||||
obkv = "0.2.0"
|
||||
once_cell = "1.17.1"
|
||||
ordered-float = "3.7.0"
|
||||
num_cpus = "1.16.0"
|
||||
obkv = "0.2.1"
|
||||
once_cell = "1.19.0"
|
||||
ordered-float = "4.2.0"
|
||||
parking_lot = "0.12.1"
|
||||
permissive-json-pointer = { path = "../permissive-json-pointer" }
|
||||
pin-project-lite = "0.2.9"
|
||||
pin-project-lite = "0.2.13"
|
||||
platform-dirs = "0.3.0"
|
||||
prometheus = { version = "0.13.3", features = ["process"] }
|
||||
puffin = { version = "0.16.0", features = ["serialization"] }
|
||||
rand = "0.8.5"
|
||||
rayon = "1.7.0"
|
||||
regex = "1.7.3"
|
||||
reqwest = { version = "0.11.16", features = [
|
||||
rayon = "1.8.0"
|
||||
regex = "1.10.2"
|
||||
reqwest = { version = "0.11.23", features = [
|
||||
"rustls-tls",
|
||||
"json",
|
||||
], default-features = false }
|
||||
rustls = "0.20.8"
|
||||
rustls = "0.21.6"
|
||||
rustls-pemfile = "1.0.2"
|
||||
segment = { version = "0.2.2", optional = true }
|
||||
serde = { version = "1.0.160", features = ["derive"] }
|
||||
serde_json = { version = "1.0.95", features = ["preserve_order"] }
|
||||
sha2 = "0.10.6"
|
||||
siphasher = "0.3.10"
|
||||
slice-group-by = "0.3.0"
|
||||
segment = { version = "0.2.3", optional = true }
|
||||
serde = { version = "1.0.195", features = ["derive"] }
|
||||
serde_json = { version = "1.0.111", features = ["preserve_order"] }
|
||||
sha2 = "0.10.8"
|
||||
siphasher = "1.0.0"
|
||||
slice-group-by = "0.3.1"
|
||||
static-files = { version = "0.2.3", optional = true }
|
||||
sysinfo = "0.29.7"
|
||||
tar = "0.4.38"
|
||||
tempfile = "3.5.0"
|
||||
thiserror = "1.0.40"
|
||||
time = { version = "0.3.20", features = [
|
||||
sysinfo = "0.30.5"
|
||||
tar = "0.4.40"
|
||||
tempfile = "3.9.0"
|
||||
thiserror = "1.0.56"
|
||||
time = { version = "0.3.31", features = [
|
||||
"serde-well-known",
|
||||
"formatting",
|
||||
"parsing",
|
||||
"macros",
|
||||
] }
|
||||
tokio = { version = "1.27.0", features = ["full"] }
|
||||
tokio-stream = "0.1.12"
|
||||
toml = "0.7.3"
|
||||
uuid = { version = "1.3.1", features = ["serde", "v4"] }
|
||||
walkdir = "2.3.3"
|
||||
tokio = { version = "1.35.1", features = ["full"] }
|
||||
tokio-stream = "0.1.14"
|
||||
toml = "0.8.8"
|
||||
uuid = { version = "1.6.1", features = ["serde", "v4"] }
|
||||
walkdir = "2.4.0"
|
||||
yaup = "0.2.1"
|
||||
serde_urlencoded = "0.7.1"
|
||||
termcolor = "1.2.0"
|
||||
termcolor = "1.4.1"
|
||||
url = { version = "2.5.0", features = ["serde"] }
|
||||
tracing = "0.1.40"
|
||||
tracing-subscriber = "0.3.18"
|
||||
tracing-trace = { version = "0.1.0", path = "../tracing-trace" }
|
||||
tracing-actix-web = "0.7.9"
|
||||
|
||||
[dev-dependencies]
|
||||
actix-rt = "2.8.0"
|
||||
actix-rt = "2.9.0"
|
||||
assert-json-diff = "2.0.2"
|
||||
brotli = "3.3.4"
|
||||
insta = "1.29.0"
|
||||
manifest-dir-macros = "0.1.16"
|
||||
brotli = "3.4.0"
|
||||
insta = "1.34.0"
|
||||
manifest-dir-macros = "0.1.18"
|
||||
maplit = "1.0.2"
|
||||
meili-snap = { path = "../meili-snap" }
|
||||
temp-env = "0.3.3"
|
||||
urlencoding = "2.1.2"
|
||||
temp-env = "0.3.6"
|
||||
urlencoding = "2.1.3"
|
||||
yaup = "0.2.1"
|
||||
|
||||
[build-dependencies]
|
||||
anyhow = { version = "1.0.70", optional = true }
|
||||
cargo_toml = { version = "0.15.2", optional = true }
|
||||
anyhow = { version = "1.0.79", optional = true }
|
||||
cargo_toml = { version = "0.18.0", optional = true }
|
||||
hex = { version = "0.4.3", optional = true }
|
||||
reqwest = { version = "0.11.16", features = [
|
||||
reqwest = { version = "0.11.23", features = [
|
||||
"blocking",
|
||||
"rustls-tls",
|
||||
], default-features = false, optional = true }
|
||||
sha-1 = { version = "0.10.1", optional = true }
|
||||
static-files = { version = "0.2.3", optional = true }
|
||||
tempfile = { version = "3.5.0", optional = true }
|
||||
tempfile = { version = "3.9.0", optional = true }
|
||||
vergen = { version = "7.5.1", default-features = false, features = ["git"] }
|
||||
zip = { version = "0.6.4", optional = true }
|
||||
zip = { version = "0.6.6", optional = true }
|
||||
|
||||
[features]
|
||||
default = ["analytics", "meilisearch-types/all-tokenizations", "mini-dashboard"]
|
||||
@@ -153,5 +156,5 @@ greek = ["meilisearch-types/greek"]
|
||||
khmer = ["meilisearch-types/khmer"]
|
||||
|
||||
[package.metadata.mini-dashboard]
|
||||
assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.11/build.zip"
|
||||
sha1 = "83cd44ed1e5f97ecb581dc9f958a63f4ccc982d9"
|
||||
assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.13/build.zip"
|
||||
sha1 = "e20cc9b390003c6c844f4b8bcc5c5013191a77ff"
|
||||
|
||||
@@ -18,7 +18,7 @@ use segment::message::{Identify, Track, User};
|
||||
use segment::{AutoBatcher, Batcher, HttpClient};
|
||||
use serde::Serialize;
|
||||
use serde_json::{json, Value};
|
||||
use sysinfo::{DiskExt, System, SystemExt};
|
||||
use sysinfo::{Disks, System};
|
||||
use time::OffsetDateTime;
|
||||
use tokio::select;
|
||||
use tokio::sync::mpsc::{self, Receiver, Sender};
|
||||
@@ -36,7 +36,7 @@ use crate::routes::{create_all_stats, Stats};
|
||||
use crate::search::{
|
||||
FacetSearchResult, MatchingStrategy, SearchQuery, SearchQueryWithIndex, SearchResult,
|
||||
DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG,
|
||||
DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT,
|
||||
DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, DEFAULT_SEMANTIC_RATIO,
|
||||
};
|
||||
use crate::Opt;
|
||||
|
||||
@@ -250,7 +250,9 @@ impl super::Analytics for SegmentAnalytics {
|
||||
struct Infos {
|
||||
env: String,
|
||||
experimental_enable_metrics: bool,
|
||||
experimental_enable_logs_route: bool,
|
||||
experimental_reduce_indexing_memory_usage: bool,
|
||||
experimental_max_number_of_batched_tasks: usize,
|
||||
db_path: bool,
|
||||
import_dump: bool,
|
||||
dump_dir: bool,
|
||||
@@ -263,6 +265,8 @@ struct Infos {
|
||||
ignore_snapshot_if_db_exists: bool,
|
||||
http_addr: bool,
|
||||
http_payload_size_limit: Byte,
|
||||
task_queue_webhook: bool,
|
||||
task_webhook_authorization_header: bool,
|
||||
log_level: String,
|
||||
max_indexing_memory: MaxMemory,
|
||||
max_indexing_threads: MaxThreads,
|
||||
@@ -284,10 +288,14 @@ impl From<Opt> for Infos {
|
||||
let Opt {
|
||||
db_path,
|
||||
experimental_enable_metrics,
|
||||
experimental_enable_logs_route,
|
||||
experimental_reduce_indexing_memory_usage,
|
||||
experimental_max_number_of_batched_tasks,
|
||||
http_addr,
|
||||
master_key: _,
|
||||
env,
|
||||
task_webhook_url,
|
||||
task_webhook_authorization_header,
|
||||
max_index_size: _,
|
||||
max_task_db_size: _,
|
||||
http_payload_size_limit,
|
||||
@@ -327,6 +335,7 @@ impl From<Opt> for Infos {
|
||||
Self {
|
||||
env,
|
||||
experimental_enable_metrics,
|
||||
experimental_enable_logs_route,
|
||||
experimental_reduce_indexing_memory_usage,
|
||||
db_path: db_path != PathBuf::from("./data.ms"),
|
||||
import_dump: import_dump.is_some(),
|
||||
@@ -340,6 +349,9 @@ impl From<Opt> for Infos {
|
||||
ignore_snapshot_if_db_exists,
|
||||
http_addr: http_addr != default_http_addr(),
|
||||
http_payload_size_limit,
|
||||
experimental_max_number_of_batched_tasks,
|
||||
task_queue_webhook: task_webhook_url.is_some(),
|
||||
task_webhook_authorization_header: task_webhook_authorization_header.is_some(),
|
||||
log_level: log_level.to_string(),
|
||||
max_indexing_memory,
|
||||
max_indexing_threads,
|
||||
@@ -377,16 +389,17 @@ impl Segment {
|
||||
fn compute_traits(opt: &Opt, stats: Stats) -> Value {
|
||||
static FIRST_START_TIMESTAMP: Lazy<Instant> = Lazy::new(Instant::now);
|
||||
static SYSTEM: Lazy<Value> = Lazy::new(|| {
|
||||
let disks = Disks::new_with_refreshed_list();
|
||||
let mut sys = System::new_all();
|
||||
sys.refresh_all();
|
||||
let kernel_version =
|
||||
sys.kernel_version().and_then(|k| k.split_once('-').map(|(k, _)| k.to_string()));
|
||||
let kernel_version = System::kernel_version()
|
||||
.and_then(|k| k.split_once('-').map(|(k, _)| k.to_string()));
|
||||
json!({
|
||||
"distribution": sys.name(),
|
||||
"distribution": System::name(),
|
||||
"kernel_version": kernel_version,
|
||||
"cores": sys.cpus().len(),
|
||||
"ram_size": sys.total_memory(),
|
||||
"disk_size": sys.disks().iter().map(|disk| disk.total_space()).max(),
|
||||
"disk_size": disks.iter().map(|disk| disk.total_space()).max(),
|
||||
"server_provider": std::env::var("MEILI_SERVER_PROVIDER").ok(),
|
||||
})
|
||||
});
|
||||
@@ -583,6 +596,11 @@ pub struct SearchAggregator {
|
||||
// vector
|
||||
// The maximum number of floats in a vector request
|
||||
max_vector_size: usize,
|
||||
// Whether the semantic ratio passed to a hybrid search equals the default ratio.
|
||||
semantic_ratio: bool,
|
||||
// Whether a non-default embedder was specified
|
||||
embedder: bool,
|
||||
hybrid: bool,
|
||||
|
||||
// every time a search is done, we increment the counter linked to the used settings
|
||||
matching_strategy: HashMap<String, usize>,
|
||||
@@ -636,6 +654,7 @@ impl SearchAggregator {
|
||||
crop_marker,
|
||||
matching_strategy,
|
||||
attributes_to_search_on,
|
||||
hybrid,
|
||||
} = query;
|
||||
|
||||
let mut ret = Self::default();
|
||||
@@ -709,6 +728,12 @@ impl SearchAggregator {
|
||||
ret.show_ranking_score = *show_ranking_score;
|
||||
ret.show_ranking_score_details = *show_ranking_score_details;
|
||||
|
||||
if let Some(hybrid) = hybrid {
|
||||
ret.semantic_ratio = hybrid.semantic_ratio != DEFAULT_SEMANTIC_RATIO();
|
||||
ret.embedder = hybrid.embedder.is_some();
|
||||
ret.hybrid = true;
|
||||
}
|
||||
|
||||
ret
|
||||
}
|
||||
|
||||
@@ -762,6 +787,9 @@ impl SearchAggregator {
|
||||
facets_total_number_of_facets,
|
||||
show_ranking_score,
|
||||
show_ranking_score_details,
|
||||
semantic_ratio,
|
||||
embedder,
|
||||
hybrid,
|
||||
} = other;
|
||||
|
||||
if self.timestamp.is_none() {
|
||||
@@ -807,6 +835,9 @@ impl SearchAggregator {
|
||||
|
||||
// vector
|
||||
self.max_vector_size = self.max_vector_size.max(max_vector_size);
|
||||
self.semantic_ratio |= semantic_ratio;
|
||||
self.hybrid |= hybrid;
|
||||
self.embedder |= embedder;
|
||||
|
||||
// pagination
|
||||
self.max_limit = self.max_limit.max(max_limit);
|
||||
@@ -875,6 +906,9 @@ impl SearchAggregator {
|
||||
facets_total_number_of_facets,
|
||||
show_ranking_score,
|
||||
show_ranking_score_details,
|
||||
semantic_ratio,
|
||||
embedder,
|
||||
hybrid,
|
||||
} = self;
|
||||
|
||||
if total_received == 0 {
|
||||
@@ -914,6 +948,11 @@ impl SearchAggregator {
|
||||
"vector": {
|
||||
"max_vector_size": max_vector_size,
|
||||
},
|
||||
"hybrid": {
|
||||
"enabled": hybrid,
|
||||
"semantic_ratio": semantic_ratio,
|
||||
"embedder": embedder,
|
||||
},
|
||||
"pagination": {
|
||||
"max_limit": max_limit,
|
||||
"max_offset": max_offset,
|
||||
@@ -1009,6 +1048,7 @@ impl MultiSearchAggregator {
|
||||
crop_marker: _,
|
||||
matching_strategy: _,
|
||||
attributes_to_search_on: _,
|
||||
hybrid: _,
|
||||
} = query;
|
||||
|
||||
index_uid.as_str()
|
||||
@@ -1155,6 +1195,7 @@ impl FacetSearchAggregator {
|
||||
filter,
|
||||
matching_strategy,
|
||||
attributes_to_search_on,
|
||||
hybrid,
|
||||
} = query;
|
||||
|
||||
let mut ret = Self::default();
|
||||
@@ -1168,7 +1209,8 @@ impl FacetSearchAggregator {
|
||||
|| vector.is_some()
|
||||
|| filter.is_some()
|
||||
|| *matching_strategy != MatchingStrategy::default()
|
||||
|| attributes_to_search_on.is_some();
|
||||
|| attributes_to_search_on.is_some()
|
||||
|| hybrid.is_some();
|
||||
|
||||
ret
|
||||
}
|
||||
|
||||
@@ -12,6 +12,8 @@ pub enum MeilisearchHttpError {
|
||||
#[error("A Content-Type header is missing. Accepted values for the Content-Type header are: {}",
|
||||
.0.iter().map(|s| format!("`{}`", s)).collect::<Vec<_>>().join(", "))]
|
||||
MissingContentType(Vec<String>),
|
||||
#[error("The `/logs/stream` route is currently in use by someone else.")]
|
||||
AlreadyUsedLogRoute,
|
||||
#[error("The Content-Type `{0}` does not support the use of a csv delimiter. The csv delimiter can only be used with the Content-Type `text/csv`.")]
|
||||
CsvDelimiterWithWrongContentType(String),
|
||||
#[error(
|
||||
@@ -51,12 +53,15 @@ pub enum MeilisearchHttpError {
|
||||
DocumentFormat(#[from] DocumentFormatError),
|
||||
#[error(transparent)]
|
||||
Join(#[from] JoinError),
|
||||
#[error("Invalid request: missing `hybrid` parameter when both `q` and `vector` are present.")]
|
||||
MissingSearchHybrid,
|
||||
}
|
||||
|
||||
impl ErrorCode for MeilisearchHttpError {
|
||||
fn error_code(&self) -> Code {
|
||||
match self {
|
||||
MeilisearchHttpError::MissingContentType(_) => Code::MissingContentType,
|
||||
MeilisearchHttpError::AlreadyUsedLogRoute => Code::BadRequest,
|
||||
MeilisearchHttpError::CsvDelimiterWithWrongContentType(_) => Code::InvalidContentType,
|
||||
MeilisearchHttpError::MissingPayload(_) => Code::MissingPayload,
|
||||
MeilisearchHttpError::InvalidContentType(_, _) => Code::InvalidContentType,
|
||||
@@ -74,6 +79,7 @@ impl ErrorCode for MeilisearchHttpError {
|
||||
MeilisearchHttpError::FileStore(_) => Code::Internal,
|
||||
MeilisearchHttpError::DocumentFormat(e) => e.error_code(),
|
||||
MeilisearchHttpError::Join(_) => Code::Internal,
|
||||
MeilisearchHttpError::MissingSearchHybrid => Code::MissingSearchHybrid,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -29,7 +29,6 @@ use error::PayloadError;
|
||||
use extractors::payload::PayloadConfig;
|
||||
use http::header::CONTENT_TYPE;
|
||||
use index_scheduler::{IndexScheduler, IndexSchedulerOptions};
|
||||
use log::error;
|
||||
use meilisearch_auth::AuthController;
|
||||
use meilisearch_types::milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
|
||||
use meilisearch_types::milli::update::{IndexDocumentsConfig, IndexDocumentsMethod};
|
||||
@@ -39,6 +38,8 @@ use meilisearch_types::versioning::{check_version_file, create_version_file};
|
||||
use meilisearch_types::{compression, milli, VERSION_FILE_NAME};
|
||||
pub use option::Opt;
|
||||
use option::ScheduleSnapshot;
|
||||
use tracing::{error, info_span};
|
||||
use tracing_subscriber::filter::Targets;
|
||||
|
||||
use crate::error::MeilisearchHttpError;
|
||||
|
||||
@@ -86,10 +87,21 @@ fn is_empty_db(db_path: impl AsRef<Path>) -> bool {
|
||||
}
|
||||
}
|
||||
|
||||
/// The handle used to update the logs at runtime. Must be accessible from the `main.rs` and the `route/logs.rs`.
|
||||
pub type LogRouteHandle =
|
||||
tracing_subscriber::reload::Handle<LogRouteType, tracing_subscriber::Registry>;
|
||||
|
||||
pub type LogRouteType = tracing_subscriber::filter::Filtered<
|
||||
Option<Box<dyn tracing_subscriber::Layer<tracing_subscriber::Registry> + Send + Sync>>,
|
||||
Targets,
|
||||
tracing_subscriber::Registry,
|
||||
>;
|
||||
|
||||
pub fn create_app(
|
||||
index_scheduler: Data<IndexScheduler>,
|
||||
auth_controller: Data<AuthController>,
|
||||
opt: Opt,
|
||||
logs: LogRouteHandle,
|
||||
analytics: Arc<dyn Analytics>,
|
||||
enable_dashboard: bool,
|
||||
) -> actix_web::App<
|
||||
@@ -108,6 +120,7 @@ pub fn create_app(
|
||||
index_scheduler.clone(),
|
||||
auth_controller.clone(),
|
||||
&opt,
|
||||
logs,
|
||||
analytics.clone(),
|
||||
)
|
||||
})
|
||||
@@ -123,11 +136,49 @@ pub fn create_app(
|
||||
.allow_any_method()
|
||||
.max_age(86_400), // 24h
|
||||
)
|
||||
.wrap(actix_web::middleware::Logger::default())
|
||||
.wrap(tracing_actix_web::TracingLogger::<AwebTracingLogger>::new())
|
||||
.wrap(actix_web::middleware::Compress::default())
|
||||
.wrap(actix_web::middleware::NormalizePath::new(actix_web::middleware::TrailingSlash::Trim))
|
||||
}
|
||||
|
||||
struct AwebTracingLogger;
|
||||
|
||||
impl tracing_actix_web::RootSpanBuilder for AwebTracingLogger {
|
||||
fn on_request_start(request: &actix_web::dev::ServiceRequest) -> tracing::Span {
|
||||
use tracing::field::Empty;
|
||||
|
||||
let conn_info = request.connection_info();
|
||||
let headers = request.headers();
|
||||
let user_agent = headers
|
||||
.get(http::header::USER_AGENT)
|
||||
.map(|value| String::from_utf8_lossy(value.as_bytes()).into_owned())
|
||||
.unwrap_or_default();
|
||||
info_span!("HTTP request", method = %request.method(), host = conn_info.host(), route = %request.path(), query_parameters = %request.query_string(), %user_agent, status_code = Empty, error = Empty)
|
||||
}
|
||||
|
||||
fn on_request_end<B: MessageBody>(
|
||||
span: tracing::Span,
|
||||
outcome: &Result<ServiceResponse<B>, actix_web::Error>,
|
||||
) {
|
||||
match &outcome {
|
||||
Ok(response) => {
|
||||
let code: i32 = response.response().status().as_u16().into();
|
||||
span.record("status_code", code);
|
||||
|
||||
if let Some(error) = response.response().error() {
|
||||
// use the status code already constructed for the outgoing HTTP response
|
||||
span.record("error", &tracing::field::display(error.as_response_error()));
|
||||
}
|
||||
}
|
||||
Err(error) => {
|
||||
let code: i32 = error.error_response().status().as_u16().into();
|
||||
span.record("status_code", code);
|
||||
span.record("error", &tracing::field::display(error.as_response_error()));
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
enum OnFailure {
|
||||
RemoveDb,
|
||||
KeepDb,
|
||||
@@ -228,12 +279,15 @@ fn open_or_create_database_unchecked(
|
||||
indexes_path: opt.db_path.join("indexes"),
|
||||
snapshots_path: opt.snapshot_dir.clone(),
|
||||
dumps_path: opt.dump_dir.clone(),
|
||||
webhook_url: opt.task_webhook_url.as_ref().map(|url| url.to_string()),
|
||||
webhook_authorization_header: opt.task_webhook_authorization_header.clone(),
|
||||
task_db_size: opt.max_task_db_size.get_bytes() as usize,
|
||||
index_base_map_size: opt.max_index_size.get_bytes() as usize,
|
||||
enable_mdb_writemap: opt.experimental_reduce_indexing_memory_usage,
|
||||
indexer_config: (&opt.indexer_options).try_into()?,
|
||||
autobatching_enabled: true,
|
||||
max_number_of_tasks: 1_000_000,
|
||||
max_number_of_batched_tasks: opt.experimental_max_number_of_batched_tasks,
|
||||
index_growth_amount: byte_unit::Byte::from_str("10GiB").unwrap().get_bytes() as usize,
|
||||
index_count: DEFAULT_INDEX_COUNT,
|
||||
instance_features,
|
||||
@@ -277,15 +331,15 @@ fn import_dump(
|
||||
let mut dump_reader = dump::DumpReader::open(reader)?;
|
||||
|
||||
if let Some(date) = dump_reader.date() {
|
||||
log::info!(
|
||||
"Importing a dump of meilisearch `{:?}` from the {}",
|
||||
dump_reader.version(), // TODO: get the meilisearch version instead of the dump version
|
||||
date
|
||||
tracing::info!(
|
||||
version = ?dump_reader.version(), // TODO: get the meilisearch version instead of the dump version
|
||||
%date,
|
||||
"Importing a dump of meilisearch"
|
||||
);
|
||||
} else {
|
||||
log::info!(
|
||||
"Importing a dump of meilisearch `{:?}`",
|
||||
dump_reader.version(), // TODO: get the meilisearch version instead of the dump version
|
||||
tracing::info!(
|
||||
version = ?dump_reader.version(), // TODO: get the meilisearch version instead of the dump version
|
||||
"Importing a dump of meilisearch",
|
||||
);
|
||||
}
|
||||
|
||||
@@ -319,7 +373,7 @@ fn import_dump(
|
||||
for index_reader in dump_reader.indexes()? {
|
||||
let mut index_reader = index_reader?;
|
||||
let metadata = index_reader.metadata();
|
||||
log::info!("Importing index `{}`.", metadata.uid);
|
||||
tracing::info!("Importing index `{}`.", metadata.uid);
|
||||
|
||||
let date = Some((metadata.created_at, metadata.updated_at));
|
||||
let index = index_scheduler.create_raw_index(&metadata.uid, date)?;
|
||||
@@ -333,14 +387,15 @@ fn import_dump(
|
||||
}
|
||||
|
||||
// 4.2 Import the settings.
|
||||
log::info!("Importing the settings.");
|
||||
tracing::info!("Importing the settings.");
|
||||
let settings = index_reader.settings()?;
|
||||
apply_settings_to_builder(&settings, &mut builder);
|
||||
builder.execute(|indexing_step| log::debug!("update: {:?}", indexing_step), || false)?;
|
||||
builder
|
||||
.execute(|indexing_step| tracing::debug!("update: {:?}", indexing_step), || false)?;
|
||||
|
||||
// 4.3 Import the documents.
|
||||
// 4.3.1 We need to recreate the grenad+obkv format accepted by the index.
|
||||
log::info!("Importing the documents.");
|
||||
tracing::info!("Importing the documents.");
|
||||
let file = tempfile::tempfile()?;
|
||||
let mut builder = DocumentsBatchBuilder::new(BufWriter::new(file));
|
||||
for document in index_reader.documents()? {
|
||||
@@ -362,15 +417,16 @@ fn import_dump(
|
||||
update_method: IndexDocumentsMethod::ReplaceDocuments,
|
||||
..Default::default()
|
||||
},
|
||||
|indexing_step| log::trace!("update: {:?}", indexing_step),
|
||||
|indexing_step| tracing::trace!("update: {:?}", indexing_step),
|
||||
|| false,
|
||||
)?;
|
||||
|
||||
let (builder, user_result) = builder.add_documents(reader)?;
|
||||
log::info!("{} documents found.", user_result?);
|
||||
let user_result = user_result?;
|
||||
tracing::info!(documents_found = user_result, "{} documents found.", user_result);
|
||||
builder.execute()?;
|
||||
wtxn.commit()?;
|
||||
log::info!("All documents successfully imported.");
|
||||
tracing::info!("All documents successfully imported.");
|
||||
}
|
||||
|
||||
let mut index_scheduler_dump = index_scheduler.register_dumped_task()?;
|
||||
@@ -388,6 +444,7 @@ pub fn configure_data(
|
||||
index_scheduler: Data<IndexScheduler>,
|
||||
auth: Data<AuthController>,
|
||||
opt: &Opt,
|
||||
logs: LogRouteHandle,
|
||||
analytics: Arc<dyn Analytics>,
|
||||
) {
|
||||
let http_payload_size_limit = opt.http_payload_size_limit.get_bytes() as usize;
|
||||
@@ -395,6 +452,7 @@ pub fn configure_data(
|
||||
.app_data(index_scheduler)
|
||||
.app_data(auth)
|
||||
.app_data(web::Data::from(analytics))
|
||||
.app_data(web::Data::new(logs))
|
||||
.app_data(
|
||||
web::JsonConfig::default()
|
||||
.limit(http_payload_size_limit)
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
use std::env;
|
||||
use std::io::{stderr, Write};
|
||||
use std::path::PathBuf;
|
||||
use std::str::FromStr;
|
||||
use std::sync::Arc;
|
||||
|
||||
use actix_web::http::KeepAlive;
|
||||
@@ -9,33 +10,60 @@ use actix_web::HttpServer;
|
||||
use index_scheduler::IndexScheduler;
|
||||
use is_terminal::IsTerminal;
|
||||
use meilisearch::analytics::Analytics;
|
||||
use meilisearch::{analytics, create_app, prototype_name, setup_meilisearch, Opt};
|
||||
use meilisearch::{
|
||||
analytics, create_app, prototype_name, setup_meilisearch, LogRouteHandle, LogRouteType, Opt,
|
||||
};
|
||||
use meilisearch_auth::{generate_master_key, AuthController, MASTER_KEY_MIN_SIZE};
|
||||
use mimalloc::MiMalloc;
|
||||
use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor};
|
||||
use tracing::level_filters::LevelFilter;
|
||||
use tracing_subscriber::layer::SubscriberExt as _;
|
||||
use tracing_subscriber::Layer;
|
||||
|
||||
#[global_allocator]
|
||||
static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;
|
||||
static ALLOC: MiMalloc = MiMalloc;
|
||||
|
||||
fn default_layer() -> LogRouteType {
|
||||
None.with_filter(tracing_subscriber::filter::Targets::new().with_target("", LevelFilter::OFF))
|
||||
}
|
||||
|
||||
/// does all the setup before meilisearch is launched
|
||||
fn setup(opt: &Opt) -> anyhow::Result<()> {
|
||||
let mut log_builder = env_logger::Builder::new();
|
||||
log_builder.parse_filters(&opt.log_level.to_string());
|
||||
fn setup(opt: &Opt) -> anyhow::Result<LogRouteHandle> {
|
||||
let (route_layer, route_layer_handle) = tracing_subscriber::reload::Layer::new(default_layer());
|
||||
let route_layer: tracing_subscriber::reload::Layer<_, _> = route_layer;
|
||||
|
||||
log_builder.init();
|
||||
let subscriber = tracing_subscriber::registry().with(route_layer).with(
|
||||
tracing_subscriber::fmt::layer()
|
||||
.with_span_events(tracing_subscriber::fmt::format::FmtSpan::CLOSE)
|
||||
.with_filter(
|
||||
tracing_subscriber::filter::LevelFilter::from_str(&opt.log_level.to_string())
|
||||
.unwrap(),
|
||||
),
|
||||
);
|
||||
|
||||
Ok(())
|
||||
// set the subscriber as the default for the application
|
||||
tracing::subscriber::set_global_default(subscriber).unwrap();
|
||||
|
||||
Ok(route_layer_handle)
|
||||
}
|
||||
|
||||
fn on_panic(info: &std::panic::PanicInfo) {
|
||||
let info = info.to_string().replace('\n', " ");
|
||||
tracing::error!(%info);
|
||||
}
|
||||
|
||||
#[actix_web::main]
|
||||
async fn main() -> anyhow::Result<()> {
|
||||
let (opt, config_read_from) = Opt::try_build()?;
|
||||
|
||||
std::panic::set_hook(Box::new(on_panic));
|
||||
|
||||
anyhow::ensure!(
|
||||
!(cfg!(windows) && opt.experimental_reduce_indexing_memory_usage),
|
||||
"The `experimental-reduce-indexing-memory-usage` flag is not supported on Windows"
|
||||
);
|
||||
|
||||
setup(&opt)?;
|
||||
let log_handle = setup(&opt)?;
|
||||
|
||||
match (opt.env.as_ref(), &opt.master_key) {
|
||||
("production", Some(master_key)) if master_key.len() < MASTER_KEY_MIN_SIZE => {
|
||||
@@ -73,7 +101,7 @@ async fn main() -> anyhow::Result<()> {
|
||||
|
||||
print_launch_resume(&opt, analytics.clone(), config_read_from);
|
||||
|
||||
run_http(index_scheduler, auth_controller, opt, analytics).await?;
|
||||
run_http(index_scheduler, auth_controller, opt, log_handle, analytics).await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -82,6 +110,7 @@ async fn run_http(
|
||||
index_scheduler: Arc<IndexScheduler>,
|
||||
auth_controller: Arc<AuthController>,
|
||||
opt: Opt,
|
||||
logs: LogRouteHandle,
|
||||
analytics: Arc<dyn Analytics>,
|
||||
) -> anyhow::Result<()> {
|
||||
let enable_dashboard = &opt.env == "development";
|
||||
@@ -94,6 +123,7 @@ async fn run_http(
|
||||
index_scheduler.clone(),
|
||||
auth_controller.clone(),
|
||||
opt.clone(),
|
||||
logs.clone(),
|
||||
analytics.clone(),
|
||||
enable_dashboard,
|
||||
)
|
||||
@@ -103,7 +133,7 @@ async fn run_http(
|
||||
.keep_alive(KeepAlive::Os);
|
||||
|
||||
if let Some(config) = opt_clone.get_ssl_config()? {
|
||||
http_server.bind_rustls(opt_clone.http_addr, config)?.run().await?;
|
||||
http_server.bind_rustls_021(opt_clone.http_addr, config)?.run().await?;
|
||||
} else {
|
||||
http_server.bind(&opt_clone.http_addr)?.run().await?;
|
||||
}
|
||||
|
||||
@@ -20,7 +20,8 @@ use rustls::server::{
|
||||
use rustls::RootCertStore;
|
||||
use rustls_pemfile::{certs, pkcs8_private_keys, rsa_private_keys};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use sysinfo::{RefreshKind, System, SystemExt};
|
||||
use sysinfo::{MemoryRefreshKind, RefreshKind, System};
|
||||
use url::Url;
|
||||
|
||||
const POSSIBLE_ENV: [&str; 2] = ["development", "production"];
|
||||
|
||||
@@ -28,6 +29,8 @@ const MEILI_DB_PATH: &str = "MEILI_DB_PATH";
|
||||
const MEILI_HTTP_ADDR: &str = "MEILI_HTTP_ADDR";
|
||||
const MEILI_MASTER_KEY: &str = "MEILI_MASTER_KEY";
|
||||
const MEILI_ENV: &str = "MEILI_ENV";
|
||||
const MEILI_TASK_WEBHOOK_URL: &str = "MEILI_TASK_WEBHOOK_URL";
|
||||
const MEILI_TASK_WEBHOOK_AUTHORIZATION_HEADER: &str = "MEILI_TASK_WEBHOOK_AUTHORIZATION_HEADER";
|
||||
#[cfg(feature = "analytics")]
|
||||
const MEILI_NO_ANALYTICS: &str = "MEILI_NO_ANALYTICS";
|
||||
const MEILI_HTTP_PAYLOAD_SIZE_LIMIT: &str = "MEILI_HTTP_PAYLOAD_SIZE_LIMIT";
|
||||
@@ -48,9 +51,12 @@ const MEILI_IGNORE_MISSING_DUMP: &str = "MEILI_IGNORE_MISSING_DUMP";
|
||||
const MEILI_IGNORE_DUMP_IF_DB_EXISTS: &str = "MEILI_IGNORE_DUMP_IF_DB_EXISTS";
|
||||
const MEILI_DUMP_DIR: &str = "MEILI_DUMP_DIR";
|
||||
const MEILI_LOG_LEVEL: &str = "MEILI_LOG_LEVEL";
|
||||
const MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE: &str = "MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE";
|
||||
const MEILI_EXPERIMENTAL_ENABLE_METRICS: &str = "MEILI_EXPERIMENTAL_ENABLE_METRICS";
|
||||
const MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE: &str =
|
||||
"MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE";
|
||||
const MEILI_EXPERIMENTAL_MAX_NUMBER_OF_BATCHED_TASKS: &str =
|
||||
"MEILI_EXPERIMENTAL_MAX_NUMBER_OF_BATCHED_TASKS";
|
||||
|
||||
const DEFAULT_CONFIG_FILE_PATH: &str = "./config.toml";
|
||||
const DEFAULT_DB_PATH: &str = "./data.ms";
|
||||
@@ -154,6 +160,14 @@ pub struct Opt {
|
||||
#[serde(default = "default_env")]
|
||||
pub env: String,
|
||||
|
||||
/// Called whenever a task finishes so a third party can be notified.
|
||||
#[clap(long, env = MEILI_TASK_WEBHOOK_URL)]
|
||||
pub task_webhook_url: Option<Url>,
|
||||
|
||||
/// The Authorization header to send on the webhook URL whenever a task finishes so a third party can be notified.
|
||||
#[clap(long, env = MEILI_TASK_WEBHOOK_AUTHORIZATION_HEADER)]
|
||||
pub task_webhook_authorization_header: Option<String>,
|
||||
|
||||
/// Deactivates Meilisearch's built-in telemetry when provided.
|
||||
///
|
||||
/// Meilisearch automatically collects data from all instances that do not opt out using this flag.
|
||||
@@ -296,11 +310,23 @@ pub struct Opt {
|
||||
#[serde(default)]
|
||||
pub experimental_enable_metrics: bool,
|
||||
|
||||
/// Experimental logs route feature. For more information, see: <https://github.com/orgs/meilisearch/discussions/721>
|
||||
///
|
||||
/// Enables the log route on the `POST /logs/stream` endpoint and the `DELETE /logs/stream` to stop receiving logs.
|
||||
#[clap(long, env = MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE)]
|
||||
#[serde(default)]
|
||||
pub experimental_enable_logs_route: bool,
|
||||
|
||||
/// Experimental RAM reduction during indexing, do not use in production, see: <https://github.com/meilisearch/product/discussions/652>
|
||||
#[clap(long, env = MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE)]
|
||||
#[serde(default)]
|
||||
pub experimental_reduce_indexing_memory_usage: bool,
|
||||
|
||||
/// Experimentally reduces the maximum number of tasks that will be processed at once, see: <https://github.com/orgs/meilisearch/discussions/713>
|
||||
#[clap(long, env = MEILI_EXPERIMENTAL_MAX_NUMBER_OF_BATCHED_TASKS, default_value_t = default_limit_batched_tasks())]
|
||||
#[serde(default = "default_limit_batched_tasks")]
|
||||
pub experimental_max_number_of_batched_tasks: usize,
|
||||
|
||||
#[serde(flatten)]
|
||||
#[clap(flatten)]
|
||||
pub indexer_options: IndexerOpts,
|
||||
@@ -368,9 +394,12 @@ impl Opt {
|
||||
http_addr,
|
||||
master_key,
|
||||
env,
|
||||
task_webhook_url,
|
||||
task_webhook_authorization_header,
|
||||
max_index_size: _,
|
||||
max_task_db_size: _,
|
||||
http_payload_size_limit,
|
||||
experimental_max_number_of_batched_tasks,
|
||||
ssl_cert_path,
|
||||
ssl_key_path,
|
||||
ssl_auth_path,
|
||||
@@ -392,8 +421,9 @@ impl Opt {
|
||||
config_file_path: _,
|
||||
#[cfg(feature = "analytics")]
|
||||
no_analytics,
|
||||
experimental_enable_metrics: enable_metrics_route,
|
||||
experimental_reduce_indexing_memory_usage: reduce_indexing_memory_usage,
|
||||
experimental_enable_metrics,
|
||||
experimental_enable_logs_route,
|
||||
experimental_reduce_indexing_memory_usage,
|
||||
} = self;
|
||||
export_to_env_if_not_present(MEILI_DB_PATH, db_path);
|
||||
export_to_env_if_not_present(MEILI_HTTP_ADDR, http_addr);
|
||||
@@ -401,6 +431,16 @@ impl Opt {
|
||||
export_to_env_if_not_present(MEILI_MASTER_KEY, master_key);
|
||||
}
|
||||
export_to_env_if_not_present(MEILI_ENV, env);
|
||||
if let Some(task_webhook_url) = task_webhook_url {
|
||||
export_to_env_if_not_present(MEILI_TASK_WEBHOOK_URL, task_webhook_url.to_string());
|
||||
}
|
||||
if let Some(task_webhook_authorization_header) = task_webhook_authorization_header {
|
||||
export_to_env_if_not_present(
|
||||
MEILI_TASK_WEBHOOK_AUTHORIZATION_HEADER,
|
||||
task_webhook_authorization_header,
|
||||
);
|
||||
}
|
||||
|
||||
#[cfg(feature = "analytics")]
|
||||
{
|
||||
export_to_env_if_not_present(MEILI_NO_ANALYTICS, no_analytics.to_string());
|
||||
@@ -409,6 +449,10 @@ impl Opt {
|
||||
MEILI_HTTP_PAYLOAD_SIZE_LIMIT,
|
||||
http_payload_size_limit.to_string(),
|
||||
);
|
||||
export_to_env_if_not_present(
|
||||
MEILI_EXPERIMENTAL_MAX_NUMBER_OF_BATCHED_TASKS,
|
||||
experimental_max_number_of_batched_tasks.to_string(),
|
||||
);
|
||||
if let Some(ssl_cert_path) = ssl_cert_path {
|
||||
export_to_env_if_not_present(MEILI_SSL_CERT_PATH, ssl_cert_path);
|
||||
}
|
||||
@@ -433,11 +477,15 @@ impl Opt {
|
||||
export_to_env_if_not_present(MEILI_LOG_LEVEL, log_level.to_string());
|
||||
export_to_env_if_not_present(
|
||||
MEILI_EXPERIMENTAL_ENABLE_METRICS,
|
||||
enable_metrics_route.to_string(),
|
||||
experimental_enable_metrics.to_string(),
|
||||
);
|
||||
export_to_env_if_not_present(
|
||||
MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE,
|
||||
experimental_enable_logs_route.to_string(),
|
||||
);
|
||||
export_to_env_if_not_present(
|
||||
MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE,
|
||||
reduce_indexing_memory_usage.to_string(),
|
||||
experimental_reduce_indexing_memory_usage.to_string(),
|
||||
);
|
||||
indexer_options.export_to_env();
|
||||
}
|
||||
@@ -455,11 +503,11 @@ impl Opt {
|
||||
}
|
||||
if self.ssl_require_auth {
|
||||
let verifier = AllowAnyAuthenticatedClient::new(client_auth_roots);
|
||||
config.with_client_cert_verifier(verifier)
|
||||
config.with_client_cert_verifier(Arc::from(verifier))
|
||||
} else {
|
||||
let verifier =
|
||||
AllowAnyAnonymousOrAuthenticatedClient::new(client_auth_roots);
|
||||
config.with_client_cert_verifier(verifier)
|
||||
config.with_client_cert_verifier(Arc::from(verifier))
|
||||
}
|
||||
}
|
||||
None => config.with_no_client_auth(),
|
||||
@@ -489,7 +537,10 @@ impl Opt {
|
||||
}
|
||||
|
||||
pub(crate) fn to_instance_features(&self) -> InstanceTogglableFeatures {
|
||||
InstanceTogglableFeatures { metrics: self.experimental_enable_metrics }
|
||||
InstanceTogglableFeatures {
|
||||
metrics: self.experimental_enable_metrics,
|
||||
logs_route: self.experimental_enable_logs_route,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -598,8 +649,8 @@ impl MaxMemory {
|
||||
|
||||
/// Returns the total amount of bytes available or `None` if this system isn't supported.
|
||||
fn total_memory_bytes() -> Option<u64> {
|
||||
if System::IS_SUPPORTED {
|
||||
let memory_kind = RefreshKind::new().with_memory();
|
||||
if sysinfo::IS_SUPPORTED_SYSTEM {
|
||||
let memory_kind = RefreshKind::new().with_memory(MemoryRefreshKind::new().with_ram());
|
||||
let mut system = System::new_with_specifics(memory_kind);
|
||||
system.refresh_memory();
|
||||
Some(system.total_memory())
|
||||
@@ -727,6 +778,10 @@ fn default_http_payload_size_limit() -> Byte {
|
||||
Byte::from_str(DEFAULT_HTTP_PAYLOAD_SIZE_LIMIT).unwrap()
|
||||
}
|
||||
|
||||
/// Default value for `Opt::experimental_max_number_of_batched_tasks`, referenced by
/// both the `clap` `default_value_t` and the `serde` `default` attributes on that field.
///
/// Returns `usize::MAX`.
fn default_limit_batched_tasks() -> usize {
|
||||
usize::MAX
|
||||
}
|
||||
|
||||
fn default_snapshot_dir() -> PathBuf {
|
||||
PathBuf::from(DEFAULT_SNAPSHOT_DIR)
|
||||
}
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
use actix_web::web::Data;
|
||||
use actix_web::{web, HttpRequest, HttpResponse};
|
||||
use index_scheduler::IndexScheduler;
|
||||
use log::debug;
|
||||
use meilisearch_auth::AuthController;
|
||||
use meilisearch_types::error::ResponseError;
|
||||
use meilisearch_types::tasks::KindWithContent;
|
||||
use serde_json::json;
|
||||
use tracing::debug;
|
||||
|
||||
use crate::analytics::Analytics;
|
||||
use crate::extractors::authentication::policies::*;
|
||||
@@ -32,6 +32,6 @@ pub async fn create_dump(
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
|
||||
|
||||
debug!("returns: {:?}", task);
|
||||
debug!(returns = ?task, "Create dump");
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
}
|
||||
|
||||
@@ -3,11 +3,11 @@ use actix_web::{HttpRequest, HttpResponse};
|
||||
use deserr::actix_web::AwebJson;
|
||||
use deserr::Deserr;
|
||||
use index_scheduler::IndexScheduler;
|
||||
use log::debug;
|
||||
use meilisearch_types::deserr::DeserrJsonError;
|
||||
use meilisearch_types::error::ResponseError;
|
||||
use meilisearch_types::keys::actions;
|
||||
use serde_json::json;
|
||||
use tracing::debug;
|
||||
|
||||
use crate::analytics::Analytics;
|
||||
use crate::extractors::authentication::policies::ActionPolicy;
|
||||
@@ -33,23 +33,22 @@ async fn get_features(
|
||||
let features = index_scheduler.features();
|
||||
|
||||
analytics.publish("Experimental features Seen".to_string(), json!(null), Some(&req));
|
||||
debug!("returns: {:?}", features.runtime_features());
|
||||
HttpResponse::Ok().json(features.runtime_features())
|
||||
let features = features.runtime_features();
|
||||
debug!(returns = ?features, "Get features");
|
||||
HttpResponse::Ok().json(features)
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserr)]
|
||||
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
|
||||
pub struct RuntimeTogglableFeatures {
|
||||
#[deserr(default)]
|
||||
pub score_details: Option<bool>,
|
||||
#[deserr(default)]
|
||||
pub vector_store: Option<bool>,
|
||||
#[deserr(default)]
|
||||
pub metrics: Option<bool>,
|
||||
#[deserr(default)]
|
||||
pub export_puffin_reports: Option<bool>,
|
||||
pub logs_route: Option<bool>,
|
||||
#[deserr(default)]
|
||||
pub proximity_precision: Option<bool>,
|
||||
pub export_puffin_reports: Option<bool>,
|
||||
}
|
||||
|
||||
async fn patch_features(
|
||||
@@ -62,44 +61,40 @@ async fn patch_features(
|
||||
analytics: Data<dyn Analytics>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let features = index_scheduler.features();
|
||||
debug!(parameters = ?new_features, "Patch features");
|
||||
|
||||
let old_features = features.runtime_features();
|
||||
let new_features = meilisearch_types::features::RuntimeTogglableFeatures {
|
||||
score_details: new_features.0.score_details.unwrap_or(old_features.score_details),
|
||||
vector_store: new_features.0.vector_store.unwrap_or(old_features.vector_store),
|
||||
metrics: new_features.0.metrics.unwrap_or(old_features.metrics),
|
||||
logs_route: new_features.0.logs_route.unwrap_or(old_features.logs_route),
|
||||
export_puffin_reports: new_features
|
||||
.0
|
||||
.export_puffin_reports
|
||||
.unwrap_or(old_features.export_puffin_reports),
|
||||
proximity_precision: new_features
|
||||
.0
|
||||
.proximity_precision
|
||||
.unwrap_or(old_features.proximity_precision),
|
||||
};
|
||||
|
||||
// explicitly destructure for analytics rather than using the `Serialize` implementation, because
|
||||
// it renames to camelCase, which we don't want for analytics.
|
||||
// **Do not** ignore fields with `..` or `_` here, because we want to add them in the future.
|
||||
let meilisearch_types::features::RuntimeTogglableFeatures {
|
||||
score_details,
|
||||
vector_store,
|
||||
metrics,
|
||||
logs_route,
|
||||
export_puffin_reports,
|
||||
proximity_precision,
|
||||
} = new_features;
|
||||
|
||||
analytics.publish(
|
||||
"Experimental features Updated".to_string(),
|
||||
json!({
|
||||
"score_details": score_details,
|
||||
"vector_store": vector_store,
|
||||
"metrics": metrics,
|
||||
"logs_route": logs_route,
|
||||
"export_puffin_reports": export_puffin_reports,
|
||||
"proximity_precision": proximity_precision,
|
||||
}),
|
||||
Some(&req),
|
||||
);
|
||||
index_scheduler.put_runtime_features(new_features)?;
|
||||
debug!(returns = ?new_features, "Patch features");
|
||||
Ok(HttpResponse::Ok().json(new_features))
|
||||
}
|
||||
|
||||
@@ -8,7 +8,6 @@ use deserr::actix_web::{AwebJson, AwebQueryParameter};
|
||||
use deserr::Deserr;
|
||||
use futures::StreamExt;
|
||||
use index_scheduler::IndexScheduler;
|
||||
use log::debug;
|
||||
use meilisearch_types::deserr::query_params::Param;
|
||||
use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError};
|
||||
use meilisearch_types::document_formats::{read_csv, read_json, read_ndjson, PayloadType};
|
||||
@@ -28,6 +27,7 @@ use serde_json::Value;
|
||||
use tempfile::tempfile;
|
||||
use tokio::fs::File;
|
||||
use tokio::io::{AsyncSeekExt, AsyncWriteExt, BufWriter};
|
||||
use tracing::debug;
|
||||
|
||||
use crate::analytics::{Analytics, DocumentDeletionKind, DocumentFetchKind};
|
||||
use crate::error::MeilisearchHttpError;
|
||||
@@ -101,6 +101,7 @@ pub async fn get_document(
|
||||
analytics: web::Data<dyn Analytics>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let DocumentParam { index_uid, document_id } = document_param.into_inner();
|
||||
debug!(parameters = ?params, "Get document");
|
||||
let index_uid = IndexUid::try_from(index_uid)?;
|
||||
|
||||
analytics.get_fetch_documents(&DocumentFetchKind::PerDocumentId, &req);
|
||||
@@ -110,7 +111,7 @@ pub async fn get_document(
|
||||
|
||||
let index = index_scheduler.index(&index_uid)?;
|
||||
let document = retrieve_document(&index, &document_id, attributes_to_retrieve)?;
|
||||
debug!("returns: {:?}", document);
|
||||
debug!(returns = ?document, "Get document");
|
||||
Ok(HttpResponse::Ok().json(document))
|
||||
}
|
||||
|
||||
@@ -131,7 +132,7 @@ pub async fn delete_document(
|
||||
};
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
|
||||
debug!("returns: {:?}", task);
|
||||
debug!(returns = ?task, "Delete document");
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
}
|
||||
|
||||
@@ -168,9 +169,8 @@ pub async fn documents_by_query_post(
|
||||
req: HttpRequest,
|
||||
analytics: web::Data<dyn Analytics>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
debug!("called with body: {:?}", body);
|
||||
|
||||
let body = body.into_inner();
|
||||
debug!(parameters = ?body, "Get documents POST");
|
||||
|
||||
analytics.post_fetch_documents(
|
||||
&DocumentFetchKind::Normal {
|
||||
@@ -191,7 +191,7 @@ pub async fn get_documents(
|
||||
req: HttpRequest,
|
||||
analytics: web::Data<dyn Analytics>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
debug!("called with params: {:?}", params);
|
||||
debug!(parameters = ?params, "Get documents GET");
|
||||
|
||||
let BrowseQueryGet { limit, offset, fields, filter } = params.into_inner();
|
||||
|
||||
@@ -235,7 +235,7 @@ fn documents_by_query(
|
||||
|
||||
let ret = PaginationView::new(offset, limit, total as usize, documents);
|
||||
|
||||
debug!("returns: {:?}", ret);
|
||||
debug!(returns = ?ret, "Get documents");
|
||||
Ok(HttpResponse::Ok().json(ret))
|
||||
}
|
||||
|
||||
@@ -271,7 +271,7 @@ pub async fn replace_documents(
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
|
||||
|
||||
debug!("called with params: {:?}", params);
|
||||
debug!(parameters = ?params, "Replace documents");
|
||||
let params = params.into_inner();
|
||||
|
||||
analytics.add_documents(&params, index_scheduler.index(&index_uid).is_err(), &req);
|
||||
@@ -288,6 +288,7 @@ pub async fn replace_documents(
|
||||
allow_index_creation,
|
||||
)
|
||||
.await?;
|
||||
debug!(returns = ?task, "Replace documents");
|
||||
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
}
|
||||
@@ -302,8 +303,8 @@ pub async fn update_documents(
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
|
||||
|
||||
debug!("called with params: {:?}", params);
|
||||
let params = params.into_inner();
|
||||
debug!(parameters = ?params, "Update documents");
|
||||
|
||||
analytics.update_documents(&params, index_scheduler.index(&index_uid).is_err(), &req);
|
||||
|
||||
@@ -319,6 +320,7 @@ pub async fn update_documents(
|
||||
allow_index_creation,
|
||||
)
|
||||
.await?;
|
||||
debug!(returns = ?task, "Update documents");
|
||||
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
}
|
||||
@@ -427,7 +429,10 @@ async fn document_addition(
|
||||
Err(index_scheduler::Error::FileStore(file_store::Error::IoError(e)))
|
||||
if e.kind() == ErrorKind::NotFound => {}
|
||||
Err(e) => {
|
||||
log::warn!("Unknown error happened while deleting a malformed update file with uuid {uuid}: {e}");
|
||||
tracing::warn!(
|
||||
index_uuid = %uuid,
|
||||
"Unknown error happened while deleting a malformed update file: {e}"
|
||||
);
|
||||
}
|
||||
}
|
||||
// We still want to return the original error to the end user.
|
||||
@@ -453,7 +458,6 @@ async fn document_addition(
|
||||
}
|
||||
};
|
||||
|
||||
debug!("returns: {:?}", task);
|
||||
Ok(task.into())
|
||||
}
|
||||
|
||||
@@ -464,7 +468,7 @@ pub async fn delete_documents_batch(
|
||||
req: HttpRequest,
|
||||
analytics: web::Data<dyn Analytics>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
debug!("called with params: {:?}", body);
|
||||
debug!(parameters = ?body, "Delete documents by batch");
|
||||
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
|
||||
|
||||
analytics.delete_documents(DocumentDeletionKind::PerBatch, &req);
|
||||
@@ -479,7 +483,7 @@ pub async fn delete_documents_batch(
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
|
||||
|
||||
debug!("returns: {:?}", task);
|
||||
debug!(returns = ?task, "Delete documents by batch");
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
}
|
||||
|
||||
@@ -497,7 +501,7 @@ pub async fn delete_documents_by_filter(
|
||||
req: HttpRequest,
|
||||
analytics: web::Data<dyn Analytics>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
debug!("called with params: {:?}", body);
|
||||
debug!(parameters = ?body, "Delete documents by filter");
|
||||
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
|
||||
let index_uid = index_uid.into_inner();
|
||||
let filter = body.into_inner().filter;
|
||||
@@ -515,7 +519,7 @@ pub async fn delete_documents_by_filter(
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
|
||||
|
||||
debug!("returns: {:?}", task);
|
||||
debug!(returns = ?task, "Delete documents by filter");
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
}
|
||||
|
||||
@@ -532,7 +536,7 @@ pub async fn clear_all_documents(
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
|
||||
|
||||
debug!("returns: {:?}", task);
|
||||
debug!(returns = ?task, "Delete all documents");
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
}
|
||||
|
||||
|
||||
@@ -2,20 +2,20 @@ use actix_web::web::Data;
|
||||
use actix_web::{web, HttpRequest, HttpResponse};
|
||||
use deserr::actix_web::AwebJson;
|
||||
use index_scheduler::IndexScheduler;
|
||||
use log::debug;
|
||||
use meilisearch_types::deserr::DeserrJsonError;
|
||||
use meilisearch_types::error::deserr_codes::*;
|
||||
use meilisearch_types::error::ResponseError;
|
||||
use meilisearch_types::index_uid::IndexUid;
|
||||
use serde_json::Value;
|
||||
use tracing::debug;
|
||||
|
||||
use crate::analytics::{Analytics, FacetSearchAggregator};
|
||||
use crate::extractors::authentication::policies::*;
|
||||
use crate::extractors::authentication::GuardedData;
|
||||
use crate::search::{
|
||||
add_search_rules, perform_facet_search, MatchingStrategy, SearchQuery, DEFAULT_CROP_LENGTH,
|
||||
DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG,
|
||||
DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET,
|
||||
add_search_rules, perform_facet_search, HybridQuery, MatchingStrategy, SearchQuery,
|
||||
DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG,
|
||||
DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET,
|
||||
};
|
||||
|
||||
pub fn configure(cfg: &mut web::ServiceConfig) {
|
||||
@@ -36,6 +36,8 @@ pub struct FacetSearchQuery {
|
||||
pub q: Option<String>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchVector>)]
|
||||
pub vector: Option<Vec<f32>>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidHybridQuery>)]
|
||||
pub hybrid: Option<HybridQuery>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchFilter>)]
|
||||
pub filter: Option<Value>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchMatchingStrategy>, default)]
|
||||
@@ -54,7 +56,7 @@ pub async fn search(
|
||||
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
|
||||
|
||||
let query = params.into_inner();
|
||||
debug!("facet search called with params: {:?}", query);
|
||||
debug!(parameters = ?query, "Facet search");
|
||||
|
||||
let mut aggregate = FacetSearchAggregator::from_query(&query, &req);
|
||||
|
||||
@@ -81,7 +83,7 @@ pub async fn search(
|
||||
|
||||
let search_result = search_result?;
|
||||
|
||||
debug!("returns: {:?}", search_result);
|
||||
debug!(returns = ?search_result, "Facet search");
|
||||
Ok(HttpResponse::Ok().json(search_result))
|
||||
}
|
||||
|
||||
@@ -95,6 +97,7 @@ impl From<FacetSearchQuery> for SearchQuery {
|
||||
filter,
|
||||
matching_strategy,
|
||||
attributes_to_search_on,
|
||||
hybrid,
|
||||
} = value;
|
||||
|
||||
SearchQuery {
|
||||
@@ -119,6 +122,7 @@ impl From<FacetSearchQuery> for SearchQuery {
|
||||
matching_strategy,
|
||||
vector,
|
||||
attributes_to_search_on,
|
||||
hybrid,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5,7 +5,6 @@ use actix_web::{web, HttpRequest, HttpResponse};
|
||||
use deserr::actix_web::{AwebJson, AwebQueryParameter};
|
||||
use deserr::{DeserializeError, Deserr, ValuePointerRef};
|
||||
use index_scheduler::IndexScheduler;
|
||||
use log::debug;
|
||||
use meilisearch_types::deserr::query_params::Param;
|
||||
use meilisearch_types::deserr::{immutable_field_error, DeserrJsonError, DeserrQueryParamError};
|
||||
use meilisearch_types::error::deserr_codes::*;
|
||||
@@ -16,6 +15,7 @@ use meilisearch_types::tasks::KindWithContent;
|
||||
use serde::Serialize;
|
||||
use serde_json::json;
|
||||
use time::OffsetDateTime;
|
||||
use tracing::debug;
|
||||
|
||||
use super::{Pagination, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT};
|
||||
use crate::analytics::Analytics;
|
||||
@@ -93,6 +93,7 @@ pub async fn list_indexes(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::INDEXES_GET }>, Data<IndexScheduler>>,
|
||||
paginate: AwebQueryParameter<ListIndexes, DeserrQueryParamError>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
debug!(parameters = ?paginate, "List indexes");
|
||||
let filters = index_scheduler.filters();
|
||||
let indexes: Vec<Option<IndexView>> =
|
||||
index_scheduler.try_for_each_index(|uid, index| -> Result<Option<IndexView>, _> {
|
||||
@@ -105,7 +106,7 @@ pub async fn list_indexes(
|
||||
let indexes: Vec<IndexView> = indexes.into_iter().flatten().collect();
|
||||
let ret = paginate.as_pagination().auto_paginate_sized(indexes.into_iter());
|
||||
|
||||
debug!("returns: {:?}", ret);
|
||||
debug!(returns = ?ret, "List indexes");
|
||||
Ok(HttpResponse::Ok().json(ret))
|
||||
}
|
||||
|
||||
@@ -124,6 +125,7 @@ pub async fn create_index(
|
||||
req: HttpRequest,
|
||||
analytics: web::Data<dyn Analytics>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
debug!(parameters = ?body, "Create index");
|
||||
let IndexCreateRequest { primary_key, uid } = body.into_inner();
|
||||
|
||||
let allow_index_creation = index_scheduler.filters().allow_index_creation(&uid);
|
||||
@@ -137,6 +139,7 @@ pub async fn create_index(
|
||||
let task = KindWithContent::IndexCreation { index_uid: uid.to_string(), primary_key };
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
|
||||
debug!(returns = ?task, "Create index");
|
||||
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
} else {
|
||||
@@ -177,7 +180,7 @@ pub async fn get_index(
|
||||
let index = index_scheduler.index(&index_uid)?;
|
||||
let index_view = IndexView::new(index_uid.into_inner(), &index)?;
|
||||
|
||||
debug!("returns: {:?}", index_view);
|
||||
debug!(returns = ?index_view, "Get index");
|
||||
|
||||
Ok(HttpResponse::Ok().json(index_view))
|
||||
}
|
||||
@@ -189,7 +192,7 @@ pub async fn update_index(
|
||||
req: HttpRequest,
|
||||
analytics: web::Data<dyn Analytics>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
debug!("called with params: {:?}", body);
|
||||
debug!(parameters = ?body, "Update index");
|
||||
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
|
||||
let body = body.into_inner();
|
||||
analytics.publish(
|
||||
@@ -206,7 +209,7 @@ pub async fn update_index(
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
|
||||
|
||||
debug!("returns: {:?}", task);
|
||||
debug!(returns = ?task, "Update index");
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
}
|
||||
|
||||
@@ -218,6 +221,7 @@ pub async fn delete_index(
|
||||
let task = KindWithContent::IndexDeletion { index_uid: index_uid.into_inner() };
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
|
||||
debug!(returns = ?task, "Delete index");
|
||||
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
}
|
||||
@@ -255,6 +259,6 @@ pub async fn get_index_stats(
|
||||
|
||||
let stats = IndexStats::from(index_scheduler.index_stats(&index_uid)?);
|
||||
|
||||
debug!("returns: {:?}", stats);
|
||||
debug!(returns = ?stats, "Get index stats");
|
||||
Ok(HttpResponse::Ok().json(stats))
|
||||
}
|
||||
|
||||
@@ -2,23 +2,25 @@ use actix_web::web::Data;
|
||||
use actix_web::{web, HttpRequest, HttpResponse};
|
||||
use deserr::actix_web::{AwebJson, AwebQueryParameter};
|
||||
use index_scheduler::IndexScheduler;
|
||||
use log::debug;
|
||||
use meilisearch_types::deserr::query_params::Param;
|
||||
use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError};
|
||||
use meilisearch_types::error::deserr_codes::*;
|
||||
use meilisearch_types::error::ResponseError;
|
||||
use meilisearch_types::index_uid::IndexUid;
|
||||
use meilisearch_types::milli;
|
||||
use meilisearch_types::milli::vector::DistributionShift;
|
||||
use meilisearch_types::serde_cs::vec::CS;
|
||||
use serde_json::Value;
|
||||
use tracing::{debug, warn};
|
||||
|
||||
use crate::analytics::{Analytics, SearchAggregator};
|
||||
use crate::extractors::authentication::policies::*;
|
||||
use crate::extractors::authentication::GuardedData;
|
||||
use crate::extractors::sequential_extractor::SeqHandler;
|
||||
use crate::search::{
|
||||
add_search_rules, perform_search, MatchingStrategy, SearchQuery, DEFAULT_CROP_LENGTH,
|
||||
DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG,
|
||||
DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET,
|
||||
add_search_rules, perform_search, HybridQuery, MatchingStrategy, SearchQuery, SemanticRatio,
|
||||
DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG,
|
||||
DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET, DEFAULT_SEMANTIC_RATIO,
|
||||
};
|
||||
|
||||
pub fn configure(cfg: &mut web::ServiceConfig) {
|
||||
@@ -74,6 +76,31 @@ pub struct SearchQueryGet {
|
||||
matching_strategy: MatchingStrategy,
|
||||
#[deserr(default, error = DeserrQueryParamError<InvalidSearchAttributesToSearchOn>)]
|
||||
pub attributes_to_search_on: Option<CS<String>>,
|
||||
#[deserr(default, error = DeserrQueryParamError<InvalidEmbedder>)]
|
||||
pub hybrid_embedder: Option<String>,
|
||||
#[deserr(default, error = DeserrQueryParamError<InvalidSearchSemanticRatio>)]
|
||||
pub hybrid_semantic_ratio: Option<SemanticRatioGet>,
|
||||
}
|
||||
|
||||
/// Newtype around [`SemanticRatio`] used as the type of the `hybrid_semantic_ratio`
/// field of `SearchQueryGet`.
///
/// `deserr` deserializes it from a `String` through its `TryFrom<String>`
/// implementation, reporting failures as `InvalidSearchSemanticRatio`.
#[derive(Debug, Clone, Copy, Default, PartialEq, deserr::Deserr)]
|
||||
#[deserr(try_from(String) = TryFrom::try_from -> InvalidSearchSemanticRatio)]
|
||||
pub struct SemanticRatioGet(SemanticRatio);
|
||||
|
||||
/// Conversion from a textual value into a [`SemanticRatioGet`].
impl std::convert::TryFrom<String> for SemanticRatioGet {
|
||||
type Error = InvalidSearchSemanticRatio;
|
||||
|
||||
/// Parses `s` as an `f32` and converts it into a `SemanticRatio`.
///
/// # Errors
///
/// Returns `InvalidSearchSemanticRatio` when `s` is not a valid `f32`, or when
/// `SemanticRatio::try_from` rejects the parsed value.
fn try_from(s: String) -> Result<Self, Self::Error> {
|
||||
// The float parse error is discarded and replaced by the route-level error code.
let f: f32 = s.parse().map_err(|_| InvalidSearchSemanticRatio)?;
|
||||
// NOTE(review): any range validation presumably lives in `SemanticRatio::try_from` — confirm.
Ok(SemanticRatioGet(SemanticRatio::try_from(f)?))
|
||||
}
|
||||
}
|
||||
|
||||
/// Gives read-only access to the wrapped [`SemanticRatio`], so callers can write
/// `*semantic_ratio` to obtain the inner value.
impl std::ops::Deref for SemanticRatioGet {
|
||||
type Target = SemanticRatio;
|
||||
|
||||
/// Returns a reference to the inner `SemanticRatio`.
fn deref(&self) -> &Self::Target {
|
||||
&self.0
|
||||
}
|
||||
}
|
||||
|
||||
impl From<SearchQueryGet> for SearchQuery {
|
||||
@@ -86,6 +113,20 @@ impl From<SearchQueryGet> for SearchQuery {
|
||||
None => None,
|
||||
};
|
||||
|
||||
let hybrid = match (other.hybrid_embedder, other.hybrid_semantic_ratio) {
|
||||
(None, None) => None,
|
||||
(None, Some(semantic_ratio)) => {
|
||||
Some(HybridQuery { semantic_ratio: *semantic_ratio, embedder: None })
|
||||
}
|
||||
(Some(embedder), None) => Some(HybridQuery {
|
||||
semantic_ratio: DEFAULT_SEMANTIC_RATIO(),
|
||||
embedder: Some(embedder),
|
||||
}),
|
||||
(Some(embedder), Some(semantic_ratio)) => {
|
||||
Some(HybridQuery { semantic_ratio: *semantic_ratio, embedder: Some(embedder) })
|
||||
}
|
||||
};
|
||||
|
||||
Self {
|
||||
q: other.q,
|
||||
vector: other.vector.map(CS::into_inner),
|
||||
@@ -108,6 +149,7 @@ impl From<SearchQueryGet> for SearchQuery {
|
||||
crop_marker: other.crop_marker,
|
||||
matching_strategy: other.matching_strategy,
|
||||
attributes_to_search_on: other.attributes_to_search_on.map(|o| o.into_iter().collect()),
|
||||
hybrid,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -144,7 +186,7 @@ pub async fn search_with_url_query(
|
||||
req: HttpRequest,
|
||||
analytics: web::Data<dyn Analytics>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
debug!("called with params: {:?}", params);
|
||||
debug!(parameters = ?params, "Search get");
|
||||
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
|
||||
|
||||
let mut query: SearchQuery = params.into_inner().into();
|
||||
@@ -158,8 +200,12 @@ pub async fn search_with_url_query(
|
||||
|
||||
let index = index_scheduler.index(&index_uid)?;
|
||||
let features = index_scheduler.features();
|
||||
|
||||
let distribution = embed(&mut query, index_scheduler.get_ref(), &index).await?;
|
||||
|
||||
let search_result =
|
||||
tokio::task::spawn_blocking(move || perform_search(&index, query, features)).await?;
|
||||
tokio::task::spawn_blocking(move || perform_search(&index, query, features, distribution))
|
||||
.await?;
|
||||
if let Ok(ref search_result) = search_result {
|
||||
aggregate.succeed(search_result);
|
||||
}
|
||||
@@ -167,7 +213,7 @@ pub async fn search_with_url_query(
|
||||
|
||||
let search_result = search_result?;
|
||||
|
||||
debug!("returns: {:?}", search_result);
|
||||
debug!(returns = ?search_result, "Search get");
|
||||
Ok(HttpResponse::Ok().json(search_result))
|
||||
}
|
||||
|
||||
@@ -181,7 +227,7 @@ pub async fn search_with_post(
|
||||
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
|
||||
|
||||
let mut query = params.into_inner();
|
||||
debug!("search called with params: {:?}", query);
|
||||
debug!(parameters = ?query, "Search post");
|
||||
|
||||
// Tenant token search_rules.
|
||||
if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid) {
|
||||
@@ -193,8 +239,12 @@ pub async fn search_with_post(
|
||||
let index = index_scheduler.index(&index_uid)?;
|
||||
|
||||
let features = index_scheduler.features();
|
||||
|
||||
let distribution = embed(&mut query, index_scheduler.get_ref(), &index).await?;
|
||||
|
||||
let search_result =
|
||||
tokio::task::spawn_blocking(move || perform_search(&index, query, features)).await?;
|
||||
tokio::task::spawn_blocking(move || perform_search(&index, query, features, distribution))
|
||||
.await?;
|
||||
if let Ok(ref search_result) = search_result {
|
||||
aggregate.succeed(search_result);
|
||||
}
|
||||
@@ -202,10 +252,84 @@ pub async fn search_with_post(
|
||||
|
||||
let search_result = search_result?;
|
||||
|
||||
debug!("returns: {:?}", search_result);
|
||||
debug!(returns = ?search_result, "Search post");
|
||||
Ok(HttpResponse::Ok().json(search_result))
|
||||
}
|
||||
|
||||
pub async fn embed(
|
||||
query: &mut SearchQuery,
|
||||
index_scheduler: &IndexScheduler,
|
||||
index: &milli::Index,
|
||||
) -> Result<Option<DistributionShift>, ResponseError> {
|
||||
match (&query.hybrid, &query.vector, &query.q) {
|
||||
(Some(HybridQuery { semantic_ratio: _, embedder }), None, Some(q))
|
||||
if !q.trim().is_empty() =>
|
||||
{
|
||||
let embedder_configs = index.embedding_configs(&index.read_txn()?)?;
|
||||
let embedders = index_scheduler.embedders(embedder_configs)?;
|
||||
|
||||
let embedder = if let Some(embedder_name) = embedder {
|
||||
embedders.get(embedder_name)
|
||||
} else {
|
||||
embedders.get_default()
|
||||
};
|
||||
|
||||
let embedder = embedder
|
||||
.ok_or(milli::UserError::InvalidEmbedder("default".to_owned()))
|
||||
.map_err(milli::Error::from)?
|
||||
.0;
|
||||
|
||||
let distribution = embedder.distribution();
|
||||
|
||||
let embeddings = embedder
|
||||
.embed(vec![q.to_owned()])
|
||||
.await
|
||||
.map_err(milli::vector::Error::from)
|
||||
.map_err(milli::Error::from)?
|
||||
.pop()
|
||||
.expect("No vector returned from embedding");
|
||||
|
||||
if embeddings.iter().nth(1).is_some() {
|
||||
warn!("Ignoring embeddings past the first one in long search query");
|
||||
query.vector = Some(embeddings.iter().next().unwrap().to_vec());
|
||||
} else {
|
||||
query.vector = Some(embeddings.into_inner());
|
||||
}
|
||||
Ok(distribution)
|
||||
}
|
||||
(Some(hybrid), vector, _) => {
|
||||
let embedder_configs = index.embedding_configs(&index.read_txn()?)?;
|
||||
let embedders = index_scheduler.embedders(embedder_configs)?;
|
||||
|
||||
let embedder = if let Some(embedder_name) = &hybrid.embedder {
|
||||
embedders.get(embedder_name)
|
||||
} else {
|
||||
embedders.get_default()
|
||||
};
|
||||
|
||||
let embedder = embedder
|
||||
.ok_or(milli::UserError::InvalidEmbedder("default".to_owned()))
|
||||
.map_err(milli::Error::from)?
|
||||
.0;
|
||||
|
||||
if let Some(vector) = vector {
|
||||
if vector.len() != embedder.dimensions() {
|
||||
return Err(meilisearch_types::milli::Error::UserError(
|
||||
meilisearch_types::milli::UserError::InvalidVectorDimensions {
|
||||
expected: embedder.dimensions(),
|
||||
found: vector.len(),
|
||||
},
|
||||
)
|
||||
.into());
|
||||
}
|
||||
}
|
||||
|
||||
Ok(embedder.distribution())
|
||||
}
|
||||
_ => Ok(None),
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
|
||||
@@ -2,14 +2,15 @@ use actix_web::web::Data;
|
||||
use actix_web::{web, HttpRequest, HttpResponse};
|
||||
use deserr::actix_web::AwebJson;
|
||||
use index_scheduler::IndexScheduler;
|
||||
use log::debug;
|
||||
use meilisearch_types::deserr::DeserrJsonError;
|
||||
use meilisearch_types::error::ResponseError;
|
||||
use meilisearch_types::facet_values_sort::FacetValuesSort;
|
||||
use meilisearch_types::index_uid::IndexUid;
|
||||
use meilisearch_types::milli::update::Setting;
|
||||
use meilisearch_types::settings::{settings, RankingRuleView, Settings, Unchecked};
|
||||
use meilisearch_types::tasks::KindWithContent;
|
||||
use serde_json::json;
|
||||
use tracing::debug;
|
||||
|
||||
use crate::analytics::Analytics;
|
||||
use crate::extractors::authentication::policies::*;
|
||||
@@ -23,12 +24,12 @@ macro_rules! make_setting_route {
|
||||
use actix_web::web::Data;
|
||||
use actix_web::{web, HttpRequest, HttpResponse, Resource};
|
||||
use index_scheduler::IndexScheduler;
|
||||
use log::debug;
|
||||
use meilisearch_types::error::ResponseError;
|
||||
use meilisearch_types::index_uid::IndexUid;
|
||||
use meilisearch_types::milli::update::Setting;
|
||||
use meilisearch_types::settings::{settings, Settings};
|
||||
use meilisearch_types::tasks::KindWithContent;
|
||||
use tracing::debug;
|
||||
use $crate::analytics::Analytics;
|
||||
use $crate::extractors::authentication::policies::*;
|
||||
use $crate::extractors::authentication::GuardedData;
|
||||
@@ -60,7 +61,7 @@ macro_rules! make_setting_route {
|
||||
.await??
|
||||
.into();
|
||||
|
||||
debug!("returns: {:?}", task);
|
||||
debug!(returns = ?task, "Delete settings");
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
}
|
||||
|
||||
@@ -77,6 +78,7 @@ macro_rules! make_setting_route {
|
||||
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
|
||||
|
||||
let body = body.into_inner();
|
||||
debug!(parameters = ?body, "Update settings");
|
||||
|
||||
#[allow(clippy::redundant_closure_call)]
|
||||
$analytics(&body, &req);
|
||||
@@ -89,6 +91,11 @@ macro_rules! make_setting_route {
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let new_settings = $crate::routes::indexes::settings::validate_settings(
|
||||
new_settings,
|
||||
&index_scheduler,
|
||||
)?;
|
||||
|
||||
let allow_index_creation =
|
||||
index_scheduler.filters().allow_index_creation(&index_uid);
|
||||
|
||||
@@ -103,7 +110,7 @@ macro_rules! make_setting_route {
|
||||
.await??
|
||||
.into();
|
||||
|
||||
debug!("returns: {:?}", task);
|
||||
debug!(returns = ?task, "Update settings");
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
}
|
||||
|
||||
@@ -120,7 +127,7 @@ macro_rules! make_setting_route {
|
||||
let rtxn = index.read_txn()?;
|
||||
let settings = settings(&index, &rtxn)?;
|
||||
|
||||
debug!("returns: {:?}", settings);
|
||||
debug!(returns = ?settings, "Update settings");
|
||||
let mut json = serde_json::json!(&settings);
|
||||
let val = json[$camelcase_attr].take();
|
||||
|
||||
@@ -452,6 +459,7 @@ make_setting_route!(
|
||||
json!({
|
||||
"proximity_precision": {
|
||||
"set": precision.is_some(),
|
||||
"value": precision.unwrap_or_default(),
|
||||
}
|
||||
}),
|
||||
Some(req),
|
||||
@@ -545,6 +553,67 @@ make_setting_route!(
|
||||
}
|
||||
);
|
||||
|
||||
// Settings sub-route for `/embedders`. The closure publishes the
// "Embedders Updated" analytics event, summarized by `embedder_analytics`.
make_setting_route!(
    "/embedders",
    patch,
    std::collections::BTreeMap<String, Setting<meilisearch_types::milli::vector::settings::EmbeddingSettings>>,
    meilisearch_types::deserr::DeserrJsonError<
        meilisearch_types::error::deserr_codes::InvalidSettingsEmbedders,
    >,
    embedders,
    "embedders",
    analytics,
    |setting: &Option<std::collections::BTreeMap<String, Setting<meilisearch_types::milli::vector::settings::EmbeddingSettings>>>, req: &HttpRequest| {
        analytics.publish(
            "Embedders Updated".to_string(),
            serde_json::json!({"embedders": crate::routes::indexes::settings::embedder_analytics(setting.as_ref())}),
            Some(req),
        );
    }
);
|
||||
|
||||
fn embedder_analytics(
|
||||
setting: Option<
|
||||
&std::collections::BTreeMap<
|
||||
String,
|
||||
Setting<meilisearch_types::milli::vector::settings::EmbeddingSettings>,
|
||||
>,
|
||||
>,
|
||||
) -> serde_json::Value {
|
||||
let mut sources = std::collections::HashSet::new();
|
||||
|
||||
if let Some(s) = &setting {
|
||||
for source in s
|
||||
.values()
|
||||
.filter_map(|config| config.clone().set())
|
||||
.filter_map(|config| config.source.set())
|
||||
{
|
||||
use meilisearch_types::milli::vector::settings::EmbedderSource;
|
||||
match source {
|
||||
EmbedderSource::OpenAi => sources.insert("openAi"),
|
||||
EmbedderSource::HuggingFace => sources.insert("huggingFace"),
|
||||
EmbedderSource::UserProvided => sources.insert("userProvided"),
|
||||
};
|
||||
}
|
||||
};
|
||||
|
||||
let document_template_used = setting.as_ref().map(|map| {
|
||||
map.values()
|
||||
.filter_map(|config| config.clone().set())
|
||||
.any(|config| config.document_template.set().is_some())
|
||||
});
|
||||
|
||||
json!(
|
||||
{
|
||||
"total": setting.as_ref().map(|s| s.len()),
|
||||
"sources": sources,
|
||||
"document_template_used": document_template_used,
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
macro_rules! generate_configure {
|
||||
($($mod:ident),*) => {
|
||||
pub fn configure(cfg: &mut web::ServiceConfig) {
|
||||
@@ -574,7 +643,8 @@ generate_configure!(
|
||||
ranking_rules,
|
||||
typo_tolerance,
|
||||
pagination,
|
||||
faceting
|
||||
faceting,
|
||||
embedders
|
||||
);
|
||||
|
||||
pub async fn update_all(
|
||||
@@ -587,6 +657,8 @@ pub async fn update_all(
|
||||
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
|
||||
|
||||
let new_settings = body.into_inner();
|
||||
debug!(parameters = ?new_settings, "Update all settings");
|
||||
let new_settings = validate_settings(new_settings, &index_scheduler)?;
|
||||
|
||||
analytics.publish(
|
||||
"Settings Updated".to_string(),
|
||||
@@ -620,7 +692,8 @@ pub async fn update_all(
|
||||
"set": new_settings.distinct_attribute.as_ref().set().is_some()
|
||||
},
|
||||
"proximity_precision": {
|
||||
"set": new_settings.proximity_precision.as_ref().set().is_some()
|
||||
"set": new_settings.proximity_precision.as_ref().set().is_some(),
|
||||
"value": new_settings.proximity_precision.as_ref().set().copied().unwrap_or_default()
|
||||
},
|
||||
"typo_tolerance": {
|
||||
"enabled": new_settings.typo_tolerance
|
||||
@@ -681,6 +754,7 @@ pub async fn update_all(
|
||||
"synonyms": {
|
||||
"total": new_settings.synonyms.as_ref().set().map(|synonyms| synonyms.len()),
|
||||
},
|
||||
"embedders": crate::routes::indexes::settings::embedder_analytics(new_settings.embedders.as_ref().set())
|
||||
}),
|
||||
Some(&req),
|
||||
);
|
||||
@@ -696,7 +770,7 @@ pub async fn update_all(
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
|
||||
|
||||
debug!("returns: {:?}", task);
|
||||
debug!(returns = ?task, "Update all settings");
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
}
|
||||
|
||||
@@ -709,7 +783,7 @@ pub async fn get_all(
|
||||
let index = index_scheduler.index(&index_uid)?;
|
||||
let rtxn = index.read_txn()?;
|
||||
let new_settings = settings(&index, &rtxn)?;
|
||||
debug!("returns: {:?}", new_settings);
|
||||
debug!(returns = ?new_settings, "Get all settings");
|
||||
Ok(HttpResponse::Ok().json(new_settings))
|
||||
}
|
||||
|
||||
@@ -732,6 +806,16 @@ pub async fn delete_all(
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
|
||||
|
||||
debug!("returns: {:?}", task);
|
||||
debug!(returns = ?task, "Delete all settings");
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
}
|
||||
|
||||
fn validate_settings(
|
||||
settings: Settings<Unchecked>,
|
||||
index_scheduler: &IndexScheduler,
|
||||
) -> Result<Settings<Unchecked>, ResponseError> {
|
||||
if matches!(settings.embedders, Setting::Set(_)) {
|
||||
index_scheduler.features().check_vector("Passing `embedders` in settings")?
|
||||
}
|
||||
Ok(settings.validate()?)
|
||||
}
|
||||
|
||||
281
meilisearch/src/routes/logs.rs
Normal file
281
meilisearch/src/routes/logs.rs
Normal file
@@ -0,0 +1,281 @@
|
||||
use std::convert::Infallible;
|
||||
use std::io::Write;
|
||||
use std::ops::ControlFlow;
|
||||
use std::pin::Pin;
|
||||
use std::str::FromStr;
|
||||
use std::sync::Arc;
|
||||
|
||||
use actix_web::web::{Bytes, Data};
|
||||
use actix_web::{web, HttpResponse};
|
||||
use deserr::actix_web::AwebJson;
|
||||
use deserr::{DeserializeError, Deserr, ErrorKind, MergeWithError, ValuePointerRef};
|
||||
use futures_util::Stream;
|
||||
use index_scheduler::IndexScheduler;
|
||||
use meilisearch_types::deserr::DeserrJsonError;
|
||||
use meilisearch_types::error::deserr_codes::*;
|
||||
use meilisearch_types::error::{Code, ResponseError};
|
||||
use tokio::sync::mpsc;
|
||||
use tracing_subscriber::filter::Targets;
|
||||
use tracing_subscriber::Layer;
|
||||
|
||||
use crate::error::MeilisearchHttpError;
|
||||
use crate::extractors::authentication::policies::*;
|
||||
use crate::extractors::authentication::GuardedData;
|
||||
use crate::extractors::sequential_extractor::SeqHandler;
|
||||
use crate::LogRouteHandle;
|
||||
|
||||
pub fn configure(cfg: &mut web::ServiceConfig) {
|
||||
cfg.service(
|
||||
web::resource("stream")
|
||||
.route(web::post().to(SeqHandler(get_logs)))
|
||||
.route(web::delete().to(SeqHandler(cancel_logs))),
|
||||
);
|
||||
}
|
||||
|
||||
#[derive(Debug, Default, Clone, Copy, Deserr, PartialEq, Eq)]
|
||||
#[deserr(rename_all = camelCase)]
|
||||
pub enum LogMode {
|
||||
#[default]
|
||||
Human,
|
||||
Profile,
|
||||
}
|
||||
|
||||
/// Simple wrapper around the `Targets` from `tracing_subscriber` to implement `MergeWithError` on it.
|
||||
#[derive(Clone, Debug)]
|
||||
struct MyTargets(Targets);
|
||||
|
||||
/// Simple wrapper around the `ParseError` from `tracing_subscriber` to implement `MergeWithError` on it.
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
enum MyParseError {
|
||||
#[error(transparent)]
|
||||
ParseError(#[from] tracing_subscriber::filter::ParseError),
|
||||
#[error(
|
||||
"Empty string is not a valid target. If you want to get no logs use `OFF`. Usage: `info`, `meilisearch=info`, or you can write multiple filters in one target: `index_scheduler=info,milli=trace`"
|
||||
)]
|
||||
Example,
|
||||
}
|
||||
|
||||
impl FromStr for MyTargets {
|
||||
type Err = MyParseError;
|
||||
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
if s.is_empty() {
|
||||
Err(MyParseError::Example)
|
||||
} else {
|
||||
Ok(MyTargets(Targets::from_str(s).map_err(MyParseError::ParseError)?))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl MergeWithError<MyParseError> for DeserrJsonError<BadRequest> {
|
||||
fn merge(
|
||||
_self_: Option<Self>,
|
||||
other: MyParseError,
|
||||
merge_location: ValuePointerRef,
|
||||
) -> ControlFlow<Self, Self> {
|
||||
Self::error::<Infallible>(
|
||||
None,
|
||||
ErrorKind::Unexpected { msg: other.to_string() },
|
||||
merge_location,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserr)]
|
||||
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields, validate = validate_get_logs -> DeserrJsonError<InvalidSettingsTypoTolerance>)]
|
||||
pub struct GetLogs {
|
||||
#[deserr(default = "info".parse().unwrap(), try_from(&String) = MyTargets::from_str -> DeserrJsonError<BadRequest>)]
|
||||
target: MyTargets,
|
||||
|
||||
#[deserr(default, error = DeserrJsonError<BadRequest>)]
|
||||
mode: LogMode,
|
||||
|
||||
#[deserr(default = false, error = DeserrJsonError<BadRequest>)]
|
||||
profile_memory: bool,
|
||||
}
|
||||
|
||||
fn validate_get_logs<E: DeserializeError>(
|
||||
logs: GetLogs,
|
||||
location: ValuePointerRef,
|
||||
) -> Result<GetLogs, E> {
|
||||
if logs.profile_memory && logs.mode != LogMode::Profile {
|
||||
Err(deserr::take_cf_content(E::error::<Infallible>(
|
||||
None,
|
||||
ErrorKind::Unexpected {
|
||||
msg: format!("`profile_memory` can only be used while profiling code and is not compatible with the {:?} mode.", logs.mode),
|
||||
},
|
||||
location,
|
||||
)))
|
||||
} else {
|
||||
Ok(logs)
|
||||
}
|
||||
}
|
||||
|
||||
struct LogWriter {
|
||||
sender: mpsc::UnboundedSender<Vec<u8>>,
|
||||
}
|
||||
|
||||
impl Write for LogWriter {
|
||||
fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
|
||||
self.sender.send(buf.to_vec()).map_err(std::io::Error::other)?;
|
||||
Ok(buf.len())
|
||||
}
|
||||
|
||||
fn flush(&mut self) -> std::io::Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
struct HandleGuard {
|
||||
/// We need to keep an handle on the logs to make it available again when the streamer is dropped
|
||||
logs: Arc<LogRouteHandle>,
|
||||
}
|
||||
|
||||
impl Drop for HandleGuard {
|
||||
fn drop(&mut self) {
|
||||
if let Err(e) = self.logs.modify(|layer| *layer.inner_mut() = None) {
|
||||
tracing::error!("Could not free the logs route: {e}");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn byte_stream(
|
||||
receiver: mpsc::UnboundedReceiver<Vec<u8>>,
|
||||
guard: HandleGuard,
|
||||
) -> impl futures_util::Stream<Item = Result<Bytes, ResponseError>> {
|
||||
futures_util::stream::unfold((receiver, guard), move |(mut receiver, guard)| async move {
|
||||
let vec = receiver.recv().await;
|
||||
|
||||
vec.map(From::from).map(Ok).map(|a| (a, (receiver, guard)))
|
||||
})
|
||||
}
|
||||
|
||||
type PinnedByteStream = Pin<Box<dyn Stream<Item = Result<Bytes, ResponseError>>>>;
|
||||
|
||||
fn make_layer<
|
||||
S: tracing::Subscriber + for<'span> tracing_subscriber::registry::LookupSpan<'span>,
|
||||
>(
|
||||
opt: &GetLogs,
|
||||
logs: Data<LogRouteHandle>,
|
||||
) -> (Box<dyn Layer<S> + Send + Sync>, PinnedByteStream) {
|
||||
let guard = HandleGuard { logs: logs.into_inner() };
|
||||
match opt.mode {
|
||||
LogMode::Human => {
|
||||
let (sender, receiver) = tokio::sync::mpsc::unbounded_channel();
|
||||
|
||||
let fmt_layer = tracing_subscriber::fmt::layer()
|
||||
.with_writer(move || LogWriter { sender: sender.clone() })
|
||||
.with_span_events(tracing_subscriber::fmt::format::FmtSpan::ACTIVE);
|
||||
|
||||
let stream = byte_stream(receiver, guard);
|
||||
(Box::new(fmt_layer) as Box<dyn Layer<S> + Send + Sync>, Box::pin(stream))
|
||||
}
|
||||
LogMode::Profile => {
|
||||
let (trace, layer) = tracing_trace::Trace::new(opt.profile_memory);
|
||||
|
||||
let stream = entry_stream(trace, guard);
|
||||
|
||||
(Box::new(layer) as Box<dyn Layer<S> + Send + Sync>, Box::pin(stream))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn entry_stream(
|
||||
trace: tracing_trace::Trace,
|
||||
guard: HandleGuard,
|
||||
) -> impl Stream<Item = Result<Bytes, ResponseError>> {
|
||||
let receiver = trace.into_receiver();
|
||||
let entry_buf = Vec::new();
|
||||
|
||||
futures_util::stream::unfold(
|
||||
(receiver, entry_buf, guard),
|
||||
move |(mut receiver, mut entry_buf, guard)| async move {
|
||||
let mut bytes = Vec::new();
|
||||
|
||||
while bytes.len() < 8192 {
|
||||
entry_buf.clear();
|
||||
|
||||
let Ok(count) = tokio::time::timeout(
|
||||
std::time::Duration::from_secs(1),
|
||||
receiver.recv_many(&mut entry_buf, 100),
|
||||
)
|
||||
.await
|
||||
else {
|
||||
break;
|
||||
};
|
||||
|
||||
if count == 0 {
|
||||
if !bytes.is_empty() {
|
||||
break;
|
||||
}
|
||||
|
||||
// channel closed, exit
|
||||
return None;
|
||||
}
|
||||
|
||||
for entry in &entry_buf {
|
||||
if let Err(error) = serde_json::to_writer(&mut bytes, entry) {
|
||||
tracing::error!(
|
||||
error = &error as &dyn std::error::Error,
|
||||
"deserializing entry"
|
||||
);
|
||||
return Some((
|
||||
Err(ResponseError::from_msg(
|
||||
format!("error deserializing entry: {error}"),
|
||||
Code::Internal,
|
||||
)),
|
||||
(receiver, entry_buf, guard),
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Some((Ok(bytes.into()), (receiver, entry_buf, guard)))
|
||||
},
|
||||
)
|
||||
}
|
||||
|
||||
pub async fn get_logs(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::METRICS_GET }>, Data<IndexScheduler>>,
|
||||
logs: Data<LogRouteHandle>,
|
||||
body: AwebJson<GetLogs, DeserrJsonError>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
index_scheduler.features().check_logs_route()?;
|
||||
|
||||
let opt = body.into_inner();
|
||||
let mut stream = None;
|
||||
|
||||
logs.modify(|layer| match layer.inner_mut() {
|
||||
None => {
|
||||
// there is no one getting logs
|
||||
*layer.filter_mut() = opt.target.0.clone();
|
||||
let (new_layer, new_stream) = make_layer(&opt, logs.clone());
|
||||
|
||||
*layer.inner_mut() = Some(new_layer);
|
||||
stream = Some(new_stream);
|
||||
}
|
||||
Some(_) => {
|
||||
// there is already someone getting logs
|
||||
}
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
if let Some(stream) = stream {
|
||||
Ok(HttpResponse::Ok().streaming(stream))
|
||||
} else {
|
||||
Err(MeilisearchHttpError::AlreadyUsedLogRoute.into())
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn cancel_logs(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::METRICS_GET }>, Data<IndexScheduler>>,
|
||||
logs: Data<LogRouteHandle>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
index_scheduler.features().check_logs_route()?;
|
||||
|
||||
if let Err(e) = logs.modify(|layer| *layer.inner_mut() = None) {
|
||||
tracing::error!("Could not free the logs route: {e}");
|
||||
}
|
||||
|
||||
Ok(HttpResponse::NoContent().finish())
|
||||
}
|
||||
@@ -3,7 +3,6 @@ use std::collections::BTreeMap;
|
||||
use actix_web::web::Data;
|
||||
use actix_web::{web, HttpRequest, HttpResponse};
|
||||
use index_scheduler::IndexScheduler;
|
||||
use log::debug;
|
||||
use meilisearch_auth::AuthController;
|
||||
use meilisearch_types::error::ResponseError;
|
||||
use meilisearch_types::settings::{Settings, Unchecked};
|
||||
@@ -11,6 +10,7 @@ use meilisearch_types::tasks::{Kind, Status, Task, TaskId};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_json::json;
|
||||
use time::OffsetDateTime;
|
||||
use tracing::debug;
|
||||
|
||||
use crate::analytics::Analytics;
|
||||
use crate::extractors::authentication::policies::*;
|
||||
@@ -22,6 +22,7 @@ mod api_key;
|
||||
mod dump;
|
||||
pub mod features;
|
||||
pub mod indexes;
|
||||
mod logs;
|
||||
mod metrics;
|
||||
mod multi_search;
|
||||
mod snapshot;
|
||||
@@ -31,6 +32,7 @@ pub mod tasks;
|
||||
pub fn configure(cfg: &mut web::ServiceConfig) {
|
||||
cfg.service(web::scope("/tasks").configure(tasks::configure))
|
||||
.service(web::resource("/health").route(web::get().to(get_health)))
|
||||
.service(web::scope("/logs").configure(logs::configure))
|
||||
.service(web::scope("/keys").configure(api_key::configure))
|
||||
.service(web::scope("/dumps").configure(dump::configure))
|
||||
.service(web::scope("/snapshots").configure(snapshot::configure))
|
||||
@@ -250,7 +252,7 @@ async fn get_stats(
|
||||
|
||||
let stats = create_all_stats((*index_scheduler).clone(), (*auth_controller).clone(), filters)?;
|
||||
|
||||
debug!("returns: {:?}", stats);
|
||||
debug!(returns = ?stats, "Get stats");
|
||||
Ok(HttpResponse::Ok().json(stats))
|
||||
}
|
||||
|
||||
|
||||
@@ -3,16 +3,17 @@ use actix_web::web::{self, Data};
|
||||
use actix_web::{HttpRequest, HttpResponse};
|
||||
use deserr::actix_web::AwebJson;
|
||||
use index_scheduler::IndexScheduler;
|
||||
use log::debug;
|
||||
use meilisearch_types::deserr::DeserrJsonError;
|
||||
use meilisearch_types::error::ResponseError;
|
||||
use meilisearch_types::keys::actions;
|
||||
use serde::Serialize;
|
||||
use tracing::debug;
|
||||
|
||||
use crate::analytics::{Analytics, MultiSearchAggregator};
|
||||
use crate::extractors::authentication::policies::ActionPolicy;
|
||||
use crate::extractors::authentication::{AuthenticationError, GuardedData};
|
||||
use crate::extractors::sequential_extractor::SeqHandler;
|
||||
use crate::routes::indexes::search::embed;
|
||||
use crate::search::{
|
||||
add_search_rules, perform_search, SearchQueryWithIndex, SearchResultWithIndex,
|
||||
};
|
||||
@@ -51,7 +52,7 @@ pub async fn multi_search_with_post(
|
||||
for (query_index, (index_uid, mut query)) in
|
||||
queries.into_iter().map(SearchQueryWithIndex::into_index_query).enumerate()
|
||||
{
|
||||
debug!("multi-search #{query_index}: called with params: {:?}", query);
|
||||
debug!(on_index = query_index, parameters = ?query, "Multi-search");
|
||||
|
||||
// Check index from API key
|
||||
if !index_scheduler.filters().is_index_authorized(&index_uid) {
|
||||
@@ -74,10 +75,15 @@ pub async fn multi_search_with_post(
|
||||
})
|
||||
.with_index(query_index)?;
|
||||
|
||||
let search_result =
|
||||
tokio::task::spawn_blocking(move || perform_search(&index, query, features))
|
||||
.await
|
||||
.with_index(query_index)?;
|
||||
let distribution = embed(&mut query, index_scheduler.get_ref(), &index)
|
||||
.await
|
||||
.with_index(query_index)?;
|
||||
|
||||
let search_result = tokio::task::spawn_blocking(move || {
|
||||
perform_search(&index, query, features, distribution)
|
||||
})
|
||||
.await
|
||||
.with_index(query_index)?;
|
||||
|
||||
search_results.push(SearchResultWithIndex {
|
||||
index_uid: index_uid.into_inner(),
|
||||
@@ -101,7 +107,7 @@ pub async fn multi_search_with_post(
|
||||
err
|
||||
})?;
|
||||
|
||||
debug!("returns: {:?}", search_results);
|
||||
debug!(returns = ?search_results, "Multi-search");
|
||||
|
||||
Ok(HttpResponse::Ok().json(SearchResults { results: search_results }))
|
||||
}
|
||||
|
||||
@@ -1,10 +1,10 @@
|
||||
use actix_web::web::Data;
|
||||
use actix_web::{web, HttpRequest, HttpResponse};
|
||||
use index_scheduler::IndexScheduler;
|
||||
use log::debug;
|
||||
use meilisearch_types::error::ResponseError;
|
||||
use meilisearch_types::tasks::KindWithContent;
|
||||
use serde_json::json;
|
||||
use tracing::debug;
|
||||
|
||||
use crate::analytics::Analytics;
|
||||
use crate::extractors::authentication::policies::*;
|
||||
@@ -27,6 +27,6 @@ pub async fn create_snapshot(
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
|
||||
|
||||
debug!("returns: {:?}", task);
|
||||
debug!(returns = ?task, "Create snapshot");
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
}
|
||||
|
||||
@@ -8,11 +8,9 @@ use meilisearch_types::deserr::DeserrQueryParamError;
|
||||
use meilisearch_types::error::deserr_codes::*;
|
||||
use meilisearch_types::error::{InvalidTaskDateError, ResponseError};
|
||||
use meilisearch_types::index_uid::IndexUid;
|
||||
use meilisearch_types::settings::{Settings, Unchecked};
|
||||
use meilisearch_types::star_or::{OptionStarOr, OptionStarOrList};
|
||||
use meilisearch_types::tasks::{
|
||||
serialize_duration, Details, IndexSwap, Kind, KindWithContent, Status, Task,
|
||||
};
|
||||
use meilisearch_types::task_view::TaskView;
|
||||
use meilisearch_types::tasks::{Kind, KindWithContent, Status};
|
||||
use serde::Serialize;
|
||||
use serde_json::json;
|
||||
use time::format_description::well_known::Rfc3339;
|
||||
@@ -37,140 +35,6 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
|
||||
.service(web::resource("/cancel").route(web::post().to(SeqHandler(cancel_tasks))))
|
||||
.service(web::resource("/{task_id}").route(web::get().to(SeqHandler(get_task))));
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct TaskView {
|
||||
pub uid: TaskId,
|
||||
#[serde(default)]
|
||||
pub index_uid: Option<String>,
|
||||
pub status: Status,
|
||||
#[serde(rename = "type")]
|
||||
pub kind: Kind,
|
||||
pub canceled_by: Option<TaskId>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub details: Option<DetailsView>,
|
||||
pub error: Option<ResponseError>,
|
||||
#[serde(serialize_with = "serialize_duration", default)]
|
||||
pub duration: Option<Duration>,
|
||||
#[serde(with = "time::serde::rfc3339")]
|
||||
pub enqueued_at: OffsetDateTime,
|
||||
#[serde(with = "time::serde::rfc3339::option", default)]
|
||||
pub started_at: Option<OffsetDateTime>,
|
||||
#[serde(with = "time::serde::rfc3339::option", default)]
|
||||
pub finished_at: Option<OffsetDateTime>,
|
||||
}
|
||||
|
||||
impl TaskView {
|
||||
pub fn from_task(task: &Task) -> TaskView {
|
||||
TaskView {
|
||||
uid: task.uid,
|
||||
index_uid: task.index_uid().map(ToOwned::to_owned),
|
||||
status: task.status,
|
||||
kind: task.kind.as_kind(),
|
||||
canceled_by: task.canceled_by,
|
||||
details: task.details.clone().map(DetailsView::from),
|
||||
error: task.error.clone(),
|
||||
duration: task.started_at.zip(task.finished_at).map(|(start, end)| end - start),
|
||||
enqueued_at: task.enqueued_at,
|
||||
started_at: task.started_at,
|
||||
finished_at: task.finished_at,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Default, Debug, PartialEq, Eq, Clone, Serialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct DetailsView {
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub received_documents: Option<u64>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub indexed_documents: Option<Option<u64>>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub primary_key: Option<Option<String>>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub provided_ids: Option<usize>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub deleted_documents: Option<Option<u64>>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub matched_tasks: Option<u64>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub canceled_tasks: Option<Option<u64>>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub deleted_tasks: Option<Option<u64>>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub original_filter: Option<Option<String>>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub dump_uid: Option<Option<String>>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
#[serde(flatten)]
|
||||
pub settings: Option<Box<Settings<Unchecked>>>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub swaps: Option<Vec<IndexSwap>>,
|
||||
}
|
||||
|
||||
impl From<Details> for DetailsView {
|
||||
fn from(details: Details) -> Self {
|
||||
match details {
|
||||
Details::DocumentAdditionOrUpdate { received_documents, indexed_documents } => {
|
||||
DetailsView {
|
||||
received_documents: Some(received_documents),
|
||||
indexed_documents: Some(indexed_documents),
|
||||
..DetailsView::default()
|
||||
}
|
||||
}
|
||||
Details::SettingsUpdate { settings } => {
|
||||
DetailsView { settings: Some(settings), ..DetailsView::default() }
|
||||
}
|
||||
Details::IndexInfo { primary_key } => {
|
||||
DetailsView { primary_key: Some(primary_key), ..DetailsView::default() }
|
||||
}
|
||||
Details::DocumentDeletion {
|
||||
provided_ids: received_document_ids,
|
||||
deleted_documents,
|
||||
} => DetailsView {
|
||||
provided_ids: Some(received_document_ids),
|
||||
deleted_documents: Some(deleted_documents),
|
||||
original_filter: Some(None),
|
||||
..DetailsView::default()
|
||||
},
|
||||
Details::DocumentDeletionByFilter { original_filter, deleted_documents } => {
|
||||
DetailsView {
|
||||
provided_ids: Some(0),
|
||||
original_filter: Some(Some(original_filter)),
|
||||
deleted_documents: Some(deleted_documents),
|
||||
..DetailsView::default()
|
||||
}
|
||||
}
|
||||
Details::ClearAll { deleted_documents } => {
|
||||
DetailsView { deleted_documents: Some(deleted_documents), ..DetailsView::default() }
|
||||
}
|
||||
Details::TaskCancelation { matched_tasks, canceled_tasks, original_filter } => {
|
||||
DetailsView {
|
||||
matched_tasks: Some(matched_tasks),
|
||||
canceled_tasks: Some(canceled_tasks),
|
||||
original_filter: Some(Some(original_filter)),
|
||||
..DetailsView::default()
|
||||
}
|
||||
}
|
||||
Details::TaskDeletion { matched_tasks, deleted_tasks, original_filter } => {
|
||||
DetailsView {
|
||||
matched_tasks: Some(matched_tasks),
|
||||
deleted_tasks: Some(deleted_tasks),
|
||||
original_filter: Some(Some(original_filter)),
|
||||
..DetailsView::default()
|
||||
}
|
||||
}
|
||||
Details::Dump { dump_uid } => {
|
||||
DetailsView { dump_uid: Some(dump_uid), ..DetailsView::default() }
|
||||
}
|
||||
Details::IndexSwap { swaps } => {
|
||||
DetailsView { swaps: Some(swaps), ..Default::default() }
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserr)]
|
||||
#[deserr(error = DeserrQueryParamError, rename_all = camelCase, deny_unknown_fields)]
|
||||
pub struct TasksFilterQuery {
|
||||
|
||||
@@ -7,24 +7,21 @@ use deserr::Deserr;
|
||||
use either::Either;
|
||||
use index_scheduler::RoFeatures;
|
||||
use indexmap::IndexMap;
|
||||
use log::warn;
|
||||
use meilisearch_auth::IndexSearchRules;
|
||||
use meilisearch_types::deserr::DeserrJsonError;
|
||||
use meilisearch_types::error::deserr_codes::*;
|
||||
use meilisearch_types::heed::RoTxn;
|
||||
use meilisearch_types::index_uid::IndexUid;
|
||||
use meilisearch_types::milli::score_details::{ScoreDetails, ScoringStrategy};
|
||||
use meilisearch_types::milli::{
|
||||
dot_product_similarity, FacetValueHit, InternalError, OrderBy, SearchForFacetValues,
|
||||
};
|
||||
use meilisearch_types::milli::score_details::{self, ScoreDetails, ScoringStrategy};
|
||||
use meilisearch_types::milli::vector::DistributionShift;
|
||||
use meilisearch_types::milli::{FacetValueHit, OrderBy, SearchForFacetValues};
|
||||
use meilisearch_types::settings::DEFAULT_PAGINATION_MAX_TOTAL_HITS;
|
||||
use meilisearch_types::{milli, Document};
|
||||
use milli::tokenizer::TokenizerBuilder;
|
||||
use milli::{
|
||||
AscDesc, FieldId, FieldsIdsMap, Filter, FormatOptions, Index, MatchBounds, MatcherBuilder,
|
||||
SortError, TermsMatchingStrategy, VectorOrArrayOfVectors, DEFAULT_VALUES_PER_FACET,
|
||||
SortError, TermsMatchingStrategy, DEFAULT_VALUES_PER_FACET,
|
||||
};
|
||||
use ordered_float::OrderedFloat;
|
||||
use regex::Regex;
|
||||
use serde::Serialize;
|
||||
use serde_json::{json, Value};
|
||||
@@ -39,6 +36,7 @@ pub const DEFAULT_CROP_LENGTH: fn() -> usize = || 10;
|
||||
pub const DEFAULT_CROP_MARKER: fn() -> String = || "…".to_string();
|
||||
pub const DEFAULT_HIGHLIGHT_PRE_TAG: fn() -> String = || "<em>".to_string();
|
||||
pub const DEFAULT_HIGHLIGHT_POST_TAG: fn() -> String = || "</em>".to_string();
|
||||
pub const DEFAULT_SEMANTIC_RATIO: fn() -> SemanticRatio = || SemanticRatio(0.5);
|
||||
|
||||
#[derive(Debug, Clone, Default, PartialEq, Deserr)]
|
||||
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
|
||||
@@ -47,6 +45,8 @@ pub struct SearchQuery {
|
||||
pub q: Option<String>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchVector>)]
|
||||
pub vector: Option<Vec<f32>>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidHybridQuery>)]
|
||||
pub hybrid: Option<HybridQuery>,
|
||||
#[deserr(default = DEFAULT_SEARCH_OFFSET(), error = DeserrJsonError<InvalidSearchOffset>)]
|
||||
pub offset: usize,
|
||||
#[deserr(default = DEFAULT_SEARCH_LIMIT(), error = DeserrJsonError<InvalidSearchLimit>)]
|
||||
@@ -87,6 +87,48 @@ pub struct SearchQuery {
|
||||
pub attributes_to_search_on: Option<Vec<String>>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Default, PartialEq, Deserr)]
|
||||
#[deserr(error = DeserrJsonError<InvalidHybridQuery>, rename_all = camelCase, deny_unknown_fields)]
|
||||
pub struct HybridQuery {
|
||||
/// TODO validate that semantic ratio is between 0.0 and 1.0
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchSemanticRatio>, default)]
|
||||
pub semantic_ratio: SemanticRatio,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidEmbedder>, default)]
|
||||
pub embedder: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Deserr)]
|
||||
#[deserr(try_from(f32) = TryFrom::try_from -> InvalidSearchSemanticRatio)]
|
||||
pub struct SemanticRatio(f32);
|
||||
|
||||
impl Default for SemanticRatio {
|
||||
fn default() -> Self {
|
||||
DEFAULT_SEMANTIC_RATIO()
|
||||
}
|
||||
}
|
||||
|
||||
impl std::convert::TryFrom<f32> for SemanticRatio {
|
||||
type Error = InvalidSearchSemanticRatio;
|
||||
|
||||
fn try_from(f: f32) -> Result<Self, Self::Error> {
|
||||
// the suggested "fix" is: `!(0.0..=1.0).contains(&f)` which is allegedly less readable
|
||||
#[allow(clippy::manual_range_contains)]
|
||||
if f > 1.0 || f < 0.0 {
|
||||
Err(InvalidSearchSemanticRatio)
|
||||
} else {
|
||||
Ok(SemanticRatio(f))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl std::ops::Deref for SemanticRatio {
|
||||
type Target = f32;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.0
|
||||
}
|
||||
}
|
||||
|
||||
impl SearchQuery {
|
||||
pub fn is_finite_pagination(&self) -> bool {
|
||||
self.page.or(self.hits_per_page).is_some()
|
||||
@@ -106,6 +148,8 @@ pub struct SearchQueryWithIndex {
|
||||
pub q: Option<String>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchQ>)]
|
||||
pub vector: Option<Vec<f32>>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidHybridQuery>)]
|
||||
pub hybrid: Option<HybridQuery>,
|
||||
#[deserr(default = DEFAULT_SEARCH_OFFSET(), error = DeserrJsonError<InvalidSearchOffset>)]
|
||||
pub offset: usize,
|
||||
#[deserr(default = DEFAULT_SEARCH_LIMIT(), error = DeserrJsonError<InvalidSearchLimit>)]
|
||||
@@ -171,6 +215,7 @@ impl SearchQueryWithIndex {
|
||||
crop_marker,
|
||||
matching_strategy,
|
||||
attributes_to_search_on,
|
||||
hybrid,
|
||||
} = self;
|
||||
(
|
||||
index_uid,
|
||||
@@ -196,6 +241,7 @@ impl SearchQueryWithIndex {
|
||||
crop_marker,
|
||||
matching_strategy,
|
||||
attributes_to_search_on,
|
||||
hybrid,
|
||||
// do not use ..Default::default() here,
|
||||
// rather add any missing field from `SearchQuery` to `SearchQueryWithIndex`
|
||||
},
|
||||
@@ -335,19 +381,44 @@ fn prepare_search<'t>(
|
||||
rtxn: &'t RoTxn,
|
||||
query: &'t SearchQuery,
|
||||
features: RoFeatures,
|
||||
distribution: Option<DistributionShift>,
|
||||
) -> Result<(milli::Search<'t>, bool, usize, usize), MeilisearchHttpError> {
|
||||
let mut search = index.search(rtxn);
|
||||
|
||||
if query.vector.is_some() && query.q.is_some() {
|
||||
warn!("Ignoring the query string `q` when used with the `vector` parameter.");
|
||||
if query.vector.is_some() {
|
||||
features.check_vector("Passing `vector` as a query parameter")?;
|
||||
}
|
||||
|
||||
if query.hybrid.is_some() {
|
||||
features.check_vector("Passing `hybrid` as a query parameter")?;
|
||||
}
|
||||
|
||||
if query.hybrid.is_none() && query.q.is_some() && query.vector.is_some() {
|
||||
return Err(MeilisearchHttpError::MissingSearchHybrid);
|
||||
}
|
||||
|
||||
search.distribution_shift(distribution);
|
||||
|
||||
if let Some(ref vector) = query.vector {
|
||||
search.vector(vector.clone());
|
||||
match &query.hybrid {
|
||||
// If semantic ratio is 0.0, only the query search will impact the search results,
|
||||
// skip the vector
|
||||
Some(hybrid) if *hybrid.semantic_ratio == 0.0 => (),
|
||||
_otherwise => {
|
||||
search.vector(vector.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(ref query) = query.q {
|
||||
search.query(query);
|
||||
if let Some(ref q) = query.q {
|
||||
match &query.hybrid {
|
||||
// If semantic ratio is 1.0, only the vector search will impact the search results,
|
||||
// skip the query
|
||||
Some(hybrid) if *hybrid.semantic_ratio == 1.0 => (),
|
||||
_otherwise => {
|
||||
search.query(q);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(ref searchable) = query.attributes_to_search_on {
|
||||
@@ -370,12 +441,8 @@ fn prepare_search<'t>(
|
||||
ScoringStrategy::Skip
|
||||
});
|
||||
|
||||
if query.show_ranking_score_details {
|
||||
features.check_score_details()?;
|
||||
}
|
||||
|
||||
if query.vector.is_some() {
|
||||
features.check_vector()?;
|
||||
if let Some(HybridQuery { embedder: Some(embedder), .. }) = &query.hybrid {
|
||||
search.embedder_name(embedder);
|
||||
}
|
||||
|
||||
// compute the offset on the limit depending on the pagination mode.
|
||||
@@ -421,15 +488,22 @@ pub fn perform_search(
|
||||
index: &Index,
|
||||
query: SearchQuery,
|
||||
features: RoFeatures,
|
||||
distribution: Option<DistributionShift>,
|
||||
) -> Result<SearchResult, MeilisearchHttpError> {
|
||||
let before_search = Instant::now();
|
||||
let rtxn = index.read_txn()?;
|
||||
|
||||
let (search, is_finite_pagination, max_total_hits, offset) =
|
||||
prepare_search(index, &rtxn, &query, features)?;
|
||||
prepare_search(index, &rtxn, &query, features, distribution)?;
|
||||
|
||||
let milli::SearchResult { documents_ids, matching_words, candidates, document_scores, .. } =
|
||||
search.execute()?;
|
||||
match &query.hybrid {
|
||||
Some(hybrid) => match *hybrid.semantic_ratio {
|
||||
ratio if ratio == 0.0 || ratio == 1.0 => search.execute()?,
|
||||
ratio => search.execute_hybrid(ratio)?,
|
||||
},
|
||||
None => search.execute()?,
|
||||
};
|
||||
|
||||
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
|
||||
@@ -538,13 +612,17 @@ pub fn perform_search(
|
||||
insert_geo_distance(sort, &mut document);
|
||||
}
|
||||
|
||||
let semantic_score = match query.vector.as_ref() {
|
||||
Some(vector) => match extract_field("_vectors", &fields_ids_map, obkv)? {
|
||||
Some(vectors) => compute_semantic_score(vector, vectors)?,
|
||||
None => None,
|
||||
},
|
||||
None => None,
|
||||
};
|
||||
let mut semantic_score = None;
|
||||
for details in &score {
|
||||
if let ScoreDetails::Vector(score_details::Vector {
|
||||
target_vector: _,
|
||||
value_similarity: Some((_matching_vector, similarity)),
|
||||
}) = details
|
||||
{
|
||||
semantic_score = Some(*similarity);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
let ranking_score =
|
||||
query.show_ranking_score.then(|| ScoreDetails::global_score(score.iter()));
|
||||
@@ -647,11 +725,15 @@ pub fn perform_facet_search(
|
||||
let before_search = Instant::now();
|
||||
let rtxn = index.read_txn()?;
|
||||
|
||||
let (search, _, _, _) = prepare_search(index, &rtxn, &search_query, features)?;
|
||||
let mut facet_search = SearchForFacetValues::new(facet_name, search);
|
||||
let (search, _, _, _) = prepare_search(index, &rtxn, &search_query, features, None)?;
|
||||
let mut facet_search =
|
||||
SearchForFacetValues::new(facet_name, search, search_query.hybrid.is_some());
|
||||
if let Some(facet_query) = &facet_query {
|
||||
facet_search.query(facet_query);
|
||||
}
|
||||
if let Some(max_facets) = index.max_values_per_facet(&rtxn)? {
|
||||
facet_search.max_values(max_facets as usize);
|
||||
}
|
||||
|
||||
Ok(FacetSearchResult {
|
||||
facet_hits: facet_search.execute()?,
|
||||
@@ -676,18 +758,6 @@ fn insert_geo_distance(sorts: &[String], document: &mut Document) {
|
||||
}
|
||||
}
|
||||
|
||||
fn compute_semantic_score(query: &[f32], vectors: Value) -> milli::Result<Option<f32>> {
|
||||
let vectors = serde_json::from_value(vectors)
|
||||
.map(VectorOrArrayOfVectors::into_array_of_vectors)
|
||||
.map_err(InternalError::SerdeJson)?;
|
||||
Ok(vectors
|
||||
.into_iter()
|
||||
.flatten()
|
||||
.map(|v| OrderedFloat(dot_product_similarity(query, &v)))
|
||||
.max()
|
||||
.map(OrderedFloat::into_inner))
|
||||
}
|
||||
|
||||
fn compute_formatted_options(
|
||||
attr_to_highlight: &HashSet<String>,
|
||||
attr_to_crop: &[String],
|
||||
@@ -815,22 +885,6 @@ fn make_document(
|
||||
Ok(document)
|
||||
}
|
||||
|
||||
/// Extract the JSON value under the field name specified
|
||||
/// but doesn't support nested objects.
|
||||
fn extract_field(
|
||||
field_name: &str,
|
||||
field_ids_map: &FieldsIdsMap,
|
||||
obkv: obkv::KvReaderU16,
|
||||
) -> Result<Option<serde_json::Value>, MeilisearchHttpError> {
|
||||
match field_ids_map.id(field_name) {
|
||||
Some(fid) => match obkv.get(fid) {
|
||||
Some(value) => Ok(serde_json::from_slice(value).map(Some)?),
|
||||
None => Ok(None),
|
||||
},
|
||||
None => Ok(None),
|
||||
}
|
||||
}
|
||||
|
||||
fn format_fields<'a>(
|
||||
document: &Document,
|
||||
field_ids_map: &FieldsIdsMap,
|
||||
@@ -842,6 +896,14 @@ fn format_fields<'a>(
|
||||
let mut matches_position = compute_matches.then(BTreeMap::new);
|
||||
let mut document = document.clone();
|
||||
|
||||
// reduce the formatted option list to the attributes that should be formatted,
|
||||
// instead of all the attributes to display.
|
||||
let formatting_fields_options: Vec<_> = formatted_options
|
||||
.iter()
|
||||
.filter(|(_, option)| option.should_format())
|
||||
.map(|(fid, option)| (field_ids_map.name(*fid).unwrap(), option))
|
||||
.collect();
|
||||
|
||||
// select the attributes to retrieve
|
||||
let displayable_names =
|
||||
displayable_ids.iter().map(|&fid| field_ids_map.name(fid).expect("Missing field name"));
|
||||
@@ -850,13 +912,15 @@ fn format_fields<'a>(
|
||||
// to the value and merge them together. eg. If a user said he wanted to highlight `doggo`
|
||||
// and crop `doggo.name`. `doggo.name` needs to be highlighted + cropped while `doggo.age` is only
|
||||
// highlighted.
|
||||
let format = formatted_options
|
||||
// Warn: The time to compute the format list scales with the number of fields to format;
|
||||
// cumulated with map_leaf_values that iterates over all the nested fields, it gives a quadratic complexity:
|
||||
// d*f where d is the total number of fields to display and f is the total number of fields to format.
|
||||
let format = formatting_fields_options
|
||||
.iter()
|
||||
.filter(|(field, _option)| {
|
||||
let name = field_ids_map.name(**field).unwrap();
|
||||
.filter(|(name, _option)| {
|
||||
milli::is_faceted_by(name, key) || milli::is_faceted_by(key, name)
|
||||
})
|
||||
.map(|(_, option)| *option)
|
||||
.map(|(_, option)| **option)
|
||||
.reduce(|acc, option| acc.merge(option));
|
||||
let mut infos = Vec::new();
|
||||
|
||||
@@ -953,7 +1017,7 @@ fn format_value<'a>(
|
||||
let value = matcher.format(format_options);
|
||||
Value::String(value.into_owned())
|
||||
}
|
||||
None => Value::Number(number),
|
||||
None => Value::String(s),
|
||||
}
|
||||
}
|
||||
value => value,
|
||||
|
||||
Binary file not shown.
@@ -59,6 +59,8 @@ pub static AUTHORIZATIONS: Lazy<HashMap<(&'static str, &'static str), HashSet<&'
|
||||
("POST", "/snapshots") => hashset!{"snapshots.create", "snapshots.*", "*"},
|
||||
("GET", "/version") => hashset!{"version", "*"},
|
||||
("GET", "/metrics") => hashset!{"metrics.get", "metrics.*", "*"},
|
||||
("POST", "/logs/stream") => hashset!{"metrics.get", "metrics.*", "*"},
|
||||
("DELETE", "/logs/stream") => hashset!{"metrics.get", "metrics.*", "*"},
|
||||
("PATCH", "/keys/mykey/") => hashset!{"keys.update", "*"},
|
||||
("GET", "/keys/mykey/") => hashset!{"keys.get", "*"},
|
||||
("DELETE", "/keys/mykey/") => hashset!{"keys.delete", "*"},
|
||||
|
||||
@@ -64,7 +64,7 @@ impl Display for Value {
|
||||
write!(
|
||||
f,
|
||||
"{}",
|
||||
json_string!(self, { ".enqueuedAt" => "[date]", ".processedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]" })
|
||||
json_string!(self, { ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]" })
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,6 +13,8 @@ use meilisearch::{analytics, create_app, setup_meilisearch};
|
||||
use once_cell::sync::Lazy;
|
||||
use tempfile::TempDir;
|
||||
use tokio::time::sleep;
|
||||
use tracing::level_filters::LevelFilter;
|
||||
use tracing_subscriber::Layer;
|
||||
|
||||
use super::index::Index;
|
||||
use super::service::Service;
|
||||
@@ -81,10 +83,16 @@ impl Server {
|
||||
Response = ServiceResponse<impl MessageBody>,
|
||||
Error = actix_web::Error,
|
||||
> {
|
||||
let (_route_layer, route_layer_handle) =
|
||||
tracing_subscriber::reload::Layer::new(None.with_filter(
|
||||
tracing_subscriber::filter::Targets::new().with_target("", LevelFilter::OFF),
|
||||
));
|
||||
|
||||
actix_web::test::init_service(create_app(
|
||||
self.service.index_scheduler.clone().into(),
|
||||
self.service.auth.clone().into(),
|
||||
self.service.options.clone(),
|
||||
route_layer_handle,
|
||||
analytics::MockAnalytics::new(&self.service.options),
|
||||
true,
|
||||
))
|
||||
|
||||
@@ -7,6 +7,8 @@ use actix_web::test::TestRequest;
|
||||
use index_scheduler::IndexScheduler;
|
||||
use meilisearch::{analytics, create_app, Opt};
|
||||
use meilisearch_auth::AuthController;
|
||||
use tracing::level_filters::LevelFilter;
|
||||
use tracing_subscriber::Layer;
|
||||
|
||||
use crate::common::encoder::Encoder;
|
||||
use crate::common::Value;
|
||||
@@ -105,10 +107,16 @@ impl Service {
|
||||
}
|
||||
|
||||
pub async fn request(&self, mut req: test::TestRequest) -> (Value, StatusCode) {
|
||||
let (_route_layer, route_layer_handle) =
|
||||
tracing_subscriber::reload::Layer::new(None.with_filter(
|
||||
tracing_subscriber::filter::Targets::new().with_target("", LevelFilter::OFF),
|
||||
));
|
||||
|
||||
let app = test::init_service(create_app(
|
||||
self.index_scheduler.clone().into(),
|
||||
self.auth.clone().into(),
|
||||
self.options.clone(),
|
||||
route_layer_handle,
|
||||
analytics::MockAnalytics::new(&self.options),
|
||||
true,
|
||||
))
|
||||
|
||||
@@ -1760,6 +1760,181 @@ async fn add_documents_invalid_geo_field() {
|
||||
"finishedAt": "[date]"
|
||||
}
|
||||
"###);
|
||||
|
||||
// The next three tests are related to #4333
|
||||
|
||||
// _geo has a lat and lng, but lng is set to `null`
|
||||
let documents = json!([
|
||||
{
|
||||
"id": "12",
|
||||
"_geo": { "lng": null, "lat": 67}
|
||||
}
|
||||
]);
|
||||
|
||||
let (response, code) = index.add_documents(documents, None).await;
|
||||
snapshot!(code, @"202 Accepted");
|
||||
let response = index.wait_task(response.uid()).await;
|
||||
snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }),
|
||||
@r###"
|
||||
{
|
||||
"uid": 14,
|
||||
"indexUid": "test",
|
||||
"status": "failed",
|
||||
"type": "documentAdditionOrUpdate",
|
||||
"canceledBy": null,
|
||||
"details": {
|
||||
"receivedDocuments": 1,
|
||||
"indexedDocuments": 0
|
||||
},
|
||||
"error": {
|
||||
"message": "Could not parse longitude in the document with the id: `12`. Was expecting a finite number but instead got `null`.",
|
||||
"code": "invalid_document_geo_field",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"
|
||||
},
|
||||
"duration": "[duration]",
|
||||
"enqueuedAt": "[date]",
|
||||
"startedAt": "[date]",
|
||||
"finishedAt": "[date]"
|
||||
}
|
||||
"###);
|
||||
|
||||
// _geo has a lat and lng, but lat is set to `null`
|
||||
let documents = json!([
|
||||
{
|
||||
"id": "12",
|
||||
"_geo": { "lng": 35, "lat": null }
|
||||
}
|
||||
]);
|
||||
|
||||
let (response, code) = index.add_documents(documents, None).await;
|
||||
snapshot!(code, @"202 Accepted");
|
||||
let response = index.wait_task(response.uid()).await;
|
||||
snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }),
|
||||
@r###"
|
||||
{
|
||||
"uid": 15,
|
||||
"indexUid": "test",
|
||||
"status": "failed",
|
||||
"type": "documentAdditionOrUpdate",
|
||||
"canceledBy": null,
|
||||
"details": {
|
||||
"receivedDocuments": 1,
|
||||
"indexedDocuments": 0
|
||||
},
|
||||
"error": {
|
||||
"message": "Could not parse latitude in the document with the id: `12`. Was expecting a finite number but instead got `null`.",
|
||||
"code": "invalid_document_geo_field",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"
|
||||
},
|
||||
"duration": "[duration]",
|
||||
"enqueuedAt": "[date]",
|
||||
"startedAt": "[date]",
|
||||
"finishedAt": "[date]"
|
||||
}
|
||||
"###);
|
||||
|
||||
// _geo has a lat and lng, but both are set to `null`
|
||||
let documents = json!([
|
||||
{
|
||||
"id": "13",
|
||||
"_geo": { "lng": null, "lat": null }
|
||||
}
|
||||
]);
|
||||
|
||||
let (response, code) = index.add_documents(documents, None).await;
|
||||
snapshot!(code, @"202 Accepted");
|
||||
let response = index.wait_task(response.uid()).await;
|
||||
snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }),
|
||||
@r###"
|
||||
{
|
||||
"uid": 16,
|
||||
"indexUid": "test",
|
||||
"status": "failed",
|
||||
"type": "documentAdditionOrUpdate",
|
||||
"canceledBy": null,
|
||||
"details": {
|
||||
"receivedDocuments": 1,
|
||||
"indexedDocuments": 0
|
||||
},
|
||||
"error": {
|
||||
"message": "Could not parse latitude nor longitude in the document with the id: `13`. Was expecting finite numbers but instead got `null` and `null`.",
|
||||
"code": "invalid_document_geo_field",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"
|
||||
},
|
||||
"duration": "[duration]",
|
||||
"enqueuedAt": "[date]",
|
||||
"startedAt": "[date]",
|
||||
"finishedAt": "[date]"
|
||||
}
|
||||
"###);
|
||||
}
|
||||
|
||||
// Related to #4333
|
||||
#[actix_rt::test]
|
||||
async fn add_invalid_geo_and_then_settings() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("test");
|
||||
index.create(Some("id")).await;
|
||||
|
||||
// _geo is an object, but its lat and lng are both `null`
|
||||
let documents = json!([
|
||||
{
|
||||
"id": "11",
|
||||
"_geo": { "lat": null, "lng": null },
|
||||
}
|
||||
]);
|
||||
let (ret, code) = index.add_documents(documents, None).await;
|
||||
snapshot!(code, @"202 Accepted");
|
||||
let ret = index.wait_task(ret.uid()).await;
|
||||
snapshot!(ret, @r###"
|
||||
{
|
||||
"uid": 1,
|
||||
"indexUid": "test",
|
||||
"status": "succeeded",
|
||||
"type": "documentAdditionOrUpdate",
|
||||
"canceledBy": null,
|
||||
"details": {
|
||||
"receivedDocuments": 1,
|
||||
"indexedDocuments": 1
|
||||
},
|
||||
"error": null,
|
||||
"duration": "[duration]",
|
||||
"enqueuedAt": "[date]",
|
||||
"startedAt": "[date]",
|
||||
"finishedAt": "[date]"
|
||||
}
|
||||
"###);
|
||||
|
||||
let (ret, code) = index.update_settings(json!({"sortableAttributes": ["_geo"]})).await;
|
||||
snapshot!(code, @"202 Accepted");
|
||||
let ret = index.wait_task(ret.uid()).await;
|
||||
snapshot!(ret, @r###"
|
||||
{
|
||||
"uid": 2,
|
||||
"indexUid": "test",
|
||||
"status": "failed",
|
||||
"type": "settingsUpdate",
|
||||
"canceledBy": null,
|
||||
"details": {
|
||||
"sortableAttributes": [
|
||||
"_geo"
|
||||
]
|
||||
},
|
||||
"error": {
|
||||
"message": "Could not parse latitude in the document with the id: `\"11\"`. Was expecting a finite number but instead got `null`.",
|
||||
"code": "invalid_document_geo_field",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"
|
||||
},
|
||||
"duration": "[duration]",
|
||||
"enqueuedAt": "[date]",
|
||||
"startedAt": "[date]",
|
||||
"finishedAt": "[date]"
|
||||
}
|
||||
"###);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
|
||||
@@ -59,7 +59,7 @@ async fn import_dump_v1_movie_raw() {
|
||||
"dictionary": [],
|
||||
"synonyms": {},
|
||||
"distinctAttribute": null,
|
||||
"proximityPrecision": null,
|
||||
"proximityPrecision": "byWord",
|
||||
"typoTolerance": {
|
||||
"enabled": true,
|
||||
"minWordSizeForTypos": {
|
||||
@@ -220,7 +220,7 @@ async fn import_dump_v1_movie_with_settings() {
|
||||
"dictionary": [],
|
||||
"synonyms": {},
|
||||
"distinctAttribute": null,
|
||||
"proximityPrecision": null,
|
||||
"proximityPrecision": "byWord",
|
||||
"typoTolerance": {
|
||||
"enabled": true,
|
||||
"minWordSizeForTypos": {
|
||||
@@ -367,7 +367,7 @@ async fn import_dump_v1_rubygems_with_settings() {
|
||||
"dictionary": [],
|
||||
"synonyms": {},
|
||||
"distinctAttribute": null,
|
||||
"proximityPrecision": null,
|
||||
"proximityPrecision": "byWord",
|
||||
"typoTolerance": {
|
||||
"enabled": true,
|
||||
"minWordSizeForTypos": {
|
||||
@@ -500,7 +500,7 @@ async fn import_dump_v2_movie_raw() {
|
||||
"dictionary": [],
|
||||
"synonyms": {},
|
||||
"distinctAttribute": null,
|
||||
"proximityPrecision": null,
|
||||
"proximityPrecision": "byWord",
|
||||
"typoTolerance": {
|
||||
"enabled": true,
|
||||
"minWordSizeForTypos": {
|
||||
@@ -645,7 +645,7 @@ async fn import_dump_v2_movie_with_settings() {
|
||||
"dictionary": [],
|
||||
"synonyms": {},
|
||||
"distinctAttribute": null,
|
||||
"proximityPrecision": null,
|
||||
"proximityPrecision": "byWord",
|
||||
"typoTolerance": {
|
||||
"enabled": true,
|
||||
"minWordSizeForTypos": {
|
||||
@@ -789,7 +789,7 @@ async fn import_dump_v2_rubygems_with_settings() {
|
||||
"dictionary": [],
|
||||
"synonyms": {},
|
||||
"distinctAttribute": null,
|
||||
"proximityPrecision": null,
|
||||
"proximityPrecision": "byWord",
|
||||
"typoTolerance": {
|
||||
"enabled": true,
|
||||
"minWordSizeForTypos": {
|
||||
@@ -922,7 +922,7 @@ async fn import_dump_v3_movie_raw() {
|
||||
"dictionary": [],
|
||||
"synonyms": {},
|
||||
"distinctAttribute": null,
|
||||
"proximityPrecision": null,
|
||||
"proximityPrecision": "byWord",
|
||||
"typoTolerance": {
|
||||
"enabled": true,
|
||||
"minWordSizeForTypos": {
|
||||
@@ -1067,7 +1067,7 @@ async fn import_dump_v3_movie_with_settings() {
|
||||
"dictionary": [],
|
||||
"synonyms": {},
|
||||
"distinctAttribute": null,
|
||||
"proximityPrecision": null,
|
||||
"proximityPrecision": "byWord",
|
||||
"typoTolerance": {
|
||||
"enabled": true,
|
||||
"minWordSizeForTypos": {
|
||||
@@ -1211,7 +1211,7 @@ async fn import_dump_v3_rubygems_with_settings() {
|
||||
"dictionary": [],
|
||||
"synonyms": {},
|
||||
"distinctAttribute": null,
|
||||
"proximityPrecision": null,
|
||||
"proximityPrecision": "byWord",
|
||||
"typoTolerance": {
|
||||
"enabled": true,
|
||||
"minWordSizeForTypos": {
|
||||
@@ -1344,7 +1344,7 @@ async fn import_dump_v4_movie_raw() {
|
||||
"dictionary": [],
|
||||
"synonyms": {},
|
||||
"distinctAttribute": null,
|
||||
"proximityPrecision": null,
|
||||
"proximityPrecision": "byWord",
|
||||
"typoTolerance": {
|
||||
"enabled": true,
|
||||
"minWordSizeForTypos": {
|
||||
@@ -1489,7 +1489,7 @@ async fn import_dump_v4_movie_with_settings() {
|
||||
"dictionary": [],
|
||||
"synonyms": {},
|
||||
"distinctAttribute": null,
|
||||
"proximityPrecision": null,
|
||||
"proximityPrecision": "byWord",
|
||||
"typoTolerance": {
|
||||
"enabled": true,
|
||||
"minWordSizeForTypos": {
|
||||
@@ -1633,7 +1633,7 @@ async fn import_dump_v4_rubygems_with_settings() {
|
||||
"dictionary": [],
|
||||
"synonyms": {},
|
||||
"distinctAttribute": null,
|
||||
"proximityPrecision": null,
|
||||
"proximityPrecision": "byWord",
|
||||
"typoTolerance": {
|
||||
"enabled": true,
|
||||
"minWordSizeForTypos": {
|
||||
@@ -1845,11 +1845,10 @@ async fn import_dump_v6_containing_experimental_features() {
|
||||
meili_snap::snapshot!(code, @"200 OK");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"scoreDetails": false,
|
||||
"vectorStore": false,
|
||||
"metrics": false,
|
||||
"exportPuffinReports": false,
|
||||
"proximityPrecision": false
|
||||
"logsRoute": false,
|
||||
"exportPuffinReports": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@@ -1878,7 +1877,7 @@ async fn import_dump_v6_containing_experimental_features() {
|
||||
"dictionary": [],
|
||||
"synonyms": {},
|
||||
"distinctAttribute": null,
|
||||
"proximityPrecision": "attributeScale",
|
||||
"proximityPrecision": "byAttribute",
|
||||
"typoTolerance": {
|
||||
"enabled": true,
|
||||
"minWordSizeForTypos": {
|
||||
|
||||
@@ -18,11 +18,10 @@ async fn experimental_features() {
|
||||
meili_snap::snapshot!(code, @"200 OK");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"scoreDetails": false,
|
||||
"vectorStore": false,
|
||||
"metrics": false,
|
||||
"exportPuffinReports": false,
|
||||
"proximityPrecision": false
|
||||
"logsRoute": false,
|
||||
"exportPuffinReports": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@@ -31,11 +30,10 @@ async fn experimental_features() {
|
||||
meili_snap::snapshot!(code, @"200 OK");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"scoreDetails": false,
|
||||
"vectorStore": true,
|
||||
"metrics": false,
|
||||
"exportPuffinReports": false,
|
||||
"proximityPrecision": false
|
||||
"logsRoute": false,
|
||||
"exportPuffinReports": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@@ -44,11 +42,10 @@ async fn experimental_features() {
|
||||
meili_snap::snapshot!(code, @"200 OK");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"scoreDetails": false,
|
||||
"vectorStore": true,
|
||||
"metrics": false,
|
||||
"exportPuffinReports": false,
|
||||
"proximityPrecision": false
|
||||
"logsRoute": false,
|
||||
"exportPuffinReports": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@@ -58,11 +55,10 @@ async fn experimental_features() {
|
||||
meili_snap::snapshot!(code, @"200 OK");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"scoreDetails": false,
|
||||
"vectorStore": true,
|
||||
"metrics": false,
|
||||
"exportPuffinReports": false,
|
||||
"proximityPrecision": false
|
||||
"logsRoute": false,
|
||||
"exportPuffinReports": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@@ -72,11 +68,10 @@ async fn experimental_features() {
|
||||
meili_snap::snapshot!(code, @"200 OK");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"scoreDetails": false,
|
||||
"vectorStore": true,
|
||||
"metrics": false,
|
||||
"exportPuffinReports": false,
|
||||
"proximityPrecision": false
|
||||
"logsRoute": false,
|
||||
"exportPuffinReports": false
|
||||
}
|
||||
"###);
|
||||
}
|
||||
@@ -93,11 +88,10 @@ async fn experimental_feature_metrics() {
|
||||
meili_snap::snapshot!(code, @"200 OK");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"scoreDetails": false,
|
||||
"vectorStore": false,
|
||||
"metrics": true,
|
||||
"exportPuffinReports": false,
|
||||
"proximityPrecision": false
|
||||
"logsRoute": false,
|
||||
"exportPuffinReports": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@@ -152,7 +146,7 @@ async fn errors() {
|
||||
meili_snap::snapshot!(code, @"400 Bad Request");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"message": "Unknown field `NotAFeature`: expected one of `scoreDetails`, `vectorStore`, `metrics`, `exportPuffinReports`, `proximityPrecision`",
|
||||
"message": "Unknown field `NotAFeature`: expected one of `vectorStore`, `metrics`, `logsRoute`, `exportPuffinReports`",
|
||||
"code": "bad_request",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#bad_request"
|
||||
|
||||
@@ -5,6 +5,7 @@ mod documents;
|
||||
mod dumps;
|
||||
mod features;
|
||||
mod index;
|
||||
mod logs;
|
||||
mod search;
|
||||
mod settings;
|
||||
mod snapshot;
|
||||
|
||||
182
meilisearch/tests/logs/error.rs
Normal file
182
meilisearch/tests/logs/error.rs
Normal file
@@ -0,0 +1,182 @@
|
||||
use meili_snap::*;
|
||||
|
||||
use crate::common::Server;
|
||||
use crate::json;
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn logs_stream_bad_target() {
|
||||
let server = Server::new().await;
|
||||
|
||||
// Wrong type
|
||||
let (response, code) = server.service.post("/logs/stream", json!({ "target": true })).await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
"message": "Invalid value type at `.target`: expected a string, but found a boolean: `true`",
|
||||
"code": "bad_request",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#bad_request"
|
||||
}
|
||||
"###);
|
||||
|
||||
// Wrong type
|
||||
let (response, code) = server.service.post("/logs/stream", json!({ "target": [] })).await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
"message": "Invalid value type at `.target`: expected a string, but found an array: `[]`",
|
||||
"code": "bad_request",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#bad_request"
|
||||
}
|
||||
"###);
|
||||
|
||||
// Our help message
|
||||
let (response, code) = server.service.post("/logs/stream", json!({ "target": "" })).await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
"message": "Invalid value at `.target`: Empty string is not a valid target. If you want to get no logs use `OFF`. Usage: `info`, `meilisearch=info`, or you can write multiple filters in one target: `index_scheduler=info,milli=trace`",
|
||||
"code": "bad_request",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#bad_request"
|
||||
}
|
||||
"###);
|
||||
|
||||
// An error from the target parser
|
||||
let (response, code) = server.service.post("/logs/stream", json!({ "target": "==" })).await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
"message": "Invalid value at `.target`: invalid filter directive: too many '=' in filter directive, expected 0 or 1",
|
||||
"code": "bad_request",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#bad_request"
|
||||
}
|
||||
"###);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn logs_stream_bad_mode() {
|
||||
let server = Server::new().await;
|
||||
|
||||
// Wrong type
|
||||
let (response, code) = server.service.post("/logs/stream", json!({ "mode": true })).await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
"message": "Invalid value type at `.mode`: expected a string, but found a boolean: `true`",
|
||||
"code": "bad_request",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#bad_request"
|
||||
}
|
||||
"###);
|
||||
|
||||
// Wrong type
|
||||
let (response, code) = server.service.post("/logs/stream", json!({ "mode": [] })).await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
"message": "Invalid value type at `.mode`: expected a string, but found an array: `[]`",
|
||||
"code": "bad_request",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#bad_request"
|
||||
}
|
||||
"###);
|
||||
|
||||
// Wrong value
|
||||
let (response, code) = server.service.post("/logs/stream", json!({ "mode": "tamo" })).await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
"message": "Unknown value `tamo` at `.mode`: expected one of `human`, `profile`",
|
||||
"code": "bad_request",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#bad_request"
|
||||
}
|
||||
"###);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn logs_stream_bad_profile_memory() {
|
||||
let server = Server::new().await;
|
||||
|
||||
// Wrong type
|
||||
let (response, code) =
|
||||
server.service.post("/logs/stream", json!({ "profileMemory": "tamo" })).await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
"message": "Invalid value type at `.profileMemory`: expected a boolean, but found a string: `\"tamo\"`",
|
||||
"code": "bad_request",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#bad_request"
|
||||
}
|
||||
"###);
|
||||
|
||||
// Wrong type
|
||||
let (response, code) =
|
||||
server.service.post("/logs/stream", json!({ "profileMemory": ["hello", "kefir"] })).await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
"message": "Invalid value type at `.profileMemory`: expected a boolean, but found an array: `[\"hello\",\"kefir\"]`",
|
||||
"code": "bad_request",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#bad_request"
|
||||
}
|
||||
"###);
|
||||
|
||||
// Used with default parameters
|
||||
let (response, code) =
|
||||
server.service.post("/logs/stream", json!({ "profileMemory": true })).await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
"message": "Invalid value: `profile_memory` can only be used while profiling code and is not compatible with the Human mode.",
|
||||
"code": "invalid_settings_typo_tolerance",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_settings_typo_tolerance"
|
||||
}
|
||||
"###);
|
||||
|
||||
// Used with an unsupported mode
|
||||
let (response, code) =
|
||||
server.service.post("/logs/stream", json!({ "mode": "fmt", "profileMemory": true })).await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
"message": "Unknown value `fmt` at `.mode`: expected one of `human`, `profile`",
|
||||
"code": "bad_request",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#bad_request"
|
||||
}
|
||||
"###);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn logs_stream_without_enabling_the_route() {
|
||||
let server = Server::new().await;
|
||||
|
||||
let (response, code) = server.service.post("/logs/stream", json!({})).await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
"message": "getting logs through the `/logs/stream` route requires enabling the `logs route` experimental feature. See https://github.com/orgs/meilisearch/discussions/721",
|
||||
"code": "feature_not_enabled",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#feature_not_enabled"
|
||||
}
|
||||
"###);
|
||||
|
||||
let (response, code) = server.service.delete("/logs/stream").await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
"message": "getting logs through the `/logs/stream` route requires enabling the `logs route` experimental feature. See https://github.com/orgs/meilisearch/discussions/721",
|
||||
"code": "feature_not_enabled",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#feature_not_enabled"
|
||||
}
|
||||
"###);
|
||||
}
|
||||
92
meilisearch/tests/logs/mod.rs
Normal file
92
meilisearch/tests/logs/mod.rs
Normal file
@@ -0,0 +1,92 @@
|
||||
mod error;
|
||||
|
||||
use std::rc::Rc;
|
||||
use std::str::FromStr;
|
||||
|
||||
use actix_web::http::header::ContentType;
|
||||
use meili_snap::snapshot;
|
||||
use meilisearch::{analytics, create_app, Opt};
|
||||
use tracing::level_filters::LevelFilter;
|
||||
use tracing_subscriber::layer::SubscriberExt;
|
||||
use tracing_subscriber::Layer;
|
||||
|
||||
use crate::common::{default_settings, Server};
|
||||
use crate::json;
|
||||
|
||||
#[actix_web::test]
|
||||
async fn basic_test_log_stream_route() {
|
||||
let db_path = tempfile::tempdir().unwrap();
|
||||
let server = Server::new_with_options(Opt {
|
||||
experimental_enable_logs_route: true,
|
||||
..default_settings(db_path.path())
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let (route_layer, route_layer_handle) =
|
||||
tracing_subscriber::reload::Layer::new(None.with_filter(
|
||||
tracing_subscriber::filter::Targets::new().with_target("", LevelFilter::OFF),
|
||||
));
|
||||
|
||||
let subscriber = tracing_subscriber::registry().with(route_layer).with(
|
||||
tracing_subscriber::fmt::layer()
|
||||
.with_span_events(tracing_subscriber::fmt::format::FmtSpan::ACTIVE)
|
||||
.with_filter(tracing_subscriber::filter::LevelFilter::from_str("INFO").unwrap()),
|
||||
);
|
||||
|
||||
let app = actix_web::test::init_service(create_app(
|
||||
server.service.index_scheduler.clone().into(),
|
||||
server.service.auth.clone().into(),
|
||||
server.service.options.clone(),
|
||||
route_layer_handle,
|
||||
analytics::MockAnalytics::new(&server.service.options),
|
||||
true,
|
||||
))
|
||||
.await;
|
||||
|
||||
// set the subscriber as the default for the application
|
||||
tracing::subscriber::set_global_default(subscriber).unwrap();
|
||||
|
||||
let app = Rc::new(app);
|
||||
|
||||
// First, we start listening on the `/logs/stream` route
|
||||
let handle_app = app.clone();
|
||||
let handle = tokio::task::spawn_local(async move {
|
||||
let req = actix_web::test::TestRequest::post()
|
||||
.uri("/logs/stream")
|
||||
.insert_header(ContentType::json())
|
||||
.set_payload(
|
||||
serde_json::to_vec(&json!({
|
||||
"mode": "human",
|
||||
"target": "info",
|
||||
}))
|
||||
.unwrap(),
|
||||
);
|
||||
let req = req.to_request();
|
||||
let ret = actix_web::test::call_service(&*handle_app, req).await;
|
||||
actix_web::test::read_body(ret).await
|
||||
});
|
||||
|
||||
// We're going to create an index to get at least one info log saying we processed a batch of task
|
||||
let (ret, _code) = server.create_index(json!({ "uid": "tamo" })).await;
|
||||
snapshot!(ret, @r###"
|
||||
{
|
||||
"taskUid": 0,
|
||||
"indexUid": "tamo",
|
||||
"status": "enqueued",
|
||||
"type": "indexCreation",
|
||||
"enqueuedAt": "[date]"
|
||||
}
|
||||
"###);
|
||||
server.wait_task(ret.uid()).await;
|
||||
|
||||
let req = actix_web::test::TestRequest::delete().uri("/logs/stream");
|
||||
let req = req.to_request();
|
||||
let ret = actix_web::test::call_service(&*app, req).await;
|
||||
let code = ret.status();
|
||||
snapshot!(code, @"204 No Content");
|
||||
|
||||
let logs = handle.await.unwrap();
|
||||
let logs = String::from_utf8(logs.to_vec()).unwrap();
|
||||
assert!(logs.contains("INFO"), "{logs}");
|
||||
}
|
||||
@@ -105,6 +105,24 @@ async fn more_advanced_facet_search() {
|
||||
snapshot!(response["facetHits"].as_array().unwrap().len(), @"1");
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn simple_facet_search_with_max_values() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("test");
|
||||
|
||||
let documents = DOCUMENTS.clone();
|
||||
index.update_settings_faceting(json!({ "maxValuesPerFacet": 1 })).await;
|
||||
index.update_settings_filterable_attributes(json!(["genres"])).await;
|
||||
index.add_documents(documents, None).await;
|
||||
index.wait_task(2).await;
|
||||
|
||||
let (response, code) =
|
||||
index.facet_search(json!({"facetName": "genres", "facetQuery": "a"})).await;
|
||||
|
||||
assert_eq!(code, 200, "{}", response);
|
||||
assert_eq!(dbg!(response)["facetHits"].as_array().unwrap().len(), 1);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn non_filterable_facet_search_error() {
|
||||
let server = Server::new().await;
|
||||
|
||||
221
meilisearch/tests/search/hybrid.rs
Normal file
221
meilisearch/tests/search/hybrid.rs
Normal file
@@ -0,0 +1,221 @@
|
||||
use meili_snap::snapshot;
|
||||
use once_cell::sync::Lazy;
|
||||
|
||||
use crate::common::index::Index;
|
||||
use crate::common::{Server, Value};
|
||||
use crate::json;
|
||||
|
||||
async fn index_with_documents<'a>(server: &'a Server, documents: &Value) -> Index<'a> {
|
||||
let index = server.index("test");
|
||||
|
||||
let (response, code) = server.set_features(json!({"vectorStore": true})).await;
|
||||
|
||||
meili_snap::snapshot!(code, @"200 OK");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"vectorStore": true,
|
||||
"metrics": false,
|
||||
"logsRoute": false,
|
||||
"exportPuffinReports": false
|
||||
}
|
||||
"###);
|
||||
|
||||
let (response, code) = index
|
||||
.update_settings(json!({ "embedders": {"default": {
|
||||
"source": "userProvided",
|
||||
"dimensions": 2}}} ))
|
||||
.await;
|
||||
assert_eq!(202, code, "{:?}", response);
|
||||
index.wait_task(response.uid()).await;
|
||||
|
||||
let (response, code) = index.add_documents(documents.clone(), None).await;
|
||||
assert_eq!(202, code, "{:?}", response);
|
||||
index.wait_task(response.uid()).await;
|
||||
index
|
||||
}
|
||||
|
||||
static SIMPLE_SEARCH_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
|
||||
json!([
|
||||
{
|
||||
"title": "Shazam!",
|
||||
"desc": "a Captain Marvel ersatz",
|
||||
"id": "1",
|
||||
"_vectors": {"default": [1.0, 3.0]},
|
||||
},
|
||||
{
|
||||
"title": "Captain Planet",
|
||||
"desc": "He's not part of the Marvel Cinematic Universe",
|
||||
"id": "2",
|
||||
"_vectors": {"default": [1.0, 2.0]},
|
||||
},
|
||||
{
|
||||
"title": "Captain Marvel",
|
||||
"desc": "a Shazam ersatz",
|
||||
"id": "3",
|
||||
"_vectors": {"default": [2.0, 3.0]},
|
||||
}])
|
||||
});
|
||||
|
||||
static SINGLE_DOCUMENT: Lazy<Value> = Lazy::new(|| {
|
||||
json!([{
|
||||
"title": "Shazam!",
|
||||
"desc": "a Captain Marvel ersatz",
|
||||
"id": "1",
|
||||
"_vectors": {"default": [1.0, 3.0]},
|
||||
}])
|
||||
});
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn simple_search() {
|
||||
let server = Server::new().await;
|
||||
let index = index_with_documents(&server, &SIMPLE_SEARCH_DOCUMENTS).await;
|
||||
|
||||
let (response, code) = index
|
||||
.search_post(
|
||||
json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.2}}),
|
||||
)
|
||||
.await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(response["hits"], @r###"[{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]}},{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]}},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]}}]"###);
|
||||
|
||||
let (response, code) = index
|
||||
.search_post(
|
||||
json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.8}}),
|
||||
)
|
||||
.await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_semanticScore":0.99029034},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_semanticScore":0.97434163},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_semanticScore":0.9472136}]"###);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn highlighter() {
|
||||
let server = Server::new().await;
|
||||
let index = index_with_documents(&server, &SIMPLE_SEARCH_DOCUMENTS).await;
|
||||
|
||||
let (response, code) = index
|
||||
.search_post(json!({"q": "Captain Marvel", "vector": [1.0, 1.0],
|
||||
"hybrid": {"semanticRatio": 0.2},
|
||||
"attributesToHighlight": [
|
||||
"desc"
|
||||
],
|
||||
"highlightPreTag": "**BEGIN**",
|
||||
"highlightPostTag": "**END**"
|
||||
}))
|
||||
.await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_formatted":{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":["2.0","3.0"]}}},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_formatted":{"title":"Shazam!","desc":"a **BEGIN**Captain**END** **BEGIN**Marvel**END** ersatz","id":"1","_vectors":{"default":["1.0","3.0"]}}},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_formatted":{"title":"Captain Planet","desc":"He's not part of the **BEGIN**Marvel**END** Cinematic Universe","id":"2","_vectors":{"default":["1.0","2.0"]}}}]"###);
|
||||
|
||||
let (response, code) = index
|
||||
.search_post(json!({"q": "Captain Marvel", "vector": [1.0, 1.0],
|
||||
"hybrid": {"semanticRatio": 0.8},
|
||||
"attributesToHighlight": [
|
||||
"desc"
|
||||
],
|
||||
"highlightPreTag": "**BEGIN**",
|
||||
"highlightPostTag": "**END**"
|
||||
}))
|
||||
.await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_formatted":{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":["2.0","3.0"]}},"_semanticScore":0.99029034},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_formatted":{"title":"Captain Planet","desc":"He's not part of the **BEGIN**Marvel**END** Cinematic Universe","id":"2","_vectors":{"default":["1.0","2.0"]}},"_semanticScore":0.97434163},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_formatted":{"title":"Shazam!","desc":"a **BEGIN**Captain**END** **BEGIN**Marvel**END** ersatz","id":"1","_vectors":{"default":["1.0","3.0"]}},"_semanticScore":0.9472136}]"###);
|
||||
|
||||
// no highlighting on full semantic
|
||||
let (response, code) = index
|
||||
.search_post(json!({"q": "Captain Marvel", "vector": [1.0, 1.0],
|
||||
"hybrid": {"semanticRatio": 1.0},
|
||||
"attributesToHighlight": [
|
||||
"desc"
|
||||
],
|
||||
"highlightPreTag": "**BEGIN**",
|
||||
"highlightPostTag": "**END**"
|
||||
}))
|
||||
.await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_formatted":{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":["2.0","3.0"]}},"_semanticScore":0.99029034},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_formatted":{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":["1.0","2.0"]}},"_semanticScore":0.97434163},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_formatted":{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":["1.0","3.0"]}}}]"###);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn invalid_semantic_ratio() {
|
||||
let server = Server::new().await;
|
||||
let index = index_with_documents(&server, &SIMPLE_SEARCH_DOCUMENTS).await;
|
||||
|
||||
let (response, code) = index
|
||||
.search_post(
|
||||
json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 1.2}}),
|
||||
)
|
||||
.await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
"message": "Invalid value at `.hybrid.semanticRatio`: the value of `semanticRatio` is invalid, expected a float between `0.0` and `1.0`.",
|
||||
"code": "invalid_search_semantic_ratio",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_search_semantic_ratio"
|
||||
}
|
||||
"###);
|
||||
|
||||
let (response, code) = index
|
||||
.search_post(
|
||||
json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": -0.8}}),
|
||||
)
|
||||
.await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
"message": "Invalid value at `.hybrid.semanticRatio`: the value of `semanticRatio` is invalid, expected a float between `0.0` and `1.0`.",
|
||||
"code": "invalid_search_semantic_ratio",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_search_semantic_ratio"
|
||||
}
|
||||
"###);
|
||||
|
||||
let (response, code) = index
|
||||
.search_get(
|
||||
&yaup::to_string(
|
||||
&json!({"q": "Captain", "vector": [1.0, 1.0], "hybridSemanticRatio": 1.2}),
|
||||
)
|
||||
.unwrap(),
|
||||
)
|
||||
.await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
"message": "Invalid value in parameter `hybridSemanticRatio`: the value of `semanticRatio` is invalid, expected a float between `0.0` and `1.0`.",
|
||||
"code": "invalid_search_semantic_ratio",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_search_semantic_ratio"
|
||||
}
|
||||
"###);
|
||||
|
||||
let (response, code) = index
|
||||
.search_get(
|
||||
&yaup::to_string(
|
||||
&json!({"q": "Captain", "vector": [1.0, 1.0], "hybridSemanticRatio": -0.2}),
|
||||
)
|
||||
.unwrap(),
|
||||
)
|
||||
.await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
"message": "Invalid value in parameter `hybridSemanticRatio`: the value of `semanticRatio` is invalid, expected a float between `0.0` and `1.0`.",
|
||||
"code": "invalid_search_semantic_ratio",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_search_semantic_ratio"
|
||||
}
|
||||
"###);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn single_document() {
|
||||
let server = Server::new().await;
|
||||
let index = index_with_documents(&server, &SINGLE_DOCUMENT).await;
|
||||
|
||||
let (response, code) = index
|
||||
.search_post(
|
||||
json!({"vector": [1.0, 3.0], "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true}),
|
||||
)
|
||||
.await;
|
||||
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(response["hits"][0], @r###"{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_rankingScore":1.0,"_semanticScore":1.0}"###);
|
||||
}
|
||||
@@ -6,6 +6,7 @@ mod errors;
|
||||
mod facet_search;
|
||||
mod formatted;
|
||||
mod geo;
|
||||
mod hybrid;
|
||||
mod multi;
|
||||
mod pagination;
|
||||
mod restrict_searchable;
|
||||
@@ -20,22 +21,27 @@ static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
|
||||
{
|
||||
"title": "Shazam!",
|
||||
"id": "287947",
|
||||
"_vectors": { "manual": [1, 2, 3]},
|
||||
},
|
||||
{
|
||||
"title": "Captain Marvel",
|
||||
"id": "299537",
|
||||
"_vectors": { "manual": [1, 2, 54] },
|
||||
},
|
||||
{
|
||||
"title": "Escape Room",
|
||||
"id": "522681",
|
||||
"_vectors": { "manual": [10, -23, 32] },
|
||||
},
|
||||
{
|
||||
"title": "How to Train Your Dragon: The Hidden World",
|
||||
"id": "166428",
|
||||
"_vectors": { "manual": [-100, 231, 32] },
|
||||
},
|
||||
{
|
||||
"title": "Gläss",
|
||||
"id": "450465",
|
||||
"_vectors": { "manual": [-100, 340, 90] },
|
||||
}
|
||||
])
|
||||
});
|
||||
@@ -57,6 +63,7 @@ static NESTED_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
|
||||
},
|
||||
],
|
||||
"cattos": "pésti",
|
||||
"_vectors": { "manual": [1, 2, 3]},
|
||||
},
|
||||
{
|
||||
"id": 654,
|
||||
@@ -69,12 +76,14 @@ static NESTED_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
|
||||
},
|
||||
],
|
||||
"cattos": ["simba", "pestiféré"],
|
||||
"_vectors": { "manual": [1, 2, 54] },
|
||||
},
|
||||
{
|
||||
"id": 750,
|
||||
"father": "romain",
|
||||
"mother": "michelle",
|
||||
"cattos": ["enigma"],
|
||||
"_vectors": { "manual": [10, 23, 32] },
|
||||
},
|
||||
{
|
||||
"id": 951,
|
||||
@@ -91,6 +100,7 @@ static NESTED_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
|
||||
},
|
||||
],
|
||||
"cattos": ["moumoute", "gomez"],
|
||||
"_vectors": { "manual": [10, 23, 32] },
|
||||
},
|
||||
])
|
||||
});
|
||||
@@ -756,38 +766,14 @@ async fn faceting_max_values_per_facet() {
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn experimental_feature_score_details() {
|
||||
async fn test_score_details() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("test");
|
||||
|
||||
let documents = DOCUMENTS.clone();
|
||||
|
||||
index.add_documents(json!(documents), None).await;
|
||||
index.wait_task(0).await;
|
||||
|
||||
index
|
||||
.search(
|
||||
json!({
|
||||
"q": "train dragon",
|
||||
"showRankingScoreDetails": true,
|
||||
}),
|
||||
|response, code| {
|
||||
meili_snap::snapshot!(code, @"400 Bad Request");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"message": "Computing score details requires enabling the `score details` experimental feature. See https://github.com/meilisearch/product/discussions/674",
|
||||
"code": "feature_not_enabled",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#feature_not_enabled"
|
||||
}
|
||||
"###);
|
||||
},
|
||||
)
|
||||
.await;
|
||||
|
||||
let (response, code) = server.set_features(json!({"scoreDetails": true})).await;
|
||||
meili_snap::snapshot!(code, @"200 OK");
|
||||
meili_snap::snapshot!(response["scoreDetails"], @"true");
|
||||
let res = index.add_documents(json!(documents), None).await;
|
||||
index.wait_task(res.0.uid()).await;
|
||||
|
||||
index
|
||||
.search(
|
||||
@@ -802,6 +788,13 @@ async fn experimental_feature_score_details() {
|
||||
{
|
||||
"title": "How to Train Your Dragon: The Hidden World",
|
||||
"id": "166428",
|
||||
"_vectors": {
|
||||
"manual": [
|
||||
-100,
|
||||
231,
|
||||
32
|
||||
]
|
||||
},
|
||||
"_rankingScoreDetails": {
|
||||
"words": {
|
||||
"order": 0,
|
||||
@@ -823,7 +816,7 @@ async fn experimental_feature_score_details() {
|
||||
"order": 3,
|
||||
"attributeRankingOrderScore": 1.0,
|
||||
"queryWordDistanceScore": 0.8095238095238095,
|
||||
"score": 0.9365079365079364
|
||||
"score": 0.9727891156462584
|
||||
},
|
||||
"exactness": {
|
||||
"order": 4,
|
||||
@@ -870,13 +863,100 @@ async fn experimental_feature_vector_store() {
|
||||
meili_snap::snapshot!(code, @"200 OK");
|
||||
meili_snap::snapshot!(response["vectorStore"], @"true");
|
||||
|
||||
let (response, code) = index
|
||||
.update_settings(json!({"embedders": {
|
||||
"manual": {
|
||||
"source": "userProvided",
|
||||
"dimensions": 3,
|
||||
}
|
||||
}}))
|
||||
.await;
|
||||
|
||||
meili_snap::snapshot!(response, @r###"
|
||||
{
|
||||
"taskUid": 1,
|
||||
"indexUid": "test",
|
||||
"status": "enqueued",
|
||||
"type": "settingsUpdate",
|
||||
"enqueuedAt": "[date]"
|
||||
}
|
||||
"###);
|
||||
meili_snap::snapshot!(code, @"202 Accepted");
|
||||
let response = index.wait_task(response.uid()).await;
|
||||
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response["status"]), @"\"succeeded\"");
|
||||
|
||||
let (response, code) = index
|
||||
.search_post(json!({
|
||||
"vector": [1.0, 2.0, 3.0],
|
||||
}))
|
||||
.await;
|
||||
|
||||
meili_snap::snapshot!(code, @"200 OK");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @"[]");
|
||||
// vector search returns all documents that don't have vectors in the last bucket, like all sorts
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @r###"
|
||||
[
|
||||
{
|
||||
"title": "Shazam!",
|
||||
"id": "287947",
|
||||
"_vectors": {
|
||||
"manual": [
|
||||
1,
|
||||
2,
|
||||
3
|
||||
]
|
||||
},
|
||||
"_semanticScore": 1.0
|
||||
},
|
||||
{
|
||||
"title": "Captain Marvel",
|
||||
"id": "299537",
|
||||
"_vectors": {
|
||||
"manual": [
|
||||
1,
|
||||
2,
|
||||
54
|
||||
]
|
||||
},
|
||||
"_semanticScore": 0.9129112
|
||||
},
|
||||
{
|
||||
"title": "Gläss",
|
||||
"id": "450465",
|
||||
"_vectors": {
|
||||
"manual": [
|
||||
-100,
|
||||
340,
|
||||
90
|
||||
]
|
||||
},
|
||||
"_semanticScore": 0.8106413
|
||||
},
|
||||
{
|
||||
"title": "How to Train Your Dragon: The Hidden World",
|
||||
"id": "166428",
|
||||
"_vectors": {
|
||||
"manual": [
|
||||
-100,
|
||||
231,
|
||||
32
|
||||
]
|
||||
},
|
||||
"_semanticScore": 0.74120104
|
||||
},
|
||||
{
|
||||
"title": "Escape Room",
|
||||
"id": "522681",
|
||||
"_vectors": {
|
||||
"manual": [
|
||||
10,
|
||||
-23,
|
||||
32
|
||||
]
|
||||
}
|
||||
}
|
||||
]
|
||||
"###);
|
||||
}
|
||||
|
||||
#[cfg(feature = "default")]
|
||||
@@ -1126,7 +1206,14 @@ async fn simple_search_with_strange_synonyms() {
|
||||
[
|
||||
{
|
||||
"title": "How to Train Your Dragon: The Hidden World",
|
||||
"id": "166428"
|
||||
"id": "166428",
|
||||
"_vectors": {
|
||||
"manual": [
|
||||
-100,
|
||||
231,
|
||||
32
|
||||
]
|
||||
}
|
||||
}
|
||||
]
|
||||
"###);
|
||||
@@ -1140,7 +1227,14 @@ async fn simple_search_with_strange_synonyms() {
|
||||
[
|
||||
{
|
||||
"title": "How to Train Your Dragon: The Hidden World",
|
||||
"id": "166428"
|
||||
"id": "166428",
|
||||
"_vectors": {
|
||||
"manual": [
|
||||
-100,
|
||||
231,
|
||||
32
|
||||
]
|
||||
}
|
||||
}
|
||||
]
|
||||
"###);
|
||||
@@ -1154,7 +1248,14 @@ async fn simple_search_with_strange_synonyms() {
|
||||
[
|
||||
{
|
||||
"title": "How to Train Your Dragon: The Hidden World",
|
||||
"id": "166428"
|
||||
"id": "166428",
|
||||
"_vectors": {
|
||||
"manual": [
|
||||
-100,
|
||||
231,
|
||||
32
|
||||
]
|
||||
}
|
||||
}
|
||||
]
|
||||
"###);
|
||||
|
||||
@@ -72,7 +72,14 @@ async fn simple_search_single_index() {
|
||||
"hits": [
|
||||
{
|
||||
"title": "Gläss",
|
||||
"id": "450465"
|
||||
"id": "450465",
|
||||
"_vectors": {
|
||||
"manual": [
|
||||
-100,
|
||||
340,
|
||||
90
|
||||
]
|
||||
}
|
||||
}
|
||||
],
|
||||
"query": "glass",
|
||||
@@ -86,7 +93,14 @@ async fn simple_search_single_index() {
|
||||
"hits": [
|
||||
{
|
||||
"title": "Captain Marvel",
|
||||
"id": "299537"
|
||||
"id": "299537",
|
||||
"_vectors": {
|
||||
"manual": [
|
||||
1,
|
||||
2,
|
||||
54
|
||||
]
|
||||
}
|
||||
}
|
||||
],
|
||||
"query": "captain",
|
||||
@@ -177,7 +191,14 @@ async fn simple_search_two_indexes() {
|
||||
"hits": [
|
||||
{
|
||||
"title": "Gläss",
|
||||
"id": "450465"
|
||||
"id": "450465",
|
||||
"_vectors": {
|
||||
"manual": [
|
||||
-100,
|
||||
340,
|
||||
90
|
||||
]
|
||||
}
|
||||
}
|
||||
],
|
||||
"query": "glass",
|
||||
@@ -203,7 +224,14 @@ async fn simple_search_two_indexes() {
|
||||
"age": 4
|
||||
}
|
||||
],
|
||||
"cattos": "pésti"
|
||||
"cattos": "pésti",
|
||||
"_vectors": {
|
||||
"manual": [
|
||||
1,
|
||||
2,
|
||||
3
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 654,
|
||||
@@ -218,7 +246,14 @@ async fn simple_search_two_indexes() {
|
||||
"cattos": [
|
||||
"simba",
|
||||
"pestiféré"
|
||||
]
|
||||
],
|
||||
"_vectors": {
|
||||
"manual": [
|
||||
1,
|
||||
2,
|
||||
54
|
||||
]
|
||||
}
|
||||
}
|
||||
],
|
||||
"query": "pésti",
|
||||
|
||||
@@ -335,3 +335,35 @@ async fn exactness_ranking_rule_order() {
|
||||
})
|
||||
.await;
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn search_on_exact_field() {
|
||||
let server = Server::new().await;
|
||||
let index = index_with_documents(
|
||||
&server,
|
||||
&json!([
|
||||
{
|
||||
"title": "Captain Marvel",
|
||||
"exact": "Captain Marivel",
|
||||
"id": "1",
|
||||
},
|
||||
{
|
||||
"title": "Captain Marivel",
|
||||
"exact": "Captain the Marvel",
|
||||
"id": "2",
|
||||
}]),
|
||||
)
|
||||
.await;
|
||||
|
||||
let (response, code) =
|
||||
index.update_settings_typo_tolerance(json!({ "disableOnAttributes": ["exact"] })).await;
|
||||
assert_eq!(202, code, "{:?}", response);
|
||||
index.wait_task(1).await;
|
||||
// Searching on an exact attribute should only return the document matching without typo.
|
||||
index
|
||||
.search(json!({"q": "Marvel", "attributesToSearchOn": ["exact"]}), |response, code| {
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(response["hits"].as_array().unwrap().len(), @"1");
|
||||
})
|
||||
.await;
|
||||
}
|
||||
|
||||
@@ -83,6 +83,7 @@ async fn get_settings() {
|
||||
"maxTotalHits": 1000,
|
||||
})
|
||||
);
|
||||
assert_eq!(settings["proximityPrecision"], json!("byWord"));
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
|
||||
@@ -27,17 +27,6 @@ static DOCUMENTS: Lazy<crate::common::Value> = Lazy::new(|| {
|
||||
#[actix_rt::test]
|
||||
async fn attribute_scale_search() {
|
||||
let server = Server::new().await;
|
||||
let (response, code) = server.set_features(json!({"proximityPrecision": true})).await;
|
||||
meili_snap::snapshot!(code, @"200 OK");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"scoreDetails": false,
|
||||
"vectorStore": false,
|
||||
"metrics": false,
|
||||
"exportPuffinReports": false,
|
||||
"proximityPrecision": true
|
||||
}
|
||||
"###);
|
||||
let index = server.index("test");
|
||||
|
||||
index.add_documents(DOCUMENTS.clone(), None).await;
|
||||
@@ -45,7 +34,7 @@ async fn attribute_scale_search() {
|
||||
|
||||
let (response, code) = index
|
||||
.update_settings(json!({
|
||||
"proximityPrecision": "attributeScale",
|
||||
"proximityPrecision": "byAttribute",
|
||||
"rankingRules": ["words", "typo", "proximity"],
|
||||
}))
|
||||
.await;
|
||||
@@ -111,17 +100,6 @@ async fn attribute_scale_search() {
|
||||
#[actix_rt::test]
|
||||
async fn attribute_scale_phrase_search() {
|
||||
let server = Server::new().await;
|
||||
let (response, code) = server.set_features(json!({"proximityPrecision": true})).await;
|
||||
meili_snap::snapshot!(code, @"200 OK");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"scoreDetails": false,
|
||||
"vectorStore": false,
|
||||
"metrics": false,
|
||||
"exportPuffinReports": false,
|
||||
"proximityPrecision": true
|
||||
}
|
||||
"###);
|
||||
let index = server.index("test");
|
||||
|
||||
index.add_documents(DOCUMENTS.clone(), None).await;
|
||||
@@ -129,7 +107,7 @@ async fn attribute_scale_phrase_search() {
|
||||
|
||||
let (_response, _code) = index
|
||||
.update_settings(json!({
|
||||
"proximityPrecision": "attributeScale",
|
||||
"proximityPrecision": "byAttribute",
|
||||
"rankingRules": ["words", "typo", "proximity"],
|
||||
}))
|
||||
.await;
|
||||
@@ -190,17 +168,6 @@ async fn attribute_scale_phrase_search() {
|
||||
#[actix_rt::test]
|
||||
async fn word_scale_set_and_reset() {
|
||||
let server = Server::new().await;
|
||||
let (response, code) = server.set_features(json!({"proximityPrecision": true})).await;
|
||||
meili_snap::snapshot!(code, @"200 OK");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"scoreDetails": false,
|
||||
"vectorStore": false,
|
||||
"metrics": false,
|
||||
"exportPuffinReports": false,
|
||||
"proximityPrecision": true
|
||||
}
|
||||
"###);
|
||||
let index = server.index("test");
|
||||
|
||||
index.add_documents(DOCUMENTS.clone(), None).await;
|
||||
@@ -209,7 +176,7 @@ async fn word_scale_set_and_reset() {
|
||||
// Set and reset the setting ensuring the swap between the 2 settings is applied.
|
||||
let (_response, _code) = index
|
||||
.update_settings(json!({
|
||||
"proximityPrecision": "attributeScale",
|
||||
"proximityPrecision": "byAttribute",
|
||||
"rankingRules": ["words", "typo", "proximity"],
|
||||
}))
|
||||
.await;
|
||||
@@ -217,7 +184,7 @@ async fn word_scale_set_and_reset() {
|
||||
|
||||
let (_response, _code) = index
|
||||
.update_settings(json!({
|
||||
"proximityPrecision": "wordScale",
|
||||
"proximityPrecision": "byWord",
|
||||
"rankingRules": ["words", "typo", "proximity"],
|
||||
}))
|
||||
.await;
|
||||
@@ -316,17 +283,6 @@ async fn word_scale_set_and_reset() {
|
||||
#[actix_rt::test]
|
||||
async fn attribute_scale_default_ranking_rules() {
|
||||
let server = Server::new().await;
|
||||
let (response, code) = server.set_features(json!({"proximityPrecision": true})).await;
|
||||
meili_snap::snapshot!(code, @"200 OK");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"scoreDetails": false,
|
||||
"vectorStore": false,
|
||||
"metrics": false,
|
||||
"exportPuffinReports": false,
|
||||
"proximityPrecision": true
|
||||
}
|
||||
"###);
|
||||
let index = server.index("test");
|
||||
|
||||
index.add_documents(DOCUMENTS.clone(), None).await;
|
||||
@@ -334,7 +290,7 @@ async fn attribute_scale_default_ranking_rules() {
|
||||
|
||||
let (response, code) = index
|
||||
.update_settings(json!({
|
||||
"proximityPrecision": "attributeScale"
|
||||
"proximityPrecision": "byAttribute"
|
||||
}))
|
||||
.await;
|
||||
assert_eq!("202", code.as_str(), "{:?}", response);
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
mod errors;
|
||||
mod webhook;
|
||||
|
||||
use meili_snap::insta::assert_json_snapshot;
|
||||
use time::format_description::well_known::Rfc3339;
|
||||
|
||||
119
meilisearch/tests/tasks/webhook.rs
Normal file
119
meilisearch/tests/tasks/webhook.rs
Normal file
@@ -0,0 +1,119 @@
|
||||
//! To test the webhook, we need to spawn a new server with a URL listening for
|
||||
//! post requests. The webhook handle starts a server and forwards all the
|
||||
//! received requests into a channel for you to handle.
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use actix_http::body::MessageBody;
|
||||
use actix_web::dev::{ServiceFactory, ServiceResponse};
|
||||
use actix_web::web::{Bytes, Data};
|
||||
use actix_web::{post, App, HttpResponse, HttpServer};
|
||||
use meili_snap::{json_string, snapshot};
|
||||
use meilisearch::Opt;
|
||||
use tokio::sync::mpsc;
|
||||
use url::Url;
|
||||
|
||||
use crate::common::{default_settings, Server};
|
||||
use crate::json;
|
||||
|
||||
#[post("/")]
|
||||
async fn forward_body(sender: Data<mpsc::UnboundedSender<Vec<u8>>>, body: Bytes) -> HttpResponse {
|
||||
let body = body.to_vec();
|
||||
sender.send(body).unwrap();
|
||||
HttpResponse::Ok().into()
|
||||
}
|
||||
|
||||
fn create_app(
|
||||
sender: Arc<mpsc::UnboundedSender<Vec<u8>>>,
|
||||
) -> actix_web::App<
|
||||
impl ServiceFactory<
|
||||
actix_web::dev::ServiceRequest,
|
||||
Config = (),
|
||||
Response = ServiceResponse<impl MessageBody>,
|
||||
Error = actix_web::Error,
|
||||
InitError = (),
|
||||
>,
|
||||
> {
|
||||
App::new().service(forward_body).app_data(Data::from(sender))
|
||||
}
|
||||
|
||||
struct WebhookHandle {
|
||||
pub server_handle: tokio::task::JoinHandle<Result<(), std::io::Error>>,
|
||||
pub url: String,
|
||||
pub receiver: mpsc::UnboundedReceiver<Vec<u8>>,
|
||||
}
|
||||
|
||||
async fn create_webhook_server() -> WebhookHandle {
|
||||
let (sender, receiver) = mpsc::unbounded_channel();
|
||||
let sender = Arc::new(sender);
|
||||
|
||||
// By listening on the port 0, the system will give us any available port.
|
||||
let server =
|
||||
HttpServer::new(move || create_app(sender.clone())).bind(("127.0.0.1", 0)).unwrap();
|
||||
let (ip, scheme) = server.addrs_with_scheme()[0];
|
||||
let url = format!("{scheme}://{ip}/");
|
||||
|
||||
let server_handle = tokio::spawn(server.run());
|
||||
WebhookHandle { server_handle, url, receiver }
|
||||
}
|
||||
|
||||
#[actix_web::test]
|
||||
async fn test_basic_webhook() {
|
||||
let WebhookHandle { server_handle, url, mut receiver } = create_webhook_server().await;
|
||||
|
||||
let db_path = tempfile::tempdir().unwrap();
|
||||
let server = Server::new_with_options(Opt {
|
||||
task_webhook_url: Some(Url::parse(&url).unwrap()),
|
||||
..default_settings(db_path.path())
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let index = server.index("tamo");
|
||||
// May be flaky: we're relying on the fact that while the first document addition is processed, the other
|
||||
// operations will be received and will be batched together. If it doesn't happen it's not a problem
|
||||
// the rest of the test won't assume anything about the number of tasks per batch.
|
||||
for i in 0..5 {
|
||||
let (_, _status) = index.add_documents(json!({ "id": i, "doggo": "bone" }), None).await;
|
||||
}
|
||||
|
||||
let mut nb_tasks = 0;
|
||||
while let Some(payload) = receiver.recv().await {
|
||||
let payload = String::from_utf8(payload).unwrap();
|
||||
let jsonl = payload.split('\n');
|
||||
for json in jsonl {
|
||||
if json.is_empty() {
|
||||
break; // we reached EOF
|
||||
}
|
||||
nb_tasks += 1;
|
||||
let json: serde_json::Value = serde_json::from_str(json).unwrap();
|
||||
snapshot!(
|
||||
json_string!(json, { ".uid" => "[uid]", ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }),
|
||||
@r###"
|
||||
{
|
||||
"uid": "[uid]",
|
||||
"indexUid": "tamo",
|
||||
"status": "succeeded",
|
||||
"type": "documentAdditionOrUpdate",
|
||||
"canceledBy": null,
|
||||
"details": {
|
||||
"receivedDocuments": 1,
|
||||
"indexedDocuments": 1
|
||||
},
|
||||
"error": null,
|
||||
"duration": "[duration]",
|
||||
"enqueuedAt": "[date]",
|
||||
"startedAt": "[date]",
|
||||
"finishedAt": "[date]"
|
||||
}
|
||||
"###);
|
||||
}
|
||||
if nb_tasks == 5 {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
assert!(nb_tasks == 5, "We should have received the 5 tasks but only received {nb_tasks}");
|
||||
|
||||
server_handle.abort();
|
||||
}
|
||||
@@ -9,11 +9,11 @@ edition.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1.0.75"
|
||||
clap = { version = "4.2.1", features = ["derive"] }
|
||||
anyhow = "1.0.79"
|
||||
clap = { version = "4.4.17", features = ["derive"] }
|
||||
dump = { path = "../dump" }
|
||||
file-store = { path = "../file-store" }
|
||||
meilisearch-auth = { path = "../meilisearch-auth" }
|
||||
meilisearch-types = { path = "../meilisearch-types" }
|
||||
time = { version = "0.3.30", features = ["formatting"] }
|
||||
uuid = { version = "1.5.0", features = ["v4"], default-features = false }
|
||||
time = { version = "0.3.31", features = ["formatting"] }
|
||||
uuid = { version = "1.6.1", features = ["v4"], default-features = false }
|
||||
|
||||
@@ -14,51 +14,53 @@ license.workspace = true
|
||||
[dependencies]
|
||||
bimap = { version = "0.6.3", features = ["serde"] }
|
||||
bincode = "1.3.3"
|
||||
bstr = "1.4.0"
|
||||
bytemuck = { version = "1.13.1", features = ["extern_crate_alloc"] }
|
||||
byteorder = "1.4.3"
|
||||
bstr = "1.9.0"
|
||||
bytemuck = { version = "1.14.0", features = ["extern_crate_alloc"] }
|
||||
byteorder = "1.5.0"
|
||||
charabia = { version = "0.8.5", default-features = false }
|
||||
concat-arrays = "0.1.2"
|
||||
crossbeam-channel = "0.5.8"
|
||||
deserr = "0.6.0"
|
||||
either = { version = "1.8.1", features = ["serde"] }
|
||||
crossbeam-channel = "0.5.11"
|
||||
deserr = "0.6.1"
|
||||
either = { version = "1.9.0", features = ["serde"] }
|
||||
flatten-serde-json = { path = "../flatten-serde-json" }
|
||||
fst = "0.4.7"
|
||||
fxhash = "0.2.1"
|
||||
geoutils = "0.5.1"
|
||||
grenad = { version = "0.4.5", default-features = false, features = [
|
||||
"rayon", "tempfile"
|
||||
"rayon",
|
||||
"tempfile",
|
||||
] }
|
||||
heed = { version = "0.20.0-alpha.9", default-features = false, features = [
|
||||
"serde-json", "serde-bincode", "read-txn-no-tls"
|
||||
"serde-json",
|
||||
"serde-bincode",
|
||||
"read-txn-no-tls",
|
||||
] }
|
||||
indexmap = { version = "2.0.0", features = ["serde"] }
|
||||
instant-distance = { version = "0.6.1", features = ["with-serde"] }
|
||||
indexmap = { version = "2.1.0", features = ["serde"] }
|
||||
json-depth-checker = { path = "../json-depth-checker" }
|
||||
levenshtein_automata = { version = "0.2.1", features = ["fst_automaton"] }
|
||||
memmap2 = "0.7.1"
|
||||
obkv = "0.2.0"
|
||||
once_cell = "1.17.1"
|
||||
ordered-float = "3.6.0"
|
||||
obkv = "0.2.1"
|
||||
once_cell = "1.19.0"
|
||||
ordered-float = "4.2.0"
|
||||
rand_pcg = { version = "0.3.1", features = ["serde1"] }
|
||||
rayon = "1.7.0"
|
||||
roaring = "0.10.1"
|
||||
rayon = "1.8.0"
|
||||
roaring = "0.10.2"
|
||||
rstar = { version = "0.11.0", features = ["serde"] }
|
||||
serde = { version = "1.0.160", features = ["derive"] }
|
||||
serde_json = { version = "1.0.95", features = ["preserve_order"] }
|
||||
slice-group-by = "0.3.0"
|
||||
serde = { version = "1.0.195", features = ["derive"] }
|
||||
serde_json = { version = "1.0.111", features = ["preserve_order"] }
|
||||
slice-group-by = "0.3.1"
|
||||
smallstr = { version = "0.3.0", features = ["serde"] }
|
||||
smallvec = "1.10.0"
|
||||
smallvec = "1.12.0"
|
||||
smartstring = "1.0.1"
|
||||
tempfile = "3.5.0"
|
||||
thiserror = "1.0.40"
|
||||
time = { version = "0.3.20", features = [
|
||||
tempfile = "3.9.0"
|
||||
thiserror = "1.0.56"
|
||||
time = { version = "0.3.31", features = [
|
||||
"serde-well-known",
|
||||
"formatting",
|
||||
"parsing",
|
||||
"macros",
|
||||
] }
|
||||
uuid = { version = "1.3.1", features = ["v4"] }
|
||||
uuid = { version = "1.6.1", features = ["v4"] }
|
||||
|
||||
filter-parser = { path = "../filter-parser" }
|
||||
|
||||
@@ -69,14 +71,31 @@ itertools = "0.11.0"
|
||||
puffin = "0.16.0"
|
||||
|
||||
# logging
|
||||
log = "0.4.17"
|
||||
logging_timer = "1.1.0"
|
||||
csv = "1.2.1"
|
||||
csv = "1.3.0"
|
||||
candle-core = { git = "https://github.com/huggingface/candle.git", version = "0.3.1" }
|
||||
candle-transformers = { git = "https://github.com/huggingface/candle.git", version = "0.3.1" }
|
||||
candle-nn = { git = "https://github.com/huggingface/candle.git", version = "0.3.1" }
|
||||
tokenizers = { git = "https://github.com/huggingface/tokenizers.git", tag = "v0.14.1", version = "0.14.1", default_features = false, features = ["onig"] }
|
||||
hf-hub = { git = "https://github.com/dureuill/hf-hub.git", branch = "rust_tls", default_features = false, features = [
|
||||
"online",
|
||||
] }
|
||||
tokio = { version = "1.35.1", features = ["rt"] }
|
||||
futures = "0.3.30"
|
||||
reqwest = { version = "0.11.23", features = [
|
||||
"rustls-tls",
|
||||
"json",
|
||||
], default-features = false }
|
||||
tiktoken-rs = "0.5.8"
|
||||
liquid = "0.26.4"
|
||||
arroy = "0.2.0"
|
||||
rand = "0.8.5"
|
||||
tracing = "0.1.40"
|
||||
|
||||
[dev-dependencies]
|
||||
mimalloc = { version = "0.1.37", default-features = false }
|
||||
mimalloc = { version = "0.1.39", default-features = false }
|
||||
big_s = "1.0.2"
|
||||
insta = "1.29.0"
|
||||
insta = "1.34.0"
|
||||
maplit = "1.0.2"
|
||||
md5 = "0.7.0"
|
||||
meili-snap = { path = "../meili-snap" }
|
||||
@@ -110,3 +129,6 @@ greek = ["charabia/greek"]
|
||||
|
||||
# allow khmer specialized tokenization
|
||||
khmer = ["charabia/khmer"]
|
||||
|
||||
# allow CUDA support, see <https://github.com/meilisearch/meilisearch/issues/4306>
|
||||
cuda = ["candle-core/cuda"]
|
||||
|
||||
@@ -5,8 +5,8 @@ use std::time::Instant;
|
||||
|
||||
use heed::EnvOpenOptions;
|
||||
use milli::{
|
||||
execute_search, DefaultSearchLogger, GeoSortStrategy, Index, SearchContext, SearchLogger,
|
||||
TermsMatchingStrategy,
|
||||
execute_search, filtered_universe, DefaultSearchLogger, GeoSortStrategy, Index, SearchContext,
|
||||
SearchLogger, TermsMatchingStrategy,
|
||||
};
|
||||
|
||||
#[global_allocator]
|
||||
@@ -49,14 +49,15 @@ fn main() -> Result<(), Box<dyn Error>> {
|
||||
let start = Instant::now();
|
||||
|
||||
let mut ctx = SearchContext::new(&index, &txn);
|
||||
let universe = filtered_universe(&ctx, &None)?;
|
||||
|
||||
let docs = execute_search(
|
||||
&mut ctx,
|
||||
&(!query.trim().is_empty()).then(|| query.trim().to_owned()),
|
||||
&None,
|
||||
(!query.trim().is_empty()).then(|| query.trim()),
|
||||
TermsMatchingStrategy::Last,
|
||||
milli::score_details::ScoringStrategy::Skip,
|
||||
false,
|
||||
&None,
|
||||
universe,
|
||||
&None,
|
||||
GeoSortStrategy::default(),
|
||||
0,
|
||||
|
||||
@@ -1,41 +0,0 @@
|
||||
use std::ops;
|
||||
|
||||
use instant_distance::Point;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::normalize_vector;
|
||||
|
||||
#[derive(Debug, Default, Clone, Serialize, Deserialize)]
|
||||
pub struct NDotProductPoint(Vec<f32>);
|
||||
|
||||
impl NDotProductPoint {
|
||||
pub fn new(point: Vec<f32>) -> Self {
|
||||
NDotProductPoint(normalize_vector(point))
|
||||
}
|
||||
|
||||
pub fn into_inner(self) -> Vec<f32> {
|
||||
self.0
|
||||
}
|
||||
}
|
||||
|
||||
impl ops::Deref for NDotProductPoint {
|
||||
type Target = [f32];
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
self.0.as_slice()
|
||||
}
|
||||
}
|
||||
|
||||
impl Point for NDotProductPoint {
|
||||
fn distance(&self, other: &Self) -> f32 {
|
||||
let dist = 1.0 - dot_product_similarity(&self.0, &other.0);
|
||||
debug_assert!(!dist.is_nan());
|
||||
dist
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the dot product similarity score that will between 0.0 and 1.0
|
||||
/// if both vectors are normalized. The higher the more similar the vectors are.
|
||||
pub fn dot_product_similarity(a: &[f32], b: &[f32]) -> f32 {
|
||||
a.iter().zip(b).map(|(a, b)| a * b).sum()
|
||||
}
|
||||
@@ -25,6 +25,7 @@ impl<R: io::Read + io::Seek> DocumentsBatchReader<R> {
|
||||
///
|
||||
/// It first retrieves the index, then moves to the first document. Use the `into_cursor`
|
||||
/// method to iterator over the documents, from the first to the last.
|
||||
#[tracing::instrument(level = "trace", skip_all, target = "indexing::documents")]
|
||||
pub fn from_reader(reader: R) -> Result<Self, Error> {
|
||||
let reader = grenad::Reader::new(reader)?;
|
||||
let mut cursor = reader.into_cursor()?;
|
||||
|
||||
@@ -61,6 +61,10 @@ pub enum InternalError {
|
||||
AbortedIndexation,
|
||||
#[error("The matching words list contains at least one invalid member.")]
|
||||
InvalidMatchingWords,
|
||||
#[error(transparent)]
|
||||
ArroyError(#[from] arroy::Error),
|
||||
#[error(transparent)]
|
||||
VectorEmbeddingError(#[from] crate::vector::Error),
|
||||
}
|
||||
|
||||
#[derive(Error, Debug)]
|
||||
@@ -110,8 +114,10 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco
|
||||
InvalidGeoField(#[from] GeoError),
|
||||
#[error("Invalid vector dimensions: expected: `{}`, found: `{}`.", .expected, .found)]
|
||||
InvalidVectorDimensions { expected: usize, found: usize },
|
||||
#[error("The `_vectors` field in the document with the id: `{document_id}` is not an array. Was expecting an array of floats or an array of arrays of floats but instead got `{value}`.")]
|
||||
InvalidVectorsType { document_id: Value, value: Value },
|
||||
#[error("The `_vectors.{subfield}` field in the document with id: `{document_id}` is not an array. Was expecting an array of floats or an array of arrays of floats but instead got `{value}`.")]
|
||||
InvalidVectorsType { document_id: Value, value: Value, subfield: String },
|
||||
#[error("The `_vectors` field in the document with id: `{document_id}` is not an object. Was expecting an object with a key for each embedder with manually provided vectors, but instead got `{value}`")]
|
||||
InvalidVectorsMapType { document_id: Value, value: Value },
|
||||
#[error("{0}")]
|
||||
InvalidFilter(String),
|
||||
#[error("Invalid type for filter subexpression: expected: {}, found: {1}.", .0.join(", "))]
|
||||
@@ -166,7 +172,7 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco
|
||||
TooManyDocumentIds { primary_key: String, document: Object },
|
||||
#[error("The primary key inference failed as the engine did not find any field ending with `id` in its name. Please specify the primary key manually using the `primaryKey` query parameter.")]
|
||||
NoPrimaryKeyCandidateFound,
|
||||
#[error("The primary key inference failed as the engine found {} fields ending with `id` in their names: '{}' and '{}'. Please specify the primary key manually using the `primaryKey` query parameter.", .candidates.len(), .candidates.get(0).unwrap(), .candidates.get(1).unwrap())]
|
||||
#[error("The primary key inference failed as the engine found {} fields ending with `id` in their names: '{}' and '{}'. Please specify the primary key manually using the `primaryKey` query parameter.", .candidates.len(), .candidates.first().unwrap(), .candidates.get(1).unwrap())]
|
||||
MultiplePrimaryKeyCandidatesFound { candidates: Vec<String> },
|
||||
#[error("There is no more space left on the device. Consider increasing the size of the disk/partition.")]
|
||||
NoSpaceLeftOnDevice,
|
||||
@@ -180,6 +186,93 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco
|
||||
UnknownInternalDocumentId { document_id: DocumentId },
|
||||
#[error("`minWordSizeForTypos` setting is invalid. `oneTypo` and `twoTypos` fields should be between `0` and `255`, and `twoTypos` should be greater or equals to `oneTypo` but found `oneTypo: {0}` and twoTypos: {1}`.")]
|
||||
InvalidMinTypoWordLenSetting(u8, u8),
|
||||
#[error(transparent)]
|
||||
VectorEmbeddingError(#[from] crate::vector::Error),
|
||||
#[error(transparent)]
|
||||
MissingDocumentField(#[from] crate::prompt::error::RenderPromptError),
|
||||
#[error(transparent)]
|
||||
InvalidPrompt(#[from] crate::prompt::error::NewPromptError),
|
||||
#[error("`.embedders.{0}.documentTemplate`: Invalid template: {1}.")]
|
||||
InvalidPromptForEmbeddings(String, crate::prompt::error::NewPromptError),
|
||||
#[error("Too many embedders in the configuration. Found {0}, but limited to 256.")]
|
||||
TooManyEmbedders(usize),
|
||||
#[error("Cannot find embedder with name {0}.")]
|
||||
InvalidEmbedder(String),
|
||||
#[error("Too many vectors for document with id {0}: found {1}, but limited to 256.")]
|
||||
TooManyVectors(String, usize),
|
||||
#[error("`.embedders.{embedder_name}`: Field `{field}` unavailable for source `{source_}` (only available for sources: {}). Available fields: {}",
|
||||
allowed_sources_for_field
|
||||
.iter()
|
||||
.map(|accepted| format!("`{}`", accepted))
|
||||
.collect::<Vec<String>>()
|
||||
.join(", "),
|
||||
allowed_fields_for_source
|
||||
.iter()
|
||||
.map(|accepted| format!("`{}`", accepted))
|
||||
.collect::<Vec<String>>()
|
||||
.join(", ")
|
||||
)]
|
||||
InvalidFieldForSource {
|
||||
embedder_name: String,
|
||||
source_: crate::vector::settings::EmbedderSource,
|
||||
field: &'static str,
|
||||
allowed_fields_for_source: &'static [&'static str],
|
||||
allowed_sources_for_field: &'static [crate::vector::settings::EmbedderSource],
|
||||
},
|
||||
#[error("`.embedders.{embedder_name}.model`: Invalid model `{model}` for OpenAI. Supported models: {:?}", crate::vector::openai::EmbeddingModel::supported_models())]
|
||||
InvalidOpenAiModel { embedder_name: String, model: String },
|
||||
#[error("`.embedders.{embedder_name}`: Missing field `{field}` (note: this field is mandatory for source {source_})")]
|
||||
MissingFieldForSource {
|
||||
field: &'static str,
|
||||
source_: crate::vector::settings::EmbedderSource,
|
||||
embedder_name: String,
|
||||
},
|
||||
#[error("`.embedders.{embedder_name}.dimensions`: Model `{model}` does not support overriding its native dimensions of {expected_dimensions}. Found {dimensions}")]
|
||||
InvalidOpenAiModelDimensions {
|
||||
embedder_name: String,
|
||||
model: &'static str,
|
||||
dimensions: usize,
|
||||
expected_dimensions: usize,
|
||||
},
|
||||
#[error("`.embedders.{embedder_name}.dimensions`: Model `{model}` does not support overriding its dimensions to a value higher than {max_dimensions}. Found {dimensions}")]
|
||||
InvalidOpenAiModelDimensionsMax {
|
||||
embedder_name: String,
|
||||
model: &'static str,
|
||||
dimensions: usize,
|
||||
max_dimensions: usize,
|
||||
},
|
||||
#[error("`.embedders.{embedder_name}.dimensions`: `dimensions` cannot be zero")]
|
||||
InvalidSettingsDimensions { embedder_name: String },
|
||||
}
|
||||
|
||||
impl From<crate::vector::Error> for Error {
|
||||
fn from(value: crate::vector::Error) -> Self {
|
||||
match value.fault() {
|
||||
FaultSource::User => Error::UserError(value.into()),
|
||||
FaultSource::Runtime => Error::InternalError(value.into()),
|
||||
FaultSource::Bug => Error::InternalError(value.into()),
|
||||
FaultSource::Undecided => Error::InternalError(value.into()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<arroy::Error> for Error {
|
||||
fn from(value: arroy::Error) -> Self {
|
||||
match value {
|
||||
arroy::Error::Heed(heed) => heed.into(),
|
||||
arroy::Error::Io(io) => io.into(),
|
||||
arroy::Error::InvalidVecDimension { expected, received } => {
|
||||
Error::UserError(UserError::InvalidVectorDimensions { expected, found: received })
|
||||
}
|
||||
arroy::Error::DatabaseFull
|
||||
| arroy::Error::InvalidItemAppend
|
||||
| arroy::Error::UnmatchingDistance { .. }
|
||||
| arroy::Error::MissingNode
|
||||
| arroy::Error::MissingMetadata => {
|
||||
Error::InternalError(InternalError::ArroyError(value))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Error, Debug)]
|
||||
@@ -336,6 +429,26 @@ impl From<HeedError> for Error {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
pub enum FaultSource {
|
||||
User,
|
||||
Runtime,
|
||||
Bug,
|
||||
Undecided,
|
||||
}
|
||||
|
||||
impl std::fmt::Display for FaultSource {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
let s = match self {
|
||||
FaultSource::User => "user error",
|
||||
FaultSource::Runtime => "runtime error",
|
||||
FaultSource::Bug => "coding error",
|
||||
FaultSource::Undecided => "error",
|
||||
};
|
||||
f.write_str(s)
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn conditionally_lookup_for_error_message() {
|
||||
let prefix = "Attribute `name` is not sortable.";
|
||||
|
||||
@@ -10,7 +10,6 @@ use roaring::RoaringBitmap;
|
||||
use rstar::RTree;
|
||||
use time::OffsetDateTime;
|
||||
|
||||
use crate::distance::NDotProductPoint;
|
||||
use crate::documents::PrimaryKey;
|
||||
use crate::error::{InternalError, UserError};
|
||||
use crate::fields_ids_map::FieldsIdsMap;
|
||||
@@ -22,7 +21,7 @@ use crate::heed_codec::{
|
||||
BEU16StrCodec, FstSetCodec, ScriptLanguageCodec, StrBEU16Codec, StrRefCodec,
|
||||
};
|
||||
use crate::proximity::ProximityPrecision;
|
||||
use crate::readable_slices::ReadableSlices;
|
||||
use crate::vector::EmbeddingConfig;
|
||||
use crate::{
|
||||
default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds,
|
||||
FacetDistribution, FieldDistribution, FieldId, FieldIdWordCountCodec, GeoPoint, ObkvCodec,
|
||||
@@ -30,9 +29,6 @@ use crate::{
|
||||
BEU32, BEU64,
|
||||
};
|
||||
|
||||
/// The HNSW data-structure that we serialize, fill and search in.
|
||||
pub type Hnsw = instant_distance::Hnsw<NDotProductPoint>;
|
||||
|
||||
pub const DEFAULT_MIN_WORD_LEN_ONE_TYPO: u8 = 5;
|
||||
pub const DEFAULT_MIN_WORD_LEN_TWO_TYPOS: u8 = 9;
|
||||
|
||||
@@ -48,10 +44,6 @@ pub mod main_key {
|
||||
pub const FIELDS_IDS_MAP_KEY: &str = "fields-ids-map";
|
||||
pub const GEO_FACETED_DOCUMENTS_IDS_KEY: &str = "geo-faceted-documents-ids";
|
||||
pub const GEO_RTREE_KEY: &str = "geo-rtree";
|
||||
/// The prefix of the key that is used to store the, potential big, HNSW structure.
|
||||
/// It is concatenated with a big-endian encoded number (non-human readable).
|
||||
/// e.g. vector-hnsw0x0032.
|
||||
pub const VECTOR_HNSW_KEY_PREFIX: &str = "vector-hnsw";
|
||||
pub const PRIMARY_KEY_KEY: &str = "primary-key";
|
||||
pub const SEARCHABLE_FIELDS_KEY: &str = "searchable-fields";
|
||||
pub const USER_DEFINED_SEARCHABLE_FIELDS_KEY: &str = "user-defined-searchable-fields";
|
||||
@@ -74,6 +66,7 @@ pub mod main_key {
|
||||
pub const SORT_FACET_VALUES_BY: &str = "sort-facet-values-by";
|
||||
pub const PAGINATION_MAX_TOTAL_HITS: &str = "pagination-max-total-hits";
|
||||
pub const PROXIMITY_PRECISION: &str = "proximity-precision";
|
||||
pub const EMBEDDING_CONFIGS: &str = "embedding_configs";
|
||||
}
|
||||
|
||||
pub mod db_name {
|
||||
@@ -99,7 +92,8 @@ pub mod db_name {
|
||||
pub const FACET_ID_STRING_FST: &str = "facet-id-string-fst";
|
||||
pub const FIELD_ID_DOCID_FACET_F64S: &str = "field-id-docid-facet-f64s";
|
||||
pub const FIELD_ID_DOCID_FACET_STRINGS: &str = "field-id-docid-facet-strings";
|
||||
pub const VECTOR_ID_DOCID: &str = "vector-id-docids";
|
||||
pub const VECTOR_EMBEDDER_CATEGORY_ID: &str = "vector-embedder-category-id";
|
||||
pub const VECTOR_ARROY: &str = "vector-arroy";
|
||||
pub const DOCUMENTS: &str = "documents";
|
||||
pub const SCRIPT_LANGUAGE_DOCIDS: &str = "script_language_docids";
|
||||
}
|
||||
@@ -166,8 +160,10 @@ pub struct Index {
|
||||
/// Maps the document id, the facet field id and the strings.
|
||||
pub field_id_docid_facet_strings: Database<FieldDocIdFacetStringCodec, Str>,
|
||||
|
||||
/// Maps a vector id to the document id that have it.
|
||||
pub vector_id_docid: Database<BEU32, BEU32>,
|
||||
/// Maps an embedder name to its id in the arroy store.
|
||||
pub embedder_category_id: Database<Str, U8>,
|
||||
/// Vector store based on arroy™.
|
||||
pub vector_arroy: arroy::Database<arroy::distances::Angular>,
|
||||
|
||||
/// Maps the document id to the document as an obkv store.
|
||||
pub(crate) documents: Database<BEU32, ObkvCodec>,
|
||||
@@ -182,7 +178,7 @@ impl Index {
|
||||
) -> Result<Index> {
|
||||
use db_name::*;
|
||||
|
||||
options.max_dbs(24);
|
||||
options.max_dbs(25);
|
||||
|
||||
let env = options.open(path)?;
|
||||
let mut wtxn = env.write_txn()?;
|
||||
@@ -222,7 +218,11 @@ impl Index {
|
||||
env.create_database(&mut wtxn, Some(FIELD_ID_DOCID_FACET_F64S))?;
|
||||
let field_id_docid_facet_strings =
|
||||
env.create_database(&mut wtxn, Some(FIELD_ID_DOCID_FACET_STRINGS))?;
|
||||
let vector_id_docid = env.create_database(&mut wtxn, Some(VECTOR_ID_DOCID))?;
|
||||
// vector stuff
|
||||
let embedder_category_id =
|
||||
env.create_database(&mut wtxn, Some(VECTOR_EMBEDDER_CATEGORY_ID))?;
|
||||
let vector_arroy = env.create_database(&mut wtxn, Some(VECTOR_ARROY))?;
|
||||
|
||||
let documents = env.create_database(&mut wtxn, Some(DOCUMENTS))?;
|
||||
wtxn.commit()?;
|
||||
|
||||
@@ -252,7 +252,8 @@ impl Index {
|
||||
facet_id_is_empty_docids,
|
||||
field_id_docid_facet_f64s,
|
||||
field_id_docid_facet_strings,
|
||||
vector_id_docid,
|
||||
vector_arroy,
|
||||
embedder_category_id,
|
||||
documents,
|
||||
})
|
||||
}
|
||||
@@ -475,63 +476,6 @@ impl Index {
|
||||
None => Ok(RoaringBitmap::new()),
|
||||
}
|
||||
}
|
||||
|
||||
/* vector HNSW */
|
||||
|
||||
/// Writes the provided `hnsw`.
|
||||
pub(crate) fn put_vector_hnsw(&self, wtxn: &mut RwTxn, hnsw: &Hnsw) -> heed::Result<()> {
|
||||
// We must delete all the chunks before we write the new HNSW chunks.
|
||||
self.delete_vector_hnsw(wtxn)?;
|
||||
|
||||
let chunk_size = 1024 * 1024 * (1024 + 512); // 1.5 GiB
|
||||
let bytes = bincode::serialize(hnsw).map_err(Into::into).map_err(heed::Error::Encoding)?;
|
||||
for (i, chunk) in bytes.chunks(chunk_size).enumerate() {
|
||||
let i = i as u32;
|
||||
let mut key = main_key::VECTOR_HNSW_KEY_PREFIX.as_bytes().to_vec();
|
||||
key.extend_from_slice(&i.to_be_bytes());
|
||||
self.main.remap_types::<Bytes, Bytes>().put(wtxn, &key, chunk)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Delete the `hnsw`.
|
||||
pub(crate) fn delete_vector_hnsw(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
|
||||
let mut iter = self
|
||||
.main
|
||||
.remap_types::<Bytes, DecodeIgnore>()
|
||||
.prefix_iter_mut(wtxn, main_key::VECTOR_HNSW_KEY_PREFIX.as_bytes())?;
|
||||
let mut deleted = false;
|
||||
while iter.next().transpose()?.is_some() {
|
||||
// We do not keep a reference to the key or the value.
|
||||
unsafe { deleted |= iter.del_current()? };
|
||||
}
|
||||
Ok(deleted)
|
||||
}
|
||||
|
||||
/// Returns the `hnsw`.
|
||||
pub fn vector_hnsw(&self, rtxn: &RoTxn) -> Result<Option<Hnsw>> {
|
||||
let mut slices = Vec::new();
|
||||
for result in self
|
||||
.main
|
||||
.remap_types::<Str, Bytes>()
|
||||
.prefix_iter(rtxn, main_key::VECTOR_HNSW_KEY_PREFIX)?
|
||||
{
|
||||
let (_, slice) = result?;
|
||||
slices.push(slice);
|
||||
}
|
||||
|
||||
if slices.is_empty() {
|
||||
Ok(None)
|
||||
} else {
|
||||
let readable_slices: ReadableSlices<_> = slices.into_iter().collect();
|
||||
Ok(Some(
|
||||
bincode::deserialize_from(readable_slices)
|
||||
.map_err(Into::into)
|
||||
.map_err(heed::Error::Decoding)?,
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
/* field distribution */
|
||||
|
||||
/// Writes the field distribution which associates every field name with
|
||||
@@ -1528,6 +1472,41 @@ impl Index {
|
||||
|
||||
Ok(script_language)
|
||||
}
|
||||
|
||||
pub(crate) fn put_embedding_configs(
|
||||
&self,
|
||||
wtxn: &mut RwTxn<'_>,
|
||||
configs: Vec<(String, EmbeddingConfig)>,
|
||||
) -> heed::Result<()> {
|
||||
self.main.remap_types::<Str, SerdeJson<Vec<(String, EmbeddingConfig)>>>().put(
|
||||
wtxn,
|
||||
main_key::EMBEDDING_CONFIGS,
|
||||
&configs,
|
||||
)
|
||||
}
|
||||
|
||||
pub(crate) fn delete_embedding_configs(&self, wtxn: &mut RwTxn<'_>) -> heed::Result<bool> {
|
||||
self.main.remap_key_type::<Str>().delete(wtxn, main_key::EMBEDDING_CONFIGS)
|
||||
}
|
||||
|
||||
pub fn embedding_configs(
|
||||
&self,
|
||||
rtxn: &RoTxn<'_>,
|
||||
) -> Result<Vec<(String, crate::vector::EmbeddingConfig)>> {
|
||||
Ok(self
|
||||
.main
|
||||
.remap_types::<Str, SerdeJson<Vec<(String, EmbeddingConfig)>>>()
|
||||
.get(rtxn, main_key::EMBEDDING_CONFIGS)?
|
||||
.unwrap_or_default())
|
||||
}
|
||||
|
||||
/// Returns the name of the embedder to use by default.
///
/// When exactly one embedding configuration is stored its name is returned;
/// in every other case (none, or several) the name is `"default"`.
pub fn default_embedding_name(&self, rtxn: &RoTxn<'_>) -> Result<String> {
    let configs = self.embedding_configs(rtxn)?;
    let name = if let [(only_name, _)] = configs.as_slice() {
        only_name.clone()
    } else {
        "default".to_owned()
    };
    Ok(name)
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
||||
@@ -10,18 +10,18 @@ pub mod documents;
|
||||
|
||||
mod asc_desc;
|
||||
mod criterion;
|
||||
pub mod distance;
|
||||
mod error;
|
||||
mod external_documents_ids;
|
||||
pub mod facet;
|
||||
mod fields_ids_map;
|
||||
pub mod heed_codec;
|
||||
pub mod index;
|
||||
pub mod prompt;
|
||||
pub mod proximity;
|
||||
mod readable_slices;
|
||||
pub mod score_details;
|
||||
mod search;
|
||||
pub mod update;
|
||||
pub mod vector;
|
||||
|
||||
#[cfg(test)]
|
||||
#[macro_use]
|
||||
@@ -32,13 +32,12 @@ use std::convert::{TryFrom, TryInto};
|
||||
use std::hash::BuildHasherDefault;
|
||||
|
||||
use charabia::normalizer::{CharNormalizer, CompatibilityDecompositionNormalizer};
|
||||
pub use distance::dot_product_similarity;
|
||||
pub use filter_parser::{Condition, FilterCondition, Span, Token};
|
||||
use fxhash::{FxHasher32, FxHasher64};
|
||||
pub use grenad::CompressionType;
|
||||
pub use search::new::{
|
||||
execute_search, DefaultSearchLogger, GeoSortStrategy, SearchContext, SearchLogger,
|
||||
VisualSearchLogger,
|
||||
execute_search, filtered_universe, DefaultSearchLogger, GeoSortStrategy, SearchContext,
|
||||
SearchLogger, VisualSearchLogger,
|
||||
};
|
||||
use serde_json::Value;
|
||||
pub use {charabia as tokenizer, heed};
|
||||
|
||||
97
milli/src/prompt/context.rs
Normal file
97
milli/src/prompt/context.rs
Normal file
@@ -0,0 +1,97 @@
|
||||
use liquid::model::{
|
||||
ArrayView, DisplayCow, KStringCow, ObjectRender, ObjectSource, State, Value as LiquidValue,
|
||||
};
|
||||
use liquid::{ObjectView, ValueView};
|
||||
|
||||
use super::document::Document;
|
||||
use super::fields::Fields;
|
||||
use crate::FieldsIdsMap;
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Context<'a> {
|
||||
document: &'a Document<'a>,
|
||||
fields: Fields<'a>,
|
||||
}
|
||||
|
||||
impl<'a> Context<'a> {
|
||||
pub fn new(document: &'a Document<'a>, field_id_map: &'a FieldsIdsMap) -> Self {
|
||||
Self { document, fields: Fields::new(document, field_id_map) }
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> ObjectView for Context<'a> {
|
||||
fn as_value(&self) -> &dyn ValueView {
|
||||
self
|
||||
}
|
||||
|
||||
fn size(&self) -> i64 {
|
||||
2
|
||||
}
|
||||
|
||||
fn keys<'k>(&'k self) -> Box<dyn Iterator<Item = KStringCow<'k>> + 'k> {
|
||||
Box::new(["doc", "fields"].iter().map(|s| KStringCow::from_static(s)))
|
||||
}
|
||||
|
||||
fn values<'k>(&'k self) -> Box<dyn Iterator<Item = &'k dyn ValueView> + 'k> {
|
||||
Box::new(
|
||||
std::iter::once(self.document.as_value())
|
||||
.chain(std::iter::once(self.fields.as_value())),
|
||||
)
|
||||
}
|
||||
|
||||
fn iter<'k>(&'k self) -> Box<dyn Iterator<Item = (KStringCow<'k>, &'k dyn ValueView)> + 'k> {
|
||||
Box::new(self.keys().zip(self.values()))
|
||||
}
|
||||
|
||||
fn contains_key(&self, index: &str) -> bool {
|
||||
index == "doc" || index == "fields"
|
||||
}
|
||||
|
||||
fn get<'s>(&'s self, index: &str) -> Option<&'s dyn ValueView> {
|
||||
match index {
|
||||
"doc" => Some(self.document.as_value()),
|
||||
"fields" => Some(self.fields.as_value()),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> ValueView for Context<'a> {
|
||||
fn as_debug(&self) -> &dyn std::fmt::Debug {
|
||||
self
|
||||
}
|
||||
|
||||
fn render(&self) -> liquid::model::DisplayCow<'_> {
|
||||
DisplayCow::Owned(Box::new(ObjectRender::new(self)))
|
||||
}
|
||||
|
||||
fn source(&self) -> liquid::model::DisplayCow<'_> {
|
||||
DisplayCow::Owned(Box::new(ObjectSource::new(self)))
|
||||
}
|
||||
|
||||
fn type_name(&self) -> &'static str {
|
||||
"object"
|
||||
}
|
||||
|
||||
fn query_state(&self, state: liquid::model::State) -> bool {
|
||||
match state {
|
||||
State::Truthy => true,
|
||||
State::DefaultValue | State::Empty | State::Blank => false,
|
||||
}
|
||||
}
|
||||
|
||||
fn to_kstr(&self) -> liquid::model::KStringCow<'_> {
|
||||
let s = ObjectRender::new(self).to_string();
|
||||
KStringCow::from_string(s)
|
||||
}
|
||||
|
||||
fn to_value(&self) -> LiquidValue {
|
||||
LiquidValue::Object(
|
||||
self.iter().map(|(k, x)| (k.to_string().into(), x.to_value())).collect(),
|
||||
)
|
||||
}
|
||||
|
||||
fn as_object(&self) -> Option<&dyn ObjectView> {
|
||||
Some(self)
|
||||
}
|
||||
}
|
||||
131
milli/src/prompt/document.rs
Normal file
131
milli/src/prompt/document.rs
Normal file
@@ -0,0 +1,131 @@
|
||||
use std::cell::OnceCell;
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
use liquid::model::{
|
||||
DisplayCow, KString, KStringCow, ObjectRender, ObjectSource, State, Value as LiquidValue,
|
||||
};
|
||||
use liquid::{ObjectView, ValueView};
|
||||
|
||||
use crate::update::del_add::{DelAdd, KvReaderDelAdd};
|
||||
use crate::FieldsIdsMap;
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Document<'a>(BTreeMap<&'a str, (&'a [u8], ParsedValue)>);
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
struct ParsedValue(std::cell::OnceCell<LiquidValue>);
|
||||
|
||||
impl ParsedValue {
|
||||
fn empty() -> ParsedValue {
|
||||
ParsedValue(OnceCell::new())
|
||||
}
|
||||
|
||||
fn get(&self, raw: &[u8]) -> &LiquidValue {
|
||||
self.0.get_or_init(|| {
|
||||
let value: serde_json::Value = serde_json::from_slice(raw).unwrap();
|
||||
liquid::model::to_value(&value).unwrap()
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Document<'a> {
|
||||
pub fn new(
|
||||
data: obkv::KvReaderU16<'a>,
|
||||
side: DelAdd,
|
||||
inverted_field_map: &'a FieldsIdsMap,
|
||||
) -> Self {
|
||||
let mut out_data = BTreeMap::new();
|
||||
for (fid, raw) in data {
|
||||
let obkv = KvReaderDelAdd::new(raw);
|
||||
let Some(raw) = obkv.get(side) else {
|
||||
continue;
|
||||
};
|
||||
let Some(name) = inverted_field_map.name(fid) else {
|
||||
continue;
|
||||
};
|
||||
out_data.insert(name, (raw, ParsedValue::empty()));
|
||||
}
|
||||
Self(out_data)
|
||||
}
|
||||
|
||||
fn is_empty(&self) -> bool {
|
||||
self.0.is_empty()
|
||||
}
|
||||
|
||||
fn len(&self) -> usize {
|
||||
self.0.len()
|
||||
}
|
||||
|
||||
fn iter(&self) -> impl Iterator<Item = (KString, LiquidValue)> + '_ {
|
||||
self.0.iter().map(|(&k, (raw, data))| (k.to_owned().into(), data.get(raw).to_owned()))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> ObjectView for Document<'a> {
|
||||
fn as_value(&self) -> &dyn ValueView {
|
||||
self
|
||||
}
|
||||
|
||||
fn size(&self) -> i64 {
|
||||
self.len() as i64
|
||||
}
|
||||
|
||||
fn keys<'k>(&'k self) -> Box<dyn Iterator<Item = KStringCow<'k>> + 'k> {
|
||||
let keys = BTreeMap::keys(&self.0).map(|&s| s.into());
|
||||
Box::new(keys)
|
||||
}
|
||||
|
||||
fn values<'k>(&'k self) -> Box<dyn Iterator<Item = &'k dyn ValueView> + 'k> {
|
||||
Box::new(self.0.values().map(|(raw, v)| v.get(raw) as &dyn ValueView))
|
||||
}
|
||||
|
||||
fn iter<'k>(&'k self) -> Box<dyn Iterator<Item = (KStringCow<'k>, &'k dyn ValueView)> + 'k> {
|
||||
Box::new(self.0.iter().map(|(&k, (raw, data))| (k.into(), data.get(raw) as &dyn ValueView)))
|
||||
}
|
||||
|
||||
fn contains_key(&self, index: &str) -> bool {
|
||||
self.0.contains_key(index)
|
||||
}
|
||||
|
||||
fn get<'s>(&'s self, index: &str) -> Option<&'s dyn ValueView> {
|
||||
self.0.get(index).map(|(raw, v)| v.get(raw) as &dyn ValueView)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> ValueView for Document<'a> {
|
||||
fn as_debug(&self) -> &dyn std::fmt::Debug {
|
||||
self
|
||||
}
|
||||
|
||||
fn render(&self) -> liquid::model::DisplayCow<'_> {
|
||||
DisplayCow::Owned(Box::new(ObjectRender::new(self)))
|
||||
}
|
||||
|
||||
fn source(&self) -> liquid::model::DisplayCow<'_> {
|
||||
DisplayCow::Owned(Box::new(ObjectSource::new(self)))
|
||||
}
|
||||
|
||||
fn type_name(&self) -> &'static str {
|
||||
"object"
|
||||
}
|
||||
|
||||
fn query_state(&self, state: liquid::model::State) -> bool {
|
||||
match state {
|
||||
State::Truthy => true,
|
||||
State::DefaultValue | State::Empty | State::Blank => self.is_empty(),
|
||||
}
|
||||
}
|
||||
|
||||
fn to_kstr(&self) -> liquid::model::KStringCow<'_> {
|
||||
let s = ObjectRender::new(self).to_string();
|
||||
KStringCow::from_string(s)
|
||||
}
|
||||
|
||||
fn to_value(&self) -> LiquidValue {
|
||||
LiquidValue::Object(self.iter().collect())
|
||||
}
|
||||
|
||||
fn as_object(&self) -> Option<&dyn ObjectView> {
|
||||
Some(self)
|
||||
}
|
||||
}
|
||||
56
milli/src/prompt/error.rs
Normal file
56
milli/src/prompt/error.rs
Normal file
@@ -0,0 +1,56 @@
|
||||
use crate::error::FaultSource;
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
#[error("{fault}: {kind}")]
|
||||
pub struct NewPromptError {
|
||||
pub kind: NewPromptErrorKind,
|
||||
pub fault: FaultSource,
|
||||
}
|
||||
|
||||
impl From<NewPromptError> for crate::Error {
|
||||
fn from(value: NewPromptError) -> Self {
|
||||
crate::Error::UserError(crate::UserError::InvalidPrompt(value))
|
||||
}
|
||||
}
|
||||
|
||||
impl NewPromptError {
|
||||
pub(crate) fn cannot_parse_template(inner: liquid::Error) -> NewPromptError {
|
||||
Self { kind: NewPromptErrorKind::CannotParseTemplate(inner), fault: FaultSource::User }
|
||||
}
|
||||
|
||||
pub(crate) fn invalid_fields_in_template(inner: liquid::Error) -> NewPromptError {
|
||||
Self { kind: NewPromptErrorKind::InvalidFieldsInTemplate(inner), fault: FaultSource::User }
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub enum NewPromptErrorKind {
|
||||
#[error("cannot parse template: {0}")]
|
||||
CannotParseTemplate(liquid::Error),
|
||||
#[error("template contains invalid fields: {0}. Only `doc.*`, `fields[i].name`, `fields[i].value` are supported")]
|
||||
InvalidFieldsInTemplate(liquid::Error),
|
||||
}
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
#[error("{fault}: {kind}")]
|
||||
pub struct RenderPromptError {
|
||||
pub kind: RenderPromptErrorKind,
|
||||
pub fault: FaultSource,
|
||||
}
|
||||
impl RenderPromptError {
|
||||
pub(crate) fn missing_context(inner: liquid::Error) -> RenderPromptError {
|
||||
Self { kind: RenderPromptErrorKind::MissingContext(inner), fault: FaultSource::User }
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub enum RenderPromptErrorKind {
|
||||
#[error("missing field in document: {0}")]
|
||||
MissingContext(liquid::Error),
|
||||
}
|
||||
|
||||
impl From<RenderPromptError> for crate::Error {
|
||||
fn from(value: RenderPromptError) -> Self {
|
||||
crate::Error::UserError(crate::UserError::MissingDocumentField(value))
|
||||
}
|
||||
}
|
||||
172
milli/src/prompt/fields.rs
Normal file
172
milli/src/prompt/fields.rs
Normal file
@@ -0,0 +1,172 @@
|
||||
use liquid::model::{
|
||||
ArrayView, DisplayCow, KStringCow, ObjectRender, ObjectSource, State, Value as LiquidValue,
|
||||
};
|
||||
use liquid::{ObjectView, ValueView};
|
||||
|
||||
use super::document::Document;
|
||||
use crate::FieldsIdsMap;
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Fields<'a>(Vec<FieldValue<'a>>);
|
||||
|
||||
impl<'a> Fields<'a> {
|
||||
pub fn new(document: &'a Document<'a>, field_id_map: &'a FieldsIdsMap) -> Self {
|
||||
Self(
|
||||
std::iter::repeat(document)
|
||||
.zip(field_id_map.iter())
|
||||
.map(|(document, (_fid, name))| FieldValue { document, name })
|
||||
.collect(),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
pub struct FieldValue<'a> {
|
||||
name: &'a str,
|
||||
document: &'a Document<'a>,
|
||||
}
|
||||
|
||||
impl<'a> ValueView for FieldValue<'a> {
|
||||
fn as_debug(&self) -> &dyn std::fmt::Debug {
|
||||
self
|
||||
}
|
||||
|
||||
fn render(&self) -> liquid::model::DisplayCow<'_> {
|
||||
DisplayCow::Owned(Box::new(ObjectRender::new(self)))
|
||||
}
|
||||
|
||||
fn source(&self) -> liquid::model::DisplayCow<'_> {
|
||||
DisplayCow::Owned(Box::new(ObjectSource::new(self)))
|
||||
}
|
||||
|
||||
fn type_name(&self) -> &'static str {
|
||||
"object"
|
||||
}
|
||||
|
||||
fn query_state(&self, state: liquid::model::State) -> bool {
|
||||
match state {
|
||||
State::Truthy => true,
|
||||
State::DefaultValue | State::Empty | State::Blank => self.is_empty(),
|
||||
}
|
||||
}
|
||||
|
||||
fn to_kstr(&self) -> liquid::model::KStringCow<'_> {
|
||||
let s = ObjectRender::new(self).to_string();
|
||||
KStringCow::from_string(s)
|
||||
}
|
||||
|
||||
fn to_value(&self) -> LiquidValue {
|
||||
LiquidValue::Object(
|
||||
self.iter().map(|(k, v)| (k.to_string().into(), v.to_value())).collect(),
|
||||
)
|
||||
}
|
||||
|
||||
fn as_object(&self) -> Option<&dyn ObjectView> {
|
||||
Some(self)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> FieldValue<'a> {
|
||||
pub fn name(&self) -> &&'a str {
|
||||
&self.name
|
||||
}
|
||||
|
||||
pub fn value(&self) -> &dyn ValueView {
|
||||
self.document.get(self.name).unwrap_or(&LiquidValue::Nil)
|
||||
}
|
||||
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.size() == 0
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> ObjectView for FieldValue<'a> {
|
||||
fn as_value(&self) -> &dyn ValueView {
|
||||
self
|
||||
}
|
||||
|
||||
fn size(&self) -> i64 {
|
||||
2
|
||||
}
|
||||
|
||||
fn keys<'k>(&'k self) -> Box<dyn Iterator<Item = KStringCow<'k>> + 'k> {
|
||||
Box::new(["name", "value"].iter().map(|&x| KStringCow::from_static(x)))
|
||||
}
|
||||
|
||||
fn values<'k>(&'k self) -> Box<dyn Iterator<Item = &'k dyn ValueView> + 'k> {
|
||||
Box::new(
|
||||
std::iter::once(self.name() as &dyn ValueView).chain(std::iter::once(self.value())),
|
||||
)
|
||||
}
|
||||
|
||||
fn iter<'k>(&'k self) -> Box<dyn Iterator<Item = (KStringCow<'k>, &'k dyn ValueView)> + 'k> {
|
||||
Box::new(self.keys().zip(self.values()))
|
||||
}
|
||||
|
||||
fn contains_key(&self, index: &str) -> bool {
|
||||
index == "name" || index == "value"
|
||||
}
|
||||
|
||||
fn get<'s>(&'s self, index: &str) -> Option<&'s dyn ValueView> {
|
||||
match index {
|
||||
"name" => Some(self.name()),
|
||||
"value" => Some(self.value()),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> ArrayView for Fields<'a> {
|
||||
fn as_value(&self) -> &dyn ValueView {
|
||||
self.0.as_value()
|
||||
}
|
||||
|
||||
fn size(&self) -> i64 {
|
||||
self.0.len() as i64
|
||||
}
|
||||
|
||||
fn values<'k>(&'k self) -> Box<dyn Iterator<Item = &'k dyn ValueView> + 'k> {
|
||||
self.0.values()
|
||||
}
|
||||
|
||||
fn contains_key(&self, index: i64) -> bool {
|
||||
self.0.contains_key(index)
|
||||
}
|
||||
|
||||
fn get(&self, index: i64) -> Option<&dyn ValueView> {
|
||||
ArrayView::get(&self.0, index)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> ValueView for Fields<'a> {
|
||||
fn as_debug(&self) -> &dyn std::fmt::Debug {
|
||||
self
|
||||
}
|
||||
|
||||
fn render(&self) -> liquid::model::DisplayCow<'_> {
|
||||
self.0.render()
|
||||
}
|
||||
|
||||
fn source(&self) -> liquid::model::DisplayCow<'_> {
|
||||
self.0.source()
|
||||
}
|
||||
|
||||
fn type_name(&self) -> &'static str {
|
||||
self.0.type_name()
|
||||
}
|
||||
|
||||
fn query_state(&self, state: liquid::model::State) -> bool {
|
||||
self.0.query_state(state)
|
||||
}
|
||||
|
||||
fn to_kstr(&self) -> liquid::model::KStringCow<'_> {
|
||||
self.0.to_kstr()
|
||||
}
|
||||
|
||||
fn to_value(&self) -> LiquidValue {
|
||||
self.0.to_value()
|
||||
}
|
||||
|
||||
fn as_array(&self) -> Option<&dyn ArrayView> {
|
||||
Some(self)
|
||||
}
|
||||
}
|
||||
176
milli/src/prompt/mod.rs
Normal file
176
milli/src/prompt/mod.rs
Normal file
@@ -0,0 +1,176 @@
|
||||
mod context;
|
||||
mod document;
|
||||
pub(crate) mod error;
|
||||
mod fields;
|
||||
mod template_checker;
|
||||
|
||||
use std::convert::TryFrom;
|
||||
|
||||
use error::{NewPromptError, RenderPromptError};
|
||||
|
||||
use self::context::Context;
|
||||
use self::document::Document;
|
||||
use crate::update::del_add::DelAdd;
|
||||
use crate::FieldsIdsMap;
|
||||
|
||||
pub struct Prompt {
|
||||
template: liquid::Template,
|
||||
template_text: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
|
||||
pub struct PromptData {
|
||||
pub template: String,
|
||||
}
|
||||
|
||||
impl From<Prompt> for PromptData {
|
||||
fn from(value: Prompt) -> Self {
|
||||
Self { template: value.template_text }
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<PromptData> for Prompt {
|
||||
type Error = NewPromptError;
|
||||
|
||||
fn try_from(value: PromptData) -> Result<Self, Self::Error> {
|
||||
Prompt::new(value.template)
|
||||
}
|
||||
}
|
||||
|
||||
impl Clone for Prompt {
|
||||
fn clone(&self) -> Self {
|
||||
let template_text = self.template_text.clone();
|
||||
Self { template: new_template(&template_text).unwrap(), template_text }
|
||||
}
|
||||
}
|
||||
|
||||
fn new_template(text: &str) -> Result<liquid::Template, liquid::Error> {
|
||||
liquid::ParserBuilder::with_stdlib().build().unwrap().parse(text)
|
||||
}
|
||||
|
||||
fn default_template() -> liquid::Template {
|
||||
new_template(default_template_text()).unwrap()
|
||||
}
|
||||
|
||||
/// Source of the default template: for every field, a space followed by
/// `name: value` and a newline.
fn default_template_text() -> &'static str {
    concat!(
        "{% for field in fields %} ",
        "{{ field.name }}: {{ field.value }}\n",
        "{% endfor %}",
    )
}
|
||||
|
||||
impl Default for Prompt {
|
||||
fn default() -> Self {
|
||||
Self { template: default_template(), template_text: default_template_text().into() }
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for PromptData {
|
||||
fn default() -> Self {
|
||||
Self { template: default_template_text().into() }
|
||||
}
|
||||
}
|
||||
|
||||
impl Prompt {
|
||||
pub fn new(template: String) -> Result<Self, NewPromptError> {
|
||||
let this = Self {
|
||||
template: liquid::ParserBuilder::with_stdlib()
|
||||
.build()
|
||||
.unwrap()
|
||||
.parse(&template)
|
||||
.map_err(NewPromptError::cannot_parse_template)?,
|
||||
template_text: template,
|
||||
};
|
||||
|
||||
// render template with special object that's OK with `doc.*` and `fields.*`
|
||||
this.template
|
||||
.render(&template_checker::TemplateChecker)
|
||||
.map_err(NewPromptError::invalid_fields_in_template)?;
|
||||
|
||||
Ok(this)
|
||||
}
|
||||
|
||||
pub fn render(
|
||||
&self,
|
||||
document: obkv::KvReaderU16<'_>,
|
||||
side: DelAdd,
|
||||
field_id_map: &FieldsIdsMap,
|
||||
) -> Result<String, RenderPromptError> {
|
||||
let document = Document::new(document, side, field_id_map);
|
||||
let context = Context::new(&document, field_id_map);
|
||||
|
||||
self.template.render(&context).map_err(RenderPromptError::missing_context)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod test {
    use super::Prompt;
    use crate::error::FaultSource;
    use crate::prompt::error::{NewPromptError, NewPromptErrorKind};

    /// Building the default prompt must not panic.
    #[test]
    fn default_template() {
        let _prompt = Prompt::default();
    }

    #[test]
    fn empty_template() {
        let template = String::new();
        Prompt::new(template).unwrap();
    }

    #[test]
    fn template_ok() {
        let template = "{{doc.title}}: {{doc.overview}}";
        Prompt::new(template.into()).unwrap();
    }

    /// An unterminated `{{` is a parse error.
    #[test]
    fn template_syntax() {
        let result = Prompt::new("{{doc.title: {{doc.overview}}".into());
        assert!(matches!(
            result,
            Err(NewPromptError {
                kind: NewPromptErrorKind::CannotParseTemplate(_),
                fault: FaultSource::User
            })
        ));
    }

    /// Top-level variables other than `doc` and `fields` are rejected.
    #[test]
    fn template_missing_doc() {
        let result = Prompt::new("{{title}}: {{overview}}".into());
        assert!(matches!(
            result,
            Err(NewPromptError {
                kind: NewPromptErrorKind::InvalidFieldsInTemplate(_),
                fault: FaultSource::User
            })
        ));
    }

    #[test]
    fn template_nested_doc() {
        let template = "{{doc.actor.firstName}}: {{doc.actor.lastName}}";
        Prompt::new(template.into()).unwrap();
    }

    #[test]
    fn template_fields() {
        let template = "{% for field in fields %}{{field}}{% endfor %}";
        Prompt::new(template.into()).unwrap();
    }

    #[test]
    fn template_fields_ok() {
        let template = "{% for field in fields %}{{field.name}}: {{field.value}}{% endfor %}";
        Prompt::new(template.into()).unwrap();
    }

    #[test]
    fn template_fields_invalid() {
        // intentionally garbled field
        let result = Prompt::new("{% for field in fields %}{{field.vaelu}} {% endfor %}".into());
        assert!(matches!(
            result,
            Err(NewPromptError {
                kind: NewPromptErrorKind::InvalidFieldsInTemplate(_),
                fault: FaultSource::User
            })
        ));
    }
}
|
||||
301
milli/src/prompt/template_checker.rs
Normal file
301
milli/src/prompt/template_checker.rs
Normal file
@@ -0,0 +1,301 @@
|
||||
use liquid::model::{
|
||||
ArrayView, DisplayCow, KStringCow, ObjectRender, ObjectSource, State, Value as LiquidValue,
|
||||
};
|
||||
use liquid::{Object, ObjectView, ValueView};
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct TemplateChecker;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct DummyDoc;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct DummyFields;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct DummyField;
|
||||
|
||||
const DUMMY_VALUE: &LiquidValue = &LiquidValue::Nil;
|
||||
|
||||
impl ObjectView for DummyField {
|
||||
fn as_value(&self) -> &dyn ValueView {
|
||||
self
|
||||
}
|
||||
|
||||
fn size(&self) -> i64 {
|
||||
2
|
||||
}
|
||||
|
||||
fn keys<'k>(&'k self) -> Box<dyn Iterator<Item = KStringCow<'k>> + 'k> {
|
||||
Box::new(["name", "value"].iter().map(|s| KStringCow::from_static(s)))
|
||||
}
|
||||
|
||||
fn values<'k>(&'k self) -> Box<dyn Iterator<Item = &'k dyn ValueView> + 'k> {
|
||||
Box::new(vec![DUMMY_VALUE.as_view(), DUMMY_VALUE.as_view()].into_iter())
|
||||
}
|
||||
|
||||
fn iter<'k>(&'k self) -> Box<dyn Iterator<Item = (KStringCow<'k>, &'k dyn ValueView)> + 'k> {
|
||||
Box::new(self.keys().zip(self.values()))
|
||||
}
|
||||
|
||||
fn contains_key(&self, index: &str) -> bool {
|
||||
index == "name" || index == "value"
|
||||
}
|
||||
|
||||
fn get<'s>(&'s self, index: &str) -> Option<&'s dyn ValueView> {
|
||||
if self.contains_key(index) {
|
||||
Some(DUMMY_VALUE.as_view())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ValueView for DummyField {
|
||||
fn as_debug(&self) -> &dyn std::fmt::Debug {
|
||||
self
|
||||
}
|
||||
|
||||
fn render(&self) -> DisplayCow<'_> {
|
||||
DUMMY_VALUE.render()
|
||||
}
|
||||
|
||||
fn source(&self) -> DisplayCow<'_> {
|
||||
DUMMY_VALUE.source()
|
||||
}
|
||||
|
||||
fn type_name(&self) -> &'static str {
|
||||
"object"
|
||||
}
|
||||
|
||||
fn query_state(&self, state: State) -> bool {
|
||||
match state {
|
||||
State::Truthy => true,
|
||||
State::DefaultValue => false,
|
||||
State::Empty => false,
|
||||
State::Blank => false,
|
||||
}
|
||||
}
|
||||
|
||||
fn to_kstr(&self) -> KStringCow<'_> {
|
||||
DUMMY_VALUE.to_kstr()
|
||||
}
|
||||
|
||||
fn to_value(&self) -> LiquidValue {
|
||||
let mut this = Object::new();
|
||||
this.insert("name".into(), LiquidValue::Nil);
|
||||
this.insert("value".into(), LiquidValue::Nil);
|
||||
LiquidValue::Object(this)
|
||||
}
|
||||
|
||||
fn as_object(&self) -> Option<&dyn ObjectView> {
|
||||
Some(self)
|
||||
}
|
||||
}
|
||||
|
||||
impl ValueView for DummyFields {
|
||||
fn as_debug(&self) -> &dyn std::fmt::Debug {
|
||||
self
|
||||
}
|
||||
|
||||
fn render(&self) -> DisplayCow<'_> {
|
||||
DUMMY_VALUE.render()
|
||||
}
|
||||
|
||||
fn source(&self) -> DisplayCow<'_> {
|
||||
DUMMY_VALUE.source()
|
||||
}
|
||||
|
||||
fn type_name(&self) -> &'static str {
|
||||
"array"
|
||||
}
|
||||
|
||||
fn query_state(&self, state: State) -> bool {
|
||||
match state {
|
||||
State::Truthy => true,
|
||||
State::DefaultValue => false,
|
||||
State::Empty => false,
|
||||
State::Blank => false,
|
||||
}
|
||||
}
|
||||
|
||||
fn to_kstr(&self) -> KStringCow<'_> {
|
||||
DUMMY_VALUE.to_kstr()
|
||||
}
|
||||
|
||||
fn to_value(&self) -> LiquidValue {
|
||||
LiquidValue::Array(vec![DummyField.to_value()])
|
||||
}
|
||||
|
||||
fn as_array(&self) -> Option<&dyn ArrayView> {
|
||||
Some(self)
|
||||
}
|
||||
}
|
||||
|
||||
impl ArrayView for DummyFields {
|
||||
fn as_value(&self) -> &dyn ValueView {
|
||||
self
|
||||
}
|
||||
|
||||
fn size(&self) -> i64 {
|
||||
u16::MAX as i64
|
||||
}
|
||||
|
||||
fn values<'k>(&'k self) -> Box<dyn Iterator<Item = &'k dyn ValueView> + 'k> {
|
||||
Box::new(std::iter::once(DummyField.as_value()))
|
||||
}
|
||||
|
||||
fn contains_key(&self, index: i64) -> bool {
|
||||
index < self.size()
|
||||
}
|
||||
|
||||
fn get(&self, _index: i64) -> Option<&dyn ValueView> {
|
||||
Some(DummyField.as_value())
|
||||
}
|
||||
}
|
||||
|
||||
impl ObjectView for DummyDoc {
|
||||
fn as_value(&self) -> &dyn ValueView {
|
||||
self
|
||||
}
|
||||
|
||||
fn size(&self) -> i64 {
|
||||
1000
|
||||
}
|
||||
|
||||
fn keys<'k>(&'k self) -> Box<dyn Iterator<Item = KStringCow<'k>> + 'k> {
|
||||
Box::new(std::iter::empty())
|
||||
}
|
||||
|
||||
fn values<'k>(&'k self) -> Box<dyn Iterator<Item = &'k dyn ValueView> + 'k> {
|
||||
Box::new(std::iter::empty())
|
||||
}
|
||||
|
||||
fn iter<'k>(&'k self) -> Box<dyn Iterator<Item = (KStringCow<'k>, &'k dyn ValueView)> + 'k> {
|
||||
Box::new(std::iter::empty())
|
||||
}
|
||||
|
||||
fn contains_key(&self, _index: &str) -> bool {
|
||||
true
|
||||
}
|
||||
|
||||
fn get<'s>(&'s self, _index: &str) -> Option<&'s dyn ValueView> {
|
||||
// Recursively sends itself
|
||||
Some(self)
|
||||
}
|
||||
}
|
||||
|
||||
impl ValueView for DummyDoc {
|
||||
fn as_debug(&self) -> &dyn std::fmt::Debug {
|
||||
self
|
||||
}
|
||||
|
||||
fn render(&self) -> DisplayCow<'_> {
|
||||
DUMMY_VALUE.render()
|
||||
}
|
||||
|
||||
fn source(&self) -> DisplayCow<'_> {
|
||||
DUMMY_VALUE.source()
|
||||
}
|
||||
|
||||
fn type_name(&self) -> &'static str {
|
||||
"object"
|
||||
}
|
||||
|
||||
fn query_state(&self, state: State) -> bool {
|
||||
match state {
|
||||
State::Truthy => true,
|
||||
State::DefaultValue => false,
|
||||
State::Empty => false,
|
||||
State::Blank => false,
|
||||
}
|
||||
}
|
||||
|
||||
fn to_kstr(&self) -> KStringCow<'_> {
|
||||
DUMMY_VALUE.to_kstr()
|
||||
}
|
||||
|
||||
fn to_value(&self) -> LiquidValue {
|
||||
LiquidValue::Nil
|
||||
}
|
||||
|
||||
fn as_object(&self) -> Option<&dyn ObjectView> {
|
||||
Some(self)
|
||||
}
|
||||
}
|
||||
|
||||
impl ObjectView for TemplateChecker {
|
||||
fn as_value(&self) -> &dyn ValueView {
|
||||
self
|
||||
}
|
||||
|
||||
fn size(&self) -> i64 {
|
||||
2
|
||||
}
|
||||
|
||||
fn keys<'k>(&'k self) -> Box<dyn Iterator<Item = KStringCow<'k>> + 'k> {
|
||||
Box::new(["doc", "fields"].iter().map(|s| KStringCow::from_static(s)))
|
||||
}
|
||||
|
||||
fn values<'k>(&'k self) -> Box<dyn Iterator<Item = &'k dyn ValueView> + 'k> {
|
||||
Box::new(
|
||||
std::iter::once(DummyDoc.as_value()).chain(std::iter::once(DummyFields.as_value())),
|
||||
)
|
||||
}
|
||||
|
||||
fn iter<'k>(&'k self) -> Box<dyn Iterator<Item = (KStringCow<'k>, &'k dyn ValueView)> + 'k> {
|
||||
Box::new(self.keys().zip(self.values()))
|
||||
}
|
||||
|
||||
fn contains_key(&self, index: &str) -> bool {
|
||||
index == "doc" || index == "fields"
|
||||
}
|
||||
|
||||
fn get<'s>(&'s self, index: &str) -> Option<&'s dyn ValueView> {
|
||||
match index {
|
||||
"doc" => Some(DummyDoc.as_value()),
|
||||
"fields" => Some(DummyFields.as_value()),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ValueView for TemplateChecker {
|
||||
fn as_debug(&self) -> &dyn std::fmt::Debug {
|
||||
self
|
||||
}
|
||||
|
||||
fn render(&self) -> liquid::model::DisplayCow<'_> {
|
||||
DisplayCow::Owned(Box::new(ObjectRender::new(self)))
|
||||
}
|
||||
|
||||
fn source(&self) -> liquid::model::DisplayCow<'_> {
|
||||
DisplayCow::Owned(Box::new(ObjectSource::new(self)))
|
||||
}
|
||||
|
||||
fn type_name(&self) -> &'static str {
|
||||
"object"
|
||||
}
|
||||
|
||||
fn query_state(&self, state: liquid::model::State) -> bool {
|
||||
match state {
|
||||
State::Truthy => true,
|
||||
State::DefaultValue | State::Empty | State::Blank => false,
|
||||
}
|
||||
}
|
||||
|
||||
fn to_kstr(&self) -> liquid::model::KStringCow<'_> {
|
||||
let s = ObjectRender::new(self).to_string();
|
||||
KStringCow::from_string(s)
|
||||
}
|
||||
|
||||
fn to_value(&self) -> LiquidValue {
|
||||
LiquidValue::Object(
|
||||
self.iter().map(|(k, x)| (k.to_string().into(), x.to_value())).collect(),
|
||||
)
|
||||
}
|
||||
|
||||
fn as_object(&self) -> Option<&dyn ObjectView> {
|
||||
Some(self)
|
||||
}
|
||||
}
|
||||
@@ -32,6 +32,6 @@ pub fn path_proximity(path: &[Position]) -> u32 {
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub enum ProximityPrecision {
|
||||
#[default]
|
||||
WordScale,
|
||||
AttributeScale,
|
||||
ByWord,
|
||||
ByAttribute,
|
||||
}
|
||||
|
||||
@@ -1,85 +0,0 @@
|
||||
use std::io::{self, Read};
|
||||
use std::iter::FromIterator;
|
||||
|
||||
/// A reader over a list of byte slices, exposing them as one contiguous
/// stream without copying them into a single buffer first.
pub struct ReadableSlices<A> {
    inner: Vec<A>,
    /// Index in `inner` of the slice the next read starts in.
    index: usize,
    /// Offset of the next unread byte inside `inner[index]`.
    offset: usize,
}

impl<A> FromIterator<A> for ReadableSlices<A> {
    /// Collects the slices in iteration order, positioned at the very first byte.
    fn from_iter<T: IntoIterator<Item = A>>(iter: T) -> Self {
        ReadableSlices { inner: iter.into_iter().collect(), index: 0, offset: 0 }
    }
}

impl<A: AsRef<[u8]>> Read for ReadableSlices<A> {
    /// Fills `buf` with as many bytes as remain, crossing slice boundaries as
    /// needed, and returns the number of bytes written. Returns `Ok(0)` once
    /// every slice has been consumed (or when `buf` is empty). Never fails.
    ///
    /// The cursor is kept as a `(slice index, offset)` pair, so a read resumes
    /// where the previous one stopped instead of rescanning every slice.
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        let mut written = 0;

        while written < buf.len() {
            let remaining = match self.inner.get(self.index) {
                Some(slice) => &slice.as_ref()[self.offset..],
                // Every slice has been consumed.
                None => break,
            };

            if remaining.is_empty() {
                // Current slice exhausted (or empty): move on to the next one.
                self.index += 1;
                self.offset = 0;
                continue;
            }

            let count = remaining.len().min(buf.len() - written);
            buf[written..written + count].copy_from_slice(&remaining[..count]);
            written += count;
            self.offset += count;
        }

        Ok(written)
    }
}
|
||||
|
||||
#[cfg(test)]
mod test {
    use std::io::Read;

    use super::ReadableSlices;

    /// Draining the reader with `read_to_end` yields every chunk back to back.
    #[test]
    fn basic() {
        let expected: Vec<u8> = (0..100).collect();
        let mut reader = expected.chunks(3).collect::<ReadableSlices<_>>();

        let mut collected = Vec::new();
        let read = reader.read_to_end(&mut collected).unwrap();

        assert_eq!(read, expected.len());
        assert_eq!(collected, expected);
    }

    /// A single read larger than one chunk is served from several chunks.
    #[test]
    fn small_reads() {
        let source: Vec<u8> = (0..u8::MAX).collect();
        let mut reader = source.chunks(27).collect::<ReadableSlices<_>>();

        let mut buffer = [0u8; 45];
        let read = reader.read(&mut buffer).unwrap();

        assert_eq!(read, buffer.len());
        assert_eq!(&buffer[..], &source[..45]);
    }
}
|
||||
@@ -1,3 +1,6 @@
|
||||
use std::cmp::Ordering;
|
||||
|
||||
use itertools::Itertools;
|
||||
use serde::Serialize;
|
||||
|
||||
use crate::distance_between_two_points;
|
||||
@@ -12,9 +15,24 @@ pub enum ScoreDetails {
|
||||
ExactAttribute(ExactAttribute),
|
||||
ExactWords(ExactWords),
|
||||
Sort(Sort),
|
||||
Vector(Vector),
|
||||
GeoSort(GeoSort),
|
||||
}
|
||||
|
||||
/// One item of the sequence produced by `ScoreDetails::score_values`.
#[derive(Clone, Copy)]
pub enum ScoreValue<'a> {
    /// A numeric score: either the local score of a run of merged ranks, or a
    /// score that was already numeric.
    Score(f64),
    /// A borrowed sort detail, passed through unchanged.
    Sort(&'a Sort),
    /// A borrowed geo-sort detail, passed through unchanged.
    GeoSort(&'a GeoSort),
}
|
||||
|
||||
/// Intermediate form of a score detail used while building `ScoreValue`s.
///
/// Ranks are kept as ranks (rather than converted to scores right away) so
/// that adjacent ones can still be merged with `Rank::merge`.
enum RankOrValue<'a> {
    /// A rank that may still be merged with neighbouring ranks.
    Rank(Rank),
    /// A borrowed sort detail.
    Sort(&'a Sort),
    /// A borrowed geo-sort detail.
    GeoSort(&'a GeoSort),
    /// A value that is already a numeric score.
    Score(f64),
}
|
||||
|
||||
impl ScoreDetails {
|
||||
pub fn local_score(&self) -> Option<f64> {
|
||||
self.rank().map(Rank::local_score)
|
||||
@@ -31,11 +49,55 @@ impl ScoreDetails {
|
||||
ScoreDetails::ExactWords(details) => Some(details.rank()),
|
||||
ScoreDetails::Sort(_) => None,
|
||||
ScoreDetails::GeoSort(_) => None,
|
||||
ScoreDetails::Vector(_) => None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn global_score<'a>(details: impl Iterator<Item = &'a Self>) -> f64 {
|
||||
Rank::global_score(details.filter_map(Self::rank))
|
||||
pub fn global_score<'a>(details: impl Iterator<Item = &'a Self> + 'a) -> f64 {
|
||||
Self::score_values(details)
|
||||
.find_map(|x| {
|
||||
let ScoreValue::Score(score) = x else {
|
||||
return None;
|
||||
};
|
||||
Some(score)
|
||||
})
|
||||
.unwrap_or(1.0f64)
|
||||
}
|
||||
|
||||
pub fn score_values<'a>(
|
||||
details: impl Iterator<Item = &'a Self> + 'a,
|
||||
) -> impl Iterator<Item = ScoreValue<'a>> + 'a {
|
||||
details
|
||||
.map(ScoreDetails::rank_or_value)
|
||||
.coalesce(|left, right| match (left, right) {
|
||||
(RankOrValue::Rank(left), RankOrValue::Rank(right)) => {
|
||||
Ok(RankOrValue::Rank(Rank::merge(left, right)))
|
||||
}
|
||||
(left, right) => Err((left, right)),
|
||||
})
|
||||
.map(|rank_or_value| match rank_or_value {
|
||||
RankOrValue::Rank(r) => ScoreValue::Score(r.local_score()),
|
||||
RankOrValue::Sort(s) => ScoreValue::Sort(s),
|
||||
RankOrValue::GeoSort(g) => ScoreValue::GeoSort(g),
|
||||
RankOrValue::Score(s) => ScoreValue::Score(s),
|
||||
})
|
||||
}
|
||||
|
||||
fn rank_or_value(&self) -> RankOrValue<'_> {
|
||||
match self {
|
||||
ScoreDetails::Words(w) => RankOrValue::Rank(w.rank()),
|
||||
ScoreDetails::Typo(t) => RankOrValue::Rank(t.rank()),
|
||||
ScoreDetails::Proximity(p) => RankOrValue::Rank(*p),
|
||||
ScoreDetails::Fid(f) => RankOrValue::Rank(*f),
|
||||
ScoreDetails::Position(p) => RankOrValue::Rank(*p),
|
||||
ScoreDetails::ExactAttribute(e) => RankOrValue::Rank(e.rank()),
|
||||
ScoreDetails::ExactWords(e) => RankOrValue::Rank(e.rank()),
|
||||
ScoreDetails::Sort(sort) => RankOrValue::Sort(sort),
|
||||
ScoreDetails::GeoSort(geosort) => RankOrValue::GeoSort(geosort),
|
||||
ScoreDetails::Vector(vector) => RankOrValue::Score(
|
||||
vector.value_similarity.as_ref().map(|(_, s)| *s as f64).unwrap_or(0.0f64),
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
/// Panics
|
||||
@@ -181,6 +243,19 @@ impl ScoreDetails {
|
||||
details_map.insert(sort, sort_details);
|
||||
order += 1;
|
||||
}
|
||||
ScoreDetails::Vector(s) => {
|
||||
let vector = format!("vectorSort({:?})", s.target_vector);
|
||||
let value = s.value_similarity.as_ref().map(|(v, _)| v);
|
||||
let similarity = s.value_similarity.as_ref().map(|(_, s)| s);
|
||||
|
||||
let details = serde_json::json!({
|
||||
"order": order,
|
||||
"value": value,
|
||||
"similarity": similarity,
|
||||
});
|
||||
details_map.insert(vector, details);
|
||||
order += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
details_map
|
||||
@@ -297,15 +372,21 @@ impl Rank {
|
||||
pub fn global_score(details: impl Iterator<Item = Self>) -> f64 {
|
||||
let mut rank = Rank { rank: 1, max_rank: 1 };
|
||||
for inner_rank in details {
|
||||
rank.rank -= 1;
|
||||
|
||||
rank.rank *= inner_rank.max_rank;
|
||||
rank.max_rank *= inner_rank.max_rank;
|
||||
|
||||
rank.rank += inner_rank.rank;
|
||||
rank = Rank::merge(rank, inner_rank);
|
||||
}
|
||||
rank.local_score()
|
||||
}
|
||||
|
||||
/// Combines an outer rank with a finer-grained inner rank.
///
/// The outer rank is shifted down by one (saturating at zero), scaled by the
/// inner rank's maximum, and the inner rank is then added; the maximum becomes
/// the product of both maxima.
pub fn merge(mut outer: Rank, inner: Rank) -> Rank {
    let scaled = outer.rank.saturating_sub(1) * inner.max_rank;
    outer.max_rank *= inner.max_rank;
    outer.rank = scaled + inner.rank;
    outer
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize)]
|
||||
@@ -335,13 +416,78 @@ pub struct Sort {
|
||||
pub value: serde_json::Value,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, PartialOrd)]
|
||||
impl PartialOrd for Sort {
|
||||
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
|
||||
if self.field_name != other.field_name {
|
||||
return None;
|
||||
}
|
||||
if self.ascending != other.ascending {
|
||||
return None;
|
||||
}
|
||||
match (&self.value, &other.value) {
|
||||
(serde_json::Value::Null, serde_json::Value::Null) => Some(Ordering::Equal),
|
||||
(serde_json::Value::Null, _) => Some(Ordering::Less),
|
||||
(_, serde_json::Value::Null) => Some(Ordering::Greater),
|
||||
// numbers are always before strings
|
||||
(serde_json::Value::Number(_), serde_json::Value::String(_)) => Some(Ordering::Greater),
|
||||
(serde_json::Value::String(_), serde_json::Value::Number(_)) => Some(Ordering::Less),
|
||||
(serde_json::Value::Number(left), serde_json::Value::Number(right)) => {
|
||||
// FIXME: unwrap permitted here?
|
||||
let order = left.as_f64().unwrap().partial_cmp(&right.as_f64().unwrap())?;
|
||||
// 12 < 42, and when ascending, we want to see 12 first, so the smallest.
|
||||
// Hence, when ascending, smaller is better
|
||||
Some(if self.ascending { order.reverse() } else { order })
|
||||
}
|
||||
(serde_json::Value::String(left), serde_json::Value::String(right)) => {
|
||||
let order = left.cmp(right);
|
||||
// Taking e.g. "a" and "z"
|
||||
// "a" < "z", and when ascending, we want to see "a" first, so the smallest.
|
||||
// Hence, when ascending, smaller is better
|
||||
Some(if self.ascending { order.reverse() } else { order })
|
||||
}
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Score detail of a geographic sort.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct GeoSort {
    /// The reference point that distances are measured from.
    pub target_point: [f64; 2],
    /// Direction of the sort; when `true`, a smaller distance ranks better.
    pub ascending: bool,
    /// The point being ranked, or `None` when there is no point to measure.
    pub value: Option<[f64; 2]>,
}
|
||||
|
||||
impl PartialOrd for GeoSort {
|
||||
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
|
||||
if self.target_point != other.target_point {
|
||||
return None;
|
||||
}
|
||||
if self.ascending != other.ascending {
|
||||
return None;
|
||||
}
|
||||
Some(match (self.distance(), other.distance()) {
|
||||
(None, None) => Ordering::Equal,
|
||||
(None, Some(_)) => Ordering::Less,
|
||||
(Some(_), None) => Ordering::Greater,
|
||||
(Some(left), Some(right)) => {
|
||||
let order = left.partial_cmp(&right)?;
|
||||
if self.ascending {
|
||||
// when ascending, the one with the smallest distance has the best score
|
||||
order.reverse()
|
||||
} else {
|
||||
order
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// Score detail of a vector-based ranking.
#[derive(Debug, Clone, PartialEq, PartialOrd)]
pub struct Vector {
    /// The vector that candidates are compared against.
    pub target_vector: Vec<f32>,
    /// The candidate's own vector together with its similarity, if any.
    pub value_similarity: Option<(Vec<f32>, f32)>,
}
|
||||
|
||||
impl GeoSort {
|
||||
pub fn distance(&self) -> Option<f64> {
|
||||
self.value.map(|value| distance_between_two_points(&self.target_point, &value))
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user