Compare commits

..

10 Commits

Author SHA1 Message Date
fa27bf3513 fix clippy 2023-11-29 15:04:41 +01:00
09ebf7428b stream and chunk the data 2023-11-29 14:27:50 +01:00
76e0248cdb gzip the tasks 2023-11-29 13:09:04 +01:00
7f0abaf582 parse the url correctly 2023-11-28 16:28:11 +01:00
6f135457f8 update and fix the test 2023-11-28 15:55:48 +01:00
41f3a30b0b return a task view instead of a task 2023-11-28 15:08:13 +01:00
9090e0fe9d add a first working test with actixweb 2023-11-28 14:47:07 +01:00
b3098b9d9a start writing a test with actix but it doesn't works 2023-11-28 14:01:44 +01:00
9b1de777de Implement the webhook 2023-11-28 11:40:09 +01:00
00f0711207 add the option 2023-11-27 15:22:44 +01:00
303 changed files with 6594 additions and 30675 deletions

View File

@ -1,2 +0,0 @@
[alias]
xtask = "run --release --package xtask --"

View File

@ -2,13 +2,14 @@
name: New sprint issue
about: ⚠️ Should only be used by the engine team ⚠️
title: ''
labels: 'missing usage in PRD, impacts docs'
labels: ''
assignees: ''
---
Related product team resources: [PRD]() (_internal only_)
Related product discussion:
Related spec: WIP
## Motivation
@ -20,25 +21,12 @@ Related product discussion:
## TODO
<!---If necessary, create a list with technical/product steps-->
<!---Feel free to adapt this list with more technical/product steps-->
### Reminders when modifying the Setting API
<!--- Special steps to remind when adding a new index setting -->
- [ ] Ensure the new setting route is at least tested by the [`test_setting_routes` macro](https://github.com/meilisearch/meilisearch/blob/5204c0b60b384cbc79621b6b2176fca086069e8e/meilisearch/tests/settings/get_settings.rs#L276)
- [ ] Ensure Analytics are fully implemented
- [ ] `/settings/my-new-setting` configured in the [`make_setting_routes` macro](https://github.com/meilisearch/meilisearch/blob/5204c0b60b384cbc79621b6b2176fca086069e8e/meilisearch/src/routes/indexes/settings.rs#L141-L165)
- [ ] global `/settings` route configured in the [`update_all` function](https://github.com/meilisearch/meilisearch/blob/5204c0b60b384cbc79621b6b2176fca086069e8e/meilisearch/src/routes/indexes/settings.rs#L655-L751)
- [ ] Ensure the dump serializing is consistent with the `/settings` route serializing, e.g., enums case can be different (`camelCase` in route and `PascalCase` in the dump)
#### Special cases when adding a setting for an experimental feature
- [ ] ⚠️ API stability: The setting does not appear on the main settings route when the feature has never been enabled (e.g. mark it `Unset` when returned from the index in this situation. See [an example](https://github.com/meilisearch/meilisearch/blob/7a89abd2a025606a42f8b219e539117eb2eb029f/meilisearch-types/src/settings.rs#L608))
- [ ] The setting cannot be set when the feature is disabled, either by the main settings route or the subroute (see [`validate_settings` function](https://github.com/meilisearch/meilisearch/blob/7a89abd2a025606a42f8b219e539117eb2eb029f/meilisearch/src/routes/indexes/settings.rs#L811))
- [ ] If possible, the setting is reset when the feature is disabled (hard if it requires reindexing)
- [ ] Release a prototype
- [ ] If prototype validated, merge changes into `main`
- [ ] Update the spec
## Impacted teams
<!---Ping the related teams. Ask the engine manager if you have any hesitation-->
<!---@meilisearch/docs-team when there is any API change, e.g. settings addition-->

View File

@ -1,30 +0,0 @@
name: Bench (manual)
on:
workflow_dispatch:
inputs:
workload:
description: 'The path to the workloads to execute (workloads/...)'
required: true
default: 'workloads/movies.json'
env:
WORKLOAD_NAME: ${{ github.event.inputs.workload }}
jobs:
benchmarks:
name: Run and upload benchmarks
runs-on: benchmarks
timeout-minutes: 180 # 3h
steps:
- uses: actions/checkout@v3
- uses: actions-rs/toolchain@v1
with:
profile: minimal
toolchain: stable
override: true
- name: Run benchmarks - workload ${WORKLOAD_NAME} - branch ${{ github.ref }} - commit ${{ github.sha }}
run: |
cargo xtask bench --api-key "${{ secrets.BENCHMARK_API_KEY }}" --dashboard-url "${{ vars.BENCHMARK_DASHBOARD_URL }}" --reason "Manual [Run #${{ github.run_id }}](https://github.com/meilisearch/meilisearch/actions/runs/${{ github.run_id }})" -- ${WORKLOAD_NAME}

View File

@ -1,53 +0,0 @@
name: Bench (PR)
on:
issue_comment:
types: [created]
permissions:
issues: write
env:
GH_TOKEN: ${{ secrets.MEILI_BOT_GH_PAT }}
jobs:
run-benchmarks-on-comment:
if: startsWith(github.event.comment.body, '/bench')
name: Run and upload benchmarks
runs-on: benchmarks
timeout-minutes: 180 # 3h
steps:
- name: Check for Command
id: command
uses: xt0rted/slash-command-action@v2
with:
command: bench
reaction-type: "rocket"
repo-token: ${{ env.GH_TOKEN }}
- uses: xt0rted/pull-request-comment-branch@v2
id: comment-branch
with:
repo_token: ${{ env.GH_TOKEN }}
- uses: actions/checkout@v3
if: success()
with:
fetch-depth: 0 # fetch full history to be able to get main commit sha
ref: ${{ steps.comment-branch.outputs.head_ref }}
- uses: actions-rs/toolchain@v1
with:
profile: minimal
toolchain: stable
override: true
- name: Run benchmarks on PR ${{ github.event.issue.id }}
run: |
cargo xtask bench --api-key "${{ secrets.BENCHMARK_API_KEY }}" \
--dashboard-url "${{ vars.BENCHMARK_DASHBOARD_URL }}" \
--reason "[Comment](${{ github.event.comment.html_url }}) on [#${{ github.event.issue.number }}](${{ github.event.issue.html_url }})" \
-- ${{ steps.command.outputs.command-arguments }} > benchlinks.txt
- name: Send comment in PR
run: |
gh pr comment ${{github.event.issue.number}} --body-file benchlinks.txt

View File

@ -1,25 +0,0 @@
name: Indexing bench (push)
on:
push:
branches:
- main
jobs:
benchmarks:
name: Run and upload benchmarks
runs-on: benchmarks
timeout-minutes: 180 # 3h
steps:
- uses: actions/checkout@v3
- uses: actions-rs/toolchain@v1
with:
profile: minimal
toolchain: stable
override: true
# Run benchmarks
- name: Run benchmarks - Dataset ${BENCH_NAME} - Branch main - Commit ${{ github.sha }}
run: |
cargo xtask bench --api-key "${{ secrets.BENCHMARK_API_KEY }}" --dashboard-url "${{ vars.BENCHMARK_DASHBOARD_URL }}" --reason "Push on `main` [Run #${{ github.run_id }}](https://github.com/meilisearch/meilisearch/actions/runs/${{ github.run_id }})" -- workloads/*.json

View File

@ -110,44 +110,6 @@ jobs:
--milestone $MILESTONE_VERSION \
--assignee curquiza
create-update-version-issue:
needs: get-release-version
# Create the update-version issue even if the release is a patch release
if: github.event.action == 'created'
runs-on: ubuntu-latest
env:
ISSUE_TEMPLATE: issue-template.md
steps:
- uses: actions/checkout@v3
- name: Download the issue template
run: curl -s https://raw.githubusercontent.com/meilisearch/engine-team/main/issue-templates/update-version-issue.md > $ISSUE_TEMPLATE
- name: Create the issue
run: |
gh issue create \
--title "Update version in Cargo.toml for $MILESTONE_VERSION" \
--label 'maintenance' \
--body-file $ISSUE_TEMPLATE \
--milestone $MILESTONE_VERSION
create-update-openapi-issue:
needs: get-release-version
# Create the openAPI issue if the release is not only a patch release
if: github.event.action == 'created' && needs.get-release-version.outputs.is-patch == 'false'
runs-on: ubuntu-latest
env:
ISSUE_TEMPLATE: issue-template.md
steps:
- uses: actions/checkout@v3
- name: Download the issue template
run: curl -s https://raw.githubusercontent.com/meilisearch/engine-team/main/issue-templates/update-openapi-issue.md > $ISSUE_TEMPLATE
- name: Create the issue
run: |
gh issue create \
--title "Update Open API file for $MILESTONE_VERSION" \
--label 'maintenance' \
--body-file $ISSUE_TEMPLATE \
--milestone $MILESTONE_VERSION
# ----------------
# MILESTONE CLOSED
# ----------------

View File

@ -97,7 +97,7 @@ jobs:
- name: Send CI information to Cloud team
# Do not send if nightly build (i.e. 'schedule' or 'workflow_dispatch' event)
if: github.event_name == 'push'
uses: peter-evans/repository-dispatch@v3
uses: peter-evans/repository-dispatch@v2
with:
token: ${{ secrets.MEILI_BOT_GH_PAT }}
repository: meilisearch/meilisearch-cloud

View File

@ -22,7 +22,7 @@ jobs:
outputs:
docker-image: ${{ steps.define-image.outputs.docker-image }}
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v3
- name: Define the Docker image we need to use
id: define-image
run: |
@ -46,11 +46,11 @@ jobs:
MEILISEARCH_VERSION: ${{ needs.define-docker-image.outputs.docker-image }}
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v3
with:
repository: meilisearch/meilisearch-dotnet
- name: Setup .NET Core
uses: actions/setup-dotnet@v4
uses: actions/setup-dotnet@v3
with:
dotnet-version: "6.0.x"
- name: Install dependencies
@ -75,12 +75,12 @@ jobs:
ports:
- '7700:7700'
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v3
with:
repository: meilisearch/meilisearch-dart
- uses: dart-lang/setup-dart@v1
with:
sdk: 'latest'
sdk: 3.1.1
- name: Install dependencies
run: dart pub get
- name: Run integration tests
@ -100,10 +100,10 @@ jobs:
- '7700:7700'
steps:
- name: Set up Go
uses: actions/setup-go@v5
uses: actions/setup-go@v4
with:
go-version: stable
- uses: actions/checkout@v4
- uses: actions/checkout@v3
with:
repository: meilisearch/meilisearch-go
- name: Get dependencies
@ -129,11 +129,11 @@ jobs:
ports:
- '7700:7700'
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v3
with:
repository: meilisearch/meilisearch-java
- name: Set up Java
uses: actions/setup-java@v4
uses: actions/setup-java@v3
with:
java-version: 8
distribution: 'zulu'
@ -156,7 +156,7 @@ jobs:
ports:
- '7700:7700'
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v3
with:
repository: meilisearch/meilisearch-js
- name: Setup node
@ -191,7 +191,7 @@ jobs:
ports:
- '7700:7700'
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v3
with:
repository: meilisearch/meilisearch-php
- name: Install PHP
@ -220,11 +220,11 @@ jobs:
ports:
- '7700:7700'
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v3
with:
repository: meilisearch/meilisearch-python
- name: Set up Python
uses: actions/setup-python@v5
uses: actions/setup-python@v4
- name: Install pipenv
uses: dschep/install-pipenv-action@v1
- name: Install dependencies
@ -245,7 +245,7 @@ jobs:
ports:
- '7700:7700'
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v3
with:
repository: meilisearch/meilisearch-ruby
- name: Set up Ruby 3
@ -270,7 +270,7 @@ jobs:
ports:
- '7700:7700'
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v3
with:
repository: meilisearch/meilisearch-rust
- name: Build
@ -291,7 +291,7 @@ jobs:
ports:
- '7700:7700'
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v3
with:
repository: meilisearch/meilisearch-swift
- name: Run tests
@ -314,7 +314,7 @@ jobs:
ports:
- '7700:7700'
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v3
with:
repository: meilisearch/meilisearch-js-plugins
- name: Setup node
@ -345,7 +345,7 @@ jobs:
ports:
- '7700:7700'
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v3
with:
repository: meilisearch/meilisearch-rails
- name: Set up Ruby 3
@ -369,7 +369,7 @@ jobs:
ports:
- '7700:7700'
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v3
with:
repository: meilisearch/meilisearch-symfony
- name: Install PHP

View File

@ -31,10 +31,17 @@ jobs:
apt-get update && apt-get install -y curl
apt-get install build-essential -y
- name: Setup test with Rust stable
if: github.event_name != 'schedule'
uses: actions-rs/toolchain@v1
with:
toolchain: stable
override: true
- name: Setup test with Rust nightly
if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
uses: actions-rs/toolchain@v1
with:
toolchain: nightly
override: true
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.7.1
- name: Run cargo check without any default features
@ -56,36 +63,14 @@ jobs:
matrix:
os: [macos-12, windows-2022]
steps:
- name: Check free disk space on C
run: |
fsutil volume diskfree c:
- name: Check free disk space on D
run: |
fsutil volume diskfree d:
- uses: actions/checkout@v3
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.7.1
- uses: actions-rs/toolchain@v1
with:
toolchain: stable
override: true
- name: Check free disk space on C
run: |
fsutil volume diskfree c:
- name: Check free disk space on D
run: |
fsutil volume diskfree d:
- name: Run cargo check without any default features
uses: actions-rs/cargo@v1
with:
command: build
args: --locked --release --no-default-features --all
- name: Check free disk space on C
run: |
fsutil volume diskfree c:
- name: Check free disk space on D
run: |
fsutil volume diskfree d:
- name: Run cargo test
uses: actions-rs/cargo@v1
with:
@ -93,7 +78,7 @@ jobs:
args: --locked --release --all
test-all-features:
name: Tests almost all features
name: Tests all features
runs-on: ubuntu-latest
container:
# Use ubuntu-18.04 to compile with glibc 2.27, which are the production expectations
@ -109,12 +94,16 @@ jobs:
with:
toolchain: stable
override: true
- name: Run cargo build with almost all features
run: |
cargo build --workspace --locked --release --features "$(cargo xtask list-features --exclude-feature cuda)"
- name: Run cargo test with almost all features
run: |
cargo test --workspace --locked --release --features "$(cargo xtask list-features --exclude-feature cuda)"
- name: Run cargo build with all features
uses: actions-rs/cargo@v1
with:
command: build
args: --workspace --locked --release --all-features
- name: Run cargo test with all features
uses: actions-rs/cargo@v1
with:
command: test
args: --workspace --locked --release --all-features
test-disabled-tokenization:
name: Test disabled tokenization
@ -175,7 +164,7 @@ jobs:
- uses: actions-rs/toolchain@v1
with:
profile: minimal
toolchain: 1.75.0
toolchain: 1.71.1
override: true
components: clippy
- name: Cache dependencies

2
.gitignore vendored
View File

@ -9,8 +9,6 @@
/data.ms
/snapshots
/dumps
/bench
/_xtask_benchmark.ms
# Snapshots
## ... large

View File

@ -1,365 +0,0 @@
# Benchmarks
Currently this repository hosts two kinds of benchmarks:
1. The older "milli benchmarks", that use [criterion](https://github.com/bheisler/criterion.rs) and live in the "benchmarks" directory.
2. The newer "bench" that are workload-based and so split between the [`workloads`](./workloads/) directory and the [`xtask::bench`](./xtask/src/bench/) module.
This document describes the newer "bench" benchmarks. For more details on the "milli benchmarks", see [benchmarks/README.md](./benchmarks/README.md).
## Design philosophy for the benchmarks
The newer "bench" benchmarks are **integration** benchmarks, in the sense that they spawn an actual Meilisearch server and measure its performance end-to-end, including HTTP request overhead.
Since this is prone to fluctuating, the benchmarks regain a bit of precision by measuring the runtime of the individual spans using the [logging machinery](./CONTRIBUTING.md#logging) of Meilisearch.
A span roughly translates to a function call. The benchmark runner collects all the spans by name using the [logs route](https://github.com/orgs/meilisearch/discussions/721) and sums their runtime. The processed results are then sent to the [benchmark dashboard](https://bench.meilisearch.dev), which is in charge of storing and presenting the data.
## Running the benchmarks
Benchmarks can run locally or in CI.
### Locally
#### With a local benchmark dashboard
The benchmarks dashboard lives in its [own repository](https://github.com/meilisearch/benchboard). We provide binaries for Ubuntu/Debian, but you can build from source for other platforms (MacOS should work as it was developed under that platform).
Run the `benchboard` binary to create a fresh database of results. By default it will serve the results and the API to gather results on `http://localhost:9001`.
From the Meilisearch repository, you can then run benchmarks with:
```sh
cargo xtask bench -- workloads/my_workload_1.json ..
```
This command will build and run Meilisearch locally on port 7700, so make sure that this port is available.
To run benchmarks on a different commit, just use the usual git command to get back to the desired commit.
#### Without a local benchmark dashboard
To work with the raw results, you can also skip using a local benchmark dashboard.
Run:
```sh
cargo xtask bench --no-dashboard -- workloads/my_workload_1.json workloads/my_workload_2.json ..
```
For processing the results, look at [Looking at benchmark results/Without dashboard](#without-dashboard).
### In CI
We have dedicated runners to run workloads on CI. Currently, there are three ways of running the CI:
1. Automatically, on every push to `main`.
2. Manually, by clicking the [`Run workflow`](https://github.com/meilisearch/meilisearch/actions/workflows/bench-manual.yml) button and specifying the target reference (tag, commit or branch) as well as one or multiple workloads to run. The workloads must exist in the Meilisearch repository (conventionally, in the [`workloads`](./workloads/) directory) on the target reference. Globbing (e.g., `workloads/*.json`) works.
3. Manually on a PR, by posting a comment containing a `/bench` command, followed by one or multiple workloads to run. Globbing works. The workloads must exist in the Meilisearch repository in the branch of the PR.
```
/bench workloads/movies*.json /hackernews_1M.json
```
## Looking at benchmark results
### On the dashboard
Results are available on the global dashboard used by CI at <https://bench.meilisearch.dev> or on your [local dashboard](#with-a-local-benchmark-dashboard).
The dashboard homepage presents three sections:
1. The latest invocations (a call to `cargo xtask bench`, either local or by CI) with their reason (generally set to some helpful link in CI) and their status.
2. The latest workloads run on `main`.
3. The latest workloads run on other references.
By default, the workload shows the total runtime delta with the latest applicable commit on `main`. The latest applicable commit is the latest commit for workload invocations that do not originate on `main`, and the latest previous commit for workload invocations that originate on `main`.
You can explicitly request a detailed comparison by span with the `main` branch, the branch of origin, or any previous commit, by clicking the links at the bottom of the workload invocation.
In the detailed comparison view, the spans are sorted by improvements, regressions, stable (no statistically significant change) and unstable (the span runtime is comparable to its standard deviation).
You can click on the name of any span to get a box plot comparing the target commit with multiple commits of the selected branch.
### Without dashboard
After the workloads are done running, the reports will live in the Meilisearch repository, in the `bench/reports` directory (by default).
You can then convert these reports into other formats.
- To [Firefox profiler](https://profiler.firefox.com) format. Run:
```sh
cd bench/reports
cargo run --release --bin trace-to-firefox -- my_workload_1-0-trace.json
```
You can then upload the resulting `firefox-my_workload_1-0-trace.json` file to the online profiler.
## Designing benchmark workloads
Benchmark workloads conventionally live in the `workloads` directory of the Meilisearch repository.
They are JSON files with the following structure (comments are not actually supported in workload files; to make your own, remove the comments or copy an existing workload file):
```jsonc
{
// Name of the workload. Must be unique to the workload, as it will be used to group results on the dashboard.
"name": "hackernews.ndjson_1M,no-threads",
// Number of consecutive runs of the commands that should be performed.
// Each run uses a fresh instance of Meilisearch and a fresh database.
// Each run produces its own report file.
"run_count": 3,
// List of arguments to add to the Meilisearch command line.
"extra_cli_args": ["--max-indexing-threads=1"],
// List of named assets that can be used in the commands.
"assets": {
// name of the asset.
// Must be unique at the workload level.
// For better results, the same asset (same sha256) should have the same name across workloads.
// Having multiple assets with the same name and distinct hashes is supported across workloads,
// but will lead to superfluous downloads.
//
// Assets are stored in the `bench/assets/` directory by default.
"hackernews-100_000.ndjson": {
// If the asset exists in the local filesystem (Meilisearch repository or for your local workloads),
// its file path can be specified here.
// `null` if the asset should be downloaded from a remote location.
"local_location": null,
// URL of the remote location where the asset can be downloaded.
// Use the `--assets-key` of the runner to pass an API key in the `Authorization: Bearer` header of the download requests.
// `null` if the asset should be imported from a local location.
// if both local and remote locations are specified, then the local one is tried first, then the remote one
// if the file is locally missing or its hash differs.
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/hackernews-100_000.ndjson",
// SHA256 of the asset.
// Optional, the `sha256` of the asset will be displayed during a run of the workload if it is missing.
// If present, the hash of the asset in the `bench/assets/` directory will be compared against this hash before
// running the workload. If the hashes differ, the asset will be downloaded anew.
"sha256": "60ecd23485d560edbd90d9ca31f0e6dba1455422f2a44e402600fbb5f7f1b213",
// Optional, one of "Auto", "Json", "NdJson" or "Raw".
// If missing, assumed to be "Auto".
// If "Auto", the format will be determined from the extension in the asset name.
"format": "NdJson"
},
"hackernews-200_000.ndjson": {
"local_location": null,
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/hackernews-200_000.ndjson",
"sha256": "785b0271fdb47cba574fab617d5d332276b835c05dd86e4a95251cf7892a1685"
},
"hackernews-300_000.ndjson": {
"local_location": null,
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/hackernews-300_000.ndjson",
"sha256": "de73c7154652eddfaf69cdc3b2f824d5c452f095f40a20a1c97bb1b5c4d80ab2"
},
"hackernews-400_000.ndjson": {
"local_location": null,
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/hackernews-400_000.ndjson",
"sha256": "c1b00a24689110f366447e434c201c086d6f456d54ed1c4995894102794d8fe7"
},
"hackernews-500_000.ndjson": {
"local_location": null,
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/hackernews-500_000.ndjson",
"sha256": "ae98f9dbef8193d750e3e2dbb6a91648941a1edca5f6e82c143e7996f4840083"
},
"hackernews-600_000.ndjson": {
"local_location": null,
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/hackernews-600_000.ndjson",
"sha256": "b495fdc72c4a944801f786400f22076ab99186bee9699f67cbab2f21f5b74dbe"
},
"hackernews-700_000.ndjson": {
"local_location": null,
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/hackernews-700_000.ndjson",
"sha256": "4b2c63974f3dabaa4954e3d4598b48324d03c522321ac05b0d583f36cb78a28b"
},
"hackernews-800_000.ndjson": {
"local_location": null,
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/hackernews-800_000.ndjson",
"sha256": "cb7b6afe0e6caa1be111be256821bc63b0771b2a0e1fad95af7aaeeffd7ba546"
},
"hackernews-900_000.ndjson": {
"local_location": null,
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/hackernews-900_000.ndjson",
"sha256": "e1154ddcd398f1c867758a93db5bcb21a07b9e55530c188a2917fdef332d3ba9"
},
"hackernews-1_000_000.ndjson": {
"local_location": null,
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/hackernews-1_000_000.ndjson",
"sha256": "27e25efd0b68b159b8b21350d9af76938710cb29ce0393fa71b41c4f3c630ffe"
}
},
// Core of the workload.
// A list of commands to run sequentially.
// Optional: A precommand is a request to the Meilisearch instance that is executed before the profiling runs.
"precommands": [
{
// Meilisearch route to call. `http://localhost:7700/` will be prepended.
"route": "indexes/movies/settings",
// HTTP method to call.
"method": "PATCH",
// If applicable, body of the request.
// Optional, if missing, the body will be empty.
"body": {
// One of "empty", "inline" or "asset".
// If using "empty", you can skip the entire "body" key.
"inline": {
// when "inline" is used, the body is the JSON object that is the value of the `"inline"` key.
"displayedAttributes": [
"title",
"by",
"score",
"time"
],
"searchableAttributes": [
"title"
],
"filterableAttributes": [
"by"
],
"sortableAttributes": [
"score",
"time"
]
}
},
// Whether to wait before running the next request.
// One of:
// - DontWait: run the next command without waiting for the response to this one.
// - WaitForResponse: run the next command as soon as the response from the server is received.
// - WaitForTask: run the next command once **all** the Meilisearch tasks created up to now have finished processing.
"synchronous": "WaitForTask"
}
],
// A command is a request to the Meilisearch instance that is executed while the profiling runs.
"commands": [
{
"route": "indexes/movies/documents",
"method": "POST",
"body": {
// When using "asset", use the name of an asset as value to use the content of that asset as body.
// The content type is derived from the format of the asset:
// "NdJson" => "application/x-ndjson"
// "Json" => "application/json"
// "Raw" => "application/octet-stream"
// See [AssetFormat::to_content_type](https://github.com/meilisearch/meilisearch/blob/7b670a4afadb132ac4a01b6403108700501a391d/xtask/src/bench/assets.rs#L30)
// for details and up-to-date list.
"asset": "hackernews-100_000.ndjson"
},
"synchronous": "WaitForTask"
},
{
"route": "indexes/movies/documents",
"method": "POST",
"body": {
"asset": "hackernews-200_000.ndjson"
},
"synchronous": "WaitForResponse"
},
{
"route": "indexes/movies/documents",
"method": "POST",
"body": {
"asset": "hackernews-300_000.ndjson"
},
"synchronous": "WaitForResponse"
},
{
"route": "indexes/movies/documents",
"method": "POST",
"body": {
"asset": "hackernews-400_000.ndjson"
},
"synchronous": "WaitForResponse"
},
{
"route": "indexes/movies/documents",
"method": "POST",
"body": {
"asset": "hackernews-500_000.ndjson"
},
"synchronous": "WaitForResponse"
},
{
"route": "indexes/movies/documents",
"method": "POST",
"body": {
"asset": "hackernews-600_000.ndjson"
},
"synchronous": "WaitForResponse"
},
{
"route": "indexes/movies/documents",
"method": "POST",
"body": {
"asset": "hackernews-700_000.ndjson"
},
"synchronous": "WaitForResponse"
},
{
"route": "indexes/movies/documents",
"method": "POST",
"body": {
"asset": "hackernews-800_000.ndjson"
},
"synchronous": "WaitForResponse"
},
{
"route": "indexes/movies/documents",
"method": "POST",
"body": {
"asset": "hackernews-900_000.ndjson"
},
"synchronous": "WaitForResponse"
},
{
"route": "indexes/movies/documents",
"method": "POST",
"body": {
"asset": "hackernews-1_000_000.ndjson"
},
"synchronous": "WaitForTask"
}
]
}
```
### Adding new assets
Assets reside in our DigitalOcean S3 space. Assuming you have team access to the DigitalOcean S3 space:
1. go to <https://cloud.digitalocean.com/spaces/milli-benchmarks?i=d1c552&path=bench%2Fdatasets%2F>
2. upload your dataset:
1. if your dataset is a single file, upload that single file using the "upload" button,
2. otherwise, create a folder using the "create folder" button, then inside that folder upload your individual files.
## Upgrading `https://bench.meilisearch.dev`
The URL of the server is in our password manager (look for "benchboard").
1. Make the needed modifications on the [benchboard repository](https://github.com/meilisearch/benchboard) and merge them to main.
2. Publish a new release to produce the Ubuntu/Debian binary.
3. Download the binary locally, send it to the server:
```
scp -6 ~/Downloads/benchboard root@\[<ipv6-address>\]:/bench/new-benchboard
```
Note that the ipv6 must be between escaped square brackets for SCP.
4. SSH to the server:
```
ssh root@<ipv6-address>
```
Note the ipv6 must **NOT** be between escaped square brackets for SSH 🥲
5. On the server, set the correct permissions for the new binary:
```
chown bench:bench /bench/new-benchboard
chmod 700 /bench/new-benchboard
```
6. On the server, move the new binary to the location of the running binary (if unsure, start by making a backup of the running binary):
```
mv /bench/{new-,}benchboard
```
7. Restart the benchboard service.
```
systemctl restart benchboard
```
8. Check that the service runs correctly.
```
systemctl status benchboard
```
9. Check the availability of the service by going to <https://bench.meilisearch.dev> on your browser.

View File

@ -4,7 +4,7 @@ First, thank you for contributing to Meilisearch! The goal of this document is t
Remember that there are many ways to contribute other than writing code: writing [tutorials or blog posts](https://github.com/meilisearch/awesome-meilisearch), improving [the documentation](https://github.com/meilisearch/documentation), submitting [bug reports](https://github.com/meilisearch/meilisearch/issues/new?assignees=&labels=&template=bug_report.md&title=) and [feature requests](https://github.com/meilisearch/product/discussions/categories/feedback-feature-proposal)...
Meilisearch can manage multiple indexes, handle the update store, and expose an HTTP API. Search and indexation are the domain of our core engine, [`milli`](https://github.com/meilisearch/meilisearch/tree/main/milli), while tokenization is handled by [our `charabia` library](https://github.com/meilisearch/charabia/).
The code in this repository is only concerned with managing multiple indexes, handling the update store, and exposing an HTTP API. Search and indexation are the domain of our core engine, [`milli`](https://github.com/meilisearch/milli), while tokenization is handled by [our `charabia` library](https://github.com/meilisearch/charabia/).
If Meilisearch does not offer optimized support for your language, please consider contributing to `charabia` by following the [CONTRIBUTING.md file](https://github.com/meilisearch/charabia/blob/main/CONTRIBUTING.md) and integrating your intended normalizer/segmenter.
@ -75,36 +75,6 @@ If you get a "Too many open files" error you might want to increase the open fil
ulimit -Sn 3000
```
#### Build tools
Meilisearch follows the [cargo xtask](https://github.com/matklad/cargo-xtask) workflow to provide some build tools.
Run `cargo xtask --help` from the root of the repository to find out what is available.
### Logging
Meilisearch uses [`tracing`](https://lib.rs/crates/tracing) for logging purposes. Tracing logs are structured and can be displayed as JSON to the end user, so prefer passing arguments as fields rather than interpolating them in the message.
Refer to the [documentation](https://docs.rs/tracing/0.1.40/tracing/index.html#using-the-macros) for the syntax of the spans and events.
Logging spans are used for 3 distinct purposes:
1. Regular logging
2. Profiling
3. Benchmarking
As a result, the spans should follow some rules:
- They should not be put on functions that are called too often. That is because opening and closing a span causes some overhead. For regular logging, avoid putting spans on functions that are taking less than a few hundred nanoseconds. For profiling or benchmarking, avoid putting spans on functions that are taking less than a few microseconds.
- For profiling and benchmarking, use the `TRACE` level.
- For profiling and benchmarking, use the following `target` prefixes:
- `indexing::` for spans meant for profiling the indexing operations.
- `search::` for spans meant for profiling the search operations.
### Benchmarking
See [BENCHMARKS.md](./BENCHMARKS.md)
## Git Guidelines
### Git Branches

3415
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@ -16,17 +16,11 @@ members = [
"json-depth-checker",
"benchmarks",
"fuzzers",
"tracing-trace",
"xtask",
"build-info",
]
[workspace.package]
version = "1.8.0"
authors = [
"Quentin de Quelen <quentin@dequelen.me>",
"Clément Renault <clement@meilisearch.com>",
]
version = "1.5.0"
authors = ["Quentin de Quelen <quentin@dequelen.me>", "Clément Renault <clement@meilisearch.com>"]
description = "Meilisearch HTTP server"
homepage = "https://meilisearch.com"
readme = "README.md"

View File

@ -1,5 +1,5 @@
# Compile
FROM rust:1.75.0-alpine3.18 AS compiler
FROM rust:alpine3.16 AS compiler
RUN apk add -q --update-cache --no-cache build-base openssl-dev
@ -8,7 +8,7 @@ WORKDIR /
ARG COMMIT_SHA
ARG COMMIT_DATE
ARG GIT_TAG
ENV VERGEN_GIT_SHA=${COMMIT_SHA} VERGEN_GIT_COMMIT_TIMESTAMP=${COMMIT_DATE} VERGEN_GIT_DESCRIBE=${GIT_TAG}
ENV VERGEN_GIT_SHA=${COMMIT_SHA} VERGEN_GIT_COMMIT_TIMESTAMP=${COMMIT_DATE} VERGEN_GIT_SEMVER_LIGHTWEIGHT=${GIT_TAG}
ENV RUSTFLAGS="-C target-feature=-crt-static"
COPY . .

View File

@ -1,6 +1,6 @@
MIT License
Copyright (c) 2019-2024 Meili SAS
Copyright (c) 2019-2022 Meili SAS
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal

View File

@ -41,10 +41,10 @@ Meilisearch helps you shape a delightful search experience in a snap, offering f
## ✨ Features
- **Search-as-you-type:** find search results in less than 50 milliseconds
- **[Typo tolerance](https://www.meilisearch.com/docs/learn/configuration/typo_tolerance?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** get relevant matches even when queries contain typos and misspellings
- **[Filtering](https://www.meilisearch.com/docs/learn/fine_tuning_results/filtering?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features) and [faceted search](https://www.meilisearch.com/docs/learn/fine_tuning_results/faceted_search?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** enhance your users' search experience with custom filters and build a faceted search interface in a few lines of code
- **[Typo tolerance](https://www.meilisearch.com/docs/learn/getting_started/customizing_relevancy?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features#typo-tolerance):** get relevant matches even when queries contain typos and misspellings
- **[Filtering](https://www.meilisearch.com/docs/learn/fine_tuning_results/filtering?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features) and [faceted search](https://www.meilisearch.com/docs/learn/fine_tuning_results/faceted_search?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** enhance your user's search experience with custom filters and build a faceted search interface in a few lines of code
- **[Sorting](https://www.meilisearch.com/docs/learn/fine_tuning_results/sorting?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** sort results based on price, date, or pretty much anything else your users need
- **[Synonym support](https://www.meilisearch.com/docs/learn/configuration/synonyms?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** configure synonyms to include more relevant content in your search results
- **[Synonym support](https://www.meilisearch.com/docs/learn/getting_started/customizing_relevancy?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features#synonyms):** configure synonyms to include more relevant content in your search results
- **[Geosearch](https://www.meilisearch.com/docs/learn/fine_tuning_results/geosearch?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** filter and sort documents based on geographic data
- **[Extensive language support](https://www.meilisearch.com/docs/learn/what_is_meilisearch/language?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** search datasets in any language, with optimized support for Chinese, Japanese, Hebrew, and languages using the Latin alphabet
- **[Security management](https://www.meilisearch.com/docs/learn/security/master_api_keys?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** control which users can access what data with API keys that allow fine-grained permissions handling
@ -61,6 +61,8 @@ You can consult Meilisearch's documentation at [https://www.meilisearch.com/docs
For basic instructions on how to set up Meilisearch, add documents to an index, and search for documents, take a look at our [Quick Start](https://www.meilisearch.com/docs/learn/getting_started/quick_start?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=get-started) guide.
You may also want to check out [Meilisearch 101](https://www.meilisearch.com/docs/learn/getting_started/filtering_and_sorting?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=get-started) for an introduction to some of Meilisearch's most popular features.
## ⚡ Supercharge your Meilisearch experience
Say goodbye to server deployment and manual updates with [Meilisearch Cloud](https://www.meilisearch.com/cloud?utm_campaign=oss&utm_source=github&utm_medium=meilisearch). No credit card required.
@ -99,7 +101,7 @@ Meilisearch is a search engine created by [Meili](https://www.welcometothejungle
- For feature requests, please visit our [product repository](https://github.com/meilisearch/product/discussions)
- Found a bug? Open an [issue](https://github.com/meilisearch/meilisearch/issues)!
- Want to be part of our Discord community? [Join us!](https://discord.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=contact)
- Want to be part of our Discord community? [Join us!](https://discord.gg/meilisearch)
Thank you for your support!

File diff suppressed because it is too large Load Diff

View File

@ -11,24 +11,24 @@ edition.workspace = true
license.workspace = true
[dependencies]
anyhow = "1.0.79"
csv = "1.3.0"
anyhow = "1.0.70"
csv = "1.2.1"
milli = { path = "../milli" }
mimalloc = { version = "0.1.39", default-features = false }
serde_json = { version = "1.0.111", features = ["preserve_order"] }
mimalloc = { version = "0.1.37", default-features = false }
serde_json = { version = "1.0.95", features = ["preserve_order"] }
[dev-dependencies]
criterion = { version = "0.5.1", features = ["html_reports"] }
rand = "0.8.5"
rand_chacha = "0.3.1"
roaring = "0.10.2"
roaring = "0.10.1"
[build-dependencies]
anyhow = "1.0.79"
bytes = "1.5.0"
anyhow = "1.0.70"
bytes = "1.4.0"
convert_case = "0.6.0"
flate2 = "1.0.28"
reqwest = { version = "0.11.23", features = ["blocking", "rustls-tls"], default-features = false }
flate2 = "1.0.25"
reqwest = { version = "0.11.16", features = ["blocking", "rustls-tls"], default-features = false }
[features]
default = ["milli/all-tokenizations"]

View File

@ -36,7 +36,7 @@ fn setup_index() -> Index {
}
fn setup_settings<'t>(
wtxn: &mut RwTxn<'t>,
wtxn: &mut RwTxn<'t, '_>,
index: &'t Index,
primary_key: &str,
searchable_fields: &[&str],

View File

@ -1,18 +0,0 @@
[package]
name = "build-info"
version.workspace = true
authors.workspace = true
description.workspace = true
homepage.workspace = true
readme.workspace = true
edition.workspace = true
license.workspace = true
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
time = { version = "0.3.34", features = ["parsing"] }
[build-dependencies]
anyhow = "1.0.80"
vergen-git2 = "1.0.0-beta.2"

View File

@ -1,22 +0,0 @@
/// Build-script entry point.
///
/// Tries to emit the git-derived variables; a failure is reported as a cargo
/// warning and the function still returns normally.
fn main() {
    match emit_git_variables() {
        Ok(()) => {}
        Err(err) => println!("cargo:warning=vergen: {}", err),
    }
}
/// Collects git metadata through `vergen_git2` and emits it for the crate being built.
///
/// # Errors
///
/// Returns an error when building the git instructions, registering them on the
/// emitter, or emitting them fails.
fn emit_git_variables() -> anyhow::Result<()> {
    // Note: any code that needs VERGEN_ environment variables should take care to define them
    // manually in the Dockerfile and pass them in the corresponding GitHub workflow
    // (publish_docker.yml).
    // This is due to the Dockerfile building the binary outside of the git directory.
    let mut git = vergen_git2::Git2Builder::default();

    git.branch(true);
    git.commit_timestamp(true);
    git.commit_message(true);
    git.describe(true, true, None);
    git.sha(false);

    let instructions = git.build()?;

    vergen_git2::Emitter::default()
        .fail_on_error()
        .add_instructions(&instructions)?
        .emit()
}

View File

@ -1,203 +0,0 @@
use time::format_description::well_known::Iso8601;
/// Git metadata captured at compile time about the commit being built.
///
/// As filled by [`BuildInfo::from_build`], a field is `None` when the matching
/// `VERGEN_GIT_*` environment variable was not set during compilation.
#[derive(Debug, Clone)]
pub struct BuildInfo {
/// Value of `VERGEN_GIT_BRANCH`.
pub branch: Option<&'static str>,
/// Classification of `VERGEN_GIT_DESCRIBE`, see [`DescribeResult`].
pub describe: Option<DescribeResult>,
/// Value of `VERGEN_GIT_SHA`.
pub commit_sha1: Option<&'static str>,
/// Value of `VERGEN_GIT_COMMIT_MESSAGE`.
pub commit_msg: Option<&'static str>,
/// `VERGEN_GIT_COMMIT_TIMESTAMP` parsed as ISO 8601; also `None` when parsing fails.
pub commit_timestamp: Option<time::OffsetDateTime>,
}
impl BuildInfo {
    /// Builds a [`BuildInfo`] from the `VERGEN_GIT_*` environment variables that were
    /// visible when this crate was compiled.
    ///
    /// Variables that were not set yield `None` fields. The commit timestamp is also
    /// `None` when its value does not parse as an ISO 8601 date-time.
    pub fn from_build() -> Self {
        let commit_timestamp = option_env!("VERGEN_GIT_COMMIT_TIMESTAMP")
            .and_then(|raw| time::OffsetDateTime::parse(raw, &Iso8601::DEFAULT).ok());

        Self {
            branch: option_env!("VERGEN_GIT_BRANCH"),
            describe: DescribeResult::from_build(),
            commit_sha1: option_env!("VERGEN_GIT_SHA"),
            commit_msg: option_env!("VERGEN_GIT_COMMIT_MESSAGE"),
            commit_timestamp,
        }
    }
}
/// Classification of a `git describe` string, as computed by [`DescribeResult::new`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum DescribeResult {
/// The string is a prototype tag; `name` is the whole describe string.
Prototype { name: &'static str },
/// The string is a release tag of the shape `vX.Y.Z`; `version` is the whole describe string.
Release { version: &'static str, major: u64, minor: u64, patch: u64 },
/// The string is a prerelease tag of the shape `vM.N.P-rc.C`; `version` is the whole describe string.
Prerelease { version: &'static str, major: u64, minor: u64, patch: u64, rc: u64 },
/// The string matched none of the shapes above; `describe` is kept verbatim.
NotATag { describe: &'static str },
}
impl DescribeResult {
    /// Classifies a describe string.
    ///
    /// The prototype shape is tried first, then the release shape, then the
    /// prerelease shape; anything else becomes [`DescribeResult::NotATag`].
    pub fn new(describe: &'static str) -> Self {
        prototype_name(describe)
            .map(|name| Self::Prototype { name })
            .or_else(|| release_version(describe))
            .or_else(|| prerelease_version(describe))
            .unwrap_or(Self::NotATag { describe })
    }

    /// Classifies the `VERGEN_GIT_DESCRIBE` value seen at compile time.
    ///
    /// Returns `None` when that variable was not set during compilation.
    pub fn from_build() -> Option<Self> {
        option_env!("VERGEN_GIT_DESCRIBE").map(Self::new)
    }

    /// Returns the tag text for every variant that represents a tag, `None` for
    /// [`DescribeResult::NotATag`].
    pub fn as_tag(&self) -> Option<&'static str> {
        match *self {
            Self::Prototype { name } => Some(name),
            Self::Release { version, .. } | Self::Prerelease { version, .. } => Some(version),
            Self::NotATag { .. } => None,
        }
    }

    /// Returns the prototype name when this is a [`DescribeResult::Prototype`],
    /// `None` for every other variant.
    pub fn as_prototype(&self) -> Option<&'static str> {
        match *self {
            Self::Prototype { name } => Some(name),
            Self::Release { .. } | Self::Prerelease { .. } | Self::NotATag { .. } => None,
        }
    }
}
/// Recognizes prototype tags.
///
/// `describe` is returned unchanged, wrapped in `Some`, when all of the following hold:
///
/// 1. it starts with `prototype-`,
/// 2. its last `-`-separated component is a number,
/// 3. its second-to-last `-`-separated component is not a number.
///
/// In every other case the result is `None`.
fn prototype_name(describe: &'static str) -> Option<&'static str> {
    let is_number = |component: &str| component.parse::<u64>().is_ok();

    let mut components = describe.rsplit('-');
    let last = components.next()?;
    // no second component means there is no `-` at all, hence no `prototype-` prefix either
    let before_last = components.next()?;

    let is_prototype =
        describe.starts_with("prototype-") && is_number(last) && !is_number(before_last);
    is_prototype.then_some(describe)
}
/// Parses `describe` as a full release tag of the shape `vX.Y.Z`.
///
/// Returns `Some(DescribeResult::Release { .. })` carrying the whole input as `version`
/// when the input starts with `v`, contains no `-`, and the rest is exactly three
/// `.`-separated numbers. Returns `None` otherwise.
fn release_version(describe: &'static str) -> Option<DescribeResult> {
    let numbers = describe.strip_prefix('v')?;

    // a full release version never contains a `-`
    if numbers.contains('-') {
        return None;
    }

    // exactly three components are expected: major, minor and patch
    let components: Vec<&str> = numbers.split('.').collect();
    let &[major, minor, patch] = components.as_slice() else {
        return None;
    };

    let major: u64 = major.parse().ok()?;
    let minor: u64 = minor.parse().ok()?;
    let patch: u64 = patch.parse().ok()?;

    Some(DescribeResult::Release { version: describe, major, minor, patch })
}
/// Parses `describe` as a prerelease tag of the shape `vM.N.P-rc.C`.
///
/// Returns `Some(DescribeResult::Prerelease { .. })` carrying the whole input as `version`
/// when the text after the last `-` is `rc.` followed by a number and the text before it
/// is a full release version as accepted by `release_version`. Returns `None` otherwise.
fn prerelease_version(describe: &'static str) -> Option<DescribeResult> {
    // Split on the last hyphen only, so that *everything* before `-rc.C` has to be a valid
    // release version. Looking at the last two hyphen-separated components alone would let
    // inputs such as `foo-v1.7.2-rc.3` through with their leading components ignored.
    let (release, prerelease) = describe.rsplit_once('-')?;

    let rc: u64 = prerelease.strip_prefix("rc.")?.parse().ok()?;

    let DescribeResult::Release { version: _, major, minor, patch } = release_version(release)?
    else {
        return None;
    };

    Some(DescribeResult::Prerelease { version: describe, major, minor, patch, rc })
}
#[cfg(test)]
mod test {
    use super::DescribeResult;

    /// Asserts that `describe` is classified as `NotATag`.
    fn assert_not_a_tag(describe: &'static str) {
        assert_eq!(DescribeResult::NotATag { describe }, DescribeResult::new(describe))
    }

    /// Asserts that `describe` is classified as a prototype whose name is the whole input.
    fn assert_proto(describe: &'static str) {
        assert_eq!(DescribeResult::Prototype { name: describe }, DescribeResult::new(describe))
    }

    /// Asserts that `describe` is classified as a release with the given version numbers.
    fn assert_release(describe: &'static str, major: u64, minor: u64, patch: u64) {
        assert_eq!(
            DescribeResult::Release { version: describe, major, minor, patch },
            DescribeResult::new(describe)
        )
    }

    /// Asserts that `describe` is classified as a prerelease with the given version numbers.
    fn assert_prerelease(describe: &'static str, major: u64, minor: u64, patch: u64, rc: u64) {
        assert_eq!(
            DescribeResult::Prerelease { version: describe, major, minor, patch, rc },
            DescribeResult::new(describe)
        )
    }

    #[test]
    fn not_a_tag() {
        assert_not_a_tag("whatever-fuzzy");
        assert_not_a_tag("whatever-fuzzy-5-ggg-dirty");
        assert_not_a_tag("whatever-fuzzy-120-ggg-dirty");

        // technically a tag, but not a proto nor a version, so not parsed as a tag
        assert_not_a_tag("whatever");

        // dirty version
        assert_not_a_tag("v1.7.0-1-ggga-dirty");
        assert_not_a_tag("v1.7.0-rc.1-1-ggga-dirty");

        // after version
        assert_not_a_tag("v1.7.0-1-ggga");
        assert_not_a_tag("v1.7.0-rc.1-1-ggga");

        // after proto: the `prototype-` prefix must be spelled correctly here, otherwise
        // the input is rejected by the prefix check and the suffix rules are never exercised
        assert_not_a_tag("prototype-tag-0-1-ggga");
        assert_not_a_tag("prototype-tag-0-1-ggga-dirty");
    }

    #[test]
    fn prototype() {
        assert_proto("prototype-tag-0");
        assert_proto("prototype-tag-10");
        assert_proto("prototype-long-name-tag-10");
    }

    #[test]
    fn release() {
        assert_release("v1.7.2", 1, 7, 2);
    }

    #[test]
    fn prerelease() {
        assert_prerelease("v1.7.2-rc.3", 1, 7, 2, 3);
    }
}

View File

@ -129,6 +129,3 @@ experimental_enable_metrics = false
# Experimental RAM reduction during indexing, do not use in production, see: <https://github.com/meilisearch/product/discussions/652>
experimental_reduce_indexing_memory_usage = false
# Experimentally reduces the maximum number of tasks that will be processed at once, see: <https://github.com/orgs/meilisearch/discussions/713>
# experimental_max_number_of_batched_tasks = 100

View File

@ -11,22 +11,22 @@ readme.workspace = true
license.workspace = true
[dependencies]
anyhow = "1.0.79"
flate2 = "1.0.28"
http = "0.2.11"
anyhow = "1.0.70"
flate2 = "1.0.25"
http = "0.2.9"
log = "0.4.17"
meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-types = { path = "../meilisearch-types" }
once_cell = "1.19.0"
regex = "1.10.2"
roaring = { version = "0.10.2", features = ["serde"] }
serde = { version = "1.0.195", features = ["derive"] }
serde_json = { version = "1.0.111", features = ["preserve_order"] }
tar = "0.4.40"
tempfile = "3.9.0"
thiserror = "1.0.56"
time = { version = "0.3.31", features = ["serde-well-known", "formatting", "parsing", "macros"] }
tracing = "0.1.40"
uuid = { version = "1.6.1", features = ["serde", "v4"] }
once_cell = "1.17.1"
regex = "1.7.3"
roaring = { version = "0.10.1", features = ["serde"] }
serde = { version = "1.0.160", features = ["derive"] }
serde_json = { version = "1.0.95", features = ["preserve_order"] }
tar = "0.4.38"
tempfile = "3.5.0"
thiserror = "1.0.40"
time = { version = "0.3.20", features = ["serde-well-known", "formatting", "parsing", "macros"] }
uuid = { version = "1.3.1", features = ["serde", "v4"] }
[dev-dependencies]
big_s = "1.0.2"

View File

@ -256,8 +256,8 @@ pub(crate) mod test {
pub fn create_test_settings() -> Settings<Checked> {
let settings = Settings {
displayed_attributes: Setting::Set(vec![S("race"), S("name")]).into(),
searchable_attributes: Setting::Set(vec![S("name"), S("race")]).into(),
displayed_attributes: Setting::Set(vec![S("race"), S("name")]),
searchable_attributes: Setting::Set(vec![S("name"), S("race")]),
filterable_attributes: Setting::Set(btreeset! { S("race"), S("age") }),
sortable_attributes: Setting::Set(btreeset! { S("age") }),
ranking_rules: Setting::NotSet,
@ -267,7 +267,6 @@ pub(crate) mod test {
dictionary: Setting::NotSet,
synonyms: Setting::NotSet,
distinct_attribute: Setting::NotSet,
proximity_precision: Setting::NotSet,
typo_tolerance: Setting::NotSet,
faceting: Setting::Set(FacetingSettings {
max_values_per_facet: Setting::Set(111),
@ -276,8 +275,6 @@ pub(crate) mod test {
),
}),
pagination: Setting::NotSet,
embedders: Setting::NotSet,
search_cutoff_ms: Setting::NotSet,
_kind: std::marker::PhantomData,
};
settings.check()

View File

@ -120,7 +120,7 @@ impl From<v1::settings::Settings> for v2::Settings<v2::Unchecked> {
criterion.as_ref().map(ToString::to_string)
}
Err(()) => {
tracing::warn!(
log::warn!(
"Could not import the following ranking rule: `{}`.",
ranking_rule
);
@ -152,11 +152,11 @@ impl From<v1::update::UpdateStatus> for Option<v2::updates::UpdateStatus> {
use v2::updates::UpdateStatus as UpdateStatusV2;
Some(match source {
UpdateStatusV1::Enqueued { content } => {
tracing::warn!(
log::warn!(
"Cannot import task {} (importing enqueued tasks from v1 dumps is unsupported)",
content.update_id
);
tracing::warn!("Task will be skipped in the queue of imported tasks.");
log::warn!("Task will be skipped in the queue of imported tasks.");
return None;
}
@ -229,7 +229,7 @@ impl From<v1::update::UpdateType> for Option<v2::updates::UpdateMeta> {
Some(match source {
v1::update::UpdateType::ClearAll => v2::updates::UpdateMeta::ClearDocuments,
v1::update::UpdateType::Customs => {
tracing::warn!("Ignoring task with type 'Customs' that is no longer supported");
log::warn!("Ignoring task with type 'Customs' that is no longer supported");
return None;
}
v1::update::UpdateType::DocumentsAddition { .. } => {
@ -296,7 +296,7 @@ impl From<v1::settings::RankingRule> for Option<v2::settings::Criterion> {
v1::settings::RankingRule::Proximity => Some(v2::settings::Criterion::Proximity),
v1::settings::RankingRule::Attribute => Some(v2::settings::Criterion::Attribute),
v1::settings::RankingRule::WordsPosition => {
tracing::warn!("Removing the 'WordsPosition' ranking rule that is no longer supported, please check the resulting ranking rules of your indexes");
log::warn!("Removing the 'WordsPosition' ranking rule that is no longer supported, please check the resulting ranking rules of your indexes");
None
}
v1::settings::RankingRule::Exactness => Some(v2::settings::Criterion::Exactness),

View File

@ -1,3 +1,4 @@
use std::convert::TryInto;
use std::str::FromStr;
use time::OffsetDateTime;
@ -145,8 +146,8 @@ impl From<v2::updates::UpdateStatus> for v3::updates::UpdateStatus {
started_processing_at: processing.started_processing_at,
}),
Err(e) => {
tracing::warn!("Error with task {}: {}", processing.from.update_id, e);
tracing::warn!("Task will be marked as `Failed`.");
log::warn!("Error with task {}: {}", processing.from.update_id, e);
log::warn!("Task will be marked as `Failed`.");
v3::updates::UpdateStatus::Failed(v3::updates::Failed {
from: v3::updates::Processing {
from: v3::updates::Enqueued {
@ -171,8 +172,8 @@ impl From<v2::updates::UpdateStatus> for v3::updates::UpdateStatus {
enqueued_at: enqueued.enqueued_at,
}),
Err(e) => {
tracing::warn!("Error with task {}: {}", enqueued.update_id, e);
tracing::warn!("Task will be marked as `Failed`.");
log::warn!("Error with task {}: {}", enqueued.update_id, e);
log::warn!("Task will be marked as `Failed`.");
v3::updates::UpdateStatus::Failed(v3::updates::Failed {
from: v3::updates::Processing {
from: v3::updates::Enqueued {
@ -352,7 +353,7 @@ impl From<String> for v3::Code {
"malformed_payload" => v3::Code::MalformedPayload,
"missing_payload" => v3::Code::MissingPayload,
other => {
tracing::warn!("Unknown error code {}", other);
log::warn!("Unknown error code {}", other);
v3::Code::UnretrievableErrorCode
}
}

View File

@ -76,20 +76,20 @@ impl CompatV3ToV4 {
let index_uid = match index_uid {
Some(uid) => uid,
None => {
tracing::warn!(
log::warn!(
"Error while importing the update {}.",
task.update.id()
);
tracing::warn!(
log::warn!(
"The index associated to the uuid `{}` could not be retrieved.",
task.uuid.to_string()
);
if task.update.is_finished() {
// we're fucking with his history but not his data, that's ok-ish.
tracing::warn!("The index-uuid will be set as `unknown`.");
log::warn!("The index-uuid will be set as `unknown`.");
String::from("unknown")
} else {
tracing::warn!("The task will be ignored.");
log::warn!("The task will be ignored.");
return None;
}
}

View File

@ -305,7 +305,7 @@ impl From<v4::ResponseError> for v5::ResponseError {
"invalid_api_key_expires_at" => v5::Code::InvalidApiKeyExpiresAt,
"invalid_api_key_description" => v5::Code::InvalidApiKeyDescription,
other => {
tracing::warn!("Unknown error code {}", other);
log::warn!("Unknown error code {}", other);
v5::Code::UnretrievableErrorCode
}
};

View File

@ -304,7 +304,7 @@ impl From<v5::ResponseError> for v6::ResponseError {
"immutable_field" => v6::Code::BadRequest,
"api_key_already_exists" => v6::Code::ApiKeyAlreadyExists,
other => {
tracing::warn!("Unknown error code {}", other);
log::warn!("Unknown error code {}", other);
v6::Code::UnretrievableErrorCode
}
};
@ -315,8 +315,8 @@ impl From<v5::ResponseError> for v6::ResponseError {
impl<T> From<v5::Settings<T>> for v6::Settings<v6::Unchecked> {
fn from(settings: v5::Settings<T>) -> Self {
v6::Settings {
displayed_attributes: v6::Setting::from(settings.displayed_attributes).into(),
searchable_attributes: v6::Setting::from(settings.searchable_attributes).into(),
displayed_attributes: settings.displayed_attributes.into(),
searchable_attributes: settings.searchable_attributes.into(),
filterable_attributes: settings.filterable_attributes.into(),
sortable_attributes: settings.sortable_attributes.into(),
ranking_rules: {
@ -329,7 +329,7 @@ impl<T> From<v5::Settings<T>> for v6::Settings<v6::Unchecked> {
new_ranking_rules.push(new_rule);
}
Err(_) => {
tracing::warn!("Error while importing settings. The ranking rule `{rule}` does not exist anymore.")
log::warn!("Error while importing settings. The ranking rule `{rule}` does not exist anymore.")
}
}
}
@ -345,7 +345,6 @@ impl<T> From<v5::Settings<T>> for v6::Settings<v6::Unchecked> {
dictionary: v6::Setting::NotSet,
synonyms: settings.synonyms.into(),
distinct_attribute: settings.distinct_attribute.into(),
proximity_precision: v6::Setting::NotSet,
typo_tolerance: match settings.typo_tolerance {
v5::Setting::Set(typo) => v6::Setting::Set(v6::TypoTolerance {
enabled: typo.enabled.into(),
@ -378,8 +377,6 @@ impl<T> From<v5::Settings<T>> for v6::Settings<v6::Unchecked> {
v5::Setting::Reset => v6::Setting::Reset,
v5::Setting::NotSet => v6::Setting::NotSet,
},
embedders: v6::Setting::NotSet,
search_cutoff_ms: v6::Setting::NotSet,
_kind: std::marker::PhantomData,
}
}

View File

@ -13,12 +13,12 @@ use crate::{Result, Version};
mod compat;
mod v1;
mod v2;
mod v3;
mod v4;
mod v5;
mod v6;
pub(self) mod v1;
pub(self) mod v2;
pub(self) mod v3;
pub(self) mod v4;
pub(self) mod v5;
pub(self) mod v6;
pub type Document = serde_json::Map<String, serde_json::Value>;
pub type UpdateFile = dyn Iterator<Item = Result<Document>>;
@ -197,140 +197,6 @@ pub(crate) mod test {
use super::*;
use crate::reader::v6::RuntimeTogglableFeatures;
/// Reads the `v6-with-vectors` dump asset and checks its top-level information, tasks,
/// indexes, documents and runtime features against snapshots.
#[test]
fn import_dump_v6_with_vectors() {
// dump containing two indexes
//
// "vector", configured with an embedder
// contains:
// - one document with an overridden vector,
// - one document with a natural vector
// - one document with a _vectors map containing one additional embedder name and a natural vector
// - one document with a _vectors map containing one additional embedder name and an overridden vector
//
// "novector", no embedder
// contains:
// - a document without vector
// - a document with a random _vectors field
let dump = File::open("tests/assets/v6-with-vectors.dump").unwrap();
let mut dump = DumpReader::open(dump).unwrap();
// top level infos
insta::assert_display_snapshot!(dump.date().unwrap(), @"2024-05-16 15:51:34.151044 +00:00:00");
insta::assert_debug_snapshot!(dump.instance_uid().unwrap(), @"None");
// tasks
let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap();
// split the (task, update file) pairs into two parallel vectors
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"278f63325ef06ca04d01df98d8207b94");
// none of the ten tasks is expected to come with an update file
assert_eq!(update_files.len(), 10);
assert!(update_files[0].is_none()); // the dump creation
assert!(update_files[1].is_none());
assert!(update_files[2].is_none());
assert!(update_files[3].is_none());
assert!(update_files[4].is_none());
assert!(update_files[5].is_none());
assert!(update_files[6].is_none());
assert!(update_files[7].is_none());
assert!(update_files[8].is_none());
assert!(update_files[9].is_none());
// indexes
let mut indexes = dump.indexes().unwrap().collect::<Result<Vec<_>>>().unwrap();
// the indexes are not ordered in any way by default
indexes.sort_by_key(|index| index.metadata().uid.to_string());
// once sorted by uid, "vector" comes after "novector", so it is popped first
let mut vector_index = indexes.pop().unwrap();
let mut novector_index = indexes.pop().unwrap();
assert!(indexes.is_empty());
// vector
insta::assert_json_snapshot!(vector_index.metadata(), @r###"
{
"uid": "vector",
"primaryKey": "id",
"createdAt": "2024-05-16T15:33:17.240962Z",
"updatedAt": "2024-05-16T15:40:55.723052Z"
}
"###);
{
let documents: Result<Vec<_>> = vector_index.documents().unwrap().collect();
let mut documents = documents.unwrap();
assert_eq!(documents.len(), 4);
// sort by id so that popping yields the documents in a deterministic order (highest id first)
documents.sort_by_key(|doc| doc.get("id").unwrap().to_string());
{
let document = documents.pop().unwrap();
insta::assert_json_snapshot!(document);
}
{
let document = documents.pop().unwrap();
insta::assert_json_snapshot!(document);
}
{
let document = documents.pop().unwrap();
insta::assert_json_snapshot!(document);
}
{
let document = documents.pop().unwrap();
insta::assert_json_snapshot!(document);
}
}
// novector
insta::assert_json_snapshot!(novector_index.metadata(), @r###"
{
"uid": "novector",
"primaryKey": "id",
"createdAt": "2024-05-16T15:33:03.568055Z",
"updatedAt": "2024-05-16T15:33:07.530217Z"
}
"###);
// no embedder is configured on this index
insta::assert_json_snapshot!(novector_index.settings().unwrap().embedders, @"null");
{
let documents: Result<Vec<_>> = novector_index.documents().unwrap().collect();
let mut documents = documents.unwrap();
assert_eq!(documents.len(), 2);
// sort by id so that popping yields "e1" first, then "e0"
documents.sort_by_key(|doc| doc.get("id").unwrap().to_string());
{
let document = documents.pop().unwrap();
insta::assert_json_snapshot!(document, @r###"
{
"id": "e1",
"other": "random1",
"_vectors": "toto"
}
"###);
}
{
let document = documents.pop().unwrap();
insta::assert_json_snapshot!(document, @r###"
{
"id": "e0",
"other": "random0"
}
"###);
}
}
// the dump is expected to report the vector store feature as enabled, everything else at its default
assert_eq!(
dump.features().unwrap().unwrap(),
RuntimeTogglableFeatures { vector_store: true, ..Default::default() }
);
}
#[test]
fn import_dump_v6_experimental() {
let dump = File::open("tests/assets/v6-with-experimental.dump").unwrap();

View File

@ -1,783 +0,0 @@
---
source: dump/src/reader/mod.rs
expression: document
---
{
"id": "e3",
"desc": "overriden vector + map",
"_vectors": {
"default": [
0.2,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1
],
"toto": [
0.1
]
}
}

View File

@ -1,786 +0,0 @@
---
source: dump/src/reader/mod.rs
expression: document
---
{
"id": "e2",
"desc": "natural vector + map",
"_vectors": {
"toto": [],
"default": {
"embeddings": [
[
-0.05189208313822746,
-0.9273212552070618,
0.1443813145160675,
0.0932632014155388,
0.2665371894836426,
0.36266782879829407,
0.6402910947799683,
0.32014018297195435,
0.030915971845388412,
-0.9312191605567932,
-0.3718109726905823,
-0.2700554132461548,
-1.1014580726623535,
0.9154956936836244,
-0.3406888246536255,
1.0077725648880005,
0.6577560901641846,
-0.3955195546150207,
-0.4148270785808563,
0.1855088472366333,
0.5062315464019775,
-0.3632686734199524,
-0.2277890294790268,
0.2560805082321167,
-0.3853609561920166,
-0.1604762226343155,
-0.13947471976280212,
-0.20147813856601715,
-0.4466346800327301,
-0.3761846721172333,
0.1443382054567337,
0.18205296993255615,
0.49359792470932007,
-0.22538000345230105,
-0.4996317625045776,
-0.22734887897968292,
-0.6034309267997742,
-0.7857939600944519,
-0.34923747181892395,
-0.3466345965862274,
0.21176661550998688,
-0.5101462006568909,
-0.3403083384037018,
0.000315118464641273,
0.236465722322464,
-0.10246097296476364,
-1.3013339042663574,
0.3419138789176941,
-0.32963496446609497,
-0.0901619717478752,
-0.5426247119903564,
0.22656650841236117,
-0.44758284091949463,
0.14151698350906372,
-0.1089438870549202,
0.5500766634941101,
-0.670711100101471,
-0.6227269768714905,
0.3894464075565338,
-0.27609574794769287,
0.7028202414512634,
-0.19697771966457367,
0.328511506319046,
0.5063360929489136,
0.4065195322036743,
0.2614171802997589,
-0.30274391174316406,
1.0393824577331543,
-0.7742937207221985,
-0.7874112129211426,
-0.6749666929244995,
0.5190866589546204,
0.004123548045754433,
-0.28312963247299194,
-0.038731709122657776,
-1.0142987966537476,
-0.09519586712121964,
0.8755272626876831,
0.4876938760280609,
0.7811151742935181,
0.85174959897995,
0.11826585978269576,
0.5373436808586121,
0.3649002015590668,
0.19064077734947205,
-0.00287026260048151,
-0.7305403351783752,
-0.015206154435873032,
-0.7899249196052551,
0.19407285749912265,
0.08596625179052353,
-0.28976231813430786,
-0.1525907665491104,
0.3798313438892365,
0.050306469202041626,
-0.5697937607765198,
0.4219021201133728,
0.276252806186676,
0.1559903472661972,
0.10030482709407806,
-0.4043720066547394,
-0.1969818025827408,
0.5739826560020447,
0.2116064727306366,
-1.4620544910430908,
-0.7802462577819824,
-0.24739810824394223,
-0.09791352599859238,
-0.4413802027702331,
0.21549351513385773,
-0.9520436525344848,
-0.08762510865926743,
0.08154498040676117,
-0.6154940724372864,
-1.01079523563385,
0.885427713394165,
0.6967288851737976,
0.27186504006385803,
-0.43194177746772766,
-0.11248451471328735,
0.7576630711555481,
0.4998855590820313,
0.0264343973249197,
0.9872855544090272,
0.5634694695472717,
0.053698331117630005,
0.19410227239131927,
0.3570743501186371,
-0.23670297861099243,
-0.9114483594894408,
0.07884842902421951,
0.7318344116210938,
0.44630110263824463,
0.08745364099740982,
-0.347101628780365,
-0.4314247667789459,
-0.5060274004936218,
0.003706763498485088,
0.44320008158683777,
-0.00788921769708395,
-0.1368623524904251,
-0.17391923069953918,
0.14473655819892883,
0.10927865654230118,
0.6974599361419678,
0.005052129738032818,
-0.016953065991401672,
-0.1256176233291626,
-0.036742497235536575,
0.5591985583305359,
-0.37619709968566895,
0.22429119050502777,
0.5403043031692505,
-0.8603790998458862,
-0.3456307053565979,
0.9292937517166138,
0.5074859261512756,
0.6310645937919617,
-0.3091641068458557,
0.46902573108673096,
0.7891915440559387,
0.4499550759792328,
0.2744995653629303,
0.2712305784225464,
-0.04349074140191078,
-0.3638863265514374,
0.7839881777763367,
0.7352104783058167,
-0.19457511603832245,
-0.5957832932472229,
-0.43704694509506226,
-1.084769368171692,
0.4904985725879669,
0.5385226011276245,
0.1891629993915558,
0.12338479608297348,
0.8315675258636475,
-0.07830192148685455,
1.0916285514831543,
-0.28066861629486084,
-1.3585069179534912,
0.5203898549079895,
0.08678033947944641,
-0.2566044330596924,
0.09484415501356123,
-0.0180208683013916,
1.0264745950698853,
-0.023572135716676712,
0.5864979028701782,
0.7625196576118469,
-0.2543414533138275,
-0.8877770900726318,
0.7611982822418213,
-0.06220436468720436,
0.937336564064026,
0.2704363465309143,
-0.37733694911003113,
0.5076137781143188,
-0.30641937255859375,
0.6252772808074951,
-0.0823579877614975,
-0.03736555948853493,
0.4131673276424408,
-0.6514252424240112,
0.12918265163898468,
-0.4483584463596344,
0.6750786304473877,
-0.37008383870124817,
-0.02324833907186985,
0.38027650117874146,
-0.26374951004981995,
0.4346931278705597,
0.42882832884788513,
-0.48798441886901855,
1.1882442235946655,
0.5132288336753845,
0.5284568667411804,
-0.03538886830210686,
0.29620853066444397,
-1.0683696269989014,
0.25936177372932434,
0.10404160618782043,
-0.25796034932136536,
0.027896970510482788,
-0.09225251525640488,
1.4811025857925415,
0.641173779964447,
-0.13838383555412292,
-0.3437179923057556,
0.5667019486427307,
-0.5400741696357727,
0.31090837717056274,
0.6470608115196228,
-0.3747067153453827,
-0.7364534735679626,
-0.07431528717279434,
0.5173454880714417,
-0.6578747034072876,
0.7107478976249695,
-0.7918999791145325,
-0.0648345872759819,
0.609937846660614,
-0.7329513430595398,
0.9741371870040894,
0.17912346124649048,
-0.02658769302070141,
0.5162150859832764,
-0.3978803157806397,
-0.7833885550498962,
-0.6497276425361633,
-0.3898126780986786,
-0.0952848568558693,
0.2663288116455078,
-0.1604052186012268,
0.373076468706131,
-0.8357769250869751,
-0.05217683315277099,
-0.2680160701274872,
0.8389158248901367,
0.6833611130714417,
-0.6712407469749451,
0.7406917214393616,
-0.44522786140441895,
-0.34645363688468933,
-0.27384576201438904,
-0.9878405928611756,
-0.8166060447692871,
0.06268279999494553,
0.38567957282066345,
-0.3274703919887543,
0.5296315550804138,
-0.11810623109340668,
0.23029841482639313,
0.08616159111261368,
-0.2195747196674347,
0.09430307894945145,
0.4057176411151886,
0.4892159104347229,
-0.1636916548013687,
-0.6071445345878601,
0.41256585717201233,
0.622254490852356,
-0.41223976016044617,
-0.6686707139015198,
-0.7474371790885925,
-0.8509522080421448,
-0.16754287481307983,
-0.9078601002693176,
-0.29653599858283997,
-0.5020652413368225,
0.4692700505256653,
0.01281109917908907,
-0.16071580350399017,
0.03388889133930206,
-0.020511148497462273,
0.5027827024459839,
-0.20729811489582065,
0.48107290267944336,
0.33669769763946533,
-0.5275911688804626,
0.48271527886390686,
0.2738940715789795,
-0.033152539283037186,
-0.13629786670207977,
-0.05965912342071533,
-0.26200807094573975,
0.04002794995903969,
-0.34095603227615356,
-3.986898899078369,
-0.46819332242012024,
-0.422744482755661,
-0.169097900390625,
0.6008929014205933,
0.058016058057546616,
-0.11401277780532836,
-0.3077819049358368,
-0.09595538675785063,
0.6723822355270386,
0.19367831945419312,
0.28304359316825867,
0.1609862744808197,
0.7567598819732666,
0.6889985799789429,
0.06907720118761063,
-0.04188092052936554,
-0.7434936165809631,
0.13321782648563385,
0.8456063270568848,
-0.10364038497209548,
-0.45084846019744873,
-0.4758241474628449,
0.43882066011428833,
-0.6432598829269409,
0.7217311859130859,
-0.24189773201942444,
0.12737572193145752,
-1.1008601188659668,
-0.3305315673351288,
0.14614742994308472,
-0.7819333076477051,
0.5287120342254639,
-0.055538054555654526,
0.1877404749393463,
-0.6907662153244019,
0.5616975426673889,
-0.4611121714115143,
-0.26109233498573303,
-0.12898315489292145,
-0.3724522292613983,
-0.7191406488418579,
-0.4425233602523804,
-0.644108235836029,
0.8424481153488159,
0.17532426118850708,
-0.5121750235557556,
-0.6467239260673523,
-0.0008507720194756985,
0.7866212129592896,
-0.02644744887948036,
-0.005045140627771616,
0.015782782807946205,
0.16334445774555206,
-0.1913367658853531,
-0.13697923719882965,
-0.6684983372688293,
0.18346354365348816,
-0.341105580329895,
0.5427411198616028,
0.3779832422733307,
-0.6778115034103394,
-0.2931850254535675,
-0.8805161714553833,
-0.4212774932384491,
-0.5368952751159668,
-1.3937891721725464,
-1.225494146347046,
0.4276703894138336,
1.1205668449401855,
-0.6005299687385559,
0.15732505917549133,
-0.3914784789085388,
-1.357046604156494,
-0.4707142114639282,
-0.1497287154197693,
-0.25035548210144043,
-0.34328439831733704,
0.39083412289619446,
0.1623048633337021,
-0.9275814294815063,
-0.6430015563964844,
0.2973862886428833,
0.5580436587333679,
-0.6232585310935974,
-0.6611042022705078,
0.4015969038009643,
-1.0232892036437988,
-0.2585645020008087,
-0.5431421399116516,
0.5021264553070068,
-0.48601630330085754,
-0.010242084041237833,
0.5862035155296326,
0.7316920161247253,
0.4036808013916016,
0.4269520044326782,
-0.705938458442688,
0.7747307419776917,
0.10164368897676468,
0.7887958884239197,
-0.9612497091293336,
0.12755516171455383,
0.06812842190265656,
-0.022603651508688927,
0.14722754061222076,
-0.5588505268096924,
-0.20689940452575684,
0.3557641804218292,
-0.6812759637832642,
0.2860803008079529,
-0.38954633474349976,
0.1759403496980667,
-0.5678874850273132,
-0.1692986786365509,
-0.14578519761562347,
0.5711379051208496,
1.0208125114440918,
0.7759483456611633,
-0.372348427772522,
-0.5460885763168335,
0.7190321683883667,
-0.6914990544319153,
0.13365162909030914,
-0.4854792356491089,
0.4054908752441406,
0.4502798914909363,
-0.3041122555732727,
-0.06726965308189392,
-0.05570871382951737,
-0.0455719493329525,
0.4785125255584717,
0.8867972493171692,
0.4107886850833893,
0.6121342182159424,
-0.20477132499217987,
-0.5598517656326294,
-0.6443566679954529,
-0.5905212759971619,
-0.5571200251579285,
0.17573799192905426,
-0.28621870279312134,
0.1685224026441574,
0.09719007462263109,
-0.04223639518022537,
-0.28623101115226746,
-0.1449810117483139,
-0.3789580464363098,
-0.5227636098861694,
-0.049728814512491226,
0.7849089503288269,
0.16792525351047516,
0.9849340915679932,
-0.6559549570083618,
0.35723909735679626,
-0.6822739243507385,
1.2873116731643677,
0.19993330538272855,
0.03512010723352432,
-0.6972134113311768,
0.18453484773635864,
-0.2437680810689926,
0.2156416028738022,
0.5230382680892944,
0.22020135819911957,
0.8314080238342285,
0.15627102553844452,
-0.7330264449119568,
0.3888184726238251,
-0.22034703195095065,
0.5457669496536255,
-0.48084837198257446,
-0.45576658844947815,
-0.09287727624177931,
-0.06968110054731369,
0.35125672817230225,
-0.4278119504451752,
0.2038476765155792,
0.11392722278833388,
0.9433983564376832,
-0.4097744226455689,
0.035297419875860214,
-0.4274404048919678,
-0.25100165605545044,
1.0943366289138794,
-0.07634022831916809,
-0.2925529479980469,
-0.7512530088424683,
0.2649727463722229,
-0.4078235328197479,
-0.3372223973274231,
0.05190162733197212,
0.005654910113662481,
-0.0001571219472680241,
-0.35445958375930786,
-0.7837416529655457,
0.1500556766986847,
0.4383024573326111,
0.6099548935890198,
0.05951934307813645,
-0.21325334906578064,
0.0199207104742527,
-0.22704418003559113,
-0.6481077671051025,
0.37442275881767273,
-1.015955924987793,
0.38637226819992065,
-0.06489371508359909,
-0.494120329618454,
0.3469836115837097,
0.15402406454086304,
-0.7660972476005554,
-0.7053225040435791,
-0.25964751839637756,
0.014004424214363098,
-0.2860170006752014,
-0.17565494775772095,
-0.45117494463920593,
-0.0031954257283359766,
0.09676837921142578,
-0.514464259147644,
0.41698193550109863,
-0.21642713248729703,
-0.5398141145706177,
-0.3647628426551819,
0.37005379796028137,
0.239425927400589,
-0.08833975344896317,
0.934946596622467,
-0.48340797424316406,
0.6241437792778015,
-0.7253676652908325,
-0.04303571209311485,
1.1125205755233765,
-0.15692919492721558,
-0.2914651036262512,
-0.5117168426513672,
0.21365483105182648,
0.4924402534961701,
0.5269662141799927,
0.0352792888879776,
-0.149167999625206,
-0.6019760370254517,
0.08245442807674408,
0.4900692105293274,
0.518824577331543,
-0.00005570516441366635,
-0.553304135799408,
0.22217543423175812,
0.5047767758369446,
0.135724738240242,
1.1511540412902832,
-0.3541218340396881,
-0.9712511897087096,
0.8353699445724487,
-0.39227569103240967,
-0.9117669463157654,
-0.26349931955337524,
0.05597023293375969,
0.20695461332798004,
0.3178807199001312,
1.0663238763809204,
0.5062212347984314,
0.7288597822189331,
0.09899299591779707,
0.553720235824585,
0.675009548664093,
-0.20067055523395536,
0.3138423264026642,
-0.6886593103408813,
-0.2910398542881012,
-1.3186300992965698,
-0.4684459865093231,
-0.095743365585804,
-0.1257995069026947,
-0.4858281314373016,
-0.4935407340526581,
-0.3266896903514862,
-0.3928797245025635,
-0.40803104639053345,
-0.9975396394729614,
0.4229583740234375,
0.37309643626213074,
0.4431034922599793,
0.30364808440208435,
-0.3765178918838501,
0.5616499185562134,
0.16904796659946442,
-0.7343707084655762,
0.2560209631919861,
0.6166825294494629,
0.3200829327106476,
-0.4483652710914612,
0.16224201023578644,
-0.31495288014411926,
-0.42713335156440735,
0.7270734906196594,
0.7049484848976135,
-0.0571461021900177,
0.04477125033736229,
-0.6647796034812927,
1.183672308921814,
0.36199676990509033,
0.046881116926670074,
0.4515796303749085,
0.9278061985969543,
0.31471705436706543,
-0.7073333859443665,
-0.3443860113620758,
0.5440067052841187,
-0.15020819008350372,
-0.541202962398529,
0.5203295946121216,
1.2192286252975464,
-0.9983593225479126,
-0.18758884072303772,
0.2758221924304962,
-0.6511523723602295,
-0.1584404855966568,
-0.236241415143013,
0.2692437767982483,
-0.4941152036190033,
0.4987454116344452,
-0.3331359028816223,
0.3163745701313019,
0.745529294013977,
-0.2905873656272888,
0.13602906465530396,
0.4679684340953827,
1.0555986166000366,
1.075700044631958,
0.5368486046791077,
-0.5118206739425659,
0.8668332099914551,
-0.5726966857910156,
-0.7811751961708069,
0.1938626915216446,
-0.1929349899291992,
0.1757766306400299,
0.6384295225143433,
0.26462844014167786,
0.9542630314826964,
0.19313029944896695,
1.264248013496399,
-0.6304428577423096,
0.0487106591463089,
-0.16211535036563873,
-0.7894763350486755,
0.3582514822483063,
-0.04153040423989296,
0.635784387588501,
0.6554391980171204,
-0.47010496258735657,
-0.8302040696144104,
-0.1350124627351761,
0.2568812072277069,
0.13614831864833832,
-0.2563649117946625,
-1.0434694290161133,
0.3232482671737671,
0.47882452607154846,
0.4298652410507202,
1.0563770532608032,
-0.28917592763900757,
-0.8533256649971008,
0.10648339986801147,
0.6376127004623413,
-0.20832888782024384,
0.2370245456695557,
0.0018312990432605147,
-0.2034837007522583,
0.01051164511591196,
-1.105310082435608,
0.29724350571632385,
0.15604574978351593,
0.1973688006401062,
0.44394731521606445,
0.3974513411521912,
-0.13625948131084442,
0.9571986198425292,
0.2257384955883026,
0.2323588728904724,
-0.5583669543266296,
-0.7854922413825989,
0.1647188365459442,
-1.6098142862319946,
0.318587988615036,
-0.13399995863437653,
-0.2172701060771942,
-0.767514705657959,
-0.5813586711883545,
-0.3195130527019501,
-0.04894036799669266,
0.2929930090904236,
-0.8213384747505188,
0.07181350141763687,
0.7469993829727173,
0.6407455801963806,
0.16365697979927063,
0.7870153188705444,
0.6524736881256104,
0.6399973630905151,
-0.04992736503481865,
-0.03959266096353531,
-0.2512352466583252,
0.8448855876922607,
-0.1422702670097351,
0.1216789186000824,
-1.2647287845611572,
0.5931149125099182,
0.7186052203178406,
-0.06118432432413101,
-1.1942816972732544,
-0.17677085101604462,
0.31543800234794617,
-0.32252824306488037,
0.8255583047866821,
-0.14529970288276672,
-0.2695446312427521,
-0.33378756046295166,
-0.1653425395488739,
0.1454019844532013,
-0.3920115828514099,
0.912214994430542,
-0.7279734015464783,
0.7374742031097412,
0.933980405330658,
0.13429680466651917,
-0.514870285987854,
0.3989711999893189,
-0.11613689363002776,
0.4022413492202759,
-0.9990655779838562,
-0.33749932050704956,
-0.4334589838981629,
-1.376373291015625,
-0.2993924915790558,
-0.09454808384180068,
-0.01314175222069025,
-0.001090060803107917,
0.2137461006641388,
0.2938512861728668,
0.17508235573768616,
0.8260607123374939,
-0.7218498587608337,
0.2414487451314926,
-0.47296759486198425,
-0.3002610504627228,
-1.238540768623352,
0.08663805574178696,
0.6805586218833923,
0.5909030437469482,
-0.42807504534721375,
-0.22887496650218964,
0.47537800669670105,
-1.0474627017974854,
0.6338009238243103,
0.06548397243022919,
0.4971011281013489,
1.3484878540039063
]
],
"userProvided": false
}
}
}

View File

@ -1,785 +0,0 @@
---
source: dump/src/reader/mod.rs
expression: document
---
{
"id": "e1",
"desc": "natural vector",
"_vectors": {
"default": {
"embeddings": [
[
-0.2979458272457123,
-0.5288640856742859,
-0.019957859069108963,
-0.18495318293571472,
0.7429973483085632,
0.5238497257232666,
0.432366281747818,
0.32744166254997253,
0.0020762972999364138,
-0.9507834911346436,
-0.35097137093544006,
0.08469701558351517,
-1.4176613092422483,
0.4647577106952667,
-0.69340580701828,
1.0372896194458008,
0.3716741800308227,
0.06031008064746857,
-0.6152024269104004,
0.007914665155112743,
0.7954924702644348,
-0.20773003995418549,
0.09376765787601472,
0.04508133605122566,
-0.2084471583366394,
-0.1518009901046753,
0.018195509910583496,
-0.07044368237257004,
-0.18119366466999057,
-0.4480230510234833,
0.3822529911994934,
0.1911812424659729,
0.4674372375011444,
0.06963984668254852,
-0.09341949224472046,
0.005675444379448891,
-0.6774799227714539,
-0.7066726684570313,
-0.39256376028060913,
0.04005039855837822,
0.2084812968969345,
-0.7872875928878784,
-0.8205880522727966,
0.2919981777667999,
-0.06004738807678223,
-0.4907574355602264,
-1.5937862396240234,
0.24249385297298431,
-0.14709846675395966,
-0.11860740929841997,
-0.8299489617347717,
0.472964346408844,
-0.497518390417099,
-0.22205302119255063,
-0.4196169078350067,
0.32697558403015137,
-0.360930860042572,
-0.9789686799049376,
0.1887447088956833,
-0.403737336397171,
0.18524253368377688,
0.3768732249736786,
0.3666233420372009,
0.3511938452720642,
0.6985810995101929,
0.41721710562705994,
0.09754953533411026,
0.6204307079315186,
-1.0762996673583984,
-0.06263761967420578,
-0.7376511693000793,
0.6849768161773682,
-0.1745152473449707,
-0.40449759364128113,
0.20757411420345304,
-0.8424443006515503,
0.330015629529953,
0.3489064872264862,
1.0954371690750122,
0.8487558960914612,
1.1076823472976685,
0.61430823802948,
0.4155903458595276,
0.4111340939998626,
0.05753209814429283,
-0.06429877132177353,
-0.765606164932251,
-0.41703930497169495,
-0.508820652961731,
0.19859947264194489,
-0.16607828438282013,
-0.28112146258354187,
0.11032675206661224,
0.38809511065483093,
-0.36498191952705383,
-0.48671194911003113,
0.6755134463310242,
0.03958442434668541,
0.4478721618652344,
-0.10335399955511092,
-0.9546685814857484,
-0.6087718605995178,
0.17498846352100372,
0.08320838958024979,
-1.4478336572647097,
-0.605027437210083,
-0.5867993235588074,
-0.14711688458919525,
-0.5447602272033691,
-0.026259321719408035,
-0.6997418403625488,
-0.07349082082509995,
0.10638900846242905,
-0.7133527398109436,
-0.9396815299987792,
1.087092399597168,
1.1885089874267578,
0.4011896848678589,
-0.4089202582836151,
-0.10938972979784012,
0.6726722121238708,
0.24576938152313232,
-0.24247920513153076,
1.1499971151351929,
0.47813335061073303,
-0.05331678315997124,
0.32338133454322815,
0.4870913326740265,
-0.23144258558750153,
-1.2023426294326782,
0.2349330335855484,
1.080536961555481,
0.29334118962287903,
0.391574501991272,
-0.15818795561790466,
-0.2948290705680847,
-0.024689948186278343,
0.06602869182825089,
0.5937030911445618,
-0.047901444137096405,
-0.512734591960907,
-0.35780075192451477,
0.28751692175865173,
0.4298716187477112,
0.9242428541183472,
-0.17208744585514069,
0.11515070497989656,
-0.0335976779460907,
-0.3422986567020416,
0.5344581604003906,
0.19895796477794647,
0.33001241087913513,
0.6390730142593384,
-0.6074934005737305,
-0.2553696632385254,
0.9644920229911804,
0.2699219584465027,
0.6403993368148804,
-0.6380003690719604,
-0.027310986071825027,
0.638815701007843,
0.27719101309776306,
-0.13553589582443237,
0.750195324420929,
0.1224869191646576,
-0.20613941550254825,
0.8444448709487915,
0.16200250387191772,
-0.24750925600528717,
-0.739950954914093,
-0.28443849086761475,
-1.176282525062561,
0.516107976436615,
0.3774825632572174,
0.10906043648719788,
0.07962015271186829,
0.7384604215621948,
-0.051241904497146606,
1.1730090379714966,
-0.4828610122203827,
-1.404372215270996,
0.8811132311820984,
-0.3839482367038727,
0.022516896948218346,
-0.0491158664226532,
-0.43027013540267944,
1.2049334049224854,
-0.27309560775756836,
0.6883630752563477,
0.8264574408531189,
-0.5020735263824463,
-0.4874092042446137,
0.6007202863693237,
-0.4965405762195587,
1.1302915811538696,
0.032572727650403976,
-0.3731859028339386,
0.658271849155426,
-0.9023059010505676,
0.7400162220001221,
0.014550759457051754,
-0.19699542224407196,
0.2319706380367279,
-0.789058268070221,
-0.14905710518360138,
-0.5826214551925659,
0.207652747631073,
-0.4507439732551574,
-0.3163885474205017,
0.3604124188423157,
-0.45119962096214294,
0.3428427278995514,
0.3005594313144684,
-0.36026081442832947,
1.1014249324798584,
0.40884315967559814,
0.34991952776908875,
-0.1806638240814209,
0.27440476417541504,
-0.7118373513221741,
0.4645499587059021,
0.214790478348732,
-0.2343102991580963,
0.10500429570674896,
-0.28034430742263794,
1.2267805337905884,
1.0561333894729614,
-0.497364342212677,
-0.6143305897712708,
0.24963727593421936,
-0.33136463165283203,
-0.01473914459347725,
0.495918869972229,
-0.6985538005828857,
-1.0033197402954102,
0.35937801003456116,
0.6325868368148804,
-0.6808838844299316,
1.0354058742523191,
-0.7214401960372925,
-0.33318862318992615,
0.874398410320282,
-0.6594992280006409,
0.6830640435218811,
-0.18534131348133087,
0.024834271520376205,
0.19901277124881744,
-0.5992477536201477,
-1.2126628160476685,
-0.9245557188987732,
-0.3898217976093292,
-0.1286519467830658,
0.4217943847179413,
-0.1143646091222763,
0.5630772709846497,
-0.5240639448165894,
0.21152715384960177,
-0.3792001008987427,
0.8266305327415466,
1.170984387397766,
-0.8072142004966736,
0.11382893472909927,
-0.17953898012638092,
-0.1789460331201553,
-0.15078622102737427,
-1.2082908153533936,
-0.7812382578849792,
-0.10903695970773696,
0.7303897142410278,
-0.39054441452026367,
0.19511254131793976,
-0.09121843427419662,
0.22400228679180145,
0.30143046379089355,
0.1141919493675232,
0.48112115263938904,
0.7307931780815125,
0.09701362252235413,
-0.2795647978782654,
-0.3997688889503479,
0.5540812611579895,
0.564578115940094,
-0.40065160393714905,
-0.3629159033298493,
-0.3789091110229492,
-0.7298538088798523,
-0.6996853351593018,
-0.4477842152118683,
-0.289089560508728,
-0.6430277824401855,
0.2344944179058075,
0.3742927014827728,
-0.5079357028007507,
0.28841453790664673,
0.06515737622976303,
0.707315981388092,
0.09498685598373412,
0.8365515470504761,
0.10002726316452026,
-0.7695478200912476,
0.6264724135398865,
0.7562043070793152,
-0.23112858831882477,
-0.2871039807796478,
-0.25010058283805847,
0.2783474028110504,
-0.03224996477365494,
-0.9119359850883484,
-3.6940200328826904,
-0.5099936127662659,
-0.1604711413383484,
0.17453284561634064,
0.41759559512138367,
0.1419190913438797,
-0.11362407356500626,
-0.33312007784843445,
0.11511333286762238,
0.4667884409427643,
-0.0031647447030991316,
0.15879854559898376,
0.3042248487472534,
0.5404849052429199,
0.8515422344207764,
0.06286454200744629,
0.43790125846862793,
-0.8682025074958801,
-0.06363756954669952,
0.5547921657562256,
-0.01483887154608965,
-0.07361344993114471,
-0.929947018623352,
0.3502565622329712,
-0.5080993175506592,
1.0380364656448364,
-0.2017953395843506,
0.21319580078125,
-1.0763001441955566,
-0.556368887424469,
0.1949922740459442,
-0.6445739269256592,
0.6791343688964844,
0.21188358962535855,
0.3736183941364288,
-0.21800459921360016,
0.7597446441650391,
-0.3732394874095917,
-0.4710160195827484,
0.025146087631583217,
0.05341297015547752,
-0.9522109627723694,
-0.6000866889953613,
-0.08469046652317047,
0.5966026186943054,
0.3444081246852875,
-0.461188405752182,
-0.5279349088668823,
0.10296865552663804,
0.5175143480300903,
-0.20671147108078003,
0.13392412662506104,
0.4812754988670349,
0.2993808686733246,
-0.3005635440349579,
0.5141698122024536,
-0.6239235401153564,
0.2877119481563568,
-0.4452739953994751,
0.5621107816696167,
0.5047508478164673,
-0.4226335883140564,
-0.18578553199768064,
-1.1967322826385498,
0.28178197145462036,
-0.8692031502723694,
-1.1812998056411743,
-1.4526212215423584,
0.4645712077617645,
0.9327932000160216,
-0.6560136675834656,
0.461549699306488,
-0.5621527433395386,
-1.328449010848999,
-0.08676894754171371,
0.00021918353741057217,
-0.18864136934280396,
0.1259666532278061,
0.18240638077259064,
-0.14919660985469818,
-0.8965857625007629,
-0.7539900541305542,
0.013973715715110302,
0.504276692867279,
-0.704748272895813,
-0.6428424119949341,
0.6303996443748474,
-0.5404738187789917,
-0.31176653504371643,
-0.21262824535369873,
0.18736739456653595,
-0.7998970746994019,
0.039946746081113815,
0.7390344738960266,
0.4283199906349182,
0.3795057237148285,
0.07204607129096985,
-0.9230587482452391,
0.9440426230430604,
0.26272690296173096,
0.5598306655883789,
-1.0520871877670288,
-0.2677186131477356,
-0.1888762265443802,
0.30426350235939026,
0.4746131896972656,
-0.5746733546257019,
-0.4197768568992615,
0.8565112948417664,
-0.6767723560333252,
0.23448683321475983,
-0.2010004222393036,
0.4112907350063324,
-0.6497949957847595,
-0.418667733669281,
-0.4950824975967407,
0.44438859820365906,
1.026281714439392,
0.482397586107254,
-0.26220494508743286,
-0.3640787005424499,
0.5907743573188782,
-0.8771642446517944,
0.09708411991596222,
-0.3671700060367584,
0.4331349730491638,
0.619417667388916,
-0.2684665620326996,
-0.5123821496963501,
-0.1502324342727661,
-0.012190685607492924,
0.3580845892429352,
0.8617186546325684,
0.3493645489215851,
1.0270192623138428,
0.18297909200191495,
-0.5881339311599731,
-0.1733516901731491,
-0.5040576457977295,
-0.340370237827301,
-0.26767754554748535,
-0.28570041060447693,
-0.032928116619586945,
0.6029254794120789,
0.17397655546665192,
0.09346921741962431,
0.27815181016921997,
-0.46699589490890503,
-0.8148876428604126,
-0.3964351713657379,
0.3812595009803772,
0.13547226786613464,
0.7126688361167908,
-0.3473474085330963,
-0.06573959439992905,
-0.6483767032623291,
1.4808889627456665,
0.30924928188323975,
-0.5085946917533875,
-0.8613000512123108,
0.3048902451992035,
-0.4241599142551422,
0.15909206867218018,
0.5764641761779785,
-0.07879110425710678,
1.015336513519287,
0.07599356025457382,
-0.7025855779647827,
0.30047643184661865,
-0.35094937682151794,
0.2522146999835968,
-0.2338722199201584,
-0.8326804637908936,
-0.13695412874221802,
-0.03452421352267265,
0.47974953055381775,
-0.18385636806488037,
0.32438594102859497,
0.1797013282775879,
0.787494957447052,
-0.12579888105392456,
-0.07507286965847015,
-0.4389670491218567,
0.2720070779323578,
0.8138866424560547,
0.01974171027541161,
-0.3057698905467987,
-0.6709924936294556,
0.0885881632566452,
-0.2862754464149475,
0.03475658595561981,
-0.1285519152879715,
0.3838353455066681,
-0.2944154739379883,
-0.4204859137535095,
-0.4416137933731079,
0.13426260650157928,
0.36733248829841614,
0.573428750038147,
-0.14928072690963745,
-0.026076916605234143,
0.33286052942276,
-0.5340145826339722,
-0.17279052734375,
-0.01154550164937973,
-0.6620771884918213,
0.18390542268753052,
-0.08265615254640579,
-0.2489682286977768,
0.2429984211921692,
-0.044153645634651184,
-0.986578404903412,
-0.33574509620666504,
-0.5387663841247559,
0.19767941534519196,
0.12540718913078308,
-0.3403128981590271,
-0.4154576361179352,
0.17275673151016235,
0.09407442808151244,
-0.5414086580276489,
0.4393929839134216,
0.1725579798221588,
-0.4998118281364441,
-0.6926208138465881,
0.16552448272705078,
0.6659538149833679,
-0.10949844866991044,
0.986426830291748,
0.01748848147690296,
0.4003709554672241,
-0.5430638194084167,
0.35347291827201843,
0.6887399554252625,
0.08274628221988678,
0.13407137989997864,
-0.591465950012207,
0.3446292281150818,
0.6069018244743347,
0.1935492902994156,
-0.0989871397614479,
0.07008486241102219,
-0.8503749370574951,
-0.09507356584072112,
0.6259510517120361,
0.13934025168418884,
0.06392545253038406,
-0.4112265408039093,
-0.08475656062364578,
0.4974113404750824,
-0.30606114864349365,
1.111435890197754,
-0.018766529858112335,
-0.8422622680664063,
0.4325508773326874,
-0.2832120656967163,
-0.4859798848628998,
-0.41498348116874695,
0.015977520495653152,
0.5292825698852539,
0.4538311660289765,
1.1328668594360352,
0.22632671892642975,
0.7918671369552612,
0.33401933312416077,
0.7306135296821594,
0.3548600673675537,
0.12506209313869476,
0.8573207855224609,
-0.5818327069282532,
-0.6953738927841187,
-1.6171947717666626,
-0.1699674427509308,
0.6318262815475464,
-0.05671752244234085,
-0.28145185112953186,
-0.3976689279079437,
-0.2041076272726059,
-0.5495951175689697,
-0.5152917504310608,
-0.9309796094894408,
0.101932130753994,
0.1367802917957306,
0.1490798443555832,
0.5304336547851563,
-0.5082434415817261,
0.06688683480024338,
0.14657628536224365,
-0.782435953617096,
0.2962816655635834,
0.6965363621711731,
0.8496337532997131,
-0.3042965829372406,
0.04343798756599426,
0.0330701619386673,
-0.5662598013877869,
1.1086925268173218,
0.756072998046875,
-0.204134538769722,
0.2404300570487976,
-0.47848284244537354,
1.3659011125564575,
0.5645433068275452,
-0.15836156904697418,
0.43395575881004333,
0.5944653749465942,
1.0043466091156006,
-0.49446743726730347,
-0.5954391360282898,
0.5341240763664246,
0.020598189905285835,
-0.4036853015422821,
0.4473709762096405,
1.1998231410980225,
-0.9317775368690492,
-0.23321466147899628,
0.2052552700042725,
-0.7423108816146851,
-0.19917210936546328,
-0.1722569614648819,
-0.034072667360305786,
-0.00671181408688426,
0.46396249532699585,
-0.1372445821762085,
0.053376372903585434,
0.7392690777778625,
-0.38447609543800354,
0.07497968524694443,
0.5197252631187439,
1.3746477365493774,
0.9060075879096984,
0.20000585913658145,
-0.4053704142570496,
0.7497360110282898,
-0.34087055921554565,
-1.101803183555603,
0.273650586605072,
-0.5125769376754761,
0.22472351789474487,
0.480757474899292,
-0.19845178723335263,
0.8857700824737549,
0.30752456188201904,
1.1109285354614258,
-0.6768012642860413,
0.524367094039917,
-0.22495046257972717,
-0.4224412739276886,
0.40753406286239624,
-0.23133376240730288,
0.3297771215438843,
0.4905449151992798,
-0.6813114285469055,
-0.7543983459472656,
-0.5599071383476257,
0.14351597428321838,
-0.029278717935085297,
-0.3970443606376648,
-0.303079217672348,
0.24161772429943085,
0.008353390730917454,
-0.0062365154735744,
1.0824860334396362,
-0.3704061508178711,
-1.0337258577346802,
0.04638749733567238,
1.163011074066162,
-0.31737643480300903,
0.013986887410283089,
0.19223114848136905,
-0.2260770797729492,
-0.210910826921463,
-1.0191949605941772,
0.22356095910072327,
0.09353553503751756,
0.18096882104873657,
0.14867214858531952,
0.43408671021461487,
-0.33312076330184937,
0.8173948526382446,
0.6428242921829224,
0.20215003192424777,
-0.6634518504142761,
-0.4132290482521057,
0.29815030097961426,
-1.579406976699829,
-0.0981958732008934,
-0.03941014781594277,
0.1709178239107132,
-0.5481140613555908,
-0.5338194966316223,
-0.3528362512588501,
-0.11561278253793716,
-0.21793591976165771,
-1.1570470333099363,
0.2157980799674988,
0.42083489894866943,
0.9639263153076172,
0.09747201204299928,
0.15671424567699432,
0.4034591615200043,
0.6728067994117737,
-0.5216875672340393,
0.09657668322324751,
-0.2416689097881317,
0.747975766658783,
0.1021689772605896,
0.11652665585279463,
-1.0484966039657593,
0.8489304780960083,
0.7169828414916992,
-0.09012343734502792,
-1.3173753023147583,
0.057890523225069046,
-0.006231260951608419,
-0.1018214002251625,
0.936040461063385,
-0.0502331368625164,
-0.4284322261810303,
-0.38209280371665955,
-0.22668412327766416,
0.0782942995429039,
-0.4881664514541626,
0.9268959760665894,
0.001867273123934865,
0.42261114716529846,
0.8283362984657288,
0.4256294071674347,
-0.7965338826179504,
0.4840078353881836,
-0.19861412048339844,
0.33977967500686646,
-0.4604192078113556,
-0.3107339143753052,
-0.2839638590812683,
-1.5734281539916992,
0.005220232997089624,
0.09239906817674635,
-0.7828494906425476,
-0.1397123783826828,
0.2576255202293396,
0.21372435986995697,
-0.23169949650764465,
0.4016408920288086,
-0.462497353553772,
-0.2186472862958908,
-0.5617868900299072,
-0.3649831712245941,
-1.1585862636566162,
-0.08222806453704834,
0.931126832962036,
0.4327389597892761,
-0.46451422572135925,
-0.5430706143379211,
-0.27434298396110535,
-0.9479129314422609,
0.1845661848783493,
0.3972720205783844,
0.4883299469947815,
1.04031240940094
]
],
"userProvided": false
}
}
}

View File

@ -1,780 +0,0 @@
---
source: dump/src/reader/mod.rs
expression: document
---
{
"id": "e0",
"desc": "overriden vector",
"_vectors": {
"default": [
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1
]
}
}

View File

@ -56,7 +56,8 @@ pub enum RankingRule {
Desc(String),
}
static ASC_DESC_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"(asc|desc)\(([\w_-]+)\)").unwrap());
static ASC_DESC_REGEX: Lazy<Regex> =
Lazy::new(|| Regex::new(r#"(asc|desc)\(([\w_-]+)\)"#).unwrap());
impl FromStr for RankingRule {
type Err = ();

View File

@ -61,7 +61,7 @@ pub enum IndexDocumentsMethod {
#[cfg_attr(test, derive(serde::Serialize))]
#[non_exhaustive]
pub enum UpdateFormat {
/// The given update is a real **comma separated** CSV with headers on the first line.
/// The given update is a real **comma seperated** CSV with headers on the first line.
Csv,
/// The given update is a JSON array with documents inside.
Json,

View File

@ -1,6 +1,5 @@
use serde::{Deserialize, Serialize};
#[allow(clippy::enum_variant_names)]
#[derive(Serialize, Deserialize, Debug, Clone, Copy)]
pub enum Code {
// index related error

View File

@ -95,7 +95,6 @@ impl fmt::Display for ErrorType {
}
}
#[allow(clippy::enum_variant_names)]
#[derive(Serialize, Deserialize, Debug, Clone, Copy)]
pub enum Code {
// index related error

View File

@ -31,7 +31,6 @@ impl ResponseError {
}
}
#[allow(clippy::enum_variant_names)]
#[derive(Deserialize, Debug, Clone, Copy)]
#[cfg_attr(test, derive(serde::Serialize))]
pub enum Code {

View File

@ -2,10 +2,10 @@ use std::fs::{self, File};
use std::io::{BufRead, BufReader, ErrorKind};
use std::path::Path;
use log::debug;
pub use meilisearch_types::milli;
use tempfile::TempDir;
use time::OffsetDateTime;
use tracing::debug;
use uuid::Uuid;
use super::Document;

View File

@ -219,7 +219,7 @@ pub(crate) mod test {
fn _create_directory_hierarchy(dir: &Path, depth: usize) -> String {
let mut ret = String::new();
// the entries are not guaranteed to be returned in the same order thus we need to sort them.
// the entries are not guarenteed to be returned in the same order thus we need to sort them.
let mut entries =
fs::read_dir(dir).unwrap().collect::<std::result::Result<Vec<_>, _>>().unwrap();

View File

@ -11,10 +11,9 @@ edition.workspace = true
license.workspace = true
[dependencies]
tempfile = "3.9.0"
thiserror = "1.0.56"
tracing = "0.1.40"
uuid = { version = "1.6.1", features = ["serde", "v4"] }
tempfile = "3.5.0"
thiserror = "1.0.40"
uuid = { version = "1.3.1", features = ["serde", "v4"] }
[dev-dependencies]
faux = "0.1.10"
faux = "0.1.9"

View File

@ -1,5 +1,5 @@
use std::fs::File as StdFile;
use std::io::Write;
use std::ops::{Deref, DerefMut};
use std::path::{Path, PathBuf};
use std::str::FromStr;
@ -22,6 +22,20 @@ pub enum Error {
pub type Result<T> = std::result::Result<T, Error>;
impl Deref for File {
type Target = NamedTempFile;
fn deref(&self) -> &Self::Target {
&self.file
}
}
impl DerefMut for File {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.file
}
}
#[derive(Clone, Debug)]
pub struct FileStore {
path: PathBuf,
@ -42,7 +56,7 @@ impl FileStore {
let file = NamedTempFile::new_in(&self.path)?;
let uuid = Uuid::new_v4();
let path = self.path.join(uuid.to_string());
let update_file = File { file: Some(file), path };
let update_file = File { file, path };
Ok((uuid, update_file))
}
@ -53,7 +67,7 @@ impl FileStore {
let file = NamedTempFile::new_in(&self.path)?;
let uuid = Uuid::from_u128(uuid);
let path = self.path.join(uuid.to_string());
let update_file = File { file: Some(file), path };
let update_file = File { file, path };
Ok((uuid, update_file))
}
@ -61,13 +75,7 @@ impl FileStore {
/// Returns the file corresponding to the requested uuid.
pub fn get_update(&self, uuid: Uuid) -> Result<StdFile> {
let path = self.get_update_path(uuid);
let file = match StdFile::open(path) {
Ok(file) => file,
Err(e) => {
tracing::error!("Can't access update file {uuid}: {e}");
return Err(e.into());
}
};
let file = StdFile::open(path)?;
Ok(file)
}
@ -102,12 +110,8 @@ impl FileStore {
pub fn delete(&self, uuid: Uuid) -> Result<()> {
let path = self.path.join(uuid.to_string());
if let Err(e) = std::fs::remove_file(path) {
tracing::error!("Can't delete file {uuid}: {e}");
Err(e.into())
} else {
Ok(())
}
std::fs::remove_file(path)?;
Ok(())
}
/// List the Uuids of the files in the FileStore
@ -132,40 +136,16 @@ impl FileStore {
pub struct File {
path: PathBuf,
file: Option<NamedTempFile>,
file: NamedTempFile,
}
impl File {
pub fn dry_file() -> Result<Self> {
Ok(Self { path: PathBuf::new(), file: None })
}
pub fn persist(self) -> Result<()> {
if let Some(file) = self.file {
file.persist(&self.path)?;
}
self.file.persist(&self.path)?;
Ok(())
}
}
impl Write for File {
fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
if let Some(file) = self.file.as_mut() {
file.write(buf)
} else {
Ok(buf.len())
}
}
fn flush(&mut self) -> std::io::Result<()> {
if let Some(file) = self.file.as_mut() {
file.flush()
} else {
Ok(())
}
}
}
#[cfg(test)]
mod test {
use std::io::Write;

View File

@ -13,8 +13,8 @@ license.workspace = true
[dependencies]
nom = "7.1.3"
nom_locate = "4.2.0"
unescaper = "0.1.3"
nom_locate = "4.1.0"
unescaper = "0.1.2"
[dev-dependencies]
insta = "1.34.0"
insta = "1.29.0"

View File

@ -564,11 +564,11 @@ pub mod tests {
#[test]
fn parse_escaped() {
insta::assert_display_snapshot!(p(r"title = 'foo\\'"), @r#"{title} = {foo\}"#);
insta::assert_display_snapshot!(p(r"title = 'foo\\\\'"), @r#"{title} = {foo\\}"#);
insta::assert_display_snapshot!(p(r"title = 'foo\\\\\\'"), @r#"{title} = {foo\\\}"#);
insta::assert_display_snapshot!(p(r"title = 'foo\\\\\\\\'"), @r#"{title} = {foo\\\\}"#);
// but it also works with other sequences
insta::assert_display_snapshot!(p(r#"title = 'foo\\'"#), @r#"{title} = {foo\}"#);
insta::assert_display_snapshot!(p(r#"title = 'foo\\\\'"#), @r#"{title} = {foo\\}"#);
insta::assert_display_snapshot!(p(r#"title = 'foo\\\\\\'"#), @r#"{title} = {foo\\\}"#);
insta::assert_display_snapshot!(p(r#"title = 'foo\\\\\\\\'"#), @r#"{title} = {foo\\\\}"#);
// but it also works with other sequencies
insta::assert_display_snapshot!(p(r#"title = 'foo\x20\n\t\"\'"'"#), @"{title} = {foo \n\t\"\'\"}");
}

View File

@ -42,7 +42,7 @@ fn quoted_by(quote: char, input: Span) -> IResult<Token> {
)));
}
}
// if it was preceded by a `\` or if it was anything else we can continue to advance
// if it was preceeded by a `\` or if it was anything else we can continue to advance
}
Ok((
@ -270,8 +270,8 @@ pub mod test {
("aaaa", "", rtok("", "aaaa"), "aaaa"),
(r#"aa"aa"#, r#""aa"#, rtok("", "aa"), "aa"),
(r#"aa\"aa"#, r#""#, rtok("", r#"aa\"aa"#), r#"aa"aa"#),
(r"aa\\\aa", r#""#, rtok("", r"aa\\\aa"), r"aa\\\aa"),
(r#"aa\\"\aa"#, r#""\aa"#, rtok("", r"aa\\"), r"aa\\"),
(r#"aa\\\aa"#, r#""#, rtok("", r#"aa\\\aa"#), r#"aa\\\aa"#),
(r#"aa\\"\aa"#, r#""\aa"#, rtok("", r#"aa\\"#), r#"aa\\"#),
(r#"aa\\\"\aa"#, r#""#, rtok("", r#"aa\\\"\aa"#), r#"aa\\"\aa"#),
(r#"\"\""#, r#""#, rtok("", r#"\"\""#), r#""""#),
];
@ -301,12 +301,12 @@ pub mod test {
);
// simple quote
assert_eq!(
unescape(Span::new_extra(r"Hello \'World\'", ""), '\''),
unescape(Span::new_extra(r#"Hello \'World\'"#, ""), '\''),
r#"Hello 'World'"#.to_string()
);
assert_eq!(
unescape(Span::new_extra(r"Hello \\\'World\\\'", ""), '\''),
r"Hello \\'World\\'".to_string()
unescape(Span::new_extra(r#"Hello \\\'World\\\'"#, ""), '\''),
r#"Hello \\'World\\'"#.to_string()
);
}
@ -335,19 +335,19 @@ pub mod test {
("\"cha'nnel\"", "cha'nnel", false),
("I'm tamo", "I", false),
// escaped thing but not quote
(r#""\\""#, r"\", true),
(r#""\\\\\\""#, r"\\\", true),
(r#""aa\\aa""#, r"aa\aa", true),
(r#""\\""#, r#"\"#, true),
(r#""\\\\\\""#, r#"\\\"#, true),
(r#""aa\\aa""#, r#"aa\aa"#, true),
// with double quote
(r#""Hello \"world\"""#, r#"Hello "world""#, true),
(r#""Hello \\\"world\\\"""#, r#"Hello \"world\""#, true),
(r#""I'm \"super\" tamo""#, r#"I'm "super" tamo"#, true),
(r#""\"\"""#, r#""""#, true),
// with simple quote
(r"'Hello \'world\''", r#"Hello 'world'"#, true),
(r"'Hello \\\'world\\\''", r"Hello \'world\'", true),
(r#"'Hello \'world\''"#, r#"Hello 'world'"#, true),
(r#"'Hello \\\'world\\\''"#, r#"Hello \'world\'"#, true),
(r#"'I\'m "super" tamo'"#, r#"I'm "super" tamo"#, true),
(r"'\'\''", r#"''"#, true),
(r#"'\'\''"#, r#"''"#, true),
];
for (input, expected, escaped) in test_case {

View File

@ -11,10 +11,10 @@ edition.workspace = true
license.workspace = true
[dependencies]
arbitrary = { version = "1.3.2", features = ["derive"] }
clap = { version = "4.4.17", features = ["derive"] }
fastrand = "2.0.1"
arbitrary = { version = "1.3.0", features = ["derive"] }
clap = { version = "4.3.0", features = ["derive"] }
fastrand = "2.0.0"
milli = { path = "../milli" }
serde = { version = "1.0.195", features = ["derive"] }
serde_json = { version = "1.0.111", features = ["preserve_order"] }
tempfile = "3.9.0"
serde = { version = "1.0.160", features = ["derive"] }
serde_json = { version = "1.0.95", features = ["preserve_order"] }
tempfile = "3.5.0"

View File

@ -113,7 +113,7 @@ fn main() {
index.documents(&wtxn, res.documents_ids).unwrap();
progression.fetch_add(1, Ordering::Relaxed);
}
wtxn.abort();
wtxn.abort().unwrap();
});
if let err @ Err(_) = handle.join() {
stop.store(true, Ordering::Relaxed);

View File

@ -11,36 +11,32 @@ edition.workspace = true
license.workspace = true
[dependencies]
anyhow = "1.0.79"
anyhow = "1.0.70"
bincode = "1.3.3"
csv = "1.3.0"
csv = "1.2.1"
derive_builder = "0.12.0"
dump = { path = "../dump" }
enum-iterator = "1.5.0"
enum-iterator = "1.4.0"
file-store = { path = "../file-store" }
flate2 = "1.0.28"
log = "0.4.17"
meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-types = { path = "../meilisearch-types" }
page_size = "0.5.0"
rayon = "1.8.1"
roaring = { version = "0.10.2", features = ["serde"] }
serde = { version = "1.0.195", features = ["derive"] }
serde_json = { version = "1.0.111", features = ["preserve_order"] }
puffin = { version = "0.16.0", features = ["serialization"] }
roaring = { version = "0.10.1", features = ["serde"] }
serde = { version = "1.0.160", features = ["derive"] }
serde_json = { version = "1.0.95", features = ["preserve_order"] }
synchronoise = "1.0.1"
tempfile = "3.9.0"
thiserror = "1.0.56"
time = { version = "0.3.31", features = [
"serde-well-known",
"formatting",
"parsing",
"macros",
] }
tracing = "0.1.40"
ureq = "2.9.7"
uuid = { version = "1.6.1", features = ["serde", "v4"] }
tempfile = "3.5.0"
thiserror = "1.0.40"
time = { version = "0.3.20", features = ["serde-well-known", "formatting", "parsing", "macros"] }
ureq = "2.9.1"
uuid = { version = "1.3.1", features = ["serde", "v4"] }
[dev-dependencies]
big_s = "1.0.2"
crossbeam = "0.8.4"
insta = { version = "1.34.0", features = ["json", "redactions"] }
crossbeam = "0.8.2"
insta = { version = "1.29.0", features = ["json", "redactions"] }
meili-snap = { path = "../meili-snap" }
nelson = { git = "https://github.com/meilisearch/nelson.git", rev = "675f13885548fb415ead8fbb447e9e6d9314000a"}

View File

@ -870,7 +870,7 @@ mod tests {
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None), settings(false), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments,false, None), settings(false), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None), settings(false), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, false))");
// The third and final case is when the first task doesn't create an index but is directly followed by a task creating an index. In this case we can't batch whit what
// The third and final case is when the first task doesn't create an index but is directly followed by a task creating an index. In this case we can't batch whith what
// follows because we first need to process the erroneous batch.
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments,false, None), settings(true), idx_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None), settings(true), idx_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");

View File

@ -13,7 +13,7 @@ We can combine the two tasks in a single batch:
1. import documents X and Y
Processing this batch is functionally equivalent to processing the two
tasks individually, but should be much faster since we are only performing
tasks individally, but should be much faster since we are only performing
one indexing operation.
*/
@ -24,6 +24,7 @@ use std::fs::{self, File};
use std::io::BufWriter;
use dump::IndexMetadata;
use log::{debug, error, info, trace};
use meilisearch_types::error::Code;
use meilisearch_types::heed::{RoTxn, RwTxn};
use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader};
@ -31,10 +32,7 @@ use meilisearch_types::milli::heed::CompactionOption;
use meilisearch_types::milli::update::{
IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig, Settings as MilliSettings,
};
use meilisearch_types::milli::vector::parsed_vectors::{
ExplicitVectors, VectorOrArrayOfVectors, RESERVED_VECTORS_FIELD_NAME,
};
use meilisearch_types::milli::{self, Filter};
use meilisearch_types::milli::{self, Filter, BEU32};
use meilisearch_types::settings::{apply_settings_to_builder, Settings, Unchecked};
use meilisearch_types::tasks::{Details, IndexSwap, Kind, KindWithContent, Status, Task};
use meilisearch_types::{compression, Index, VERSION_FILE_NAME};
@ -62,7 +60,7 @@ pub(crate) enum Batch {
/// The list of tasks that were processing when this task cancelation appeared.
previous_processing_tasks: RoaringBitmap,
},
TaskDeletions(Vec<Task>),
TaskDeletion(Task),
SnapshotCreation(Vec<Task>),
Dump(Task),
IndexOperation {
@ -145,28 +143,23 @@ pub(crate) enum IndexOperation {
impl Batch {
/// Return the task ids associated with this batch.
pub fn ids(&self) -> RoaringBitmap {
pub fn ids(&self) -> Vec<TaskId> {
match self {
Batch::TaskCancelation { task, .. }
| Batch::TaskDeletion(task)
| Batch::Dump(task)
| Batch::IndexCreation { task, .. }
| Batch::IndexUpdate { task, .. } => {
RoaringBitmap::from_sorted_iter(std::iter::once(task.uid)).unwrap()
}
Batch::SnapshotCreation(tasks)
| Batch::TaskDeletions(tasks)
| Batch::IndexDeletion { tasks, .. } => {
RoaringBitmap::from_iter(tasks.iter().map(|task| task.uid))
| Batch::IndexUpdate { task, .. } => vec![task.uid],
Batch::SnapshotCreation(tasks) | Batch::IndexDeletion { tasks, .. } => {
tasks.iter().map(|task| task.uid).collect()
}
Batch::IndexOperation { op, .. } => match op {
IndexOperation::DocumentOperation { tasks, .. }
| IndexOperation::Settings { tasks, .. }
| IndexOperation::DocumentClear { tasks, .. } => {
RoaringBitmap::from_iter(tasks.iter().map(|task| task.uid))
}
IndexOperation::IndexDocumentDeletionByFilter { task, .. } => {
RoaringBitmap::from_sorted_iter(std::iter::once(task.uid)).unwrap()
tasks.iter().map(|task| task.uid).collect()
}
IndexOperation::IndexDocumentDeletionByFilter { task, .. } => vec![task.uid],
IndexOperation::SettingsAndDocumentOperation {
document_import_tasks: tasks,
settings_tasks: other,
@ -176,11 +169,9 @@ impl Batch {
cleared_tasks: tasks,
settings_tasks: other,
..
} => RoaringBitmap::from_iter(tasks.iter().chain(other).map(|task| task.uid)),
} => tasks.iter().chain(other).map(|task| task.uid).collect(),
},
Batch::IndexSwap { task } => {
RoaringBitmap::from_sorted_iter(std::iter::once(task.uid)).unwrap()
}
Batch::IndexSwap { task } => vec![task.uid],
}
}
@ -189,7 +180,7 @@ impl Batch {
use Batch::*;
match self {
TaskCancelation { .. }
| TaskDeletions(_)
| TaskDeletion(_)
| SnapshotCreation(_)
| Dump(_)
| IndexSwap { .. } => None,
@ -208,7 +199,7 @@ impl fmt::Display for Batch {
let tasks = self.ids();
match self {
Batch::TaskCancelation { .. } => f.write_str("TaskCancelation")?,
Batch::TaskDeletions(_) => f.write_str("TaskDeletion")?,
Batch::TaskDeletion(_) => f.write_str("TaskDeletion")?,
Batch::SnapshotCreation(_) => f.write_str("SnapshotCreation")?,
Batch::Dump(_) => f.write_str("Dump")?,
Batch::IndexOperation { op, .. } => write!(f, "{op}")?,
@ -524,11 +515,12 @@ impl IndexScheduler {
/// 3. We get the *next* snapshot to process.
/// 4. We get the *next* dump to process.
/// 5. We get the *next* tasks to process for a specific index.
#[tracing::instrument(level = "trace", skip(self, rtxn), target = "indexing::scheduler")]
pub(crate) fn create_next_batch(&self, rtxn: &RoTxn) -> Result<Option<Batch>> {
#[cfg(test)]
self.maybe_fail(crate::tests::FailureLocation::InsideCreateBatch)?;
puffin::profile_function!();
let enqueued = &self.get_status(rtxn, Status::Enqueued)?;
let to_cancel = self.get_kind(rtxn, Kind::TaskCancelation)? & enqueued;
@ -547,9 +539,9 @@ impl IndexScheduler {
// 2. we get the next task to delete
let to_delete = self.get_kind(rtxn, Kind::TaskDeletion)? & enqueued;
if !to_delete.is_empty() {
let tasks = self.get_existing_tasks(rtxn, to_delete)?;
return Ok(Some(Batch::TaskDeletions(tasks)));
if let Some(task_id) = to_delete.min() {
let task = self.get_task(rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?;
return Ok(Some(Batch::TaskDeletion(task)));
}
// 3. we batch the snapshot.
@ -592,9 +584,7 @@ impl IndexScheduler {
let index_tasks = self.index_tasks(rtxn, index_name)? & enqueued;
// If autobatching is disabled we only take one task at a time.
// Otherwise, we take only a maximum of tasks to create batches.
let tasks_limit =
if self.autobatching_enabled { self.max_number_of_batched_tasks } else { 1 };
let tasks_limit = if self.autobatching_enabled { usize::MAX } else { 1 };
let enqueued = index_tasks
.into_iter()
@ -628,7 +618,6 @@ impl IndexScheduler {
/// The list of tasks that were processed. The metadata of each task in the returned
/// list is updated accordingly, with the exception of its date fields
/// [`finished_at`](meilisearch_types::tasks::Task::finished_at) and [`started_at`](meilisearch_types::tasks::Task::started_at).
#[tracing::instrument(level = "trace", skip(self, batch), target = "indexing::scheduler", fields(batch=batch.to_string()))]
pub(crate) fn process_batch(&self, batch: Batch) -> Result<Vec<Task>> {
#[cfg(test)]
{
@ -637,6 +626,8 @@ impl IndexScheduler {
self.breakpoint(crate::Breakpoint::InsideProcessBatch);
}
puffin::profile_function!(batch.to_string());
match batch {
Batch::TaskCancelation { mut task, previous_started_at, previous_processing_tasks } => {
// 1. Retrieve the tasks that matched the query at enqueue-time.
@ -676,10 +667,9 @@ impl IndexScheduler {
Ok(()) => {
for content_uuid in canceled_tasks_content_uuids {
if let Err(error) = self.delete_update_file(content_uuid) {
tracing::error!(
file_content_uuid = %content_uuid,
%error,
"Failed deleting content file"
error!(
"We failed deleting the content file indentified as {}: {}",
content_uuid, error
)
}
}
@ -689,43 +679,31 @@ impl IndexScheduler {
Ok(vec![task])
}
Batch::TaskDeletions(mut tasks) => {
Batch::TaskDeletion(mut task) => {
// 1. Retrieve the tasks that matched the query at enqueue-time.
let mut matched_tasks = RoaringBitmap::new();
for task in tasks.iter() {
let matched_tasks =
if let KindWithContent::TaskDeletion { tasks, query: _ } = &task.kind {
matched_tasks |= tasks;
tasks
} else {
unreachable!()
}
}
let mut wtxn = self.env.write_txn()?;
let mut deleted_tasks = self.delete_matched_tasks(&mut wtxn, &matched_tasks)?;
wtxn.commit()?;
for task in tasks.iter_mut() {
task.status = Status::Succeeded;
let KindWithContent::TaskDeletion { tasks, query: _ } = &task.kind else {
unreachable!()
};
let deleted_tasks_count = deleted_tasks.intersection_len(tasks);
deleted_tasks -= tasks;
let mut wtxn = self.env.write_txn()?;
let deleted_tasks_count = self.delete_matched_tasks(&mut wtxn, matched_tasks)?;
match &mut task.details {
Some(Details::TaskDeletion {
matched_tasks: _,
deleted_tasks,
original_filter: _,
}) => {
*deleted_tasks = Some(deleted_tasks_count);
}
_ => unreachable!(),
task.status = Status::Succeeded;
match &mut task.details {
Some(Details::TaskDeletion {
matched_tasks: _,
deleted_tasks,
original_filter: _,
}) => {
*deleted_tasks = Some(deleted_tasks_count);
}
_ => unreachable!(),
}
Ok(tasks)
wtxn.commit()?;
Ok(vec![task])
}
Batch::SnapshotCreation(mut tasks) => {
fs::create_dir_all(&self.snapshots_path)?;
@ -737,7 +715,7 @@ impl IndexScheduler {
// 2. Snapshot the index-scheduler LMDB env
//
// When we call copy_to_file, LMDB opens a read transaction by itself,
// When we call copy_to_path, LMDB opens a read transaction by itself,
// we can't provide our own. It is an issue as we would like to know
// the update files to copy but new ones can be enqueued between the copy
// of the env and the new transaction we open to retrieve the enqueued tasks.
@ -750,7 +728,7 @@ impl IndexScheduler {
// 2.1 First copy the LMDB env of the index-scheduler
let dst = temp_snapshot_dir.path().join("tasks");
fs::create_dir_all(&dst)?;
self.env.copy_to_file(dst.join("data.mdb"), CompactionOption::Enabled)?;
self.env.copy_to_path(dst.join("data.mdb"), CompactionOption::Enabled)?;
// 2.2 Create a read transaction on the index-scheduler
let rtxn = self.env.read_txn()?;
@ -775,7 +753,7 @@ impl IndexScheduler {
let index = self.index_mapper.index(&rtxn, name)?;
let dst = temp_snapshot_dir.path().join("indexes").join(uuid.to_string());
fs::create_dir_all(&dst)?;
index.copy_to_file(dst.join("data.mdb"), CompactionOption::Enabled)?;
index.copy_to_path(dst.join("data.mdb"), CompactionOption::Enabled)?;
}
drop(rtxn);
@ -784,13 +762,11 @@ impl IndexScheduler {
let dst = temp_snapshot_dir.path().join("auth");
fs::create_dir_all(&dst)?;
// TODO We can't use the open_auth_store_env function here but we should
let auth = unsafe {
milli::heed::EnvOpenOptions::new()
.map_size(1024 * 1024 * 1024) // 1 GiB
.max_dbs(2)
.open(&self.auth_path)
}?;
auth.copy_to_file(dst.join("data.mdb"), CompactionOption::Enabled)?;
let auth = milli::heed::EnvOpenOptions::new()
.map_size(1024 * 1024 * 1024) // 1 GiB
.max_dbs(2)
.open(&self.auth_path)?;
auth.copy_to_path(dst.join("data.mdb"), CompactionOption::Enabled)?;
// 5. Copy and tarball the flat snapshot
// 5.1 Find the original name of the database
@ -915,64 +891,13 @@ impl IndexScheduler {
if self.must_stop_processing.get() {
return Err(Error::AbortedTask);
}
let (id, doc) = ret?;
let mut document = milli::obkv_to_json(&all_fields, &fields_ids_map, doc)?;
'inject_vectors: {
let embeddings = index.embeddings(&rtxn, id)?;
if embeddings.is_empty() {
break 'inject_vectors;
}
let vectors = document
.entry(RESERVED_VECTORS_FIELD_NAME.to_owned())
.or_insert(serde_json::Value::Object(Default::default()));
let serde_json::Value::Object(vectors) = vectors else {
return Err(milli::Error::UserError(
milli::UserError::InvalidVectorsMapType {
document_id: {
if let Ok(Some(Ok(index))) = index
.external_id_of(&rtxn, std::iter::once(id))
.map(|it| it.into_iter().next())
{
index
} else {
format!("internal docid={id}")
}
},
value: vectors.clone(),
},
)
.into());
};
for (embedder_name, embeddings) in embeddings {
// don't change the entry if it already exists, because it was user-provided
vectors.entry(embedder_name).or_insert_with(|| {
let embeddings = ExplicitVectors {
embeddings: VectorOrArrayOfVectors::from_array_of_vectors(
embeddings,
),
user_provided: false,
};
serde_json::to_value(embeddings).unwrap()
});
}
}
let (_id, doc) = ret?;
let document = milli::obkv_to_json(&all_fields, &fields_ids_map, doc)?;
index_dumper.push_document(&document)?;
}
// 3.2. Dump the settings
let settings = meilisearch_types::settings::settings(
index,
&rtxn,
meilisearch_types::settings::SecretPolicy::RevealSecrets,
)?;
let settings = meilisearch_types::settings::settings(index, &rtxn)?;
index_dumper.settings(&settings)?;
Ok(())
})?;
@ -1009,8 +934,8 @@ impl IndexScheduler {
};
// the index operation can take a long time, so save this handle to make it available to the search for the duration of the tick
self.index_mapper
.set_currently_updating_index(Some((index_uid.clone(), index.clone())));
*self.currently_updating_index.write().unwrap() =
Some((index_uid.clone(), index.clone()));
let mut index_wtxn = index.write_txn()?;
let tasks = self.apply_index_operation(&mut index_wtxn, &index, op)?;
@ -1031,10 +956,7 @@ impl IndexScheduler {
match res {
Ok(_) => (),
Err(e) => tracing::error!(
error = &e as &dyn std::error::Error,
"Could not write the stats of the index"
),
Err(e) => error!("Could not write the stats of the index {}", e),
}
Ok(tasks)
@ -1062,7 +984,7 @@ impl IndexScheduler {
builder.set_primary_key(primary_key);
let must_stop_processing = self.must_stop_processing.clone();
builder.execute(
|indexing_step| tracing::debug!(update = ?indexing_step),
|indexing_step| debug!("update: {:?}", indexing_step),
|| must_stop_processing.get(),
)?;
index_wtxn.commit()?;
@ -1089,10 +1011,7 @@ impl IndexScheduler {
match res {
Ok(_) => (),
Err(e) => tracing::error!(
error = &e as &dyn std::error::Error,
"Could not write the stats of the index"
),
Err(e) => error!("Could not write the stats of the index {}", e),
}
Ok(vec![task])
@ -1187,7 +1106,7 @@ impl IndexScheduler {
for task_id in &index_lhs_task_ids | &index_rhs_task_ids {
let mut task = self.get_task(wtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?;
swap_index_uid_in_task(&mut task, (lhs, rhs));
self.all_tasks.put(wtxn, &task_id, &task)?;
self.all_tasks.put(wtxn, &BEU32::new(task_id), &task)?;
}
// 4. remove the task from indexuid = before_name
@ -1211,17 +1130,14 @@ impl IndexScheduler {
///
/// ## Return
/// The list of processed tasks.
#[tracing::instrument(
level = "trace",
skip(self, index_wtxn, index),
target = "indexing::scheduler"
)]
fn apply_index_operation<'i>(
&self,
index_wtxn: &mut RwTxn<'i>,
index_wtxn: &mut RwTxn<'i, '_>,
index: &'i Index,
operation: IndexOperation,
) -> Result<Vec<Task>> {
puffin::profile_function!();
match operation {
IndexOperation::DocumentClear { mut tasks, .. } => {
let count = milli::update::ClearDocuments::new(index_wtxn, index).execute()?;
@ -1274,7 +1190,7 @@ impl IndexScheduler {
milli::update::Settings::new(index_wtxn, index, indexer_config);
builder.set_primary_key(primary_key);
builder.execute(
|indexing_step| tracing::debug!(update = ?indexing_step),
|indexing_step| debug!("update: {:?}", indexing_step),
|| must_stop_processing.clone().get(),
)?;
primary_key_has_been_set = true;
@ -1284,16 +1200,12 @@ impl IndexScheduler {
let config = IndexDocumentsConfig { update_method: method, ..Default::default() };
let embedder_configs = index.embedding_configs(index_wtxn)?;
// TODO: consider Arc'ing the map too (we only need read access + we'll be cloning it multiple times, so really makes sense)
let embedders = self.embedders(embedder_configs)?;
let mut builder = milli::update::IndexDocuments::new(
index_wtxn,
index,
indexer_config,
config,
|indexing_step| tracing::trace!(?indexing_step, "Update"),
|indexing_step| trace!("update: {:?}", indexing_step),
|| must_stop_processing.get(),
)?;
@ -1306,8 +1218,6 @@ impl IndexScheduler {
let (new_builder, user_result) = builder.add_documents(reader)?;
builder = new_builder;
builder = builder.with_embedders(embedders.clone());
let received_documents =
if let Some(Details::DocumentAdditionOrUpdate {
received_documents,
@ -1365,7 +1275,7 @@ impl IndexScheduler {
if !tasks.iter().all(|res| res.error.is_some()) {
let addition = builder.execute()?;
tracing::info!(indexing_result = ?addition, "document indexing done");
info!("document addition done: {:?}", addition);
} else if primary_key_has_been_set {
// Everything failed but we've set a primary key.
// We need to remove it.
@ -1373,7 +1283,7 @@ impl IndexScheduler {
milli::update::Settings::new(index_wtxn, index, indexer_config);
builder.reset_primary_key();
builder.execute(
|indexing_step| tracing::trace!(update = ?indexing_step),
|indexing_step| trace!("update: {:?}", indexing_step),
|| must_stop_processing.clone().get(),
)?;
}
@ -1443,7 +1353,7 @@ impl IndexScheduler {
let must_stop_processing = self.must_stop_processing.clone();
builder.execute(
|indexing_step| tracing::debug!(update = ?indexing_step),
|indexing_step| debug!("update: {:?}", indexing_step),
|| must_stop_processing.get(),
)?;
@ -1517,11 +1427,7 @@ impl IndexScheduler {
/// Delete each given task from all the databases (if it is deletable).
///
/// Return the number of tasks that were actually deleted.
fn delete_matched_tasks(
&self,
wtxn: &mut RwTxn,
matched_tasks: &RoaringBitmap,
) -> Result<RoaringBitmap> {
fn delete_matched_tasks(&self, wtxn: &mut RwTxn, matched_tasks: &RoaringBitmap) -> Result<u64> {
// 1. Remove from this list the tasks that we are not allowed to delete
let enqueued_tasks = self.get_status(wtxn, Status::Enqueued)?;
let processing_tasks = &self.processing_tasks.read().unwrap().processing.clone();
@ -1573,9 +1479,10 @@ impl IndexScheduler {
}
for task in to_delete_tasks.iter() {
self.all_tasks.delete(wtxn, &task)?;
self.all_tasks.delete(wtxn, &BEU32::new(task))?;
}
for canceled_by in affected_canceled_by {
let canceled_by = BEU32::new(canceled_by);
if let Some(mut tasks) = self.canceled_by.get(wtxn, &canceled_by)? {
tasks -= &to_delete_tasks;
if tasks.is_empty() {
@ -1586,7 +1493,7 @@ impl IndexScheduler {
}
}
Ok(to_delete_tasks)
Ok(to_delete_tasks.len())
}
/// Cancel each given task from all the databases (if it is cancelable).
@ -1623,14 +1530,14 @@ impl IndexScheduler {
task.details = task.details.map(|d| d.to_failed());
self.update_task(wtxn, &task)?;
}
self.canceled_by.put(wtxn, &cancel_task_id, &tasks_to_cancel)?;
self.canceled_by.put(wtxn, &BEU32::new(cancel_task_id), &tasks_to_cancel)?;
Ok(content_files_to_delete)
}
}
fn delete_document_by_filter<'a>(
wtxn: &mut RwTxn<'a>,
wtxn: &mut RwTxn<'a, '_>,
filter: &serde_json::Value,
indexer_config: &IndexerConfig,
must_stop_processing: MustStopProcessing,
@ -1655,7 +1562,7 @@ fn delete_document_by_filter<'a>(
index,
indexer_config,
config,
|indexing_step| tracing::debug!(update = ?indexing_step),
|indexing_step| debug!("update: {:?}", indexing_step),
|| must_stop_processing.get(),
)?;

View File

@ -48,8 +48,6 @@ impl From<DateField> for Code {
pub enum Error {
#[error("{1}")]
WithCustomErrorCode(Code, Box<Self>),
#[error("Received bad task id: {received} should be >= to {expected}.")]
BadTaskId { received: TaskId, expected: TaskId },
#[error("Index `{0}` not found.")]
IndexNotFound(String),
#[error("Index `{0}` already exists.")]
@ -163,7 +161,6 @@ impl Error {
match self {
Error::IndexNotFound(_)
| Error::WithCustomErrorCode(_, _)
| Error::BadTaskId { .. }
| Error::IndexAlreadyExists(_)
| Error::SwapDuplicateIndexFound(_)
| Error::SwapDuplicateIndexesFound(_)
@ -208,7 +205,6 @@ impl ErrorCode for Error {
fn error_code(&self) -> Code {
match self {
Error::WithCustomErrorCode(code, _) => *code,
Error::BadTaskId { .. } => Code::BadRequest,
Error::IndexNotFound(_) => Code::IndexNotFound,
Error::IndexAlreadyExists(_) => Code::IndexAlreadyExists,
Error::SwapDuplicateIndexesFound(_) => Code::InvalidSwapDuplicateIndexFound,

View File

@ -30,6 +30,19 @@ impl RoFeatures {
self.runtime
}
/// Verify that the `score_details` runtime feature is switched on.
///
/// Returns `Ok(())` when `self.runtime.score_details` is set. Otherwise returns a
/// `FeatureNotEnabledError` (converted with `.into()`) that names the disabled
/// action, the feature, and the discussion link for it.
pub fn check_score_details(&self) -> Result<()> {
    // Guard clause: bail out first when the feature is off.
    if !self.runtime.score_details {
        return Err(FeatureNotEnabledError {
            disabled_action: "Computing score details",
            feature: "score details",
            issue_link: "https://github.com/meilisearch/product/discussions/674",
        }
        .into());
    }
    Ok(())
}
pub fn check_metrics(&self) -> Result<()> {
if self.runtime.metrics {
Ok(())
@ -43,27 +56,27 @@ impl RoFeatures {
}
}
pub fn check_logs_route(&self) -> Result<()> {
if self.runtime.logs_route {
pub fn check_vector(&self) -> Result<()> {
if self.runtime.vector_store {
Ok(())
} else {
Err(FeatureNotEnabledError {
disabled_action: "Modifying logs through the `/logs/*` routes",
feature: "logs route",
issue_link: "https://github.com/orgs/meilisearch/discussions/721",
disabled_action: "Passing `vector` as a query parameter",
feature: "vector store",
issue_link: "https://github.com/meilisearch/product/discussions/677",
}
.into())
}
}
pub fn check_vector(&self, disabled_action: &'static str) -> Result<()> {
if self.runtime.vector_store {
pub fn check_puffin(&self) -> Result<()> {
if self.runtime.export_puffin_reports {
Ok(())
} else {
Err(FeatureNotEnabledError {
disabled_action,
feature: "vector store",
issue_link: "https://github.com/meilisearch/product/discussions/677",
disabled_action: "Outputting Puffin reports to disk",
feature: "export puffin reports",
issue_link: "https://github.com/meilisearch/product/discussions/693",
}
.into())
}
@ -81,7 +94,6 @@ impl FeatureData {
runtime_features_db.get(&txn, EXPERIMENTAL_FEATURES)?.unwrap_or_default();
let runtime = Arc::new(RwLock::new(RuntimeTogglableFeatures {
metrics: instance_features.metrics || persisted_features.metrics,
logs_route: instance_features.logs_route || persisted_features.logs_route,
..persisted_features
}));

View File

@ -1,8 +1,12 @@
/// The map size to use when we fail to read it from an index.
const DEFAULT_MAP_SIZE: usize = 10 * 1024 * 1024 * 1024; // 10 GiB
use std::collections::BTreeMap;
use std::path::Path;
use std::time::Duration;
use meilisearch_types::heed::{EnvClosingEvent, EnvFlags, EnvOpenOptions};
use meilisearch_types::heed::flags::Flags;
use meilisearch_types::heed::{EnvClosingEvent, EnvOpenOptions};
use meilisearch_types::milli::Index;
use time::OffsetDateTime;
use uuid::Uuid;
@ -232,7 +236,7 @@ impl IndexMap {
enable_mdb_writemap: bool,
map_size_growth: usize,
) {
let map_size = index.map_size() + map_size_growth;
let map_size = index.map_size().unwrap_or(DEFAULT_MAP_SIZE) + map_size_growth;
let closing_event = index.prepare_for_closing();
let generation = self.next_generation();
self.unavailable.insert(
@ -305,7 +309,7 @@ fn create_or_open_index(
options.map_size(clamp_to_page_size(map_size));
options.max_readers(1024);
if enable_mdb_writemap {
unsafe { options.flags(EnvFlags::WRITE_MAP) };
unsafe { options.flag(Flags::MdbWriteMap) };
}
if let Some((created, updated)) = date {
@ -384,7 +388,7 @@ mod tests {
fn assert_index_size(index: Index, expected: usize) {
let expected = clamp_to_page_size(expected);
let index_map_size = index.map_size();
let index_map_size = index.map_size().unwrap();
assert_eq!(index_map_size, expected);
}
}

View File

@ -3,13 +3,13 @@ use std::sync::{Arc, RwLock};
use std::time::Duration;
use std::{fs, thread};
use log::error;
use meilisearch_types::heed::types::{SerdeJson, Str};
use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn};
use meilisearch_types::milli::update::IndexerConfig;
use meilisearch_types::milli::{FieldDistribution, Index};
use serde::{Deserialize, Serialize};
use time::OffsetDateTime;
use tracing::error;
use uuid::Uuid;
use self::index_map::IndexMap;
@ -69,10 +69,6 @@ pub struct IndexMapper {
/// Whether we open a meilisearch index with the MDB_WRITEMAP option or not.
enable_mdb_writemap: bool,
pub indexer_config: Arc<IndexerConfig>,
/// A few types of long running batches of tasks that act on a single index set this field
/// so that a handle to the index is available from other threads (search) in an optimized manner.
currently_updating_index: Arc<RwLock<Option<(String, Index)>>>,
}
/// Whether the index is available for use or is forbidden to be inserted back in the index map
@ -155,7 +151,6 @@ impl IndexMapper {
index_growth_amount,
enable_mdb_writemap,
indexer_config: Arc::new(indexer_config),
currently_updating_index: Default::default(),
})
}
@ -308,14 +303,6 @@ impl IndexMapper {
/// Return an index, may open it if it wasn't already opened.
pub fn index(&self, rtxn: &RoTxn, name: &str) -> Result<Index> {
if let Some((current_name, current_index)) =
self.currently_updating_index.read().unwrap().as_ref()
{
if current_name == name {
return Ok(current_index.clone());
}
}
let uuid = self
.index_mapping
.get(rtxn, name)?
@ -487,8 +474,4 @@ impl IndexMapper {
/// Borrow the `IndexerConfig` stored behind the `indexer_config` `Arc`.
pub fn indexer_config(&self) -> &IndexerConfig {
    // Explicitly dereference the `Arc` instead of relying on deref coercion.
    let config: &IndexerConfig = &*self.indexer_config;
    config
}
/// Replace the `currently_updating_index` slot with `index`.
///
/// Passing `None` clears the slot; passing `Some((name, index))` stores that pair.
///
/// # Panics
///
/// Panics if the `RwLock` guarding the slot is poisoned.
pub fn set_currently_updating_index(&self, index: Option<(String, Index)>) {
    // Take the write lock, then overwrite the stored value in place.
    let mut slot = self.currently_updating_index.write().unwrap();
    *slot = index;
}
}

View File

@ -1,7 +1,7 @@
use std::collections::BTreeSet;
use std::fmt::Write;
use meilisearch_types::heed::types::{SerdeBincode, SerdeJson, Str};
use meilisearch_types::heed::types::{OwnedType, SerdeBincode, SerdeJson, Str};
use meilisearch_types::heed::{Database, RoTxn};
use meilisearch_types::milli::{CboRoaringBitmapCodec, RoaringBitmapCodec, BEU32};
use meilisearch_types::tasks::{Details, Task};
@ -15,7 +15,6 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
let IndexScheduler {
autobatching_enabled,
cleanup_enabled: _,
must_stop_processing: _,
processing_tasks,
file_store,
@ -31,18 +30,17 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
index_mapper,
features: _,
max_number_of_tasks: _,
max_number_of_batched_tasks: _,
puffin_frame: _,
wake_up: _,
dumps_path: _,
snapshots_path: _,
auth_path: _,
version_file_path: _,
webhook_url: _,
webhook_authorization_header: _,
test_breakpoint_sdr: _,
planned_failures: _,
run_loop_iteration: _,
embedders: _,
currently_updating_index: _,
} = scheduler;
let rtxn = env.read_txn().unwrap();
@ -118,7 +116,7 @@ pub fn snapshot_bitmap(r: &RoaringBitmap) -> String {
snap
}
pub fn snapshot_all_tasks(rtxn: &RoTxn, db: Database<BEU32, SerdeJson<Task>>) -> String {
pub fn snapshot_all_tasks(rtxn: &RoTxn, db: Database<OwnedType<BEU32>, SerdeJson<Task>>) -> String {
let mut snap = String::new();
let iter = db.iter(rtxn).unwrap();
for next in iter {
@ -128,7 +126,10 @@ pub fn snapshot_all_tasks(rtxn: &RoTxn, db: Database<BEU32, SerdeJson<Task>>) ->
snap
}
pub fn snapshot_date_db(rtxn: &RoTxn, db: Database<BEI128, CboRoaringBitmapCodec>) -> String {
pub fn snapshot_date_db(
rtxn: &RoTxn,
db: Database<OwnedType<BEI128>, CboRoaringBitmapCodec>,
) -> String {
let mut snap = String::new();
let iter = db.iter(rtxn).unwrap();
for next in iter {
@ -248,7 +249,10 @@ pub fn snapshot_index_tasks(rtxn: &RoTxn, db: Database<Str, RoaringBitmapCodec>)
}
snap
}
pub fn snapshot_canceled_by(rtxn: &RoTxn, db: Database<BEU32, RoaringBitmapCodec>) -> String {
pub fn snapshot_canceled_by(
rtxn: &RoTxn,
db: Database<OwnedType<BEU32>, RoaringBitmapCodec>,
) -> String {
let mut snap = String::new();
let iter = db.iter(rtxn).unwrap();
for next in iter {

File diff suppressed because it is too large Load Diff

View File

@ -1,19 +0,0 @@
---
source: index-scheduler/src/lib.rs
expression: doc
---
{
"doggo": "kefir",
"breed": "patou",
"_vectors": {
"A_fakerest": {
"embeddings": "[vector]",
"userProvided": true
},
"noise": [
0.1,
0.2,
0.3
]
}
}

View File

@ -1,20 +0,0 @@
---
source: index-scheduler/src/lib.rs
expression: task.details
---
{
"embedders": {
"A_fakerest": {
"source": "rest",
"apiKey": "MyXXXX...",
"dimensions": 384,
"url": "http://localhost:7777"
},
"B_small_hf": {
"source": "huggingFace",
"model": "sentence-transformers/all-MiniLM-L6-v2",
"revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
"documentTemplate": "{{doc.doggo}} the {{doc.breed}} best doggo"
}
}
}

View File

@ -1,23 +0,0 @@
---
source: index-scheduler/src/lib.rs
expression: fakerest_config.embedder_options
---
{
"Rest": {
"api_key": "My super secret",
"distribution": null,
"dimensions": 384,
"url": "http://localhost:7777",
"query": null,
"input_field": [
"input"
],
"path_to_embeddings": [
"data"
],
"embedding_object": [
"embedding"
],
"input_type": "text"
}
}

View File

@ -1,11 +0,0 @@
---
source: index-scheduler/src/lib.rs
expression: simple_hf_config.embedder_options
---
{
"HuggingFace": {
"model": "sentence-transformers/all-MiniLM-L6-v2",
"revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
"distribution": null
}
}

View File

@ -1,19 +0,0 @@
---
source: index-scheduler/src/lib.rs
expression: doc
---
{
"doggo": "Intel",
"breed": "beagle",
"_vectors": {
"A_fakerest": {
"embeddings": "[vector]",
"userProvided": true
},
"noise": [
0.1,
0.2,
0.3
]
}
}

View File

@ -1,20 +0,0 @@
---
source: index-scheduler/src/lib.rs
expression: task.details
---
{
"embedders": {
"A_fakerest": {
"source": "rest",
"apiKey": "MyXXXX...",
"dimensions": 384,
"url": "http://localhost:7777"
},
"B_small_hf": {
"source": "huggingFace",
"model": "sentence-transformers/all-MiniLM-L6-v2",
"revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
"documentTemplate": "{{doc.doggo}} the {{doc.breed}} best doggo"
}
}
}

View File

@ -1,14 +0,0 @@
---
source: index-scheduler/src/lib.rs
expression: task.details
---
{
"embedders": {
"default": {
"source": "rest",
"apiKey": "MyXXXX...",
"dimensions": 4,
"url": "http://localhost:7777"
}
}
}

View File

@ -1,23 +0,0 @@
---
source: index-scheduler/src/lib.rs
expression: embedding_config.embedder_options
---
{
"Rest": {
"api_key": "My super secret",
"distribution": null,
"dimensions": 4,
"url": "http://localhost:7777",
"query": null,
"input_field": [
"input"
],
"path_to_embeddings": [
"data"
],
"embedding_object": [
"embedding"
],
"input_type": "text"
}
}

View File

@ -1,14 +0,0 @@
---
source: index-scheduler/src/lib.rs
expression: task.details
---
{
"embedders": {
"default": {
"source": "rest",
"apiKey": "MyXXXX...",
"dimensions": 4,
"url": "http://localhost:7777"
}
}
}

View File

@ -1,49 +0,0 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), 
document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
2 {uid: 2, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: None, method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued []
succeeded [0,1,2,]
----------------------------------------------------------------------
### Kind:
"documentAdditionOrUpdate" [1,2,]
"settingsUpdate" [0,]
----------------------------------------------------------------------
### Index Tasks:
doggos [0,1,2,]
----------------------------------------------------------------------
### Index Mapper:
doggos: { number_of_documents: 1, field_distribution: {"_vectors": 1, "breed": 1, "doggo": 1, "id": 1} }
----------------------------------------------------------------------
### Canceled By:
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
[timestamp] [2,]
----------------------------------------------------------------------
### Started At:
[timestamp] [0,]
[timestamp] [1,]
[timestamp] [2,]
----------------------------------------------------------------------
### Finished At:
[timestamp] [0,]
[timestamp] [1,]
[timestamp] [2,]
----------------------------------------------------------------------
### File Store:
----------------------------------------------------------------------

View File

@ -1,48 +0,0 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), 
document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
2 {uid: 2, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: None, method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued [2,]
succeeded [0,1,]
----------------------------------------------------------------------
### Kind:
"documentAdditionOrUpdate" [1,2,]
"settingsUpdate" [0,]
----------------------------------------------------------------------
### Index Tasks:
doggos [0,1,2,]
----------------------------------------------------------------------
### Index Mapper:
doggos: { number_of_documents: 1, field_distribution: {"_vectors": 1, "breed": 1, "doggo": 1, "id": 1} }
----------------------------------------------------------------------
### Canceled By:
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
[timestamp] [2,]
----------------------------------------------------------------------
### Started At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Finished At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### File Store:
00000000-0000-0000-0000-000000000001
----------------------------------------------------------------------

View File

@ -1,45 +0,0 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), 
document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued []
succeeded [0,1,]
----------------------------------------------------------------------
### Kind:
"documentAdditionOrUpdate" [1,]
"settingsUpdate" [0,]
----------------------------------------------------------------------
### Index Tasks:
doggos [0,1,]
----------------------------------------------------------------------
### Index Mapper:
doggos: { number_of_documents: 1, field_distribution: {"_vectors": 1, "breed": 1, "doggo": 1, "id": 1} }
----------------------------------------------------------------------
### Canceled By:
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Started At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Finished At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### File Store:
----------------------------------------------------------------------

View File

@ -1,44 +0,0 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), 
document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued [1,]
succeeded [0,]
----------------------------------------------------------------------
### Kind:
"documentAdditionOrUpdate" [1,]
"settingsUpdate" [0,]
----------------------------------------------------------------------
### Index Tasks:
doggos [0,1,]
----------------------------------------------------------------------
### Index Mapper:
doggos: { number_of_documents: 0, field_distribution: {} }
----------------------------------------------------------------------
### Canceled By:
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Started At:
[timestamp] [0,]
----------------------------------------------------------------------
### Finished At:
[timestamp] [0,]
----------------------------------------------------------------------
### File Store:
00000000-0000-0000-0000-000000000000
----------------------------------------------------------------------

View File

@ -1,36 +0,0 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), 
document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued [0,]
----------------------------------------------------------------------
### Kind:
"settingsUpdate" [0,]
----------------------------------------------------------------------
### Index Tasks:
doggos [0,]
----------------------------------------------------------------------
### Index Mapper:
----------------------------------------------------------------------
### Canceled By:
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
----------------------------------------------------------------------
### Started At:
----------------------------------------------------------------------
### Finished At:
----------------------------------------------------------------------
### File Store:
----------------------------------------------------------------------

View File

@ -1,40 +0,0 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), 
document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued []
succeeded [0,]
----------------------------------------------------------------------
### Kind:
"settingsUpdate" [0,]
----------------------------------------------------------------------
### Index Tasks:
doggos [0,]
----------------------------------------------------------------------
### Index Mapper:
doggos: { number_of_documents: 0, field_distribution: {} }
----------------------------------------------------------------------
### Canceled By:
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
----------------------------------------------------------------------
### Started At:
[timestamp] [0,]
----------------------------------------------------------------------
### Finished At:
[timestamp] [0,]
----------------------------------------------------------------------
### File Store:
----------------------------------------------------------------------

View File

@ -34,10 +34,12 @@ catto: { number_of_documents: 1, field_distribution: {"id": 1} }
[timestamp] [3,]
----------------------------------------------------------------------
### Started At:
[timestamp] [2,3,]
[timestamp] [2,]
[timestamp] [3,]
----------------------------------------------------------------------
### Finished At:
[timestamp] [2,3,]
[timestamp] [2,]
[timestamp] [3,]
----------------------------------------------------------------------
### File Store:
00000000-0000-0000-0000-000000000001

View File

@ -1,90 +0,0 @@
---
source: index-scheduler/src/lib.rs
---
[
{
"uid": 0,
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]",
"error": null,
"canceledBy": null,
"details": {
"IndexInfo": {
"primary_key": null
}
},
"status": "succeeded",
"kind": {
"indexCreation": {
"index_uid": "doggo",
"primary_key": null
}
}
},
{
"uid": 1,
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]",
"error": {
"message": "Index `doggo` already exists.",
"code": "index_already_exists",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#index_already_exists"
},
"canceledBy": null,
"details": {
"IndexInfo": {
"primary_key": null
}
},
"status": "failed",
"kind": {
"indexCreation": {
"index_uid": "doggo",
"primary_key": null
}
}
},
{
"uid": 2,
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]",
"error": null,
"canceledBy": null,
"details": {
"IndexInfo": {
"primary_key": null
}
},
"status": "enqueued",
"kind": {
"indexCreation": {
"index_uid": "doggo",
"primary_key": null
}
}
},
{
"uid": 3,
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]",
"error": null,
"canceledBy": null,
"details": {
"IndexInfo": {
"primary_key": null
}
},
"status": "enqueued",
"kind": {
"indexCreation": {
"index_uid": "doggo",
"primary_key": null
}
}
}
]

View File

@ -1,90 +0,0 @@
---
source: index-scheduler/src/lib.rs
---
[
{
"uid": 0,
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]",
"error": null,
"canceledBy": null,
"details": {
"IndexInfo": {
"primary_key": null
}
},
"status": "succeeded",
"kind": {
"indexCreation": {
"index_uid": "doggo",
"primary_key": null
}
}
},
{
"uid": 1,
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]",
"error": {
"message": "Index `doggo` already exists.",
"code": "index_already_exists",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#index_already_exists"
},
"canceledBy": null,
"details": {
"IndexInfo": {
"primary_key": null
}
},
"status": "failed",
"kind": {
"indexCreation": {
"index_uid": "doggo",
"primary_key": null
}
}
},
{
"uid": 2,
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]",
"error": null,
"canceledBy": null,
"details": {
"IndexInfo": {
"primary_key": null
}
},
"status": "enqueued",
"kind": {
"indexCreation": {
"index_uid": "doggo",
"primary_key": null
}
}
},
{
"uid": 3,
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]",
"error": null,
"canceledBy": null,
"details": {
"IndexInfo": {
"primary_key": null
}
},
"status": "enqueued",
"kind": {
"indexCreation": {
"index_uid": "doggo",
"primary_key": null
}
}
}
]

View File

@ -1,36 +0,0 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued [0,]
----------------------------------------------------------------------
### Kind:
"settingsUpdate" [0,]
----------------------------------------------------------------------
### Index Tasks:
doggos [0,]
----------------------------------------------------------------------
### Index Mapper:
----------------------------------------------------------------------
### Canceled By:
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
----------------------------------------------------------------------
### Started At:
----------------------------------------------------------------------
### Finished At:
----------------------------------------------------------------------
### File Store:
----------------------------------------------------------------------

View File

@ -1,40 +0,0 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued []
succeeded [0,]
----------------------------------------------------------------------
### Kind:
"settingsUpdate" [0,]
----------------------------------------------------------------------
### Index Tasks:
doggos [0,]
----------------------------------------------------------------------
### Index Mapper:
doggos: { number_of_documents: 0, field_distribution: {} }
----------------------------------------------------------------------
### Canceled By:
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
----------------------------------------------------------------------
### Started At:
[timestamp] [0,]
----------------------------------------------------------------------
### Finished At:
[timestamp] [0,]
----------------------------------------------------------------------
### File Store:
----------------------------------------------------------------------

View File

@ -3,9 +3,9 @@
use std::collections::{BTreeSet, HashSet};
use std::ops::Bound;
use meilisearch_types::heed::types::DecodeIgnore;
use meilisearch_types::heed::types::{DecodeIgnore, OwnedType};
use meilisearch_types::heed::{Database, RoTxn, RwTxn};
use meilisearch_types::milli::CboRoaringBitmapCodec;
use meilisearch_types::milli::{CboRoaringBitmapCodec, BEU32};
use meilisearch_types::tasks::{Details, IndexSwap, Kind, KindWithContent, Status};
use roaring::{MultiOps, RoaringBitmap};
use time::OffsetDateTime;
@ -18,7 +18,7 @@ impl IndexScheduler {
}
pub(crate) fn last_task_id(&self, rtxn: &RoTxn) -> Result<Option<TaskId>> {
Ok(self.all_tasks.remap_data_type::<DecodeIgnore>().last(rtxn)?.map(|(k, _)| k + 1))
Ok(self.all_tasks.remap_data_type::<DecodeIgnore>().last(rtxn)?.map(|(k, _)| k.get() + 1))
}
pub(crate) fn next_task_id(&self, rtxn: &RoTxn) -> Result<TaskId> {
@ -26,7 +26,7 @@ impl IndexScheduler {
}
pub(crate) fn get_task(&self, rtxn: &RoTxn, task_id: TaskId) -> Result<Option<Task>> {
Ok(self.all_tasks.get(rtxn, &task_id)?)
Ok(self.all_tasks.get(rtxn, &BEU32::new(task_id))?)
}
/// Convert an iterator to a `Vec` of tasks. The tasks MUST exist or a
@ -88,7 +88,7 @@ impl IndexScheduler {
}
}
self.all_tasks.put(wtxn, &task.uid, task)?;
self.all_tasks.put(wtxn, &BEU32::new(task.uid), task)?;
Ok(())
}
@ -169,11 +169,11 @@ impl IndexScheduler {
pub(crate) fn insert_task_datetime(
wtxn: &mut RwTxn,
database: Database<BEI128, CboRoaringBitmapCodec>,
database: Database<OwnedType<BEI128>, CboRoaringBitmapCodec>,
time: OffsetDateTime,
task_id: TaskId,
) -> Result<()> {
let timestamp = time.unix_timestamp_nanos();
let timestamp = BEI128::new(time.unix_timestamp_nanos());
let mut task_ids = database.get(wtxn, &timestamp)?.unwrap_or_default();
task_ids.insert(task_id);
database.put(wtxn, &timestamp, &RoaringBitmap::from_iter(task_ids))?;
@ -182,11 +182,11 @@ pub(crate) fn insert_task_datetime(
pub(crate) fn remove_task_datetime(
wtxn: &mut RwTxn,
database: Database<BEI128, CboRoaringBitmapCodec>,
database: Database<OwnedType<BEI128>, CboRoaringBitmapCodec>,
time: OffsetDateTime,
task_id: TaskId,
) -> Result<()> {
let timestamp = time.unix_timestamp_nanos();
let timestamp = BEI128::new(time.unix_timestamp_nanos());
if let Some(mut existing) = database.get(wtxn, &timestamp)? {
existing.remove(task_id);
if existing.is_empty() {
@ -202,7 +202,7 @@ pub(crate) fn remove_task_datetime(
pub(crate) fn keep_tasks_within_datetimes(
rtxn: &RoTxn,
tasks: &mut RoaringBitmap,
database: Database<BEI128, CboRoaringBitmapCodec>,
database: Database<OwnedType<BEI128>, CboRoaringBitmapCodec>,
after: Option<OffsetDateTime>,
before: Option<OffsetDateTime>,
) -> Result<()> {
@ -213,8 +213,8 @@ pub(crate) fn keep_tasks_within_datetimes(
(Some(after), Some(before)) => (Bound::Excluded(*after), Bound::Excluded(*before)),
};
let mut collected_task_ids = RoaringBitmap::new();
let start = map_bound(start, |b| b.unix_timestamp_nanos());
let end = map_bound(end, |b| b.unix_timestamp_nanos());
let start = map_bound(start, |b| BEI128::new(b.unix_timestamp_nanos()));
let end = map_bound(end, |b| BEI128::new(b.unix_timestamp_nanos()));
let iter = database.range(rtxn, &(start, end))?;
for r in iter {
let (_timestamp, task_ids) = r?;
@ -272,9 +272,9 @@ pub fn swap_index_uid_in_task(task: &mut Task, swap: (&str, &str)) {
}
for index_uid in index_uids {
if index_uid == swap.0 {
swap.1.clone_into(index_uid);
*index_uid = swap.1.to_owned();
} else if index_uid == swap.1 {
swap.0.clone_into(index_uid);
*index_uid = swap.0.to_owned();
}
}
}
@ -337,6 +337,8 @@ impl IndexScheduler {
let rtxn = self.env.read_txn().unwrap();
for task in self.all_tasks.iter(&rtxn).unwrap() {
let (task_id, task) = task.unwrap();
let task_id = task_id.get();
let task_index_uid = task.index_uid().map(ToOwned::to_owned);
let Task {
@ -359,13 +361,16 @@ impl IndexScheduler {
.unwrap()
.contains(task.uid));
}
let db_enqueued_at =
self.enqueued_at.get(&rtxn, &enqueued_at.unix_timestamp_nanos()).unwrap().unwrap();
let db_enqueued_at = self
.enqueued_at
.get(&rtxn, &BEI128::new(enqueued_at.unix_timestamp_nanos()))
.unwrap()
.unwrap();
assert!(db_enqueued_at.contains(task_id));
if let Some(started_at) = started_at {
let db_started_at = self
.started_at
.get(&rtxn, &started_at.unix_timestamp_nanos())
.get(&rtxn, &BEI128::new(started_at.unix_timestamp_nanos()))
.unwrap()
.unwrap();
assert!(db_started_at.contains(task_id));
@ -373,7 +378,7 @@ impl IndexScheduler {
if let Some(finished_at) = finished_at {
let db_finished_at = self
.finished_at
.get(&rtxn, &finished_at.unix_timestamp_nanos())
.get(&rtxn, &BEI128::new(finished_at.unix_timestamp_nanos()))
.unwrap()
.unwrap();
assert!(db_finished_at.contains(task_id));

View File

@ -1,6 +1,7 @@
use std::borrow::Cow;
use std::convert::TryInto;
use meilisearch_types::heed::{BoxedError, BytesDecode, BytesEncode};
use meilisearch_types::heed::{BytesDecode, BytesEncode};
use uuid::Uuid;
/// A heed codec for value of struct Uuid.
@ -9,15 +10,15 @@ pub struct UuidCodec;
impl<'a> BytesDecode<'a> for UuidCodec {
type DItem = Uuid;
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
bytes.try_into().map(Uuid::from_bytes).map_err(Into::into)
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
bytes.try_into().ok().map(Uuid::from_bytes)
}
}
impl BytesEncode<'_> for UuidCodec {
type EItem = Uuid;
fn bytes_encode(item: &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
Ok(Cow::Borrowed(item.as_bytes()))
fn bytes_encode(item: &Self::EItem) -> Option<Cow<[u8]>> {
Some(Cow::Borrowed(item.as_bytes()))
}
}

View File

@ -11,6 +11,6 @@ edition.workspace = true
license.workspace = true
[dependencies]
insta = { version = "^1.34.0", features = ["json", "redactions"] }
insta = { version = "^1.29.0", features = ["json", "redactions"] }
md5 = "0.7.0"
once_cell = "1.19"
once_cell = "1.17"

View File

@ -11,16 +11,16 @@ edition.workspace = true
license.workspace = true
[dependencies]
base64 = "0.21.7"
enum-iterator = "1.5.0"
base64 = "0.21.0"
enum-iterator = "1.4.0"
hmac = "0.12.1"
maplit = "1.0.2"
meilisearch-types = { path = "../meilisearch-types" }
rand = "0.8.5"
roaring = { version = "0.10.2", features = ["serde"] }
serde = { version = "1.0.195", features = ["derive"] }
serde_json = { version = "1.0.111", features = ["preserve_order"] }
sha2 = "0.10.8"
thiserror = "1.0.56"
time = { version = "0.3.31", features = ["serde-well-known", "formatting", "parsing", "macros"] }
uuid = { version = "1.6.1", features = ["serde", "v4"] }
roaring = { version = "0.10.1", features = ["serde"] }
serde = { version = "1.0.160", features = ["derive"] }
serde_json = { version = "1.0.95", features = ["preserve_order"] }
sha2 = "0.10.6"
thiserror = "1.0.40"
time = { version = "0.3.20", features = ["serde-well-known", "formatting", "parsing", "macros"] }
uuid = { version = "1.3.1", features = ["serde", "v4"] }

View File

@ -1,22 +1,20 @@
use std::borrow::Cow;
use std::cmp::Reverse;
use std::collections::HashSet;
use std::convert::{TryFrom, TryInto};
use std::fs::create_dir_all;
use std::path::Path;
use std::result::Result as StdResult;
use std::str;
use std::str::FromStr;
use std::sync::Arc;
use hmac::{Hmac, Mac};
use meilisearch_types::heed::BoxedError;
use meilisearch_types::index_uid_pattern::IndexUidPattern;
use meilisearch_types::keys::KeyId;
use meilisearch_types::milli;
use meilisearch_types::milli::heed::types::{Bytes, DecodeIgnore, SerdeJson};
use meilisearch_types::milli::heed::types::{ByteSlice, DecodeIgnore, SerdeJson};
use meilisearch_types::milli::heed::{Database, Env, EnvOpenOptions, RwTxn};
use sha2::Sha256;
use thiserror::Error;
use time::OffsetDateTime;
use uuid::fmt::Hyphenated;
use uuid::Uuid;
@ -32,7 +30,7 @@ const KEY_ID_ACTION_INDEX_EXPIRATION_DB_NAME: &str = "keyid-action-index-expirat
#[derive(Clone)]
pub struct HeedAuthStore {
env: Arc<Env>,
keys: Database<Bytes, SerdeJson<Key>>,
keys: Database<ByteSlice, SerdeJson<Key>>,
action_keyid_index_expiration: Database<KeyIdActionCodec, SerdeJson<Option<OffsetDateTime>>>,
should_close_on_drop: bool,
}
@ -49,7 +47,7 @@ pub fn open_auth_store_env(path: &Path) -> milli::heed::Result<milli::heed::Env>
let mut options = EnvOpenOptions::new();
options.map_size(AUTH_STORE_SIZE); // 1GB
options.max_dbs(2);
unsafe { options.open(path) }
options.open(path)
}
impl HeedAuthStore {
@ -278,7 +276,7 @@ impl HeedAuthStore {
fn delete_key_from_inverted_db(&self, wtxn: &mut RwTxn, key: &KeyId) -> Result<()> {
let mut iter = self
.action_keyid_index_expiration
.remap_types::<Bytes, DecodeIgnore>()
.remap_types::<ByteSlice, DecodeIgnore>()
.prefix_iter_mut(wtxn, key.as_bytes())?;
while iter.next().transpose()?.is_some() {
// safety: we don't keep references from inside the LMDB database.
@ -296,24 +294,23 @@ pub struct KeyIdActionCodec;
impl<'a> milli::heed::BytesDecode<'a> for KeyIdActionCodec {
type DItem = (KeyId, Action, Option<&'a [u8]>);
fn bytes_decode(bytes: &'a [u8]) -> StdResult<Self::DItem, BoxedError> {
let (key_id_bytes, action_bytes) = try_split_array_at(bytes).ok_or(SliceTooShortError)?;
let (&action_byte, index) =
match try_split_array_at(action_bytes).ok_or(SliceTooShortError)? {
([action], []) => (action, None),
([action], index) => (action, Some(index)),
};
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
let (key_id_bytes, action_bytes) = try_split_array_at(bytes)?;
let (action_bytes, index) = match try_split_array_at(action_bytes)? {
(action, []) => (action, None),
(action, index) => (action, Some(index)),
};
let key_id = Uuid::from_bytes(*key_id_bytes);
let action = Action::from_repr(action_byte).ok_or(InvalidActionError { action_byte })?;
let action = Action::from_repr(u8::from_be_bytes(*action_bytes))?;
Ok((key_id, action, index))
Some((key_id, action, index))
}
}
impl<'a> milli::heed::BytesEncode<'a> for KeyIdActionCodec {
type EItem = (&'a KeyId, &'a Action, Option<&'a [u8]>);
fn bytes_encode((key_id, action, index): &Self::EItem) -> StdResult<Cow<[u8]>, BoxedError> {
fn bytes_encode((key_id, action, index): &Self::EItem) -> Option<Cow<[u8]>> {
let mut bytes = Vec::new();
bytes.extend_from_slice(key_id.as_bytes());
@ -323,20 +320,10 @@ impl<'a> milli::heed::BytesEncode<'a> for KeyIdActionCodec {
bytes.extend_from_slice(index);
}
Ok(Cow::Owned(bytes))
Some(Cow::Owned(bytes))
}
}
#[derive(Error, Debug)]
#[error("the slice is too short")]
pub struct SliceTooShortError;
#[derive(Error, Debug)]
#[error("cannot construct a valid Action from {action_byte}")]
pub struct InvalidActionError {
pub action_byte: u8,
}
pub fn generate_key_as_hexa(uid: Uuid, master_key: &[u8]) -> String {
// format uid as hyphenated allowing user to generate their own keys.
let mut uid_buffer = [0; Hyphenated::LENGTH];

View File

@ -11,31 +11,31 @@ edition.workspace = true
license.workspace = true
[dependencies]
actix-web = { version = "4.5.1", default-features = false }
anyhow = "1.0.79"
actix-web = { version = "4.3.1", default-features = false }
anyhow = "1.0.70"
convert_case = "0.6.0"
csv = "1.3.0"
deserr = { version = "0.6.1", features = ["actix-web"] }
either = { version = "1.9.0", features = ["serde"] }
enum-iterator = "1.5.0"
csv = "1.2.1"
deserr = { version = "0.6.0", features = ["actix-web"]}
either = { version = "1.8.1", features = ["serde"] }
enum-iterator = "1.4.0"
file-store = { path = "../file-store" }
flate2 = "1.0.28"
flate2 = "1.0.25"
fst = "0.4.7"
memmap2 = "0.7.1"
milli = { path = "../milli" }
roaring = { version = "0.10.2", features = ["serde"] }
serde = { version = "1.0.195", features = ["derive"] }
roaring = { version = "0.10.1", features = ["serde"] }
serde = { version = "1.0.160", features = ["derive"] }
serde-cs = "0.2.4"
serde_json = "1.0.111"
tar = "0.4.40"
tempfile = "3.9.0"
thiserror = "1.0.56"
time = { version = "0.3.31", features = ["serde-well-known", "formatting", "parsing", "macros"] }
tokio = "1.35"
uuid = { version = "1.6.1", features = ["serde", "v4"] }
serde_json = "1.0.95"
tar = "0.4.38"
tempfile = "3.5.0"
thiserror = "1.0.40"
time = { version = "0.3.20", features = ["serde-well-known", "formatting", "parsing", "macros"] }
tokio = "1.27"
uuid = { version = "1.3.1", features = ["serde", "v4"] }
[dev-dependencies]
insta = "1.34.0"
insta = "1.29.0"
meili-snap = { path = "../meili-snap" }
[features]
@ -44,7 +44,6 @@ all-tokenizations = ["milli/all-tokenizations"]
# chinese specialized tokenization
chinese = ["milli/chinese"]
chinese-pinyin = ["milli/chinese-pinyin"]
# hebrew specialized tokenization
hebrew = ["milli/hebrew"]
# japanese specialized tokenization
@ -55,7 +54,3 @@ thai = ["milli/thai"]
greek = ["milli/greek"]
# allow khmer specialized tokenization
khmer = ["milli/khmer"]
# allow vietnamese specialized tokenization
vietnamese = ["milli/vietnamese"]
# force swedish character recomposition
swedish-recomposition = ["milli/swedish-recomposition"]

View File

@ -26,7 +26,7 @@ pub type DeserrQueryParamError<C = BadRequest> = DeserrError<DeserrQueryParam, C
/// A request deserialization error.
///
/// The first generic parameter is a marker type describing the format of the request: either json (e.g. [`DeserrJson`] or [`DeserrQueryParam`]).
/// The first generic paramater is a marker type describing the format of the request: either json (e.g. [`DeserrJson`] or [`DeserrQueryParam`]).
/// The second generic parameter is the default error code for the deserialization error, in case it is not given.
pub struct DeserrError<Format, C: Default + ErrorCode> {
pub msg: String,
@ -188,5 +188,3 @@ merge_with_error_impl_take_error_message!(ParseOffsetDateTimeError);
merge_with_error_impl_take_error_message!(ParseTaskKindError);
merge_with_error_impl_take_error_message!(ParseTaskStatusError);
merge_with_error_impl_take_error_message!(IndexUidFormatError);
merge_with_error_impl_take_error_message!(InvalidSearchSemanticRatio);
merge_with_error_impl_take_error_message!(InvalidSimilarId);

View File

@ -1,6 +1,6 @@
use std::fmt::{self, Debug, Display};
use std::fs::File;
use std::io::{self, BufWriter, Write};
use std::io::{self, Seek, Write};
use std::marker::PhantomData;
use memmap2::MmapOptions;
@ -104,8 +104,8 @@ impl ErrorCode for DocumentFormatError {
}
/// Reads CSV from input and write an obkv batch to writer.
pub fn read_csv(file: &File, writer: impl Write, delimiter: u8) -> Result<u64> {
let mut builder = DocumentsBatchBuilder::new(BufWriter::new(writer));
pub fn read_csv(file: &File, writer: impl Write + Seek, delimiter: u8) -> Result<u64> {
let mut builder = DocumentsBatchBuilder::new(writer);
let mmap = unsafe { MmapOptions::new().map(file)? };
let csv = csv::ReaderBuilder::new().delimiter(delimiter).from_reader(mmap.as_ref());
builder.append_csv(csv).map_err(|e| (PayloadType::Csv { delimiter }, e))?;
@ -116,9 +116,9 @@ pub fn read_csv(file: &File, writer: impl Write, delimiter: u8) -> Result<u64> {
Ok(count as u64)
}
/// Reads JSON from temporary file and write an obkv batch to writer.
pub fn read_json(file: &File, writer: impl Write) -> Result<u64> {
let mut builder = DocumentsBatchBuilder::new(BufWriter::new(writer));
/// Reads JSON from temporary file and write an obkv batch to writer.
pub fn read_json(file: &File, writer: impl Write + Seek) -> Result<u64> {
let mut builder = DocumentsBatchBuilder::new(writer);
let mmap = unsafe { MmapOptions::new().map(file)? };
let mut deserializer = serde_json::Deserializer::from_slice(&mmap);
@ -151,8 +151,8 @@ pub fn read_json(file: &File, writer: impl Write) -> Result<u64> {
}
/// Reads NDJSON from temporary file and writes an obkv batch to writer.
pub fn read_ndjson(file: &File, writer: impl Write) -> Result<u64> {
let mut builder = DocumentsBatchBuilder::new(BufWriter::new(writer));
pub fn read_ndjson(file: &File, writer: impl Write + Seek) -> Result<u64> {
let mut builder = DocumentsBatchBuilder::new(writer);
let mmap = unsafe { MmapOptions::new().map(file)? };
for result in serde_json::Deserializer::from_slice(&mmap).into_iter() {

View File

@ -2,7 +2,6 @@ use std::{fmt, io};
use actix_web::http::StatusCode;
use actix_web::{self as aweb, HttpResponseBuilder};
use aweb::http::header;
use aweb::rt::task::JoinError;
use convert_case::Casing;
use milli::heed::{Error as HeedError, MdbError};
@ -57,14 +56,7 @@ where
impl aweb::error::ResponseError for ResponseError {
fn error_response(&self) -> aweb::HttpResponse {
let json = serde_json::to_vec(self).unwrap();
let mut builder = HttpResponseBuilder::new(self.status_code());
builder.content_type("application/json");
if self.code == StatusCode::SERVICE_UNAVAILABLE {
builder.insert_header((header::RETRY_AFTER, "10"));
}
builder.body(json)
HttpResponseBuilder::new(self.status_code()).content_type("application/json").body(json)
}
fn status_code(&self) -> StatusCode {
@ -230,8 +222,6 @@ InvalidVectorsType , InvalidRequest , BAD_REQUEST ;
InvalidDocumentId , InvalidRequest , BAD_REQUEST ;
InvalidDocumentLimit , InvalidRequest , BAD_REQUEST ;
InvalidDocumentOffset , InvalidRequest , BAD_REQUEST ;
InvalidEmbedder , InvalidRequest , BAD_REQUEST ;
InvalidHybridQuery , InvalidRequest , BAD_REQUEST ;
InvalidIndexLimit , InvalidRequest , BAD_REQUEST ;
InvalidIndexOffset , InvalidRequest , BAD_REQUEST ;
InvalidIndexPrimaryKey , InvalidRequest , BAD_REQUEST ;
@ -239,23 +229,17 @@ InvalidIndexUid , InvalidRequest , BAD_REQUEST ;
InvalidSearchAttributesToSearchOn , InvalidRequest , BAD_REQUEST ;
InvalidSearchAttributesToCrop , InvalidRequest , BAD_REQUEST ;
InvalidSearchAttributesToHighlight , InvalidRequest , BAD_REQUEST ;
InvalidSimilarAttributesToRetrieve , InvalidRequest , BAD_REQUEST ;
InvalidSearchAttributesToRetrieve , InvalidRequest , BAD_REQUEST ;
InvalidSearchCropLength , InvalidRequest , BAD_REQUEST ;
InvalidSearchCropMarker , InvalidRequest , BAD_REQUEST ;
InvalidSearchFacets , InvalidRequest , BAD_REQUEST ;
InvalidSearchSemanticRatio , InvalidRequest , BAD_REQUEST ;
InvalidFacetSearchFacetName , InvalidRequest , BAD_REQUEST ;
InvalidSimilarId , InvalidRequest , BAD_REQUEST ;
InvalidSearchFilter , InvalidRequest , BAD_REQUEST ;
InvalidSimilarFilter , InvalidRequest , BAD_REQUEST ;
InvalidSearchHighlightPostTag , InvalidRequest , BAD_REQUEST ;
InvalidSearchHighlightPreTag , InvalidRequest , BAD_REQUEST ;
InvalidSearchHitsPerPage , InvalidRequest , BAD_REQUEST ;
InvalidSimilarLimit , InvalidRequest , BAD_REQUEST ;
InvalidSearchLimit , InvalidRequest , BAD_REQUEST ;
InvalidSearchMatchingStrategy , InvalidRequest , BAD_REQUEST ;
InvalidSimilarOffset , InvalidRequest , BAD_REQUEST ;
InvalidSearchOffset , InvalidRequest , BAD_REQUEST ;
InvalidSearchPage , InvalidRequest , BAD_REQUEST ;
InvalidSearchQ , InvalidRequest , BAD_REQUEST ;
@ -264,18 +248,13 @@ InvalidFacetSearchName , InvalidRequest , BAD_REQUEST ;
InvalidSearchVector , InvalidRequest , BAD_REQUEST ;
InvalidSearchShowMatchesPosition , InvalidRequest , BAD_REQUEST ;
InvalidSearchShowRankingScore , InvalidRequest , BAD_REQUEST ;
InvalidSimilarShowRankingScore , InvalidRequest , BAD_REQUEST ;
InvalidSearchShowRankingScoreDetails , InvalidRequest , BAD_REQUEST ;
InvalidSimilarShowRankingScoreDetails , InvalidRequest , BAD_REQUEST ;
InvalidSearchSort , InvalidRequest , BAD_REQUEST ;
InvalidSettingsDisplayedAttributes , InvalidRequest , BAD_REQUEST ;
InvalidSettingsDistinctAttribute , InvalidRequest , BAD_REQUEST ;
InvalidSettingsProximityPrecision , InvalidRequest , BAD_REQUEST ;
InvalidSettingsFaceting , InvalidRequest , BAD_REQUEST ;
InvalidSettingsFilterableAttributes , InvalidRequest , BAD_REQUEST ;
InvalidSettingsPagination , InvalidRequest , BAD_REQUEST ;
InvalidSettingsSearchCutoffMs , InvalidRequest , BAD_REQUEST ;
InvalidSettingsEmbedders , InvalidRequest , BAD_REQUEST ;
InvalidSettingsRankingRules , InvalidRequest , BAD_REQUEST ;
InvalidSettingsSearchableAttributes , InvalidRequest , BAD_REQUEST ;
InvalidSettingsSortableAttributes , InvalidRequest , BAD_REQUEST ;
@ -315,22 +294,15 @@ MissingFacetSearchFacetName , InvalidRequest , BAD_REQUEST ;
MissingIndexUid , InvalidRequest , BAD_REQUEST ;
MissingMasterKey , Auth , UNAUTHORIZED ;
MissingPayload , InvalidRequest , BAD_REQUEST ;
MissingSearchHybrid , InvalidRequest , BAD_REQUEST ;
MissingSwapIndexes , InvalidRequest , BAD_REQUEST ;
MissingTaskFilters , InvalidRequest , BAD_REQUEST ;
NoSpaceLeftOnDevice , System , UNPROCESSABLE_ENTITY;
PayloadTooLarge , InvalidRequest , PAYLOAD_TOO_LARGE ;
TooManySearchRequests , System , SERVICE_UNAVAILABLE ;
TaskNotFound , InvalidRequest , NOT_FOUND ;
TooManyOpenFiles , System , UNPROCESSABLE_ENTITY ;
TooManyVectors , InvalidRequest , BAD_REQUEST ;
UnretrievableDocument , Internal , BAD_REQUEST ;
UnretrievableErrorCode , InvalidRequest , BAD_REQUEST ;
UnsupportedMediaType , InvalidRequest , UNSUPPORTED_MEDIA_TYPE ;
// Experimental features
VectorEmbeddingError , InvalidRequest , BAD_REQUEST ;
NotFoundSimilarId , InvalidRequest , BAD_REQUEST
UnsupportedMediaType , InvalidRequest , UNSUPPORTED_MEDIA_TYPE
}
impl ErrorCode for JoinError {
@ -363,17 +335,6 @@ impl ErrorCode for milli::Error {
UserError::InvalidDocumentId { .. } | UserError::TooManyDocumentIds { .. } => {
Code::InvalidDocumentId
}
UserError::MissingDocumentField(_) => Code::InvalidDocumentFields,
UserError::InvalidFieldForSource { .. }
| UserError::MissingFieldForSource { .. }
| UserError::InvalidOpenAiModel { .. }
| UserError::InvalidOpenAiModelDimensions { .. }
| UserError::InvalidOpenAiModelDimensionsMax { .. }
| UserError::InvalidSettingsDimensions { .. }
| UserError::InvalidUrl { .. }
| UserError::InvalidPrompt(_) => Code::InvalidSettingsEmbedders,
UserError::TooManyEmbedders(_) => Code::InvalidSettingsEmbedders,
UserError::InvalidPromptForEmbeddings(..) => Code::InvalidSettingsEmbedders,
UserError::NoPrimaryKeyCandidateFound => Code::IndexPrimaryKeyNoCandidateFound,
UserError::MultiplePrimaryKeyCandidatesFound { .. } => {
Code::IndexPrimaryKeyMultipleCandidatesFound
@ -391,14 +352,11 @@ impl ErrorCode for milli::Error {
UserError::CriterionError(_) => Code::InvalidSettingsRankingRules,
UserError::InvalidGeoField { .. } => Code::InvalidDocumentGeoField,
UserError::InvalidVectorDimensions { .. } => Code::InvalidVectorDimensions,
UserError::InvalidVectorsMapType { .. } => Code::InvalidVectorsType,
UserError::TooManyVectors(_, _) => Code::TooManyVectors,
UserError::InvalidVectorsType { .. } => Code::InvalidVectorsType,
UserError::SortError(_) => Code::InvalidSearchSort,
UserError::InvalidMinTypoWordLenSetting(_, _) => {
Code::InvalidSettingsTypoTolerance
}
UserError::InvalidEmbedder(_) => Code::InvalidEmbedder,
UserError::VectorEmbeddingError(_) => Code::VectorEmbeddingError,
}
}
}
@ -428,10 +386,11 @@ impl ErrorCode for HeedError {
HeedError::Mdb(MdbError::Invalid) => Code::InvalidStoreFile,
HeedError::Io(e) => e.error_code(),
HeedError::Mdb(_)
| HeedError::Encoding(_)
| HeedError::Decoding(_)
| HeedError::Encoding
| HeedError::Decoding
| HeedError::InvalidDatabaseTyping
| HeedError::DatabaseClosing
| HeedError::BadOpenOptions { .. } => Code::Internal,
| HeedError::BadOpenOptions => Code::Internal,
}
}
}
@ -485,26 +444,6 @@ impl fmt::Display for DeserrParseIntError {
}
}
/// User-facing message emitted when the `semanticRatio` search parameter is rejected.
impl fmt::Display for deserr_codes::InvalidSearchSemanticRatio {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // The message has no dynamic parts, so it is pushed to the formatter verbatim.
        f.write_str(
            "the value of `semanticRatio` is invalid, expected a float between `0.0` and `1.0`.",
        )
    }
}
/// User-facing message emitted when the `id` parameter is rejected.
impl fmt::Display for deserr_codes::InvalidSimilarId {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Static text only; the trailing backslashes join the three source lines into a
        // single-line message.
        f.write_str(
            "the value of `id` is invalid. \
             A document identifier can be of type integer or string, \
             only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_).",
        )
    }
}
#[macro_export]
macro_rules! internal_error {
($target:ty : $($other:path), *) => {

View File

@ -3,13 +3,13 @@ use serde::{Deserialize, Serialize};
#[derive(Serialize, Deserialize, Debug, Clone, Copy, Default, PartialEq, Eq)]
#[serde(rename_all = "camelCase", default)]
pub struct RuntimeTogglableFeatures {
pub score_details: bool,
pub vector_store: bool,
pub metrics: bool,
pub logs_route: bool,
pub export_puffin_reports: bool,
}
#[derive(Default, Debug, Clone, Copy)]
pub struct InstanceTogglableFeatures {
pub metrics: bool,
pub logs_route: bool,
}

View File

@ -3,12 +3,11 @@ use std::convert::Infallible;
use std::fmt;
use std::marker::PhantomData;
use std::num::NonZeroUsize;
use std::ops::{ControlFlow, Deref};
use std::ops::ControlFlow;
use std::str::FromStr;
use deserr::{DeserializeError, Deserr, ErrorKind, MergeWithError, ValuePointerRef};
use fst::IntoStreamer;
use milli::proximity::ProximityPrecision;
use milli::update::Setting;
use milli::{Criterion, CriterionError, Index, DEFAULT_VALUES_PER_FACET};
use serde::{Deserialize, Serialize, Serializer};
@ -143,13 +142,21 @@ impl MergeWithError<milli::CriterionError> for DeserrJsonError<InvalidSettingsRa
)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
pub struct Settings<T> {
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
#[serde(
default,
serialize_with = "serialize_with_wildcard",
skip_serializing_if = "Setting::is_not_set"
)]
#[deserr(default, error = DeserrJsonError<InvalidSettingsDisplayedAttributes>)]
pub displayed_attributes: WildcardSetting,
pub displayed_attributes: Setting<Vec<String>>,
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
#[serde(
default,
serialize_with = "serialize_with_wildcard",
skip_serializing_if = "Setting::is_not_set"
)]
#[deserr(default, error = DeserrJsonError<InvalidSettingsSearchableAttributes>)]
pub searchable_attributes: WildcardSetting,
pub searchable_attributes: Setting<Vec<String>>,
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
#[deserr(default, error = DeserrJsonError<InvalidSettingsFilterableAttributes>)]
@ -179,9 +186,6 @@ pub struct Settings<T> {
#[deserr(default, error = DeserrJsonError<InvalidSettingsDistinctAttribute>)]
pub distinct_attribute: Setting<String>,
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
#[deserr(default, error = DeserrJsonError<InvalidSettingsProximityPrecision>)]
pub proximity_precision: Setting<ProximityPrecisionView>,
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
#[deserr(default, error = DeserrJsonError<InvalidSettingsTypoTolerance>)]
pub typo_tolerance: Setting<TypoSettings>,
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
@ -191,60 +195,16 @@ pub struct Settings<T> {
#[deserr(default, error = DeserrJsonError<InvalidSettingsPagination>)]
pub pagination: Setting<PaginationSettings>,
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
#[deserr(default, error = DeserrJsonError<InvalidSettingsEmbedders>)]
pub embedders: Setting<BTreeMap<String, Setting<milli::vector::settings::EmbeddingSettings>>>,
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
#[deserr(default, error = DeserrJsonError<InvalidSettingsSearchCutoffMs>)]
pub search_cutoff_ms: Setting<u64>,
#[serde(skip)]
#[deserr(skip)]
pub _kind: PhantomData<T>,
}
impl<T> Settings<T> {
    /// Masks, in place, the API key of every embedder configured in these settings.
    ///
    /// Does nothing when `embedders` is not `Setting::Set`. Embedders that are not
    /// `Setting::Set`, or whose `api_key` is not `Setting::Set`, are left untouched.
    pub fn hide_secrets(&mut self) {
        let Setting::Set(embedders) = &mut self.embedders else {
            return;
        };

        for embedder in embedders.values_mut() {
            let Setting::Set(embedder) = embedder else {
                continue;
            };

            let Setting::Set(api_key) = &mut embedder.api_key else {
                continue;
            };

            Self::hide_secret(api_key);
        }
    }

    /// Replaces the tail of `secret` with a run of `X` followed by `...`.
    ///
    /// The number of leading bytes kept depends on the byte length of the secret:
    /// none below 10 bytes, 2 below 20, 3 below 30 and 5 otherwise.
    fn hide_secret(secret: &mut String) {
        let (visible, mask) = match secret.len() {
            len if len < 10 => (0, "XXX..."),
            len if len < 20 => (2, "XXXX..."),
            len if len < 30 => (3, "XXXXX..."),
            _ => (5, "XXXXXX..."),
        };

        // `String::replace_range` panics when the range start is not on a char boundary,
        // which happens for secrets containing multi-byte characters. Back off to the
        // previous boundary instead; offset 0 is always a boundary, so the loop terminates.
        // For ASCII secrets `cut == visible`, so the output is unchanged.
        let mut cut = visible;
        while !secret.is_char_boundary(cut) {
            cut -= 1;
        }

        secret.replace_range(cut.., mask);
    }
}
impl Settings<Checked> {
pub fn cleared() -> Settings<Checked> {
Settings {
displayed_attributes: Setting::Reset.into(),
searchable_attributes: Setting::Reset.into(),
displayed_attributes: Setting::Reset,
searchable_attributes: Setting::Reset,
filterable_attributes: Setting::Reset,
sortable_attributes: Setting::Reset,
ranking_rules: Setting::Reset,
@ -254,12 +214,9 @@ impl Settings<Checked> {
separator_tokens: Setting::Reset,
dictionary: Setting::Reset,
distinct_attribute: Setting::Reset,
proximity_precision: Setting::Reset,
typo_tolerance: Setting::Reset,
faceting: Setting::Reset,
pagination: Setting::Reset,
embedders: Setting::Reset,
search_cutoff_ms: Setting::Reset,
_kind: PhantomData,
}
}
@ -277,12 +234,9 @@ impl Settings<Checked> {
dictionary,
synonyms,
distinct_attribute,
proximity_precision,
typo_tolerance,
faceting,
pagination,
embedders,
search_cutoff_ms,
..
} = self;
@ -298,12 +252,9 @@ impl Settings<Checked> {
dictionary,
synonyms,
distinct_attribute,
proximity_precision,
typo_tolerance,
faceting,
pagination,
embedders,
search_cutoff_ms,
_kind: PhantomData,
}
}
@ -311,7 +262,7 @@ impl Settings<Checked> {
impl Settings<Unchecked> {
pub fn check(self) -> Settings<Checked> {
let displayed_attributes = match self.displayed_attributes.0 {
let displayed_attributes = match self.displayed_attributes {
Setting::Set(fields) => {
if fields.iter().any(|f| f == "*") {
Setting::Reset
@ -322,7 +273,7 @@ impl Settings<Unchecked> {
otherwise => otherwise,
};
let searchable_attributes = match self.searchable_attributes.0 {
let searchable_attributes = match self.searchable_attributes {
Setting::Set(fields) => {
if fields.iter().any(|f| f == "*") {
Setting::Reset
@ -334,8 +285,8 @@ impl Settings<Unchecked> {
};
Settings {
displayed_attributes: displayed_attributes.into(),
searchable_attributes: searchable_attributes.into(),
displayed_attributes,
searchable_attributes,
filterable_attributes: self.filterable_attributes,
sortable_attributes: self.sortable_attributes,
ranking_rules: self.ranking_rules,
@ -345,30 +296,12 @@ impl Settings<Unchecked> {
separator_tokens: self.separator_tokens,
dictionary: self.dictionary,
distinct_attribute: self.distinct_attribute,
proximity_precision: self.proximity_precision,
typo_tolerance: self.typo_tolerance,
faceting: self.faceting,
pagination: self.pagination,
embedders: self.embedders,
search_cutoff_ms: self.search_cutoff_ms,
_kind: PhantomData,
}
}
pub fn validate(self) -> Result<Self, milli::Error> {
self.validate_embedding_settings()
}
/// Passes every embedder configuration through `milli::update::validate_embedding_settings`
/// and stores the value it returns back under the same name.
///
/// Settings whose `embedders` field is not `Setting::Set` are returned unchanged. The first
/// validation error aborts the whole operation.
fn validate_embedding_settings(mut self) -> Result<Self, milli::Error> {
    let Setting::Set(mut configs) = self.embedders else { return Ok(self) };

    for (name, config) in configs.iter_mut() {
        // The validator takes the configuration by value, so move it out of the map slot
        // (leaving the default behind) and write the checked value straight back.
        *config = milli::update::validate_embedding_settings(std::mem::take(config), name)?;
    }

    self.embedders = Setting::Set(configs);
    Ok(self)
}
}
#[derive(Debug, Clone, Serialize, Deserialize)]
@ -383,40 +316,19 @@ pub fn apply_settings_to_builder(
settings: &Settings<Checked>,
builder: &mut milli::update::Settings,
) {
let Settings {
displayed_attributes,
searchable_attributes,
filterable_attributes,
sortable_attributes,
ranking_rules,
stop_words,
non_separator_tokens,
separator_tokens,
dictionary,
synonyms,
distinct_attribute,
proximity_precision,
typo_tolerance,
faceting,
pagination,
embedders,
search_cutoff_ms,
_kind,
} = settings;
match searchable_attributes.deref() {
match settings.searchable_attributes {
Setting::Set(ref names) => builder.set_searchable_fields(names.clone()),
Setting::Reset => builder.reset_searchable_fields(),
Setting::NotSet => (),
}
match displayed_attributes.deref() {
match settings.displayed_attributes {
Setting::Set(ref names) => builder.set_displayed_fields(names.clone()),
Setting::Reset => builder.reset_displayed_fields(),
Setting::NotSet => (),
}
match filterable_attributes {
match settings.filterable_attributes {
Setting::Set(ref facets) => {
builder.set_filterable_fields(facets.clone().into_iter().collect())
}
@ -424,13 +336,13 @@ pub fn apply_settings_to_builder(
Setting::NotSet => (),
}
match sortable_attributes {
match settings.sortable_attributes {
Setting::Set(ref fields) => builder.set_sortable_fields(fields.iter().cloned().collect()),
Setting::Reset => builder.reset_sortable_fields(),
Setting::NotSet => (),
}
match ranking_rules {
match settings.ranking_rules {
Setting::Set(ref criteria) => {
builder.set_criteria(criteria.iter().map(|c| c.clone().into()).collect())
}
@ -438,13 +350,13 @@ pub fn apply_settings_to_builder(
Setting::NotSet => (),
}
match stop_words {
match settings.stop_words {
Setting::Set(ref stop_words) => builder.set_stop_words(stop_words.clone()),
Setting::Reset => builder.reset_stop_words(),
Setting::NotSet => (),
}
match non_separator_tokens {
match settings.non_separator_tokens {
Setting::Set(ref non_separator_tokens) => {
builder.set_non_separator_tokens(non_separator_tokens.clone())
}
@ -452,7 +364,7 @@ pub fn apply_settings_to_builder(
Setting::NotSet => (),
}
match separator_tokens {
match settings.separator_tokens {
Setting::Set(ref separator_tokens) => {
builder.set_separator_tokens(separator_tokens.clone())
}
@ -460,31 +372,25 @@ pub fn apply_settings_to_builder(
Setting::NotSet => (),
}
match dictionary {
match settings.dictionary {
Setting::Set(ref dictionary) => builder.set_dictionary(dictionary.clone()),
Setting::Reset => builder.reset_dictionary(),
Setting::NotSet => (),
}
match synonyms {
match settings.synonyms {
Setting::Set(ref synonyms) => builder.set_synonyms(synonyms.clone().into_iter().collect()),
Setting::Reset => builder.reset_synonyms(),
Setting::NotSet => (),
}
match distinct_attribute {
match settings.distinct_attribute {
Setting::Set(ref attr) => builder.set_distinct_field(attr.clone()),
Setting::Reset => builder.reset_distinct_field(),
Setting::NotSet => (),
}
match proximity_precision {
Setting::Set(ref precision) => builder.set_proximity_precision((*precision).into()),
Setting::Reset => builder.reset_proximity_precision(),
Setting::NotSet => (),
}
match typo_tolerance {
match settings.typo_tolerance {
Setting::Set(ref value) => {
match value.enabled {
Setting::Set(val) => builder.set_autorize_typos(val),
@ -539,7 +445,7 @@ pub fn apply_settings_to_builder(
Setting::NotSet => (),
}
match faceting {
match &settings.faceting {
Setting::Set(FacetingSettings { max_values_per_facet, sort_facet_values_by }) => {
match max_values_per_facet {
Setting::Set(val) => builder.set_max_values_per_facet(*val),
@ -561,7 +467,7 @@ pub fn apply_settings_to_builder(
Setting::NotSet => (),
}
match pagination {
match settings.pagination {
Setting::Set(ref value) => match value.max_total_hits {
Setting::Set(val) => builder.set_pagination_max_total_hits(val),
Setting::Reset => builder.reset_pagination_max_total_hits(),
@ -570,29 +476,11 @@ pub fn apply_settings_to_builder(
Setting::Reset => builder.reset_pagination_max_total_hits(),
Setting::NotSet => (),
}
match embedders {
Setting::Set(value) => builder.set_embedder_settings(value.clone()),
Setting::Reset => builder.reset_embedder_settings(),
Setting::NotSet => (),
}
match search_cutoff_ms {
Setting::Set(cutoff) => builder.set_search_cutoff(*cutoff),
Setting::Reset => builder.reset_search_cutoff(),
Setting::NotSet => (),
}
}
/// Tells `settings()` whether the settings it returns should have their secrets masked.
pub enum SecretPolicy {
/// Return the settings as stored, without masking anything.
RevealSecrets,
/// Call `Settings::hide_secrets` on the settings before returning them.
HideSecrets,
}
pub fn settings(
index: &Index,
rtxn: &crate::heed::RoTxn,
secret_policy: SecretPolicy,
) -> Result<Settings<Checked>, milli::Error> {
let displayed_attributes =
index.displayed_fields(rtxn)?.map(|fields| fields.into_iter().map(String::from).collect());
@ -621,8 +509,6 @@ pub fn settings(
let distinct_field = index.distinct_field(rtxn)?.map(String::from);
let proximity_precision = index.proximity_precision(rtxn)?.map(ProximityPrecisionView::from);
let synonyms = index.user_defined_synonyms(rtxn)?;
let min_typo_word_len = MinWordSizeTyposSetting {
@ -646,10 +532,7 @@ pub fn settings(
let faceting = FacetingSettings {
max_values_per_facet: Setting::Set(
index
.max_values_per_facet(rtxn)?
.map(|x| x as usize)
.unwrap_or(DEFAULT_VALUES_PER_FACET),
index.max_values_per_facet(rtxn)?.unwrap_or(DEFAULT_VALUES_PER_FACET),
),
sort_facet_values_by: Setting::Set(
index
@ -662,33 +545,19 @@ pub fn settings(
let pagination = PaginationSettings {
max_total_hits: Setting::Set(
index
.pagination_max_total_hits(rtxn)?
.map(|x| x as usize)
.unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS),
index.pagination_max_total_hits(rtxn)?.unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS),
),
};
let embedders: BTreeMap<_, _> = index
.embedding_configs(rtxn)?
.into_iter()
.map(|(name, config)| (name, Setting::Set(config.into())))
.collect();
let embedders = if embedders.is_empty() { Setting::NotSet } else { Setting::Set(embedders) };
let search_cutoff_ms = index.search_cutoff(rtxn)?;
let mut settings = Settings {
Ok(Settings {
displayed_attributes: match displayed_attributes {
Some(attrs) => Setting::Set(attrs),
None => Setting::Reset,
}
.into(),
},
searchable_attributes: match searchable_attributes {
Some(attrs) => Setting::Set(attrs),
None => Setting::Reset,
}
.into(),
},
filterable_attributes: Setting::Set(filterable_attributes),
sortable_attributes: Setting::Set(sortable_attributes),
ranking_rules: Setting::Set(criteria.iter().map(|c| c.clone().into()).collect()),
@ -700,24 +569,12 @@ pub fn settings(
Some(field) => Setting::Set(field),
None => Setting::Reset,
},
proximity_precision: Setting::Set(proximity_precision.unwrap_or_default()),
synonyms: Setting::Set(synonyms),
typo_tolerance: Setting::Set(typo_tolerance),
faceting: Setting::Set(faceting),
pagination: Setting::Set(pagination),
embedders,
search_cutoff_ms: match search_cutoff_ms {
Some(cutoff) => Setting::Set(cutoff),
None => Setting::Reset,
},
_kind: PhantomData,
};
if let SecretPolicy::HideSecrets = secret_policy {
settings.hide_secrets()
}
Ok(settings)
})
}
#[derive(Debug, Clone, PartialEq, Eq, Deserr)]
@ -816,67 +673,6 @@ impl From<RankingRuleView> for Criterion {
}
}
/// API-facing counterpart of `milli`'s `ProximityPrecision`, used as the value of the
/// `proximity_precision` setting.
///
/// Variants are (de)serialized in camelCase and unknown fields are rejected. The `From`
/// impls below convert to and from the `milli` type variant-for-variant.
#[derive(Default, Debug, Clone, Copy, PartialEq, Eq, Deserr, Serialize, Deserialize)]
#[serde(deny_unknown_fields, rename_all = "camelCase")]
#[deserr(error = DeserrJsonError<InvalidSettingsProximityPrecision>, rename_all = camelCase, deny_unknown_fields)]
pub enum ProximityPrecisionView {
/// Maps to `ProximityPrecision::ByWord`; this is the default value.
#[default]
ByWord,
/// Maps to `ProximityPrecision::ByAttribute`.
ByAttribute,
}
/// Converts the engine-side precision into its API view, variant for variant.
impl From<ProximityPrecision> for ProximityPrecisionView {
    fn from(value: ProximityPrecision) -> Self {
        // Kept exhaustive on purpose: a new engine variant must fail to compile here.
        match value {
            ProximityPrecision::ByAttribute => Self::ByAttribute,
            ProximityPrecision::ByWord => Self::ByWord,
        }
    }
}
/// Converts the API view back into the engine-side precision, variant for variant.
impl From<ProximityPrecisionView> for ProximityPrecision {
    fn from(value: ProximityPrecisionView) -> Self {
        // Kept exhaustive on purpose: a new view variant must fail to compile here.
        match value {
            ProximityPrecisionView::ByAttribute => Self::ByAttribute,
            ProximityPrecisionView::ByWord => Self::ByWord,
        }
    }
}
#[derive(Debug, Clone, Default, Deserialize, PartialEq, Eq)]
pub struct WildcardSetting(Setting<Vec<String>>);
impl From<Setting<Vec<String>>> for WildcardSetting {
fn from(setting: Setting<Vec<String>>) -> Self {
Self(setting)
}
}
impl Serialize for WildcardSetting {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
serialize_with_wildcard(&self.0, serializer)
}
}
impl<E: deserr::DeserializeError> Deserr<E> for WildcardSetting {
fn deserialize_from_value<V: deserr::IntoValue>(
value: deserr::Value<V>,
location: ValuePointerRef<'_>,
) -> Result<Self, E> {
Ok(Self(Setting::deserialize_from_value(value, location)?))
}
}
impl std::ops::Deref for WildcardSetting {
type Target = Setting<Vec<String>>;
fn deref(&self) -> &Self::Target {
&self.0
}
}
#[cfg(test)]
pub(crate) mod test {
use super::*;
@ -885,8 +681,8 @@ pub(crate) mod test {
fn test_setting_check() {
// test no changes
let settings = Settings {
displayed_attributes: Setting::Set(vec![String::from("hello")]).into(),
searchable_attributes: Setting::Set(vec![String::from("hello")]).into(),
displayed_attributes: Setting::Set(vec![String::from("hello")]),
searchable_attributes: Setting::Set(vec![String::from("hello")]),
filterable_attributes: Setting::NotSet,
sortable_attributes: Setting::NotSet,
ranking_rules: Setting::NotSet,
@ -896,12 +692,9 @@ pub(crate) mod test {
dictionary: Setting::NotSet,
synonyms: Setting::NotSet,
distinct_attribute: Setting::NotSet,
proximity_precision: Setting::NotSet,
typo_tolerance: Setting::NotSet,
faceting: Setting::NotSet,
pagination: Setting::NotSet,
embedders: Setting::NotSet,
search_cutoff_ms: Setting::NotSet,
_kind: PhantomData::<Unchecked>,
};
@ -912,9 +705,8 @@ pub(crate) mod test {
// test wildcard
// test no changes
let settings = Settings {
displayed_attributes: Setting::Set(vec![String::from("*")]).into(),
searchable_attributes: Setting::Set(vec![String::from("hello"), String::from("*")])
.into(),
displayed_attributes: Setting::Set(vec![String::from("*")]),
searchable_attributes: Setting::Set(vec![String::from("hello"), String::from("*")]),
filterable_attributes: Setting::NotSet,
sortable_attributes: Setting::NotSet,
ranking_rules: Setting::NotSet,
@ -924,17 +716,14 @@ pub(crate) mod test {
dictionary: Setting::NotSet,
synonyms: Setting::NotSet,
distinct_attribute: Setting::NotSet,
proximity_precision: Setting::NotSet,
typo_tolerance: Setting::NotSet,
faceting: Setting::NotSet,
pagination: Setting::NotSet,
embedders: Setting::NotSet,
search_cutoff_ms: Setting::NotSet,
_kind: PhantomData::<Unchecked>,
};
let checked = settings.check();
assert_eq!(checked.displayed_attributes, Setting::Reset.into());
assert_eq!(checked.searchable_attributes, Setting::Reset.into());
assert_eq!(checked.displayed_attributes, Setting::Reset);
assert_eq!(checked.searchable_attributes, Setting::Reset);
}
}

View File

@ -86,8 +86,7 @@ impl From<Details> for DetailsView {
..DetailsView::default()
}
}
Details::SettingsUpdate { mut settings } => {
settings.hide_secrets();
Details::SettingsUpdate { settings } => {
DetailsView { settings: Some(settings), ..DetailsView::default() }
}
Details::IndexInfo { primary_key } => {

View File

@ -13,125 +13,124 @@ license.workspace = true
default-run = "meilisearch"
[dependencies]
actix-cors = "0.7.0"
actix-http = { version = "3.6.0", default-features = false, features = [
actix-cors = "0.6.4"
actix-http = { version = "3.3.1", default-features = false, features = [
"compress-brotli",
"compress-gzip",
"rustls-0_21",
"rustls",
] }
actix-utils = "3.0.1"
actix-web = { version = "4.5.1", default-features = false, features = [
actix-web = { version = "4.3.1", default-features = false, features = [
"macros",
"compress-brotli",
"compress-gzip",
"cookies",
"rustls-0_21",
"rustls",
] }
actix-web-static-files = { git = "https://github.com/kilork/actix-web-static-files.git", rev = "2d3b6160", optional = true }
anyhow = { version = "1.0.79", features = ["backtrace"] }
anyhow = { version = "1.0.70", features = ["backtrace"] }
async-stream = "0.3.5"
async-trait = "0.1.77"
bstr = "1.9.0"
async-trait = "0.1.68"
bstr = "1.4.0"
byte-unit = { version = "4.0.19", default-features = false, features = [
"std",
"serde",
] }
bytes = "1.5.0"
clap = { version = "4.4.17", features = ["derive", "env"] }
crossbeam-channel = "0.5.11"
deserr = { version = "0.6.1", features = ["actix-web"] }
bytes = "1.4.0"
clap = { version = "4.2.1", features = ["derive", "env"] }
crossbeam-channel = "0.5.8"
deserr = { version = "0.6.0", features = ["actix-web"]}
dump = { path = "../dump" }
either = "1.9.0"
either = "1.8.1"
env_logger = "0.10.0"
file-store = { path = "../file-store" }
flate2 = "1.0.28"
flate2 = "1.0.25"
fst = "0.4.7"
futures = "0.3.30"
futures-util = "0.3.30"
http = "0.2.11"
futures = "0.3.28"
futures-util = "0.3.28"
http = "0.2.9"
index-scheduler = { path = "../index-scheduler" }
indexmap = { version = "2.1.0", features = ["serde"] }
is-terminal = "0.4.10"
indexmap = { version = "2.0.0", features = ["serde"] }
is-terminal = "0.4.8"
itertools = "0.11.0"
jsonwebtoken = "9.2.0"
jsonwebtoken = "8.3.0"
lazy_static = "1.4.0"
log = "0.4.17"
meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-types = { path = "../meilisearch-types" }
mimalloc = { version = "0.1.39", default-features = false }
mimalloc = { version = "0.1.37", default-features = false }
mime = "0.3.17"
num_cpus = "1.16.0"
obkv = "0.2.1"
once_cell = "1.19.0"
ordered-float = "4.2.0"
num_cpus = "1.15.0"
obkv = "0.2.0"
once_cell = "1.17.1"
ordered-float = "3.7.0"
parking_lot = "0.12.1"
permissive-json-pointer = { path = "../permissive-json-pointer" }
pin-project-lite = "0.2.13"
pin-project-lite = "0.2.9"
platform-dirs = "0.3.0"
prometheus = { version = "0.13.3", features = ["process"] }
puffin = { version = "0.16.0", features = ["serialization"] }
rand = "0.8.5"
rayon = "1.8.0"
regex = "1.10.2"
reqwest = { version = "0.11.23", features = [
rayon = "1.7.0"
regex = "1.7.3"
reqwest = { version = "0.11.16", features = [
"rustls-tls",
"json",
], default-features = false }
rustls = "0.21.12"
rustls = "0.20.8"
rustls-pemfile = "1.0.2"
segment = { version = "0.2.3", optional = true }
serde = { version = "1.0.195", features = ["derive"] }
serde_json = { version = "1.0.111", features = ["preserve_order"] }
sha2 = "0.10.8"
siphasher = "1.0.0"
slice-group-by = "0.3.1"
segment = { version = "0.2.2", optional = true }
serde = { version = "1.0.160", features = ["derive"] }
serde_json = { version = "1.0.95", features = ["preserve_order"] }
sha2 = "0.10.6"
siphasher = "0.3.10"
slice-group-by = "0.3.0"
static-files = { version = "0.2.3", optional = true }
sysinfo = "0.30.5"
tar = "0.4.40"
tempfile = "3.9.0"
thiserror = "1.0.56"
time = { version = "0.3.31", features = [
sysinfo = "0.29.7"
tar = "0.4.38"
tempfile = "3.5.0"
thiserror = "1.0.40"
time = { version = "0.3.20", features = [
"serde-well-known",
"formatting",
"parsing",
"macros",
] }
tokio = { version = "1.35.1", features = ["full"] }
tokio-stream = "0.1.14"
toml = "0.8.8"
uuid = { version = "1.6.1", features = ["serde", "v4"] }
walkdir = "2.4.0"
tokio = { version = "1.27.0", features = ["full"] }
tokio-stream = "0.1.12"
toml = "0.7.3"
uuid = { version = "1.3.1", features = ["serde", "v4"] }
walkdir = "2.3.3"
yaup = "0.2.1"
serde_urlencoded = "0.7.1"
termcolor = "1.4.1"
termcolor = "1.2.0"
url = { version = "2.5.0", features = ["serde"] }
tracing = "0.1.40"
tracing-subscriber = { version = "0.3.18", features = ["json"] }
tracing-trace = { version = "0.1.0", path = "../tracing-trace" }
tracing-actix-web = "0.7.9"
build-info = { version = "1.7.0", path = "../build-info" }
[dev-dependencies]
actix-rt = "2.9.0"
actix-rt = "2.8.0"
assert-json-diff = "2.0.2"
brotli = "3.4.0"
insta = "1.34.0"
manifest-dir-macros = "0.1.18"
brotli = "3.3.4"
insta = "1.29.0"
manifest-dir-macros = "0.1.16"
maplit = "1.0.2"
meili-snap = { path = "../meili-snap" }
temp-env = "0.3.6"
urlencoding = "2.1.3"
temp-env = "0.3.3"
urlencoding = "2.1.2"
yaup = "0.2.1"
[build-dependencies]
anyhow = { version = "1.0.79", optional = true }
cargo_toml = { version = "0.18.0", optional = true }
anyhow = { version = "1.0.70", optional = true }
cargo_toml = { version = "0.15.2", optional = true }
hex = { version = "0.4.3", optional = true }
reqwest = { version = "0.11.23", features = [
reqwest = { version = "0.11.16", features = [
"blocking",
"rustls-tls",
], default-features = false, optional = true }
sha-1 = { version = "0.10.1", optional = true }
static-files = { version = "0.2.3", optional = true }
tempfile = { version = "3.9.0", optional = true }
zip = { version = "0.6.6", optional = true }
tempfile = { version = "3.5.0", optional = true }
vergen = { version = "7.5.1", default-features = false, features = ["git"] }
zip = { version = "0.6.4", optional = true }
[features]
default = ["analytics", "meilisearch-types/all-tokenizations", "mini-dashboard"]
@ -148,15 +147,12 @@ mini-dashboard = [
"zip",
]
chinese = ["meilisearch-types/chinese"]
chinese-pinyin = ["meilisearch-types/chinese-pinyin"]
hebrew = ["meilisearch-types/hebrew"]
japanese = ["meilisearch-types/japanese"]
thai = ["meilisearch-types/thai"]
greek = ["meilisearch-types/greek"]
khmer = ["meilisearch-types/khmer"]
vietnamese = ["meilisearch-types/vietnamese"]
swedish-recomposition = ["meilisearch-types/swedish-recomposition"]
[package.metadata.mini-dashboard]
assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.13/build.zip"
sha1 = "e20cc9b390003c6c844f4b8bcc5c5013191a77ff"
assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.11/build.zip"
sha1 = "83cd44ed1e5f97ecb581dc9f958a63f4ccc982d9"

View File

@ -1,4 +1,17 @@
use vergen::{vergen, Config, SemverKind};
/// Build-script entry point: asks `vergen` to emit its git-derived build variables and,
/// when the `mini-dashboard` feature is enabled, sets up the mini-dashboard assets.
fn main() {
    // NOTE: code relying on the VERGEN_ environment variables must also define them by hand
    // in the Dockerfile and forward them from the matching GitHub workflow
    // (publish_docker.yml), because the Dockerfile builds the binary outside of the git
    // directory.
    let mut vergen_config = Config::default();
    // Lightweight mode allows non-annotated tags to be used.
    *vergen_config.git_mut().semver_kind_mut() = SemverKind::Lightweight;

    // A vergen failure is surfaced as a cargo warning instead of aborting the build.
    vergen(vergen_config).unwrap_or_else(|err| println!("cargo:warning=vergen: {}", err));

    #[cfg(feature = "mini-dashboard")]
    mini_dashboard::setup_mini_dashboard().expect("Could not load the mini-dashboard assets");
}

View File

@ -7,6 +7,7 @@ use serde_json::Value;
use super::{find_user_id, Analytics, DocumentDeletionKind, DocumentFetchKind};
use crate::routes::indexes::documents::UpdateDocumentsQuery;
use crate::routes::tasks::TasksFilterQuery;
use crate::Opt;
pub struct MockAnalytics {
@ -25,18 +26,6 @@ impl SearchAggregator {
pub fn succeed(&mut self, _: &dyn Any) {}
}
/// No-op stand-in for the similar-request aggregator, used when the real analytics
/// implementation is not compiled in. It carries no state.
#[derive(Default)]
pub struct SimilarAggregator;

#[allow(dead_code)]
impl SimilarAggregator {
    /// Returns the empty aggregator; both arguments are ignored.
    pub fn from_query(_query: &dyn Any, _request: &dyn Any) -> Self {
        SimilarAggregator
    }

    /// Does nothing; the result is ignored.
    pub fn succeed(&mut self, _result: &dyn Any) {}
}
#[derive(Default)]
pub struct MultiSearchAggregator;
@ -78,8 +67,6 @@ impl Analytics for MockAnalytics {
fn publish(&self, _event_name: String, _send: Value, _request: Option<&HttpRequest>) {}
fn get_search(&self, _aggregate: super::SearchAggregator) {}
fn post_search(&self, _aggregate: super::SearchAggregator) {}
fn get_similar(&self, _aggregate: super::SimilarAggregator) {}
fn post_similar(&self, _aggregate: super::SimilarAggregator) {}
fn post_multi_search(&self, _aggregate: super::MultiSearchAggregator) {}
fn post_facet_search(&self, _aggregate: super::FacetSearchAggregator) {}
fn add_documents(
@ -99,4 +86,6 @@ impl Analytics for MockAnalytics {
}
fn get_fetch_documents(&self, _documents_query: &DocumentFetchKind, _request: &HttpRequest) {}
fn post_fetch_documents(&self, _documents_query: &DocumentFetchKind, _request: &HttpRequest) {}
fn get_tasks(&self, _query: &TasksFilterQuery, _request: &HttpRequest) {}
fn health_seen(&self, _request: &HttpRequest) {}
}

View File

@ -14,6 +14,7 @@ use platform_dirs::AppDirs;
use serde_json::Value;
use crate::routes::indexes::documents::UpdateDocumentsQuery;
use crate::routes::tasks::TasksFilterQuery;
// if the analytics feature is disabled
// the `SegmentAnalytics` point to the mock instead of the real analytics
@ -22,8 +23,6 @@ pub type SegmentAnalytics = mock_analytics::MockAnalytics;
#[cfg(not(feature = "analytics"))]
pub type SearchAggregator = mock_analytics::SearchAggregator;
#[cfg(not(feature = "analytics"))]
pub type SimilarAggregator = mock_analytics::SimilarAggregator;
#[cfg(not(feature = "analytics"))]
pub type MultiSearchAggregator = mock_analytics::MultiSearchAggregator;
#[cfg(not(feature = "analytics"))]
pub type FacetSearchAggregator = mock_analytics::FacetSearchAggregator;
@ -34,8 +33,6 @@ pub type SegmentAnalytics = segment_analytics::SegmentAnalytics;
#[cfg(feature = "analytics")]
pub type SearchAggregator = segment_analytics::SearchAggregator;
#[cfg(feature = "analytics")]
pub type SimilarAggregator = segment_analytics::SimilarAggregator;
#[cfg(feature = "analytics")]
pub type MultiSearchAggregator = segment_analytics::MultiSearchAggregator;
#[cfg(feature = "analytics")]
pub type FacetSearchAggregator = segment_analytics::FacetSearchAggregator;
@ -90,12 +87,6 @@ pub trait Analytics: Sync + Send {
/// This method should be called to aggregate a post search
fn post_search(&self, aggregate: SearchAggregator);
/// This method should be called to aggregate a get similar request
fn get_similar(&self, aggregate: SimilarAggregator);
/// This method should be called to aggregate a post similar request
fn post_similar(&self, aggregate: SimilarAggregator);
/// This method should be called to aggregate a post array of searches
fn post_multi_search(&self, aggregate: MultiSearchAggregator);
@ -126,4 +117,10 @@ pub trait Analytics: Sync + Send {
index_creation: bool,
request: &HttpRequest,
);
// this method should be called to aggregate the get tasks requests.
fn get_tasks(&self, query: &TasksFilterQuery, request: &HttpRequest);
// this method should be called to aggregate the health requests.
fn health_seen(&self, request: &HttpRequest);
}

View File

@ -18,7 +18,7 @@ use segment::message::{Identify, Track, User};
use segment::{AutoBatcher, Batcher, HttpClient};
use serde::Serialize;
use serde_json::{json, Value};
use sysinfo::{Disks, System};
use sysinfo::{DiskExt, System, SystemExt};
use time::OffsetDateTime;
use tokio::select;
use tokio::sync::mpsc::{self, Receiver, Sender};
@ -28,17 +28,15 @@ use super::{
config_user_id_path, DocumentDeletionKind, DocumentFetchKind, MEILISEARCH_CONFIG_PATH,
};
use crate::analytics::Analytics;
use crate::option::{
default_http_addr, IndexerOpts, LogMode, MaxMemory, MaxThreads, ScheduleSnapshot,
};
use crate::option::{default_http_addr, IndexerOpts, MaxMemory, MaxThreads, ScheduleSnapshot};
use crate::routes::indexes::documents::UpdateDocumentsQuery;
use crate::routes::indexes::facet_search::FacetSearchQuery;
use crate::routes::tasks::TasksFilterQuery;
use crate::routes::{create_all_stats, Stats};
use crate::search::{
FacetSearchResult, MatchingStrategy, SearchQuery, SearchQueryWithIndex, SearchResult,
SimilarQuery, SimilarResult, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER,
DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT,
DEFAULT_SEMANTIC_RATIO,
DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG,
DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT,
};
use crate::Opt;
@ -74,8 +72,6 @@ pub enum AnalyticsMsg {
BatchMessage(Track),
AggregateGetSearch(SearchAggregator),
AggregatePostSearch(SearchAggregator),
AggregateGetSimilar(SimilarAggregator),
AggregatePostSimilar(SimilarAggregator),
AggregatePostMultiSearch(MultiSearchAggregator),
AggregatePostFacetSearch(FacetSearchAggregator),
AggregateAddDocuments(DocumentsAggregator),
@ -83,6 +79,8 @@ pub enum AnalyticsMsg {
AggregateUpdateDocuments(DocumentsAggregator),
AggregateGetFetchDocuments(DocumentsFetchAggregator),
AggregatePostFetchDocuments(DocumentsFetchAggregator),
AggregateTasks(TasksAggregator),
AggregateHealth(HealthAggregator),
}
pub struct SegmentAnalytics {
@ -152,8 +150,8 @@ impl SegmentAnalytics {
update_documents_aggregator: DocumentsAggregator::default(),
get_fetch_documents_aggregator: DocumentsFetchAggregator::default(),
post_fetch_documents_aggregator: DocumentsFetchAggregator::default(),
get_similar_aggregator: SimilarAggregator::default(),
post_similar_aggregator: SimilarAggregator::default(),
get_tasks_aggregator: TasksAggregator::default(),
health_aggregator: HealthAggregator::default(),
});
tokio::spawn(segment.run(index_scheduler.clone(), auth_controller.clone()));
@ -189,14 +187,6 @@ impl super::Analytics for SegmentAnalytics {
let _ = self.sender.try_send(AnalyticsMsg::AggregatePostSearch(aggregate));
}
/// Forwards a similar-request aggregate (GET variant) on the analytics channel.
/// The send is non-blocking and a failed send is deliberately ignored.
fn get_similar(&self, aggregate: SimilarAggregator) {
    let message = AnalyticsMsg::AggregateGetSimilar(aggregate);
    let _ = self.sender.try_send(message);
}
/// Forwards a similar-request aggregate (POST variant) on the analytics channel.
/// The send is non-blocking and a failed send is deliberately ignored.
fn post_similar(&self, aggregate: SimilarAggregator) {
    let message = AnalyticsMsg::AggregatePostSimilar(aggregate);
    let _ = self.sender.try_send(message);
}
/// Forwards a facet-search aggregate on the analytics channel.
/// The send is non-blocking and a failed send is deliberately ignored.
fn post_facet_search(&self, aggregate: FacetSearchAggregator) {
    let message = AnalyticsMsg::AggregatePostFacetSearch(aggregate);
    let _ = self.sender.try_send(message);
}
@ -239,6 +229,16 @@ impl super::Analytics for SegmentAnalytics {
let aggregate = DocumentsFetchAggregator::from_query(documents_query, request);
let _ = self.sender.try_send(AnalyticsMsg::AggregatePostFetchDocuments(aggregate));
}
/// Builds a [`TasksAggregator`] from the tasks query and request, then forwards it on the
/// analytics channel. The send is non-blocking and a failed send is deliberately ignored.
fn get_tasks(&self, query: &TasksFilterQuery, request: &HttpRequest) {
    let message = AnalyticsMsg::AggregateTasks(TasksAggregator::from_query(query, request));
    let _ = self.sender.try_send(message);
}
/// Builds a [`HealthAggregator`] from the request, then forwards it on the analytics
/// channel. The send is non-blocking and a failed send is deliberately ignored.
fn health_seen(&self, request: &HttpRequest) {
    let message = AnalyticsMsg::AggregateHealth(HealthAggregator::from_query(request));
    let _ = self.sender.try_send(message);
}
}
/// This structure represents the `infos` field we send in the analytics.
@ -250,13 +250,7 @@ impl super::Analytics for SegmentAnalytics {
struct Infos {
env: String,
experimental_enable_metrics: bool,
experimental_search_queue_size: usize,
experimental_logs_mode: LogMode,
experimental_replication_parameters: bool,
experimental_enable_logs_route: bool,
experimental_reduce_indexing_memory_usage: bool,
experimental_max_number_of_batched_tasks: usize,
gpu_enabled: bool,
db_path: bool,
import_dump: bool,
dump_dir: bool,
@ -269,8 +263,6 @@ struct Infos {
ignore_snapshot_if_db_exists: bool,
http_addr: bool,
http_payload_size_limit: Byte,
task_queue_webhook: bool,
task_webhook_authorization_header: bool,
log_level: String,
max_indexing_memory: MaxMemory,
max_indexing_threads: MaxThreads,
@ -292,17 +284,11 @@ impl From<Opt> for Infos {
let Opt {
db_path,
experimental_enable_metrics,
experimental_search_queue_size,
experimental_logs_mode,
experimental_replication_parameters,
experimental_enable_logs_route,
experimental_reduce_indexing_memory_usage,
experimental_max_number_of_batched_tasks,
http_addr,
master_key: _,
env,
task_webhook_url,
task_webhook_authorization_header,
task_webhook_url: _,
max_index_size: _,
max_task_db_size: _,
http_payload_size_limit,
@ -342,12 +328,7 @@ impl From<Opt> for Infos {
Self {
env,
experimental_enable_metrics,
experimental_search_queue_size,
experimental_logs_mode,
experimental_replication_parameters,
experimental_enable_logs_route,
experimental_reduce_indexing_memory_usage,
gpu_enabled: meilisearch_types::milli::vector::is_cuda_enabled(),
db_path: db_path != PathBuf::from("./data.ms"),
import_dump: import_dump.is_some(),
dump_dir: dump_dir != PathBuf::from("dumps/"),
@ -360,9 +341,6 @@ impl From<Opt> for Infos {
ignore_snapshot_if_db_exists,
http_addr: http_addr != default_http_addr(),
http_payload_size_limit,
experimental_max_number_of_batched_tasks,
task_queue_webhook: task_webhook_url.is_some(),
task_webhook_authorization_header: task_webhook_authorization_header.is_some(),
log_level: log_level.to_string(),
max_indexing_memory,
max_indexing_threads,
@ -392,25 +370,24 @@ pub struct Segment {
update_documents_aggregator: DocumentsAggregator,
get_fetch_documents_aggregator: DocumentsFetchAggregator,
post_fetch_documents_aggregator: DocumentsFetchAggregator,
get_similar_aggregator: SimilarAggregator,
post_similar_aggregator: SimilarAggregator,
get_tasks_aggregator: TasksAggregator,
health_aggregator: HealthAggregator,
}
impl Segment {
fn compute_traits(opt: &Opt, stats: Stats) -> Value {
static FIRST_START_TIMESTAMP: Lazy<Instant> = Lazy::new(Instant::now);
static SYSTEM: Lazy<Value> = Lazy::new(|| {
let disks = Disks::new_with_refreshed_list();
let mut sys = System::new_all();
sys.refresh_all();
let kernel_version = System::kernel_version()
.and_then(|k| k.split_once('-').map(|(k, _)| k.to_string()));
let kernel_version =
sys.kernel_version().and_then(|k| k.split_once('-').map(|(k, _)| k.to_string()));
json!({
"distribution": System::name(),
"distribution": sys.name(),
"kernel_version": kernel_version,
"cores": sys.cpus().len(),
"ram_size": sys.total_memory(),
"disk_size": disks.iter().map(|disk| disk.total_space()).max(),
"disk_size": sys.disks().iter().map(|disk| disk.total_space()).max(),
"server_provider": std::env::var("MEILI_SERVER_PROVIDER").ok(),
})
});
@ -456,8 +433,8 @@ impl Segment {
Some(AnalyticsMsg::AggregateUpdateDocuments(agreg)) => self.update_documents_aggregator.aggregate(agreg),
Some(AnalyticsMsg::AggregateGetFetchDocuments(agreg)) => self.get_fetch_documents_aggregator.aggregate(agreg),
Some(AnalyticsMsg::AggregatePostFetchDocuments(agreg)) => self.post_fetch_documents_aggregator.aggregate(agreg),
Some(AnalyticsMsg::AggregateGetSimilar(agreg)) => self.get_similar_aggregator.aggregate(agreg),
Some(AnalyticsMsg::AggregatePostSimilar(agreg)) => self.post_similar_aggregator.aggregate(agreg),
Some(AnalyticsMsg::AggregateTasks(agreg)) => self.get_tasks_aggregator.aggregate(agreg),
Some(AnalyticsMsg::AggregateHealth(agreg)) => self.health_aggregator.aggregate(agreg),
None => (),
}
}
@ -474,9 +451,7 @@ impl Segment {
create_all_stats(index_scheduler.into(), auth_controller.into(), &AuthFilter::default())
{
// Replace the version number with the prototype name if any.
let version = if let Some(prototype) = build_info::DescribeResult::from_build()
.and_then(|describe| describe.as_prototype())
{
let version = if let Some(prototype) = crate::prototype_name() {
prototype
} else {
env!("CARGO_PKG_VERSION")
@ -511,8 +486,8 @@ impl Segment {
update_documents_aggregator,
get_fetch_documents_aggregator,
post_fetch_documents_aggregator,
get_similar_aggregator,
post_similar_aggregator,
get_tasks_aggregator,
health_aggregator,
} = self;
if let Some(get_search) =
@ -560,17 +535,11 @@ impl Segment {
{
let _ = self.batcher.push(post_fetch_documents).await;
}
if let Some(get_similar_documents) =
take(get_similar_aggregator).into_event(user, "Similar GET")
{
let _ = self.batcher.push(get_similar_documents).await;
if let Some(get_tasks) = take(get_tasks_aggregator).into_event(user, "Tasks Seen") {
let _ = self.batcher.push(get_tasks).await;
}
if let Some(post_similar_documents) =
take(post_similar_aggregator).into_event(user, "Similar POST")
{
let _ = self.batcher.push(post_similar_documents).await;
if let Some(health) = take(health_aggregator).into_event(user, "Health Seen") {
let _ = self.batcher.push(health).await;
}
let _ = self.batcher.flush().await;
}
@ -586,8 +555,6 @@ pub struct SearchAggregator {
// requests
total_received: usize,
total_succeeded: usize,
total_degraded: usize,
total_used_negative_operator: usize,
time_spent: BinaryHeap<usize>,
// sort
@ -617,11 +584,6 @@ pub struct SearchAggregator {
// vector
// The maximum number of floats in a vector request
max_vector_size: usize,
// Whether the semantic ratio passed to a hybrid search equals the default ratio.
semantic_ratio: bool,
// Whether a non-default embedder was specified
embedder: bool,
hybrid: bool,
// every time a search is done, we increment the counter linked to the used settings
matching_strategy: HashMap<String, usize>,
@ -675,7 +637,6 @@ impl SearchAggregator {
crop_marker,
matching_strategy,
attributes_to_search_on,
hybrid,
} = query;
let mut ret = Self::default();
@ -749,12 +710,6 @@ impl SearchAggregator {
ret.show_ranking_score = *show_ranking_score;
ret.show_ranking_score_details = *show_ranking_score_details;
if let Some(hybrid) = hybrid {
ret.semantic_ratio = hybrid.semantic_ratio != DEFAULT_SEMANTIC_RATIO();
ret.embedder = hybrid.embedder.is_some();
ret.hybrid = true;
}
ret
}
@ -762,22 +717,14 @@ impl SearchAggregator {
let SearchResult {
hits: _,
query: _,
vector: _,
processing_time_ms,
hits_info: _,
semantic_hit_count: _,
facet_distribution: _,
facet_stats: _,
degraded,
used_negative_operator,
} = result;
self.total_succeeded = self.total_succeeded.saturating_add(1);
if *degraded {
self.total_degraded = self.total_degraded.saturating_add(1);
}
if *used_negative_operator {
self.total_used_negative_operator = self.total_used_negative_operator.saturating_add(1);
}
self.time_spent.push(*processing_time_ms as usize);
}
@ -816,11 +763,6 @@ impl SearchAggregator {
facets_total_number_of_facets,
show_ranking_score,
show_ranking_score_details,
semantic_ratio,
embedder,
hybrid,
total_degraded,
total_used_negative_operator,
} = other;
if self.timestamp.is_none() {
@ -835,9 +777,6 @@ impl SearchAggregator {
// request
self.total_received = self.total_received.saturating_add(total_received);
self.total_succeeded = self.total_succeeded.saturating_add(total_succeeded);
self.total_degraded = self.total_degraded.saturating_add(total_degraded);
self.total_used_negative_operator =
self.total_used_negative_operator.saturating_add(total_used_negative_operator);
self.time_spent.append(time_spent);
// sort
@ -869,9 +808,6 @@ impl SearchAggregator {
// vector
self.max_vector_size = self.max_vector_size.max(max_vector_size);
self.semantic_ratio |= semantic_ratio;
self.hybrid |= hybrid;
self.embedder |= embedder;
// pagination
self.max_limit = self.max_limit.max(max_limit);
@ -940,11 +876,6 @@ impl SearchAggregator {
facets_total_number_of_facets,
show_ranking_score,
show_ranking_score_details,
semantic_ratio,
embedder,
hybrid,
total_degraded,
total_used_negative_operator,
} = self;
if total_received == 0 {
@ -964,8 +895,6 @@ impl SearchAggregator {
"total_succeeded": total_succeeded,
"total_failed": total_received.saturating_sub(total_succeeded), // just to be sure we never panics
"total_received": total_received,
"total_degraded": total_degraded,
"total_used_negative_operator": total_used_negative_operator,
},
"sort": {
"with_geoPoint": sort_with_geo_point,
@ -986,11 +915,6 @@ impl SearchAggregator {
"vector": {
"max_vector_size": max_vector_size,
},
"hybrid": {
"enabled": hybrid,
"semantic_ratio": semantic_ratio,
"embedder": embedder,
},
"pagination": {
"max_limit": max_limit,
"max_offset": max_offset,
@ -1086,7 +1010,6 @@ impl MultiSearchAggregator {
crop_marker: _,
matching_strategy: _,
attributes_to_search_on: _,
hybrid: _,
} = query;
index_uid.as_str()
@ -1233,7 +1156,6 @@ impl FacetSearchAggregator {
filter,
matching_strategy,
attributes_to_search_on,
hybrid,
} = query;
let mut ret = Self::default();
@ -1247,8 +1169,7 @@ impl FacetSearchAggregator {
|| vector.is_some()
|| filter.is_some()
|| *matching_strategy != MatchingStrategy::default()
|| attributes_to_search_on.is_some()
|| hybrid.is_some();
|| attributes_to_search_on.is_some();
ret
}
@ -1507,6 +1428,176 @@ impl DocumentsDeletionAggregator {
}
}
/// Aggregated analytics about the tasks requests that were received.
///
/// Only records *whether* each filter was used, never the filter values. The struct is
/// serialized as-is to build the event properties (see `into_event`).
#[derive(Default, Serialize)]
pub struct TasksAggregator {
/// Set when an aggregate is built from a request; skipped during serialization.
/// When it is `None`, `into_event` produces no event.
#[serde(skip)]
timestamp: Option<OffsetDateTime>,
// context
/// User agents extracted from the aggregated requests, serialized as `user-agent`.
#[serde(rename = "user-agent")]
user_agents: HashSet<String>,
// Each `filtered_by_*` flag is true when at least one aggregated request
// provided the corresponding query parameter.
filtered_by_uid: bool,
filtered_by_index_uid: bool,
filtered_by_type: bool,
filtered_by_status: bool,
filtered_by_canceled_by: bool,
filtered_by_before_enqueued_at: bool,
filtered_by_after_enqueued_at: bool,
filtered_by_before_started_at: bool,
filtered_by_after_started_at: bool,
filtered_by_before_finished_at: bool,
filtered_by_after_finished_at: bool,
/// Number of aggregated requests (saturating).
total_received: usize,
}
impl TasksAggregator {
    /// Builds the aggregate describing a single tasks request.
    ///
    /// Only the presence of each filter is recorded, never its value; `limit` and `from`
    /// are ignored. The timestamp is set to the current UTC time and `total_received`
    /// starts at 1.
    pub fn from_query(query: &TasksFilterQuery, request: &HttpRequest) -> Self {
        // Exhaustive destructuring: adding a field to `TasksFilterQuery` forces this
        // function to be revisited.
        let TasksFilterQuery {
            limit: _,
            from: _,
            uids,
            index_uids,
            types,
            statuses,
            canceled_by,
            before_enqueued_at,
            after_enqueued_at,
            before_started_at,
            after_started_at,
            before_finished_at,
            after_finished_at,
        } = query;

        Self {
            timestamp: Some(OffsetDateTime::now_utc()),
            user_agents: extract_user_agents(request).into_iter().collect(),
            filtered_by_uid: uids.is_some(),
            filtered_by_index_uid: index_uids.is_some(),
            filtered_by_type: types.is_some(),
            filtered_by_status: statuses.is_some(),
            filtered_by_canceled_by: canceled_by.is_some(),
            filtered_by_before_enqueued_at: before_enqueued_at.is_some(),
            filtered_by_after_enqueued_at: after_enqueued_at.is_some(),
            filtered_by_before_started_at: before_started_at.is_some(),
            filtered_by_after_started_at: after_started_at.is_some(),
            filtered_by_before_finished_at: before_finished_at.is_some(),
            filtered_by_after_finished_at: after_finished_at.is_some(),
            total_received: 1,
        }
    }

    /// Aggregate one [TasksAggregator] into another.
    ///
    /// The existing timestamp is kept when there is one, user agents are merged, every
    /// `filtered_by_*` flag is OR-ed and the request counters are added (saturating).
    pub fn aggregate(&mut self, other: Self) {
        let Self {
            timestamp,
            user_agents,
            total_received,
            filtered_by_uid,
            filtered_by_index_uid,
            filtered_by_type,
            filtered_by_status,
            filtered_by_canceled_by,
            filtered_by_before_enqueued_at,
            filtered_by_after_enqueued_at,
            filtered_by_before_started_at,
            filtered_by_after_started_at,
            filtered_by_before_finished_at,
            filtered_by_after_finished_at,
        } = other;

        if self.timestamp.is_none() {
            self.timestamp = timestamp;
        }

        // In-place union of the two user-agent sets.
        self.user_agents.extend(user_agents);

        self.filtered_by_uid |= filtered_by_uid;
        self.filtered_by_index_uid |= filtered_by_index_uid;
        self.filtered_by_type |= filtered_by_type;
        self.filtered_by_status |= filtered_by_status;
        self.filtered_by_canceled_by |= filtered_by_canceled_by;
        self.filtered_by_before_enqueued_at |= filtered_by_before_enqueued_at;
        self.filtered_by_after_enqueued_at |= filtered_by_after_enqueued_at;
        self.filtered_by_before_started_at |= filtered_by_before_started_at;
        self.filtered_by_after_started_at |= filtered_by_after_started_at;
        self.filtered_by_before_finished_at |= filtered_by_before_finished_at;
        self.filtered_by_after_finished_at |= filtered_by_after_finished_at;

        self.total_received = self.total_received.saturating_add(total_received);
    }

    /// Converts the aggregate into a `Track` event named `event_name` for `user`.
    ///
    /// Returns `None` when nothing was aggregated (no timestamp) or when the aggregate
    /// cannot be serialized into the event properties.
    pub fn into_event(self, user: &User, event_name: &str) -> Option<Track> {
        // if we had no timestamp it means we never encountered any events and
        // thus we don't need to send this event.
        let timestamp = self.timestamp?;

        Some(Track {
            timestamp: Some(timestamp),
            user: user.clone(),
            event: event_name.to_string(),
            properties: serde_json::to_value(self).ok()?,
            ..Default::default()
        })
    }
}
/// Aggregated analytics about the health requests that were received.
///
/// The struct is serialized as-is to build the event properties (see `into_event`).
#[derive(Default, Serialize)]
pub struct HealthAggregator {
/// Set when an aggregate is built from a request; skipped during serialization.
/// When it is `None`, `into_event` produces no event.
#[serde(skip)]
timestamp: Option<OffsetDateTime>,
// context
/// User agents extracted from the aggregated requests, serialized as `user-agent`.
#[serde(rename = "user-agent")]
user_agents: HashSet<String>,
/// Number of aggregated requests (saturating), serialized under the flat key
/// `requests.total_received`.
#[serde(rename = "requests.total_received")]
total_received: usize,
}
impl HealthAggregator {
    /// Builds the aggregate describing a single health request: timestamped with the
    /// current UTC time, carrying the request's user agents and a count of one.
    pub fn from_query(request: &HttpRequest) -> Self {
        let user_agents = extract_user_agents(request).into_iter().collect();
        let timestamp = Some(OffsetDateTime::now_utc());

        Self { timestamp, user_agents, total_received: 1 }
    }

    /// Aggregate one [HealthAggregator] into another.
    pub fn aggregate(&mut self, other: Self) {
        // Keep our own timestamp when we have one, otherwise adopt the incoming one.
        self.timestamp = self.timestamp.or(other.timestamp);

        // Merge the incoming user agents into our set.
        self.user_agents.extend(other.user_agents);

        self.total_received = self.total_received.saturating_add(other.total_received);
    }

    /// Converts the aggregate into a `Track` event named `event_name` for `user`.
    ///
    /// Returns `None` when nothing was aggregated (no timestamp) or when the aggregate
    /// cannot be serialized into the event properties.
    pub fn into_event(self, user: &User, event_name: &str) -> Option<Track> {
        // Without a timestamp no request was ever recorded, so there is nothing to send.
        let timestamp = self.timestamp?;
        let properties = serde_json::to_value(self).ok()?;

        let event = Track {
            timestamp: Some(timestamp),
            user: user.clone(),
            event: event_name.to_owned(),
            properties,
            ..Default::default()
        };
        Some(event)
    }
}
#[derive(Default, Serialize)]
pub struct DocumentsFetchAggregator {
#[serde(skip)]
@ -1589,235 +1680,3 @@ impl DocumentsFetchAggregator {
})
}
}
/// Aggregated analytics about the similar requests that were received.
///
/// Unlike the serde-derived aggregators, the event payload is built by hand in
/// `into_event`.
#[derive(Default)]
pub struct SimilarAggregator {
// Set when an aggregate is built from a request; forwarded as the event timestamp.
timestamp: Option<OffsetDateTime>,
// context
user_agents: HashSet<String>,
// requests
total_received: usize,
total_succeeded: usize,
// Processing times (ms) of the succeeded requests, used to report the 99th percentile.
time_spent: BinaryHeap<usize>,
// filter
filter_with_geo_radius: bool,
filter_with_geo_bounding_box: bool,
// every time a request has a filter, this field must be incremented by the number of terms it contains
filter_sum_of_criteria_terms: usize,
// every time a request has a filter, this field must be incremented by one
filter_total_number_of_criteria: usize,
// Number of requests per filter syntax ("string", "array", "mixed" or "none").
used_syntax: HashMap<String, usize>,
// Whether a non-default embedder was specified
embedder: bool,
// pagination
max_limit: usize,
max_offset: usize,
// formatting
max_attributes_to_retrieve: usize,
// scoring
show_ranking_score: bool,
show_ranking_score_details: bool,
}
impl SimilarAggregator {
/// Builds the aggregate describing a single similar request.
///
/// The timestamp is set to the current UTC time and `total_received` starts at 1.
/// `id` and `attributes_to_retrieve` are ignored.
///
/// NOTE(review): since `attributes_to_retrieve` is ignored, `max_attributes_to_retrieve`
/// keeps its default value of 0 here — confirm this is intended.
#[allow(clippy::field_reassign_with_default)]
pub fn from_query(query: &SimilarQuery, request: &HttpRequest) -> Self {
let SimilarQuery {
id: _,
embedder,
offset,
limit,
attributes_to_retrieve: _,
show_ranking_score,
show_ranking_score_details,
filter,
} = query;
let mut ret = Self::default();
ret.timestamp = Some(OffsetDateTime::now_utc());
ret.total_received = 1;
ret.user_agents = extract_user_agents(request).into_iter().collect();
if let Some(ref filter) = filter {
// Compiled once and reused: matches the `AND ` / ` OR` separators of a filter expression.
static RE: Lazy<Regex> = Lazy::new(|| Regex::new("AND | OR").unwrap());
ret.filter_total_number_of_criteria = 1;
// Classify the filter syntax from the JSON shape: a plain string, an array, or an
// array in which at least one element itself contains an AND/OR separator ("mixed").
let syntax = match filter {
Value::String(_) => "string".to_string(),
Value::Array(values) => {
if values.iter().map(|v| v.to_string()).any(|s| RE.is_match(&s)) {
"mixed".to_string()
} else {
"array".to_string()
}
}
_ => "none".to_string(),
};
// convert the string to a HashMap
ret.used_syntax.insert(syntax, 1);
let stringified_filters = filter.to_string();
ret.filter_with_geo_radius = stringified_filters.contains("_geoRadius(");
ret.filter_with_geo_bounding_box = stringified_filters.contains("_geoBoundingBox(");
// Number of pieces obtained by splitting the stringified filter on the separators.
ret.filter_sum_of_criteria_terms = RE.split(&stringified_filters).count();
}
ret.max_limit = *limit;
ret.max_offset = *offset;
ret.show_ranking_score = *show_ranking_score;
ret.show_ranking_score_details = *show_ranking_score_details;
ret.embedder = embedder.is_some();
ret
}
/// Records a successful request: bumps `total_succeeded` and stores its processing time.
pub fn succeed(&mut self, result: &SimilarResult) {
let SimilarResult { id: _, hits: _, processing_time_ms, hits_info: _ } = result;
self.total_succeeded = self.total_succeeded.saturating_add(1);
self.time_spent.push(*processing_time_ms as usize);
}
/// Aggregate one [SimilarAggregator] into another.
///
/// Counters are added (saturating), boolean flags are OR-ed, maxima are kept and the
/// existing timestamp is preserved when there is one.
pub fn aggregate(&mut self, mut other: Self) {
let Self {
timestamp,
user_agents,
total_received,
total_succeeded,
ref mut time_spent,
filter_with_geo_radius,
filter_with_geo_bounding_box,
filter_sum_of_criteria_terms,
filter_total_number_of_criteria,
used_syntax,
max_limit,
max_offset,
max_attributes_to_retrieve,
show_ranking_score,
show_ranking_score_details,
embedder,
} = other;
if self.timestamp.is_none() {
self.timestamp = timestamp;
}
// context
for user_agent in user_agents.into_iter() {
self.user_agents.insert(user_agent);
}
// request
self.total_received = self.total_received.saturating_add(total_received);
self.total_succeeded = self.total_succeeded.saturating_add(total_succeeded);
// `append` moves every element out of the other heap, hence the `ref mut` binding above.
self.time_spent.append(time_spent);
// filter
self.filter_with_geo_radius |= filter_with_geo_radius;
self.filter_with_geo_bounding_box |= filter_with_geo_bounding_box;
self.filter_sum_of_criteria_terms =
self.filter_sum_of_criteria_terms.saturating_add(filter_sum_of_criteria_terms);
self.filter_total_number_of_criteria =
self.filter_total_number_of_criteria.saturating_add(filter_total_number_of_criteria);
for (key, value) in used_syntax.into_iter() {
let used_syntax = self.used_syntax.entry(key).or_insert(0);
*used_syntax = used_syntax.saturating_add(value);
}
self.embedder |= embedder;
// pagination
self.max_limit = self.max_limit.max(max_limit);
self.max_offset = self.max_offset.max(max_offset);
// formatting
self.max_attributes_to_retrieve =
self.max_attributes_to_retrieve.max(max_attributes_to_retrieve);
// scoring
self.show_ranking_score |= show_ranking_score;
self.show_ranking_score_details |= show_ranking_score_details;
}
/// Converts the aggregate into a `Track` event named `event_name` for `user`.
///
/// Returns `None` when no request was received.
///
/// NOTE(review): when no aggregated request carried a filter, `avg_criteria_number` is
/// computed as 0.0 / 0.0 and is therefore reported as the string "NaN" — confirm the
/// consumer expects this.
pub fn into_event(self, user: &User, event_name: &str) -> Option<Track> {
let Self {
timestamp,
user_agents,
total_received,
total_succeeded,
time_spent,
filter_with_geo_radius,
filter_with_geo_bounding_box,
filter_sum_of_criteria_terms,
filter_total_number_of_criteria,
used_syntax,
max_limit,
max_offset,
max_attributes_to_retrieve,
show_ranking_score,
show_ranking_score_details,
embedder,
} = self;
if total_received == 0 {
None
} else {
// we get all the values in a sorted manner
let time_spent = time_spent.into_sorted_vec();
// the index of the 99th percentile value
let percentile_99th = time_spent.len() * 99 / 100;
// We are only interested in the slowest value of the 99% fastest results
// (`None` when no request succeeded, since the vector is then empty)
let time_spent = time_spent.get(percentile_99th);
let properties = json!({
"user-agent": user_agents,
"requests": {
"99th_response_time": time_spent.map(|t| format!("{:.2}", t)),
"total_succeeded": total_succeeded,
"total_failed": total_received.saturating_sub(total_succeeded), // just to be sure we never panic
"total_received": total_received,
},
"filter": {
"with_geoRadius": filter_with_geo_radius,
"with_geoBoundingBox": filter_with_geo_bounding_box,
"avg_criteria_number": format!("{:.2}", filter_sum_of_criteria_terms as f64 / filter_total_number_of_criteria as f64),
"most_used_syntax": used_syntax.iter().max_by_key(|(_, v)| *v).map(|(k, _)| json!(k)).unwrap_or_else(|| json!(null)),
},
"hybrid": {
"embedder": embedder,
},
"pagination": {
"max_limit": max_limit,
"max_offset": max_offset,
},
"formatting": {
"max_attributes_to_retrieve": max_attributes_to_retrieve,
},
"scoring": {
"show_ranking_score": show_ranking_score,
"show_ranking_score_details": show_ranking_score_details,
},
});
Some(Track {
timestamp,
user: user.clone(),
event: event_name.to_string(),
properties,
..Default::default()
})
}
}
}

View File

@ -12,8 +12,6 @@ pub enum MeilisearchHttpError {
#[error("A Content-Type header is missing. Accepted values for the Content-Type header are: {}",
.0.iter().map(|s| format!("`{}`", s)).collect::<Vec<_>>().join(", "))]
MissingContentType(Vec<String>),
#[error("The `/logs/stream` route is currently in use by someone else.")]
AlreadyUsedLogRoute,
#[error("The Content-Type `{0}` does not support the use of a csv delimiter. The csv delimiter can only be used with the Content-Type `text/csv`.")]
CsvDelimiterWithWrongContentType(String),
#[error(
@ -29,10 +27,6 @@ pub enum MeilisearchHttpError {
InvalidExpression(&'static [&'static str], Value),
#[error("A {0} payload is missing.")]
MissingPayload(PayloadType),
#[error("Too many search requests running at the same time: {0}. Retry after 10s.")]
TooManySearchRequests(usize),
#[error("Internal error: Search limiter is down.")]
SearchLimiterIsDown,
#[error("The provided payload reached the size limit. The maximum accepted payload size is {}.", Byte::from_bytes(*.0 as u64).get_appropriate_unit(true))]
PayloadTooLarge(usize),
#[error("Two indexes must be given for each swap. The list `[{}]` contains {} indexes.",
@ -57,15 +51,12 @@ pub enum MeilisearchHttpError {
DocumentFormat(#[from] DocumentFormatError),
#[error(transparent)]
Join(#[from] JoinError),
#[error("Invalid request: missing `hybrid` parameter when both `q` and `vector` are present.")]
MissingSearchHybrid,
}
impl ErrorCode for MeilisearchHttpError {
fn error_code(&self) -> Code {
match self {
MeilisearchHttpError::MissingContentType(_) => Code::MissingContentType,
MeilisearchHttpError::AlreadyUsedLogRoute => Code::BadRequest,
MeilisearchHttpError::CsvDelimiterWithWrongContentType(_) => Code::InvalidContentType,
MeilisearchHttpError::MissingPayload(_) => Code::MissingPayload,
MeilisearchHttpError::InvalidContentType(_, _) => Code::InvalidContentType,
@ -73,8 +64,6 @@ impl ErrorCode for MeilisearchHttpError {
MeilisearchHttpError::EmptyFilter => Code::InvalidDocumentFilter,
MeilisearchHttpError::InvalidExpression(_, _) => Code::InvalidSearchFilter,
MeilisearchHttpError::PayloadTooLarge(_) => Code::PayloadTooLarge,
MeilisearchHttpError::TooManySearchRequests(_) => Code::TooManySearchRequests,
MeilisearchHttpError::SearchLimiterIsDown => Code::Internal,
MeilisearchHttpError::SwapIndexPayloadWrongLength(_) => Code::InvalidSwapIndexes,
MeilisearchHttpError::IndexUid(e) => e.error_code(),
MeilisearchHttpError::SerdeJson(_) => Code::Internal,
@ -85,7 +74,6 @@ impl ErrorCode for MeilisearchHttpError {
MeilisearchHttpError::FileStore(_) => Code::Internal,
MeilisearchHttpError::DocumentFormat(e) => e.error_code(),
MeilisearchHttpError::Join(_) => Code::Internal,
MeilisearchHttpError::MissingSearchHybrid => Code::MissingSearchHybrid,
}
}
}

View File

@ -131,7 +131,6 @@ gen_seq! { SeqFromRequestFut3; A B C }
gen_seq! { SeqFromRequestFut4; A B C D }
gen_seq! { SeqFromRequestFut5; A B C D E }
gen_seq! { SeqFromRequestFut6; A B C D E F }
gen_seq! { SeqFromRequestFut7; A B C D E F G }
pin_project! {
#[project = ExtractProj]

View File

@ -9,14 +9,12 @@ pub mod middleware;
pub mod option;
pub mod routes;
pub mod search;
pub mod search_queue;
use std::fs::File;
use std::io::{BufReader, BufWriter};
use std::num::NonZeroUsize;
use std::path::Path;
use std::sync::Arc;
use std::thread::{self, available_parallelism};
use std::thread;
use std::time::Duration;
use actix_cors::Cors;
@ -31,6 +29,7 @@ use error::PayloadError;
use extractors::payload::PayloadConfig;
use http::header::CONTENT_TYPE;
use index_scheduler::{IndexScheduler, IndexSchedulerOptions};
use log::error;
use meilisearch_auth::AuthController;
use meilisearch_types::milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
use meilisearch_types::milli::update::{IndexDocumentsConfig, IndexDocumentsMethod};
@ -40,9 +39,6 @@ use meilisearch_types::versioning::{check_version_file, create_version_file};
use meilisearch_types::{compression, milli, VERSION_FILE_NAME};
pub use option::Opt;
use option::ScheduleSnapshot;
use search_queue::SearchQueue;
use tracing::{error, info_span};
use tracing_subscriber::filter::Targets;
use crate::error::MeilisearchHttpError;
@ -90,35 +86,10 @@ fn is_empty_db(db_path: impl AsRef<Path>) -> bool {
}
}
/// The handle used to update the logs at runtime. Must be accessible from the `main.rs` and the `route/logs.rs`.
///
/// It is a `tracing_subscriber` reload handle over a [`LogRouteType`] layer installed
/// directly on top of the bare `Registry`.
pub type LogRouteHandle =
tracing_subscriber::reload::Handle<LogRouteType, tracing_subscriber::Registry>;
/// The layer type swapped through [`LogRouteHandle`]: an optional, boxed, thread-safe layer
/// over the `Registry`, wrapped in a `Targets` filter.
///
/// NOTE(review): the `Option` presumably allows the layer to be absent when nobody is
/// listening on the logs route; confirm against `route/logs.rs`.
pub type LogRouteType = tracing_subscriber::filter::Filtered<
Option<Box<dyn tracing_subscriber::Layer<tracing_subscriber::Registry> + Send + Sync>>,
Targets,
tracing_subscriber::Registry,
>;
/// The subscriber type seen by the second layer: the `Registry` with the reloadable
/// [`LogRouteType`] layer already stacked on top of it.
pub type SubscriberForSecondLayer = tracing_subscriber::layer::Layered<
tracing_subscriber::reload::Layer<LogRouteType, tracing_subscriber::Registry>,
tracing_subscriber::Registry,
>;
/// Reload handle over a [`LogStderrType`] layer installed on top of
/// [`SubscriberForSecondLayer`].
pub type LogStderrHandle =
tracing_subscriber::reload::Handle<LogStderrType, SubscriberForSecondLayer>;
/// The layer type swapped through [`LogStderrHandle`]: a boxed, thread-safe layer over
/// [`SubscriberForSecondLayer`], wrapped in a `Targets` filter. Unlike [`LogRouteType`],
/// the inner layer is not optional.
pub type LogStderrType = tracing_subscriber::filter::Filtered<
Box<dyn tracing_subscriber::Layer<SubscriberForSecondLayer> + Send + Sync>,
Targets,
SubscriberForSecondLayer,
>;
pub fn create_app(
index_scheduler: Data<IndexScheduler>,
auth_controller: Data<AuthController>,
opt: Opt,
logs: (LogRouteHandle, LogStderrHandle),
analytics: Arc<dyn Analytics>,
enable_dashboard: bool,
) -> actix_web::App<
@ -137,7 +108,6 @@ pub fn create_app(
index_scheduler.clone(),
auth_controller.clone(),
&opt,
logs,
analytics.clone(),
)
})
@ -153,49 +123,11 @@ pub fn create_app(
.allow_any_method()
.max_age(86_400), // 24h
)
.wrap(tracing_actix_web::TracingLogger::<AwebTracingLogger>::new())
.wrap(actix_web::middleware::Logger::default())
.wrap(actix_web::middleware::Compress::default())
.wrap(actix_web::middleware::NormalizePath::new(actix_web::middleware::TrailingSlash::Trim))
}
struct AwebTracingLogger;
impl tracing_actix_web::RootSpanBuilder for AwebTracingLogger {
fn on_request_start(request: &actix_web::dev::ServiceRequest) -> tracing::Span {
use tracing::field::Empty;
let conn_info = request.connection_info();
let headers = request.headers();
let user_agent = headers
.get(http::header::USER_AGENT)
.map(|value| String::from_utf8_lossy(value.as_bytes()).into_owned())
.unwrap_or_default();
info_span!("HTTP request", method = %request.method(), host = conn_info.host(), route = %request.path(), query_parameters = %request.query_string(), %user_agent, status_code = Empty, error = Empty)
}
fn on_request_end<B: MessageBody>(
span: tracing::Span,
outcome: &Result<ServiceResponse<B>, actix_web::Error>,
) {
match &outcome {
Ok(response) => {
let code: i32 = response.response().status().as_u16().into();
span.record("status_code", code);
if let Some(error) = response.response().error() {
// use the status code already constructed for the outgoing HTTP response
span.record("error", &tracing::field::display(error.as_response_error()));
}
}
Err(error) => {
let code: i32 = error.error_response().status().as_u16().into();
span.record("status_code", code);
span.record("error", &tracing::field::display(error.as_response_error()));
}
};
}
}
enum OnFailure {
RemoveDb,
KeepDb,
@ -268,9 +200,7 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc<IndexScheduler>, Arc<
.name(String::from("register-snapshot-tasks"))
.spawn(move || loop {
thread::sleep(snapshot_delay);
if let Err(e) =
index_scheduler.register(KindWithContent::SnapshotCreation, None, false)
{
if let Err(e) = index_scheduler.register(KindWithContent::SnapshotCreation) {
error!("Error while registering snapshot: {}", e);
}
})
@ -299,15 +229,12 @@ fn open_or_create_database_unchecked(
snapshots_path: opt.snapshot_dir.clone(),
dumps_path: opt.dump_dir.clone(),
webhook_url: opt.task_webhook_url.as_ref().map(|url| url.to_string()),
webhook_authorization_header: opt.task_webhook_authorization_header.clone(),
task_db_size: opt.max_task_db_size.get_bytes() as usize,
index_base_map_size: opt.max_index_size.get_bytes() as usize,
enable_mdb_writemap: opt.experimental_reduce_indexing_memory_usage,
indexer_config: (&opt.indexer_options).try_into()?,
autobatching_enabled: true,
cleanup_enabled: !opt.experimental_replication_parameters,
max_number_of_tasks: 1_000_000,
max_number_of_batched_tasks: opt.experimental_max_number_of_batched_tasks,
index_growth_amount: byte_unit::Byte::from_str("10GiB").unwrap().get_bytes() as usize,
index_count: DEFAULT_INDEX_COUNT,
instance_features,
@ -351,15 +278,15 @@ fn import_dump(
let mut dump_reader = dump::DumpReader::open(reader)?;
if let Some(date) = dump_reader.date() {
tracing::info!(
version = ?dump_reader.version(), // TODO: get the meilisearch version instead of the dump version
%date,
"Importing a dump of meilisearch"
log::info!(
"Importing a dump of meilisearch `{:?}` from the {}",
dump_reader.version(), // TODO: get the meilisearch version instead of the dump version
date
);
} else {
tracing::info!(
version = ?dump_reader.version(), // TODO: get the meilisearch version instead of the dump version
"Importing a dump of meilisearch",
log::info!(
"Importing a dump of meilisearch `{:?}`",
dump_reader.version(), // TODO: get the meilisearch version instead of the dump version
);
}
@ -393,7 +320,7 @@ fn import_dump(
for index_reader in dump_reader.indexes()? {
let mut index_reader = index_reader?;
let metadata = index_reader.metadata();
tracing::info!("Importing index `{}`.", metadata.uid);
log::info!("Importing index `{}`.", metadata.uid);
let date = Some((metadata.created_at, metadata.updated_at));
let index = index_scheduler.create_raw_index(&metadata.uid, date)?;
@ -407,15 +334,14 @@ fn import_dump(
}
// 4.2 Import the settings.
tracing::info!("Importing the settings.");
log::info!("Importing the settings.");
let settings = index_reader.settings()?;
apply_settings_to_builder(&settings, &mut builder);
builder
.execute(|indexing_step| tracing::debug!("update: {:?}", indexing_step), || false)?;
builder.execute(|indexing_step| log::debug!("update: {:?}", indexing_step), || false)?;
// 4.3 Import the documents.
// 4.3.1 We need to recreate the grenad+obkv format accepted by the index.
tracing::info!("Importing the documents.");
log::info!("Importing the documents.");
let file = tempfile::tempfile()?;
let mut builder = DocumentsBatchBuilder::new(BufWriter::new(file));
for document in index_reader.documents()? {
@ -429,9 +355,6 @@ fn import_dump(
let reader = BufReader::new(file);
let reader = DocumentsBatchReader::from_reader(reader)?;
let embedder_configs = index.embedding_configs(&wtxn)?;
let embedders = index_scheduler.embedders(embedder_configs)?;
let builder = milli::update::IndexDocuments::new(
&mut wtxn,
&index,
@ -440,18 +363,15 @@ fn import_dump(
update_method: IndexDocumentsMethod::ReplaceDocuments,
..Default::default()
},
|indexing_step| tracing::trace!("update: {:?}", indexing_step),
|indexing_step| log::trace!("update: {:?}", indexing_step),
|| false,
)?;
let builder = builder.with_embedders(embedders);
let (builder, user_result) = builder.add_documents(reader)?;
let user_result = user_result?;
tracing::info!(documents_found = user_result, "{} documents found.", user_result);
log::info!("{} documents found.", user_result?);
builder.execute()?;
wtxn.commit()?;
tracing::info!("All documents successfully imported.");
log::info!("All documents successfully imported.");
}
let mut index_scheduler_dump = index_scheduler.register_dumped_task()?;
@ -469,22 +389,13 @@ pub fn configure_data(
index_scheduler: Data<IndexScheduler>,
auth: Data<AuthController>,
opt: &Opt,
(logs_route, logs_stderr): (LogRouteHandle, LogStderrHandle),
analytics: Arc<dyn Analytics>,
) {
let search_queue = SearchQueue::new(
opt.experimental_search_queue_size,
available_parallelism().unwrap_or(NonZeroUsize::new(2).unwrap()),
);
let http_payload_size_limit = opt.http_payload_size_limit.get_bytes() as usize;
config
.app_data(index_scheduler)
.app_data(auth)
.app_data(web::Data::new(search_queue))
.app_data(web::Data::from(analytics))
.app_data(web::Data::new(logs_route))
.app_data(web::Data::new(logs_stderr))
.app_data(web::Data::new(opt.clone()))
.app_data(
web::JsonConfig::default()
.limit(http_payload_size_limit)
@ -544,3 +455,30 @@ pub fn dashboard(config: &mut web::ServiceConfig, enable_frontend: bool) {
/// Registers the root route when no frontend is served.
///
/// Mounts a single `GET /` handler that dispatches to `routes::running`.
/// The `_enable_frontend` flag is accepted but ignored by this variant.
pub fn dashboard(config: &mut web::ServiceConfig, _enable_frontend: bool) {
    let running_route = web::get().to(routes::running);
    let root = web::resource("/").route(running_route);
    config.service(root);
}
/// Interprets the compile-time value of
/// [`VERGEN_GIT_SEMVER_LIGHTWEIGHT`](https://docs.rs/vergen/latest/vergen/struct.Git.html#instructions)
/// as a prototype name.
///
/// The whole value is returned as `Some(prototype_name)` only when all of the
/// following hold:
///
/// 1. it starts with `prototype-`,
/// 2. its last `-`-separated component is a number,
/// 3. its second-to-last `-`-separated component is *not* a number.
///
/// Returns `None` in every other case, including when the environment variable
/// was not set at build time.
pub fn prototype_name() -> Option<&'static str> {
    let version: &'static str = option_env!("VERGEN_GIT_SEMVER_LIGHTWEIGHT")?;

    // Walk the `-`-separated components from the end of the string.
    let mut components = version.rsplit('-');
    let last = components.next()?;
    let second_to_last = components.next()?;

    let is_prototype = version.starts_with("prototype-")
        && last.parse::<u64>().is_ok()
        && second_to_last.parse::<u64>().is_err();

    is_prototype.then_some(version)
}

Some files were not shown because too many files have changed in this diff Show More