Compare commits

..

2 Commits

Author SHA1 Message Date
0a34f70566 update the CI 2023-10-26 13:39:52 +02:00
75d8d4f3a8 remove the use unstable in rustfmt 2023-10-26 13:38:11 +02:00
347 changed files with 9487 additions and 17178 deletions

View File

@ -1,2 +0,0 @@
[alias]
xtask = "run --package xtask --"

View File

@ -27,23 +27,6 @@ Related spec: WIP
- [ ] If prototype validated, merge changes into `main`
- [ ] Update the spec
### Reminders when modifying the Setting API
<!--- Special steps to remind when adding a new index setting -->
- [ ] Ensure the new setting route is at least tested by the [`test_setting_routes` macro](https://github.com/meilisearch/meilisearch/blob/5204c0b60b384cbc79621b6b2176fca086069e8e/meilisearch/tests/settings/get_settings.rs#L276)
- [ ] Ensure Analytics are fully implemented
- [ ] `/settings/my-new-setting` configured in the [`make_setting_routes` macro](https://github.com/meilisearch/meilisearch/blob/5204c0b60b384cbc79621b6b2176fca086069e8e/meilisearch/src/routes/indexes/settings.rs#L141-L165)
- [ ] global `/settings` route configured in the [`update_all` function](https://github.com/meilisearch/meilisearch/blob/5204c0b60b384cbc79621b6b2176fca086069e8e/meilisearch/src/routes/indexes/settings.rs#L655-L751)
- [ ] Ensure the dump serializing is consistent with the `/settings` route serializing, e.g., enums case can be different (`camelCase` in route and `PascalCase` in the dump)
#### Special cases when adding a setting for an experimental feature
- [ ] ⚠️ API stability: The setting does not appear on the main settings route when the feature has never been enabled (e.g. mark it `Unset` when returned from the index in this situation. See [an example](https://github.com/meilisearch/meilisearch/blob/7a89abd2a025606a42f8b219e539117eb2eb029f/meilisearch-types/src/settings.rs#L608))
- [ ] The setting cannot be set when the feature is disabled, either by the main settings route or the subroute (see [`validate_settings` function](https://github.com/meilisearch/meilisearch/blob/7a89abd2a025606a42f8b219e539117eb2eb029f/meilisearch/src/routes/indexes/settings.rs#L811))
- [ ] If possible, the setting is reset when the feature is disabled (hard if it requires reindexing)
## Impacted teams
<!---Ping the related teams. Ask for the engine manager if any hesitation-->
<!---@meilisearch/docs-team when there is any API change, e.g. settings addition-->

View File

@ -90,8 +90,7 @@ jobs:
set -x
export base_ref=$(git merge-base origin/main ${{ steps.comment-branch.outputs.head_ref }} | head -c8)
export base_filename=$(echo ${{ steps.command.outputs.command-arguments }}_main_${base_ref}.json)
export bench_name=$(echo ${{ steps.command.outputs.command-arguments }})
echo "Here are your $bench_name benchmarks diff 👊" >> body.txt
echo 'Here are your benchmarks diff 👊' >> body.txt
echo '```' >> body.txt
./benchmarks/scripts/compare.sh $base_filename ${{ steps.file.outputs.basename }}.json >> body.txt
echo '```' >> body.txt

View File

@ -50,7 +50,7 @@ jobs:
needs: check-version
steps:
- name: Create PR to Homebrew
uses: mislav/bump-homebrew-formula-action@v3
uses: mislav/bump-homebrew-formula-action@v2
with:
formula-name: meilisearch
formula-path: Formula/m/meilisearch.rb

View File

@ -63,7 +63,7 @@ jobs:
uses: docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@v3
uses: docker/login-action@v2
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}

View File

@ -22,7 +22,7 @@ jobs:
outputs:
docker-image: ${{ steps.define-image.outputs.docker-image }}
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v3
- name: Define the Docker image we need to use
id: define-image
run: |
@ -46,11 +46,11 @@ jobs:
MEILISEARCH_VERSION: ${{ needs.define-docker-image.outputs.docker-image }}
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v3
with:
repository: meilisearch/meilisearch-dotnet
- name: Setup .NET Core
uses: actions/setup-dotnet@v4
uses: actions/setup-dotnet@v3
with:
dotnet-version: "6.0.x"
- name: Install dependencies
@ -75,12 +75,12 @@ jobs:
ports:
- '7700:7700'
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v3
with:
repository: meilisearch/meilisearch-dart
- uses: dart-lang/setup-dart@v1
with:
sdk: 'latest'
sdk: 3.1.1
- name: Install dependencies
run: dart pub get
- name: Run integration tests
@ -100,10 +100,10 @@ jobs:
- '7700:7700'
steps:
- name: Set up Go
uses: actions/setup-go@v5
uses: actions/setup-go@v4
with:
go-version: stable
- uses: actions/checkout@v4
- uses: actions/checkout@v3
with:
repository: meilisearch/meilisearch-go
- name: Get dependencies
@ -129,11 +129,11 @@ jobs:
ports:
- '7700:7700'
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v3
with:
repository: meilisearch/meilisearch-java
- name: Set up Java
uses: actions/setup-java@v4
uses: actions/setup-java@v3
with:
java-version: 8
distribution: 'zulu'
@ -156,11 +156,11 @@ jobs:
ports:
- '7700:7700'
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v3
with:
repository: meilisearch/meilisearch-js
- name: Setup node
uses: actions/setup-node@v4
uses: actions/setup-node@v3
with:
cache: 'yarn'
- name: Install dependencies
@ -191,7 +191,7 @@ jobs:
ports:
- '7700:7700'
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v3
with:
repository: meilisearch/meilisearch-php
- name: Install PHP
@ -220,11 +220,11 @@ jobs:
ports:
- '7700:7700'
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v3
with:
repository: meilisearch/meilisearch-python
- name: Set up Python
uses: actions/setup-python@v5
uses: actions/setup-python@v4
- name: Install pipenv
uses: dschep/install-pipenv-action@v1
- name: Install dependencies
@ -245,7 +245,7 @@ jobs:
ports:
- '7700:7700'
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v3
with:
repository: meilisearch/meilisearch-ruby
- name: Set up Ruby 3
@ -270,7 +270,7 @@ jobs:
ports:
- '7700:7700'
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v3
with:
repository: meilisearch/meilisearch-rust
- name: Build
@ -291,7 +291,7 @@ jobs:
ports:
- '7700:7700'
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v3
with:
repository: meilisearch/meilisearch-swift
- name: Run tests
@ -314,11 +314,11 @@ jobs:
ports:
- '7700:7700'
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v3
with:
repository: meilisearch/meilisearch-js-plugins
- name: Setup node
uses: actions/setup-node@v4
uses: actions/setup-node@v3
with:
cache: yarn
- name: Install dependencies
@ -345,7 +345,7 @@ jobs:
ports:
- '7700:7700'
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v3
with:
repository: meilisearch/meilisearch-rails
- name: Set up Ruby 3
@ -369,7 +369,7 @@ jobs:
ports:
- '7700:7700'
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v3
with:
repository: meilisearch/meilisearch-symfony
- name: Install PHP

View File

@ -43,7 +43,7 @@ jobs:
toolchain: nightly
override: true
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.7.1
uses: Swatinem/rust-cache@v2.6.2
- name: Run cargo check without any default features
uses: actions-rs/cargo@v1
with:
@ -65,11 +65,7 @@ jobs:
steps:
- uses: actions/checkout@v3
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.7.1
- uses: actions-rs/toolchain@v1
with:
toolchain: stable
override: true
uses: Swatinem/rust-cache@v2.6.2
- name: Run cargo check without any default features
uses: actions-rs/cargo@v1
with:
@ -82,7 +78,7 @@ jobs:
args: --locked --release --all
test-all-features:
name: Tests almost all features
name: Tests all features
runs-on: ubuntu-latest
container:
# Use ubuntu-18.04 to compile with glibc 2.27, which is what the production environment expects
@ -98,12 +94,16 @@ jobs:
with:
toolchain: stable
override: true
- name: Run cargo build with almost all features
run: |
cargo build --workspace --locked --release --features "$(cargo xtask list-features --exclude-feature cuda)"
- name: Run cargo test with almost all features
run: |
cargo test --workspace --locked --release --features "$(cargo xtask list-features --exclude-feature cuda)"
- name: Run cargo build with all features
uses: actions-rs/cargo@v1
with:
command: build
args: --workspace --locked --release --all-features
- name: Run cargo test with all features
uses: actions-rs/cargo@v1
with:
command: test
args: --workspace --locked --release --all-features
test-disabled-tokenization:
name: Test disabled tokenization
@ -149,7 +149,7 @@ jobs:
toolchain: stable
override: true
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.7.1
uses: Swatinem/rust-cache@v2.6.2
- name: Run tests in debug
uses: actions-rs/cargo@v1
with:
@ -164,11 +164,11 @@ jobs:
- uses: actions-rs/toolchain@v1
with:
profile: minimal
toolchain: 1.75.0
toolchain: 1.71.1
override: true
components: clippy
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.7.1
uses: Swatinem/rust-cache@v2.6.2
- name: Run cargo clippy
uses: actions-rs/cargo@v1
with:
@ -183,11 +183,11 @@ jobs:
- uses: actions-rs/toolchain@v1
with:
profile: minimal
toolchain: nightly
toolchain: 1.71.1
override: true
components: rustfmt
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.7.1
uses: Swatinem/rust-cache@v2.6.2
- name: Run cargo fmt
# Since we never ran the `build.rs` script in the benchmark directory we are missing one auto-generated import file.
# Since we want to trigger (and fail) this action as fast as possible, instead of building the benchmark crate

View File

@ -1,5 +1,3 @@
unstable_features = true
use_small_heuristics = "max"
imports_granularity = "Module"
group_imports = "StdExternalCrate"

View File

@ -75,12 +75,6 @@ If you get a "Too many open files" error you might want to increase the open fil
ulimit -Sn 3000
```
#### Build tools
Meilisearch follows the [cargo xtask](https://github.com/matklad/cargo-xtask) workflow to provide some build tools.
Run `cargo xtask --help` from the root of the repository to find out what is available.
## Git Guidelines
### Git Branches

3119
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@ -2,7 +2,6 @@
resolver = "2"
members = [
"meilisearch",
"meilitool",
"meilisearch-types",
"meilisearch-auth",
"meili-snap",
@ -16,16 +15,11 @@ members = [
"json-depth-checker",
"benchmarks",
"fuzzers",
"tracing-trace",
"xtask",
]
[workspace.package]
version = "1.6.0"
authors = [
"Quentin de Quelen <quentin@dequelen.me>",
"Clément Renault <clement@meilisearch.com>",
]
version = "1.4.1"
authors = ["Quentin de Quelen <quentin@dequelen.me>", "Clément Renault <clement@meilisearch.com>"]
description = "Meilisearch HTTP server"
homepage = "https://meilisearch.com"
readme = "README.md"

View File

@ -1,9 +1,9 @@
# Compile
FROM rust:1.75.0-alpine3.18 AS compiler
FROM rust:alpine3.16 AS compiler
RUN apk add -q --update-cache --no-cache build-base openssl-dev
WORKDIR /
WORKDIR /meilisearch
ARG COMMIT_SHA
ARG COMMIT_DATE
@ -17,7 +17,7 @@ RUN set -eux; \
if [ "$apkArch" = "aarch64" ]; then \
export JEMALLOC_SYS_WITH_LG_PAGE=16; \
fi && \
cargo build --release -p meilisearch -p meilitool
cargo build --release
# Run
FROM alpine:3.16
@ -28,10 +28,9 @@ ENV MEILI_SERVER_PROVIDER docker
RUN apk update --quiet \
&& apk add -q --no-cache libgcc tini curl
# add meilisearch and meilitool to the `/bin` so you can run it from anywhere
# and it's easy to find.
COPY --from=compiler /target/release/meilisearch /bin/meilisearch
COPY --from=compiler /target/release/meilitool /bin/meilitool
# add meilisearch to the `/bin` so you can run it from anywhere and it's easy
# to find.
COPY --from=compiler /meilisearch/target/release/meilisearch /bin/meilisearch
# To stay compatible with the older version of the container (pre v0.27.0) we're
# going to symlink the meilisearch binary in the path to `/meilisearch`
RUN ln -s /bin/meilisearch /meilisearch

View File

@ -1,6 +1,6 @@
MIT License
Copyright (c) 2019-2024 Meili SAS
Copyright (c) 2019-2022 Meili SAS
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal

View File

@ -25,6 +25,12 @@
<p align="center">⚡ A lightning-fast search engine that fits effortlessly into your apps, websites, and workflow 🔍</p>
---
### 🔥 On November 2nd, we are hosting our first-ever live demo and product updates for [Meilisearch Cloud](https://www.meilisearch.com/cloud?utm_campaign=oss&utm_source=github&utm_medium=meilisearch). Make sure to [register here](https://us06web.zoom.us/meeting/register/tZMlc-mqrjIsH912-HTRe-AaT-pp41bDe81a#/registration) and bring your questions for live Q&A!
---
Meilisearch helps you shape a delightful search experience in a snap, offering features that work out-of-the-box to speed up your workflow.
<p align="center" name="demo">
@ -42,7 +48,7 @@ Meilisearch helps you shape a delightful search experience in a snap, offering f
- **Search-as-you-type:** find search results in less than 50 milliseconds
- **[Typo tolerance](https://www.meilisearch.com/docs/learn/getting_started/customizing_relevancy?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features#typo-tolerance):** get relevant matches even when queries contain typos and misspellings
- **[Filtering](https://www.meilisearch.com/docs/learn/fine_tuning_results/filtering?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features) and [faceted search](https://www.meilisearch.com/docs/learn/fine_tuning_results/faceted_search?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** enhance your users' search experience with custom filters and build a faceted search interface in a few lines of code
- **[Filtering](https://www.meilisearch.com/docs/learn/fine_tuning_results/filtering?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features) and [faceted search](https://www.meilisearch.com/docs/learn/fine_tuning_results/faceted_search?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** enhance your user's search experience with custom filters and build a faceted search interface in a few lines of code
- **[Sorting](https://www.meilisearch.com/docs/learn/fine_tuning_results/sorting?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** sort results based on price, date, or pretty much anything else your users need
- **[Synonym support](https://www.meilisearch.com/docs/learn/getting_started/customizing_relevancy?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features#synonyms):** configure synonyms to include more relevant content in your search results
- **[Geosearch](https://www.meilisearch.com/docs/learn/fine_tuning_results/geosearch?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** filter and sort documents based on geographic data

View File

@ -106,7 +106,7 @@
},
"editorMode": "builder",
"exemplar": true,
"expr": "meilisearch_index_count{job=\"$job\", instance=\"$instance\"}",
"expr": "meilisearch_index_count{job=\"meilisearch\", instance=\"$instance\"}",
"interval": "",
"legendFormat": "",
"range": true,
@ -165,7 +165,7 @@
"type": "prometheus"
},
"editorMode": "builder",
"expr": "meilisearch_index_docs_count{job=\"$job\", index=\"$Index\", instance=\"$instance\"}",
"expr": "meilisearch_index_docs_count{job=\"meilisearch\", index=\"$Index\", instance=\"$instance\"}",
"hide": false,
"range": true,
"refId": "A"
@ -228,7 +228,7 @@
},
"editorMode": "builder",
"exemplar": true,
"expr": "round(increase(meilisearch_http_requests_total{method=\"POST\", path=\"/indexes/$Index/search\", job=\"$job\"}[1h]))",
"expr": "round(increase(meilisearch_http_requests_total{method=\"POST\", path=\"/indexes/$Index/search\", job=\"meilisearch\"}[1h]))",
"interval": "",
"legendFormat": "",
"range": true,
@ -288,7 +288,7 @@
},
"editorMode": "builder",
"exemplar": true,
"expr": "round(increase(meilisearch_http_requests_total{method=\"POST\", path=\"/indexes/$Index/search\", job=\"$job\"}[24h]))",
"expr": "round(increase(meilisearch_http_requests_total{method=\"POST\", path=\"/indexes/$Index/search\", job=\"meilisearch\"}[24h]))",
"interval": "",
"legendFormat": "",
"range": true,
@ -348,7 +348,7 @@
},
"editorMode": "builder",
"exemplar": true,
"expr": "round(increase(meilisearch_http_requests_total{method=\"POST\", path=\"/indexes/$Index/search\", job=\"$job\"}[30d]))",
"expr": "round(increase(meilisearch_http_requests_total{method=\"POST\", path=\"/indexes/$Index/search\", job=\"meilisearch\"}[30d]))",
"interval": "",
"legendFormat": "",
"range": true,
@ -447,7 +447,7 @@
},
"editorMode": "builder",
"exemplar": true,
"expr": "meilisearch_db_size_bytes{job=\"$job\", instance=\"$instance\"}",
"expr": "meilisearch_db_size_bytes{job=\"meilisearch\", instance=\"$instance\"}",
"interval": "",
"legendFormat": "Database size on disk",
"range": true,
@ -458,7 +458,7 @@
"type": "prometheus"
},
"editorMode": "builder",
"expr": "meilisearch_used_db_size_bytes{job=\"$job\", instance=\"$instance\"}",
"expr": "meilisearch_used_db_size_bytes{job=\"meilisearch\", instance=\"$instance\"}",
"hide": false,
"legendFormat": "Used bytes",
"range": true,
@ -553,7 +553,7 @@
},
"editorMode": "builder",
"exemplar": true,
"expr": "rate(meilisearch_http_response_time_seconds_sum{instance=\"$instance\", job=\"$job\"}[5m]) / rate(meilisearch_http_response_time_seconds_count[5m])",
"expr": "rate(meilisearch_http_response_time_seconds_sum{instance=\"$instance\", job=\"meilisearch\"}[5m]) / rate(meilisearch_http_response_time_seconds_count[5m])",
"interval": "",
"legendFormat": "{{method}} {{path}}",
"range": true,
@ -646,7 +646,7 @@
},
"editorMode": "builder",
"exemplar": true,
"expr": "rate(meilisearch_http_requests_total{instance=\"$instance\", job=\"$job\"}[5m])",
"expr": "rate(meilisearch_http_requests_total{instance=\"$instance\", job=\"meilisearch\"}[5m])",
"interval": "",
"legendFormat": "{{method}} {{path}}",
"range": true,
@ -744,7 +744,7 @@
},
"editorMode": "builder",
"exemplar": true,
"expr": "sum by(le) (increase(meilisearch_http_response_time_seconds_bucket{path=\"/indexes/$Index/search\", instance=\"$instance\", job=\"$job\"}[30s]))",
"expr": "sum by(le) (increase(meilisearch_http_response_time_seconds_bucket{path=\"/indexes/$Index/search\", instance=\"$instance\", job=\"meilisearch\"}[30s]))",
"format": "heatmap",
"interval": "",
"legendFormat": "{{le}}",
@ -854,7 +854,7 @@
},
"editorMode": "builder",
"exemplar": true,
"expr": "meilisearch_nb_tasks{instance=\"$instance\", job=\"$job\", kind=\"statuses\"}",
"expr": "meilisearch_nb_tasks{instance=\"$instance\", job=\"meilisearch\", kind=\"statuses\"}",
"interval": "",
"legendFormat": "{{value}} ",
"range": true,
@ -947,7 +947,7 @@
},
"editorMode": "builder",
"exemplar": true,
"expr": "meilisearch_nb_tasks{instance=\"$instance\", job=\"$job\", kind=\"types\"}",
"expr": "meilisearch_nb_tasks{instance=\"$instance\", job=\"meilisearch\", kind=\"types\"}",
"interval": "",
"legendFormat": "{{value}} ",
"range": true,
@ -1040,7 +1040,7 @@
},
"editorMode": "builder",
"exemplar": true,
"expr": "meilisearch_nb_tasks{instance=\"$instance\", job=\"$job\", kind=\"indexes\"}",
"expr": "meilisearch_nb_tasks{instance=\"$instance\", job=\"meilisearch\", kind=\"indexes\"}",
"interval": "",
"legendFormat": "{{value}} ",
"range": true,
@ -1161,7 +1161,7 @@
},
"editorMode": "builder",
"exemplar": true,
"expr": "rate(process_cpu_seconds_total{job=\"$job\", instance=\"$instance\"}[1m])",
"expr": "rate(process_cpu_seconds_total{job=\"meilisearch\", instance=\"$instance\"}[1m])",
"interval": "",
"legendFormat": "process",
"range": true,
@ -1264,7 +1264,7 @@
},
"editorMode": "builder",
"exemplar": true,
"expr": "process_resident_memory_bytes{job=\"$job\", instance=\"$instance\"} / 1024 / 1024",
"expr": "process_resident_memory_bytes{job=\"meilisearch\", instance=\"$instance\"} / 1024 / 1024",
"interval": "",
"legendFormat": "process",
"range": true,
@ -1342,33 +1342,6 @@
"skipUrlSync": false,
"sort": 0,
"type": "query"
},
{
"current": {
"selected": true,
"text": "meilisearch",
"value": "meilisearch"
},
"datasource": {
"type": "prometheus"
},
"definition": "label_values(job)",
"description": "Prometheus job_name from scrape config (default is meilisearch)",
"hide": 0,
"includeAll": false,
"label": "Job",
"multi": false,
"name": "job",
"options": [],
"query": {
"query": "label_values(job)",
"refId": "StandardVariableQuery"
},
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 0,
"type": "query"
}
]
},

View File

@ -11,24 +11,24 @@ edition.workspace = true
license.workspace = true
[dependencies]
anyhow = "1.0.79"
csv = "1.3.0"
anyhow = "1.0.70"
csv = "1.2.1"
milli = { path = "../milli" }
mimalloc = { version = "0.1.39", default-features = false }
serde_json = { version = "1.0.111", features = ["preserve_order"] }
mimalloc = { version = "0.1.37", default-features = false }
serde_json = { version = "1.0.95", features = ["preserve_order"] }
[dev-dependencies]
criterion = { version = "0.5.1", features = ["html_reports"] }
rand = "0.8.5"
rand_chacha = "0.3.1"
roaring = "0.10.2"
roaring = "0.10.1"
[build-dependencies]
anyhow = "1.0.79"
bytes = "1.5.0"
anyhow = "1.0.70"
bytes = "1.4.0"
convert_case = "0.6.0"
flate2 = "1.0.28"
reqwest = { version = "0.11.23", features = ["blocking", "rustls-tls"], default-features = false }
flate2 = "1.0.25"
reqwest = { version = "0.11.16", features = ["blocking", "rustls-tls"], default-features = false }
[features]
default = ["milli/all-tokenizations"]

View File

@ -6,7 +6,9 @@ use std::path::Path;
use criterion::{criterion_group, criterion_main, Criterion};
use milli::heed::{EnvOpenOptions, RwTxn};
use milli::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings};
use milli::update::{
DeleteDocuments, IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings,
};
use milli::Index;
use rand::seq::SliceRandom;
use rand_chacha::rand_core::SeedableRng;
@ -36,7 +38,7 @@ fn setup_index() -> Index {
}
fn setup_settings<'t>(
wtxn: &mut RwTxn<'t>,
wtxn: &mut RwTxn<'t, '_>,
index: &'t Index,
primary_key: &str,
searchable_fields: &[&str],
@ -264,7 +266,17 @@ fn deleting_songs_in_batches_default(c: &mut Criterion) {
(index, document_ids_to_delete)
},
move |(index, document_ids_to_delete)| {
delete_documents_from_ids(index, document_ids_to_delete)
let mut wtxn = index.write_txn().unwrap();
for ids in document_ids_to_delete {
let mut builder = DeleteDocuments::new(&mut wtxn, &index).unwrap();
builder.delete_documents(&ids);
builder.execute().unwrap();
}
wtxn.commit().unwrap();
index.prepare_for_closing().wait();
},
)
});
@ -601,7 +613,17 @@ fn deleting_wiki_in_batches_default(c: &mut Criterion) {
(index, document_ids_to_delete)
},
move |(index, document_ids_to_delete)| {
delete_documents_from_ids(index, document_ids_to_delete)
let mut wtxn = index.write_txn().unwrap();
for ids in document_ids_to_delete {
let mut builder = DeleteDocuments::new(&mut wtxn, &index).unwrap();
builder.delete_documents(&ids);
builder.execute().unwrap();
}
wtxn.commit().unwrap();
index.prepare_for_closing().wait();
},
)
});
@ -853,31 +875,22 @@ fn deleting_movies_in_batches_default(c: &mut Criterion) {
(index, document_ids_to_delete)
},
move |(index, document_ids_to_delete)| {
delete_documents_from_ids(index, document_ids_to_delete)
let mut wtxn = index.write_txn().unwrap();
for ids in document_ids_to_delete {
let mut builder = DeleteDocuments::new(&mut wtxn, &index).unwrap();
builder.delete_documents(&ids);
builder.execute().unwrap();
}
wtxn.commit().unwrap();
index.prepare_for_closing().wait();
},
)
});
}
/// Deletes every batch of document ids from `index` within one write transaction.
///
/// Each bitmap in `document_ids_to_delete` gets its own `IndexDocuments` builder
/// (default configuration, no-op first callback, second callback always returning
/// `false`); the builder is asked to remove the ids and is then executed before
/// the next batch is handled. After the last batch the transaction is committed
/// and the function waits on the handle returned by `prepare_for_closing`.
///
/// Every fallible step is unwrapped, so any error panics.
fn delete_documents_from_ids(index: Index, document_ids_to_delete: Vec<RoaringBitmap>) {
    let mut wtxn = index.write_txn().unwrap();
    let indexer_config = IndexerConfig::default();

    for ids in document_ids_to_delete {
        // The removal call consumes the builder and hands it back together with a
        // second value that is not needed here.
        let (builder, _) = IndexDocuments::new(
            &mut wtxn,
            &index,
            &indexer_config,
            IndexDocumentsConfig::default(),
            |_| (),
            || false,
        )
        .unwrap()
        .remove_documents_from_db_no_batch(&ids)
        .unwrap();

        builder.execute().unwrap();
    }

    wtxn.commit().unwrap();

    index.prepare_for_closing().wait();
}
fn indexing_movies_in_three_batches(c: &mut Criterion) {
let mut group = c.benchmark_group("indexing");
group.sample_size(BENCHMARK_ITERATION);
@ -1099,7 +1112,17 @@ fn deleting_nested_movies_in_batches_default(c: &mut Criterion) {
(index, document_ids_to_delete)
},
move |(index, document_ids_to_delete)| {
delete_documents_from_ids(index, document_ids_to_delete)
let mut wtxn = index.write_txn().unwrap();
for ids in document_ids_to_delete {
let mut builder = DeleteDocuments::new(&mut wtxn, &index).unwrap();
builder.delete_documents(&ids);
builder.execute().unwrap();
}
wtxn.commit().unwrap();
index.prepare_for_closing().wait();
},
)
});
@ -1315,7 +1338,17 @@ fn deleting_geo_in_batches_default(c: &mut Criterion) {
(index, document_ids_to_delete)
},
move |(index, document_ids_to_delete)| {
delete_documents_from_ids(index, document_ids_to_delete)
let mut wtxn = index.write_txn().unwrap();
for ids in document_ids_to_delete {
let mut builder = DeleteDocuments::new(&mut wtxn, &index).unwrap();
builder.delete_documents(&ids);
builder.execute().unwrap();
}
wtxn.commit().unwrap();
index.prepare_for_closing().wait();
},
)
});

View File

@ -129,6 +129,3 @@ experimental_enable_metrics = false
# Experimental RAM reduction during indexing, do not use in production, see: <https://github.com/meilisearch/product/discussions/652>
experimental_reduce_indexing_memory_usage = false
# Experimentally reduces the maximum number of tasks that will be processed at once, see: <https://github.com/orgs/meilisearch/discussions/713>
# experimental_max_number_of_batched_tasks = 100

View File

@ -11,22 +11,22 @@ readme.workspace = true
license.workspace = true
[dependencies]
anyhow = "1.0.79"
flate2 = "1.0.28"
http = "0.2.11"
anyhow = "1.0.70"
flate2 = "1.0.25"
http = "0.2.9"
log = "0.4.17"
meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-types = { path = "../meilisearch-types" }
once_cell = "1.19.0"
regex = "1.10.2"
roaring = { version = "0.10.2", features = ["serde"] }
serde = { version = "1.0.195", features = ["derive"] }
serde_json = { version = "1.0.111", features = ["preserve_order"] }
tar = "0.4.40"
tempfile = "3.9.0"
thiserror = "1.0.56"
time = { version = "0.3.31", features = ["serde-well-known", "formatting", "parsing", "macros"] }
tracing = "0.1.40"
uuid = { version = "1.6.1", features = ["serde", "v4"] }
once_cell = "1.17.1"
regex = "1.7.3"
roaring = { version = "0.10.1", features = ["serde"] }
serde = { version = "1.0.160", features = ["derive"] }
serde_json = { version = "1.0.95", features = ["preserve_order"] }
tar = "0.4.38"
tempfile = "3.5.0"
thiserror = "1.0.40"
time = { version = "0.3.20", features = ["serde-well-known", "formatting", "parsing", "macros"] }
uuid = { version = "1.3.1", features = ["serde", "v4"] }
[dev-dependencies]
big_s = "1.0.2"

View File

@ -267,7 +267,6 @@ pub(crate) mod test {
dictionary: Setting::NotSet,
synonyms: Setting::NotSet,
distinct_attribute: Setting::NotSet,
proximity_precision: Setting::NotSet,
typo_tolerance: Setting::NotSet,
faceting: Setting::Set(FacetingSettings {
max_values_per_facet: Setting::Set(111),
@ -276,7 +275,6 @@ pub(crate) mod test {
),
}),
pagination: Setting::NotSet,
embedders: Setting::NotSet,
_kind: std::marker::PhantomData,
};
settings.check()

View File

@ -120,7 +120,7 @@ impl From<v1::settings::Settings> for v2::Settings<v2::Unchecked> {
criterion.as_ref().map(ToString::to_string)
}
Err(()) => {
tracing::warn!(
log::warn!(
"Could not import the following ranking rule: `{}`.",
ranking_rule
);
@ -152,11 +152,11 @@ impl From<v1::update::UpdateStatus> for Option<v2::updates::UpdateStatus> {
use v2::updates::UpdateStatus as UpdateStatusV2;
Some(match source {
UpdateStatusV1::Enqueued { content } => {
tracing::warn!(
log::warn!(
"Cannot import task {} (importing enqueued tasks from v1 dumps is unsupported)",
content.update_id
);
tracing::warn!("Task will be skipped in the queue of imported tasks.");
log::warn!("Task will be skipped in the queue of imported tasks.");
return None;
}
@ -229,7 +229,7 @@ impl From<v1::update::UpdateType> for Option<v2::updates::UpdateMeta> {
Some(match source {
v1::update::UpdateType::ClearAll => v2::updates::UpdateMeta::ClearDocuments,
v1::update::UpdateType::Customs => {
tracing::warn!("Ignoring task with type 'Customs' that is no longer supported");
log::warn!("Ignoring task with type 'Customs' that is no longer supported");
return None;
}
v1::update::UpdateType::DocumentsAddition { .. } => {
@ -296,7 +296,7 @@ impl From<v1::settings::RankingRule> for Option<v2::settings::Criterion> {
v1::settings::RankingRule::Proximity => Some(v2::settings::Criterion::Proximity),
v1::settings::RankingRule::Attribute => Some(v2::settings::Criterion::Attribute),
v1::settings::RankingRule::WordsPosition => {
tracing::warn!("Removing the 'WordsPosition' ranking rule that is no longer supported, please check the resulting ranking rules of your indexes");
log::warn!("Removing the 'WordsPosition' ranking rule that is no longer supported, please check the resulting ranking rules of your indexes");
None
}
v1::settings::RankingRule::Exactness => Some(v2::settings::Criterion::Exactness),

View File

@ -146,8 +146,8 @@ impl From<v2::updates::UpdateStatus> for v3::updates::UpdateStatus {
started_processing_at: processing.started_processing_at,
}),
Err(e) => {
tracing::warn!("Error with task {}: {}", processing.from.update_id, e);
tracing::warn!("Task will be marked as `Failed`.");
log::warn!("Error with task {}: {}", processing.from.update_id, e);
log::warn!("Task will be marked as `Failed`.");
v3::updates::UpdateStatus::Failed(v3::updates::Failed {
from: v3::updates::Processing {
from: v3::updates::Enqueued {
@ -172,8 +172,8 @@ impl From<v2::updates::UpdateStatus> for v3::updates::UpdateStatus {
enqueued_at: enqueued.enqueued_at,
}),
Err(e) => {
tracing::warn!("Error with task {}: {}", enqueued.update_id, e);
tracing::warn!("Task will be marked as `Failed`.");
log::warn!("Error with task {}: {}", enqueued.update_id, e);
log::warn!("Task will be marked as `Failed`.");
v3::updates::UpdateStatus::Failed(v3::updates::Failed {
from: v3::updates::Processing {
from: v3::updates::Enqueued {
@ -353,7 +353,7 @@ impl From<String> for v3::Code {
"malformed_payload" => v3::Code::MalformedPayload,
"missing_payload" => v3::Code::MissingPayload,
other => {
tracing::warn!("Unknown error code {}", other);
log::warn!("Unknown error code {}", other);
v3::Code::UnretrievableErrorCode
}
}

View File

@ -76,20 +76,20 @@ impl CompatV3ToV4 {
let index_uid = match index_uid {
Some(uid) => uid,
None => {
tracing::warn!(
log::warn!(
"Error while importing the update {}.",
task.update.id()
);
tracing::warn!(
log::warn!(
"The index associated to the uuid `{}` could not be retrieved.",
task.uuid.to_string()
);
if task.update.is_finished() {
// we're altering the task history here but not the data itself, which is acceptable.
tracing::warn!("The index-uuid will be set as `unknown`.");
log::warn!("The index-uuid will be set as `unknown`.");
String::from("unknown")
} else {
tracing::warn!("The task will be ignored.");
log::warn!("The task will be ignored.");
return None;
}
}

View File

@ -305,7 +305,7 @@ impl From<v4::ResponseError> for v5::ResponseError {
"invalid_api_key_expires_at" => v5::Code::InvalidApiKeyExpiresAt,
"invalid_api_key_description" => v5::Code::InvalidApiKeyDescription,
other => {
tracing::warn!("Unknown error code {}", other);
log::warn!("Unknown error code {}", other);
v5::Code::UnretrievableErrorCode
}
};

View File

@ -304,7 +304,7 @@ impl From<v5::ResponseError> for v6::ResponseError {
"immutable_field" => v6::Code::BadRequest,
"api_key_already_exists" => v6::Code::ApiKeyAlreadyExists,
other => {
tracing::warn!("Unknown error code {}", other);
log::warn!("Unknown error code {}", other);
v6::Code::UnretrievableErrorCode
}
};
@ -329,7 +329,7 @@ impl<T> From<v5::Settings<T>> for v6::Settings<v6::Unchecked> {
new_ranking_rules.push(new_rule);
}
Err(_) => {
tracing::warn!("Error while importing settings. The ranking rule `{rule}` does not exist anymore.")
log::warn!("Error while importing settings. The ranking rule `{rule}` does not exist anymore.")
}
}
}
@ -345,7 +345,6 @@ impl<T> From<v5::Settings<T>> for v6::Settings<v6::Unchecked> {
dictionary: v6::Setting::NotSet,
synonyms: settings.synonyms.into(),
distinct_attribute: settings.distinct_attribute.into(),
proximity_precision: v6::Setting::NotSet,
typo_tolerance: match settings.typo_tolerance {
v5::Setting::Set(typo) => v6::Setting::Set(v6::TypoTolerance {
enabled: typo.enabled.into(),
@ -378,7 +377,6 @@ impl<T> From<v5::Settings<T>> for v6::Settings<v6::Unchecked> {
v5::Setting::Reset => v6::Setting::Reset,
v5::Setting::NotSet => v6::Setting::NotSet,
},
embedders: v6::Setting::NotSet,
_kind: std::marker::PhantomData,
}
}

View File

@ -13,12 +13,12 @@ use crate::{Result, Version};
mod compat;
mod v1;
mod v2;
mod v3;
mod v4;
mod v5;
mod v6;
pub(self) mod v1;
pub(self) mod v2;
pub(self) mod v3;
pub(self) mod v4;
pub(self) mod v5;
pub(self) mod v6;
pub type Document = serde_json::Map<String, serde_json::Value>;
pub type UpdateFile = dyn Iterator<Item = Result<Document>>;

View File

@ -0,0 +1,24 @@
---
source: dump/src/reader/mod.rs
expression: spells.settings().unwrap()
---
{
"displayedAttributes": [
"*"
],
"searchableAttributes": [
"*"
],
"filterableAttributes": [],
"sortableAttributes": [],
"rankingRules": [
"typo",
"words",
"proximity",
"attribute",
"exactness"
],
"stopWords": [],
"synonyms": {},
"distinctAttribute": null
}

View File

@ -0,0 +1,38 @@
---
source: dump/src/reader/mod.rs
expression: products.settings().unwrap()
---
{
"displayedAttributes": [
"*"
],
"searchableAttributes": [
"*"
],
"filterableAttributes": [],
"sortableAttributes": [],
"rankingRules": [
"typo",
"words",
"proximity",
"attribute",
"exactness"
],
"stopWords": [],
"synonyms": {
"android": [
"phone",
"smartphone"
],
"iphone": [
"phone",
"smartphone"
],
"phone": [
"android",
"iphone",
"smartphone"
]
},
"distinctAttribute": null
}

View File

@ -0,0 +1,31 @@
---
source: dump/src/reader/mod.rs
expression: movies.settings().unwrap()
---
{
"displayedAttributes": [
"*"
],
"searchableAttributes": [
"*"
],
"filterableAttributes": [
"genres",
"id"
],
"sortableAttributes": [
"genres",
"id"
],
"rankingRules": [
"typo",
"words",
"proximity",
"attribute",
"exactness",
"release_date:asc"
],
"stopWords": [],
"synonyms": {},
"distinctAttribute": null
}

View File

@ -56,7 +56,8 @@ pub enum RankingRule {
Desc(String),
}
static ASC_DESC_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"(asc|desc)\(([\w_-]+)\)").unwrap());
static ASC_DESC_REGEX: Lazy<Regex> =
Lazy::new(|| Regex::new(r#"(asc|desc)\(([\w_-]+)\)"#).unwrap());
impl FromStr for RankingRule {
type Err = ();

View File

@ -1,6 +1,5 @@
use serde::{Deserialize, Serialize};
#[allow(clippy::enum_variant_names)]
#[derive(Serialize, Deserialize, Debug, Clone, Copy)]
pub enum Code {
// index related error

View File

@ -95,7 +95,6 @@ impl fmt::Display for ErrorType {
}
}
#[allow(clippy::enum_variant_names)]
#[derive(Serialize, Deserialize, Debug, Clone, Copy)]
pub enum Code {
// index related error

View File

@ -31,7 +31,6 @@ impl ResponseError {
}
}
#[allow(clippy::enum_variant_names)]
#[derive(Deserialize, Debug, Clone, Copy)]
#[cfg_attr(test, derive(serde::Serialize))]
pub enum Code {

View File

@ -2,10 +2,10 @@ use std::fs::{self, File};
use std::io::{BufRead, BufReader, ErrorKind};
use std::path::Path;
use log::debug;
pub use meilisearch_types::milli;
use tempfile::TempDir;
use time::OffsetDateTime;
use tracing::debug;
use uuid::Uuid;
use super::Document;

View File

@ -11,9 +11,9 @@ edition.workspace = true
license.workspace = true
[dependencies]
tempfile = "3.9.0"
thiserror = "1.0.56"
uuid = { version = "1.6.1", features = ["serde", "v4"] }
tempfile = "3.5.0"
thiserror = "1.0.40"
uuid = { version = "1.3.1", features = ["serde", "v4"] }
[dev-dependencies]
faux = "0.1.10"
faux = "0.1.9"

View File

@ -13,8 +13,8 @@ license.workspace = true
[dependencies]
nom = "7.1.3"
nom_locate = "4.2.0"
unescaper = "0.1.3"
nom_locate = "4.1.0"
unescaper = "0.1.2"
[dev-dependencies]
insta = "1.34.0"
insta = "1.29.0"

View File

@ -564,10 +564,10 @@ pub mod tests {
#[test]
fn parse_escaped() {
insta::assert_display_snapshot!(p(r"title = 'foo\\'"), @r#"{title} = {foo\}"#);
insta::assert_display_snapshot!(p(r"title = 'foo\\\\'"), @r#"{title} = {foo\\}"#);
insta::assert_display_snapshot!(p(r"title = 'foo\\\\\\'"), @r#"{title} = {foo\\\}"#);
insta::assert_display_snapshot!(p(r"title = 'foo\\\\\\\\'"), @r#"{title} = {foo\\\\}"#);
insta::assert_display_snapshot!(p(r#"title = 'foo\\'"#), @r#"{title} = {foo\}"#);
insta::assert_display_snapshot!(p(r#"title = 'foo\\\\'"#), @r#"{title} = {foo\\}"#);
insta::assert_display_snapshot!(p(r#"title = 'foo\\\\\\'"#), @r#"{title} = {foo\\\}"#);
insta::assert_display_snapshot!(p(r#"title = 'foo\\\\\\\\'"#), @r#"{title} = {foo\\\\}"#);
// but it also works with other sequences
insta::assert_display_snapshot!(p(r#"title = 'foo\x20\n\t\"\'"'"#), @"{title} = {foo \n\t\"\'\"}");
}

View File

@ -270,8 +270,8 @@ pub mod test {
("aaaa", "", rtok("", "aaaa"), "aaaa"),
(r#"aa"aa"#, r#""aa"#, rtok("", "aa"), "aa"),
(r#"aa\"aa"#, r#""#, rtok("", r#"aa\"aa"#), r#"aa"aa"#),
(r"aa\\\aa", r#""#, rtok("", r"aa\\\aa"), r"aa\\\aa"),
(r#"aa\\"\aa"#, r#""\aa"#, rtok("", r"aa\\"), r"aa\\"),
(r#"aa\\\aa"#, r#""#, rtok("", r#"aa\\\aa"#), r#"aa\\\aa"#),
(r#"aa\\"\aa"#, r#""\aa"#, rtok("", r#"aa\\"#), r#"aa\\"#),
(r#"aa\\\"\aa"#, r#""#, rtok("", r#"aa\\\"\aa"#), r#"aa\\"\aa"#),
(r#"\"\""#, r#""#, rtok("", r#"\"\""#), r#""""#),
];
@ -301,12 +301,12 @@ pub mod test {
);
// simple quote
assert_eq!(
unescape(Span::new_extra(r"Hello \'World\'", ""), '\''),
unescape(Span::new_extra(r#"Hello \'World\'"#, ""), '\''),
r#"Hello 'World'"#.to_string()
);
assert_eq!(
unescape(Span::new_extra(r"Hello \\\'World\\\'", ""), '\''),
r"Hello \\'World\\'".to_string()
unescape(Span::new_extra(r#"Hello \\\'World\\\'"#, ""), '\''),
r#"Hello \\'World\\'"#.to_string()
);
}
@ -335,19 +335,19 @@ pub mod test {
("\"cha'nnel\"", "cha'nnel", false),
("I'm tamo", "I", false),
// escaped thing but not quote
(r#""\\""#, r"\", true),
(r#""\\\\\\""#, r"\\\", true),
(r#""aa\\aa""#, r"aa\aa", true),
(r#""\\""#, r#"\"#, true),
(r#""\\\\\\""#, r#"\\\"#, true),
(r#""aa\\aa""#, r#"aa\aa"#, true),
// with double quote
(r#""Hello \"world\"""#, r#"Hello "world""#, true),
(r#""Hello \\\"world\\\"""#, r#"Hello \"world\""#, true),
(r#""I'm \"super\" tamo""#, r#"I'm "super" tamo"#, true),
(r#""\"\"""#, r#""""#, true),
// with simple quote
(r"'Hello \'world\''", r#"Hello 'world'"#, true),
(r"'Hello \\\'world\\\''", r"Hello \'world\'", true),
(r#"'Hello \'world\''"#, r#"Hello 'world'"#, true),
(r#"'Hello \\\'world\\\''"#, r#"Hello \'world\'"#, true),
(r#"'I\'m "super" tamo'"#, r#"I'm "super" tamo"#, true),
(r"'\'\''", r#"''"#, true),
(r#"'\'\''"#, r#"''"#, true),
];
for (input, expected, escaped) in test_case {

View File

@ -11,10 +11,10 @@ edition.workspace = true
license.workspace = true
[dependencies]
arbitrary = { version = "1.3.2", features = ["derive"] }
clap = { version = "4.4.17", features = ["derive"] }
fastrand = "2.0.1"
arbitrary = { version = "1.3.0", features = ["derive"] }
clap = { version = "4.3.0", features = ["derive"] }
fastrand = "2.0.0"
milli = { path = "../milli" }
serde = { version = "1.0.195", features = ["derive"] }
serde_json = { version = "1.0.111", features = ["preserve_order"] }
tempfile = "3.9.0"
serde = { version = "1.0.160", features = ["derive"] }
serde_json = { version = "1.0.95", features = ["preserve_order"] }
tempfile = "3.5.0"

View File

@ -113,7 +113,7 @@ fn main() {
index.documents(&wtxn, res.documents_ids).unwrap();
progression.fetch_add(1, Ordering::Relaxed);
}
wtxn.abort();
wtxn.abort().unwrap();
});
if let err @ Err(_) = handle.join() {
stop.store(true, Ordering::Relaxed);

View File

@ -11,36 +11,30 @@ edition.workspace = true
license.workspace = true
[dependencies]
anyhow = "1.0.79"
anyhow = "1.0.70"
bincode = "1.3.3"
csv = "1.3.0"
csv = "1.2.1"
derive_builder = "0.12.0"
dump = { path = "../dump" }
enum-iterator = "1.5.0"
enum-iterator = "1.4.0"
file-store = { path = "../file-store" }
flate2 = "1.0.28"
log = "0.4.17"
meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-types = { path = "../meilisearch-types" }
page_size = "0.5.0"
puffin = { version = "0.16.0", features = ["serialization"] }
roaring = { version = "0.10.2", features = ["serde"] }
serde = { version = "1.0.195", features = ["derive"] }
serde_json = { version = "1.0.111", features = ["preserve_order"] }
puffin = "0.16.0"
roaring = { version = "0.10.1", features = ["serde"] }
serde = { version = "1.0.160", features = ["derive"] }
serde_json = { version = "1.0.95", features = ["preserve_order"] }
synchronoise = "1.0.1"
tempfile = "3.9.0"
thiserror = "1.0.56"
time = { version = "0.3.31", features = [
"serde-well-known",
"formatting",
"parsing",
"macros",
] }
tracing = "0.1.40"
ureq = "2.9.1"
uuid = { version = "1.6.1", features = ["serde", "v4"] }
tempfile = "3.5.0"
thiserror = "1.0.40"
time = { version = "0.3.20", features = ["serde-well-known", "formatting", "parsing", "macros"] }
uuid = { version = "1.3.1", features = ["serde", "v4"] }
[dev-dependencies]
big_s = "1.0.2"
crossbeam = "0.8.4"
insta = { version = "1.34.0", features = ["json", "redactions"] }
crossbeam = "0.8.2"
insta = { version = "1.29.0", features = ["json", "redactions"] }
meili-snap = { path = "../meili-snap" }
nelson = { git = "https://github.com/meilisearch/nelson.git", rev = "675f13885548fb415ead8fbb447e9e6d9314000a"}

View File

@ -24,14 +24,16 @@ use std::fs::{self, File};
use std::io::BufWriter;
use dump::IndexMetadata;
use log::{debug, error, info};
use meilisearch_types::error::Code;
use meilisearch_types::heed::{RoTxn, RwTxn};
use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader};
use meilisearch_types::milli::heed::CompactionOption;
use meilisearch_types::milli::update::{
IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig, Settings as MilliSettings,
DeleteDocuments, DocumentDeletionResult, IndexDocumentsConfig, IndexDocumentsMethod,
Settings as MilliSettings,
};
use meilisearch_types::milli::{self, Filter};
use meilisearch_types::milli::{self, Filter, BEU32};
use meilisearch_types::settings::{apply_settings_to_builder, Settings, Unchecked};
use meilisearch_types::tasks::{Details, IndexSwap, Kind, KindWithContent, Status, Task};
use meilisearch_types::{compression, Index, VERSION_FILE_NAME};
@ -42,7 +44,7 @@ use uuid::Uuid;
use crate::autobatcher::{self, BatchKind};
use crate::utils::{self, swap_index_uid_in_task};
use crate::{Error, IndexScheduler, MustStopProcessing, ProcessingTasks, Result, TaskId};
use crate::{Error, IndexScheduler, ProcessingTasks, Result, TaskId};
/// Represents a combination of tasks that can all be processed at the same time.
///
@ -59,7 +61,7 @@ pub(crate) enum Batch {
/// The list of tasks that were processing when this task cancelation appeared.
previous_processing_tasks: RoaringBitmap,
},
TaskDeletions(Vec<Task>),
TaskDeletion(Task),
SnapshotCreation(Vec<Task>),
Dump(Task),
IndexOperation {
@ -103,6 +105,12 @@ pub(crate) enum IndexOperation {
operations: Vec<DocumentOperation>,
tasks: Vec<Task>,
},
DocumentDeletion {
index_uid: String,
// The vec of document ids associated with each document deletion task.
documents: Vec<Vec<String>>,
tasks: Vec<Task>,
},
IndexDocumentDeletionByFilter {
index_uid: String,
task: Task,
@ -145,14 +153,16 @@ impl Batch {
pub fn ids(&self) -> Vec<TaskId> {
match self {
Batch::TaskCancelation { task, .. }
| Batch::TaskDeletion(task)
| Batch::Dump(task)
| Batch::IndexCreation { task, .. }
| Batch::IndexUpdate { task, .. } => vec![task.uid],
Batch::SnapshotCreation(tasks)
| Batch::TaskDeletions(tasks)
| Batch::IndexDeletion { tasks, .. } => tasks.iter().map(|task| task.uid).collect(),
Batch::SnapshotCreation(tasks) | Batch::IndexDeletion { tasks, .. } => {
tasks.iter().map(|task| task.uid).collect()
}
Batch::IndexOperation { op, .. } => match op {
IndexOperation::DocumentOperation { tasks, .. }
| IndexOperation::DocumentDeletion { tasks, .. }
| IndexOperation::Settings { tasks, .. }
| IndexOperation::DocumentClear { tasks, .. } => {
tasks.iter().map(|task| task.uid).collect()
@ -178,7 +188,7 @@ impl Batch {
use Batch::*;
match self {
TaskCancelation { .. }
| TaskDeletions(_)
| TaskDeletion(_)
| SnapshotCreation(_)
| Dump(_)
| IndexSwap { .. } => None,
@ -197,7 +207,7 @@ impl fmt::Display for Batch {
let tasks = self.ids();
match self {
Batch::TaskCancelation { .. } => f.write_str("TaskCancelation")?,
Batch::TaskDeletions(_) => f.write_str("TaskDeletion")?,
Batch::TaskDeletion(_) => f.write_str("TaskDeletion")?,
Batch::SnapshotCreation(_) => f.write_str("SnapshotCreation")?,
Batch::Dump(_) => f.write_str("Dump")?,
Batch::IndexOperation { op, .. } => write!(f, "{op}")?,
@ -217,6 +227,7 @@ impl IndexOperation {
pub fn index_uid(&self) -> &str {
match self {
IndexOperation::DocumentOperation { index_uid, .. }
| IndexOperation::DocumentDeletion { index_uid, .. }
| IndexOperation::IndexDocumentDeletionByFilter { index_uid, .. }
| IndexOperation::DocumentClear { index_uid, .. }
| IndexOperation::Settings { index_uid, .. }
@ -232,6 +243,9 @@ impl fmt::Display for IndexOperation {
IndexOperation::DocumentOperation { .. } => {
f.write_str("IndexOperation::DocumentOperation")
}
IndexOperation::DocumentDeletion { .. } => {
f.write_str("IndexOperation::DocumentDeletion")
}
IndexOperation::IndexDocumentDeletionByFilter { .. } => {
f.write_str("IndexOperation::IndexDocumentDeletionByFilter")
}
@ -334,27 +348,18 @@ impl IndexScheduler {
BatchKind::DocumentDeletion { deletion_ids } => {
let tasks = self.get_existing_tasks(rtxn, deletion_ids)?;
let mut operations = Vec::with_capacity(tasks.len());
let mut documents_counts = Vec::with_capacity(tasks.len());
let mut documents = Vec::new();
for task in &tasks {
match task.kind {
KindWithContent::DocumentDeletion { ref documents_ids, .. } => {
operations.push(DocumentOperation::Delete(documents_ids.clone()));
documents_counts.push(documents_ids.len() as u64);
documents.push(documents_ids.clone())
}
_ => unreachable!(),
}
}
Ok(Some(Batch::IndexOperation {
op: IndexOperation::DocumentOperation {
index_uid,
primary_key: None,
method: IndexDocumentsMethod::ReplaceDocuments,
documents_counts,
operations,
tasks,
},
op: IndexOperation::DocumentDeletion { index_uid, documents, tasks },
must_create_index,
}))
}
@ -513,7 +518,6 @@ impl IndexScheduler {
/// 3. We get the *next* snapshot to process.
/// 4. We get the *next* dump to process.
/// 5. We get the *next* tasks to process for a specific index.
#[tracing::instrument(level = "trace", skip(self, rtxn), target = "indexing::scheduler")]
pub(crate) fn create_next_batch(&self, rtxn: &RoTxn) -> Result<Option<Batch>> {
#[cfg(test)]
self.maybe_fail(crate::tests::FailureLocation::InsideCreateBatch)?;
@ -538,9 +542,9 @@ impl IndexScheduler {
// 2. we get the next task to delete
let to_delete = self.get_kind(rtxn, Kind::TaskDeletion)? & enqueued;
if !to_delete.is_empty() {
let tasks = self.get_existing_tasks(rtxn, to_delete)?;
return Ok(Some(Batch::TaskDeletions(tasks)));
if let Some(task_id) = to_delete.min() {
let task = self.get_task(rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?;
return Ok(Some(Batch::TaskDeletion(task)));
}
// 3. we batch the snapshot.
@ -583,9 +587,7 @@ impl IndexScheduler {
let index_tasks = self.index_tasks(rtxn, index_name)? & enqueued;
// If autobatching is disabled we only take one task at a time.
// Otherwise, we take only a maximum of tasks to create batches.
let tasks_limit =
if self.autobatching_enabled { self.max_number_of_batched_tasks } else { 1 };
let tasks_limit = if self.autobatching_enabled { usize::MAX } else { 1 };
let enqueued = index_tasks
.into_iter()
@ -619,7 +621,6 @@ impl IndexScheduler {
/// The list of tasks that were processed. The metadata of each task in the returned
/// list is updated accordingly, with the exception of its date fields
/// [`finished_at`](meilisearch_types::tasks::Task::finished_at) and [`started_at`](meilisearch_types::tasks::Task::started_at).
#[tracing::instrument(level = "trace", skip(self, batch), target = "indexing::scheduler", fields(batch=batch.to_string()))]
pub(crate) fn process_batch(&self, batch: Batch) -> Result<Vec<Task>> {
#[cfg(test)]
{
@ -669,10 +670,9 @@ impl IndexScheduler {
Ok(()) => {
for content_uuid in canceled_tasks_content_uuids {
if let Err(error) = self.delete_update_file(content_uuid) {
tracing::error!(
file_content_uuid = %content_uuid,
%error,
"Failed deleting content file"
error!(
"We failed deleting the content file indentified as {}: {}",
content_uuid, error
)
}
}
@ -682,43 +682,31 @@ impl IndexScheduler {
Ok(vec![task])
}
Batch::TaskDeletions(mut tasks) => {
Batch::TaskDeletion(mut task) => {
// 1. Retrieve the tasks that matched the query at enqueue-time.
let mut matched_tasks = RoaringBitmap::new();
for task in tasks.iter() {
let matched_tasks =
if let KindWithContent::TaskDeletion { tasks, query: _ } = &task.kind {
matched_tasks |= tasks;
tasks
} else {
unreachable!()
}
}
let mut wtxn = self.env.write_txn()?;
let mut deleted_tasks = self.delete_matched_tasks(&mut wtxn, &matched_tasks)?;
wtxn.commit()?;
for task in tasks.iter_mut() {
task.status = Status::Succeeded;
let KindWithContent::TaskDeletion { tasks, query: _ } = &task.kind else {
unreachable!()
};
let deleted_tasks_count = deleted_tasks.intersection_len(tasks);
deleted_tasks -= tasks;
let mut wtxn = self.env.write_txn()?;
let deleted_tasks_count = self.delete_matched_tasks(&mut wtxn, matched_tasks)?;
match &mut task.details {
Some(Details::TaskDeletion {
matched_tasks: _,
deleted_tasks,
original_filter: _,
}) => {
*deleted_tasks = Some(deleted_tasks_count);
}
_ => unreachable!(),
task.status = Status::Succeeded;
match &mut task.details {
Some(Details::TaskDeletion {
matched_tasks: _,
deleted_tasks,
original_filter: _,
}) => {
*deleted_tasks = Some(deleted_tasks_count);
}
_ => unreachable!(),
}
Ok(tasks)
wtxn.commit()?;
Ok(vec![task])
}
Batch::SnapshotCreation(mut tasks) => {
fs::create_dir_all(&self.snapshots_path)?;
@ -730,7 +718,7 @@ impl IndexScheduler {
// 2. Snapshot the index-scheduler LMDB env
//
// When we call copy_to_file, LMDB opens a read transaction by itself,
// When we call copy_to_path, LMDB opens a read transaction by itself,
// we can't provide our own. It is an issue as we would like to know
// the update files to copy but new ones can be enqueued between the copy
// of the env and the new transaction we open to retrieve the enqueued tasks.
@ -743,7 +731,7 @@ impl IndexScheduler {
// 2.1 First copy the LMDB env of the index-scheduler
let dst = temp_snapshot_dir.path().join("tasks");
fs::create_dir_all(&dst)?;
self.env.copy_to_file(dst.join("data.mdb"), CompactionOption::Enabled)?;
self.env.copy_to_path(dst.join("data.mdb"), CompactionOption::Enabled)?;
// 2.2 Create a read transaction on the index-scheduler
let rtxn = self.env.read_txn()?;
@ -768,7 +756,7 @@ impl IndexScheduler {
let index = self.index_mapper.index(&rtxn, name)?;
let dst = temp_snapshot_dir.path().join("indexes").join(uuid.to_string());
fs::create_dir_all(&dst)?;
index.copy_to_file(dst.join("data.mdb"), CompactionOption::Enabled)?;
index.copy_to_path(dst.join("data.mdb"), CompactionOption::Enabled)?;
}
drop(rtxn);
@ -781,7 +769,7 @@ impl IndexScheduler {
.map_size(1024 * 1024 * 1024) // 1 GiB
.max_dbs(2)
.open(&self.auth_path)?;
auth.copy_to_file(dst.join("data.mdb"), CompactionOption::Enabled)?;
auth.copy_to_path(dst.join("data.mdb"), CompactionOption::Enabled)?;
// 5. Copy and tarball the flat snapshot
// 5.1 Find the original name of the database
@ -837,10 +825,6 @@ impl IndexScheduler {
// 2. dump the tasks
let mut dump_tasks = dump.create_tasks_queue()?;
for ret in self.all_tasks.iter(&rtxn)? {
if self.must_stop_processing.get() {
return Err(Error::AbortedTask);
}
let (_, mut t) = ret?;
let status = t.status;
let content_file = t.content_uuid();
@ -861,9 +845,6 @@ impl IndexScheduler {
// 2.1. Dump the `content_file` associated with the task if there is one and the task is not finished yet.
if let Some(content_file) = content_file {
if self.must_stop_processing.get() {
return Err(Error::AbortedTask);
}
if status == Status::Enqueued {
let content_file = self.file_store.get_update(content_file)?;
@ -903,9 +884,6 @@ impl IndexScheduler {
// 3.1. Dump the documents
for ret in index.all_documents(&rtxn)? {
if self.must_stop_processing.get() {
return Err(Error::AbortedTask);
}
let (_id, doc) = ret?;
let document = milli::obkv_to_json(&all_fields, &fields_ids_map, doc)?;
index_dumper.push_document(&document)?;
@ -925,9 +903,6 @@ impl IndexScheduler {
"[year repr:full][month repr:numerical][day padding:zero]-[hour padding:zero][minute padding:zero][second padding:zero][subsecond digits:3]"
)).unwrap();
if self.must_stop_processing.get() {
return Err(Error::AbortedTask);
}
let path = self.dumps_path.join(format!("{}.dump", dump_uid));
let file = File::create(path)?;
dump.persist_to(BufWriter::new(file))?;
@ -948,10 +923,6 @@ impl IndexScheduler {
self.index_mapper.index(&rtxn, &index_uid)?
};
// the index operation can take a long time, so save this handle to make it available to the search for the duration of the tick
self.index_mapper
.set_currently_updating_index(Some((index_uid.clone(), index.clone())));
let mut index_wtxn = index.write_txn()?;
let tasks = self.apply_index_operation(&mut index_wtxn, &index, op)?;
index_wtxn.commit()?;
@ -971,10 +942,7 @@ impl IndexScheduler {
match res {
Ok(_) => (),
Err(e) => tracing::error!(
error = &e as &dyn std::error::Error,
"Could not write the stats of the index"
),
Err(e) => error!("Could not write the stats of the index {}", e),
}
Ok(tasks)
@ -1002,7 +970,7 @@ impl IndexScheduler {
builder.set_primary_key(primary_key);
let must_stop_processing = self.must_stop_processing.clone();
builder.execute(
|indexing_step| tracing::debug!(update = ?indexing_step),
|indexing_step| debug!("update: {:?}", indexing_step),
|| must_stop_processing.get(),
)?;
index_wtxn.commit()?;
@ -1029,10 +997,7 @@ impl IndexScheduler {
match res {
Ok(_) => (),
Err(e) => tracing::error!(
error = &e as &dyn std::error::Error,
"Could not write the stats of the index"
),
Err(e) => error!("Could not write the stats of the index {}", e),
}
Ok(vec![task])
@ -1127,7 +1092,7 @@ impl IndexScheduler {
for task_id in &index_lhs_task_ids | &index_rhs_task_ids {
let mut task = self.get_task(wtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?;
swap_index_uid_in_task(&mut task, (lhs, rhs));
self.all_tasks.put(wtxn, &task_id, &task)?;
self.all_tasks.put(wtxn, &BEU32::new(task_id), &task)?;
}
// 4. remove the task from indexuid = before_name
@ -1151,14 +1116,9 @@ impl IndexScheduler {
///
/// ## Return
/// The list of processed tasks.
#[tracing::instrument(
level = "trace",
skip(self, index_wtxn, index),
target = "indexing::scheduler"
)]
fn apply_index_operation<'i>(
&self,
index_wtxn: &mut RwTxn<'i>,
index_wtxn: &mut RwTxn<'i, '_>,
index: &'i Index,
operation: IndexOperation,
) -> Result<Vec<Task>> {
@ -1216,7 +1176,7 @@ impl IndexScheduler {
milli::update::Settings::new(index_wtxn, index, indexer_config);
builder.set_primary_key(primary_key);
builder.execute(
|indexing_step| tracing::debug!(update = ?indexing_step),
|indexing_step| debug!("update: {:?}", indexing_step),
|| must_stop_processing.clone().get(),
)?;
primary_key_has_been_set = true;
@ -1226,16 +1186,12 @@ impl IndexScheduler {
let config = IndexDocumentsConfig { update_method: method, ..Default::default() };
let embedder_configs = index.embedding_configs(index_wtxn)?;
// TODO: consider Arc'ing the map too (we only need read access + we'll be cloning it multiple times, so really makes sense)
let embedders = self.embedders(embedder_configs)?;
let mut builder = milli::update::IndexDocuments::new(
index_wtxn,
index,
indexer_config,
config,
|indexing_step| tracing::trace!(?indexing_step, "Update"),
|indexing_step| debug!("update: {:?}", indexing_step),
|| must_stop_processing.get(),
)?;
@ -1248,8 +1204,6 @@ impl IndexScheduler {
let (new_builder, user_result) = builder.add_documents(reader)?;
builder = new_builder;
builder = builder.with_embedders(embedders.clone());
let received_documents =
if let Some(Details::DocumentAdditionOrUpdate {
received_documents,
@ -1284,8 +1238,7 @@ impl IndexScheduler {
let (new_builder, user_result) =
builder.remove_documents(document_ids)?;
builder = new_builder;
// Uses Invariant: remove documents actually always returns Ok for the inner result
let count = user_result.unwrap();
let provided_ids =
if let Some(Details::DocumentDeletion { provided_ids, .. }) =
task.details
@ -1296,18 +1249,30 @@ impl IndexScheduler {
unreachable!();
};
task.status = Status::Succeeded;
task.details = Some(Details::DocumentDeletion {
provided_ids,
deleted_documents: Some(count),
});
match user_result {
Ok(count) => {
task.status = Status::Succeeded;
task.details = Some(Details::DocumentDeletion {
provided_ids,
deleted_documents: Some(count),
});
}
Err(e) => {
task.status = Status::Failed;
task.details = Some(Details::DocumentDeletion {
provided_ids,
deleted_documents: Some(0),
});
task.error = Some(milli::Error::from(e).into());
}
}
}
}
}
if !tasks.iter().all(|res| res.error.is_some()) {
let addition = builder.execute()?;
tracing::info!(indexing_result = ?addition, "document indexing done");
info!("document addition done: {:?}", addition);
} else if primary_key_has_been_set {
// Everything failed but we've set a primary key.
// We need to remove it.
@ -1315,13 +1280,31 @@ impl IndexScheduler {
milli::update::Settings::new(index_wtxn, index, indexer_config);
builder.reset_primary_key();
builder.execute(
|indexing_step| tracing::trace!(update = ?indexing_step),
|indexing_step| debug!("update: {:?}", indexing_step),
|| must_stop_processing.clone().get(),
)?;
}
Ok(tasks)
}
IndexOperation::DocumentDeletion { index_uid: _, documents, mut tasks } => {
let mut builder = milli::update::DeleteDocuments::new(index_wtxn, index)?;
documents.iter().flatten().for_each(|id| {
builder.delete_external_id(id);
});
let DocumentDeletionResult { deleted_documents, .. } = builder.execute()?;
for (task, documents) in tasks.iter_mut().zip(documents) {
task.status = Status::Succeeded;
task.details = Some(Details::DocumentDeletion {
provided_ids: documents.len(),
deleted_documents: Some(deleted_documents.min(documents.len() as u64)),
});
}
Ok(tasks)
}
IndexOperation::IndexDocumentDeletionByFilter { mut task, index_uid: _ } => {
let filter =
if let KindWithContent::DocumentDeletionByFilter { filter_expr, .. } =
@ -1331,13 +1314,7 @@ impl IndexScheduler {
} else {
unreachable!()
};
let deleted_documents = delete_document_by_filter(
index_wtxn,
filter,
self.index_mapper.indexer_config(),
self.must_stop_processing.clone(),
index,
);
let deleted_documents = delete_document_by_filter(index_wtxn, filter, index);
let original_filter = if let Some(Details::DocumentDeletionByFilter {
original_filter,
deleted_documents: _,
@ -1385,7 +1362,7 @@ impl IndexScheduler {
let must_stop_processing = self.must_stop_processing.clone();
builder.execute(
|indexing_step| tracing::debug!(update = ?indexing_step),
|indexing_step| debug!("update: {:?}", indexing_step),
|| must_stop_processing.get(),
)?;
@ -1459,11 +1436,7 @@ impl IndexScheduler {
/// Delete each given task from all the databases (if it is deletable).
///
/// Return the number of tasks that were actually deleted.
fn delete_matched_tasks(
&self,
wtxn: &mut RwTxn,
matched_tasks: &RoaringBitmap,
) -> Result<RoaringBitmap> {
fn delete_matched_tasks(&self, wtxn: &mut RwTxn, matched_tasks: &RoaringBitmap) -> Result<u64> {
// 1. Remove from this list the tasks that we are not allowed to delete
let enqueued_tasks = self.get_status(wtxn, Status::Enqueued)?;
let processing_tasks = &self.processing_tasks.read().unwrap().processing.clone();
@ -1515,9 +1488,10 @@ impl IndexScheduler {
}
for task in to_delete_tasks.iter() {
self.all_tasks.delete(wtxn, &task)?;
self.all_tasks.delete(wtxn, &BEU32::new(task))?;
}
for canceled_by in affected_canceled_by {
let canceled_by = BEU32::new(canceled_by);
if let Some(mut tasks) = self.canceled_by.get(wtxn, &canceled_by)? {
tasks -= &to_delete_tasks;
if tasks.is_empty() {
@ -1528,7 +1502,7 @@ impl IndexScheduler {
}
}
Ok(to_delete_tasks)
Ok(to_delete_tasks.len())
}
/// Cancel each given task from all the databases (if it is cancelable).
@ -1565,17 +1539,15 @@ impl IndexScheduler {
task.details = task.details.map(|d| d.to_failed());
self.update_task(wtxn, &task)?;
}
self.canceled_by.put(wtxn, &cancel_task_id, &tasks_to_cancel)?;
self.canceled_by.put(wtxn, &BEU32::new(cancel_task_id), &tasks_to_cancel)?;
Ok(content_files_to_delete)
}
}
fn delete_document_by_filter<'a>(
wtxn: &mut RwTxn<'a>,
wtxn: &mut RwTxn<'a, '_>,
filter: &serde_json::Value,
indexer_config: &IndexerConfig,
must_stop_processing: MustStopProcessing,
index: &'a Index,
) -> Result<u64> {
let filter = Filter::from_json(filter)?;
@ -1586,26 +1558,9 @@ fn delete_document_by_filter<'a>(
}
e => e.into(),
})?;
let config = IndexDocumentsConfig {
update_method: IndexDocumentsMethod::ReplaceDocuments,
..Default::default()
};
let mut builder = milli::update::IndexDocuments::new(
wtxn,
index,
indexer_config,
config,
|indexing_step| tracing::debug!(update = ?indexing_step),
|| must_stop_processing.get(),
)?;
let (new_builder, count) = builder.remove_documents_from_db_no_batch(&candidates)?;
builder = new_builder;
let _ = builder.execute()?;
count
let mut delete_operation = DeleteDocuments::new(wtxn, index)?;
delete_operation.delete_documents(&candidates);
delete_operation.execute().map(|result| result.deleted_documents)?
} else {
0
})

View File

@ -108,8 +108,6 @@ pub enum Error {
TaskDeletionWithEmptyQuery,
#[error("Query parameters to filter the tasks to cancel are missing. Available query parameters are: `uids`, `indexUids`, `statuses`, `types`, `canceledBy`, `beforeEnqueuedAt`, `afterEnqueuedAt`, `beforeStartedAt`, `afterStartedAt`, `beforeFinishedAt`, `afterFinishedAt`.")]
TaskCancelationWithEmptyQuery,
#[error("Aborted task")]
AbortedTask,
#[error(transparent)]
Dump(#[from] dump::Error),
@ -177,7 +175,6 @@ impl Error {
| Error::TaskNotFound(_)
| Error::TaskDeletionWithEmptyQuery
| Error::TaskCancelationWithEmptyQuery
| Error::AbortedTask
| Error::Dump(_)
| Error::Heed(_)
| Error::Milli(_)
@ -239,9 +236,6 @@ impl ErrorCode for Error {
Error::TaskDatabaseUpdate(_) => Code::Internal,
Error::CreateBatch(_) => Code::Internal,
// This one should never be seen by the end user
Error::AbortedTask => Code::Internal,
#[cfg(test)]
Error::PlannedFailure => Code::Internal,
}

View File

@ -56,25 +56,12 @@ impl RoFeatures {
}
}
/// Checks that the logs route feature is enabled at runtime.
///
/// # Errors
///
/// Returns a `FeatureNotEnabledError` (converted with `.into()`) describing
/// the `/logs/stream` route when the `logs_route` runtime flag is not set.
pub fn check_logs_route(&self) -> Result<()> {
    // Guard clause: bail out early when the flag is off.
    if !self.runtime.logs_route {
        let not_enabled = FeatureNotEnabledError {
            disabled_action: "getting logs through the `/logs/stream` route",
            feature: "logs route",
            issue_link: "https://github.com/orgs/meilisearch/discussions/721",
        };
        return Err(not_enabled.into());
    }

    Ok(())
}
pub fn check_vector(&self, disabled_action: &'static str) -> Result<()> {
pub fn check_vector(&self) -> Result<()> {
if self.runtime.vector_store {
Ok(())
} else {
Err(FeatureNotEnabledError {
disabled_action,
disabled_action: "Passing `vector` as a query parameter",
feature: "vector store",
issue_link: "https://github.com/meilisearch/product/discussions/677",
}
@ -107,7 +94,6 @@ impl FeatureData {
runtime_features_db.get(&txn, EXPERIMENTAL_FEATURES)?.unwrap_or_default();
let runtime = Arc::new(RwLock::new(RuntimeTogglableFeatures {
metrics: instance_features.metrics || persisted_features.metrics,
logs_route: instance_features.logs_route || persisted_features.logs_route,
..persisted_features
}));

View File

@ -1,8 +1,12 @@
/// the map size to use when we don't succeed in reading it in indexes.
const DEFAULT_MAP_SIZE: usize = 10 * 1024 * 1024 * 1024; // 10 GiB
use std::collections::BTreeMap;
use std::path::Path;
use std::time::Duration;
use meilisearch_types::heed::{EnvClosingEvent, EnvFlags, EnvOpenOptions};
use meilisearch_types::heed::flags::Flags;
use meilisearch_types::heed::{EnvClosingEvent, EnvOpenOptions};
use meilisearch_types::milli::Index;
use time::OffsetDateTime;
use uuid::Uuid;
@ -232,7 +236,7 @@ impl IndexMap {
enable_mdb_writemap: bool,
map_size_growth: usize,
) {
let map_size = index.map_size() + map_size_growth;
let map_size = index.map_size().unwrap_or(DEFAULT_MAP_SIZE) + map_size_growth;
let closing_event = index.prepare_for_closing();
let generation = self.next_generation();
self.unavailable.insert(
@ -305,7 +309,7 @@ fn create_or_open_index(
options.map_size(clamp_to_page_size(map_size));
options.max_readers(1024);
if enable_mdb_writemap {
unsafe { options.flags(EnvFlags::WRITE_MAP) };
unsafe { options.flag(Flags::MdbWriteMap) };
}
if let Some((created, updated)) = date {
@ -384,7 +388,7 @@ mod tests {
fn assert_index_size(index: Index, expected: usize) {
let expected = clamp_to_page_size(expected);
let index_map_size = index.map_size();
let index_map_size = index.map_size().unwrap();
assert_eq!(index_map_size, expected);
}
}

View File

@ -3,13 +3,13 @@ use std::sync::{Arc, RwLock};
use std::time::Duration;
use std::{fs, thread};
use log::error;
use meilisearch_types::heed::types::{SerdeJson, Str};
use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn};
use meilisearch_types::milli::update::IndexerConfig;
use meilisearch_types::milli::{FieldDistribution, Index};
use serde::{Deserialize, Serialize};
use time::OffsetDateTime;
use tracing::error;
use uuid::Uuid;
use self::index_map::IndexMap;
@ -69,10 +69,6 @@ pub struct IndexMapper {
/// Whether we open a meilisearch index with the MDB_WRITEMAP option or not.
enable_mdb_writemap: bool,
pub indexer_config: Arc<IndexerConfig>,
/// A few types of long running batches of tasks that act on a single index set this field
/// so that a handle to the index is available from other threads (search) in an optimized manner.
currently_updating_index: Arc<RwLock<Option<(String, Index)>>>,
}
/// Whether the index is available for use or is forbidden to be inserted back in the index map
@ -155,7 +151,6 @@ impl IndexMapper {
index_growth_amount,
enable_mdb_writemap,
indexer_config: Arc::new(indexer_config),
currently_updating_index: Default::default(),
})
}
@ -308,14 +303,6 @@ impl IndexMapper {
/// Return an index, may open it if it wasn't already opened.
pub fn index(&self, rtxn: &RoTxn, name: &str) -> Result<Index> {
if let Some((current_name, current_index)) =
self.currently_updating_index.read().unwrap().as_ref()
{
if current_name == name {
return Ok(current_index.clone());
}
}
let uuid = self
.index_mapping
.get(rtxn, name)?
@ -487,8 +474,4 @@ impl IndexMapper {
/// Returns a shared reference to the indexer configuration held by this mapper.
pub fn indexer_config(&self) -> &IndexerConfig {
    // Explicitly go through the `Arc` to borrow the inner configuration.
    &*self.indexer_config
}
/// Replaces the "currently updating" index slot with `index`.
///
/// Pass `Some((name, index))` to publish the handle of the index being updated, or `None`
/// to clear the slot and relinquish the handle.
///
/// # Panics
///
/// Panics if the lock protecting the slot is poisoned.
pub fn set_currently_updating_index(&self, index: Option<(String, Index)>) {
    let mut slot = self.currently_updating_index.write().unwrap();
    *slot = index;
}
}

View File

@ -1,7 +1,7 @@
use std::collections::BTreeSet;
use std::fmt::Write;
use meilisearch_types::heed::types::{SerdeBincode, SerdeJson, Str};
use meilisearch_types::heed::types::{OwnedType, SerdeBincode, SerdeJson, Str};
use meilisearch_types::heed::{Database, RoTxn};
use meilisearch_types::milli::{CboRoaringBitmapCodec, RoaringBitmapCodec, BEU32};
use meilisearch_types::tasks::{Details, Task};
@ -30,19 +30,15 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
index_mapper,
features: _,
max_number_of_tasks: _,
max_number_of_batched_tasks: _,
puffin_frame: _,
wake_up: _,
dumps_path: _,
snapshots_path: _,
auth_path: _,
version_file_path: _,
webhook_url: _,
webhook_authorization_header: _,
test_breakpoint_sdr: _,
planned_failures: _,
run_loop_iteration: _,
embedders: _,
} = scheduler;
let rtxn = env.read_txn().unwrap();
@ -118,7 +114,7 @@ pub fn snapshot_bitmap(r: &RoaringBitmap) -> String {
snap
}
pub fn snapshot_all_tasks(rtxn: &RoTxn, db: Database<BEU32, SerdeJson<Task>>) -> String {
pub fn snapshot_all_tasks(rtxn: &RoTxn, db: Database<OwnedType<BEU32>, SerdeJson<Task>>) -> String {
let mut snap = String::new();
let iter = db.iter(rtxn).unwrap();
for next in iter {
@ -128,7 +124,10 @@ pub fn snapshot_all_tasks(rtxn: &RoTxn, db: Database<BEU32, SerdeJson<Task>>) ->
snap
}
pub fn snapshot_date_db(rtxn: &RoTxn, db: Database<BEI128, CboRoaringBitmapCodec>) -> String {
pub fn snapshot_date_db(
rtxn: &RoTxn,
db: Database<OwnedType<BEI128>, CboRoaringBitmapCodec>,
) -> String {
let mut snap = String::new();
let iter = db.iter(rtxn).unwrap();
for next in iter {
@ -248,7 +247,10 @@ pub fn snapshot_index_tasks(rtxn: &RoTxn, db: Database<Str, RoaringBitmapCodec>)
}
snap
}
pub fn snapshot_canceled_by(rtxn: &RoTxn, db: Database<BEU32, RoaringBitmapCodec>) -> String {
pub fn snapshot_canceled_by(
rtxn: &RoTxn,
db: Database<OwnedType<BEU32>, RoaringBitmapCodec>,
) -> String {
let mut snap = String::new();
let iter = db.iter(rtxn).unwrap();
for next in iter {

View File

@ -27,14 +27,13 @@ mod index_mapper;
mod insta_snapshot;
mod lru;
mod utils;
pub mod uuid_codec;
mod uuid_codec;
pub type Result<T> = std::result::Result<T, Error>;
pub type TaskId = u32;
use std::collections::{BTreeMap, HashMap};
use std::fs::File;
use std::io::{self, BufReader, Read};
use std::ops::{Bound, RangeBounds};
use std::path::{Path, PathBuf};
use std::sync::atomic::AtomicBool;
@ -46,18 +45,13 @@ use dump::{KindDump, TaskDump, UpdateFile};
pub use error::Error;
pub use features::RoFeatures;
use file_store::FileStore;
use flate2::bufread::GzEncoder;
use flate2::Compression;
use meilisearch_types::error::ResponseError;
use meilisearch_types::features::{InstanceTogglableFeatures, RuntimeTogglableFeatures};
use meilisearch_types::heed::byteorder::BE;
use meilisearch_types::heed::types::{SerdeBincode, SerdeJson, Str, I128};
use meilisearch_types::heed::{self, Database, Env, PutFlags, RoTxn, RwTxn};
use meilisearch_types::heed::types::{OwnedType, SerdeBincode, SerdeJson, Str};
use meilisearch_types::heed::{self, Database, Env, RoTxn, RwTxn};
use meilisearch_types::milli::documents::DocumentsBatchBuilder;
use meilisearch_types::milli::update::IndexerConfig;
use meilisearch_types::milli::vector::{Embedder, EmbedderOptions, EmbeddingConfigs};
use meilisearch_types::milli::{self, CboRoaringBitmapCodec, Index, RoaringBitmapCodec, BEU32};
use meilisearch_types::task_view::TaskView;
use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task};
use puffin::FrameView;
use roaring::RoaringBitmap;
@ -70,7 +64,8 @@ use uuid::Uuid;
use crate::index_mapper::IndexMapper;
use crate::utils::{check_index_swap_validity, clamp_to_page_size};
pub(crate) type BEI128 = I128<BE>;
pub(crate) type BEI128 =
meilisearch_types::heed::zerocopy::I128<meilisearch_types::heed::byteorder::BE>;
/// Defines a subset of tasks to be retrieved from the [`IndexScheduler`].
///
@ -174,8 +169,8 @@ impl ProcessingTasks {
}
/// Set the processing tasks to an empty list
fn stop_processing(&mut self) -> RoaringBitmap {
std::mem::take(&mut self.processing)
fn stop_processing(&mut self) {
self.processing = RoaringBitmap::new();
}
/// Returns `true` if there is at least one task currently processing that we must stop.
@ -245,10 +240,6 @@ pub struct IndexSchedulerOptions {
pub snapshots_path: PathBuf,
/// The path to the folder containing the dumps.
pub dumps_path: PathBuf,
/// The URL on which we must send the tasks statuses
pub webhook_url: Option<String>,
/// The value we will send into the Authorization HTTP header on the webhook URL
pub webhook_authorization_header: Option<String>,
/// The maximum size, in bytes, of the task index.
pub task_db_size: usize,
/// The size, in bytes, with which each meilisearch index is opened for the first time.
@ -267,9 +258,6 @@ pub struct IndexSchedulerOptions {
/// The maximum number of tasks stored in the task queue before starting
/// to auto schedule task deletions.
pub max_number_of_tasks: usize,
/// If the autobatcher is allowed to automatically batch tasks
/// it will only batch this defined number of tasks at once.
pub max_number_of_batched_tasks: usize,
/// The experimental features enabled for this instance.
pub instance_features: InstanceTogglableFeatures,
}
@ -290,7 +278,7 @@ pub struct IndexScheduler {
pub(crate) file_store: FileStore,
// The main database, it contains all the tasks accessible by their Id.
pub(crate) all_tasks: Database<BEU32, SerdeJson<Task>>,
pub(crate) all_tasks: Database<OwnedType<BEU32>, SerdeJson<Task>>,
/// All the tasks ids grouped by their status.
// TODO we should not be able to serialize a `Status::Processing` in this database.
@ -301,16 +289,16 @@ pub struct IndexScheduler {
pub(crate) index_tasks: Database<Str, RoaringBitmapCodec>,
/// Store the tasks that were canceled by a task uid
pub(crate) canceled_by: Database<BEU32, RoaringBitmapCodec>,
pub(crate) canceled_by: Database<OwnedType<BEU32>, RoaringBitmapCodec>,
/// Store the task ids of tasks which were enqueued at a specific date
pub(crate) enqueued_at: Database<BEI128, CboRoaringBitmapCodec>,
pub(crate) enqueued_at: Database<OwnedType<BEI128>, CboRoaringBitmapCodec>,
/// Store the task ids of finished tasks which started being processed at a specific date
pub(crate) started_at: Database<BEI128, CboRoaringBitmapCodec>,
pub(crate) started_at: Database<OwnedType<BEI128>, CboRoaringBitmapCodec>,
/// Store the task ids of tasks which finished at a specific date
pub(crate) finished_at: Database<BEI128, CboRoaringBitmapCodec>,
pub(crate) finished_at: Database<OwnedType<BEI128>, CboRoaringBitmapCodec>,
/// In charge of creating, opening, storing and returning indexes.
pub(crate) index_mapper: IndexMapper,
@ -328,14 +316,6 @@ pub struct IndexScheduler {
/// the finished tasks automatically.
pub(crate) max_number_of_tasks: usize,
/// The maximum number of tasks that will be batched together.
pub(crate) max_number_of_batched_tasks: usize,
/// The webhook url we should send tasks to after processing every batch.
pub(crate) webhook_url: Option<String>,
/// The Authorization header to send to the webhook URL.
pub(crate) webhook_authorization_header: Option<String>,
/// A frame to output the indexation profiling files to disk.
pub(crate) puffin_frame: Arc<puffin::GlobalFrameView>,
@ -351,8 +331,6 @@ pub struct IndexScheduler {
/// The path to the version file of Meilisearch.
pub(crate) version_file_path: PathBuf,
embedders: Arc<RwLock<HashMap<EmbedderOptions, Arc<Embedder>>>>,
// ================= test
// The next entry is dedicated to the tests.
/// Provide a way to set a breakpoint in multiple parts of the scheduler.
@ -391,15 +369,11 @@ impl IndexScheduler {
wake_up: self.wake_up.clone(),
autobatching_enabled: self.autobatching_enabled,
max_number_of_tasks: self.max_number_of_tasks,
max_number_of_batched_tasks: self.max_number_of_batched_tasks,
puffin_frame: self.puffin_frame.clone(),
snapshots_path: self.snapshots_path.clone(),
dumps_path: self.dumps_path.clone(),
auth_path: self.auth_path.clone(),
version_file_path: self.version_file_path.clone(),
webhook_url: self.webhook_url.clone(),
webhook_authorization_header: self.webhook_authorization_header.clone(),
embedders: self.embedders.clone(),
#[cfg(test)]
test_breakpoint_sdr: self.test_breakpoint_sdr.clone(),
#[cfg(test)]
@ -492,14 +466,10 @@ impl IndexScheduler {
puffin_frame: Arc::new(puffin::GlobalFrameView::default()),
autobatching_enabled: options.autobatching_enabled,
max_number_of_tasks: options.max_number_of_tasks,
max_number_of_batched_tasks: options.max_number_of_batched_tasks,
dumps_path: options.dumps_path,
snapshots_path: options.snapshots_path,
auth_path: options.auth_path,
version_file_path: options.version_file_path,
webhook_url: options.webhook_url,
webhook_authorization_header: options.webhook_authorization_header,
embedders: Default::default(),
#[cfg(test)]
test_breakpoint_sdr,
@ -535,17 +505,17 @@ impl IndexScheduler {
let budget = if Self::is_good_heed(tasks_path, DEFAULT_BUDGET) {
DEFAULT_BUDGET
} else {
tracing::debug!("determining budget with dichotomic search");
log::debug!("determining budget with dichotomic search");
utils::dichotomic_search(DEFAULT_BUDGET / 2, |map_size| {
Self::is_good_heed(tasks_path, map_size)
})
};
tracing::debug!("memmap budget: {budget}B");
log::debug!("memmap budget: {budget}B");
let mut budget = budget / 2;
if task_db_size > (budget / 2) {
task_db_size = clamp_to_page_size(budget * 2 / 5);
tracing::debug!(
log::debug!(
"Decreasing max size of task DB to {task_db_size}B due to constrained memory space"
);
}
@ -555,13 +525,13 @@ impl IndexScheduler {
let budget = budget;
let task_db_size = task_db_size;
tracing::debug!("index budget: {budget}B");
log::debug!("index budget: {budget}B");
let mut index_count = budget / base_map_size;
if index_count < 2 {
// take a bit less than half of the budget to make sure we can always afford to open an index
let map_size = (budget * 2) / 5;
// single index of max budget
tracing::debug!("1 index of {map_size}B can be opened simultaneously.");
log::debug!("1 index of {map_size}B can be opened simultaneously.");
return IndexBudget { map_size, index_count: 1, task_db_size };
}
// give us some space for an additional index when the cache is already full
@ -570,7 +540,7 @@ impl IndexScheduler {
if index_count > max_index_count {
index_count = max_index_count;
}
tracing::debug!("Up to {index_count} indexes of {base_map_size}B opened simultaneously.");
log::debug!("Up to {index_count} indexes of {base_map_size}B opened simultaneously.");
IndexBudget { map_size: base_map_size, index_count, task_db_size }
}
@ -617,7 +587,7 @@ impl IndexScheduler {
Ok(TickOutcome::TickAgain(_)) => (),
Ok(TickOutcome::WaitForSignal) => run.wake_up.wait(),
Err(e) => {
tracing::error!("{e}");
log::error!("{e}");
// Wait one second when an irrecoverable error occurs.
if !e.is_recoverable() {
std::thread::sleep(Duration::from_secs(1));
@ -634,15 +604,15 @@ impl IndexScheduler {
let mut file = match File::create(format!("{}.puffin", now)) {
Ok(file) => file,
Err(e) => {
tracing::error!("{e}");
log::error!("{e}");
continue;
}
};
if let Err(e) = frame_view.save_to_writer(&mut file) {
tracing::error!("{e}");
log::error!("{e}");
}
if let Err(e) = file.sync_all() {
tracing::error!("{e}");
log::error!("{e}");
}
// We erase this frame view as it is no longer useful. We want to
// measure the new frames now that we exported the previous ones.
@ -747,7 +717,9 @@ impl IndexScheduler {
if let Some(canceled_by) = &query.canceled_by {
let mut all_canceled_tasks = RoaringBitmap::new();
for cancel_task_uid in canceled_by {
if let Some(canceled_by_uid) = self.canceled_by.get(rtxn, cancel_task_uid)? {
if let Some(canceled_by_uid) =
self.canceled_by.get(rtxn, &BEU32::new(*cancel_task_uid))?
{
all_canceled_tasks |= canceled_by_uid;
}
}
@ -998,7 +970,7 @@ impl IndexScheduler {
// if the task doesn't delete anything and 50% of the task queue is full, we must refuse to enqueue the incoming task
if !matches!(&kind, KindWithContent::TaskDeletion { tasks, .. } if !tasks.is_empty())
&& (self.env.non_free_pages_size()? * 100) / self.env.info().map_size as u64 > 50
&& (self.env.non_free_pages_size()? * 100) / self.env.map_size()? as u64 > 50
{
return Err(Error::NoSpaceLeftInTaskQueue);
}
@ -1024,7 +996,7 @@ impl IndexScheduler {
// Get rid of the mutability.
let task = task;
self.all_tasks.put_with_flags(&mut wtxn, PutFlags::APPEND, &task.uid, &task)?;
self.all_tasks.append(&mut wtxn, &BEU32::new(task.uid), &task)?;
for index in task.indexes() {
self.update_index(&mut wtxn, index, |bitmap| {
@ -1161,9 +1133,6 @@ impl IndexScheduler {
handle.join().unwrap_or(Err(Error::ProcessBatchPanicked))
};
// Reset the currently updating index to relinquish the index handle
self.index_mapper.set_currently_updating_index(None);
#[cfg(test)]
self.maybe_fail(tests::FailureLocation::AcquiringWtxn)?;
@ -1190,19 +1159,18 @@ impl IndexScheduler {
self.update_task(&mut wtxn, &task)
.map_err(|e| Error::TaskDatabaseUpdate(Box::new(e)))?;
if let Err(e) = self.delete_persisted_task_data(&task) {
tracing::error!("Failure to delete the content files associated with task {}. Error: {e}", task.uid);
log::error!("Failure to delete the content files associated with task {}. Error: {e}", task.uid);
}
}
tracing::info!("A batch of tasks was successfully completed.");
log::info!("A batch of tasks was successfully completed.");
}
// If we have an abortion error we must stop the tick here and re-schedule tasks.
Err(Error::Milli(milli::Error::InternalError(
milli::InternalError::AbortedIndexation,
)))
| Err(Error::AbortedTask) => {
))) => {
#[cfg(test)]
self.breakpoint(Breakpoint::AbortedIndexation);
wtxn.abort();
wtxn.abort().map_err(Error::HeedTransaction)?;
// We make sure that we don't call `stop_processing` on the `processing_tasks`,
// this is because we want to let the next tick call `create_next_batch` and keep
@ -1223,7 +1191,7 @@ impl IndexScheduler {
let index_uid = index_uid.unwrap();
// fixme: handle error more gracefully? not sure when this could happen
self.index_mapper.resize_index(&wtxn, &index_uid)?;
wtxn.abort();
wtxn.abort().map_err(Error::HeedTransaction)?;
return Ok(TickOutcome::TickAgain(0));
}
@ -1247,7 +1215,7 @@ impl IndexScheduler {
self.maybe_fail(tests::FailureLocation::UpdatingTaskAfterProcessBatchFailure)?;
if let Err(e) = self.delete_persisted_task_data(&task) {
tracing::error!("Failure to delete the content files associated with task {}. Error: {e}", task.uid);
log::error!("Failure to delete the content files associated with task {}. Error: {e}", task.uid);
}
self.update_task(&mut wtxn, &task)
.map_err(|e| Error::TaskDatabaseUpdate(Box::new(e)))?;
@ -1255,99 +1223,19 @@ impl IndexScheduler {
}
}
let processed = self.processing_tasks.write().unwrap().stop_processing();
self.processing_tasks.write().unwrap().stop_processing();
#[cfg(test)]
self.maybe_fail(tests::FailureLocation::CommittingWtxn)?;
wtxn.commit().map_err(Error::HeedTransaction)?;
// We shouldn't crash the tick function if we can't send data to the webhook.
let _ = self.notify_webhook(&processed);
#[cfg(test)]
self.breakpoint(Breakpoint::AfterProcessing);
Ok(TickOutcome::TickAgain(processed_tasks))
}
/// Once the tasks changes have been commited we must send all the tasks that were updated to our webhook if there is one.
fn notify_webhook(&self, updated: &RoaringBitmap) -> Result<()> {
if let Some(ref url) = self.webhook_url {
struct TaskReader<'a, 'b> {
rtxn: &'a RoTxn<'a>,
index_scheduler: &'a IndexScheduler,
tasks: &'b mut roaring::bitmap::Iter<'b>,
buffer: Vec<u8>,
written: usize,
}
impl<'a, 'b> Read for TaskReader<'a, 'b> {
fn read(&mut self, mut buf: &mut [u8]) -> std::io::Result<usize> {
if self.buffer.is_empty() {
match self.tasks.next() {
None => return Ok(0),
Some(task_id) => {
let task = self
.index_scheduler
.get_task(self.rtxn, task_id)
.map_err(|err| io::Error::new(io::ErrorKind::Other, err))?
.ok_or_else(|| {
io::Error::new(
io::ErrorKind::Other,
Error::CorruptedTaskQueue,
)
})?;
serde_json::to_writer(
&mut self.buffer,
&TaskView::from_task(&task),
)?;
self.buffer.push(b'\n');
}
}
}
let mut to_write = &self.buffer[self.written..];
let wrote = io::copy(&mut to_write, &mut buf)?;
self.written += wrote as usize;
// we wrote everything and must refresh our buffer on the next call
if self.written == self.buffer.len() {
self.written = 0;
self.buffer.clear();
}
Ok(wrote as usize)
}
}
let rtxn = self.env.read_txn()?;
let task_reader = TaskReader {
rtxn: &rtxn,
index_scheduler: self,
tasks: &mut updated.into_iter(),
buffer: Vec::with_capacity(50), // on average a task is around ~100 bytes
written: 0,
};
// let reader = GzEncoder::new(BufReader::new(task_reader), Compression::default());
let reader = GzEncoder::new(BufReader::new(task_reader), Compression::default());
let request = ureq::post(url).set("Content-Encoding", "gzip");
let request = match &self.webhook_authorization_header {
Some(header) => request.set("Authorization", header),
None => request,
};
if let Err(e) = request.send(reader) {
tracing::error!("While sending data to the webhook: {e}");
}
}
Ok(())
}
/// Register a task to cleanup the task queue if needed
fn cleanup_task_queue(&self) -> Result<()> {
let rtxn = self.env.read_txn().map_err(Error::HeedTransaction)?;
@ -1367,12 +1255,12 @@ impl IndexScheduler {
// /!\ the len must be at least 2 or else we might enter an infinite loop where we only delete
// the deletion tasks we enqueued ourselves.
if to_delete.len() < 2 {
tracing::warn!("The task queue is almost full, but no task can be deleted yet.");
log::warn!("The task queue is almost full, but no task can be deleted yet.");
// the only thing we can do is hope that the user tasks are going to finish
return Ok(());
}
tracing::info!(
log::info!(
"The task queue is almost full. Deleting the oldest {} finished tasks.",
to_delete.len()
);
@ -1422,40 +1310,6 @@ impl IndexScheduler {
}
}
// TODO: consider using a type alias or a struct embedder/template
pub fn embedders(
&self,
embedding_configs: Vec<(String, milli::vector::EmbeddingConfig)>,
) -> Result<EmbeddingConfigs> {
let res: Result<_> = embedding_configs
.into_iter()
.map(|(name, milli::vector::EmbeddingConfig { embedder_options, prompt })| {
let prompt =
Arc::new(prompt.try_into().map_err(meilisearch_types::milli::Error::from)?);
// optimistically return existing embedder
{
let embedders = self.embedders.read().unwrap();
if let Some(embedder) = embedders.get(&embedder_options) {
return Ok((name, (embedder.clone(), prompt)));
}
}
// add missing embedder
let embedder = Arc::new(
Embedder::new(embedder_options.clone())
.map_err(meilisearch_types::milli::vector::Error::from)
.map_err(meilisearch_types::milli::Error::from)?,
);
{
let mut embedders = self.embedders.write().unwrap();
embedders.insert(embedder_options, embedder.clone());
}
Ok((name, (embedder, prompt)))
})
.collect();
res.map(EmbeddingConfigs::new)
}
/// Blocks the thread until the test handle asks to progress to/through this breakpoint.
///
/// Two messages are sent through the channel for each breakpoint.
@ -1483,7 +1337,7 @@ impl IndexScheduler {
pub struct Dump<'a> {
index_scheduler: &'a IndexScheduler,
wtxn: RwTxn<'a>,
wtxn: RwTxn<'a, 'a>,
indexes: HashMap<String, RoaringBitmap>,
statuses: HashMap<Status, RoaringBitmap>,
@ -1598,7 +1452,7 @@ impl<'a> Dump<'a> {
},
};
self.index_scheduler.all_tasks.put(&mut self.wtxn, &task.uid, &task)?;
self.index_scheduler.all_tasks.put(&mut self.wtxn, &BEU32::new(task.uid), &task)?;
for index in task.indexes() {
match self.indexes.get_mut(index) {
@ -1640,8 +1494,8 @@ impl<'a> Dump<'a> {
}
}
self.statuses.entry(task.status).or_default().insert(task.uid);
self.kinds.entry(task.kind.as_kind()).or_default().insert(task.uid);
self.statuses.entry(task.status).or_insert(RoaringBitmap::new()).insert(task.uid);
self.kinds.entry(task.kind.as_kind()).or_insert(RoaringBitmap::new()).insert(task.uid);
Ok(task)
}
@ -1761,8 +1615,6 @@ mod tests {
indexes_path: tempdir.path().join("indexes"),
snapshots_path: tempdir.path().join("snapshots"),
dumps_path: tempdir.path().join("dumps"),
webhook_url: None,
webhook_authorization_header: None,
task_db_size: 1000 * 1000, // 1 MB, we don't use MiB on purpose.
index_base_map_size: 1000 * 1000, // 1 MB, we don't use MiB on purpose.
enable_mdb_writemap: false,
@ -1771,7 +1623,6 @@ mod tests {
indexer_config,
autobatching_enabled: true,
max_number_of_tasks: 1_000_000,
max_number_of_batched_tasks: usize::MAX,
instance_features: Default::default(),
};
configuration(&mut options);
@ -2244,7 +2095,10 @@ mod tests {
.unwrap();
index_scheduler.assert_internally_consistent();
}
handle.advance_one_successful_batch();
for _ in 0..2 {
handle.advance_one_successful_batch();
index_scheduler.assert_internally_consistent();
}
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "task_deletion_processed");
}
@ -4469,26 +4323,4 @@ mod tests {
}
"###);
}
/// Registers a dump creation task, then registers a cancelation targeting it while the
/// scheduler is paused inside the batch processing, and snapshots the scheduler at each step.
#[test]
fn cancel_processing_dump() {
let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]);
// The dump creation is the first registered task; the cancelation targets task id 0.
let dump_creation = KindWithContent::DumpCreation { keys: Vec::new(), instance_uid: None };
let dump_cancellation = KindWithContent::TaskCancelation {
query: "cancel dump".to_owned(),
tasks: RoaringBitmap::from_iter([0]),
};
let _ = index_scheduler.register(dump_creation).unwrap();
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_dump_register");
// Advance the scheduler up to the `InsideProcessBatch` breakpoint before registering the cancelation.
handle.advance_till([Start, BatchCreated, InsideProcessBatch]);
let _ = index_scheduler.register(dump_cancellation).unwrap();
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "cancel_registered");
// The next breakpoint reached is expected to be `AbortedIndexation`.
snapshot!(format!("{:?}", handle.advance()), @"AbortedIndexation");
// Presumably this batch is the cancelation task itself; the final snapshot records the resulting state.
handle.advance_one_successful_batch();
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "cancel_processed");
}
}

View File

@ -1,35 +0,0 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { dump_uid: None }, kind: DumpCreation { keys: [], instance_uid: None }}
----------------------------------------------------------------------
### Status:
enqueued [0,]
----------------------------------------------------------------------
### Kind:
"dumpCreation" [0,]
----------------------------------------------------------------------
### Index Tasks:
----------------------------------------------------------------------
### Index Mapper:
----------------------------------------------------------------------
### Canceled By:
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
----------------------------------------------------------------------
### Started At:
----------------------------------------------------------------------
### Finished At:
----------------------------------------------------------------------
### File Store:
----------------------------------------------------------------------

View File

@ -1,45 +0,0 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: canceled, canceled_by: 1, details: { dump_uid: None }, kind: DumpCreation { keys: [], instance_uid: None }}
1 {uid: 1, status: succeeded, details: { matched_tasks: 1, canceled_tasks: Some(0), original_filter: "cancel dump" }, kind: TaskCancelation { query: "cancel dump", tasks: RoaringBitmap<[0]> }}
----------------------------------------------------------------------
### Status:
enqueued []
succeeded [1,]
canceled [0,]
----------------------------------------------------------------------
### Kind:
"taskCancelation" [1,]
"dumpCreation" [0,]
----------------------------------------------------------------------
### Index Tasks:
----------------------------------------------------------------------
### Index Mapper:
----------------------------------------------------------------------
### Canceled By:
1 [0,]
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Started At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Finished At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### File Store:
----------------------------------------------------------------------

View File

@ -1,38 +0,0 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[0,]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { dump_uid: None }, kind: DumpCreation { keys: [], instance_uid: None }}
1 {uid: 1, status: enqueued, details: { matched_tasks: 1, canceled_tasks: None, original_filter: "cancel dump" }, kind: TaskCancelation { query: "cancel dump", tasks: RoaringBitmap<[0]> }}
----------------------------------------------------------------------
### Status:
enqueued [0,1,]
----------------------------------------------------------------------
### Kind:
"taskCancelation" [1,]
"dumpCreation" [0,]
----------------------------------------------------------------------
### Index Tasks:
----------------------------------------------------------------------
### Index Mapper:
----------------------------------------------------------------------
### Canceled By:
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Started At:
----------------------------------------------------------------------
### Finished At:
----------------------------------------------------------------------
### File Store:
----------------------------------------------------------------------

View File

@ -34,10 +34,12 @@ catto: { number_of_documents: 1, field_distribution: {"id": 1} }
[timestamp] [3,]
----------------------------------------------------------------------
### Started At:
[timestamp] [2,3,]
[timestamp] [2,]
[timestamp] [3,]
----------------------------------------------------------------------
### Finished At:
[timestamp] [2,3,]
[timestamp] [2,]
[timestamp] [3,]
----------------------------------------------------------------------
### File Store:
00000000-0000-0000-0000-000000000001

View File

@ -3,9 +3,9 @@
use std::collections::{BTreeSet, HashSet};
use std::ops::Bound;
use meilisearch_types::heed::types::DecodeIgnore;
use meilisearch_types::heed::types::{DecodeIgnore, OwnedType};
use meilisearch_types::heed::{Database, RoTxn, RwTxn};
use meilisearch_types::milli::CboRoaringBitmapCodec;
use meilisearch_types::milli::{CboRoaringBitmapCodec, BEU32};
use meilisearch_types::tasks::{Details, IndexSwap, Kind, KindWithContent, Status};
use roaring::{MultiOps, RoaringBitmap};
use time::OffsetDateTime;
@ -18,7 +18,7 @@ impl IndexScheduler {
}
pub(crate) fn last_task_id(&self, rtxn: &RoTxn) -> Result<Option<TaskId>> {
Ok(self.all_tasks.remap_data_type::<DecodeIgnore>().last(rtxn)?.map(|(k, _)| k + 1))
Ok(self.all_tasks.remap_data_type::<DecodeIgnore>().last(rtxn)?.map(|(k, _)| k.get() + 1))
}
pub(crate) fn next_task_id(&self, rtxn: &RoTxn) -> Result<TaskId> {
@ -26,7 +26,7 @@ impl IndexScheduler {
}
pub(crate) fn get_task(&self, rtxn: &RoTxn, task_id: TaskId) -> Result<Option<Task>> {
Ok(self.all_tasks.get(rtxn, &task_id)?)
Ok(self.all_tasks.get(rtxn, &BEU32::new(task_id))?)
}
/// Convert an iterator to a `Vec` of tasks. The tasks MUST exist or a
@ -88,7 +88,7 @@ impl IndexScheduler {
}
}
self.all_tasks.put(wtxn, &task.uid, task)?;
self.all_tasks.put(wtxn, &BEU32::new(task.uid), task)?;
Ok(())
}
@ -169,11 +169,11 @@ impl IndexScheduler {
pub(crate) fn insert_task_datetime(
wtxn: &mut RwTxn,
database: Database<BEI128, CboRoaringBitmapCodec>,
database: Database<OwnedType<BEI128>, CboRoaringBitmapCodec>,
time: OffsetDateTime,
task_id: TaskId,
) -> Result<()> {
let timestamp = time.unix_timestamp_nanos();
let timestamp = BEI128::new(time.unix_timestamp_nanos());
let mut task_ids = database.get(wtxn, &timestamp)?.unwrap_or_default();
task_ids.insert(task_id);
database.put(wtxn, &timestamp, &RoaringBitmap::from_iter(task_ids))?;
@ -182,11 +182,11 @@ pub(crate) fn insert_task_datetime(
pub(crate) fn remove_task_datetime(
wtxn: &mut RwTxn,
database: Database<BEI128, CboRoaringBitmapCodec>,
database: Database<OwnedType<BEI128>, CboRoaringBitmapCodec>,
time: OffsetDateTime,
task_id: TaskId,
) -> Result<()> {
let timestamp = time.unix_timestamp_nanos();
let timestamp = BEI128::new(time.unix_timestamp_nanos());
if let Some(mut existing) = database.get(wtxn, &timestamp)? {
existing.remove(task_id);
if existing.is_empty() {
@ -202,7 +202,7 @@ pub(crate) fn remove_task_datetime(
pub(crate) fn keep_tasks_within_datetimes(
rtxn: &RoTxn,
tasks: &mut RoaringBitmap,
database: Database<BEI128, CboRoaringBitmapCodec>,
database: Database<OwnedType<BEI128>, CboRoaringBitmapCodec>,
after: Option<OffsetDateTime>,
before: Option<OffsetDateTime>,
) -> Result<()> {
@ -213,8 +213,8 @@ pub(crate) fn keep_tasks_within_datetimes(
(Some(after), Some(before)) => (Bound::Excluded(*after), Bound::Excluded(*before)),
};
let mut collected_task_ids = RoaringBitmap::new();
let start = map_bound(start, |b| b.unix_timestamp_nanos());
let end = map_bound(end, |b| b.unix_timestamp_nanos());
let start = map_bound(start, |b| BEI128::new(b.unix_timestamp_nanos()));
let end = map_bound(end, |b| BEI128::new(b.unix_timestamp_nanos()));
let iter = database.range(rtxn, &(start, end))?;
for r in iter {
let (_timestamp, task_ids) = r?;
@ -337,6 +337,8 @@ impl IndexScheduler {
let rtxn = self.env.read_txn().unwrap();
for task in self.all_tasks.iter(&rtxn).unwrap() {
let (task_id, task) = task.unwrap();
let task_id = task_id.get();
let task_index_uid = task.index_uid().map(ToOwned::to_owned);
let Task {
@ -359,13 +361,16 @@ impl IndexScheduler {
.unwrap()
.contains(task.uid));
}
let db_enqueued_at =
self.enqueued_at.get(&rtxn, &enqueued_at.unix_timestamp_nanos()).unwrap().unwrap();
let db_enqueued_at = self
.enqueued_at
.get(&rtxn, &BEI128::new(enqueued_at.unix_timestamp_nanos()))
.unwrap()
.unwrap();
assert!(db_enqueued_at.contains(task_id));
if let Some(started_at) = started_at {
let db_started_at = self
.started_at
.get(&rtxn, &started_at.unix_timestamp_nanos())
.get(&rtxn, &BEI128::new(started_at.unix_timestamp_nanos()))
.unwrap()
.unwrap();
assert!(db_started_at.contains(task_id));
@ -373,7 +378,7 @@ impl IndexScheduler {
if let Some(finished_at) = finished_at {
let db_finished_at = self
.finished_at
.get(&rtxn, &finished_at.unix_timestamp_nanos())
.get(&rtxn, &BEI128::new(finished_at.unix_timestamp_nanos()))
.unwrap()
.unwrap();
assert!(db_finished_at.contains(task_id));

View File

@ -1,7 +1,7 @@
use std::borrow::Cow;
use std::convert::TryInto;
use meilisearch_types::heed::{BoxedError, BytesDecode, BytesEncode};
use meilisearch_types::heed::{BytesDecode, BytesEncode};
use uuid::Uuid;
/// A heed codec for value of struct Uuid.
@ -10,15 +10,15 @@ pub struct UuidCodec;
impl<'a> BytesDecode<'a> for UuidCodec {
type DItem = Uuid;
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
bytes.try_into().map(Uuid::from_bytes).map_err(Into::into)
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
bytes.try_into().ok().map(Uuid::from_bytes)
}
}
impl BytesEncode<'_> for UuidCodec {
type EItem = Uuid;
fn bytes_encode(item: &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
Ok(Cow::Borrowed(item.as_bytes()))
fn bytes_encode(item: &Self::EItem) -> Option<Cow<[u8]>> {
Some(Cow::Borrowed(item.as_bytes()))
}
}

View File

@ -11,6 +11,6 @@ edition.workspace = true
license.workspace = true
[dependencies]
insta = { version = "^1.34.0", features = ["json", "redactions"] }
insta = { version = "^1.29.0", features = ["json", "redactions"] }
md5 = "0.7.0"
once_cell = "1.19"
once_cell = "1.17"

View File

@ -11,16 +11,16 @@ edition.workspace = true
license.workspace = true
[dependencies]
base64 = "0.21.7"
enum-iterator = "1.5.0"
base64 = "0.21.0"
enum-iterator = "1.4.0"
hmac = "0.12.1"
maplit = "1.0.2"
meilisearch-types = { path = "../meilisearch-types" }
rand = "0.8.5"
roaring = { version = "0.10.2", features = ["serde"] }
serde = { version = "1.0.195", features = ["derive"] }
serde_json = { version = "1.0.111", features = ["preserve_order"] }
sha2 = "0.10.8"
thiserror = "1.0.56"
time = { version = "0.3.31", features = ["serde-well-known", "formatting", "parsing", "macros"] }
uuid = { version = "1.6.1", features = ["serde", "v4"] }
roaring = { version = "0.10.1", features = ["serde"] }
serde = { version = "1.0.160", features = ["derive"] }
serde_json = { version = "1.0.95", features = ["preserve_order"] }
sha2 = "0.10.6"
thiserror = "1.0.40"
time = { version = "0.3.20", features = ["serde-well-known", "formatting", "parsing", "macros"] }
uuid = { version = "1.3.1", features = ["serde", "v4"] }

View File

@ -4,20 +4,17 @@ use std::collections::HashSet;
use std::convert::{TryFrom, TryInto};
use std::fs::create_dir_all;
use std::path::Path;
use std::result::Result as StdResult;
use std::str;
use std::str::FromStr;
use std::sync::Arc;
use hmac::{Hmac, Mac};
use meilisearch_types::heed::BoxedError;
use meilisearch_types::index_uid_pattern::IndexUidPattern;
use meilisearch_types::keys::KeyId;
use meilisearch_types::milli;
use meilisearch_types::milli::heed::types::{Bytes, DecodeIgnore, SerdeJson};
use meilisearch_types::milli::heed::types::{ByteSlice, DecodeIgnore, SerdeJson};
use meilisearch_types::milli::heed::{Database, Env, EnvOpenOptions, RwTxn};
use sha2::Sha256;
use thiserror::Error;
use time::OffsetDateTime;
use uuid::fmt::Hyphenated;
use uuid::Uuid;
@ -33,7 +30,7 @@ const KEY_ID_ACTION_INDEX_EXPIRATION_DB_NAME: &str = "keyid-action-index-expirat
#[derive(Clone)]
pub struct HeedAuthStore {
env: Arc<Env>,
keys: Database<Bytes, SerdeJson<Key>>,
keys: Database<ByteSlice, SerdeJson<Key>>,
action_keyid_index_expiration: Database<KeyIdActionCodec, SerdeJson<Option<OffsetDateTime>>>,
should_close_on_drop: bool,
}
@ -279,7 +276,7 @@ impl HeedAuthStore {
fn delete_key_from_inverted_db(&self, wtxn: &mut RwTxn, key: &KeyId) -> Result<()> {
let mut iter = self
.action_keyid_index_expiration
.remap_types::<Bytes, DecodeIgnore>()
.remap_types::<ByteSlice, DecodeIgnore>()
.prefix_iter_mut(wtxn, key.as_bytes())?;
while iter.next().transpose()?.is_some() {
// safety: we don't keep references from inside the LMDB database.
@ -297,24 +294,23 @@ pub struct KeyIdActionCodec;
impl<'a> milli::heed::BytesDecode<'a> for KeyIdActionCodec {
type DItem = (KeyId, Action, Option<&'a [u8]>);
fn bytes_decode(bytes: &'a [u8]) -> StdResult<Self::DItem, BoxedError> {
let (key_id_bytes, action_bytes) = try_split_array_at(bytes).ok_or(SliceTooShortError)?;
let (&action_byte, index) =
match try_split_array_at(action_bytes).ok_or(SliceTooShortError)? {
([action], []) => (action, None),
([action], index) => (action, Some(index)),
};
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
let (key_id_bytes, action_bytes) = try_split_array_at(bytes)?;
let (action_bytes, index) = match try_split_array_at(action_bytes)? {
(action, []) => (action, None),
(action, index) => (action, Some(index)),
};
let key_id = Uuid::from_bytes(*key_id_bytes);
let action = Action::from_repr(action_byte).ok_or(InvalidActionError { action_byte })?;
let action = Action::from_repr(u8::from_be_bytes(*action_bytes))?;
Ok((key_id, action, index))
Some((key_id, action, index))
}
}
impl<'a> milli::heed::BytesEncode<'a> for KeyIdActionCodec {
type EItem = (&'a KeyId, &'a Action, Option<&'a [u8]>);
fn bytes_encode((key_id, action, index): &Self::EItem) -> StdResult<Cow<[u8]>, BoxedError> {
fn bytes_encode((key_id, action, index): &Self::EItem) -> Option<Cow<[u8]>> {
let mut bytes = Vec::new();
bytes.extend_from_slice(key_id.as_bytes());
@ -324,20 +320,10 @@ impl<'a> milli::heed::BytesEncode<'a> for KeyIdActionCodec {
bytes.extend_from_slice(index);
}
Ok(Cow::Owned(bytes))
Some(Cow::Owned(bytes))
}
}
/// Decoding error produced by `KeyIdActionCodec::bytes_decode` when the input
/// cannot be split into the fixed-size parts it expects.
#[derive(Error, Debug)]
#[error("the slice is too short")]
pub struct SliceTooShortError;
/// Decoding error produced by `KeyIdActionCodec::bytes_decode` when
/// `Action::from_repr` rejects the action byte.
#[derive(Error, Debug)]
#[error("cannot construct a valid Action from {action_byte}")]
pub struct InvalidActionError {
/// The raw byte that did not correspond to any `Action`.
pub action_byte: u8,
}
pub fn generate_key_as_hexa(uid: Uuid, master_key: &[u8]) -> String {
// format uid as hyphenated allowing user to generate their own keys.
let mut uid_buffer = [0; Hyphenated::LENGTH];

View File

@ -11,31 +11,31 @@ edition.workspace = true
license.workspace = true
[dependencies]
actix-web = { version = "4.4.1", default-features = false }
anyhow = "1.0.79"
actix-web = { version = "4.3.1", default-features = false }
anyhow = "1.0.70"
convert_case = "0.6.0"
csv = "1.3.0"
deserr = { version = "0.6.1", features = ["actix-web"] }
either = { version = "1.9.0", features = ["serde"] }
enum-iterator = "1.5.0"
csv = "1.2.1"
deserr = { version = "0.6.0", features = ["actix-web"]}
either = { version = "1.8.1", features = ["serde"] }
enum-iterator = "1.4.0"
file-store = { path = "../file-store" }
flate2 = "1.0.28"
flate2 = "1.0.25"
fst = "0.4.7"
memmap2 = "0.7.1"
milli = { path = "../milli" }
roaring = { version = "0.10.2", features = ["serde"] }
serde = { version = "1.0.195", features = ["derive"] }
roaring = { version = "0.10.1", features = ["serde"] }
serde = { version = "1.0.160", features = ["derive"] }
serde-cs = "0.2.4"
serde_json = "1.0.111"
tar = "0.4.40"
tempfile = "3.9.0"
thiserror = "1.0.56"
time = { version = "0.3.31", features = ["serde-well-known", "formatting", "parsing", "macros"] }
tokio = "1.35"
uuid = { version = "1.6.1", features = ["serde", "v4"] }
serde_json = "1.0.95"
tar = "0.4.38"
tempfile = "3.5.0"
thiserror = "1.0.40"
time = { version = "0.3.20", features = ["serde-well-known", "formatting", "parsing", "macros"] }
tokio = "1.27"
uuid = { version = "1.3.1", features = ["serde", "v4"] }
[dev-dependencies]
insta = "1.34.0"
insta = "1.29.0"
meili-snap = { path = "../meili-snap" }
[features]
@ -50,7 +50,6 @@ hebrew = ["milli/hebrew"]
japanese = ["milli/japanese"]
# thai specialized tokenization
thai = ["milli/thai"]
# allow greek specialized tokenization
greek = ["milli/greek"]
# allow khmer specialized tokenization
khmer = ["milli/khmer"]

View File

@ -188,4 +188,3 @@ merge_with_error_impl_take_error_message!(ParseOffsetDateTimeError);
merge_with_error_impl_take_error_message!(ParseTaskKindError);
merge_with_error_impl_take_error_message!(ParseTaskStatusError);
merge_with_error_impl_take_error_message!(IndexUidFormatError);
merge_with_error_impl_take_error_message!(InvalidSearchSemanticRatio);

View File

@ -222,8 +222,6 @@ InvalidVectorsType , InvalidRequest , BAD_REQUEST ;
InvalidDocumentId , InvalidRequest , BAD_REQUEST ;
InvalidDocumentLimit , InvalidRequest , BAD_REQUEST ;
InvalidDocumentOffset , InvalidRequest , BAD_REQUEST ;
InvalidEmbedder , InvalidRequest , BAD_REQUEST ;
InvalidHybridQuery , InvalidRequest , BAD_REQUEST ;
InvalidIndexLimit , InvalidRequest , BAD_REQUEST ;
InvalidIndexOffset , InvalidRequest , BAD_REQUEST ;
InvalidIndexPrimaryKey , InvalidRequest , BAD_REQUEST ;
@ -235,7 +233,6 @@ InvalidSearchAttributesToRetrieve , InvalidRequest , BAD_REQUEST ;
InvalidSearchCropLength , InvalidRequest , BAD_REQUEST ;
InvalidSearchCropMarker , InvalidRequest , BAD_REQUEST ;
InvalidSearchFacets , InvalidRequest , BAD_REQUEST ;
InvalidSearchSemanticRatio , InvalidRequest , BAD_REQUEST ;
InvalidFacetSearchFacetName , InvalidRequest , BAD_REQUEST ;
InvalidSearchFilter , InvalidRequest , BAD_REQUEST ;
InvalidSearchHighlightPostTag , InvalidRequest , BAD_REQUEST ;
@ -255,11 +252,9 @@ InvalidSearchShowRankingScoreDetails , InvalidRequest , BAD_REQUEST ;
InvalidSearchSort , InvalidRequest , BAD_REQUEST ;
InvalidSettingsDisplayedAttributes , InvalidRequest , BAD_REQUEST ;
InvalidSettingsDistinctAttribute , InvalidRequest , BAD_REQUEST ;
InvalidSettingsProximityPrecision , InvalidRequest , BAD_REQUEST ;
InvalidSettingsFaceting , InvalidRequest , BAD_REQUEST ;
InvalidSettingsFilterableAttributes , InvalidRequest , BAD_REQUEST ;
InvalidSettingsPagination , InvalidRequest , BAD_REQUEST ;
InvalidSettingsEmbedders , InvalidRequest , BAD_REQUEST ;
InvalidSettingsRankingRules , InvalidRequest , BAD_REQUEST ;
InvalidSettingsSearchableAttributes , InvalidRequest , BAD_REQUEST ;
InvalidSettingsSortableAttributes , InvalidRequest , BAD_REQUEST ;
@ -299,20 +294,15 @@ MissingFacetSearchFacetName , InvalidRequest , BAD_REQUEST ;
MissingIndexUid , InvalidRequest , BAD_REQUEST ;
MissingMasterKey , Auth , UNAUTHORIZED ;
MissingPayload , InvalidRequest , BAD_REQUEST ;
MissingSearchHybrid , InvalidRequest , BAD_REQUEST ;
MissingSwapIndexes , InvalidRequest , BAD_REQUEST ;
MissingTaskFilters , InvalidRequest , BAD_REQUEST ;
NoSpaceLeftOnDevice , System , UNPROCESSABLE_ENTITY;
PayloadTooLarge , InvalidRequest , PAYLOAD_TOO_LARGE ;
TaskNotFound , InvalidRequest , NOT_FOUND ;
TooManyOpenFiles , System , UNPROCESSABLE_ENTITY ;
TooManyVectors , InvalidRequest , BAD_REQUEST ;
UnretrievableDocument , Internal , BAD_REQUEST ;
UnretrievableErrorCode , InvalidRequest , BAD_REQUEST ;
UnsupportedMediaType , InvalidRequest , UNSUPPORTED_MEDIA_TYPE ;
// Experimental features
VectorEmbeddingError , InvalidRequest , BAD_REQUEST
UnsupportedMediaType , InvalidRequest , UNSUPPORTED_MEDIA_TYPE
}
impl ErrorCode for JoinError {
@ -334,6 +324,7 @@ impl ErrorCode for milli::Error {
UserError::SerdeJson(_)
| UserError::InvalidLmdbOpenOptions
| UserError::DocumentLimitReached
| UserError::AccessingSoftDeletedDocument { .. }
| UserError::UnknownInternalDocumentId { .. } => Code::Internal,
UserError::InvalidStoreFile => Code::InvalidStoreFile,
UserError::NoSpaceLeftOnDevice => Code::NoSpaceLeftOnDevice,
@ -345,13 +336,6 @@ impl ErrorCode for milli::Error {
UserError::InvalidDocumentId { .. } | UserError::TooManyDocumentIds { .. } => {
Code::InvalidDocumentId
}
UserError::MissingDocumentField(_) => Code::InvalidDocumentFields,
UserError::InvalidFieldForSource { .. }
| UserError::MissingFieldForSource { .. }
| UserError::InvalidOpenAiModel { .. }
| UserError::InvalidPrompt(_) => Code::InvalidSettingsEmbedders,
UserError::TooManyEmbedders(_) => Code::InvalidSettingsEmbedders,
UserError::InvalidPromptForEmbeddings(..) => Code::InvalidSettingsEmbedders,
UserError::NoPrimaryKeyCandidateFound => Code::IndexPrimaryKeyNoCandidateFound,
UserError::MultiplePrimaryKeyCandidatesFound { .. } => {
Code::IndexPrimaryKeyMultipleCandidatesFound
@ -369,15 +353,11 @@ impl ErrorCode for milli::Error {
UserError::CriterionError(_) => Code::InvalidSettingsRankingRules,
UserError::InvalidGeoField { .. } => Code::InvalidDocumentGeoField,
UserError::InvalidVectorDimensions { .. } => Code::InvalidVectorDimensions,
UserError::InvalidVectorsMapType { .. } => Code::InvalidVectorsType,
UserError::InvalidVectorsType { .. } => Code::InvalidVectorsType,
UserError::TooManyVectors(_, _) => Code::TooManyVectors,
UserError::SortError(_) => Code::InvalidSearchSort,
UserError::InvalidMinTypoWordLenSetting(_, _) => {
Code::InvalidSettingsTypoTolerance
}
UserError::InvalidEmbedder(_) => Code::InvalidEmbedder,
UserError::VectorEmbeddingError(_) => Code::VectorEmbeddingError,
}
}
}
@ -407,11 +387,11 @@ impl ErrorCode for HeedError {
HeedError::Mdb(MdbError::Invalid) => Code::InvalidStoreFile,
HeedError::Io(e) => e.error_code(),
HeedError::Mdb(_)
| HeedError::Encoding(_)
| HeedError::Decoding(_)
| HeedError::Encoding
| HeedError::Decoding
| HeedError::InvalidDatabaseTyping
| HeedError::DatabaseClosing
| HeedError::BadOpenOptions { .. } => Code::Internal,
| HeedError::BadOpenOptions => Code::Internal,
}
}
}
@ -465,15 +445,6 @@ impl fmt::Display for DeserrParseIntError {
}
}
impl fmt::Display for deserr_codes::InvalidSearchSemanticRatio {
    /// Emits the fixed, user-facing message for an invalid `semanticRatio`.
    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
        // The message has no placeholders, so it is written out verbatim.
        formatter.write_str(
            "the value of `semanticRatio` is invalid, expected a float between `0.0` and `1.0`.",
        )
    }
}
#[macro_export]
macro_rules! internal_error {
($target:ty : $($other:path), *) => {

View File

@ -6,12 +6,10 @@ pub struct RuntimeTogglableFeatures {
pub score_details: bool,
pub vector_store: bool,
pub metrics: bool,
pub logs_route: bool,
pub export_puffin_reports: bool,
}
/// Set of boolean feature switches; the derived `Default` leaves every switch off.
// NOTE(review): presumably these are decided once for the whole instance, as
// opposed to `RuntimeTogglableFeatures` — confirm against the callers.
#[derive(Default, Debug, Clone, Copy)]
pub struct InstanceTogglableFeatures {
pub metrics: bool,
pub logs_route: bool,
}

View File

@ -9,7 +9,6 @@ pub mod index_uid_pattern;
pub mod keys;
pub mod settings;
pub mod star_or;
pub mod task_view;
pub mod tasks;
pub mod versioning;
pub use milli::{heed, Index};

View File

@ -8,7 +8,6 @@ use std::str::FromStr;
use deserr::{DeserializeError, Deserr, ErrorKind, MergeWithError, ValuePointerRef};
use fst::IntoStreamer;
use milli::proximity::ProximityPrecision;
use milli::update::Setting;
use milli::{Criterion, CriterionError, Index, DEFAULT_VALUES_PER_FACET};
use serde::{Deserialize, Serialize, Serializer};
@ -187,9 +186,6 @@ pub struct Settings<T> {
#[deserr(default, error = DeserrJsonError<InvalidSettingsDistinctAttribute>)]
pub distinct_attribute: Setting<String>,
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
#[deserr(default, error = DeserrJsonError<InvalidSettingsProximityPrecision>)]
pub proximity_precision: Setting<ProximityPrecisionView>,
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
#[deserr(default, error = DeserrJsonError<InvalidSettingsTypoTolerance>)]
pub typo_tolerance: Setting<TypoSettings>,
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
@ -199,10 +195,6 @@ pub struct Settings<T> {
#[deserr(default, error = DeserrJsonError<InvalidSettingsPagination>)]
pub pagination: Setting<PaginationSettings>,
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
#[deserr(default, error = DeserrJsonError<InvalidSettingsEmbedders>)]
pub embedders: Setting<BTreeMap<String, Setting<milli::vector::settings::EmbeddingSettings>>>,
#[serde(skip)]
#[deserr(skip)]
pub _kind: PhantomData<T>,
@ -222,11 +214,9 @@ impl Settings<Checked> {
separator_tokens: Setting::Reset,
dictionary: Setting::Reset,
distinct_attribute: Setting::Reset,
proximity_precision: Setting::Reset,
typo_tolerance: Setting::Reset,
faceting: Setting::Reset,
pagination: Setting::Reset,
embedders: Setting::Reset,
_kind: PhantomData,
}
}
@ -244,11 +234,9 @@ impl Settings<Checked> {
dictionary,
synonyms,
distinct_attribute,
proximity_precision,
typo_tolerance,
faceting,
pagination,
embedders,
..
} = self;
@ -264,11 +252,9 @@ impl Settings<Checked> {
dictionary,
synonyms,
distinct_attribute,
proximity_precision,
typo_tolerance,
faceting,
pagination,
embedders,
_kind: PhantomData,
}
}
@ -310,29 +296,12 @@ impl Settings<Unchecked> {
separator_tokens: self.separator_tokens,
dictionary: self.dictionary,
distinct_attribute: self.distinct_attribute,
proximity_precision: self.proximity_precision,
typo_tolerance: self.typo_tolerance,
faceting: self.faceting,
pagination: self.pagination,
embedders: self.embedders,
_kind: PhantomData,
}
}
/// Consumes the settings and hands them back once validated.
///
/// The only check performed here is `validate_embedding_settings`.
///
/// # Errors
///
/// Returns whatever `milli::Error` that check reports.
pub fn validate(self) -> Result<Self, milli::Error> {
self.validate_embedding_settings()
}
/// Runs `milli::update::validate_embedding_settings` on every configured embedder.
///
/// When `embedders` is not `Setting::Set`, the settings are returned untouched.
/// Otherwise each entry is replaced by the value the validator returns for it.
///
/// # Errors
///
/// Stops at, and propagates, the first `milli::Error` reported by the validator.
fn validate_embedding_settings(mut self) -> Result<Self, milli::Error> {
let Setting::Set(mut configs) = self.embedders else { return Ok(self) };
for (name, config) in configs.iter_mut() {
// Move the entry out of the map (leaving its `Default` in place) so it can
// be passed to the validator by value.
let config_to_check = std::mem::take(config);
let checked_config = milli::update::validate_embedding_settings(config_to_check, name)?;
*config = checked_config
}
// Put the validated map back into the field it was taken from.
self.embedders = Setting::Set(configs);
Ok(self)
}
}
#[derive(Debug, Clone, Serialize, Deserialize)]
@ -421,12 +390,6 @@ pub fn apply_settings_to_builder(
Setting::NotSet => (),
}
match settings.proximity_precision {
Setting::Set(ref precision) => builder.set_proximity_precision((*precision).into()),
Setting::Reset => builder.reset_proximity_precision(),
Setting::NotSet => (),
}
match settings.typo_tolerance {
Setting::Set(ref value) => {
match value.enabled {
@ -513,12 +476,6 @@ pub fn apply_settings_to_builder(
Setting::Reset => builder.reset_pagination_max_total_hits(),
Setting::NotSet => (),
}
match settings.embedders.clone() {
Setting::Set(value) => builder.set_embedder_settings(value),
Setting::Reset => builder.reset_embedder_settings(),
Setting::NotSet => (),
}
}
pub fn settings(
@ -552,8 +509,6 @@ pub fn settings(
let distinct_field = index.distinct_field(rtxn)?.map(String::from);
let proximity_precision = index.proximity_precision(rtxn)?.map(ProximityPrecisionView::from);
let synonyms = index.user_defined_synonyms(rtxn)?;
let min_typo_word_len = MinWordSizeTyposSetting {
@ -577,10 +532,7 @@ pub fn settings(
let faceting = FacetingSettings {
max_values_per_facet: Setting::Set(
index
.max_values_per_facet(rtxn)?
.map(|x| x as usize)
.unwrap_or(DEFAULT_VALUES_PER_FACET),
index.max_values_per_facet(rtxn)?.unwrap_or(DEFAULT_VALUES_PER_FACET),
),
sort_facet_values_by: Setting::Set(
index
@ -593,20 +545,10 @@ pub fn settings(
let pagination = PaginationSettings {
max_total_hits: Setting::Set(
index
.pagination_max_total_hits(rtxn)?
.map(|x| x as usize)
.unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS),
index.pagination_max_total_hits(rtxn)?.unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS),
),
};
let embedders: BTreeMap<_, _> = index
.embedding_configs(rtxn)?
.into_iter()
.map(|(name, config)| (name, Setting::Set(config.into())))
.collect();
let embedders = if embedders.is_empty() { Setting::NotSet } else { Setting::Set(embedders) };
Ok(Settings {
displayed_attributes: match displayed_attributes {
Some(attrs) => Setting::Set(attrs),
@ -627,12 +569,10 @@ pub fn settings(
Some(field) => Setting::Set(field),
None => Setting::Reset,
},
proximity_precision: Setting::Set(proximity_precision.unwrap_or_default()),
synonyms: Setting::Set(synonyms),
typo_tolerance: Setting::Set(typo_tolerance),
faceting: Setting::Set(faceting),
pagination: Setting::Set(pagination),
embedders,
_kind: PhantomData,
})
}
@ -733,32 +673,6 @@ impl From<RankingRuleView> for Criterion {
}
}
/// Settings-level counterpart of `milli`'s `ProximityPrecision`.
///
/// Variant names are (de)serialized in camelCase and unknown fields are rejected,
/// as configured by the `serde` and `deserr` attributes below.
#[derive(Default, Debug, Clone, Copy, PartialEq, Eq, Deserr, Serialize, Deserialize)]
#[serde(deny_unknown_fields, rename_all = "camelCase")]
#[deserr(error = DeserrJsonError<InvalidSettingsProximityPrecision>, rename_all = camelCase, deny_unknown_fields)]
pub enum ProximityPrecisionView {
/// Counterpart of `ProximityPrecision::ByWord`; this is the default variant.
#[default]
ByWord,
/// Counterpart of `ProximityPrecision::ByAttribute`.
ByAttribute,
}
impl From<ProximityPrecision> for ProximityPrecisionView {
    /// Maps each `milli` precision onto the view variant of the same name.
    fn from(precision: ProximityPrecision) -> Self {
        match precision {
            ProximityPrecision::ByWord => Self::ByWord,
            ProximityPrecision::ByAttribute => Self::ByAttribute,
        }
    }
}
impl From<ProximityPrecisionView> for ProximityPrecision {
    /// Maps each view variant back onto the `milli` precision of the same name.
    fn from(view: ProximityPrecisionView) -> Self {
        match view {
            ProximityPrecisionView::ByWord => Self::ByWord,
            ProximityPrecisionView::ByAttribute => Self::ByAttribute,
        }
    }
}
#[cfg(test)]
pub(crate) mod test {
use super::*;
@ -778,11 +692,9 @@ pub(crate) mod test {
dictionary: Setting::NotSet,
synonyms: Setting::NotSet,
distinct_attribute: Setting::NotSet,
proximity_precision: Setting::NotSet,
typo_tolerance: Setting::NotSet,
faceting: Setting::NotSet,
pagination: Setting::NotSet,
embedders: Setting::NotSet,
_kind: PhantomData::<Unchecked>,
};
@ -804,11 +716,9 @@ pub(crate) mod test {
dictionary: Setting::NotSet,
synonyms: Setting::NotSet,
distinct_attribute: Setting::NotSet,
proximity_precision: Setting::NotSet,
typo_tolerance: Setting::NotSet,
faceting: Setting::NotSet,
pagination: Setting::NotSet,
embedders: Setting::NotSet,
_kind: PhantomData::<Unchecked>,
};

View File

@ -1,139 +0,0 @@
use serde::Serialize;
use time::{Duration, OffsetDateTime};
use crate::error::ResponseError;
use crate::settings::{Settings, Unchecked};
use crate::tasks::{serialize_duration, Details, IndexSwap, Kind, Status, Task, TaskId};
/// Serializable view of a `Task`, built by `TaskView::from_task`.
///
/// Field names are emitted in camelCase.
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct TaskView {
/// Identifier copied from the task.
pub uid: TaskId,
/// Index the task refers to, when `Task::index_uid` returns one.
#[serde(default)]
pub index_uid: Option<String>,
pub status: Status,
/// Kind of the task; serialized under the key `type`.
#[serde(rename = "type")]
pub kind: Kind,
// NOTE(review): presumably the uid of the task that canceled this one — confirm.
pub canceled_by: Option<TaskId>,
/// Omitted from the output entirely when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub details: Option<DetailsView>,
pub error: Option<ResponseError>,
/// `finished_at - started_at`, available only once both timestamps exist;
/// written through `serialize_duration`.
#[serde(serialize_with = "serialize_duration", default)]
pub duration: Option<Duration>,
/// Serialized as an RFC 3339 timestamp.
#[serde(with = "time::serde::rfc3339")]
pub enqueued_at: OffsetDateTime,
/// Serialized as an RFC 3339 timestamp when present.
#[serde(with = "time::serde::rfc3339::option", default)]
pub started_at: Option<OffsetDateTime>,
/// Serialized as an RFC 3339 timestamp when present.
#[serde(with = "time::serde::rfc3339::option", default)]
pub finished_at: Option<OffsetDateTime>,
}
impl TaskView {
    /// Creates the serializable view of `task`.
    ///
    /// Everything is copied or cloned out of `task`; `duration` is filled in
    /// only when the task has both a start and a finish timestamp.
    pub fn from_task(task: &Task) -> TaskView {
        let duration = match (task.started_at, task.finished_at) {
            (Some(started), Some(finished)) => Some(finished - started),
            _ => None,
        };
        let index_uid = task.index_uid().map(ToOwned::to_owned);
        let details = task.details.clone().map(DetailsView::from);

        Self {
            uid: task.uid,
            index_uid,
            status: task.status,
            kind: task.kind.as_kind(),
            canceled_by: task.canceled_by,
            details,
            error: task.error.clone(),
            duration,
            enqueued_at: task.enqueued_at,
            started_at: task.started_at,
            finished_at: task.finished_at,
        }
    }
}
/// Flat, all-optional rendering of a `Details` value (see `From<Details>`).
///
/// Every field is skipped during serialization when it is `None`, so only the
/// fields relevant to the originating `Details` variant appear in the output.
/// For the `Option<Option<_>>` fields, the outer `Option` decides whether the
/// key is emitted at all, while an inner `None` is still serialized
/// (e.g. as `null` in JSON).
#[derive(Default, Debug, PartialEq, Eq, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct DetailsView {
#[serde(skip_serializing_if = "Option::is_none")]
pub received_documents: Option<u64>,
#[serde(skip_serializing_if = "Option::is_none")]
pub indexed_documents: Option<Option<u64>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub primary_key: Option<Option<String>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub provided_ids: Option<usize>,
#[serde(skip_serializing_if = "Option::is_none")]
pub deleted_documents: Option<Option<u64>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub matched_tasks: Option<u64>,
#[serde(skip_serializing_if = "Option::is_none")]
pub canceled_tasks: Option<Option<u64>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub deleted_tasks: Option<Option<u64>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub original_filter: Option<Option<String>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub dump_uid: Option<Option<String>>,
// Flattened: the settings' own fields are written directly into this object
// rather than under a `settings` key.
#[serde(skip_serializing_if = "Option::is_none")]
#[serde(flatten)]
pub settings: Option<Box<Settings<Unchecked>>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub swaps: Option<Vec<IndexSwap>>,
}
impl From<Details> for DetailsView {
    /// Spreads a `Details` value over the flat, all-optional `DetailsView`.
    ///
    /// Starts from an all-`None` view and fills in only the fields that the
    /// given variant carries.
    fn from(details: Details) -> Self {
        let mut view = Self::default();

        match details {
            Details::DocumentAdditionOrUpdate { received_documents, indexed_documents } => {
                view.received_documents = Some(received_documents);
                view.indexed_documents = Some(indexed_documents);
            }
            Details::SettingsUpdate { settings } => {
                view.settings = Some(settings);
            }
            Details::IndexInfo { primary_key } => {
                view.primary_key = Some(primary_key);
            }
            Details::DocumentDeletion { provided_ids, deleted_documents } => {
                view.provided_ids = Some(provided_ids);
                view.deleted_documents = Some(deleted_documents);
                // Deletion by ids has no filter: emit the key with an empty value.
                view.original_filter = Some(None);
            }
            Details::DocumentDeletionByFilter { original_filter, deleted_documents } => {
                // Deletion by filter provides no ids, reported as zero.
                view.provided_ids = Some(0);
                view.original_filter = Some(Some(original_filter));
                view.deleted_documents = Some(deleted_documents);
            }
            Details::ClearAll { deleted_documents } => {
                view.deleted_documents = Some(deleted_documents);
            }
            Details::TaskCancelation { matched_tasks, canceled_tasks, original_filter } => {
                view.matched_tasks = Some(matched_tasks);
                view.canceled_tasks = Some(canceled_tasks);
                view.original_filter = Some(Some(original_filter));
            }
            Details::TaskDeletion { matched_tasks, deleted_tasks, original_filter } => {
                view.matched_tasks = Some(matched_tasks);
                view.deleted_tasks = Some(deleted_tasks);
                view.original_filter = Some(Some(original_filter));
            }
            Details::Dump { dump_uid } => {
                view.dump_uid = Some(dump_uid);
            }
            Details::IndexSwap { swaps } => {
                view.swaps = Some(swaps);
            }
        }

        view
    }
}

View File

@ -13,14 +13,14 @@ license.workspace = true
default-run = "meilisearch"
[dependencies]
actix-cors = "0.7.0"
actix-http = { version = "3.5.1", default-features = false, features = [
actix-cors = "0.6.4"
actix-http = { version = "3.3.1", default-features = false, features = [
"compress-brotli",
"compress-gzip",
"rustls",
] }
actix-utils = "3.0.1"
actix-web = { version = "4.4.1", default-features = false, features = [
actix-web = { version = "4.3.1", default-features = false, features = [
"macros",
"compress-brotli",
"compress-gzip",
@ -28,111 +28,108 @@ actix-web = { version = "4.4.1", default-features = false, features = [
"rustls",
] }
actix-web-static-files = { git = "https://github.com/kilork/actix-web-static-files.git", rev = "2d3b6160", optional = true }
anyhow = { version = "1.0.79", features = ["backtrace"] }
anyhow = { version = "1.0.70", features = ["backtrace"] }
async-stream = "0.3.5"
async-trait = "0.1.77"
bstr = "1.9.0"
async-trait = "0.1.68"
bstr = "1.4.0"
byte-unit = { version = "4.0.19", default-features = false, features = [
"std",
"serde",
] }
bytes = "1.5.0"
clap = { version = "4.4.17", features = ["derive", "env"] }
crossbeam-channel = "0.5.11"
deserr = { version = "0.6.1", features = ["actix-web"] }
bytes = "1.4.0"
clap = { version = "4.2.1", features = ["derive", "env"] }
crossbeam-channel = "0.5.8"
deserr = { version = "0.6.0", features = ["actix-web"]}
dump = { path = "../dump" }
either = "1.9.0"
either = "1.8.1"
env_logger = "0.10.0"
file-store = { path = "../file-store" }
flate2 = "1.0.28"
flate2 = "1.0.25"
fst = "0.4.7"
futures = "0.3.30"
futures-util = "0.3.30"
http = "0.2.11"
futures = "0.3.28"
futures-util = "0.3.28"
http = "0.2.9"
index-scheduler = { path = "../index-scheduler" }
indexmap = { version = "2.1.0", features = ["serde"] }
is-terminal = "0.4.10"
indexmap = { version = "2.0.0", features = ["serde"] }
is-terminal = "0.4.8"
itertools = "0.11.0"
jsonwebtoken = "8.3.0"
lazy_static = "1.4.0"
log = "0.4.17"
meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-types = { path = "../meilisearch-types" }
mimalloc = { version = "0.1.39", default-features = false }
mimalloc = { version = "0.1.37", default-features = false }
mime = "0.3.17"
num_cpus = "1.16.0"
obkv = "0.2.1"
once_cell = "1.19.0"
ordered-float = "4.2.0"
num_cpus = "1.15.0"
obkv = "0.2.0"
once_cell = "1.17.1"
ordered-float = "3.7.0"
parking_lot = "0.12.1"
permissive-json-pointer = { path = "../permissive-json-pointer" }
pin-project-lite = "0.2.13"
pin-project-lite = "0.2.9"
platform-dirs = "0.3.0"
prometheus = { version = "0.13.3", features = ["process"] }
puffin = { version = "0.16.0", features = ["serialization"] }
rand = "0.8.5"
rayon = "1.8.0"
regex = "1.10.2"
reqwest = { version = "0.11.23", features = [
rayon = "1.7.0"
regex = "1.7.3"
reqwest = { version = "0.11.16", features = [
"rustls-tls",
"json",
], default-features = false }
rustls = "0.20.8"
rustls-pemfile = "1.0.2"
segment = { version = "0.2.3", optional = true }
serde = { version = "1.0.195", features = ["derive"] }
serde_json = { version = "1.0.111", features = ["preserve_order"] }
sha2 = "0.10.8"
siphasher = "1.0.0"
slice-group-by = "0.3.1"
segment = { version = "0.2.2", optional = true }
serde = { version = "1.0.160", features = ["derive"] }
serde_json = { version = "1.0.95", features = ["preserve_order"] }
sha2 = "0.10.6"
siphasher = "0.3.10"
slice-group-by = "0.3.0"
static-files = { version = "0.2.3", optional = true }
sysinfo = "0.30.5"
tar = "0.4.40"
tempfile = "3.9.0"
thiserror = "1.0.56"
time = { version = "0.3.31", features = [
sysinfo = "0.29.7"
tar = "0.4.38"
tempfile = "3.5.0"
thiserror = "1.0.40"
time = { version = "0.3.20", features = [
"serde-well-known",
"formatting",
"parsing",
"macros",
] }
tokio = { version = "1.35.1", features = ["full"] }
tokio-stream = "0.1.14"
toml = "0.8.8"
uuid = { version = "1.6.1", features = ["serde", "v4"] }
walkdir = "2.4.0"
tokio = { version = "1.27.0", features = ["full"] }
tokio-stream = "0.1.12"
toml = "0.7.3"
uuid = { version = "1.3.1", features = ["serde", "v4"] }
walkdir = "2.3.3"
yaup = "0.2.1"
serde_urlencoded = "0.7.1"
termcolor = "1.4.1"
url = { version = "2.5.0", features = ["serde"] }
tracing = "0.1.40"
tracing-subscriber = "0.3.18"
tracing-trace = { version = "0.1.0", path = "../tracing-trace" }
tracing-actix-web = "0.7.9"
termcolor = "1.2.0"
[dev-dependencies]
actix-rt = "2.9.0"
actix-rt = "2.8.0"
assert-json-diff = "2.0.2"
brotli = "3.4.0"
insta = "1.34.0"
manifest-dir-macros = "0.1.18"
brotli = "3.3.4"
insta = "1.29.0"
manifest-dir-macros = "0.1.16"
maplit = "1.0.2"
meili-snap = { path = "../meili-snap" }
temp-env = "0.3.6"
urlencoding = "2.1.3"
temp-env = "0.3.3"
urlencoding = "2.1.2"
yaup = "0.2.1"
[build-dependencies]
anyhow = { version = "1.0.79", optional = true }
cargo_toml = { version = "0.18.0", optional = true }
anyhow = { version = "1.0.70", optional = true }
cargo_toml = { version = "0.15.2", optional = true }
hex = { version = "0.4.3", optional = true }
reqwest = { version = "0.11.23", features = [
reqwest = { version = "0.11.16", features = [
"blocking",
"rustls-tls",
], default-features = false, optional = true }
sha-1 = { version = "0.10.1", optional = true }
static-files = { version = "0.2.3", optional = true }
tempfile = { version = "3.9.0", optional = true }
tempfile = { version = "3.5.0", optional = true }
vergen = { version = "7.5.1", default-features = false, features = ["git"] }
zip = { version = "0.6.6", optional = true }
zip = { version = "0.6.4", optional = true }
[features]
default = ["analytics", "meilisearch-types/all-tokenizations", "mini-dashboard"]
@ -153,8 +150,7 @@ hebrew = ["meilisearch-types/hebrew"]
japanese = ["meilisearch-types/japanese"]
thai = ["meilisearch-types/thai"]
greek = ["meilisearch-types/greek"]
khmer = ["meilisearch-types/khmer"]
[package.metadata.mini-dashboard]
assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.12/build.zip"
sha1 = "acfe9a018c93eb0604ea87ee87bff7df5474e18e"
assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.11/build.zip"
sha1 = "83cd44ed1e5f97ecb581dc9f958a63f4ccc982d9"

View File

@ -18,7 +18,7 @@ use segment::message::{Identify, Track, User};
use segment::{AutoBatcher, Batcher, HttpClient};
use serde::Serialize;
use serde_json::{json, Value};
use sysinfo::{Disks, System};
use sysinfo::{DiskExt, System, SystemExt};
use time::OffsetDateTime;
use tokio::select;
use tokio::sync::mpsc::{self, Receiver, Sender};
@ -36,7 +36,7 @@ use crate::routes::{create_all_stats, Stats};
use crate::search::{
FacetSearchResult, MatchingStrategy, SearchQuery, SearchQueryWithIndex, SearchResult,
DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG,
DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, DEFAULT_SEMANTIC_RATIO,
DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT,
};
use crate::Opt;
@ -250,9 +250,7 @@ impl super::Analytics for SegmentAnalytics {
struct Infos {
env: String,
experimental_enable_metrics: bool,
experimental_enable_logs_route: bool,
experimental_reduce_indexing_memory_usage: bool,
experimental_max_number_of_batched_tasks: usize,
db_path: bool,
import_dump: bool,
dump_dir: bool,
@ -265,8 +263,6 @@ struct Infos {
ignore_snapshot_if_db_exists: bool,
http_addr: bool,
http_payload_size_limit: Byte,
task_queue_webhook: bool,
task_webhook_authorization_header: bool,
log_level: String,
max_indexing_memory: MaxMemory,
max_indexing_threads: MaxThreads,
@ -288,14 +284,10 @@ impl From<Opt> for Infos {
let Opt {
db_path,
experimental_enable_metrics,
experimental_enable_logs_route,
experimental_reduce_indexing_memory_usage,
experimental_max_number_of_batched_tasks,
http_addr,
master_key: _,
env,
task_webhook_url,
task_webhook_authorization_header,
max_index_size: _,
max_task_db_size: _,
http_payload_size_limit,
@ -335,7 +327,6 @@ impl From<Opt> for Infos {
Self {
env,
experimental_enable_metrics,
experimental_enable_logs_route,
experimental_reduce_indexing_memory_usage,
db_path: db_path != PathBuf::from("./data.ms"),
import_dump: import_dump.is_some(),
@ -349,9 +340,6 @@ impl From<Opt> for Infos {
ignore_snapshot_if_db_exists,
http_addr: http_addr != default_http_addr(),
http_payload_size_limit,
experimental_max_number_of_batched_tasks,
task_queue_webhook: task_webhook_url.is_some(),
task_webhook_authorization_header: task_webhook_authorization_header.is_some(),
log_level: log_level.to_string(),
max_indexing_memory,
max_indexing_threads,
@ -389,17 +377,16 @@ impl Segment {
fn compute_traits(opt: &Opt, stats: Stats) -> Value {
static FIRST_START_TIMESTAMP: Lazy<Instant> = Lazy::new(Instant::now);
static SYSTEM: Lazy<Value> = Lazy::new(|| {
let disks = Disks::new_with_refreshed_list();
let mut sys = System::new_all();
sys.refresh_all();
let kernel_version = System::kernel_version()
.and_then(|k| k.split_once('-').map(|(k, _)| k.to_string()));
let kernel_version =
sys.kernel_version().and_then(|k| k.split_once('-').map(|(k, _)| k.to_string()));
json!({
"distribution": System::name(),
"distribution": sys.name(),
"kernel_version": kernel_version,
"cores": sys.cpus().len(),
"ram_size": sys.total_memory(),
"disk_size": disks.iter().map(|disk| disk.total_space()).max(),
"disk_size": sys.disks().iter().map(|disk| disk.total_space()).max(),
"server_provider": std::env::var("MEILI_SERVER_PROVIDER").ok(),
})
});
@ -596,11 +583,6 @@ pub struct SearchAggregator {
// vector
// The maximum number of floats in a vector request
max_vector_size: usize,
// Whether the semantic ratio passed to a hybrid search equals the default ratio.
semantic_ratio: bool,
// Whether a non-default embedder was specified
embedder: bool,
hybrid: bool,
// every time a search is done, we increment the counter linked to the used settings
matching_strategy: HashMap<String, usize>,
@ -654,7 +636,6 @@ impl SearchAggregator {
crop_marker,
matching_strategy,
attributes_to_search_on,
hybrid,
} = query;
let mut ret = Self::default();
@ -728,12 +709,6 @@ impl SearchAggregator {
ret.show_ranking_score = *show_ranking_score;
ret.show_ranking_score_details = *show_ranking_score_details;
if let Some(hybrid) = hybrid {
ret.semantic_ratio = hybrid.semantic_ratio != DEFAULT_SEMANTIC_RATIO();
ret.embedder = hybrid.embedder.is_some();
ret.hybrid = true;
}
ret
}
@ -787,9 +762,6 @@ impl SearchAggregator {
facets_total_number_of_facets,
show_ranking_score,
show_ranking_score_details,
semantic_ratio,
embedder,
hybrid,
} = other;
if self.timestamp.is_none() {
@ -835,9 +807,6 @@ impl SearchAggregator {
// vector
self.max_vector_size = self.max_vector_size.max(max_vector_size);
self.semantic_ratio |= semantic_ratio;
self.hybrid |= hybrid;
self.embedder |= embedder;
// pagination
self.max_limit = self.max_limit.max(max_limit);
@ -906,9 +875,6 @@ impl SearchAggregator {
facets_total_number_of_facets,
show_ranking_score,
show_ranking_score_details,
semantic_ratio,
embedder,
hybrid,
} = self;
if total_received == 0 {
@ -948,11 +914,6 @@ impl SearchAggregator {
"vector": {
"max_vector_size": max_vector_size,
},
"hybrid": {
"enabled": hybrid,
"semantic_ratio": semantic_ratio,
"embedder": embedder,
},
"pagination": {
"max_limit": max_limit,
"max_offset": max_offset,
@ -1048,7 +1009,6 @@ impl MultiSearchAggregator {
crop_marker: _,
matching_strategy: _,
attributes_to_search_on: _,
hybrid: _,
} = query;
index_uid.as_str()
@ -1195,7 +1155,6 @@ impl FacetSearchAggregator {
filter,
matching_strategy,
attributes_to_search_on,
hybrid,
} = query;
let mut ret = Self::default();
@ -1209,8 +1168,7 @@ impl FacetSearchAggregator {
|| vector.is_some()
|| filter.is_some()
|| *matching_strategy != MatchingStrategy::default()
|| attributes_to_search_on.is_some()
|| hybrid.is_some();
|| attributes_to_search_on.is_some();
ret
}

View File

@ -12,8 +12,6 @@ pub enum MeilisearchHttpError {
#[error("A Content-Type header is missing. Accepted values for the Content-Type header are: {}",
.0.iter().map(|s| format!("`{}`", s)).collect::<Vec<_>>().join(", "))]
MissingContentType(Vec<String>),
#[error("The `/logs/stream` route is currently in use by someone else.")]
AlreadyUsedLogRoute,
#[error("The Content-Type `{0}` does not support the use of a csv delimiter. The csv delimiter can only be used with the Content-Type `text/csv`.")]
CsvDelimiterWithWrongContentType(String),
#[error(
@ -53,15 +51,12 @@ pub enum MeilisearchHttpError {
DocumentFormat(#[from] DocumentFormatError),
#[error(transparent)]
Join(#[from] JoinError),
#[error("Invalid request: missing `hybrid` parameter when both `q` and `vector` are present.")]
MissingSearchHybrid,
}
impl ErrorCode for MeilisearchHttpError {
fn error_code(&self) -> Code {
match self {
MeilisearchHttpError::MissingContentType(_) => Code::MissingContentType,
MeilisearchHttpError::AlreadyUsedLogRoute => Code::BadRequest,
MeilisearchHttpError::CsvDelimiterWithWrongContentType(_) => Code::InvalidContentType,
MeilisearchHttpError::MissingPayload(_) => Code::MissingPayload,
MeilisearchHttpError::InvalidContentType(_, _) => Code::InvalidContentType,
@ -79,7 +74,6 @@ impl ErrorCode for MeilisearchHttpError {
MeilisearchHttpError::FileStore(_) => Code::Internal,
MeilisearchHttpError::DocumentFormat(e) => e.error_code(),
MeilisearchHttpError::Join(_) => Code::Internal,
MeilisearchHttpError::MissingSearchHybrid => Code::MissingSearchHybrid,
}
}
}

View File

@ -29,6 +29,7 @@ use error::PayloadError;
use extractors::payload::PayloadConfig;
use http::header::CONTENT_TYPE;
use index_scheduler::{IndexScheduler, IndexSchedulerOptions};
use log::error;
use meilisearch_auth::AuthController;
use meilisearch_types::milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
use meilisearch_types::milli::update::{IndexDocumentsConfig, IndexDocumentsMethod};
@ -38,8 +39,6 @@ use meilisearch_types::versioning::{check_version_file, create_version_file};
use meilisearch_types::{compression, milli, VERSION_FILE_NAME};
pub use option::Opt;
use option::ScheduleSnapshot;
use tracing::error;
use tracing_subscriber::filter::Targets;
use crate::error::MeilisearchHttpError;
@ -87,21 +86,10 @@ fn is_empty_db(db_path: impl AsRef<Path>) -> bool {
}
}
/// The handle used to update the logs at runtime. Must be accessible from the `main.rs` and the `route/logs.rs`.
pub type LogRouteHandle =
tracing_subscriber::reload::Handle<LogRouteType, tracing_subscriber::Registry>;
/// The layer type that a [`LogRouteHandle`] reloads: an optional boxed
/// `tracing_subscriber::Layer` over the `Registry`, wrapped in a `Targets` filter.
///
/// The inner layer is an `Option`, so the type can also represent "no layer installed".
pub type LogRouteType = tracing_subscriber::filter::Filtered<
Option<Box<dyn tracing_subscriber::Layer<tracing_subscriber::Registry> + Send + Sync>>,
Targets,
tracing_subscriber::Registry,
>;
pub fn create_app(
index_scheduler: Data<IndexScheduler>,
auth_controller: Data<AuthController>,
opt: Opt,
logs: LogRouteHandle,
analytics: Arc<dyn Analytics>,
enable_dashboard: bool,
) -> actix_web::App<
@ -120,7 +108,6 @@ pub fn create_app(
index_scheduler.clone(),
auth_controller.clone(),
&opt,
logs,
analytics.clone(),
)
})
@ -136,7 +123,7 @@ pub fn create_app(
.allow_any_method()
.max_age(86_400), // 24h
)
.wrap(tracing_actix_web::TracingLogger::default())
.wrap(actix_web::middleware::Logger::default())
.wrap(actix_web::middleware::Compress::default())
.wrap(actix_web::middleware::NormalizePath::new(actix_web::middleware::TrailingSlash::Trim))
}
@ -241,15 +228,12 @@ fn open_or_create_database_unchecked(
indexes_path: opt.db_path.join("indexes"),
snapshots_path: opt.snapshot_dir.clone(),
dumps_path: opt.dump_dir.clone(),
webhook_url: opt.task_webhook_url.as_ref().map(|url| url.to_string()),
webhook_authorization_header: opt.task_webhook_authorization_header.clone(),
task_db_size: opt.max_task_db_size.get_bytes() as usize,
index_base_map_size: opt.max_index_size.get_bytes() as usize,
enable_mdb_writemap: opt.experimental_reduce_indexing_memory_usage,
indexer_config: (&opt.indexer_options).try_into()?,
autobatching_enabled: true,
max_number_of_tasks: 1_000_000,
max_number_of_batched_tasks: opt.experimental_max_number_of_batched_tasks,
index_growth_amount: byte_unit::Byte::from_str("10GiB").unwrap().get_bytes() as usize,
index_count: DEFAULT_INDEX_COUNT,
instance_features,
@ -293,15 +277,15 @@ fn import_dump(
let mut dump_reader = dump::DumpReader::open(reader)?;
if let Some(date) = dump_reader.date() {
tracing::info!(
version = ?dump_reader.version(), // TODO: get the meilisearch version instead of the dump version
%date,
"Importing a dump of meilisearch"
log::info!(
"Importing a dump of meilisearch `{:?}` from the {}",
dump_reader.version(), // TODO: get the meilisearch version instead of the dump version
date
);
} else {
tracing::info!(
version = ?dump_reader.version(), // TODO: get the meilisearch version instead of the dump version
"Importing a dump of meilisearch",
log::info!(
"Importing a dump of meilisearch `{:?}`",
dump_reader.version(), // TODO: get the meilisearch version instead of the dump version
);
}
@ -335,7 +319,7 @@ fn import_dump(
for index_reader in dump_reader.indexes()? {
let mut index_reader = index_reader?;
let metadata = index_reader.metadata();
tracing::info!("Importing index `{}`.", metadata.uid);
log::info!("Importing index `{}`.", metadata.uid);
let date = Some((metadata.created_at, metadata.updated_at));
let index = index_scheduler.create_raw_index(&metadata.uid, date)?;
@ -349,15 +333,14 @@ fn import_dump(
}
// 4.2 Import the settings.
tracing::info!("Importing the settings.");
log::info!("Importing the settings.");
let settings = index_reader.settings()?;
apply_settings_to_builder(&settings, &mut builder);
builder
.execute(|indexing_step| tracing::debug!("update: {:?}", indexing_step), || false)?;
builder.execute(|indexing_step| log::debug!("update: {:?}", indexing_step), || false)?;
// 4.3 Import the documents.
// 4.3.1 We need to recreate the grenad+obkv format accepted by the index.
tracing::info!("Importing the documents.");
log::info!("Importing the documents.");
let file = tempfile::tempfile()?;
let mut builder = DocumentsBatchBuilder::new(BufWriter::new(file));
for document in index_reader.documents()? {
@ -379,16 +362,15 @@ fn import_dump(
update_method: IndexDocumentsMethod::ReplaceDocuments,
..Default::default()
},
|indexing_step| tracing::trace!("update: {:?}", indexing_step),
|indexing_step| log::debug!("update: {:?}", indexing_step),
|| false,
)?;
let (builder, user_result) = builder.add_documents(reader)?;
let user_result = user_result?;
tracing::info!(documents_found = user_result, "{} documents found.", user_result);
log::info!("{} documents found.", user_result?);
builder.execute()?;
wtxn.commit()?;
tracing::info!("All documents successfully imported.");
log::info!("All documents successfully imported.");
}
let mut index_scheduler_dump = index_scheduler.register_dumped_task()?;
@ -406,7 +388,6 @@ pub fn configure_data(
index_scheduler: Data<IndexScheduler>,
auth: Data<AuthController>,
opt: &Opt,
logs: LogRouteHandle,
analytics: Arc<dyn Analytics>,
) {
let http_payload_size_limit = opt.http_payload_size_limit.get_bytes() as usize;
@ -414,10 +395,8 @@ pub fn configure_data(
.app_data(index_scheduler)
.app_data(auth)
.app_data(web::Data::from(analytics))
.app_data(web::Data::new(logs))
.app_data(
web::JsonConfig::default()
.limit(http_payload_size_limit)
.content_type(|mime| mime == mime::APPLICATION_JSON)
.error_handler(|err, req: &HttpRequest| match err {
JsonPayloadError::ContentType => match req.headers().get(CONTENT_TYPE) {

View File

@ -1,7 +1,6 @@
use std::env;
use std::io::{stderr, Write};
use std::path::PathBuf;
use std::str::FromStr;
use std::sync::Arc;
use actix_web::http::KeepAlive;
@ -10,60 +9,33 @@ use actix_web::HttpServer;
use index_scheduler::IndexScheduler;
use is_terminal::IsTerminal;
use meilisearch::analytics::Analytics;
use meilisearch::{
analytics, create_app, prototype_name, setup_meilisearch, LogRouteHandle, LogRouteType, Opt,
};
use meilisearch::{analytics, create_app, prototype_name, setup_meilisearch, Opt};
use meilisearch_auth::{generate_master_key, AuthController, MASTER_KEY_MIN_SIZE};
use mimalloc::MiMalloc;
use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor};
use tracing::level_filters::LevelFilter;
use tracing_subscriber::layer::SubscriberExt as _;
use tracing_subscriber::Layer;
#[global_allocator]
static ALLOC: MiMalloc = MiMalloc;
/// Builds the placeholder layer for the log route.
///
/// There is no inner layer (`None`), and the attached `Targets` filter has a single
/// directive that sets the empty target prefix to `LevelFilter::OFF`
/// (presumably matching every target, since the empty string prefixes all of them).
fn default_layer() -> LogRouteType {
    let everything_off =
        tracing_subscriber::filter::Targets::new().with_target("", LevelFilter::OFF);
    None.with_filter(everything_off)
}
static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;
/// does all the setup before meilisearch is launched
fn setup(opt: &Opt) -> anyhow::Result<LogRouteHandle> {
let (route_layer, route_layer_handle) = tracing_subscriber::reload::Layer::new(default_layer());
let route_layer: tracing_subscriber::reload::Layer<_, _> = route_layer;
fn setup(opt: &Opt) -> anyhow::Result<()> {
let mut log_builder = env_logger::Builder::new();
log_builder.parse_filters(&opt.log_level.to_string());
let subscriber = tracing_subscriber::registry().with(route_layer).with(
tracing_subscriber::fmt::layer()
.with_span_events(tracing_subscriber::fmt::format::FmtSpan::NEW)
.with_filter(
tracing_subscriber::filter::LevelFilter::from_str(&opt.log_level.to_string())
.unwrap(),
),
);
log_builder.init();
// set the subscriber as the default for the application
tracing::subscriber::set_global_default(subscriber).unwrap();
Ok(route_layer_handle)
}
fn on_panic(info: &std::panic::PanicInfo) {
let info = info.to_string().replace('\n', " ");
tracing::error!(%info);
Ok(())
}
#[actix_web::main]
async fn main() -> anyhow::Result<()> {
let (opt, config_read_from) = Opt::try_build()?;
std::panic::set_hook(Box::new(on_panic));
anyhow::ensure!(
!(cfg!(windows) && opt.experimental_reduce_indexing_memory_usage),
"The `experimental-reduce-indexing-memory-usage` flag is not supported on Windows"
);
let log_handle = setup(&opt)?;
setup(&opt)?;
match (opt.env.as_ref(), &opt.master_key) {
("production", Some(master_key)) if master_key.len() < MASTER_KEY_MIN_SIZE => {
@ -101,7 +73,7 @@ async fn main() -> anyhow::Result<()> {
print_launch_resume(&opt, analytics.clone(), config_read_from);
run_http(index_scheduler, auth_controller, opt, log_handle, analytics).await?;
run_http(index_scheduler, auth_controller, opt, analytics).await?;
Ok(())
}
@ -110,7 +82,6 @@ async fn run_http(
index_scheduler: Arc<IndexScheduler>,
auth_controller: Arc<AuthController>,
opt: Opt,
logs: LogRouteHandle,
analytics: Arc<dyn Analytics>,
) -> anyhow::Result<()> {
let enable_dashboard = &opt.env == "development";
@ -123,7 +94,6 @@ async fn run_http(
index_scheduler.clone(),
auth_controller.clone(),
opt.clone(),
logs.clone(),
analytics.clone(),
enable_dashboard,
)

View File

@ -20,8 +20,7 @@ use rustls::server::{
use rustls::RootCertStore;
use rustls_pemfile::{certs, pkcs8_private_keys, rsa_private_keys};
use serde::{Deserialize, Serialize};
use sysinfo::{MemoryRefreshKind, RefreshKind, System};
use url::Url;
use sysinfo::{RefreshKind, System, SystemExt};
const POSSIBLE_ENV: [&str; 2] = ["development", "production"];
@ -29,8 +28,6 @@ const MEILI_DB_PATH: &str = "MEILI_DB_PATH";
const MEILI_HTTP_ADDR: &str = "MEILI_HTTP_ADDR";
const MEILI_MASTER_KEY: &str = "MEILI_MASTER_KEY";
const MEILI_ENV: &str = "MEILI_ENV";
const MEILI_TASK_WEBHOOK_URL: &str = "MEILI_TASK_WEBHOOK_URL";
const MEILI_TASK_WEBHOOK_AUTHORIZATION_HEADER: &str = "MEILI_TASK_WEBHOOK_AUTHORIZATION_HEADER";
#[cfg(feature = "analytics")]
const MEILI_NO_ANALYTICS: &str = "MEILI_NO_ANALYTICS";
const MEILI_HTTP_PAYLOAD_SIZE_LIMIT: &str = "MEILI_HTTP_PAYLOAD_SIZE_LIMIT";
@ -51,12 +48,9 @@ const MEILI_IGNORE_MISSING_DUMP: &str = "MEILI_IGNORE_MISSING_DUMP";
const MEILI_IGNORE_DUMP_IF_DB_EXISTS: &str = "MEILI_IGNORE_DUMP_IF_DB_EXISTS";
const MEILI_DUMP_DIR: &str = "MEILI_DUMP_DIR";
const MEILI_LOG_LEVEL: &str = "MEILI_LOG_LEVEL";
const MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE: &str = "MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE";
const MEILI_EXPERIMENTAL_ENABLE_METRICS: &str = "MEILI_EXPERIMENTAL_ENABLE_METRICS";
const MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE: &str =
"MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE";
const MEILI_EXPERIMENTAL_MAX_NUMBER_OF_BATCHED_TASKS: &str =
"MEILI_EXPERIMENTAL_MAX_NUMBER_OF_BATCHED_TASKS";
const DEFAULT_CONFIG_FILE_PATH: &str = "./config.toml";
const DEFAULT_DB_PATH: &str = "./data.ms";
@ -160,14 +154,6 @@ pub struct Opt {
#[serde(default = "default_env")]
pub env: String,
/// Called whenever a task finishes so a third party can be notified.
#[clap(long, env = MEILI_TASK_WEBHOOK_URL)]
pub task_webhook_url: Option<Url>,
/// The Authorization header to send on the webhook URL whenever a task finishes so a third party can be notified.
#[clap(long, env = MEILI_TASK_WEBHOOK_AUTHORIZATION_HEADER)]
pub task_webhook_authorization_header: Option<String>,
/// Deactivates Meilisearch's built-in telemetry when provided.
///
/// Meilisearch automatically collects data from all instances that do not opt out using this flag.
@ -310,23 +296,11 @@ pub struct Opt {
#[serde(default)]
pub experimental_enable_metrics: bool,
/// Experimental logs route feature. For more information, see: <https://github.com/orgs/meilisearch/discussions/721>
///
/// Enables the log route on the `POST /logs/stream` endpoint and the `DELETE /logs/stream` to stop receiving logs.
#[clap(long, env = MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE)]
#[serde(default)]
pub experimental_enable_logs_route: bool,
/// Experimental RAM reduction during indexing, do not use in production, see: <https://github.com/meilisearch/product/discussions/652>
#[clap(long, env = MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE)]
#[serde(default)]
pub experimental_reduce_indexing_memory_usage: bool,
/// Experimentally reduces the maximum number of tasks that will be processed at once, see: <https://github.com/orgs/meilisearch/discussions/713>
#[clap(long, env = MEILI_EXPERIMENTAL_MAX_NUMBER_OF_BATCHED_TASKS, default_value_t = default_limit_batched_tasks())]
#[serde(default = "default_limit_batched_tasks")]
pub experimental_max_number_of_batched_tasks: usize,
#[serde(flatten)]
#[clap(flatten)]
pub indexer_options: IndexerOpts,
@ -394,12 +368,9 @@ impl Opt {
http_addr,
master_key,
env,
task_webhook_url,
task_webhook_authorization_header,
max_index_size: _,
max_task_db_size: _,
http_payload_size_limit,
experimental_max_number_of_batched_tasks,
ssl_cert_path,
ssl_key_path,
ssl_auth_path,
@ -421,9 +392,8 @@ impl Opt {
config_file_path: _,
#[cfg(feature = "analytics")]
no_analytics,
experimental_enable_metrics,
experimental_enable_logs_route,
experimental_reduce_indexing_memory_usage,
experimental_enable_metrics: enable_metrics_route,
experimental_reduce_indexing_memory_usage: reduce_indexing_memory_usage,
} = self;
export_to_env_if_not_present(MEILI_DB_PATH, db_path);
export_to_env_if_not_present(MEILI_HTTP_ADDR, http_addr);
@ -431,16 +401,6 @@ impl Opt {
export_to_env_if_not_present(MEILI_MASTER_KEY, master_key);
}
export_to_env_if_not_present(MEILI_ENV, env);
if let Some(task_webhook_url) = task_webhook_url {
export_to_env_if_not_present(MEILI_TASK_WEBHOOK_URL, task_webhook_url.to_string());
}
if let Some(task_webhook_authorization_header) = task_webhook_authorization_header {
export_to_env_if_not_present(
MEILI_TASK_WEBHOOK_AUTHORIZATION_HEADER,
task_webhook_authorization_header,
);
}
#[cfg(feature = "analytics")]
{
export_to_env_if_not_present(MEILI_NO_ANALYTICS, no_analytics.to_string());
@ -449,10 +409,6 @@ impl Opt {
MEILI_HTTP_PAYLOAD_SIZE_LIMIT,
http_payload_size_limit.to_string(),
);
export_to_env_if_not_present(
MEILI_EXPERIMENTAL_MAX_NUMBER_OF_BATCHED_TASKS,
experimental_max_number_of_batched_tasks.to_string(),
);
if let Some(ssl_cert_path) = ssl_cert_path {
export_to_env_if_not_present(MEILI_SSL_CERT_PATH, ssl_cert_path);
}
@ -477,15 +433,11 @@ impl Opt {
export_to_env_if_not_present(MEILI_LOG_LEVEL, log_level.to_string());
export_to_env_if_not_present(
MEILI_EXPERIMENTAL_ENABLE_METRICS,
experimental_enable_metrics.to_string(),
);
export_to_env_if_not_present(
MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE,
experimental_enable_logs_route.to_string(),
enable_metrics_route.to_string(),
);
export_to_env_if_not_present(
MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE,
experimental_reduce_indexing_memory_usage.to_string(),
reduce_indexing_memory_usage.to_string(),
);
indexer_options.export_to_env();
}
@ -537,10 +489,7 @@ impl Opt {
}
pub(crate) fn to_instance_features(&self) -> InstanceTogglableFeatures {
InstanceTogglableFeatures {
metrics: self.experimental_enable_metrics,
logs_route: self.experimental_enable_logs_route,
}
InstanceTogglableFeatures { metrics: self.experimental_enable_metrics }
}
}
@ -649,8 +598,8 @@ impl MaxMemory {
/// Returns the total amount of bytes available or `None` if this system isn't supported.
fn total_memory_bytes() -> Option<u64> {
if sysinfo::IS_SUPPORTED_SYSTEM {
let memory_kind = RefreshKind::new().with_memory(MemoryRefreshKind::new().with_ram());
if System::IS_SUPPORTED {
let memory_kind = RefreshKind::new().with_memory();
let mut system = System::new_with_specifics(memory_kind);
system.refresh_memory();
Some(system.total_memory())
@ -778,10 +727,6 @@ fn default_http_payload_size_limit() -> Byte {
Byte::from_str(DEFAULT_HTTP_PAYLOAD_SIZE_LIMIT).unwrap()
}
/// Default value for the `experimental_max_number_of_batched_tasks` option.
///
/// Returns the largest representable `usize`, so no practical cap applies
/// unless the user supplies a smaller value.
fn default_limit_batched_tasks() -> usize {
    const NO_LIMIT: usize = usize::MAX;
    NO_LIMIT
}
fn default_snapshot_dir() -> PathBuf {
PathBuf::from(DEFAULT_SNAPSHOT_DIR)
}

View File

@ -1,11 +1,11 @@
use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse};
use index_scheduler::IndexScheduler;
use log::debug;
use meilisearch_auth::AuthController;
use meilisearch_types::error::ResponseError;
use meilisearch_types::tasks::KindWithContent;
use serde_json::json;
use tracing::debug;
use crate::analytics::Analytics;
use crate::extractors::authentication::policies::*;
@ -32,6 +32,6 @@ pub async fn create_dump(
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
debug!(returns = ?task, "Create dump");
debug!("returns: {:?}", task);
Ok(HttpResponse::Accepted().json(task))
}

View File

@ -3,11 +3,11 @@ use actix_web::{HttpRequest, HttpResponse};
use deserr::actix_web::AwebJson;
use deserr::Deserr;
use index_scheduler::IndexScheduler;
use log::debug;
use meilisearch_types::deserr::DeserrJsonError;
use meilisearch_types::error::ResponseError;
use meilisearch_types::keys::actions;
use serde_json::json;
use tracing::debug;
use crate::analytics::Analytics;
use crate::extractors::authentication::policies::ActionPolicy;
@ -33,9 +33,8 @@ async fn get_features(
let features = index_scheduler.features();
analytics.publish("Experimental features Seen".to_string(), json!(null), Some(&req));
let features = features.runtime_features();
debug!(returns = ?features, "Get features");
HttpResponse::Ok().json(features)
debug!("returns: {:?}", features.runtime_features());
HttpResponse::Ok().json(features.runtime_features())
}
#[derive(Debug, Deserr)]
@ -48,8 +47,6 @@ pub struct RuntimeTogglableFeatures {
#[deserr(default)]
pub metrics: Option<bool>,
#[deserr(default)]
pub logs_route: Option<bool>,
#[deserr(default)]
pub export_puffin_reports: Option<bool>,
}
@ -63,14 +60,12 @@ async fn patch_features(
analytics: Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
let features = index_scheduler.features();
debug!(parameters = ?new_features, "Patch features");
let old_features = features.runtime_features();
let new_features = meilisearch_types::features::RuntimeTogglableFeatures {
score_details: new_features.0.score_details.unwrap_or(old_features.score_details),
vector_store: new_features.0.vector_store.unwrap_or(old_features.vector_store),
metrics: new_features.0.metrics.unwrap_or(old_features.metrics),
logs_route: new_features.0.logs_route.unwrap_or(old_features.logs_route),
export_puffin_reports: new_features
.0
.export_puffin_reports
@ -84,7 +79,6 @@ async fn patch_features(
score_details,
vector_store,
metrics,
logs_route,
export_puffin_reports,
} = new_features;
@ -94,12 +88,10 @@ async fn patch_features(
"score_details": score_details,
"vector_store": vector_store,
"metrics": metrics,
"logs_route": logs_route,
"export_puffin_reports": export_puffin_reports,
}),
Some(&req),
);
index_scheduler.put_runtime_features(new_features)?;
debug!(returns = ?new_features, "Patch features");
Ok(HttpResponse::Ok().json(new_features))
}

View File

@ -3,11 +3,12 @@ use std::io::ErrorKind;
use actix_web::http::header::CONTENT_TYPE;
use actix_web::web::Data;
use actix_web::{web, HttpMessage, HttpRequest, HttpResponse};
use bstr::ByteSlice as _;
use bstr::ByteSlice;
use deserr::actix_web::{AwebJson, AwebQueryParameter};
use deserr::Deserr;
use futures::StreamExt;
use index_scheduler::IndexScheduler;
use log::debug;
use meilisearch_types::deserr::query_params::Param;
use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError};
use meilisearch_types::document_formats::{read_csv, read_json, read_ndjson, PayloadType};
@ -27,7 +28,6 @@ use serde_json::Value;
use tempfile::tempfile;
use tokio::fs::File;
use tokio::io::{AsyncSeekExt, AsyncWriteExt, BufWriter};
use tracing::debug;
use crate::analytics::{Analytics, DocumentDeletionKind, DocumentFetchKind};
use crate::error::MeilisearchHttpError;
@ -101,7 +101,6 @@ pub async fn get_document(
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
let DocumentParam { index_uid, document_id } = document_param.into_inner();
debug!(parameters = ?params, "Get document");
let index_uid = IndexUid::try_from(index_uid)?;
analytics.get_fetch_documents(&DocumentFetchKind::PerDocumentId, &req);
@ -111,7 +110,7 @@ pub async fn get_document(
let index = index_scheduler.index(&index_uid)?;
let document = retrieve_document(&index, &document_id, attributes_to_retrieve)?;
debug!(returns = ?document, "Get document");
debug!("returns: {:?}", document);
Ok(HttpResponse::Ok().json(document))
}
@ -132,7 +131,7 @@ pub async fn delete_document(
};
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
debug!(returns = ?task, "Delete document");
debug!("returns: {:?}", task);
Ok(HttpResponse::Accepted().json(task))
}
@ -169,8 +168,9 @@ pub async fn documents_by_query_post(
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
debug!("called with body: {:?}", body);
let body = body.into_inner();
debug!(parameters = ?body, "Get documents POST");
analytics.post_fetch_documents(
&DocumentFetchKind::Normal {
@ -191,7 +191,7 @@ pub async fn get_documents(
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
debug!(parameters = ?params, "Get documents GET");
debug!("called with params: {:?}", params);
let BrowseQueryGet { limit, offset, fields, filter } = params.into_inner();
@ -235,7 +235,7 @@ fn documents_by_query(
let ret = PaginationView::new(offset, limit, total as usize, documents);
debug!(returns = ?ret, "Get documents");
debug!("returns: {:?}", ret);
Ok(HttpResponse::Ok().json(ret))
}
@ -271,7 +271,7 @@ pub async fn replace_documents(
) -> Result<HttpResponse, ResponseError> {
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
debug!(parameters = ?params, "Replace documents");
debug!("called with params: {:?}", params);
let params = params.into_inner();
analytics.add_documents(&params, index_scheduler.index(&index_uid).is_err(), &req);
@ -288,7 +288,6 @@ pub async fn replace_documents(
allow_index_creation,
)
.await?;
debug!(returns = ?task, "Replace documents");
Ok(HttpResponse::Accepted().json(task))
}
@ -303,8 +302,8 @@ pub async fn update_documents(
) -> Result<HttpResponse, ResponseError> {
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
debug!("called with params: {:?}", params);
let params = params.into_inner();
debug!(parameters = ?params, "Update documents");
analytics.update_documents(&params, index_scheduler.index(&index_uid).is_err(), &req);
@ -320,7 +319,6 @@ pub async fn update_documents(
allow_index_creation,
)
.await?;
debug!(returns = ?task, "Update documents");
Ok(HttpResponse::Accepted().json(task))
}
@ -429,10 +427,7 @@ async fn document_addition(
Err(index_scheduler::Error::FileStore(file_store::Error::IoError(e)))
if e.kind() == ErrorKind::NotFound => {}
Err(e) => {
tracing::warn!(
index_uuid = %uuid,
"Unknown error happened while deleting a malformed update file: {e}"
);
log::warn!("Unknown error happened while deleting a malformed update file with uuid {uuid}: {e}");
}
}
// We still want to return the original error to the end user.
@ -458,6 +453,7 @@ async fn document_addition(
}
};
debug!("returns: {:?}", task);
Ok(task.into())
}
@ -468,7 +464,7 @@ pub async fn delete_documents_batch(
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
debug!(parameters = ?body, "Delete documents by batch");
debug!("called with params: {:?}", body);
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
analytics.delete_documents(DocumentDeletionKind::PerBatch, &req);
@ -483,7 +479,7 @@ pub async fn delete_documents_batch(
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
debug!(returns = ?task, "Delete documents by batch");
debug!("returns: {:?}", task);
Ok(HttpResponse::Accepted().json(task))
}
@ -501,7 +497,7 @@ pub async fn delete_documents_by_filter(
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
debug!(parameters = ?body, "Delete documents by filter");
debug!("called with params: {:?}", body);
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let index_uid = index_uid.into_inner();
let filter = body.into_inner().filter;
@ -519,7 +515,7 @@ pub async fn delete_documents_by_filter(
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
debug!(returns = ?task, "Delete documents by filter");
debug!("returns: {:?}", task);
Ok(HttpResponse::Accepted().json(task))
}
@ -536,7 +532,7 @@ pub async fn clear_all_documents(
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
debug!(returns = ?task, "Delete all documents");
debug!("returns: {:?}", task);
Ok(HttpResponse::Accepted().json(task))
}
@ -616,8 +612,8 @@ fn retrieve_document<S: AsRef<str>>(
let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
let internal_id = index
.external_documents_ids()
.get(&txn, doc_id)?
.external_documents_ids(&txn)?
.get(doc_id.as_bytes())
.ok_or_else(|| MeilisearchHttpError::DocumentNotFound(doc_id.to_string()))?;
let document = index

View File

@ -2,20 +2,20 @@ use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse};
use deserr::actix_web::AwebJson;
use index_scheduler::IndexScheduler;
use log::debug;
use meilisearch_types::deserr::DeserrJsonError;
use meilisearch_types::error::deserr_codes::*;
use meilisearch_types::error::ResponseError;
use meilisearch_types::index_uid::IndexUid;
use serde_json::Value;
use tracing::debug;
use crate::analytics::{Analytics, FacetSearchAggregator};
use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::GuardedData;
use crate::search::{
add_search_rules, perform_facet_search, HybridQuery, MatchingStrategy, SearchQuery,
DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG,
DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET,
add_search_rules, perform_facet_search, MatchingStrategy, SearchQuery, DEFAULT_CROP_LENGTH,
DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG,
DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET,
};
pub fn configure(cfg: &mut web::ServiceConfig) {
@ -36,8 +36,6 @@ pub struct FacetSearchQuery {
pub q: Option<String>,
#[deserr(default, error = DeserrJsonError<InvalidSearchVector>)]
pub vector: Option<Vec<f32>>,
#[deserr(default, error = DeserrJsonError<InvalidHybridQuery>)]
pub hybrid: Option<HybridQuery>,
#[deserr(default, error = DeserrJsonError<InvalidSearchFilter>)]
pub filter: Option<Value>,
#[deserr(default, error = DeserrJsonError<InvalidSearchMatchingStrategy>, default)]
@ -56,7 +54,7 @@ pub async fn search(
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let query = params.into_inner();
debug!(parameters = ?query, "Facet search");
debug!("facet search called with params: {:?}", query);
let mut aggregate = FacetSearchAggregator::from_query(&query, &req);
@ -83,7 +81,7 @@ pub async fn search(
let search_result = search_result?;
debug!(returns = ?search_result, "Facet search");
debug!("returns: {:?}", search_result);
Ok(HttpResponse::Ok().json(search_result))
}
@ -97,7 +95,6 @@ impl From<FacetSearchQuery> for SearchQuery {
filter,
matching_strategy,
attributes_to_search_on,
hybrid,
} = value;
SearchQuery {
@ -122,7 +119,6 @@ impl From<FacetSearchQuery> for SearchQuery {
matching_strategy,
vector,
attributes_to_search_on,
hybrid,
}
}
}

View File

@ -5,6 +5,7 @@ use actix_web::{web, HttpRequest, HttpResponse};
use deserr::actix_web::{AwebJson, AwebQueryParameter};
use deserr::{DeserializeError, Deserr, ValuePointerRef};
use index_scheduler::IndexScheduler;
use log::debug;
use meilisearch_types::deserr::query_params::Param;
use meilisearch_types::deserr::{immutable_field_error, DeserrJsonError, DeserrQueryParamError};
use meilisearch_types::error::deserr_codes::*;
@ -15,7 +16,6 @@ use meilisearch_types::tasks::KindWithContent;
use serde::Serialize;
use serde_json::json;
use time::OffsetDateTime;
use tracing::debug;
use super::{Pagination, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT};
use crate::analytics::Analytics;
@ -93,7 +93,6 @@ pub async fn list_indexes(
index_scheduler: GuardedData<ActionPolicy<{ actions::INDEXES_GET }>, Data<IndexScheduler>>,
paginate: AwebQueryParameter<ListIndexes, DeserrQueryParamError>,
) -> Result<HttpResponse, ResponseError> {
debug!(parameters = ?paginate, "List indexes");
let filters = index_scheduler.filters();
let indexes: Vec<Option<IndexView>> =
index_scheduler.try_for_each_index(|uid, index| -> Result<Option<IndexView>, _> {
@ -106,7 +105,7 @@ pub async fn list_indexes(
let indexes: Vec<IndexView> = indexes.into_iter().flatten().collect();
let ret = paginate.as_pagination().auto_paginate_sized(indexes.into_iter());
debug!(returns = ?ret, "List indexes");
debug!("returns: {:?}", ret);
Ok(HttpResponse::Ok().json(ret))
}
@ -125,7 +124,6 @@ pub async fn create_index(
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
debug!(parameters = ?body, "Create index");
let IndexCreateRequest { primary_key, uid } = body.into_inner();
let allow_index_creation = index_scheduler.filters().allow_index_creation(&uid);
@ -139,7 +137,6 @@ pub async fn create_index(
let task = KindWithContent::IndexCreation { index_uid: uid.to_string(), primary_key };
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
debug!(returns = ?task, "Create index");
Ok(HttpResponse::Accepted().json(task))
} else {
@ -180,7 +177,7 @@ pub async fn get_index(
let index = index_scheduler.index(&index_uid)?;
let index_view = IndexView::new(index_uid.into_inner(), &index)?;
debug!(returns = ?index_view, "Get index");
debug!("returns: {:?}", index_view);
Ok(HttpResponse::Ok().json(index_view))
}
@ -192,7 +189,7 @@ pub async fn update_index(
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
debug!(parameters = ?body, "Update index");
debug!("called with params: {:?}", body);
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let body = body.into_inner();
analytics.publish(
@ -209,7 +206,7 @@ pub async fn update_index(
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
debug!(returns = ?task, "Update index");
debug!("returns: {:?}", task);
Ok(HttpResponse::Accepted().json(task))
}
@ -221,7 +218,6 @@ pub async fn delete_index(
let task = KindWithContent::IndexDeletion { index_uid: index_uid.into_inner() };
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
debug!(returns = ?task, "Delete index");
Ok(HttpResponse::Accepted().json(task))
}
@ -259,6 +255,6 @@ pub async fn get_index_stats(
let stats = IndexStats::from(index_scheduler.index_stats(&index_uid)?);
debug!(returns = ?stats, "Get index stats");
debug!("returns: {:?}", stats);
Ok(HttpResponse::Ok().json(stats))
}

View File

@ -2,25 +2,23 @@ use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse};
use deserr::actix_web::{AwebJson, AwebQueryParameter};
use index_scheduler::IndexScheduler;
use log::debug;
use meilisearch_types::deserr::query_params::Param;
use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError};
use meilisearch_types::error::deserr_codes::*;
use meilisearch_types::error::ResponseError;
use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::milli;
use meilisearch_types::milli::vector::DistributionShift;
use meilisearch_types::serde_cs::vec::CS;
use serde_json::Value;
use tracing::{debug, warn};
use crate::analytics::{Analytics, SearchAggregator};
use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::GuardedData;
use crate::extractors::sequential_extractor::SeqHandler;
use crate::search::{
add_search_rules, perform_search, HybridQuery, MatchingStrategy, SearchQuery, SemanticRatio,
DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG,
DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET, DEFAULT_SEMANTIC_RATIO,
add_search_rules, perform_search, MatchingStrategy, SearchQuery, DEFAULT_CROP_LENGTH,
DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG,
DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET,
};
pub fn configure(cfg: &mut web::ServiceConfig) {
@ -76,31 +74,6 @@ pub struct SearchQueryGet {
matching_strategy: MatchingStrategy,
#[deserr(default, error = DeserrQueryParamError<InvalidSearchAttributesToSearchOn>)]
pub attributes_to_search_on: Option<CS<String>>,
#[deserr(default, error = DeserrQueryParamError<InvalidEmbedder>)]
pub hybrid_embedder: Option<String>,
#[deserr(default, error = DeserrQueryParamError<InvalidSearchSemanticRatio>)]
pub hybrid_semantic_ratio: Option<SemanticRatioGet>,
}
/// Query-parameter form of a [`SemanticRatio`].
///
/// deserr first reads the parameter as a `String` and then converts it through the
/// `TryFrom<String>` implementation, reporting `InvalidSearchSemanticRatio` on failure.
#[derive(Debug, Clone, Copy, Default, PartialEq, deserr::Deserr)]
#[deserr(try_from(String) = TryFrom::try_from -> InvalidSearchSemanticRatio)]
pub struct SemanticRatioGet(SemanticRatio);
impl std::convert::TryFrom<String> for SemanticRatioGet {
    type Error = InvalidSearchSemanticRatio;

    /// Parses `s` as an `f32` and then converts that number into a `SemanticRatio`.
    ///
    /// Fails with `InvalidSearchSemanticRatio` when `s` is not a float, and propagates the
    /// error of `SemanticRatio::try_from` when the number is rejected.
    fn try_from(s: String) -> Result<Self, Self::Error> {
        let parsed = match s.parse::<f32>() {
            Ok(value) => value,
            Err(_) => return Err(InvalidSearchSemanticRatio),
        };
        let ratio = SemanticRatio::try_from(parsed)?;
        Ok(SemanticRatioGet(ratio))
    }
}
// Lets callers read the wrapped `SemanticRatio` with `*value`.
impl std::ops::Deref for SemanticRatioGet {
type Target = SemanticRatio;
/// Returns a reference to the wrapped `SemanticRatio`.
fn deref(&self) -> &Self::Target {
&self.0
}
}
impl From<SearchQueryGet> for SearchQuery {
@ -113,20 +86,6 @@ impl From<SearchQueryGet> for SearchQuery {
None => None,
};
let hybrid = match (other.hybrid_embedder, other.hybrid_semantic_ratio) {
(None, None) => None,
(None, Some(semantic_ratio)) => {
Some(HybridQuery { semantic_ratio: *semantic_ratio, embedder: None })
}
(Some(embedder), None) => Some(HybridQuery {
semantic_ratio: DEFAULT_SEMANTIC_RATIO(),
embedder: Some(embedder),
}),
(Some(embedder), Some(semantic_ratio)) => {
Some(HybridQuery { semantic_ratio: *semantic_ratio, embedder: Some(embedder) })
}
};
Self {
q: other.q,
vector: other.vector.map(CS::into_inner),
@ -149,7 +108,6 @@ impl From<SearchQueryGet> for SearchQuery {
crop_marker: other.crop_marker,
matching_strategy: other.matching_strategy,
attributes_to_search_on: other.attributes_to_search_on.map(|o| o.into_iter().collect()),
hybrid,
}
}
}
@ -186,7 +144,7 @@ pub async fn search_with_url_query(
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
debug!(parameters = ?params, "Search get");
debug!("called with params: {:?}", params);
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let mut query: SearchQuery = params.into_inner().into();
@ -200,12 +158,8 @@ pub async fn search_with_url_query(
let index = index_scheduler.index(&index_uid)?;
let features = index_scheduler.features();
let distribution = embed(&mut query, index_scheduler.get_ref(), &index).await?;
let search_result =
tokio::task::spawn_blocking(move || perform_search(&index, query, features, distribution))
.await?;
tokio::task::spawn_blocking(move || perform_search(&index, query, features)).await?;
if let Ok(ref search_result) = search_result {
aggregate.succeed(search_result);
}
@ -213,7 +167,7 @@ pub async fn search_with_url_query(
let search_result = search_result?;
debug!(returns = ?search_result, "Search get");
debug!("returns: {:?}", search_result);
Ok(HttpResponse::Ok().json(search_result))
}
@ -227,7 +181,7 @@ pub async fn search_with_post(
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let mut query = params.into_inner();
debug!(parameters = ?query, "Search post");
debug!("search called with params: {:?}", query);
// Tenant token search_rules.
if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid) {
@ -239,12 +193,8 @@ pub async fn search_with_post(
let index = index_scheduler.index(&index_uid)?;
let features = index_scheduler.features();
let distribution = embed(&mut query, index_scheduler.get_ref(), &index).await?;
let search_result =
tokio::task::spawn_blocking(move || perform_search(&index, query, features, distribution))
.await?;
tokio::task::spawn_blocking(move || perform_search(&index, query, features)).await?;
if let Ok(ref search_result) = search_result {
aggregate.succeed(search_result);
}
@ -252,84 +202,10 @@ pub async fn search_with_post(
let search_result = search_result?;
debug!(returns = ?search_result, "Search post");
debug!("returns: {:?}", search_result);
Ok(HttpResponse::Ok().json(search_result))
}
/// Prepares `query` for a hybrid search and returns the selected embedder's distribution.
///
/// Behavior depends on which of `hybrid`, `vector` and `q` are set on the query:
/// - `hybrid` set, no `vector`, and a non-blank `q`: `q` is embedded with the selected
///   embedder and the first resulting embedding is stored in `query.vector`.
/// - `hybrid` set in any other combination: nothing is embedded; a user-provided `vector`,
///   if any, is only checked against the embedder's dimensions.
/// - `hybrid` unset: the query is left untouched and `Ok(None)` is returned.
///
/// The embedder is the one named in `hybrid.embedder`, or `embedders.get_default()` when no
/// name is given.
///
/// # Errors
///
/// - the read transaction or the embedder configuration cannot be loaded,
/// - the selected embedder does not exist (`InvalidEmbedder`, naming the requested embedder,
///   or `"default"` when none was requested),
/// - the embedder fails to embed `q`,
/// - a user-provided vector does not have the embedder's dimensions
///   (`InvalidVectorDimensions`).
///
/// # Panics
///
/// Panics if the embedder returns no embeddings at all for the single submitted text.
pub async fn embed(
    query: &mut SearchQuery,
    index_scheduler: &IndexScheduler,
    index: &milli::Index,
) -> Result<Option<DistributionShift>, ResponseError> {
    match (&query.hybrid, &query.vector, &query.q) {
        (Some(HybridQuery { semantic_ratio: _, embedder }), None, Some(q))
            if !q.trim().is_empty() =>
        {
            let embedder_configs = index.embedding_configs(&index.read_txn()?)?;
            let embedders = index_scheduler.embedders(embedder_configs)?;

            let requested = embedder.as_ref();
            let found = match requested {
                Some(embedder_name) => embedders.get(embedder_name),
                None => embedders.get_default(),
            };

            // Report the embedder the user actually asked for, not always "default".
            let embedder = found
                .ok_or_else(|| {
                    milli::UserError::InvalidEmbedder(
                        requested.cloned().unwrap_or_else(|| "default".to_owned()),
                    )
                })
                .map_err(milli::Error::from)?
                .0;

            let distribution = embedder.distribution();

            let embeddings = embedder
                .embed(vec![q.to_owned()])
                .await
                .map_err(milli::vector::Error::from)
                .map_err(milli::Error::from)?
                .pop()
                .expect("No vector returned from embedding");

            if embeddings.iter().nth(1).is_some() {
                warn!("Ignoring embeddings past the first one in long search query");
                query.vector = Some(embeddings.iter().next().unwrap().to_vec());
            } else {
                query.vector = Some(embeddings.into_inner());
            }
            Ok(distribution)
        }
        (Some(hybrid), vector, _) => {
            let embedder_configs = index.embedding_configs(&index.read_txn()?)?;
            let embedders = index_scheduler.embedders(embedder_configs)?;

            let requested = hybrid.embedder.as_ref();
            let found = match requested {
                Some(embedder_name) => embedders.get(embedder_name),
                None => embedders.get_default(),
            };

            // Report the embedder the user actually asked for, not always "default".
            let embedder = found
                .ok_or_else(|| {
                    milli::UserError::InvalidEmbedder(
                        requested.cloned().unwrap_or_else(|| "default".to_owned()),
                    )
                })
                .map_err(milli::Error::from)?
                .0;

            if let Some(vector) = vector {
                if vector.len() != embedder.dimensions() {
                    return Err(meilisearch_types::milli::Error::UserError(
                        meilisearch_types::milli::UserError::InvalidVectorDimensions {
                            expected: embedder.dimensions(),
                            found: vector.len(),
                        },
                    )
                    .into());
                }
            }

            Ok(embedder.distribution())
        }
        _ => Ok(None),
    }
}
#[cfg(test)]
mod test {
use super::*;

View File

@ -2,15 +2,14 @@ use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse};
use deserr::actix_web::AwebJson;
use index_scheduler::IndexScheduler;
use log::debug;
use meilisearch_types::deserr::DeserrJsonError;
use meilisearch_types::error::ResponseError;
use meilisearch_types::facet_values_sort::FacetValuesSort;
use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::milli::update::Setting;
use meilisearch_types::settings::{settings, RankingRuleView, Settings, Unchecked};
use meilisearch_types::tasks::KindWithContent;
use serde_json::json;
use tracing::debug;
use crate::analytics::Analytics;
use crate::extractors::authentication::policies::*;
@ -24,12 +23,12 @@ macro_rules! make_setting_route {
use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse, Resource};
use index_scheduler::IndexScheduler;
use log::debug;
use meilisearch_types::error::ResponseError;
use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::milli::update::Setting;
use meilisearch_types::settings::{settings, Settings};
use meilisearch_types::tasks::KindWithContent;
use tracing::debug;
use $crate::analytics::Analytics;
use $crate::extractors::authentication::policies::*;
use $crate::extractors::authentication::GuardedData;
@ -61,7 +60,7 @@ macro_rules! make_setting_route {
.await??
.into();
debug!(returns = ?task, "Delete settings");
debug!("returns: {:?}", task);
Ok(HttpResponse::Accepted().json(task))
}
@ -78,9 +77,7 @@ macro_rules! make_setting_route {
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let body = body.into_inner();
debug!(parameters = ?body, "Update settings");
#[allow(clippy::redundant_closure_call)]
$analytics(&body, &req);
let new_settings = Settings {
@ -91,11 +88,6 @@ macro_rules! make_setting_route {
..Default::default()
};
let new_settings = $crate::routes::indexes::settings::validate_settings(
new_settings,
&index_scheduler,
)?;
let allow_index_creation =
index_scheduler.filters().allow_index_creation(&index_uid);
@ -110,7 +102,7 @@ macro_rules! make_setting_route {
.await??
.into();
debug!(returns = ?task, "Update settings");
debug!("returns: {:?}", task);
Ok(HttpResponse::Accepted().json(task))
}
@ -127,7 +119,7 @@ macro_rules! make_setting_route {
let rtxn = index.read_txn()?;
let settings = settings(&index, &rtxn)?;
debug!(returns = ?settings, "Update settings");
debug!("returns: {:?}", settings);
let mut json = serde_json::json!(&settings);
let val = json[$camelcase_attr].take();
@ -442,31 +434,6 @@ make_setting_route!(
}
);
// Instantiates `make_setting_route!` for the `proximity_precision` setting, served under
// `/proximity-precision` with `put` as the update method and exposed in JSON as
// `proximityPrecision`.
// The closure publishes a "ProximityPrecision Updated" analytics event recording whether a
// value was provided and which one (the type's default when the body holds no value).
make_setting_route!(
"/proximity-precision",
put,
meilisearch_types::settings::ProximityPrecisionView,
meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsProximityPrecision,
>,
proximity_precision,
"proximityPrecision",
analytics,
|precision: &Option<meilisearch_types::settings::ProximityPrecisionView>, req: &HttpRequest| {
use serde_json::json;
analytics.publish(
"ProximityPrecision Updated".to_string(),
json!({
"proximity_precision": {
"set": precision.is_some(),
"value": precision.unwrap_or_default(),
}
}),
Some(req),
);
}
);
make_setting_route!(
"/ranking-rules",
put,
@ -553,67 +520,6 @@ make_setting_route!(
}
);
// Instantiates `make_setting_route!` for the `embedders` setting, served under `/embedders`
// with `patch` as the update method. The body is a map from embedder name to its settings.
// The closure publishes an "Embedders Updated" analytics event whose payload is built by
// `embedder_analytics`.
make_setting_route!(
"/embedders",
patch,
std::collections::BTreeMap<String, Setting<meilisearch_types::milli::vector::settings::EmbeddingSettings>>,
meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsEmbedders,
>,
embedders,
"embedders",
analytics,
|setting: &Option<std::collections::BTreeMap<String, Setting<meilisearch_types::milli::vector::settings::EmbeddingSettings>>>, req: &HttpRequest| {
analytics.publish(
"Embedders Updated".to_string(),
serde_json::json!({"embedders": crate::routes::indexes::settings::embedder_analytics(setting.as_ref())}),
Some(req),
);
}
);
/// Builds the analytics payload describing an `embedders` settings update.
///
/// The returned JSON object contains:
/// - `total`: the number of embedders in the update, or `null` when `setting` is `None`,
/// - `sources`: the distinct sources (`openAi`, `huggingFace`, `userProvided`) of the
///   embedders whose settings and source are both explicitly set,
/// - `document_template_used`: whether any explicitly set embedder sets a document template,
///   or `null` when `setting` is `None`.
///
/// Settings are only borrowed: nothing is cloned to compute the payload.
fn embedder_analytics(
    setting: Option<
        &std::collections::BTreeMap<
            String,
            Setting<meilisearch_types::milli::vector::settings::EmbeddingSettings>,
        >,
    >,
) -> serde_json::Value {
    use meilisearch_types::milli::vector::settings::EmbedderSource;

    let sources: std::collections::HashSet<&str> = setting
        .into_iter()
        .flat_map(|embedders| embedders.values())
        // Only embedders that are explicitly set carry a configuration to inspect.
        .filter_map(|config| config.as_ref().set())
        .filter_map(|config| config.source.as_ref().set())
        .map(|source| match source {
            EmbedderSource::OpenAi => "openAi",
            EmbedderSource::HuggingFace => "huggingFace",
            EmbedderSource::UserProvided => "userProvided",
        })
        .collect();

    let document_template_used = setting.map(|embedders| {
        embedders
            .values()
            .filter_map(|config| config.as_ref().set())
            .any(|config| config.document_template.as_ref().set().is_some())
    });

    json!(
        {
            "total": setting.map(|s| s.len()),
            "sources": sources,
            "document_template_used": document_template_used,
        }
    )
}
macro_rules! generate_configure {
($($mod:ident),*) => {
pub fn configure(cfg: &mut web::ServiceConfig) {
@ -634,7 +540,6 @@ generate_configure!(
displayed_attributes,
searchable_attributes,
distinct_attribute,
proximity_precision,
stop_words,
separator_tokens,
non_separator_tokens,
@ -643,8 +548,7 @@ generate_configure!(
ranking_rules,
typo_tolerance,
pagination,
faceting,
embedders
faceting
);
pub async fn update_all(
@ -657,8 +561,6 @@ pub async fn update_all(
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let new_settings = body.into_inner();
debug!(parameters = ?new_settings, "Update all settings");
let new_settings = validate_settings(new_settings, &index_scheduler)?;
analytics.publish(
"Settings Updated".to_string(),
@ -691,10 +593,6 @@ pub async fn update_all(
"distinct_attribute": {
"set": new_settings.distinct_attribute.as_ref().set().is_some()
},
"proximity_precision": {
"set": new_settings.proximity_precision.as_ref().set().is_some(),
"value": new_settings.proximity_precision.as_ref().set().copied().unwrap_or_default()
},
"typo_tolerance": {
"enabled": new_settings.typo_tolerance
.as_ref()
@ -754,7 +652,6 @@ pub async fn update_all(
"synonyms": {
"total": new_settings.synonyms.as_ref().set().map(|synonyms| synonyms.len()),
},
"embedders": crate::routes::indexes::settings::embedder_analytics(new_settings.embedders.as_ref().set())
}),
Some(&req),
);
@ -770,7 +667,7 @@ pub async fn update_all(
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
debug!(returns = ?task, "Update all settings");
debug!("returns: {:?}", task);
Ok(HttpResponse::Accepted().json(task))
}
@ -783,7 +680,7 @@ pub async fn get_all(
let index = index_scheduler.index(&index_uid)?;
let rtxn = index.read_txn()?;
let new_settings = settings(&index, &rtxn)?;
debug!(returns = ?new_settings, "Get all settings");
debug!("returns: {:?}", new_settings);
Ok(HttpResponse::Ok().json(new_settings))
}
@ -806,16 +703,6 @@ pub async fn delete_all(
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
debug!(returns = ?task, "Delete all settings");
debug!("returns: {:?}", task);
Ok(HttpResponse::Accepted().json(task))
}
/// Checks that `settings` may be applied, then runs the settings' own validation.
///
/// Explicitly setting `embedders` goes through the `check_vector` feature check of the
/// scheduler; its error is returned as is. The result of `settings.validate()` is returned
/// otherwise.
fn validate_settings(
    settings: Settings<Unchecked>,
    index_scheduler: &IndexScheduler,
) -> Result<Settings<Unchecked>, ResponseError> {
    let sets_embedders = matches!(settings.embedders, Setting::Set(_));
    if sets_embedders {
        index_scheduler.features().check_vector("Passing `embedders` in settings")?;
    }

    let validated = settings.validate()?;
    Ok(validated)
}

View File

@ -1,281 +0,0 @@
use std::convert::Infallible;
use std::io::Write;
use std::ops::ControlFlow;
use std::pin::Pin;
use std::str::FromStr;
use std::sync::Arc;
use actix_web::web::{Bytes, Data};
use actix_web::{web, HttpResponse};
use deserr::actix_web::AwebJson;
use deserr::{DeserializeError, Deserr, ErrorKind, MergeWithError, ValuePointerRef};
use futures_util::Stream;
use index_scheduler::IndexScheduler;
use meilisearch_types::deserr::DeserrJsonError;
use meilisearch_types::error::deserr_codes::*;
use meilisearch_types::error::{Code, ResponseError};
use tokio::sync::mpsc;
use tracing_subscriber::filter::Targets;
use tracing_subscriber::Layer;
use crate::error::MeilisearchHttpError;
use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::GuardedData;
use crate::extractors::sequential_extractor::SeqHandler;
use crate::LogRouteHandle;
/// Registers the `stream` resource: `POST` opens the log stream, `DELETE` cancels it.
pub fn configure(cfg: &mut web::ServiceConfig) {
    let stream = web::resource("stream")
        .route(web::post().to(SeqHandler(get_logs)))
        .route(web::delete().to(SeqHandler(cancel_logs)));

    cfg.service(stream);
}
/// Kind of output produced by the log stream.
///
/// Deserialized by deserr from the lowercase variant name; defaults to `Fmt`.
#[derive(Debug, Default, Clone, Copy, Deserr, PartialEq, Eq)]
#[deserr(rename_all = lowercase)]
pub enum LogMode {
/// Logs formatted by a `tracing_subscriber::fmt` layer.
#[default]
Fmt,
/// Entries recorded by a `tracing_trace` layer, serialized as JSON.
Profile,
}
/// Simple wrapper around the `Targets` from `tracing_subscriber`.
///
/// It has its own `FromStr` implementation, which rejects the empty string with a usage
/// message and reports failures as `MyParseError`, so that it can be used as a deserr field.
#[derive(Clone, Debug)]
struct MyTargets(Targets);
/// Simple wrapper around the `ParseError` from `tracing_subscriber` to implement `MergeWithError` on it.
///
/// Also carries the usage message returned when the target string is empty.
#[derive(Debug, thiserror::Error)]
enum MyParseError {
/// The string was rejected by `Targets::from_str`; displays the underlying error as is.
#[error(transparent)]
ParseError(#[from] tracing_subscriber::filter::ParseError),
/// The string was empty; displays a short usage reminder.
#[error(
"Empty string is not a valid target. If you want to get no logs use `OFF`. Usage: `info`, `info:meilisearch`, or you can write multiple filters in one target: `index_scheduler=info,milli=trace`"
)]
Example,
}
impl FromStr for MyTargets {
type Err = MyParseError;
fn from_str(s: &str) -> Result<Self, Self::Err> {
if s.is_empty() {
Err(MyParseError::Example)
} else {
Ok(MyTargets(Targets::from_str(s).map_err(MyParseError::ParseError)?))
}
}
}
impl MergeWithError<MyParseError> for DeserrJsonError<BadRequest> {
    /// Turns a target parsing failure into an "unexpected" deserr error located at
    /// `merge_location`, using the parse error's display text as the message.
    ///
    /// Any previously accumulated error is ignored.
    fn merge(
        _previous: Option<Self>,
        parse_error: MyParseError,
        merge_location: ValuePointerRef,
    ) -> ControlFlow<Self, Self> {
        let kind = ErrorKind::Unexpected { msg: parse_error.to_string() };
        Self::error::<Infallible>(None, kind, merge_location)
    }
}
#[derive(Debug, Deserr)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields, validate = validate_get_logs -> DeserrJsonError<InvalidSettingsTypoTolerance>)]
pub struct GetLogs {
#[deserr(default = "info".parse().unwrap(), try_from(&String) = MyTargets::from_str -> DeserrJsonError<BadRequest>)]
target: MyTargets,
#[deserr(default, error = DeserrJsonError<BadRequest>)]
mode: LogMode,
#[deserr(default = false, error = DeserrJsonError<BadRequest>)]
profile_memory: bool,
}
fn validate_get_logs<E: DeserializeError>(
logs: GetLogs,
location: ValuePointerRef,
) -> Result<GetLogs, E> {
if logs.profile_memory && logs.mode != LogMode::Profile {
Err(deserr::take_cf_content(E::error::<Infallible>(
None,
ErrorKind::Unexpected {
msg: format!("`profile_memory` can only be used while profiling code and is not compatible with the {:?} mode.", logs.mode),
},
location,
)))
} else {
Ok(logs)
}
}
/// `std::io::Write` adapter that forwards every written buffer to an unbounded channel.
struct LogWriter {
// Sending half of the channel; each `write` call sends one owned copy of the buffer.
sender: mpsc::UnboundedSender<Vec<u8>>,
}
impl Write for LogWriter {
    /// Sends a copy of `buf` over the channel and reports the whole buffer as written.
    ///
    /// A failed send is converted into an `std::io::Error`.
    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
        match self.sender.send(buf.to_vec()) {
            Ok(()) => Ok(buf.len()),
            Err(send_error) => Err(std::io::Error::other(send_error)),
        }
    }

    /// Nothing is buffered on this side, so flushing always succeeds.
    fn flush(&mut self) -> std::io::Result<()> {
        Ok(())
    }
}
/// Guard that clears the inner layer of the logs route when it is dropped
/// (see its `Drop` implementation).
struct HandleGuard {
/// We need to keep a handle on the logs to make it available again when the streamer is dropped
logs: Arc<LogRouteHandle>,
}
impl Drop for HandleGuard {
    /// Resets the inner layer of the logs route to `None`; a failure is only logged.
    fn drop(&mut self) {
        let freed = self.logs.modify(|layer| *layer.inner_mut() = None);
        if let Err(e) = freed {
            tracing::error!("Could not free the logs route: {e}");
        }
    }
}
fn byte_stream(
receiver: mpsc::UnboundedReceiver<Vec<u8>>,
guard: HandleGuard,
) -> impl futures_util::Stream<Item = Result<Bytes, ResponseError>> {
futures_util::stream::unfold((receiver, guard), move |(mut receiver, guard)| async move {
let vec = receiver.recv().await;
vec.map(From::from).map(Ok).map(|a| (a, (receiver, guard)))
})
}
/// Boxed, pinned stream of response chunks returned to the HTTP client.
type PinnedByteStream = Pin<Box<dyn Stream<Item = Result<Bytes, ResponseError>>>>;

/// Builds the tracing layer matching `opt.mode` together with the stream that exposes its
/// output.
///
/// - `LogMode::Fmt`: a `fmt` layer writing through a `LogWriter` into an unbounded channel,
///   read back by `byte_stream`.
/// - `LogMode::Profile`: a `tracing_trace` layer whose entries are read back by
///   `entry_stream`.
///
/// In both cases the stream owns a `HandleGuard` built from `logs`.
fn make_layer<S>(
    opt: &GetLogs,
    logs: Data<LogRouteHandle>,
) -> (Box<dyn Layer<S> + Send + Sync>, PinnedByteStream)
where
    S: tracing::Subscriber + for<'span> tracing_subscriber::registry::LookupSpan<'span>,
{
    let guard = HandleGuard { logs: logs.into_inner() };

    match opt.mode {
        LogMode::Fmt => {
            let (sender, receiver) = tokio::sync::mpsc::unbounded_channel();

            let layer = tracing_subscriber::fmt::layer()
                .with_writer(move || LogWriter { sender: sender.clone() })
                .with_span_events(tracing_subscriber::fmt::format::FmtSpan::ACTIVE);
            let layer: Box<dyn Layer<S> + Send + Sync> = Box::new(layer);

            let stream: PinnedByteStream = Box::pin(byte_stream(receiver, guard));
            (layer, stream)
        }
        LogMode::Profile => {
            let (trace, layer) = tracing_trace::Trace::new(opt.profile_memory);
            let layer: Box<dyn Layer<S> + Send + Sync> = Box::new(layer);

            let stream: PinnedByteStream = Box::pin(entry_stream(trace, guard));
            (layer, stream)
        }
    }
}
/// Turns the entries recorded by a `tracing_trace::Trace` into a stream of JSON chunks.
///
/// Each chunk is built by repeatedly receiving up to 100 entries and serializing them with
/// `serde_json` into one buffer, until the buffer holds at least 8192 bytes or no entry
/// arrives within one second. The stream ends when the channel is closed and nothing is left
/// to send. A serialization failure is logged and yielded as an internal `ResponseError`.
///
/// `guard` is carried in the stream state, so it is dropped together with the stream.
fn entry_stream(
    trace: tracing_trace::Trace,
    guard: HandleGuard,
) -> impl Stream<Item = Result<Bytes, ResponseError>> {
    let receiver = trace.into_receiver();
    let entry_buf = Vec::new();

    futures_util::stream::unfold(
        (receiver, entry_buf, guard),
        move |(mut receiver, mut entry_buf, guard)| async move {
            let mut bytes = Vec::new();

            while bytes.len() < 8192 {
                entry_buf.clear();

                // Wait at most one second for new entries; on timeout, send what we have.
                let Ok(count) = tokio::time::timeout(
                    std::time::Duration::from_secs(1),
                    receiver.recv_many(&mut entry_buf, 100),
                )
                .await
                else {
                    break;
                };

                if count == 0 {
                    if !bytes.is_empty() {
                        break;
                    }

                    // channel closed, exit
                    return None;
                }

                for entry in &entry_buf {
                    if let Err(error) = serde_json::to_writer(&mut bytes, entry) {
                        tracing::error!(
                            error = &error as &dyn std::error::Error,
                            "serializing entry"
                        );
                        return Some((
                            Err(ResponseError::from_msg(
                                format!("error serializing entry: {error}"),
                                Code::Internal,
                            )),
                            (receiver, entry_buf, guard),
                        ));
                    }
                }
            }

            Some((Ok(bytes.into()), (receiver, entry_buf, guard)))
        },
    )
}
pub async fn get_logs(
index_scheduler: GuardedData<ActionPolicy<{ actions::METRICS_GET }>, Data<IndexScheduler>>,
logs: Data<LogRouteHandle>,
body: AwebJson<GetLogs, DeserrJsonError>,
) -> Result<HttpResponse, ResponseError> {
index_scheduler.features().check_logs_route()?;
let opt = body.into_inner();
let mut stream = None;
logs.modify(|layer| match layer.inner_mut() {
None => {
// there is no one getting logs
*layer.filter_mut() = opt.target.0.clone();
let (new_layer, new_stream) = make_layer(&opt, logs.clone());
*layer.inner_mut() = Some(new_layer);
stream = Some(new_stream);
}
Some(_) => {
// there is already someone getting logs
}
})
.unwrap();
if let Some(stream) = stream {
Ok(HttpResponse::Ok().streaming(stream))
} else {
Err(MeilisearchHttpError::AlreadyUsedLogRoute.into())
}
}
/// Detaches the current log stream, if any, and answers `204 No Content`.
///
/// The route is guarded by the `METRICS_GET` action and by the `check_logs_route` feature
/// check. A failure to reset the layer is only logged; the response is still `204`.
pub async fn cancel_logs(
    index_scheduler: GuardedData<ActionPolicy<{ actions::METRICS_GET }>, Data<IndexScheduler>>,
    logs: Data<LogRouteHandle>,
) -> Result<HttpResponse, ResponseError> {
    index_scheduler.features().check_logs_route()?;

    let freed = logs.modify(|layer| *layer.inner_mut() = None);
    if let Err(e) = freed {
        tracing::error!("Could not free the logs route: {e}");
    }

    Ok(HttpResponse::NoContent().finish())
}

View File

@ -3,6 +3,7 @@ use std::collections::BTreeMap;
use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse};
use index_scheduler::IndexScheduler;
use log::debug;
use meilisearch_auth::AuthController;
use meilisearch_types::error::ResponseError;
use meilisearch_types::settings::{Settings, Unchecked};
@ -10,7 +11,6 @@ use meilisearch_types::tasks::{Kind, Status, Task, TaskId};
use serde::{Deserialize, Serialize};
use serde_json::json;
use time::OffsetDateTime;
use tracing::debug;
use crate::analytics::Analytics;
use crate::extractors::authentication::policies::*;
@ -22,7 +22,6 @@ mod api_key;
mod dump;
pub mod features;
pub mod indexes;
mod logs;
mod metrics;
mod multi_search;
mod snapshot;
@ -32,7 +31,6 @@ pub mod tasks;
pub fn configure(cfg: &mut web::ServiceConfig) {
cfg.service(web::scope("/tasks").configure(tasks::configure))
.service(web::resource("/health").route(web::get().to(get_health)))
.service(web::scope("/logs").configure(logs::configure))
.service(web::scope("/keys").configure(api_key::configure))
.service(web::scope("/dumps").configure(dump::configure))
.service(web::scope("/snapshots").configure(snapshot::configure))
@ -252,7 +250,7 @@ async fn get_stats(
let stats = create_all_stats((*index_scheduler).clone(), (*auth_controller).clone(), filters)?;
debug!(returns = ?stats, "Get stats");
debug!("returns: {:?}", stats);
Ok(HttpResponse::Ok().json(stats))
}

View File

@ -3,17 +3,16 @@ use actix_web::web::{self, Data};
use actix_web::{HttpRequest, HttpResponse};
use deserr::actix_web::AwebJson;
use index_scheduler::IndexScheduler;
use log::debug;
use meilisearch_types::deserr::DeserrJsonError;
use meilisearch_types::error::ResponseError;
use meilisearch_types::keys::actions;
use serde::Serialize;
use tracing::debug;
use crate::analytics::{Analytics, MultiSearchAggregator};
use crate::extractors::authentication::policies::ActionPolicy;
use crate::extractors::authentication::{AuthenticationError, GuardedData};
use crate::extractors::sequential_extractor::SeqHandler;
use crate::routes::indexes::search::embed;
use crate::search::{
add_search_rules, perform_search, SearchQueryWithIndex, SearchResultWithIndex,
};
@ -47,51 +46,49 @@ pub async fn multi_search_with_post(
// Explicitly expect a `(ResponseError, usize)` for the error type rather than `ResponseError` only,
// so that `?` doesn't work if it doesn't use `with_index`, ensuring that it is not forgotten in case of code
// changes.
let search_results: Result<_, (ResponseError, usize)> = async {
let mut search_results = Vec::with_capacity(queries.len());
for (query_index, (index_uid, mut query)) in
queries.into_iter().map(SearchQueryWithIndex::into_index_query).enumerate()
{
debug!(on_index = query_index, parameters = ?query, "Multi-search");
// Check index from API key
if !index_scheduler.filters().is_index_authorized(&index_uid) {
return Err(AuthenticationError::InvalidToken).with_index(query_index);
}
// Apply search rules from tenant token
if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid)
let search_results: Result<_, (ResponseError, usize)> = (|| {
async {
let mut search_results = Vec::with_capacity(queries.len());
for (query_index, (index_uid, mut query)) in
queries.into_iter().map(SearchQueryWithIndex::into_index_query).enumerate()
{
add_search_rules(&mut query, search_rules);
debug!("multi-search #{query_index}: called with params: {:?}", query);
// Check index from API key
if !index_scheduler.filters().is_index_authorized(&index_uid) {
return Err(AuthenticationError::InvalidToken).with_index(query_index);
}
// Apply search rules from tenant token
if let Some(search_rules) =
index_scheduler.filters().get_index_search_rules(&index_uid)
{
add_search_rules(&mut query, search_rules);
}
let index = index_scheduler
.index(&index_uid)
.map_err(|err| {
let mut err = ResponseError::from(err);
// Patch the HTTP status code to 400 as it defaults to 404 for `index_not_found`, but
// here the resource not found is not part of the URL.
err.code = StatusCode::BAD_REQUEST;
err
})
.with_index(query_index)?;
let search_result =
tokio::task::spawn_blocking(move || perform_search(&index, query, features))
.await
.with_index(query_index)?;
search_results.push(SearchResultWithIndex {
index_uid: index_uid.into_inner(),
result: search_result.with_index(query_index)?,
});
}
let index = index_scheduler
.index(&index_uid)
.map_err(|err| {
let mut err = ResponseError::from(err);
// Patch the HTTP status code to 400 as it defaults to 404 for `index_not_found`, but
// here the resource not found is not part of the URL.
err.code = StatusCode::BAD_REQUEST;
err
})
.with_index(query_index)?;
let distribution = embed(&mut query, index_scheduler.get_ref(), &index)
.await
.with_index(query_index)?;
let search_result = tokio::task::spawn_blocking(move || {
perform_search(&index, query, features, distribution)
})
.await
.with_index(query_index)?;
search_results.push(SearchResultWithIndex {
index_uid: index_uid.into_inner(),
result: search_result.with_index(query_index)?,
});
Ok(search_results)
}
Ok(search_results)
}
})()
.await;
if search_results.is_ok() {
@ -107,7 +104,7 @@ pub async fn multi_search_with_post(
err
})?;
debug!(returns = ?search_results, "Multi-search");
debug!("returns: {:?}", search_results);
Ok(HttpResponse::Ok().json(SearchResults { results: search_results }))
}

View File

@ -1,10 +1,10 @@
use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse};
use index_scheduler::IndexScheduler;
use log::debug;
use meilisearch_types::error::ResponseError;
use meilisearch_types::tasks::KindWithContent;
use serde_json::json;
use tracing::debug;
use crate::analytics::Analytics;
use crate::extractors::authentication::policies::*;
@ -27,6 +27,6 @@ pub async fn create_snapshot(
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
debug!(returns = ?task, "Create snapshot");
debug!("returns: {:?}", task);
Ok(HttpResponse::Accepted().json(task))
}

View File

@ -8,9 +8,11 @@ use meilisearch_types::deserr::DeserrQueryParamError;
use meilisearch_types::error::deserr_codes::*;
use meilisearch_types::error::{InvalidTaskDateError, ResponseError};
use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::settings::{Settings, Unchecked};
use meilisearch_types::star_or::{OptionStarOr, OptionStarOrList};
use meilisearch_types::task_view::TaskView;
use meilisearch_types::tasks::{Kind, KindWithContent, Status};
use meilisearch_types::tasks::{
serialize_duration, Details, IndexSwap, Kind, KindWithContent, Status, Task,
};
use serde::Serialize;
use serde_json::json;
use time::format_description::well_known::Rfc3339;
@ -35,6 +37,140 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
.service(web::resource("/cancel").route(web::post().to(SeqHandler(cancel_tasks))))
.service(web::resource("/{task_id}").route(web::get().to(SeqHandler(get_task))));
}
/// Serializable view of a [`Task`], built with [`TaskView::from_task`].
///
/// Field names are serialized in camelCase, except `kind`, which is emitted
/// under the key `type`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct TaskView {
/// Identifier of the task.
pub uid: TaskId,
/// Uid of the index the task relates to, if any.
#[serde(default)]
pub index_uid: Option<String>,
/// Current status of the task.
pub status: Status,
/// Kind of the task; serialized as `type`.
#[serde(rename = "type")]
pub kind: Kind,
/// Identifier of the task that canceled this one, if any.
pub canceled_by: Option<TaskId>,
/// Kind-specific details; the key is omitted entirely when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub details: Option<DetailsView>,
/// Error attached to the task, if any.
pub error: Option<ResponseError>,
/// Time elapsed between `started_at` and `finished_at`, written with `serialize_duration`.
#[serde(serialize_with = "serialize_duration", default)]
pub duration: Option<Duration>,
/// Enqueue date, serialized as an RFC 3339 string.
#[serde(with = "time::serde::rfc3339")]
pub enqueued_at: OffsetDateTime,
/// Start date, serialized as an optional RFC 3339 string.
#[serde(with = "time::serde::rfc3339::option", default)]
pub started_at: Option<OffsetDateTime>,
/// Finish date, serialized as an optional RFC 3339 string.
#[serde(with = "time::serde::rfc3339::option", default)]
pub finished_at: Option<OffsetDateTime>,
}
impl TaskView {
    /// Builds the view of `task`.
    ///
    /// Most fields are copied or cloned from the task as-is. `details` is converted
    /// through `DetailsView::from`, and `duration` is derived from the start and
    /// finish dates.
    pub fn from_task(task: &Task) -> TaskView {
        let started_at = task.started_at;
        let finished_at = task.finished_at;

        // A duration only exists once the task has both started and finished.
        let duration = match (started_at, finished_at) {
            (Some(start), Some(end)) => Some(end - start),
            _ => None,
        };

        let index_uid = task.index_uid().map(ToOwned::to_owned);
        let details = task.details.clone().map(DetailsView::from);

        TaskView {
            uid: task.uid,
            index_uid,
            status: task.status,
            kind: task.kind.as_kind(),
            canceled_by: task.canceled_by,
            details,
            error: task.error.clone(),
            duration,
            enqueued_at: task.enqueued_at,
            started_at,
            finished_at,
        }
    }
}
/// Serializable view of a task's [`Details`], with one optional field per piece of
/// information a task kind can report.
///
/// Every field is skipped during serialization when its outer `Option` is `None`.
/// For the `Option<Option<_>>` fields, `Some(None)` therefore means "emit the key
/// with a `null` value", while `None` means "do not emit the key at all".
/// Field names are serialized in camelCase.
#[derive(Default, Debug, PartialEq, Eq, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct DetailsView {
#[serde(skip_serializing_if = "Option::is_none")]
pub received_documents: Option<u64>,
#[serde(skip_serializing_if = "Option::is_none")]
pub indexed_documents: Option<Option<u64>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub primary_key: Option<Option<String>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub provided_ids: Option<usize>,
#[serde(skip_serializing_if = "Option::is_none")]
pub deleted_documents: Option<Option<u64>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub matched_tasks: Option<u64>,
#[serde(skip_serializing_if = "Option::is_none")]
pub canceled_tasks: Option<Option<u64>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub deleted_tasks: Option<Option<u64>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub original_filter: Option<Option<String>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub dump_uid: Option<Option<String>>,
// Flattened: the settings' own keys are written directly into the details
// object rather than under a nested `settings` key.
#[serde(skip_serializing_if = "Option::is_none")]
#[serde(flatten)]
pub settings: Option<Box<Settings<Unchecked>>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub swaps: Option<Vec<IndexSwap>>,
}
impl From<Details> for DetailsView {
    /// Converts task [`Details`] into their view: only the fields relevant to the
    /// variant are filled, every other field keeps its default value (`None`).
    fn from(details: Details) -> Self {
        let mut view = Self::default();

        match details {
            Details::DocumentAdditionOrUpdate { received_documents, indexed_documents } => {
                view.received_documents = Some(received_documents);
                view.indexed_documents = Some(indexed_documents);
            }
            Details::SettingsUpdate { settings } => view.settings = Some(settings),
            Details::IndexInfo { primary_key } => view.primary_key = Some(primary_key),
            Details::DocumentDeletion { provided_ids, deleted_documents } => {
                view.provided_ids = Some(provided_ids);
                view.deleted_documents = Some(deleted_documents);
                // Deletion by ids has no filter; the key is still emitted, as `null`.
                view.original_filter = Some(None);
            }
            Details::DocumentDeletionByFilter { original_filter, deleted_documents } => {
                // Deletion by filter reports zero provided ids.
                view.provided_ids = Some(0);
                view.original_filter = Some(Some(original_filter));
                view.deleted_documents = Some(deleted_documents);
            }
            Details::ClearAll { deleted_documents } => {
                view.deleted_documents = Some(deleted_documents);
            }
            Details::TaskCancelation { matched_tasks, canceled_tasks, original_filter } => {
                view.matched_tasks = Some(matched_tasks);
                view.canceled_tasks = Some(canceled_tasks);
                view.original_filter = Some(Some(original_filter));
            }
            Details::TaskDeletion { matched_tasks, deleted_tasks, original_filter } => {
                view.matched_tasks = Some(matched_tasks);
                view.deleted_tasks = Some(deleted_tasks);
                view.original_filter = Some(Some(original_filter));
            }
            Details::Dump { dump_uid } => view.dump_uid = Some(dump_uid),
            Details::IndexSwap { swaps } => view.swaps = Some(swaps),
        }

        view
    }
}
#[derive(Debug, Deserr)]
#[deserr(error = DeserrQueryParamError, rename_all = camelCase, deny_unknown_fields)]
pub struct TasksFilterQuery {

View File

@ -7,21 +7,24 @@ use deserr::Deserr;
use either::Either;
use index_scheduler::RoFeatures;
use indexmap::IndexMap;
use log::warn;
use meilisearch_auth::IndexSearchRules;
use meilisearch_types::deserr::DeserrJsonError;
use meilisearch_types::error::deserr_codes::*;
use meilisearch_types::heed::RoTxn;
use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::milli::score_details::{self, ScoreDetails, ScoringStrategy};
use meilisearch_types::milli::vector::DistributionShift;
use meilisearch_types::milli::{FacetValueHit, OrderBy, SearchForFacetValues};
use meilisearch_types::milli::score_details::{ScoreDetails, ScoringStrategy};
use meilisearch_types::milli::{
dot_product_similarity, FacetValueHit, InternalError, OrderBy, SearchForFacetValues,
};
use meilisearch_types::settings::DEFAULT_PAGINATION_MAX_TOTAL_HITS;
use meilisearch_types::{milli, Document};
use milli::tokenizer::TokenizerBuilder;
use milli::{
AscDesc, FieldId, FieldsIdsMap, Filter, FormatOptions, Index, MatchBounds, MatcherBuilder,
SortError, TermsMatchingStrategy, DEFAULT_VALUES_PER_FACET,
SortError, TermsMatchingStrategy, VectorOrArrayOfVectors, DEFAULT_VALUES_PER_FACET,
};
use ordered_float::OrderedFloat;
use regex::Regex;
use serde::Serialize;
use serde_json::{json, Value};
@ -36,7 +39,6 @@ pub const DEFAULT_CROP_LENGTH: fn() -> usize = || 10;
pub const DEFAULT_CROP_MARKER: fn() -> String = || "…".to_string();
pub const DEFAULT_HIGHLIGHT_PRE_TAG: fn() -> String = || "<em>".to_string();
pub const DEFAULT_HIGHLIGHT_POST_TAG: fn() -> String = || "</em>".to_string();
pub const DEFAULT_SEMANTIC_RATIO: fn() -> SemanticRatio = || SemanticRatio(0.5);
#[derive(Debug, Clone, Default, PartialEq, Deserr)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
@ -45,8 +47,6 @@ pub struct SearchQuery {
pub q: Option<String>,
#[deserr(default, error = DeserrJsonError<InvalidSearchVector>)]
pub vector: Option<Vec<f32>>,
#[deserr(default, error = DeserrJsonError<InvalidHybridQuery>)]
pub hybrid: Option<HybridQuery>,
#[deserr(default = DEFAULT_SEARCH_OFFSET(), error = DeserrJsonError<InvalidSearchOffset>)]
pub offset: usize,
#[deserr(default = DEFAULT_SEARCH_LIMIT(), error = DeserrJsonError<InvalidSearchLimit>)]
@ -87,48 +87,6 @@ pub struct SearchQuery {
pub attributes_to_search_on: Option<Vec<String>>,
}
/// Parameters of the `hybrid` field of a search query.
///
/// Deserialized with camelCase field names; unknown fields are rejected.
#[derive(Debug, Clone, Default, PartialEq, Deserr)]
#[deserr(error = DeserrJsonError<InvalidHybridQuery>, rename_all = camelCase, deny_unknown_fields)]
pub struct HybridQuery {
/// Balance between keyword and vector search: at 0.0 only the query string
/// impacts the results, at 1.0 only the vector does.
/// The bounds are checked by `SemanticRatio`'s `TryFrom<f32>` implementation.
#[deserr(default, error = DeserrJsonError<InvalidSearchSemanticRatio>, default)]
pub semantic_ratio: SemanticRatio,
/// Name of the embedder to use; when set it is handed to the search through `embedder_name`.
#[deserr(default, error = DeserrJsonError<InvalidEmbedder>, default)]
pub embedder: Option<String>,
}
/// Semantic ratio of a hybrid search, wrapping an `f32`.
///
/// Deserialized from an `f32` through `TryFrom::try_from`; a rejected value is
/// reported as `InvalidSearchSemanticRatio`.
#[derive(Debug, Clone, Copy, PartialEq, Deserr)]
#[deserr(try_from(f32) = TryFrom::try_from -> InvalidSearchSemanticRatio)]
pub struct SemanticRatio(f32);
impl Default for SemanticRatio {
/// Returns the value produced by `DEFAULT_SEMANTIC_RATIO`.
fn default() -> Self {
DEFAULT_SEMANTIC_RATIO()
}
}
impl std::convert::TryFrom<f32> for SemanticRatio {
    type Error = InvalidSearchSemanticRatio;

    /// Wraps `f` into a [`SemanticRatio`] if it lies within `0.0..=1.0`.
    ///
    /// # Errors
    ///
    /// Returns [`InvalidSearchSemanticRatio`] when `f` is below 0.0, above 1.0, or NaN.
    fn try_from(f: f32) -> Result<Self, Self::Error> {
        // Written as a positive range check on purpose: every comparison involving
        // NaN is false, so a pair of `f > 1.0 || f < 0.0` tests lets NaN through,
        // whereas `contains` rejects it.
        if (0.0..=1.0).contains(&f) {
            Ok(SemanticRatio(f))
        } else {
            Err(InvalidSearchSemanticRatio)
        }
    }
}
// Gives read access to the wrapped `f32`, so callers can write `*ratio`.
impl std::ops::Deref for SemanticRatio {
type Target = f32;
fn deref(&self) -> &Self::Target {
&self.0
}
}
impl SearchQuery {
pub fn is_finite_pagination(&self) -> bool {
self.page.or(self.hits_per_page).is_some()
@ -148,8 +106,6 @@ pub struct SearchQueryWithIndex {
pub q: Option<String>,
#[deserr(default, error = DeserrJsonError<InvalidSearchQ>)]
pub vector: Option<Vec<f32>>,
#[deserr(default, error = DeserrJsonError<InvalidHybridQuery>)]
pub hybrid: Option<HybridQuery>,
#[deserr(default = DEFAULT_SEARCH_OFFSET(), error = DeserrJsonError<InvalidSearchOffset>)]
pub offset: usize,
#[deserr(default = DEFAULT_SEARCH_LIMIT(), error = DeserrJsonError<InvalidSearchLimit>)]
@ -215,7 +171,6 @@ impl SearchQueryWithIndex {
crop_marker,
matching_strategy,
attributes_to_search_on,
hybrid,
} = self;
(
index_uid,
@ -241,7 +196,6 @@ impl SearchQueryWithIndex {
crop_marker,
matching_strategy,
attributes_to_search_on,
hybrid,
// do not use ..Default::default() here,
// rather add any missing field from `SearchQuery` to `SearchQueryWithIndex`
},
@ -381,44 +335,19 @@ fn prepare_search<'t>(
rtxn: &'t RoTxn,
query: &'t SearchQuery,
features: RoFeatures,
distribution: Option<DistributionShift>,
) -> Result<(milli::Search<'t>, bool, usize, usize), MeilisearchHttpError> {
let mut search = index.search(rtxn);
if query.vector.is_some() {
features.check_vector("Passing `vector` as a query parameter")?;
if query.vector.is_some() && query.q.is_some() {
warn!("Ignoring the query string `q` when used with the `vector` parameter.");
}
if query.hybrid.is_some() {
features.check_vector("Passing `hybrid` as a query parameter")?;
}
if query.hybrid.is_none() && query.q.is_some() && query.vector.is_some() {
return Err(MeilisearchHttpError::MissingSearchHybrid);
}
search.distribution_shift(distribution);
if let Some(ref vector) = query.vector {
match &query.hybrid {
// If semantic ratio is 0.0, only the query search will impact the search results,
// skip the vector
Some(hybrid) if *hybrid.semantic_ratio == 0.0 => (),
_otherwise => {
search.vector(vector.clone());
}
}
search.vector(vector.clone());
}
if let Some(ref q) = query.q {
match &query.hybrid {
// If semantic ratio is 1.0, only the vector search will impact the search results,
// skip the query
Some(hybrid) if *hybrid.semantic_ratio == 1.0 => (),
_otherwise => {
search.query(q);
}
}
if let Some(ref query) = query.q {
search.query(query);
}
if let Some(ref searchable) = query.attributes_to_search_on {
@ -431,7 +360,6 @@ fn prepare_search<'t>(
let max_total_hits = index
.pagination_max_total_hits(rtxn)
.map_err(milli::Error::from)?
.map(|x| x as usize)
.unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS);
search.exhaustive_number_hits(is_finite_pagination);
@ -445,8 +373,8 @@ fn prepare_search<'t>(
features.check_score_details()?;
}
if let Some(HybridQuery { embedder: Some(embedder), .. }) = &query.hybrid {
search.embedder_name(embedder);
if query.vector.is_some() {
features.check_vector()?;
}
// compute the offset on the limit depending on the pagination mode.
@ -492,22 +420,15 @@ pub fn perform_search(
index: &Index,
query: SearchQuery,
features: RoFeatures,
distribution: Option<DistributionShift>,
) -> Result<SearchResult, MeilisearchHttpError> {
let before_search = Instant::now();
let rtxn = index.read_txn()?;
let (search, is_finite_pagination, max_total_hits, offset) =
prepare_search(index, &rtxn, &query, features, distribution)?;
prepare_search(index, &rtxn, &query, features)?;
let milli::SearchResult { documents_ids, matching_words, candidates, document_scores, .. } =
match &query.hybrid {
Some(hybrid) => match *hybrid.semantic_ratio {
ratio if ratio == 0.0 || ratio == 1.0 => search.execute()?,
ratio => search.execute_hybrid(ratio)?,
},
None => search.execute()?,
};
search.execute()?;
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
@ -616,17 +537,13 @@ pub fn perform_search(
insert_geo_distance(sort, &mut document);
}
let mut semantic_score = None;
for details in &score {
if let ScoreDetails::Vector(score_details::Vector {
target_vector: _,
value_similarity: Some((_matching_vector, similarity)),
}) = details
{
semantic_score = Some(*similarity);
break;
}
}
let semantic_score = match query.vector.as_ref() {
Some(vector) => match extract_field("_vectors", &fields_ids_map, obkv)? {
Some(vectors) => compute_semantic_score(vector, vectors)?,
None => None,
},
None => None,
};
let ranking_score =
query.show_ranking_score.then(|| ScoreDetails::global_score(score.iter()));
@ -669,7 +586,6 @@ pub fn perform_search(
let max_values_by_facet = index
.max_values_per_facet(&rtxn)
.map_err(milli::Error::from)?
.map(|x| x as usize)
.unwrap_or(DEFAULT_VALUES_PER_FACET);
facet_distribution.max_values_per_facet(max_values_by_facet);
@ -729,15 +645,11 @@ pub fn perform_facet_search(
let before_search = Instant::now();
let rtxn = index.read_txn()?;
let (search, _, _, _) = prepare_search(index, &rtxn, &search_query, features, None)?;
let mut facet_search =
SearchForFacetValues::new(facet_name, search, search_query.hybrid.is_some());
let (search, _, _, _) = prepare_search(index, &rtxn, &search_query, features)?;
let mut facet_search = SearchForFacetValues::new(facet_name, search);
if let Some(facet_query) = &facet_query {
facet_search.query(facet_query);
}
if let Some(max_facets) = index.max_values_per_facet(&rtxn)? {
facet_search.max_values(max_facets as usize);
}
Ok(FacetSearchResult {
facet_hits: facet_search.execute()?,
@ -762,6 +674,18 @@ fn insert_geo_distance(sorts: &[String], document: &mut Document) {
}
}
fn compute_semantic_score(query: &[f32], vectors: Value) -> milli::Result<Option<f32>> {
let vectors = serde_json::from_value(vectors)
.map(VectorOrArrayOfVectors::into_array_of_vectors)
.map_err(InternalError::SerdeJson)?;
Ok(vectors
.into_iter()
.flatten()
.map(|v| OrderedFloat(dot_product_similarity(query, &v)))
.max()
.map(OrderedFloat::into_inner))
}
fn compute_formatted_options(
attr_to_highlight: &HashSet<String>,
attr_to_crop: &[String],
@ -889,6 +813,22 @@ fn make_document(
Ok(document)
}
/// Reads the top-level field `field_name` of a document and decodes it as JSON.
///
/// Nested objects are not supported. Returns `Ok(None)` when `field_ids_map` does not
/// know `field_name` or when `obkv` holds no value for it.
///
/// # Errors
///
/// Fails if the stored bytes are not valid JSON.
fn extract_field(
    field_name: &str,
    field_ids_map: &FieldsIdsMap,
    obkv: obkv::KvReaderU16,
) -> Result<Option<serde_json::Value>, MeilisearchHttpError> {
    let raw = field_ids_map.id(field_name).and_then(|fid| obkv.get(fid));

    match raw {
        Some(bytes) => Ok(Some(serde_json::from_slice(bytes)?)),
        None => Ok(None),
    }
}
fn format_fields<'a>(
document: &Document,
field_ids_map: &FieldsIdsMap,
@ -900,14 +840,6 @@ fn format_fields<'a>(
let mut matches_position = compute_matches.then(BTreeMap::new);
let mut document = document.clone();
// reduce the formatted option list to the attributes that should be formatted,
// instead of all the attributes to display.
let formatting_fields_options: Vec<_> = formatted_options
.iter()
.filter(|(_, option)| option.should_format())
.map(|(fid, option)| (field_ids_map.name(*fid).unwrap(), option))
.collect();
// select the attributes to retrieve
let displayable_names =
displayable_ids.iter().map(|&fid| field_ids_map.name(fid).expect("Missing field name"));
@ -916,15 +848,13 @@ fn format_fields<'a>(
// to the value and merge them together. eg. If a user said he wanted to highlight `doggo`
// and crop `doggo.name`. `doggo.name` needs to be highlighted + cropped while `doggo.age` is only
// highlighted.
// Warn: The time to compute the format list scales with the number of fields to format;
// cumulated with map_leaf_values that iterates over all the nested fields, it gives a quadratic complexity:
// d*f where d is the total number of fields to display and f is the total number of fields to format.
let format = formatting_fields_options
let format = formatted_options
.iter()
.filter(|(name, _option)| {
.filter(|(field, _option)| {
let name = field_ids_map.name(**field).unwrap();
milli::is_faceted_by(name, key) || milli::is_faceted_by(key, name)
})
.map(|(_, option)| **option)
.map(|(_, option)| *option)
.reduce(|acc, option| acc.merge(option));
let mut infos = Vec::new();
@ -1021,7 +951,7 @@ fn format_value<'a>(
let value = matcher.format(format_options);
Value::String(value.into_owned())
}
None => Value::String(s),
None => Value::Number(number),
}
}
value => value,

View File

@ -59,8 +59,6 @@ pub static AUTHORIZATIONS: Lazy<HashMap<(&'static str, &'static str), HashSet<&'
("POST", "/snapshots") => hashset!{"snapshots.create", "snapshots.*", "*"},
("GET", "/version") => hashset!{"version", "*"},
("GET", "/metrics") => hashset!{"metrics.get", "metrics.*", "*"},
("POST", "/logs/stream") => hashset!{"metrics.get", "metrics.*", "*"},
("DELETE", "/logs/stream") => hashset!{"metrics.get", "metrics.*", "*"},
("PATCH", "/keys/mykey/") => hashset!{"keys.update", "*"},
("GET", "/keys/mykey/") => hashset!{"keys.get", "*"},
("DELETE", "/keys/mykey/") => hashset!{"keys.delete", "*"},

View File

@ -5,11 +5,9 @@ pub mod service;
use std::fmt::{self, Display};
#[allow(unused)]
pub use index::{GetAllDocumentsOptions, GetDocumentOptions};
use meili_snap::json_string;
use serde::{Deserialize, Serialize};
#[allow(unused)]
pub use server::{default_settings, Server};
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)]
@ -64,7 +62,7 @@ impl Display for Value {
write!(
f,
"{}",
json_string!(self, { ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]" })
json_string!(self, { ".enqueuedAt" => "[date]", ".processedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]" })
)
}
}

View File

@ -13,8 +13,6 @@ use meilisearch::{analytics, create_app, setup_meilisearch};
use once_cell::sync::Lazy;
use tempfile::TempDir;
use tokio::time::sleep;
use tracing::level_filters::LevelFilter;
use tracing_subscriber::Layer;
use super::index::Index;
use super::service::Service;
@ -83,16 +81,10 @@ impl Server {
Response = ServiceResponse<impl MessageBody>,
Error = actix_web::Error,
> {
let (_route_layer, route_layer_handle) =
tracing_subscriber::reload::Layer::new(None.with_filter(
tracing_subscriber::filter::Targets::new().with_target("", LevelFilter::OFF),
));
actix_web::test::init_service(create_app(
self.service.index_scheduler.clone().into(),
self.service.auth.clone().into(),
self.service.options.clone(),
route_layer_handle,
analytics::MockAnalytics::new(&self.service.options),
true,
))

View File

@ -7,8 +7,6 @@ use actix_web::test::TestRequest;
use index_scheduler::IndexScheduler;
use meilisearch::{analytics, create_app, Opt};
use meilisearch_auth::AuthController;
use tracing::level_filters::LevelFilter;
use tracing_subscriber::Layer;
use crate::common::encoder::Encoder;
use crate::common::Value;
@ -107,16 +105,10 @@ impl Service {
}
pub async fn request(&self, mut req: test::TestRequest) -> (Value, StatusCode) {
let (_route_layer, route_layer_handle) =
tracing_subscriber::reload::Layer::new(None.with_filter(
tracing_subscriber::filter::Targets::new().with_target("", LevelFilter::OFF),
));
let app = test::init_service(create_app(
self.index_scheduler.clone().into(),
self.auth.clone().into(),
self.options.clone(),
route_layer_handle,
analytics::MockAnalytics::new(&self.options),
true,
))

View File

@ -1760,181 +1760,6 @@ async fn add_documents_invalid_geo_field() {
"finishedAt": "[date]"
}
"###);
// The three next tests are related to #4333
// _geo has a lat and lng but set to `null`
let documents = json!([
{
"id": "12",
"_geo": { "lng": null, "lat": 67}
}
]);
let (response, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let response = index.wait_task(response.uid()).await;
snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }),
@r###"
{
"uid": 14,
"indexUid": "test",
"status": "failed",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 0
},
"error": {
"message": "Could not parse longitude in the document with the id: `12`. Was expecting a finite number but instead got `null`.",
"code": "invalid_document_geo_field",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"
},
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
// _geo has a lat and lng but set to `null`
let documents = json!([
{
"id": "12",
"_geo": { "lng": 35, "lat": null }
}
]);
let (response, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let response = index.wait_task(response.uid()).await;
snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }),
@r###"
{
"uid": 15,
"indexUid": "test",
"status": "failed",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 0
},
"error": {
"message": "Could not parse latitude in the document with the id: `12`. Was expecting a finite number but instead got `null`.",
"code": "invalid_document_geo_field",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"
},
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
// _geo has a lat and lng but set to `null`
let documents = json!([
{
"id": "13",
"_geo": { "lng": null, "lat": null }
}
]);
let (response, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let response = index.wait_task(response.uid()).await;
snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }),
@r###"
{
"uid": 16,
"indexUid": "test",
"status": "failed",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 0
},
"error": {
"message": "Could not parse latitude nor longitude in the document with the id: `13`. Was expecting finite numbers but instead got `null` and `null`.",
"code": "invalid_document_geo_field",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"
},
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
}
// Related to #4333
// A document whose `_geo` coordinates are `null` is accepted while `_geo` is not
// used by any setting; making `_geo` sortable afterwards must fail with
// `invalid_document_geo_field`.
#[actix_rt::test]
async fn add_invalid_geo_and_then_settings() {
let server = Server::new().await;
let index = server.index("test");
index.create(Some("id")).await;
// _geo is an object, but both `lat` and `lng` are `null`
let documents = json!([
{
"id": "11",
"_geo": { "lat": null, "lng": null },
}
]);
let (ret, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let ret = index.wait_task(ret.uid()).await;
snapshot!(ret, @r###"
{
"uid": 1,
"indexUid": "test",
"status": "succeeded",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 1
},
"error": null,
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
// Declaring `_geo` as sortable is expected to fail on the document added above.
let (ret, code) = index.update_settings(json!({"sortableAttributes": ["_geo"]})).await;
snapshot!(code, @"202 Accepted");
let ret = index.wait_task(ret.uid()).await;
snapshot!(ret, @r###"
{
"uid": 2,
"indexUid": "test",
"status": "failed",
"type": "settingsUpdate",
"canceledBy": null,
"details": {
"sortableAttributes": [
"_geo"
]
},
"error": {
"message": "Could not parse latitude in the document with the id: `\"11\"`. Was expecting a finite number but instead got `null`.",
"code": "invalid_document_geo_field",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"
},
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
}
#[actix_rt::test]

View File

@ -397,7 +397,7 @@ async fn delete_document_by_complex_filter() {
"canceledBy": null,
"details": {
"providedIds": 0,
"deletedDocuments": 2,
"deletedDocuments": 4,
"originalFilter": "[[\"color = green\",\"color NOT EXISTS\"]]"
},
"error": null,

View File

@ -20,8 +20,6 @@ pub enum GetDump {
RubyGemsWithSettingsV4,
TestV5,
TestV6WithExperimental,
}
impl GetDump {
@ -70,10 +68,6 @@ impl GetDump {
GetDump::TestV5 => {
exist_relative_path!("tests/assets/v5_v0.28.0_test_dump.dump").into()
}
GetDump::TestV6WithExperimental => exist_relative_path!(
"tests/assets/v6_v1.6.0_use_deactivated_experimental_setting.dump"
)
.into(),
}
}
}

View File

@ -59,7 +59,6 @@ async fn import_dump_v1_movie_raw() {
"dictionary": [],
"synonyms": {},
"distinctAttribute": null,
"proximityPrecision": "byWord",
"typoTolerance": {
"enabled": true,
"minWordSizeForTypos": {
@ -220,7 +219,6 @@ async fn import_dump_v1_movie_with_settings() {
"dictionary": [],
"synonyms": {},
"distinctAttribute": null,
"proximityPrecision": "byWord",
"typoTolerance": {
"enabled": true,
"minWordSizeForTypos": {
@ -367,7 +365,6 @@ async fn import_dump_v1_rubygems_with_settings() {
"dictionary": [],
"synonyms": {},
"distinctAttribute": null,
"proximityPrecision": "byWord",
"typoTolerance": {
"enabled": true,
"minWordSizeForTypos": {
@ -500,7 +497,6 @@ async fn import_dump_v2_movie_raw() {
"dictionary": [],
"synonyms": {},
"distinctAttribute": null,
"proximityPrecision": "byWord",
"typoTolerance": {
"enabled": true,
"minWordSizeForTypos": {
@ -645,7 +641,6 @@ async fn import_dump_v2_movie_with_settings() {
"dictionary": [],
"synonyms": {},
"distinctAttribute": null,
"proximityPrecision": "byWord",
"typoTolerance": {
"enabled": true,
"minWordSizeForTypos": {
@ -789,7 +784,6 @@ async fn import_dump_v2_rubygems_with_settings() {
"dictionary": [],
"synonyms": {},
"distinctAttribute": null,
"proximityPrecision": "byWord",
"typoTolerance": {
"enabled": true,
"minWordSizeForTypos": {
@ -922,7 +916,6 @@ async fn import_dump_v3_movie_raw() {
"dictionary": [],
"synonyms": {},
"distinctAttribute": null,
"proximityPrecision": "byWord",
"typoTolerance": {
"enabled": true,
"minWordSizeForTypos": {
@ -1067,7 +1060,6 @@ async fn import_dump_v3_movie_with_settings() {
"dictionary": [],
"synonyms": {},
"distinctAttribute": null,
"proximityPrecision": "byWord",
"typoTolerance": {
"enabled": true,
"minWordSizeForTypos": {
@ -1211,7 +1203,6 @@ async fn import_dump_v3_rubygems_with_settings() {
"dictionary": [],
"synonyms": {},
"distinctAttribute": null,
"proximityPrecision": "byWord",
"typoTolerance": {
"enabled": true,
"minWordSizeForTypos": {
@ -1344,7 +1335,6 @@ async fn import_dump_v4_movie_raw() {
"dictionary": [],
"synonyms": {},
"distinctAttribute": null,
"proximityPrecision": "byWord",
"typoTolerance": {
"enabled": true,
"minWordSizeForTypos": {
@ -1489,7 +1479,6 @@ async fn import_dump_v4_movie_with_settings() {
"dictionary": [],
"synonyms": {},
"distinctAttribute": null,
"proximityPrecision": "byWord",
"typoTolerance": {
"enabled": true,
"minWordSizeForTypos": {
@ -1633,7 +1622,6 @@ async fn import_dump_v4_rubygems_with_settings() {
"dictionary": [],
"synonyms": {},
"distinctAttribute": null,
"proximityPrecision": "byWord",
"typoTolerance": {
"enabled": true,
"minWordSizeForTypos": {
@ -1822,108 +1810,3 @@ async fn import_dump_v5() {
json_string!(tasks, { ".results[].details.dumpUid" => "[uid]", ".results[].duration" => "[duration]" , ".results[].startedAt" => "[date]" , ".results[].finishedAt" => "[date]" })
);
}
// Imports the `TestV6WithExperimental` dump and checks the imported index, the
// experimental feature flags, the index settings and the resulting search order.
#[actix_rt::test]
async fn import_dump_v6_containing_experimental_features() {
let temp = tempfile::tempdir().unwrap();
let options = Opt {
import_dump: Some(GetDump::TestV6WithExperimental.path()),
..default_settings(temp.path())
};
let mut server = Server::new_auth_with_options(options, temp).await;
server.use_api_key("MASTER_KEY");
// The dump is expected to contain a single index, `movies`, with primary key `id`.
let (indexes, code) = server.list_indexes(None, None).await;
assert_eq!(code, 200, "{indexes}");
assert_eq!(indexes["results"].as_array().unwrap().len(), 1);
assert_eq!(indexes["results"][0]["uid"], json!("movies"));
assert_eq!(indexes["results"][0]["primaryKey"], json!("id"));
// Every experimental feature is reported as disabled after the import.
let (response, code) = server.get_features().await;
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"scoreDetails": false,
"vectorStore": false,
"metrics": false,
"logsRoute": false,
"exportPuffinReports": false
}
"###);
let index = server.index("movies");
// The imported settings are expected to carry `proximityPrecision: byAttribute`.
let (response, code) = index.settings().await;
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"displayedAttributes": [
"*"
],
"searchableAttributes": [
"*"
],
"filterableAttributes": [],
"sortableAttributes": [],
"rankingRules": [
"words",
"typo",
"proximity"
],
"stopWords": [],
"nonSeparatorTokens": [],
"separatorTokens": [],
"dictionary": [],
"synonyms": {},
"distinctAttribute": null,
"proximityPrecision": "byAttribute",
"typoTolerance": {
"enabled": true,
"minWordSizeForTypos": {
"oneTypo": 5,
"twoTypos": 9
},
"disableOnWords": [],
"disableOnAttributes": []
},
"faceting": {
"maxValuesPerFacet": 100,
"sortFacetValuesBy": {
"*": "alpha"
}
},
"pagination": {
"maxTotalHits": 1000
}
}
"###);
// the expected order is [1, 3, 2] instead of [3, 1, 2]
// because the attribute scale doesn't make the difference between 1 and 3.
index
.search(json!({"q": "the soup of day"}), |response, code| {
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @r###"
[
{
"id": 1,
"a": "Soup of the day",
"b": "many the fish"
},
{
"id": 3,
"a": "the Soup of day",
"b": "many the fish"
},
{
"id": 2,
"a": "Soup of day",
"b": "many the lazy fish"
}
]
"###);
})
.await;
}

View File

@ -21,7 +21,6 @@ async fn experimental_features() {
"scoreDetails": false,
"vectorStore": false,
"metrics": false,
"logsRoute": false,
"exportPuffinReports": false
}
"###);
@ -34,7 +33,6 @@ async fn experimental_features() {
"scoreDetails": false,
"vectorStore": true,
"metrics": false,
"logsRoute": false,
"exportPuffinReports": false
}
"###);
@ -47,7 +45,6 @@ async fn experimental_features() {
"scoreDetails": false,
"vectorStore": true,
"metrics": false,
"logsRoute": false,
"exportPuffinReports": false
}
"###);
@ -61,7 +58,6 @@ async fn experimental_features() {
"scoreDetails": false,
"vectorStore": true,
"metrics": false,
"logsRoute": false,
"exportPuffinReports": false
}
"###);
@ -75,7 +71,6 @@ async fn experimental_features() {
"scoreDetails": false,
"vectorStore": true,
"metrics": false,
"logsRoute": false,
"exportPuffinReports": false
}
"###);
@ -96,7 +91,6 @@ async fn experimental_feature_metrics() {
"scoreDetails": false,
"vectorStore": false,
"metrics": true,
"logsRoute": false,
"exportPuffinReports": false
}
"###);
@ -152,7 +146,7 @@ async fn errors() {
meili_snap::snapshot!(code, @"400 Bad Request");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"message": "Unknown field `NotAFeature`: expected one of `scoreDetails`, `vectorStore`, `metrics`, `logsRoute`, `exportPuffinReports`",
"message": "Unknown field `NotAFeature`: expected one of `scoreDetails`, `vectorStore`, `metrics`, `exportPuffinReports`",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"

View File

@ -5,7 +5,6 @@ mod documents;
mod dumps;
mod features;
mod index;
mod logs;
mod search;
mod settings;
mod snapshot;

View File

@ -1,182 +0,0 @@
use meili_snap::*;
use crate::common::Server;
use crate::json;
/// Checks that `POST /logs/stream` answers `400 Bad Request` for invalid `target` values.
///
/// Four payloads are sent in turn: a boolean, an array, an empty string and `"=="`. The
/// snapshots pin the error body returned for each of them.
#[actix_rt::test]
async fn logs_stream_bad_target() {
let server = Server::new().await;
// Wrong type: a boolean instead of a string
let (response, code) = server.service.post("/logs/stream", json!({ "target": true })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Invalid value type at `.target`: expected a string, but found a boolean: `true`",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"
}
"###);
// Wrong type: an array instead of a string
let (response, code) = server.service.post("/logs/stream", json!({ "target": [] })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Invalid value type at `.target`: expected a string, but found an array: `[]`",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"
}
"###);
// Empty string: the answer carries our own help message describing the expected usage
let (response, code) = server.service.post("/logs/stream", json!({ "target": "" })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Invalid value at `.target`: Empty string is not a valid target. If you want to get no logs use `OFF`. Usage: `info`, `info:meilisearch`, or you can write multiple filters in one target: `index_scheduler=info,milli=trace`",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"
}
"###);
// An error from the target parser
let (response, code) = server.service.post("/logs/stream", json!({ "target": "==" })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Invalid value at `.target`: invalid filter directive: too many '=' in filter directive, expected 0 or 1",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"
}
"###);
}
/// Checks that `POST /logs/stream` answers `400 Bad Request` for invalid `mode` values.
///
/// A boolean, an array and an unknown string are sent in turn; per the last snapshot the
/// accepted values are `fmt` and `profile`.
#[actix_rt::test]
async fn logs_stream_bad_mode() {
let server = Server::new().await;
// Wrong type: a boolean instead of a string
let (response, code) = server.service.post("/logs/stream", json!({ "mode": true })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Invalid value type at `.mode`: expected a string, but found a boolean: `true`",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"
}
"###);
// Wrong type: an array instead of a string
let (response, code) = server.service.post("/logs/stream", json!({ "mode": [] })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Invalid value type at `.mode`: expected a string, but found an array: `[]`",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"
}
"###);
// Wrong value: a string that is not one of the known modes
let (response, code) = server.service.post("/logs/stream", json!({ "mode": "tamo" })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Unknown value `tamo` at `.mode`: expected one of `fmt`, `profile`",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"
}
"###);
}
/// Checks that `POST /logs/stream` answers `400 Bad Request` for invalid uses of
/// `profileMemory`: a value of the wrong type (string, array), and `true` sent either without a
/// `mode` or together with `"mode": "fmt"`.
#[actix_rt::test]
async fn logs_stream_bad_profile_memory() {
let server = Server::new().await;
// Wrong type: a string instead of a boolean
let (response, code) =
server.service.post("/logs/stream", json!({ "profileMemory": "tamo" })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Invalid value type at `.profileMemory`: expected a boolean, but found a string: `\"tamo\"`",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"
}
"###);
// Wrong type: an array instead of a boolean
let (response, code) =
server.service.post("/logs/stream", json!({ "profileMemory": ["hello", "kefir"] })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Invalid value type at `.profileMemory`: expected a boolean, but found an array: `[\"hello\",\"kefir\"]`",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"
}
"###);
// Used with default parameters: no `mode` is sent, and the error message refers to the Fmt mode.
// NOTE(review): the snapshot pins the error code `invalid_settings_typo_tolerance`, which looks
// unrelated to log streaming; confirm whether the route should report a dedicated code.
let (response, code) =
server.service.post("/logs/stream", json!({ "profileMemory": true })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Invalid value: `profile_memory` can only be used while profiling code and is not compatible with the Fmt mode.",
"code": "invalid_settings_typo_tolerance",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_settings_typo_tolerance"
}
"###);
// Used with an unsupported mode: `fmt` requested explicitly, same error as above
let (response, code) =
server.service.post("/logs/stream", json!({ "mode": "fmt", "profileMemory": true })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Invalid value: `profile_memory` can only be used while profiling code and is not compatible with the Fmt mode.",
"code": "invalid_settings_typo_tolerance",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_settings_typo_tolerance"
}
"###);
}
/// Checks that both `POST /logs/stream` and `DELETE /logs/stream` answer `400 Bad Request` with
/// the `feature_not_enabled` error on a server created with `Server::new()`, i.e. without
/// explicitly turning the logs route on.
#[actix_rt::test]
async fn logs_stream_without_enabling_the_route() {
let server = Server::new().await;
// Opening the stream is refused
let (response, code) = server.service.post("/logs/stream", json!({})).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "getting logs through the `/logs/stream` route requires enabling the `logs route` experimental feature. See https://github.com/orgs/meilisearch/discussions/721",
"code": "feature_not_enabled",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#feature_not_enabled"
}
"###);
// Closing the stream is refused with the very same error
let (response, code) = server.service.delete("/logs/stream").await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "getting logs through the `/logs/stream` route requires enabling the `logs route` experimental feature. See https://github.com/orgs/meilisearch/discussions/721",
"code": "feature_not_enabled",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#feature_not_enabled"
}
"###);
}

View File

@ -1,92 +0,0 @@
mod error;
use std::rc::Rc;
use std::str::FromStr;
use actix_web::http::header::ContentType;
use meili_snap::snapshot;
use meilisearch::{analytics, create_app, Opt};
use tracing::level_filters::LevelFilter;
use tracing_subscriber::layer::SubscriberExt;
use tracing_subscriber::Layer;
use crate::common::{default_settings, Server};
use crate::json;
/// End-to-end check of the `/logs/stream` route.
///
/// The test starts a server with `experimental_enable_logs_route` set, opens a log stream in a
/// local task, creates an index, closes the stream with `DELETE /logs/stream` and finally
/// asserts that the streamed body contains the text `INFO`.
#[actix_web::test]
async fn basic_test_log_stream_route() {
let db_path = tempfile::tempdir().unwrap();
let server = Server::new_with_options(Opt {
experimental_enable_logs_route: true,
..default_settings(db_path.path())
})
.await
.unwrap();
// Reloadable layer handed to the application through `route_layer_handle`; it starts as
// `None` filtered with every target set to `OFF`.
let (route_layer, route_layer_handle) =
tracing_subscriber::reload::Layer::new(None.with_filter(
tracing_subscriber::filter::Targets::new().with_target("", LevelFilter::OFF),
));
// The subscriber combines the reloadable route layer with a regular `fmt` layer at INFO level.
let subscriber = tracing_subscriber::registry().with(route_layer).with(
tracing_subscriber::fmt::layer()
.with_span_events(tracing_subscriber::fmt::format::FmtSpan::ACTIVE)
.with_filter(tracing_subscriber::filter::LevelFilter::from_str("INFO").unwrap()),
);
let app = actix_web::test::init_service(create_app(
server.service.index_scheduler.clone().into(),
server.service.auth.clone().into(),
server.service.options.clone(),
route_layer_handle,
analytics::MockAnalytics::new(&server.service.options),
true,
))
.await;
// set the subscriber as the default for the application
tracing::subscriber::set_global_default(subscriber).unwrap();
// The service is shared between the listening task and the main test body.
let app = Rc::new(app);
// First, we start listening on the `/logs/stream` route
let handle_app = app.clone();
let handle = tokio::task::spawn_local(async move {
let req = actix_web::test::TestRequest::post()
.uri("/logs/stream")
.insert_header(ContentType::json())
.set_payload(
serde_json::to_vec(&json!({
"mode": "fmt",
"target": "info",
}))
.unwrap(),
);
let req = req.to_request();
let ret = actix_web::test::call_service(&*handle_app, req).await;
actix_web::test::read_body(ret).await
});
// We're going to create an index to get at least one info log saying we processed a batch of tasks
let (ret, _code) = server.create_index(json!({ "uid": "tamo" })).await;
snapshot!(ret, @r###"
{
"taskUid": 0,
"indexUid": "tamo",
"status": "enqueued",
"type": "indexCreation",
"enqueuedAt": "[date]"
}
"###);
server.wait_task(ret.uid()).await;
// Stop the stream; the listening task awaited below then yields the body it read.
let req = actix_web::test::TestRequest::delete().uri("/logs/stream");
let req = req.to_request();
let ret = actix_web::test::call_service(&*app, req).await;
let code = ret.status();
snapshot!(code, @"204 No Content");
let logs = handle.await.unwrap();
let logs = String::from_utf8(logs.to_vec()).unwrap();
assert!(logs.contains("INFO"), "{logs}");
}

View File

@ -4,111 +4,23 @@ use once_cell::sync::Lazy;
use crate::common::{Server, Value};
use crate::json;
static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
pub(self) static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
json!([
{
"id": 1,
"description": "Leather Jacket",
"brand": "Lee Jeans",
"product_id": "123456",
"color": "Brown"
},
{
"id": 2,
"description": "Leather Jacket",
"brand": "Lee Jeans",
"product_id": "123456",
"color": "Black"
},
{
"id": 3,
"description": "Leather Jacket",
"brand": "Lee Jeans",
"product_id": "123456",
"color": "Blue"
},
{
"id": 4,
"description": "T-Shirt",
"brand": "Nike",
"product_id": "789012",
"color": "Red"
},
{
"id": 5,
"description": "T-Shirt",
"brand": "Nike",
"product_id": "789012",
"color": "Blue"
},
{
"id": 6,
"description": "Running Shoes",
"brand": "Adidas",
"product_id": "456789",
"color": "Black"
},
{
"id": 7,
"description": "Running Shoes",
"brand": "Adidas",
"product_id": "456789",
"color": "White"
},
{
"id": 8,
"description": "Hoodie",
"brand": "Puma",
"product_id": "987654",
"color": "Gray"
},
{
"id": 9,
"description": "Sweater",
"brand": "Gap",
"product_id": "234567",
"color": "Green"
},
{
"id": 10,
"description": "Sweater",
"brand": "Gap",
"product_id": "234567",
"color": "Red"
},
{
"id": 11,
"description": "Sweater",
"brand": "Gap",
"product_id": "234567",
"color": "Blue"
},
{
"id": 12,
"description": "Jeans",
"brand": "Levi's",
"product_id": "345678",
"color": "Indigo"
},
{
"id": 13,
"description": "Jeans",
"brand": "Levi's",
"product_id": "345678",
"color": "Black"
},
{
"id": 14,
"description": "Jeans",
"brand": "Levi's",
"product_id": "345678",
"color": "Stone Wash"
}
{"productId": 1, "shopId": 1},
{"productId": 2, "shopId": 1},
{"productId": 3, "shopId": 2},
{"productId": 4, "shopId": 2},
{"productId": 5, "shopId": 3},
{"productId": 6, "shopId": 3},
{"productId": 7, "shopId": 4},
{"productId": 8, "shopId": 4},
{"productId": 9, "shopId": 5},
{"productId": 10, "shopId": 5}
])
});
static DOCUMENT_PRIMARY_KEY: &str = "id";
static DOCUMENT_DISTINCT_KEY: &str = "product_id";
pub(self) static DOCUMENT_PRIMARY_KEY: &str = "productId";
pub(self) static DOCUMENT_DISTINCT_KEY: &str = "shopId";
/// testing: https://github.com/meilisearch/meilisearch/issues/4078
#[actix_rt::test]
@ -121,121 +33,31 @@ async fn distinct_search_with_offset_no_ranking() {
index.update_distinct_attribute(json!(DOCUMENT_DISTINCT_KEY)).await;
index.wait_task(1).await;
fn get_hits(response: &Value) -> Vec<&str> {
fn get_hits(Value(response): Value) -> Vec<i64> {
let hits_array = response["hits"].as_array().unwrap();
hits_array.iter().map(|h| h[DOCUMENT_DISTINCT_KEY].as_str().unwrap()).collect::<Vec<_>>()
hits_array.iter().map(|h| h[DOCUMENT_DISTINCT_KEY].as_i64().unwrap()).collect::<Vec<_>>()
}
let (response, code) = index.search_post(json!({"offset": 0, "limit": 2})).await;
let hits = get_hits(&response);
let (response, code) = index.search_post(json!({"limit": 2, "offset": 0})).await;
let hits = get_hits(response);
snapshot!(code, @"200 OK");
snapshot!(hits.len(), @"2");
snapshot!(format!("{:?}", hits), @r#"["123456", "789012"]"#);
snapshot!(response["estimatedTotalHits"] , @"11");
snapshot!(format!("{:?}", hits), @"[1, 2]");
let (response, code) = index.search_post(json!({"offset": 2, "limit": 2})).await;
let hits = get_hits(&response);
let (response, code) = index.search_post(json!({"limit": 2, "offset": 2})).await;
let hits = get_hits(response);
snapshot!(code, @"200 OK");
snapshot!(hits.len(), @"2");
snapshot!(format!("{:?}", hits), @r#"["456789", "987654"]"#);
snapshot!(response["estimatedTotalHits"], @"10");
snapshot!(format!("{:?}", hits), @"[3, 4]");
let (response, code) = index.search_post(json!({"offset": 4, "limit": 2})).await;
let hits = get_hits(&response);
snapshot!(code, @"200 OK");
snapshot!(hits.len(), @"2");
snapshot!(format!("{:?}", hits), @r#"["234567", "345678"]"#);
snapshot!(response["estimatedTotalHits"], @"6");
let (response, code) = index.search_post(json!({"offset": 5, "limit": 2})).await;
let hits = get_hits(&response);
let (response, code) = index.search_post(json!({"limit": 10, "offset": 4})).await;
let hits = get_hits(response);
snapshot!(code, @"200 OK");
snapshot!(hits.len(), @"1");
snapshot!(format!("{:?}", hits), @r#"["345678"]"#);
snapshot!(response["estimatedTotalHits"], @"6");
snapshot!(format!("{:?}", hits), @"[5]");
let (response, code) = index.search_post(json!({"offset": 6, "limit": 2})).await;
let hits = get_hits(&response);
let (response, code) = index.search_post(json!({"limit": 10, "offset": 5})).await;
let hits = get_hits(response);
snapshot!(code, @"200 OK");
snapshot!(hits.len(), @"0");
snapshot!(format!("{:?}", hits), @r#"[]"#);
snapshot!(response["estimatedTotalHits"], @"6");
let (response, code) = index.search_post(json!({"offset": 7, "limit": 2})).await;
let hits = get_hits(&response);
snapshot!(code, @"200 OK");
snapshot!(hits.len(), @"0");
snapshot!(format!("{:?}", hits), @r#"[]"#);
snapshot!(response["estimatedTotalHits"], @"6");
}
/// testing: https://github.com/meilisearch/meilisearch/issues/4130
///
/// Indexes `DOCUMENTS`, sets `DOCUMENT_DISTINCT_KEY` as the distinct attribute, then runs
/// placeholder searches with `page`/`hitsPerPage` and snapshots the distinct-key value of every
/// hit together with the `page`, `totalPages` and `totalHits` fields of the response.
#[actix_rt::test]
async fn distinct_search_with_pagination_no_ranking() {
let server = Server::new().await;
let index = server.index("test");
let documents = DOCUMENTS.clone();
index.add_documents(documents, Some(DOCUMENT_PRIMARY_KEY)).await;
index.update_distinct_attribute(json!(DOCUMENT_DISTINCT_KEY)).await;
index.wait_task(1).await;
// Extracts the distinct-key value of every hit, in response order.
fn get_hits(response: &Value) -> Vec<&str> {
let hits_array = response["hits"].as_array().unwrap();
hits_array.iter().map(|h| h[DOCUMENT_DISTINCT_KEY].as_str().unwrap()).collect::<Vec<_>>()
}
// Page 0 yields no hits while still reporting the totals.
let (response, code) = index.search_post(json!({"page": 0, "hitsPerPage": 2})).await;
let hits = get_hits(&response);
snapshot!(code, @"200 OK");
snapshot!(hits.len(), @"0");
snapshot!(format!("{:?}", hits), @r#"[]"#);
snapshot!(response["page"], @"0");
snapshot!(response["totalPages"], @"3");
snapshot!(response["totalHits"], @"6");
// Pages 1 to 3 each return two different distinct values.
let (response, code) = index.search_post(json!({"page": 1, "hitsPerPage": 2})).await;
let hits = get_hits(&response);
snapshot!(code, @"200 OK");
snapshot!(hits.len(), @"2");
snapshot!(format!("{:?}", hits), @r#"["123456", "789012"]"#);
snapshot!(response["page"], @"1");
snapshot!(response["totalPages"], @"3");
snapshot!(response["totalHits"], @"6");
let (response, code) = index.search_post(json!({"page": 2, "hitsPerPage": 2})).await;
let hits = get_hits(&response);
snapshot!(code, @"200 OK");
snapshot!(hits.len(), @"2");
snapshot!(format!("{:?}", hits), @r#"["456789", "987654"]"#);
snapshot!(response["page"], @"2");
snapshot!(response["totalPages"], @"3");
snapshot!(response["totalHits"], @"6");
let (response, code) = index.search_post(json!({"page": 3, "hitsPerPage": 2})).await;
let hits = get_hits(&response);
snapshot!(code, @"200 OK");
snapshot!(hits.len(), @"2");
snapshot!(format!("{:?}", hits), @r#"["234567", "345678"]"#);
snapshot!(response["page"], @"3");
snapshot!(response["totalPages"], @"3");
snapshot!(response["totalHits"], @"6");
// One page past the last one: no hits, totals unchanged.
let (response, code) = index.search_post(json!({"page": 4, "hitsPerPage": 2})).await;
let hits = get_hits(&response);
snapshot!(code, @"200 OK");
snapshot!(hits.len(), @"0");
snapshot!(format!("{:?}", hits), @r#"[]"#);
snapshot!(response["page"], @"4");
snapshot!(response["totalPages"], @"3");
snapshot!(response["totalHits"], @"6");
// With three hits per page, the second page holds the last three distinct values.
let (response, code) = index.search_post(json!({"page": 2, "hitsPerPage": 3})).await;
let hits = get_hits(&response);
snapshot!(code, @"200 OK");
snapshot!(hits.len(), @"3");
snapshot!(format!("{:?}", hits), @r#"["987654", "234567", "345678"]"#);
snapshot!(response["page"], @"2");
snapshot!(response["totalPages"], @"2");
snapshot!(response["totalHits"], @"6");
}

View File

@ -4,7 +4,7 @@ use once_cell::sync::Lazy;
use crate::common::{Server, Value};
use crate::json;
static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
pub(self) static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
json!([
{
"title": "Shazam!",
@ -105,24 +105,6 @@ async fn more_advanced_facet_search() {
snapshot!(response["facetHits"].as_array().unwrap().len(), @"1");
}
/// Checks that a facet search honours the `maxValuesPerFacet` faceting setting.
///
/// With `maxValuesPerFacet` set to 1 and `genres` declared filterable, a facet search on
/// `genres` with the query `"a"` must answer `200` and return exactly one facet hit.
#[actix_rt::test]
async fn simple_facet_search_with_max_values() {
    let server = Server::new().await;
    let index = server.index("test");

    let documents = DOCUMENTS.clone();
    index.update_settings_faceting(json!({ "maxValuesPerFacet": 1 })).await;
    index.update_settings_filterable_attributes(json!(["genres"])).await;
    index.add_documents(documents, None).await;
    // Three operations were sent above; presumably they hold task uids 0, 1 and 2 on this
    // fresh server, so waiting on uid 2 waits for the document addition.
    index.wait_task(2).await;

    let (response, code) =
        index.facet_search(json!({"facetName": "genres", "facetQuery": "a"})).await;

    assert_eq!(code, 200, "{}", response);
    // The response is reported through the assertion message instead of a stray `dbg!`, so it
    // is printed only when the check fails.
    assert_eq!(response["facetHits"].as_array().unwrap().len(), 1, "{}", response);
}
#[actix_rt::test]
async fn non_filterable_facet_search_error() {
let server = Server::new().await;

View File

@ -4,7 +4,7 @@ use once_cell::sync::Lazy;
use crate::common::{Server, Value};
use crate::json;
static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
pub(self) static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
json!([
{
"id": 1,

View File

@ -1,176 +0,0 @@
use meili_snap::snapshot;
use once_cell::sync::Lazy;
use crate::common::index::Index;
use crate::common::{Server, Value};
use crate::json;
/// Prepares the `test` index used by the tests of this file and returns a handle to it.
///
/// Steps, in order: enable the `vectorStore` runtime feature, configure a `default` embedder
/// (`userProvided` source, 2 dimensions) on the index, then add `documents`. Each index
/// operation must be accepted with a `202` and is awaited before the next step.
async fn index_with_documents<'a>(server: &'a Server, documents: &Value) -> Index<'a> {
let index = server.index("test");
// Turn the vector store on; the snapshot pins the complete set of feature flags returned.
let (response, code) = server.set_features(json!({"vectorStore": true})).await;
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"scoreDetails": false,
"vectorStore": true,
"metrics": false,
"logsRoute": false,
"exportPuffinReports": false
}
"###);
// Declare the embedder before sending the documents that carry `_vectors`.
let (response, code) = index
.update_settings(json!({ "embedders": {"default": {
"source": "userProvided",
"dimensions": 2}}} ))
.await;
assert_eq!(202, code, "{:?}", response);
index.wait_task(response.uid()).await;
let (response, code) = index.add_documents(documents.clone(), None).await;
assert_eq!(202, code, "{:?}", response);
index.wait_task(response.uid()).await;
index
}
/// Three documents (ids `"1"` to `"3"`), each carrying a `title`, a `desc` and a 2-dimensional
/// vector under `_vectors.default`.
static SIMPLE_SEARCH_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
json!([
{
"title": "Shazam!",
"desc": "a Captain Marvel ersatz",
"id": "1",
"_vectors": {"default": [1.0, 3.0]},
},
{
"title": "Captain Planet",
"desc": "He's not part of the Marvel Cinematic Universe",
"id": "2",
"_vectors": {"default": [1.0, 2.0]},
},
{
"title": "Captain Marvel",
"desc": "a Shazam ersatz",
"id": "3",
"_vectors": {"default": [2.0, 3.0]},
}])
});
/// A one-element document list: the `"Shazam!"` document with the vector `[1.0, 3.0]` under
/// `_vectors.default`.
static SINGLE_DOCUMENT: Lazy<Value> = Lazy::new(|| {
json!([{
"title": "Shazam!",
"desc": "a Captain Marvel ersatz",
"id": "1",
"_vectors": {"default": [1.0, 3.0]},
}])
});
/// Runs the same hybrid query (`q` + `vector`) twice, with a `semanticRatio` of 0.2 and then
/// 0.8, and snapshots the hits returned for each ratio.
#[actix_rt::test]
async fn simple_search() {
    let server = Server::new().await;
    let index = index_with_documents(&server, &SIMPLE_SEARCH_DOCUMENTS).await;

    // First request: semantic ratio of 0.2.
    let low_ratio_query =
        json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.2}});
    let (low_ratio_response, low_ratio_code) = index.search_post(low_ratio_query).await;
    snapshot!(low_ratio_code, @"200 OK");
    snapshot!(low_ratio_response["hits"], @r###"[{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]}},{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]}},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]}}]"###);

    // Second request: semantic ratio of 0.8; the hits now carry a `_semanticScore`.
    let high_ratio_query =
        json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.8}});
    let (high_ratio_response, high_ratio_code) = index.search_post(high_ratio_query).await;
    snapshot!(high_ratio_code, @"200 OK");
    snapshot!(high_ratio_response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_semanticScore":0.99029034},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_semanticScore":0.97434163},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_semanticScore":0.9472136}]"###);
}
/// Checks that a `semanticRatio` outside of `0.0..=1.0` is rejected with a `400 Bad Request`
/// and the `invalid_search_semantic_ratio` error code.
///
/// Both a too-large and a negative ratio are tried, first through the POST search route
/// (`hybrid.semanticRatio`) and then through the GET route (`hybridSemanticRatio` parameter).
#[actix_rt::test]
async fn invalid_semantic_ratio() {
let server = Server::new().await;
let index = index_with_documents(&server, &SIMPLE_SEARCH_DOCUMENTS).await;
// POST route, ratio above 1.0
let (response, code) = index
.search_post(
json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 1.2}}),
)
.await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Invalid value at `.hybrid.semanticRatio`: the value of `semanticRatio` is invalid, expected a float between `0.0` and `1.0`.",
"code": "invalid_search_semantic_ratio",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_semantic_ratio"
}
"###);
// POST route, negative ratio
let (response, code) = index
.search_post(
json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": -0.8}}),
)
.await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Invalid value at `.hybrid.semanticRatio`: the value of `semanticRatio` is invalid, expected a float between `0.0` and `1.0`.",
"code": "invalid_search_semantic_ratio",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_semantic_ratio"
}
"###);
// GET route, ratio above 1.0; the message now names the query parameter
let (response, code) = index
.search_get(
&yaup::to_string(
&json!({"q": "Captain", "vector": [1.0, 1.0], "hybridSemanticRatio": 1.2}),
)
.unwrap(),
)
.await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Invalid value in parameter `hybridSemanticRatio`: the value of `semanticRatio` is invalid, expected a float between `0.0` and `1.0`.",
"code": "invalid_search_semantic_ratio",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_semantic_ratio"
}
"###);
// GET route, negative ratio
let (response, code) = index
.search_get(
&yaup::to_string(
&json!({"q": "Captain", "vector": [1.0, 1.0], "hybridSemanticRatio": -0.2}),
)
.unwrap(),
)
.await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Invalid value in parameter `hybridSemanticRatio`: the value of `semanticRatio` is invalid, expected a float between `0.0` and `1.0`.",
"code": "invalid_search_semantic_ratio",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_semantic_ratio"
}
"###);
}
/// Searches an index holding only `SINGLE_DOCUMENT` with that document's own vector and a
/// `semanticRatio` of 1.0, then snapshots the first hit with its ranking and semantic scores.
#[actix_rt::test]
async fn single_document() {
    let server = Server::new().await;
    let index = index_with_documents(&server, &SINGLE_DOCUMENT).await;

    // Vector-only query: no `q`, ranking score requested.
    let query =
        json!({"vector": [1.0, 3.0], "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true});
    let (body, status) = index.search_post(query).await;

    snapshot!(status, @"200 OK");
    snapshot!(body["hits"][0], @r###"{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_rankingScore":1.0,"_semanticScore":1.0}"###);
}

Some files were not shown because too many files have changed in this diff Show More