Compare commits


2 Commits

Author       SHA1        Message                                                                Date
ManyTheFish  b06a7a4861  Update tests                                                           2023-12-20 14:54:44 +01:00
ManyTheFish  8cc2bc4e17  Change prefix thresholds to reduce their impact on the indexing time  2023-12-20 14:54:34 +01:00
139 changed files with 3056 additions and 6263 deletions

View File

@@ -1,2 +0,0 @@
-[alias]
-xtask = "run --package xtask --"

View File

@@ -27,23 +27,6 @@ Related spec: WIP
 - [ ] If prototype validated, merge changes into `main`
 - [ ] Update the spec
-### Reminders when modifying the Setting API
-<!--- Special steps to remind when adding a new index setting -->
-- [ ] Ensure the new setting route is at least tested by the [`test_setting_routes` macro](https://github.com/meilisearch/meilisearch/blob/5204c0b60b384cbc79621b6b2176fca086069e8e/meilisearch/tests/settings/get_settings.rs#L276)
-- [ ] Ensure Analytics are fully implemented
-- [ ] `/settings/my-new-setting` configurated in the [`make_setting_routes` macro](https://github.com/meilisearch/meilisearch/blob/5204c0b60b384cbc79621b6b2176fca086069e8e/meilisearch/src/routes/indexes/settings.rs#L141-L165)
-- [ ] global `/settings` route configurated in the [`update_all` function](https://github.com/meilisearch/meilisearch/blob/5204c0b60b384cbc79621b6b2176fca086069e8e/meilisearch/src/routes/indexes/settings.rs#L655-L751)
-- [ ] Ensure the dump serializing is consistent with the `/settings` route serializing, e.g., enums case can be different (`camelCase` in route and `PascalCase` in the dump)
-#### Special cases when adding a setting for an experimental feature
-- [ ] ⚠️ API stability: The setting does not appear on the main settings route when the feature has never been enabled (e.g. mark it `Unset` when returned from the index in this situation. See [an example](https://github.com/meilisearch/meilisearch/blob/7a89abd2a025606a42f8b219e539117eb2eb029f/meilisearch-types/src/settings.rs#L608))
-- [ ] The setting cannot be set when the feature is disabled, either by the main settings route or the subroute (see [`validate_settings` function](https://github.com/meilisearch/meilisearch/blob/7a89abd2a025606a42f8b219e539117eb2eb029f/meilisearch/src/routes/indexes/settings.rs#L811))
-- [ ] If possible, the setting is reset when the feature is disabled (hard if it requires reindexing)
 ## Impacted teams
 <!---Ping the related teams. Ask for the engine manager if any hesitation-->
-<!---@meilisearch/docs-team when there is any API change, e.g. settings addition-->

View File

@@ -97,7 +97,7 @@ jobs:
       - name: Send CI information to Cloud team
         # Do not send if nightly build (i.e. 'schedule' or 'workflow_dispatch' event)
         if: github.event_name == 'push'
-        uses: peter-evans/repository-dispatch@v3
+        uses: peter-evans/repository-dispatch@v2
         with:
           token: ${{ secrets.MEILI_BOT_GH_PAT }}
           repository: meilisearch/meilisearch-cloud

View File

@@ -22,7 +22,7 @@ jobs:
     outputs:
       docker-image: ${{ steps.define-image.outputs.docker-image }}
     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v3
      - name: Define the Docker image we need to use
        id: define-image
        run: |
@@ -46,11 +46,11 @@ jobs:
      MEILISEARCH_VERSION: ${{ needs.define-docker-image.outputs.docker-image }}
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v3
        with:
          repository: meilisearch/meilisearch-dotnet
      - name: Setup .NET Core
-        uses: actions/setup-dotnet@v4
+        uses: actions/setup-dotnet@v3
        with:
          dotnet-version: "6.0.x"
      - name: Install dependencies
@@ -75,12 +75,12 @@ jobs:
    ports:
      - '7700:7700'
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v3
        with:
          repository: meilisearch/meilisearch-dart
      - uses: dart-lang/setup-dart@v1
        with:
-          sdk: 'latest'
+          sdk: 3.1.1
      - name: Install dependencies
        run: dart pub get
      - name: Run integration tests
@@ -100,10 +100,10 @@ jobs:
      - '7700:7700'
    steps:
      - name: Set up Go
-        uses: actions/setup-go@v5
+        uses: actions/setup-go@v4
        with:
          go-version: stable
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v3
        with:
          repository: meilisearch/meilisearch-go
      - name: Get dependencies
@@ -129,11 +129,11 @@ jobs:
    ports:
      - '7700:7700'
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v3
        with:
          repository: meilisearch/meilisearch-java
      - name: Set up Java
-        uses: actions/setup-java@v4
+        uses: actions/setup-java@v3
        with:
          java-version: 8
          distribution: 'zulu'
@@ -156,7 +156,7 @@ jobs:
    ports:
      - '7700:7700'
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v3
        with:
          repository: meilisearch/meilisearch-js
      - name: Setup node
@@ -191,7 +191,7 @@ jobs:
    ports:
      - '7700:7700'
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v3
        with:
          repository: meilisearch/meilisearch-php
      - name: Install PHP
@@ -220,11 +220,11 @@ jobs:
    ports:
      - '7700:7700'
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v3
        with:
          repository: meilisearch/meilisearch-python
      - name: Set up Python
-        uses: actions/setup-python@v5
+        uses: actions/setup-python@v4
      - name: Install pipenv
        uses: dschep/install-pipenv-action@v1
      - name: Install dependencies
@@ -245,7 +245,7 @@ jobs:
    ports:
      - '7700:7700'
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v3
        with:
          repository: meilisearch/meilisearch-ruby
      - name: Set up Ruby 3
@@ -270,7 +270,7 @@ jobs:
    ports:
      - '7700:7700'
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v3
        with:
          repository: meilisearch/meilisearch-rust
      - name: Build
@@ -291,7 +291,7 @@ jobs:
    ports:
      - '7700:7700'
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v3
        with:
          repository: meilisearch/meilisearch-swift
      - name: Run tests
@@ -314,7 +314,7 @@ jobs:
    ports:
      - '7700:7700'
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v3
        with:
          repository: meilisearch/meilisearch-js-plugins
      - name: Setup node
@@ -345,7 +345,7 @@ jobs:
    ports:
      - '7700:7700'
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v3
        with:
          repository: meilisearch/meilisearch-rails
      - name: Set up Ruby 3
@@ -369,7 +369,7 @@ jobs:
    ports:
      - '7700:7700'
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v3
        with:
          repository: meilisearch/meilisearch-symfony
      - name: Install PHP

View File

@@ -66,10 +66,6 @@ jobs:
      - uses: actions/checkout@v3
      - name: Cache dependencies
        uses: Swatinem/rust-cache@v2.7.1
-      - uses: actions-rs/toolchain@v1
-        with:
-          toolchain: stable
-          override: true
      - name: Run cargo check without any default features
        uses: actions-rs/cargo@v1
        with:
@@ -82,7 +78,7 @@ jobs:
          args: --locked --release --all
  test-all-features:
-    name: Tests almost all features
+    name: Tests all features
    runs-on: ubuntu-latest
    container:
      # Use ubuntu-18.04 to compile with glibc 2.27, which are the production expectations
@@ -98,12 +94,16 @@ jobs:
        with:
          toolchain: stable
          override: true
-      - name: Run cargo build with almost all features
-        run: |
-          cargo build --workspace --locked --release --features "$(cargo xtask list-features --exclude-feature cuda)"
-      - name: Run cargo test with almost all features
-        run: |
-          cargo test --workspace --locked --release --features "$(cargo xtask list-features --exclude-feature cuda)"
+      - name: Run cargo build with all features
+        uses: actions-rs/cargo@v1
+        with:
+          command: build
+          args: --workspace --locked --release --all-features
+      - name: Run cargo test with all features
+        uses: actions-rs/cargo@v1
+        with:
+          command: test
+          args: --workspace --locked --release --all-features
  test-disabled-tokenization:
    name: Test disabled tokenization
@@ -164,7 +164,7 @@ jobs:
      - uses: actions-rs/toolchain@v1
        with:
          profile: minimal
-          toolchain: 1.75.0
+          toolchain: 1.71.1
          override: true
          components: clippy
      - name: Cache dependencies

View File

@@ -75,12 +75,6 @@ If you get a "Too many open files" error you might want to increase the open fil
 ulimit -Sn 3000
 ```
-#### Build tools
-Meilisearch follows the [cargo xtask](https://github.com/matklad/cargo-xtask) workflow to provide some build tools.
-Run `cargo xtask --help` from the root of the repository to find out what is available.
 ## Git Guidelines
 ### Git Branches
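The deleted "Build tools" subsection and the `.cargo/config.toml` alias removed at the top of this compare are the two halves of the cargo-xtask pattern: build automation shipped as an ordinary workspace binary invoked as `cargo xtask <task>`. A minimal sketch of such an entry point, with a hypothetical `list-features` task mirroring the CI usage above (the real xtask crate is not shown in this diff):

```rust
// xtask/src/main.rs: minimal sketch of the cargo-xtask pattern.
// The removed alias `xtask = "run --package xtask --"` makes
// `cargo xtask list-features` run this binary with that argument.
use std::env;
use std::process::ExitCode;

fn main() -> ExitCode {
    match env::args().nth(1).as_deref() {
        // Hypothetical task: print workspace features so CI can
        // interpolate them into `cargo build --features "$(...)"`.
        Some("list-features") => {
            println!("all-tokenizations");
            ExitCode::SUCCESS
        }
        _ => {
            eprintln!("usage: cargo xtask <task>");
            ExitCode::FAILURE
        }
    }
}
```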

Cargo.lock (generated): 1964 lines changed. File diff suppressed because it is too large.

View File

@@ -16,16 +16,11 @@ members = [
     "json-depth-checker",
     "benchmarks",
     "fuzzers",
-    "tracing-trace",
-    "xtask",
 ]
 [workspace.package]
-version = "1.7.0"
-authors = [
-    "Quentin de Quelen <quentin@dequelen.me>",
-    "Clément Renault <clement@meilisearch.com>",
-]
+version = "1.6.0"
+authors = ["Quentin de Quelen <quentin@dequelen.me>", "Clément Renault <clement@meilisearch.com>"]
 description = "Meilisearch HTTP server"
 homepage = "https://meilisearch.com"
 readme = "README.md"

View File

@@ -1,5 +1,5 @@
 # Compile
-FROM rust:1.75.0-alpine3.18 AS compiler
+FROM rust:1.71.1-alpine3.18 AS compiler
 RUN apk add -q --update-cache --no-cache build-base openssl-dev

View File

@@ -1,6 +1,6 @@
 MIT License
-Copyright (c) 2019-2024 Meili SAS
+Copyright (c) 2019-2022 Meili SAS
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal

View File

@@ -41,10 +41,10 @@ Meilisearch helps you shape a delightful search experience in a snap, offering f
 ## ✨ Features
 - **Search-as-you-type:** find search results in less than 50 milliseconds
-- **[Typo tolerance](https://www.meilisearch.com/docs/learn/configuration/typo_tolerance?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** get relevant matches even when queries contain typos and misspellings
-- **[Filtering](https://www.meilisearch.com/docs/learn/fine_tuning_results/filtering?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features) and [faceted search](https://www.meilisearch.com/docs/learn/fine_tuning_results/faceted_search?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** enhance your users' search experience with custom filters and build a faceted search interface in a few lines of code
+- **[Typo tolerance](https://www.meilisearch.com/docs/learn/getting_started/customizing_relevancy?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features#typo-tolerance):** get relevant matches even when queries contain typos and misspellings
+- **[Filtering](https://www.meilisearch.com/docs/learn/fine_tuning_results/filtering?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features) and [faceted search](https://www.meilisearch.com/docs/learn/fine_tuning_results/faceted_search?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** enhance your user's search experience with custom filters and build a faceted search interface in a few lines of code
 - **[Sorting](https://www.meilisearch.com/docs/learn/fine_tuning_results/sorting?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** sort results based on price, date, or pretty much anything else your users need
-- **[Synonym support](https://www.meilisearch.com/docs/learn/configuration/synonyms?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** configure synonyms to include more relevant content in your search results
+- **[Synonym support](https://www.meilisearch.com/docs/learn/getting_started/customizing_relevancy?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features#synonyms):** configure synonyms to include more relevant content in your search results
 - **[Geosearch](https://www.meilisearch.com/docs/learn/fine_tuning_results/geosearch?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** filter and sort documents based on geographic data
 - **[Extensive language support](https://www.meilisearch.com/docs/learn/what_is_meilisearch/language?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** search datasets in any language, with optimized support for Chinese, Japanese, Hebrew, and languages using the Latin alphabet
 - **[Security management](https://www.meilisearch.com/docs/learn/security/master_api_keys?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** control which users can access what data with API keys that allow fine-grained permissions handling
@@ -61,6 +61,8 @@ You can consult Meilisearch's documentation at [https://www.meilisearch.com/docs
 For basic instructions on how to set up Meilisearch, add documents to an index, and search for documents, take a look at our [Quick Start](https://www.meilisearch.com/docs/learn/getting_started/quick_start?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=get-started) guide.
+You may also want to check out [Meilisearch 101](https://www.meilisearch.com/docs/learn/getting_started/filtering_and_sorting?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=get-started) for an introduction to some of Meilisearch's most popular features.
 ## ⚡ Supercharge your Meilisearch experience
 Say goodbye to server deployment and manual updates with [Meilisearch Cloud](https://www.meilisearch.com/cloud?utm_campaign=oss&utm_source=github&utm_medium=meilisearch). No credit card required.
@@ -99,7 +101,7 @@ Meilisearch is a search engine created by [Meili](https://www.welcometothejungle
 - For feature requests, please visit our [product repository](https://github.com/meilisearch/product/discussions)
 - Found a bug? Open an [issue](https://github.com/meilisearch/meilisearch/issues)!
-- Want to be part of our Discord community? [Join us!](https://discord.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=contact)
+- Want to be part of our Discord community? [Join us!](https://discord.gg/meilisearch)
 Thank you for your support!

View File

@@ -106,7 +106,7 @@
       },
       "editorMode": "builder",
       "exemplar": true,
-      "expr": "meilisearch_index_count{job=\"$job\", instance=\"$instance\"}",
+      "expr": "meilisearch_index_count{job=\"meilisearch\", instance=\"$instance\"}",
       "interval": "",
       "legendFormat": "",
       "range": true,
@@ -165,7 +165,7 @@
         "type": "prometheus"
       },
       "editorMode": "builder",
-      "expr": "meilisearch_index_docs_count{job=\"$job\", index=\"$Index\", instance=\"$instance\"}",
+      "expr": "meilisearch_index_docs_count{job=\"meilisearch\", index=\"$Index\", instance=\"$instance\"}",
       "hide": false,
       "range": true,
       "refId": "A"
@@ -228,7 +228,7 @@
       },
       "editorMode": "builder",
       "exemplar": true,
-      "expr": "round(increase(meilisearch_http_requests_total{method=\"POST\", path=\"/indexes/$Index/search\", job=\"$job\"}[1h]))",
+      "expr": "round(increase(meilisearch_http_requests_total{method=\"POST\", path=\"/indexes/$Index/search\", job=\"meilisearch\"}[1h]))",
       "interval": "",
       "legendFormat": "",
       "range": true,
@@ -288,7 +288,7 @@
       },
       "editorMode": "builder",
       "exemplar": true,
-      "expr": "round(increase(meilisearch_http_requests_total{method=\"POST\", path=\"/indexes/$Index/search\", job=\"$job\"}[24h]))",
+      "expr": "round(increase(meilisearch_http_requests_total{method=\"POST\", path=\"/indexes/$Index/search\", job=\"meilisearch\"}[24h]))",
       "interval": "",
       "legendFormat": "",
       "range": true,
@@ -348,7 +348,7 @@
       },
       "editorMode": "builder",
       "exemplar": true,
-      "expr": "round(increase(meilisearch_http_requests_total{method=\"POST\", path=\"/indexes/$Index/search\", job=\"$job\"}[30d]))",
+      "expr": "round(increase(meilisearch_http_requests_total{method=\"POST\", path=\"/indexes/$Index/search\", job=\"meilisearch\"}[30d]))",
       "interval": "",
       "legendFormat": "",
       "range": true,
@@ -447,7 +447,7 @@
       },
       "editorMode": "builder",
       "exemplar": true,
-      "expr": "meilisearch_db_size_bytes{job=\"$job\", instance=\"$instance\"}",
+      "expr": "meilisearch_db_size_bytes{job=\"meilisearch\", instance=\"$instance\"}",
       "interval": "",
       "legendFormat": "Database size on disk",
       "range": true,
@@ -458,7 +458,7 @@
         "type": "prometheus"
       },
       "editorMode": "builder",
-      "expr": "meilisearch_used_db_size_bytes{job=\"$job\", instance=\"$instance\"}",
+      "expr": "meilisearch_used_db_size_bytes{job=\"meilisearch\", instance=\"$instance\"}",
       "hide": false,
       "legendFormat": "Used bytes",
       "range": true,
@@ -553,7 +553,7 @@
       },
       "editorMode": "builder",
       "exemplar": true,
-      "expr": "rate(meilisearch_http_response_time_seconds_sum{instance=\"$instance\", job=\"$job\"}[5m]) / rate(meilisearch_http_response_time_seconds_count[5m])",
+      "expr": "rate(meilisearch_http_response_time_seconds_sum{instance=\"$instance\", job=\"meilisearch\"}[5m]) / rate(meilisearch_http_response_time_seconds_count[5m])",
       "interval": "",
       "legendFormat": "{{method}} {{path}}",
       "range": true,
@@ -646,7 +646,7 @@
       },
       "editorMode": "builder",
       "exemplar": true,
-      "expr": "rate(meilisearch_http_requests_total{instance=\"$instance\", job=\"$job\"}[5m])",
+      "expr": "rate(meilisearch_http_requests_total{instance=\"$instance\", job=\"meilisearch\"}[5m])",
       "interval": "",
       "legendFormat": "{{method}} {{path}}",
       "range": true,
@@ -744,7 +744,7 @@
       },
       "editorMode": "builder",
       "exemplar": true,
-      "expr": "sum by(le) (increase(meilisearch_http_response_time_seconds_bucket{path=\"/indexes/$Index/search\", instance=\"$instance\", job=\"$job\"}[30s]))",
+      "expr": "sum by(le) (increase(meilisearch_http_response_time_seconds_bucket{path=\"/indexes/$Index/search\", instance=\"$instance\", job=\"meilisearch\"}[30s]))",
       "format": "heatmap",
       "interval": "",
       "legendFormat": "{{le}}",
@@ -854,7 +854,7 @@
       },
       "editorMode": "builder",
       "exemplar": true,
-      "expr": "meilisearch_nb_tasks{instance=\"$instance\", job=\"$job\", kind=\"statuses\"}",
+      "expr": "meilisearch_nb_tasks{instance=\"$instance\", job=\"meilisearch\", kind=\"statuses\"}",
       "interval": "",
       "legendFormat": "{{value}} ",
       "range": true,
@@ -947,7 +947,7 @@
       },
       "editorMode": "builder",
       "exemplar": true,
-      "expr": "meilisearch_nb_tasks{instance=\"$instance\", job=\"$job\", kind=\"types\"}",
+      "expr": "meilisearch_nb_tasks{instance=\"$instance\", job=\"meilisearch\", kind=\"types\"}",
       "interval": "",
       "legendFormat": "{{value}} ",
       "range": true,
@@ -1040,7 +1040,7 @@
       },
       "editorMode": "builder",
       "exemplar": true,
-      "expr": "meilisearch_nb_tasks{instance=\"$instance\", job=\"$job\", kind=\"indexes\"}",
+      "expr": "meilisearch_nb_tasks{instance=\"$instance\", job=\"meilisearch\", kind=\"indexes\"}",
       "interval": "",
       "legendFormat": "{{value}} ",
       "range": true,
@@ -1161,7 +1161,7 @@
       },
       "editorMode": "builder",
       "exemplar": true,
-      "expr": "rate(process_cpu_seconds_total{job=\"$job\", instance=\"$instance\"}[1m])",
+      "expr": "rate(process_cpu_seconds_total{job=\"meilisearch\", instance=\"$instance\"}[1m])",
       "interval": "",
       "legendFormat": "process",
       "range": true,
@@ -1264,7 +1264,7 @@
       },
       "editorMode": "builder",
       "exemplar": true,
-      "expr": "process_resident_memory_bytes{job=\"$job\", instance=\"$instance\"} / 1024 / 1024",
+      "expr": "process_resident_memory_bytes{job=\"meilisearch\", instance=\"$instance\"} / 1024 / 1024",
       "interval": "",
       "legendFormat": "process",
       "range": true,
@@ -1342,33 +1342,6 @@
       "skipUrlSync": false,
       "sort": 0,
       "type": "query"
-    },
-    {
-      "current": {
-        "selected": true,
-        "text": "meilisearch",
-        "value": "meilisearch"
-      },
-      "datasource": {
-        "type": "prometheus"
-      },
-      "definition": "label_values(job)",
-      "description": "Prometheus job_name from scrape config (default is meilisearch)",
-      "hide": 0,
-      "includeAll": false,
-      "label": "Job",
-      "multi": false,
-      "name": "job",
-      "options": [],
-      "query": {
-        "query": "label_values(job)",
-        "refId": "StandardVariableQuery"
-      },
-      "refresh": 1,
-      "regex": "",
-      "skipUrlSync": false,
-      "sort": 0,
-      "type": "query"
     }
   ]
 },

View File

@@ -11,24 +11,24 @@ edition.workspace = true
 license.workspace = true
 [dependencies]
-anyhow = "1.0.79"
-csv = "1.3.0"
+anyhow = "1.0.70"
+csv = "1.2.1"
 milli = { path = "../milli" }
-mimalloc = { version = "0.1.39", default-features = false }
-serde_json = { version = "1.0.111", features = ["preserve_order"] }
+mimalloc = { version = "0.1.37", default-features = false }
+serde_json = { version = "1.0.95", features = ["preserve_order"] }
 [dev-dependencies]
 criterion = { version = "0.5.1", features = ["html_reports"] }
 rand = "0.8.5"
 rand_chacha = "0.3.1"
-roaring = "0.10.2"
+roaring = "0.10.1"
 [build-dependencies]
-anyhow = "1.0.79"
-bytes = "1.5.0"
+anyhow = "1.0.70"
+bytes = "1.4.0"
 convert_case = "0.6.0"
-flate2 = "1.0.28"
-reqwest = { version = "0.11.23", features = ["blocking", "rustls-tls"], default-features = false }
+flate2 = "1.0.25"
+reqwest = { version = "0.11.16", features = ["blocking", "rustls-tls"], default-features = false }
 [features]
 default = ["milli/all-tokenizations"]

View File

@@ -11,22 +11,22 @@ readme.workspace = true
 license.workspace = true
 [dependencies]
-anyhow = "1.0.79"
-flate2 = "1.0.28"
-http = "0.2.11"
+anyhow = "1.0.70"
+flate2 = "1.0.25"
+http = "0.2.9"
+log = "0.4.17"
 meilisearch-auth = { path = "../meilisearch-auth" }
 meilisearch-types = { path = "../meilisearch-types" }
-once_cell = "1.19.0"
-regex = "1.10.2"
-roaring = { version = "0.10.2", features = ["serde"] }
-serde = { version = "1.0.195", features = ["derive"] }
-serde_json = { version = "1.0.111", features = ["preserve_order"] }
-tar = "0.4.40"
-tempfile = "3.9.0"
-thiserror = "1.0.56"
-time = { version = "0.3.31", features = ["serde-well-known", "formatting", "parsing", "macros"] }
-tracing = "0.1.40"
-uuid = { version = "1.6.1", features = ["serde", "v4"] }
+once_cell = "1.17.1"
+regex = "1.7.3"
+roaring = { version = "0.10.1", features = ["serde"] }
+serde = { version = "1.0.160", features = ["derive"] }
+serde_json = { version = "1.0.95", features = ["preserve_order"] }
+tar = "0.4.38"
+tempfile = "3.5.0"
+thiserror = "1.0.40"
+time = { version = "0.3.20", features = ["serde-well-known", "formatting", "parsing", "macros"] }
+uuid = { version = "1.3.1", features = ["serde", "v4"] }
 [dev-dependencies]
 big_s = "1.0.2"

View File

@@ -120,7 +120,7 @@ impl From<v1::settings::Settings> for v2::Settings<v2::Unchecked> {
                     criterion.as_ref().map(ToString::to_string)
                 }
                 Err(()) => {
-                    tracing::warn!(
+                    log::warn!(
                         "Could not import the following ranking rule: `{}`.",
                         ranking_rule
                     );
@@ -152,11 +152,11 @@ impl From<v1::update::UpdateStatus> for Option<v2::updates::UpdateStatus> {
         use v2::updates::UpdateStatus as UpdateStatusV2;
         Some(match source {
             UpdateStatusV1::Enqueued { content } => {
-                tracing::warn!(
+                log::warn!(
                     "Cannot import task {} (importing enqueued tasks from v1 dumps is unsupported)",
                     content.update_id
                 );
-                tracing::warn!("Task will be skipped in the queue of imported tasks.");
+                log::warn!("Task will be skipped in the queue of imported tasks.");
                 return None;
             }
@@ -229,7 +229,7 @@ impl From<v1::update::UpdateType> for Option<v2::updates::UpdateMeta> {
         Some(match source {
             v1::update::UpdateType::ClearAll => v2::updates::UpdateMeta::ClearDocuments,
             v1::update::UpdateType::Customs => {
-                tracing::warn!("Ignoring task with type 'Customs' that is no longer supported");
+                log::warn!("Ignoring task with type 'Customs' that is no longer supported");
                 return None;
             }
             v1::update::UpdateType::DocumentsAddition { .. } => {
@@ -296,7 +296,7 @@ impl From<v1::settings::RankingRule> for Option<v2::settings::Criterion> {
             v1::settings::RankingRule::Proximity => Some(v2::settings::Criterion::Proximity),
             v1::settings::RankingRule::Attribute => Some(v2::settings::Criterion::Attribute),
             v1::settings::RankingRule::WordsPosition => {
-                tracing::warn!("Removing the 'WordsPosition' ranking rule that is no longer supported, please check the resulting ranking rules of your indexes");
+                log::warn!("Removing the 'WordsPosition' ranking rule that is no longer supported, please check the resulting ranking rules of your indexes");
                 None
             }
             v1::settings::RankingRule::Exactness => Some(v2::settings::Criterion::Exactness),
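This file and the other dump-compat modules below revert the structured `tracing` macros to the format-string `log` macros. The two call styles, side by side (a sketch for comparison, not code from this diff):

```rust
// `tracing` captures named key-value fields; `log` interpolates a string.
fn warn_unknown_code(other: &str) {
    tracing::warn!(code = other, "Unknown error code"); // base branch style
    log::warn!("Unknown error code {}", other);         // this branch's style
}
```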

View File

@@ -146,8 +146,8 @@ impl From<v2::updates::UpdateStatus> for v3::updates::UpdateStatus {
                     started_processing_at: processing.started_processing_at,
                 }),
                 Err(e) => {
-                    tracing::warn!("Error with task {}: {}", processing.from.update_id, e);
-                    tracing::warn!("Task will be marked as `Failed`.");
+                    log::warn!("Error with task {}: {}", processing.from.update_id, e);
+                    log::warn!("Task will be marked as `Failed`.");
                     v3::updates::UpdateStatus::Failed(v3::updates::Failed {
                         from: v3::updates::Processing {
                             from: v3::updates::Enqueued {
@@ -172,8 +172,8 @@ impl From<v2::updates::UpdateStatus> for v3::updates::UpdateStatus {
                     enqueued_at: enqueued.enqueued_at,
                 }),
                 Err(e) => {
-                    tracing::warn!("Error with task {}: {}", enqueued.update_id, e);
-                    tracing::warn!("Task will be marked as `Failed`.");
+                    log::warn!("Error with task {}: {}", enqueued.update_id, e);
+                    log::warn!("Task will be marked as `Failed`.");
                     v3::updates::UpdateStatus::Failed(v3::updates::Failed {
                         from: v3::updates::Processing {
                             from: v3::updates::Enqueued {
@@ -353,7 +353,7 @@ impl From<String> for v3::Code {
             "malformed_payload" => v3::Code::MalformedPayload,
             "missing_payload" => v3::Code::MissingPayload,
             other => {
-                tracing::warn!("Unknown error code {}", other);
+                log::warn!("Unknown error code {}", other);
                 v3::Code::UnretrievableErrorCode
             }
         }

View File

@@ -76,20 +76,20 @@ impl CompatV3ToV4 {
                 let index_uid = match index_uid {
                     Some(uid) => uid,
                     None => {
-                        tracing::warn!(
+                        log::warn!(
                             "Error while importing the update {}.",
                             task.update.id()
                         );
-                        tracing::warn!(
+                        log::warn!(
                             "The index associated to the uuid `{}` could not be retrieved.",
                             task.uuid.to_string()
                         );
                         if task.update.is_finished() {
                             // we're fucking with his history but not his data, that's ok-ish.
-                            tracing::warn!("The index-uuid will be set as `unknown`.");
+                            log::warn!("The index-uuid will be set as `unknown`.");
                             String::from("unknown")
                         } else {
-                            tracing::warn!("The task will be ignored.");
+                            log::warn!("The task will be ignored.");
                             return None;
                         }
                     }

View File

@@ -305,7 +305,7 @@ impl From<v4::ResponseError> for v5::ResponseError {
             "invalid_api_key_expires_at" => v5::Code::InvalidApiKeyExpiresAt,
             "invalid_api_key_description" => v5::Code::InvalidApiKeyDescription,
             other => {
-                tracing::warn!("Unknown error code {}", other);
+                log::warn!("Unknown error code {}", other);
                 v5::Code::UnretrievableErrorCode
             }
         };

View File

@@ -304,7 +304,7 @@ impl From<v5::ResponseError> for v6::ResponseError {
             "immutable_field" => v6::Code::BadRequest,
             "api_key_already_exists" => v6::Code::ApiKeyAlreadyExists,
             other => {
-                tracing::warn!("Unknown error code {}", other);
+                log::warn!("Unknown error code {}", other);
                 v6::Code::UnretrievableErrorCode
             }
         };
@@ -329,7 +329,7 @@ impl<T> From<v5::Settings<T>> for v6::Settings<v6::Unchecked> {
                         new_ranking_rules.push(new_rule);
                     }
                     Err(_) => {
-                        tracing::warn!("Error while importing settings. The ranking rule `{rule}` does not exist anymore.")
+                        log::warn!("Error while importing settings. The ranking rule `{rule}` does not exist anymore.")
                     }
                 }
             }

View File

@@ -1,6 +1,5 @@
 use serde::{Deserialize, Serialize};
-#[allow(clippy::enum_variant_names)]
 #[derive(Serialize, Deserialize, Debug, Clone, Copy)]
 pub enum Code {
     // index related error

View File

@@ -95,7 +95,6 @@ impl fmt::Display for ErrorType {
     }
 }
-#[allow(clippy::enum_variant_names)]
 #[derive(Serialize, Deserialize, Debug, Clone, Copy)]
 pub enum Code {
     // index related error

View File

@@ -31,7 +31,6 @@ impl ResponseError {
     }
 }
-#[allow(clippy::enum_variant_names)]
 #[derive(Deserialize, Debug, Clone, Copy)]
 #[cfg_attr(test, derive(serde::Serialize))]
 pub enum Code {

View File

@@ -2,10 +2,10 @@ use std::fs::{self, File};
 use std::io::{BufRead, BufReader, ErrorKind};
 use std::path::Path;
+use log::debug;
 pub use meilisearch_types::milli;
 use tempfile::TempDir;
 use time::OffsetDateTime;
-use tracing::debug;
 use uuid::Uuid;
 use super::Document;

View File

@@ -11,9 +11,9 @@ edition.workspace = true
 license.workspace = true
 [dependencies]
-tempfile = "3.9.0"
-thiserror = "1.0.56"
-uuid = { version = "1.6.1", features = ["serde", "v4"] }
+tempfile = "3.5.0"
+thiserror = "1.0.40"
+uuid = { version = "1.3.1", features = ["serde", "v4"] }
 [dev-dependencies]
-faux = "0.1.10"
+faux = "0.1.9"

View File

@@ -13,8 +13,8 @@ license.workspace = true
 [dependencies]
 nom = "7.1.3"
-nom_locate = "4.2.0"
-unescaper = "0.1.3"
+nom_locate = "4.1.0"
+unescaper = "0.1.2"
 [dev-dependencies]
-insta = "1.34.0"
+insta = "1.29.0"

View File

@@ -11,10 +11,10 @@ edition.workspace = true
 license.workspace = true
 [dependencies]
-arbitrary = { version = "1.3.2", features = ["derive"] }
-clap = { version = "4.4.17", features = ["derive"] }
-fastrand = "2.0.1"
+arbitrary = { version = "1.3.0", features = ["derive"] }
+clap = { version = "4.3.0", features = ["derive"] }
+fastrand = "2.0.0"
 milli = { path = "../milli" }
-serde = { version = "1.0.195", features = ["derive"] }
-serde_json = { version = "1.0.111", features = ["preserve_order"] }
-tempfile = "3.9.0"
+serde = { version = "1.0.160", features = ["derive"] }
+serde_json = { version = "1.0.95", features = ["preserve_order"] }
+tempfile = "3.5.0"

View File

@@ -11,36 +11,30 @@ edition.workspace = true
 license.workspace = true
 [dependencies]
-anyhow = "1.0.79"
+anyhow = "1.0.70"
 bincode = "1.3.3"
-csv = "1.3.0"
+csv = "1.2.1"
 derive_builder = "0.12.0"
 dump = { path = "../dump" }
-enum-iterator = "1.5.0"
+enum-iterator = "1.4.0"
 file-store = { path = "../file-store" }
-flate2 = "1.0.28"
+log = "0.4.17"
 meilisearch-auth = { path = "../meilisearch-auth" }
 meilisearch-types = { path = "../meilisearch-types" }
 page_size = "0.5.0"
 puffin = { version = "0.16.0", features = ["serialization"] }
-roaring = { version = "0.10.2", features = ["serde"] }
-serde = { version = "1.0.195", features = ["derive"] }
-serde_json = { version = "1.0.111", features = ["preserve_order"] }
+roaring = { version = "0.10.1", features = ["serde"] }
+serde = { version = "1.0.160", features = ["derive"] }
+serde_json = { version = "1.0.95", features = ["preserve_order"] }
 synchronoise = "1.0.1"
-tempfile = "3.9.0"
-thiserror = "1.0.56"
-time = { version = "0.3.31", features = [
-    "serde-well-known",
-    "formatting",
-    "parsing",
-    "macros",
-] }
-tracing = "0.1.40"
-ureq = "2.9.1"
-uuid = { version = "1.6.1", features = ["serde", "v4"] }
+tempfile = "3.5.0"
+thiserror = "1.0.40"
+time = { version = "0.3.20", features = ["serde-well-known", "formatting", "parsing", "macros"] }
+uuid = { version = "1.3.1", features = ["serde", "v4"] }
 [dev-dependencies]
 big_s = "1.0.2"
-crossbeam = "0.8.4"
-insta = { version = "1.34.0", features = ["json", "redactions"] }
+crossbeam = "0.8.2"
+insta = { version = "1.29.0", features = ["json", "redactions"] }
 meili-snap = { path = "../meili-snap" }
+nelson = { git = "https://github.com/meilisearch/nelson.git", rev = "675f13885548fb415ead8fbb447e9e6d9314000a"}

View File

@@ -24,6 +24,7 @@ use std::fs::{self, File};
 use std::io::BufWriter;
 use dump::IndexMetadata;
+use log::{debug, error, info, trace};
 use meilisearch_types::error::Code;
 use meilisearch_types::heed::{RoTxn, RwTxn};
 use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader};
@@ -59,7 +60,7 @@ pub(crate) enum Batch {
         /// The list of tasks that were processing when this task cancelation appeared.
         previous_processing_tasks: RoaringBitmap,
     },
-    TaskDeletions(Vec<Task>),
+    TaskDeletion(Task),
     SnapshotCreation(Vec<Task>),
     Dump(Task),
     IndexOperation {
@@ -145,12 +146,13 @@ impl Batch {
     pub fn ids(&self) -> Vec<TaskId> {
         match self {
             Batch::TaskCancelation { task, .. }
+            | Batch::TaskDeletion(task)
             | Batch::Dump(task)
             | Batch::IndexCreation { task, .. }
             | Batch::IndexUpdate { task, .. } => vec![task.uid],
-            Batch::SnapshotCreation(tasks)
-            | Batch::TaskDeletions(tasks)
-            | Batch::IndexDeletion { tasks, .. } => tasks.iter().map(|task| task.uid).collect(),
+            Batch::SnapshotCreation(tasks) | Batch::IndexDeletion { tasks, .. } => {
+                tasks.iter().map(|task| task.uid).collect()
+            }
             Batch::IndexOperation { op, .. } => match op {
                 IndexOperation::DocumentOperation { tasks, .. }
                 | IndexOperation::Settings { tasks, .. }
@@ -178,7 +180,7 @@ impl Batch {
         use Batch::*;
         match self {
             TaskCancelation { .. }
-            | TaskDeletions(_)
+            | TaskDeletion(_)
             | SnapshotCreation(_)
             | Dump(_)
             | IndexSwap { .. } => None,
@@ -197,7 +199,7 @@ impl fmt::Display for Batch {
         let tasks = self.ids();
         match self {
             Batch::TaskCancelation { .. } => f.write_str("TaskCancelation")?,
-            Batch::TaskDeletions(_) => f.write_str("TaskDeletion")?,
+            Batch::TaskDeletion(_) => f.write_str("TaskDeletion")?,
             Batch::SnapshotCreation(_) => f.write_str("SnapshotCreation")?,
             Batch::Dump(_) => f.write_str("Dump")?,
             Batch::IndexOperation { op, .. } => write!(f, "{op}")?,
@@ -513,7 +515,6 @@ impl IndexScheduler {
     /// 3. We get the *next* snapshot to process.
     /// 4. We get the *next* dump to process.
     /// 5. We get the *next* tasks to process for a specific index.
-    #[tracing::instrument(level = "trace", skip(self, rtxn), target = "indexing::scheduler")]
     pub(crate) fn create_next_batch(&self, rtxn: &RoTxn) -> Result<Option<Batch>> {
         #[cfg(test)]
         self.maybe_fail(crate::tests::FailureLocation::InsideCreateBatch)?;
@@ -538,9 +539,9 @@ impl IndexScheduler {
         // 2. we get the next task to delete
         let to_delete = self.get_kind(rtxn, Kind::TaskDeletion)? & enqueued;
-        if !to_delete.is_empty() {
-            let tasks = self.get_existing_tasks(rtxn, to_delete)?;
-            return Ok(Some(Batch::TaskDeletions(tasks)));
+        if let Some(task_id) = to_delete.min() {
+            let task = self.get_task(rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?;
+            return Ok(Some(Batch::TaskDeletion(task)));
         }
         // 3. we batch the snapshot.
@@ -619,7 +620,6 @@ impl IndexScheduler {
     /// The list of tasks that were processed. The metadata of each task in the returned
     /// list is updated accordingly, with the exception of the its date fields
     /// [`finished_at`](meilisearch_types::tasks::Task::finished_at) and [`started_at`](meilisearch_types::tasks::Task::started_at).
-    #[tracing::instrument(level = "trace", skip(self, batch), target = "indexing::scheduler", fields(batch=batch.to_string()))]
     pub(crate) fn process_batch(&self, batch: Batch) -> Result<Vec<Task>> {
         #[cfg(test)]
         {
@@ -669,10 +669,9 @@ impl IndexScheduler {
                     Ok(()) => {
                         for content_uuid in canceled_tasks_content_uuids {
                             if let Err(error) = self.delete_update_file(content_uuid) {
-                                tracing::error!(
-                                    file_content_uuid = %content_uuid,
-                                    %error,
-                                    "Failed deleting content file"
+                                error!(
+                                    "We failed deleting the content file indentified as {}: {}",
+                                    content_uuid, error
                                 )
                             }
                         }
@@ -682,43 +681,31 @@ impl IndexScheduler {
                 Ok(vec![task])
             }
-            Batch::TaskDeletions(mut tasks) => {
-                // 1. Retrieve the tasks that matched the query at enqueue-time.
-                let mut matched_tasks = RoaringBitmap::new();
-                for task in tasks.iter() {
-                    if let KindWithContent::TaskDeletion { tasks, query: _ } = &task.kind {
-                        matched_tasks |= tasks;
-                    } else {
-                        unreachable!()
-                    }
-                }
-                let mut wtxn = self.env.write_txn()?;
-                let mut deleted_tasks = self.delete_matched_tasks(&mut wtxn, &matched_tasks)?;
-                wtxn.commit()?;
-                for task in tasks.iter_mut() {
-                    task.status = Status::Succeeded;
-                    let KindWithContent::TaskDeletion { tasks, query: _ } = &task.kind else {
-                        unreachable!()
-                    };
-                    let deleted_tasks_count = deleted_tasks.intersection_len(tasks);
-                    deleted_tasks -= tasks;
-                    match &mut task.details {
-                        Some(Details::TaskDeletion {
-                            matched_tasks: _,
-                            deleted_tasks,
-                            original_filter: _,
-                        }) => {
-                            *deleted_tasks = Some(deleted_tasks_count);
-                        }
-                        _ => unreachable!(),
-                    }
-                }
-                Ok(tasks)
-            }
+            Batch::TaskDeletion(mut task) => {
+                // 1. Retrieve the tasks that matched the query at enqueue-time.
+                let matched_tasks =
+                    if let KindWithContent::TaskDeletion { tasks, query: _ } = &task.kind {
+                        tasks
+                    } else {
+                        unreachable!()
+                    };
+                let mut wtxn = self.env.write_txn()?;
+                let deleted_tasks_count = self.delete_matched_tasks(&mut wtxn, matched_tasks)?;
+                task.status = Status::Succeeded;
+                match &mut task.details {
+                    Some(Details::TaskDeletion {
+                        matched_tasks: _,
+                        deleted_tasks,
+                        original_filter: _,
+                    }) => {
+                        *deleted_tasks = Some(deleted_tasks_count);
+                    }
+                    _ => unreachable!(),
+                }
+                wtxn.commit()?;
+                Ok(vec![task])
+            }
             Batch::SnapshotCreation(mut tasks) => {
                 fs::create_dir_all(&self.snapshots_path)?;
@@ -949,8 +936,8 @@ impl IndexScheduler {
                 };
                 // the index operation can take a long time, so save this handle to make it available to the search for the duration of the tick
-                self.index_mapper
-                    .set_currently_updating_index(Some((index_uid.clone(), index.clone())));
+                *self.currently_updating_index.write().unwrap() =
+                    Some((index_uid.clone(), index.clone()));
                 let mut index_wtxn = index.write_txn()?;
                 let tasks = self.apply_index_operation(&mut index_wtxn, &index, op)?;
@@ -971,10 +958,7 @@ impl IndexScheduler {
                 match res {
                     Ok(_) => (),
-                    Err(e) => tracing::error!(
-                        error = &e as &dyn std::error::Error,
-                        "Could not write the stats of the index"
-                    ),
+                    Err(e) => error!("Could not write the stats of the index {}", e),
                 }
                 Ok(tasks)
@@ -1002,7 +986,7 @@ impl IndexScheduler {
                 builder.set_primary_key(primary_key);
                 let must_stop_processing = self.must_stop_processing.clone();
                 builder.execute(
-                    |indexing_step| tracing::debug!(update = ?indexing_step),
+                    |indexing_step| debug!("update: {:?}", indexing_step),
                     || must_stop_processing.get(),
                 )?;
                 index_wtxn.commit()?;
@@ -1029,10 +1013,7 @@ impl IndexScheduler {
                 match res {
                     Ok(_) => (),
-                    Err(e) => tracing::error!(
-                        error = &e as &dyn std::error::Error,
-                        "Could not write the stats of the index"
-                    ),
+                    Err(e) => error!("Could not write the stats of the index {}", e),
                 }
                 Ok(vec![task])
@@ -1151,11 +1132,6 @@ impl IndexScheduler {
     ///
     /// ## Return
     /// The list of processed tasks.
-    #[tracing::instrument(
-        level = "trace",
-        skip(self, index_wtxn, index),
-        target = "indexing::scheduler"
-    )]
     fn apply_index_operation<'i>(
         &self,
         index_wtxn: &mut RwTxn<'i>,
@@ -1216,7 +1192,7 @@ impl IndexScheduler {
                     milli::update::Settings::new(index_wtxn, index, indexer_config);
                 builder.set_primary_key(primary_key);
                 builder.execute(
-                    |indexing_step| tracing::debug!(update = ?indexing_step),
+                    |indexing_step| debug!("update: {:?}", indexing_step),
                     || must_stop_processing.clone().get(),
                 )?;
                 primary_key_has_been_set = true;
@@ -1235,7 +1211,7 @@ impl IndexScheduler {
                     index,
                     indexer_config,
                     config,
-                    |indexing_step| tracing::trace!(?indexing_step, "Update"),
+                    |indexing_step| trace!("update: {:?}", indexing_step),
                     || must_stop_processing.get(),
                 )?;
@@ -1307,7 +1283,7 @@ impl IndexScheduler {
                 if !tasks.iter().all(|res| res.error.is_some()) {
                     let addition = builder.execute()?;
-                    tracing::info!(indexing_result = ?addition, "document indexing done");
+                    info!("document addition done: {:?}", addition);
                 } else if primary_key_has_been_set {
                     // Everything failed but we've set a primary key.
                     // We need to remove it.
@@ -1315,7 +1291,7 @@ impl IndexScheduler {
                         milli::update::Settings::new(index_wtxn, index, indexer_config);
                     builder.reset_primary_key();
                     builder.execute(
-                        |indexing_step| tracing::trace!(update = ?indexing_step),
+                        |indexing_step| trace!("update: {:?}", indexing_step),
                         || must_stop_processing.clone().get(),
                     )?;
                 }
@@ -1375,6 +1351,9 @@ impl IndexScheduler {
                 for (task, (_, settings)) in tasks.iter_mut().zip(settings) {
                     let checked_settings = settings.clone().check();
+                    if matches!(checked_settings.embedders, milli::update::Setting::Set(_)) {
+                        self.features().check_vector("Passing `embedders` in settings")?
+                    }
                     task.details = Some(Details::SettingsUpdate { settings: Box::new(settings) });
                     apply_settings_to_builder(&checked_settings, &mut builder);
@@ -1385,7 +1364,7 @@ impl IndexScheduler {
                 let must_stop_processing = self.must_stop_processing.clone();
                 builder.execute(
-                    |indexing_step| tracing::debug!(update = ?indexing_step),
+                    |indexing_step| debug!("update: {:?}", indexing_step),
                     || must_stop_processing.get(),
                 )?;
@@ -1459,11 +1438,7 @@ impl IndexScheduler {
     /// Delete each given task from all the databases (if it is deleteable).
     ///
     /// Return the number of tasks that were actually deleted.
-    fn delete_matched_tasks(
-        &self,
-        wtxn: &mut RwTxn,
-        matched_tasks: &RoaringBitmap,
-    ) -> Result<RoaringBitmap> {
+    fn delete_matched_tasks(&self, wtxn: &mut RwTxn, matched_tasks: &RoaringBitmap) -> Result<u64> {
         // 1. Remove from this list the tasks that we are not allowed to delete
         let enqueued_tasks = self.get_status(wtxn, Status::Enqueued)?;
         let processing_tasks = &self.processing_tasks.read().unwrap().processing.clone();
@@ -1528,7 +1503,7 @@ impl IndexScheduler {
             }
         }
-        Ok(to_delete_tasks)
+        Ok(to_delete_tasks.len())
     }
     /// Cancel each given task from all the databases (if it is cancelable).
@@ -1597,7 +1572,7 @@ fn delete_document_by_filter<'a>(
         index,
         indexer_config,
         config,
-        |indexing_step| tracing::debug!(update = ?indexing_step),
+        |indexing_step| debug!("update: {:?}", indexing_step),
         || must_stop_processing.get(),
     )?;
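The base branch processes every enqueued task-deletion task in one `Batch::TaskDeletions(Vec<Task>)`: it unions all matched task ids, deletes them in a single write transaction, then credits each deletion task via `intersection_len` while subtracting its ids so overlapping queries are not double-counted. This branch reverts to one `Batch::TaskDeletion(Task)` per tick. A standalone sketch of the batched accounting, using only the `roaring` crate:

```rust
use roaring::RoaringBitmap;

// Sketch: given what each deletion task matched, compute how many ids
// each task gets credited with after a single combined deletion pass.
fn attribute_deletions(matched_per_task: &[RoaringBitmap]) -> Vec<u64> {
    // Union of every id any deletion task matched (deleted all at once).
    let mut deleted = matched_per_task
        .iter()
        .fold(RoaringBitmap::new(), |acc, m| acc | m);
    matched_per_task
        .iter()
        .map(|matched| {
            // Ids this task matched that no earlier task already claimed.
            let count = deleted.intersection_len(matched);
            deleted -= matched;
            count
        })
        .collect()
}
```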

View File

@@ -30,6 +30,19 @@ impl RoFeatures {
         self.runtime
     }
+    pub fn check_score_details(&self) -> Result<()> {
+        if self.runtime.score_details {
+            Ok(())
+        } else {
+            Err(FeatureNotEnabledError {
+                disabled_action: "Computing score details",
+                feature: "score details",
+                issue_link: "https://github.com/meilisearch/product/discussions/674",
+            }
+            .into())
+        }
+    }
     pub fn check_metrics(&self) -> Result<()> {
         if self.runtime.metrics {
             Ok(())
@@ -43,19 +56,6 @@ impl RoFeatures {
         }
     }
-    pub fn check_logs_route(&self) -> Result<()> {
-        if self.runtime.logs_route {
-            Ok(())
-        } else {
-            Err(FeatureNotEnabledError {
-                disabled_action: "Modifying logs through the `/logs/*` routes",
-                feature: "logs route",
-                issue_link: "https://github.com/orgs/meilisearch/discussions/721",
-            }
-            .into())
-        }
-    }
     pub fn check_vector(&self, disabled_action: &'static str) -> Result<()> {
         if self.runtime.vector_store {
             Ok(())
@@ -94,7 +94,6 @@ impl FeatureData {
             runtime_features_db.get(&txn, EXPERIMENTAL_FEATURES)?.unwrap_or_default();
         let runtime = Arc::new(RwLock::new(RuntimeTogglableFeatures {
             metrics: instance_features.metrics || persisted_features.metrics,
-            logs_route: instance_features.logs_route || persisted_features.logs_route,
             ..persisted_features
         }));

View File

@@ -3,13 +3,13 @@ use std::sync::{Arc, RwLock};
 use std::time::Duration;
 use std::{fs, thread};
+use log::error;
 use meilisearch_types::heed::types::{SerdeJson, Str};
 use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn};
 use meilisearch_types::milli::update::IndexerConfig;
 use meilisearch_types::milli::{FieldDistribution, Index};
 use serde::{Deserialize, Serialize};
 use time::OffsetDateTime;
-use tracing::error;
 use uuid::Uuid;
 use self::index_map::IndexMap;
@@ -69,10 +69,6 @@ pub struct IndexMapper {
/// Whether we open a meilisearch index with the MDB_WRITEMAP option or not. /// Whether we open a meilisearch index with the MDB_WRITEMAP option or not.
enable_mdb_writemap: bool, enable_mdb_writemap: bool,
pub indexer_config: Arc<IndexerConfig>, pub indexer_config: Arc<IndexerConfig>,
/// A few types of long running batches of tasks that act on a single index set this field
/// so that a handle to the index is available from other threads (search) in an optimized manner.
currently_updating_index: Arc<RwLock<Option<(String, Index)>>>,
} }
/// Whether the index is available for use or is forbidden to be inserted back in the index map /// Whether the index is available for use or is forbidden to be inserted back in the index map
@@ -155,7 +151,6 @@ impl IndexMapper {
index_growth_amount, index_growth_amount,
enable_mdb_writemap, enable_mdb_writemap,
indexer_config: Arc::new(indexer_config), indexer_config: Arc::new(indexer_config),
currently_updating_index: Default::default(),
}) })
} }
@@ -308,14 +303,6 @@ impl IndexMapper {
/// Return an index, may open it if it wasn't already opened. /// Return an index, may open it if it wasn't already opened.
pub fn index(&self, rtxn: &RoTxn, name: &str) -> Result<Index> { pub fn index(&self, rtxn: &RoTxn, name: &str) -> Result<Index> {
if let Some((current_name, current_index)) =
self.currently_updating_index.read().unwrap().as_ref()
{
if current_name == name {
return Ok(current_index.clone());
}
}
let uuid = self let uuid = self
.index_mapping .index_mapping
.get(rtxn, name)? .get(rtxn, name)?
@@ -487,8 +474,4 @@ impl IndexMapper {
pub fn indexer_config(&self) -> &IndexerConfig { pub fn indexer_config(&self) -> &IndexerConfig {
&self.indexer_config &self.indexer_config
} }
pub fn set_currently_updating_index(&self, index: Option<(String, Index)>) {
*self.currently_updating_index.write().unwrap() = index;
}
} }
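The `currently_updating_index` fast path removed from `IndexMapper` here (and re-added on `IndexScheduler` further down) lets search threads reuse the handle held by a long-running update batch instead of going through the LMDB name-to-uuid mapping. A rough sketch of the idea, with a stand-in `Index` type and hypothetical names:

use std::sync::{Arc, RwLock};

// Stand-in for the real heed-backed index handle; cloning is cheap.
#[derive(Clone)]
struct Index(Arc<String>);

struct Mapper {
    currently_updating_index: Arc<RwLock<Option<(String, Index)>>>,
}

impl Mapper {
    fn index(&self, name: &str) -> Index {
        // Fast path: an update batch currently holds this index, reuse its handle.
        if let Some((current, index)) = self.currently_updating_index.read().unwrap().as_ref() {
            if current == name {
                return index.clone();
            }
        }
        // Slow path: the real code resolves the uuid and opens the LMDB environment.
        Index(Arc::new(name.to_string()))
    }
}

fn main() {
    let mapper = Mapper { currently_updating_index: Arc::new(RwLock::new(None)) };
    *mapper.currently_updating_index.write().unwrap() =
        Some(("movies".into(), Index(Arc::new("movies".into()))));
    println!("got handle to `{}` via the fast path", mapper.index("movies").0);
}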

View File

@@ -37,11 +37,10 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
snapshots_path: _, snapshots_path: _,
auth_path: _, auth_path: _,
version_file_path: _, version_file_path: _,
webhook_url: _,
webhook_authorization_header: _,
test_breakpoint_sdr: _, test_breakpoint_sdr: _,
planned_failures: _, planned_failures: _,
run_loop_iteration: _, run_loop_iteration: _,
currently_updating_index: _,
embedders: _, embedders: _,
} = scheduler; } = scheduler;

View File

@@ -34,7 +34,6 @@ pub type TaskId = u32;
use std::collections::{BTreeMap, HashMap}; use std::collections::{BTreeMap, HashMap};
use std::fs::File; use std::fs::File;
use std::io::{self, BufReader, Read};
use std::ops::{Bound, RangeBounds}; use std::ops::{Bound, RangeBounds};
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
use std::sync::atomic::AtomicBool; use std::sync::atomic::AtomicBool;
@@ -46,8 +45,6 @@ use dump::{KindDump, TaskDump, UpdateFile};
pub use error::Error; pub use error::Error;
pub use features::RoFeatures; pub use features::RoFeatures;
use file_store::FileStore; use file_store::FileStore;
use flate2::bufread::GzEncoder;
use flate2::Compression;
use meilisearch_types::error::ResponseError; use meilisearch_types::error::ResponseError;
use meilisearch_types::features::{InstanceTogglableFeatures, RuntimeTogglableFeatures}; use meilisearch_types::features::{InstanceTogglableFeatures, RuntimeTogglableFeatures};
use meilisearch_types::heed::byteorder::BE; use meilisearch_types::heed::byteorder::BE;
@@ -57,7 +54,6 @@ use meilisearch_types::milli::documents::DocumentsBatchBuilder;
use meilisearch_types::milli::update::IndexerConfig; use meilisearch_types::milli::update::IndexerConfig;
use meilisearch_types::milli::vector::{Embedder, EmbedderOptions, EmbeddingConfigs}; use meilisearch_types::milli::vector::{Embedder, EmbedderOptions, EmbeddingConfigs};
use meilisearch_types::milli::{self, CboRoaringBitmapCodec, Index, RoaringBitmapCodec, BEU32}; use meilisearch_types::milli::{self, CboRoaringBitmapCodec, Index, RoaringBitmapCodec, BEU32};
use meilisearch_types::task_view::TaskView;
use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task}; use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task};
use puffin::FrameView; use puffin::FrameView;
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
@@ -174,8 +170,8 @@ impl ProcessingTasks {
} }
/// Set the processing tasks to an empty list /// Set the processing tasks to an empty list
fn stop_processing(&mut self) -> RoaringBitmap { fn stop_processing(&mut self) {
std::mem::take(&mut self.processing) self.processing = RoaringBitmap::new();
} }
/// Returns `true` if there, at least, is one task that is currently processing that we must stop. /// Returns `true` if there, at least, is one task that is currently processing that we must stop.
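`std::mem::take` in the newer `stop_processing` swaps an empty bitmap in and returns the previous contents, so the caller still knows which tasks just finished (the webhook notification below consumes that set); the older plain reassignment discards it. A small sketch, assuming the `roaring` crate:

use roaring::RoaringBitmap;

// Swaps an empty bitmap in and hands the old contents back to the caller.
fn stop_processing(processing: &mut RoaringBitmap) -> RoaringBitmap {
    std::mem::take(processing)
}

fn main() {
    let mut processing: RoaringBitmap = [1u32, 2, 3].into_iter().collect();
    let stopped = stop_processing(&mut processing);
    assert_eq!(stopped.len(), 3);   // the caller keeps the finished task ids
    assert!(processing.is_empty()); // the shared state was reset
}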
@@ -245,10 +241,6 @@ pub struct IndexSchedulerOptions {
pub snapshots_path: PathBuf, pub snapshots_path: PathBuf,
/// The path to the folder containing the dumps. /// The path to the folder containing the dumps.
pub dumps_path: PathBuf, pub dumps_path: PathBuf,
/// The URL on which we must send the tasks statuses
pub webhook_url: Option<String>,
/// The value we will send into the Authorization HTTP header on the webhook URL
pub webhook_authorization_header: Option<String>,
/// The maximum size, in bytes, of the task index. /// The maximum size, in bytes, of the task index.
pub task_db_size: usize, pub task_db_size: usize,
/// The size, in bytes, with which a meilisearch index is opened the first time of each meilisearch index. /// The size, in bytes, with which a meilisearch index is opened the first time of each meilisearch index.
@@ -331,11 +323,6 @@ pub struct IndexScheduler {
/// The maximum number of tasks that will be batched together. /// The maximum number of tasks that will be batched together.
pub(crate) max_number_of_batched_tasks: usize, pub(crate) max_number_of_batched_tasks: usize,
/// The webhook url we should send tasks to after processing every batches.
pub(crate) webhook_url: Option<String>,
/// The Authorization header to send to the webhook URL.
pub(crate) webhook_authorization_header: Option<String>,
/// A frame to output the indexation profiling files to disk. /// A frame to output the indexation profiling files to disk.
pub(crate) puffin_frame: Arc<puffin::GlobalFrameView>, pub(crate) puffin_frame: Arc<puffin::GlobalFrameView>,
@@ -351,6 +338,10 @@ pub struct IndexScheduler {
/// The path to the version file of Meilisearch. /// The path to the version file of Meilisearch.
pub(crate) version_file_path: PathBuf, pub(crate) version_file_path: PathBuf,
/// A few types of long running batches of tasks that act on a single index set this field
/// so that a handle to the index is available from other threads (search) in an optimized manner.
currently_updating_index: Arc<RwLock<Option<(String, Index)>>>,
embedders: Arc<RwLock<HashMap<EmbedderOptions, Arc<Embedder>>>>, embedders: Arc<RwLock<HashMap<EmbedderOptions, Arc<Embedder>>>>,
// ================= test // ================= test
@@ -397,8 +388,7 @@ impl IndexScheduler {
dumps_path: self.dumps_path.clone(), dumps_path: self.dumps_path.clone(),
auth_path: self.auth_path.clone(), auth_path: self.auth_path.clone(),
version_file_path: self.version_file_path.clone(), version_file_path: self.version_file_path.clone(),
webhook_url: self.webhook_url.clone(),
webhook_authorization_header: self.webhook_authorization_header.clone(),
currently_updating_index: self.currently_updating_index.clone(),
embedders: self.embedders.clone(), embedders: self.embedders.clone(),
#[cfg(test)] #[cfg(test)]
test_breakpoint_sdr: self.test_breakpoint_sdr.clone(), test_breakpoint_sdr: self.test_breakpoint_sdr.clone(),
@@ -497,8 +487,7 @@ impl IndexScheduler {
snapshots_path: options.snapshots_path, snapshots_path: options.snapshots_path,
auth_path: options.auth_path, auth_path: options.auth_path,
version_file_path: options.version_file_path, version_file_path: options.version_file_path,
webhook_url: options.webhook_url,
webhook_authorization_header: options.webhook_authorization_header,
currently_updating_index: Arc::new(RwLock::new(None)),
embedders: Default::default(), embedders: Default::default(),
#[cfg(test)] #[cfg(test)]
@@ -535,17 +524,17 @@ impl IndexScheduler {
let budget = if Self::is_good_heed(tasks_path, DEFAULT_BUDGET) { let budget = if Self::is_good_heed(tasks_path, DEFAULT_BUDGET) {
DEFAULT_BUDGET DEFAULT_BUDGET
} else { } else {
tracing::debug!("determining budget with dichotomic search"); log::debug!("determining budget with dichotomic search");
utils::dichotomic_search(DEFAULT_BUDGET / 2, |map_size| { utils::dichotomic_search(DEFAULT_BUDGET / 2, |map_size| {
Self::is_good_heed(tasks_path, map_size) Self::is_good_heed(tasks_path, map_size)
}) })
}; };
tracing::debug!("memmap budget: {budget}B"); log::debug!("memmap budget: {budget}B");
let mut budget = budget / 2; let mut budget = budget / 2;
if task_db_size > (budget / 2) { if task_db_size > (budget / 2) {
task_db_size = clamp_to_page_size(budget * 2 / 5); task_db_size = clamp_to_page_size(budget * 2 / 5);
tracing::debug!( log::debug!(
"Decreasing max size of task DB to {task_db_size}B due to constrained memory space" "Decreasing max size of task DB to {task_db_size}B due to constrained memory space"
); );
} }
@@ -555,13 +544,13 @@ impl IndexScheduler {
let budget = budget; let budget = budget;
let task_db_size = task_db_size; let task_db_size = task_db_size;
tracing::debug!("index budget: {budget}B"); log::debug!("index budget: {budget}B");
let mut index_count = budget / base_map_size; let mut index_count = budget / base_map_size;
if index_count < 2 { if index_count < 2 {
// take a bit less than half than the budget to make sure we can always afford to open an index // take a bit less than half than the budget to make sure we can always afford to open an index
let map_size = (budget * 2) / 5; let map_size = (budget * 2) / 5;
// single index of max budget // single index of max budget
tracing::debug!("1 index of {map_size}B can be opened simultaneously."); log::debug!("1 index of {map_size}B can be opened simultaneously.");
return IndexBudget { map_size, index_count: 1, task_db_size }; return IndexBudget { map_size, index_count: 1, task_db_size };
} }
// give us some space for an additional index when the cache is already full // give us some space for an additional index when the cache is already full
@@ -570,7 +559,7 @@ impl IndexScheduler {
if index_count > max_index_count { if index_count > max_index_count {
index_count = max_index_count; index_count = max_index_count;
} }
tracing::debug!("Up to {index_count} indexes of {base_map_size}B opened simultaneously."); log::debug!("Up to {index_count} indexes of {base_map_size}B opened simultaneously.");
IndexBudget { map_size: base_map_size, index_count, task_db_size } IndexBudget { map_size: base_map_size, index_count, task_db_size }
} }
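`utils::dichotomic_search` probes for the largest memory-map size the platform will accept. A minimal sketch of such a search, assuming `is_good` is monotonic (true below some threshold, false above); this is an illustration, not the exact implementation in `utils`:

// Finds the largest `n` for which `is_good(n)` holds, assuming monotonicity.
fn dichotomic_search(start: usize, is_good: impl Fn(usize) -> bool) -> usize {
    let (mut lo, mut hi) = if is_good(start) {
        let mut hi = start;
        while is_good(hi * 2) {
            hi *= 2; // grow until an upper bound fails
        }
        (hi, hi * 2)
    } else {
        (0, start)
    };
    while hi - lo > 1 {
        let mid = lo + (hi - lo) / 2;
        if is_good(mid) {
            lo = mid;
        } else {
            hi = mid;
        }
    }
    lo
}

fn main() {
    // Pretend reservations above 10 GiB fail, as a budget probe might observe.
    let limit = 10 * 1024 * 1024 * 1024_usize;
    assert_eq!(dichotomic_search(1 << 30, |size| size <= limit), limit);
}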
@@ -617,7 +606,7 @@ impl IndexScheduler {
Ok(TickOutcome::TickAgain(_)) => (), Ok(TickOutcome::TickAgain(_)) => (),
Ok(TickOutcome::WaitForSignal) => run.wake_up.wait(), Ok(TickOutcome::WaitForSignal) => run.wake_up.wait(),
Err(e) => { Err(e) => {
tracing::error!("{e}"); log::error!("{e}");
// Wait one second when an irrecoverable error occurs. // Wait one second when an irrecoverable error occurs.
if !e.is_recoverable() { if !e.is_recoverable() {
std::thread::sleep(Duration::from_secs(1)); std::thread::sleep(Duration::from_secs(1));
@@ -634,15 +623,15 @@ impl IndexScheduler {
let mut file = match File::create(format!("{}.puffin", now)) { let mut file = match File::create(format!("{}.puffin", now)) {
Ok(file) => file, Ok(file) => file,
Err(e) => { Err(e) => {
tracing::error!("{e}"); log::error!("{e}");
continue; continue;
} }
}; };
if let Err(e) = frame_view.save_to_writer(&mut file) { if let Err(e) = frame_view.save_to_writer(&mut file) {
tracing::error!("{e}"); log::error!("{e}");
} }
if let Err(e) = file.sync_all() { if let Err(e) = file.sync_all() {
tracing::error!("{e}"); log::error!("{e}");
} }
// We erase this frame view as it is no more useful. We want to // We erase this frame view as it is no more useful. We want to
// measure the new frames now that we exported the previous ones. // measure the new frames now that we exported the previous ones.
@@ -682,6 +671,13 @@ impl IndexScheduler {
/// If you need to fetch information from or perform an action on all indexes, /// If you need to fetch information from or perform an action on all indexes,
/// see the `try_for_each_index` function. /// see the `try_for_each_index` function.
pub fn index(&self, name: &str) -> Result<Index> { pub fn index(&self, name: &str) -> Result<Index> {
if let Some((current_name, current_index)) =
self.currently_updating_index.read().unwrap().as_ref()
{
if current_name == name {
return Ok(current_index.clone());
}
}
let rtxn = self.env.read_txn()?; let rtxn = self.env.read_txn()?;
self.index_mapper.index(&rtxn, name) self.index_mapper.index(&rtxn, name)
} }
@@ -1162,7 +1158,7 @@ impl IndexScheduler {
}; };
// Reset the currently updating index to relinquish the index handle // Reset the currently updating index to relinquish the index handle
self.index_mapper.set_currently_updating_index(None); *self.currently_updating_index.write().unwrap() = None;
#[cfg(test)] #[cfg(test)]
self.maybe_fail(tests::FailureLocation::AcquiringWtxn)?; self.maybe_fail(tests::FailureLocation::AcquiringWtxn)?;
@@ -1190,10 +1186,10 @@ impl IndexScheduler {
self.update_task(&mut wtxn, &task) self.update_task(&mut wtxn, &task)
.map_err(|e| Error::TaskDatabaseUpdate(Box::new(e)))?; .map_err(|e| Error::TaskDatabaseUpdate(Box::new(e)))?;
if let Err(e) = self.delete_persisted_task_data(&task) { if let Err(e) = self.delete_persisted_task_data(&task) {
tracing::error!("Failure to delete the content files associated with task {}. Error: {e}", task.uid); log::error!("Failure to delete the content files associated with task {}. Error: {e}", task.uid);
} }
} }
tracing::info!("A batch of tasks was successfully completed."); log::info!("A batch of tasks was successfully completed.");
} }
// If we have an abortion error we must stop the tick here and re-schedule tasks. // If we have an abortion error we must stop the tick here and re-schedule tasks.
Err(Error::Milli(milli::Error::InternalError( Err(Error::Milli(milli::Error::InternalError(
@@ -1247,7 +1243,7 @@ impl IndexScheduler {
self.maybe_fail(tests::FailureLocation::UpdatingTaskAfterProcessBatchFailure)?; self.maybe_fail(tests::FailureLocation::UpdatingTaskAfterProcessBatchFailure)?;
if let Err(e) = self.delete_persisted_task_data(&task) { if let Err(e) = self.delete_persisted_task_data(&task) {
tracing::error!("Failure to delete the content files associated with task {}. Error: {e}", task.uid); log::error!("Failure to delete the content files associated with task {}. Error: {e}", task.uid);
} }
self.update_task(&mut wtxn, &task) self.update_task(&mut wtxn, &task)
.map_err(|e| Error::TaskDatabaseUpdate(Box::new(e)))?; .map_err(|e| Error::TaskDatabaseUpdate(Box::new(e)))?;
@@ -1255,99 +1251,19 @@ impl IndexScheduler {
} }
} }
let processed = self.processing_tasks.write().unwrap().stop_processing(); self.processing_tasks.write().unwrap().stop_processing();
#[cfg(test)] #[cfg(test)]
self.maybe_fail(tests::FailureLocation::CommittingWtxn)?; self.maybe_fail(tests::FailureLocation::CommittingWtxn)?;
wtxn.commit().map_err(Error::HeedTransaction)?; wtxn.commit().map_err(Error::HeedTransaction)?;
// We shouldn't crash the tick function if we can't send data to the webhook.
let _ = self.notify_webhook(&processed);
#[cfg(test)] #[cfg(test)]
self.breakpoint(Breakpoint::AfterProcessing); self.breakpoint(Breakpoint::AfterProcessing);
Ok(TickOutcome::TickAgain(processed_tasks)) Ok(TickOutcome::TickAgain(processed_tasks))
} }
/// Once the task changes have been committed, we must send all the tasks that were updated to our webhook, if there is one.
fn notify_webhook(&self, updated: &RoaringBitmap) -> Result<()> {
if let Some(ref url) = self.webhook_url {
struct TaskReader<'a, 'b> {
rtxn: &'a RoTxn<'a>,
index_scheduler: &'a IndexScheduler,
tasks: &'b mut roaring::bitmap::Iter<'b>,
buffer: Vec<u8>,
written: usize,
}
impl<'a, 'b> Read for TaskReader<'a, 'b> {
fn read(&mut self, mut buf: &mut [u8]) -> std::io::Result<usize> {
if self.buffer.is_empty() {
match self.tasks.next() {
None => return Ok(0),
Some(task_id) => {
let task = self
.index_scheduler
.get_task(self.rtxn, task_id)
.map_err(|err| io::Error::new(io::ErrorKind::Other, err))?
.ok_or_else(|| {
io::Error::new(
io::ErrorKind::Other,
Error::CorruptedTaskQueue,
)
})?;
serde_json::to_writer(
&mut self.buffer,
&TaskView::from_task(&task),
)?;
self.buffer.push(b'\n');
}
}
}
let mut to_write = &self.buffer[self.written..];
let wrote = io::copy(&mut to_write, &mut buf)?;
self.written += wrote as usize;
// we wrote everything and must refresh our buffer on the next call
if self.written == self.buffer.len() {
self.written = 0;
self.buffer.clear();
}
Ok(wrote as usize)
}
}
let rtxn = self.env.read_txn()?;
let task_reader = TaskReader {
rtxn: &rtxn,
index_scheduler: self,
tasks: &mut updated.into_iter(),
buffer: Vec::with_capacity(50), // on average a task is around ~100 bytes
written: 0,
};
// let reader = GzEncoder::new(BufReader::new(task_reader), Compression::default());
let reader = GzEncoder::new(BufReader::new(task_reader), Compression::default());
let request = ureq::post(url).set("Content-Encoding", "gzip");
let request = match &self.webhook_authorization_header {
Some(header) => request.set("Authorization", header),
None => request,
};
if let Err(e) = request.send(reader) {
tracing::error!("While sending data to the webhook: {e}");
}
}
Ok(())
}
/// Register a task to cleanup the task queue if needed /// Register a task to cleanup the task queue if needed
fn cleanup_task_queue(&self) -> Result<()> { fn cleanup_task_queue(&self) -> Result<()> {
let rtxn = self.env.read_txn().map_err(Error::HeedTransaction)?; let rtxn = self.env.read_txn().map_err(Error::HeedTransaction)?;
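The `notify_webhook` body deleted above streams the updated tasks as gzip-compressed ndjson: a `Read` adapter serializes one task per refill, `GzEncoder` wraps it, and `ureq` drains it, so the full payload is never materialized in memory. A self-contained sketch of the same streaming idea (only `serde` and `serde_json` assumed):

use std::io::{self, Read};

// Serializes one item per refill; the whole payload never exists at once.
struct JsonLines<I> {
    items: I,
    buffer: Vec<u8>,
    written: usize,
}

impl<I, T> Read for JsonLines<I>
where
    I: Iterator<Item = T>,
    T: serde::Serialize,
{
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        if self.written == self.buffer.len() {
            match self.items.next() {
                None => return Ok(0), // iterator exhausted: EOF
                Some(item) => {
                    self.buffer.clear();
                    self.written = 0;
                    serde_json::to_writer(&mut self.buffer, &item)?;
                    self.buffer.push(b'\n');
                }
            }
        }
        let remaining = &self.buffer[self.written..];
        let n = remaining.len().min(buf.len());
        buf[..n].copy_from_slice(&remaining[..n]);
        self.written += n;
        Ok(n)
    }
}

fn main() -> io::Result<()> {
    let items = (0u32..3).map(|uid| serde_json::json!({ "uid": uid }));
    let mut reader = JsonLines { items, buffer: Vec::new(), written: 0 };
    // The deleted code wraps the equivalent reader in a GzEncoder before
    // handing it to ureq; here we simply drain it to a string.
    let mut out = String::new();
    reader.read_to_string(&mut out)?;
    assert_eq!(out.lines().count(), 3);
    Ok(())
}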
@@ -1367,12 +1283,12 @@ impl IndexScheduler {
// /!\ the len must be at least 2 or else we might enter an infinite loop where we only delete // /!\ the len must be at least 2 or else we might enter an infinite loop where we only delete
// the deletion tasks we enqueued ourselves. // the deletion tasks we enqueued ourselves.
if to_delete.len() < 2 { if to_delete.len() < 2 {
tracing::warn!("The task queue is almost full, but no task can be deleted yet."); log::warn!("The task queue is almost full, but no task can be deleted yet.");
// the only thing we can do is hope that the user tasks are going to finish // the only thing we can do is hope that the user tasks are going to finish
return Ok(()); return Ok(());
} }
tracing::info!( log::info!(
"The task queue is almost full. Deleting the oldest {} finished tasks.", "The task queue is almost full. Deleting the oldest {} finished tasks.",
to_delete.len() to_delete.len()
); );
@@ -1761,8 +1677,6 @@ mod tests {
indexes_path: tempdir.path().join("indexes"), indexes_path: tempdir.path().join("indexes"),
snapshots_path: tempdir.path().join("snapshots"), snapshots_path: tempdir.path().join("snapshots"),
dumps_path: tempdir.path().join("dumps"), dumps_path: tempdir.path().join("dumps"),
webhook_url: None,
webhook_authorization_header: None,
task_db_size: 1000 * 1000, // 1 MB, we don't use MiB on purpose. task_db_size: 1000 * 1000, // 1 MB, we don't use MiB on purpose.
index_base_map_size: 1000 * 1000, // 1 MB, we don't use MiB on purpose. index_base_map_size: 1000 * 1000, // 1 MB, we don't use MiB on purpose.
enable_mdb_writemap: false, enable_mdb_writemap: false,
@@ -2244,7 +2158,10 @@ mod tests {
.unwrap(); .unwrap();
index_scheduler.assert_internally_consistent(); index_scheduler.assert_internally_consistent();
} }
handle.advance_one_successful_batch();
for _ in 0..2 {
    handle.advance_one_successful_batch();
    index_scheduler.assert_internally_consistent();
}
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "task_deletion_processed"); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "task_deletion_processed");
} }

View File

@@ -34,10 +34,12 @@ catto: { number_of_documents: 1, field_distribution: {"id": 1} }
[timestamp] [3,] [timestamp] [3,]
---------------------------------------------------------------------- ----------------------------------------------------------------------
### Started At: ### Started At:
[timestamp] [2,3,]
[timestamp] [2,]
[timestamp] [3,]
---------------------------------------------------------------------- ----------------------------------------------------------------------
### Finished At: ### Finished At:
[timestamp] [2,3,]
[timestamp] [2,]
[timestamp] [3,]
---------------------------------------------------------------------- ----------------------------------------------------------------------
### File Store: ### File Store:
00000000-0000-0000-0000-000000000001 00000000-0000-0000-0000-000000000001

View File

@@ -11,6 +11,6 @@ edition.workspace = true
license.workspace = true license.workspace = true
[dependencies] [dependencies]
insta = { version = "^1.34.0", features = ["json", "redactions"] } insta = { version = "^1.29.0", features = ["json", "redactions"] }
md5 = "0.7.0" md5 = "0.7.0"
once_cell = "1.19" once_cell = "1.17"

View File

@@ -11,16 +11,16 @@ edition.workspace = true
license.workspace = true license.workspace = true
[dependencies] [dependencies]
base64 = "0.21.7" base64 = "0.21.0"
enum-iterator = "1.5.0" enum-iterator = "1.4.0"
hmac = "0.12.1" hmac = "0.12.1"
maplit = "1.0.2" maplit = "1.0.2"
meilisearch-types = { path = "../meilisearch-types" } meilisearch-types = { path = "../meilisearch-types" }
rand = "0.8.5" rand = "0.8.5"
roaring = { version = "0.10.2", features = ["serde"] } roaring = { version = "0.10.1", features = ["serde"] }
serde = { version = "1.0.195", features = ["derive"] } serde = { version = "1.0.160", features = ["derive"] }
serde_json = { version = "1.0.111", features = ["preserve_order"] } serde_json = { version = "1.0.95", features = ["preserve_order"] }
sha2 = "0.10.8" sha2 = "0.10.6"
thiserror = "1.0.56" thiserror = "1.0.40"
time = { version = "0.3.31", features = ["serde-well-known", "formatting", "parsing", "macros"] } time = { version = "0.3.20", features = ["serde-well-known", "formatting", "parsing", "macros"] }
uuid = { version = "1.6.1", features = ["serde", "v4"] } uuid = { version = "1.3.1", features = ["serde", "v4"] }

View File

@@ -11,31 +11,31 @@ edition.workspace = true
license.workspace = true license.workspace = true
[dependencies] [dependencies]
actix-web = { version = "4.4.1", default-features = false } actix-web = { version = "4.3.1", default-features = false }
anyhow = "1.0.79" anyhow = "1.0.70"
convert_case = "0.6.0" convert_case = "0.6.0"
csv = "1.3.0" csv = "1.2.1"
deserr = { version = "0.6.1", features = ["actix-web"] } deserr = { version = "0.6.0", features = ["actix-web"] }
either = { version = "1.9.0", features = ["serde"] } either = { version = "1.8.1", features = ["serde"] }
enum-iterator = "1.5.0" enum-iterator = "1.4.0"
file-store = { path = "../file-store" } file-store = { path = "../file-store" }
flate2 = "1.0.28" flate2 = "1.0.25"
fst = "0.4.7" fst = "0.4.7"
memmap2 = "0.7.1" memmap2 = "0.7.1"
milli = { path = "../milli" } milli = { path = "../milli" }
roaring = { version = "0.10.2", features = ["serde"] } roaring = { version = "0.10.1", features = ["serde"] }
serde = { version = "1.0.195", features = ["derive"] } serde = { version = "1.0.160", features = ["derive"] }
serde-cs = "0.2.4" serde-cs = "0.2.4"
serde_json = "1.0.111" serde_json = "1.0.95"
tar = "0.4.40" tar = "0.4.38"
tempfile = "3.9.0" tempfile = "3.5.0"
thiserror = "1.0.56" thiserror = "1.0.40"
time = { version = "0.3.31", features = ["serde-well-known", "formatting", "parsing", "macros"] } time = { version = "0.3.20", features = ["serde-well-known", "formatting", "parsing", "macros"] }
tokio = "1.35" tokio = "1.27"
uuid = { version = "1.6.1", features = ["serde", "v4"] } uuid = { version = "1.3.1", features = ["serde", "v4"] }
[dev-dependencies] [dev-dependencies]
insta = "1.34.0" insta = "1.29.0"
meili-snap = { path = "../meili-snap" } meili-snap = { path = "../meili-snap" }
[features] [features]
@@ -54,5 +54,3 @@ thai = ["milli/thai"]
greek = ["milli/greek"] greek = ["milli/greek"]
# allow khmer specialized tokenization # allow khmer specialized tokenization
khmer = ["milli/khmer"] khmer = ["milli/khmer"]
# allow vietnamese specialized tokenization
vietnamese = ["milli/vietnamese"]

View File

@@ -310,8 +310,6 @@ TooManyVectors , InvalidRequest , BAD_REQUEST ;
UnretrievableDocument , Internal , BAD_REQUEST ; UnretrievableDocument , Internal , BAD_REQUEST ;
UnretrievableErrorCode , InvalidRequest , BAD_REQUEST ; UnretrievableErrorCode , InvalidRequest , BAD_REQUEST ;
UnsupportedMediaType , InvalidRequest , UNSUPPORTED_MEDIA_TYPE ; UnsupportedMediaType , InvalidRequest , UNSUPPORTED_MEDIA_TYPE ;
// Experimental features
VectorEmbeddingError , InvalidRequest , BAD_REQUEST VectorEmbeddingError , InvalidRequest , BAD_REQUEST
} }
@@ -346,13 +344,7 @@ impl ErrorCode for milli::Error {
Code::InvalidDocumentId Code::InvalidDocumentId
} }
UserError::MissingDocumentField(_) => Code::InvalidDocumentFields, UserError::MissingDocumentField(_) => Code::InvalidDocumentFields,
UserError::InvalidFieldForSource { .. }
| UserError::MissingFieldForSource { .. }
| UserError::InvalidOpenAiModel { .. }
| UserError::InvalidOpenAiModelDimensions { .. }
| UserError::InvalidOpenAiModelDimensionsMax { .. }
| UserError::InvalidSettingsDimensions { .. }
| UserError::InvalidPrompt(_) => Code::InvalidSettingsEmbedders,
UserError::InvalidPrompt(_) => Code::InvalidSettingsEmbedders,
UserError::TooManyEmbedders(_) => Code::InvalidSettingsEmbedders, UserError::TooManyEmbedders(_) => Code::InvalidSettingsEmbedders,
UserError::InvalidPromptForEmbeddings(..) => Code::InvalidSettingsEmbedders, UserError::InvalidPromptForEmbeddings(..) => Code::InvalidSettingsEmbedders,
UserError::NoPrimaryKeyCandidateFound => Code::IndexPrimaryKeyNoCandidateFound, UserError::NoPrimaryKeyCandidateFound => Code::IndexPrimaryKeyNoCandidateFound,

View File

@@ -3,14 +3,13 @@ use serde::{Deserialize, Serialize};
#[derive(Serialize, Deserialize, Debug, Clone, Copy, Default, PartialEq, Eq)] #[derive(Serialize, Deserialize, Debug, Clone, Copy, Default, PartialEq, Eq)]
#[serde(rename_all = "camelCase", default)] #[serde(rename_all = "camelCase", default)]
pub struct RuntimeTogglableFeatures { pub struct RuntimeTogglableFeatures {
pub score_details: bool,
pub vector_store: bool, pub vector_store: bool,
pub metrics: bool, pub metrics: bool,
pub logs_route: bool,
pub export_puffin_reports: bool, pub export_puffin_reports: bool,
} }
#[derive(Default, Debug, Clone, Copy)] #[derive(Default, Debug, Clone, Copy)]
pub struct InstanceTogglableFeatures { pub struct InstanceTogglableFeatures {
pub metrics: bool, pub metrics: bool,
pub logs_route: bool,
} }

View File

@@ -9,7 +9,6 @@ pub mod index_uid_pattern;
pub mod keys; pub mod keys;
pub mod settings; pub mod settings;
pub mod star_or; pub mod star_or;
pub mod task_view;
pub mod tasks; pub mod tasks;
pub mod versioning; pub mod versioning;
pub use milli::{heed, Index}; pub use milli::{heed, Index};

View File

@@ -318,21 +318,6 @@ impl Settings<Unchecked> {
_kind: PhantomData, _kind: PhantomData,
} }
} }
pub fn validate(self) -> Result<Self, milli::Error> {
self.validate_embedding_settings()
}
fn validate_embedding_settings(mut self) -> Result<Self, milli::Error> {
let Setting::Set(mut configs) = self.embedders else { return Ok(self) };
for (name, config) in configs.iter_mut() {
let config_to_check = std::mem::take(config);
let checked_config = milli::update::validate_embedding_settings(config_to_check, name)?;
*config = checked_config
}
self.embedders = Setting::Set(configs);
Ok(self)
}
} }
#[derive(Debug, Clone, Serialize, Deserialize)] #[derive(Debug, Clone, Serialize, Deserialize)]
@@ -600,12 +585,11 @@ pub fn settings(
), ),
}; };
let embedders: BTreeMap<_, _> = index let embedders = index
.embedding_configs(rtxn)? .embedding_configs(rtxn)?
.into_iter() .into_iter()
.map(|(name, config)| (name, Setting::Set(config.into()))) .map(|(name, config)| (name, Setting::Set(config.into())))
.collect(); .collect();
let embedders = if embedders.is_empty() { Setting::NotSet } else { Setting::Set(embedders) };
Ok(Settings { Ok(Settings {
displayed_attributes: match displayed_attributes { displayed_attributes: match displayed_attributes {
@@ -627,12 +611,15 @@ pub fn settings(
Some(field) => Setting::Set(field), Some(field) => Setting::Set(field),
None => Setting::Reset, None => Setting::Reset,
}, },
proximity_precision: Setting::Set(proximity_precision.unwrap_or_default()),
proximity_precision: match proximity_precision {
    Some(precision) => Setting::Set(precision),
    None => Setting::Reset,
},
synonyms: Setting::Set(synonyms), synonyms: Setting::Set(synonyms),
typo_tolerance: Setting::Set(typo_tolerance), typo_tolerance: Setting::Set(typo_tolerance),
faceting: Setting::Set(faceting), faceting: Setting::Set(faceting),
pagination: Setting::Set(pagination), pagination: Setting::Set(pagination),
embedders, embedders: Setting::Set(embedders),
_kind: PhantomData, _kind: PhantomData,
}) })
} }
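The `proximity_precision` hunk changes what a missing stored value looks like to API consumers: the newer side surfaces the default (`byWord`) as an explicit `Set`, while the older side reported `Reset`. A small sketch of the observable difference, with a stand-in `Setting` enum in place of the milli type:

#[derive(Debug, PartialEq)]
enum Setting<T> {
    Set(T),
    Reset,
}

#[derive(Debug, Default, PartialEq)]
enum ProximityPrecision {
    #[default]
    ByWord,
    #[allow(dead_code)]
    ByAttribute,
}

fn main() {
    // No value stored in the index yet.
    let stored: Option<ProximityPrecision> = None;

    // Newer behaviour: a missing value surfaces as the explicit default.
    assert_eq!(
        Setting::Set(stored.unwrap_or_default()),
        Setting::Set(ProximityPrecision::ByWord)
    );

    // Older behaviour: a missing value was reported as `Reset`.
    let older: Setting<ProximityPrecision> = Setting::Reset;
    assert_eq!(older, Setting::Reset);
}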
@@ -733,11 +720,10 @@ impl From<RankingRuleView> for Criterion {
} }
} }
#[derive(Default, Debug, Clone, Copy, PartialEq, Eq, Deserr, Serialize, Deserialize)] #[derive(Debug, Clone, Copy, PartialEq, Eq, Deserr, Serialize, Deserialize)]
#[serde(deny_unknown_fields, rename_all = "camelCase")] #[serde(deny_unknown_fields, rename_all = "camelCase")]
#[deserr(error = DeserrJsonError<InvalidSettingsProximityPrecision>, rename_all = camelCase, deny_unknown_fields)] #[deserr(error = DeserrJsonError<InvalidSettingsProximityPrecision>, rename_all = camelCase, deny_unknown_fields)]
pub enum ProximityPrecisionView { pub enum ProximityPrecisionView {
#[default]
ByWord, ByWord,
ByAttribute, ByAttribute,
} }

View File

@@ -1,139 +0,0 @@
use serde::Serialize;
use time::{Duration, OffsetDateTime};
use crate::error::ResponseError;
use crate::settings::{Settings, Unchecked};
use crate::tasks::{serialize_duration, Details, IndexSwap, Kind, Status, Task, TaskId};
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct TaskView {
pub uid: TaskId,
#[serde(default)]
pub index_uid: Option<String>,
pub status: Status,
#[serde(rename = "type")]
pub kind: Kind,
pub canceled_by: Option<TaskId>,
#[serde(skip_serializing_if = "Option::is_none")]
pub details: Option<DetailsView>,
pub error: Option<ResponseError>,
#[serde(serialize_with = "serialize_duration", default)]
pub duration: Option<Duration>,
#[serde(with = "time::serde::rfc3339")]
pub enqueued_at: OffsetDateTime,
#[serde(with = "time::serde::rfc3339::option", default)]
pub started_at: Option<OffsetDateTime>,
#[serde(with = "time::serde::rfc3339::option", default)]
pub finished_at: Option<OffsetDateTime>,
}
impl TaskView {
pub fn from_task(task: &Task) -> TaskView {
TaskView {
uid: task.uid,
index_uid: task.index_uid().map(ToOwned::to_owned),
status: task.status,
kind: task.kind.as_kind(),
canceled_by: task.canceled_by,
details: task.details.clone().map(DetailsView::from),
error: task.error.clone(),
duration: task.started_at.zip(task.finished_at).map(|(start, end)| end - start),
enqueued_at: task.enqueued_at,
started_at: task.started_at,
finished_at: task.finished_at,
}
}
}
#[derive(Default, Debug, PartialEq, Eq, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct DetailsView {
#[serde(skip_serializing_if = "Option::is_none")]
pub received_documents: Option<u64>,
#[serde(skip_serializing_if = "Option::is_none")]
pub indexed_documents: Option<Option<u64>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub primary_key: Option<Option<String>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub provided_ids: Option<usize>,
#[serde(skip_serializing_if = "Option::is_none")]
pub deleted_documents: Option<Option<u64>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub matched_tasks: Option<u64>,
#[serde(skip_serializing_if = "Option::is_none")]
pub canceled_tasks: Option<Option<u64>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub deleted_tasks: Option<Option<u64>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub original_filter: Option<Option<String>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub dump_uid: Option<Option<String>>,
#[serde(skip_serializing_if = "Option::is_none")]
#[serde(flatten)]
pub settings: Option<Box<Settings<Unchecked>>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub swaps: Option<Vec<IndexSwap>>,
}
impl From<Details> for DetailsView {
fn from(details: Details) -> Self {
match details {
Details::DocumentAdditionOrUpdate { received_documents, indexed_documents } => {
DetailsView {
received_documents: Some(received_documents),
indexed_documents: Some(indexed_documents),
..DetailsView::default()
}
}
Details::SettingsUpdate { settings } => {
DetailsView { settings: Some(settings), ..DetailsView::default() }
}
Details::IndexInfo { primary_key } => {
DetailsView { primary_key: Some(primary_key), ..DetailsView::default() }
}
Details::DocumentDeletion {
provided_ids: received_document_ids,
deleted_documents,
} => DetailsView {
provided_ids: Some(received_document_ids),
deleted_documents: Some(deleted_documents),
original_filter: Some(None),
..DetailsView::default()
},
Details::DocumentDeletionByFilter { original_filter, deleted_documents } => {
DetailsView {
provided_ids: Some(0),
original_filter: Some(Some(original_filter)),
deleted_documents: Some(deleted_documents),
..DetailsView::default()
}
}
Details::ClearAll { deleted_documents } => {
DetailsView { deleted_documents: Some(deleted_documents), ..DetailsView::default() }
}
Details::TaskCancelation { matched_tasks, canceled_tasks, original_filter } => {
DetailsView {
matched_tasks: Some(matched_tasks),
canceled_tasks: Some(canceled_tasks),
original_filter: Some(Some(original_filter)),
..DetailsView::default()
}
}
Details::TaskDeletion { matched_tasks, deleted_tasks, original_filter } => {
DetailsView {
matched_tasks: Some(matched_tasks),
deleted_tasks: Some(deleted_tasks),
original_filter: Some(Some(original_filter)),
..DetailsView::default()
}
}
Details::Dump { dump_uid } => {
DetailsView { dump_uid: Some(dump_uid), ..DetailsView::default() }
}
Details::IndexSwap { swaps } => {
DetailsView { swaps: Some(swaps), ..Default::default() }
}
}
}
}
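The deleted `DetailsView` relies on two serde idioms: `skip_serializing_if` drops `None` fields from the JSON entirely, and `flatten` splices the optional settings into the parent object instead of nesting them. A self-contained sketch of both together:

use serde::Serialize;

#[derive(Serialize)]
#[serde(rename_all = "camelCase")]
struct Details {
    // Omitted from the JSON entirely when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    deleted_documents: Option<u64>,
    // Spliced into the parent object instead of nesting under "settings".
    #[serde(skip_serializing_if = "Option::is_none")]
    #[serde(flatten)]
    settings: Option<Settings>,
}

#[derive(Serialize)]
struct Settings {
    ranking_rules: Vec<String>,
}

fn main() {
    let view = Details {
        deleted_documents: None,
        settings: Some(Settings { ranking_rules: vec!["words".into()] }),
    };
    // Prints {"ranking_rules":["words"]}: no null field, no nesting.
    println!("{}", serde_json::to_string(&view).unwrap());
}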

View File

@@ -13,14 +13,14 @@ license.workspace = true
default-run = "meilisearch" default-run = "meilisearch"
[dependencies] [dependencies]
actix-cors = "0.7.0" actix-cors = "0.6.4"
actix-http = { version = "3.5.1", default-features = false, features = [ actix-http = { version = "3.3.1", default-features = false, features = [
"compress-brotli", "compress-brotli",
"compress-gzip", "compress-gzip",
"rustls", "rustls",
] } ] }
actix-utils = "3.0.1" actix-utils = "3.0.1"
actix-web = { version = "4.4.1", default-features = false, features = [ actix-web = { version = "4.3.1", default-features = false, features = [
"macros", "macros",
"compress-brotli", "compress-brotli",
"compress-gzip", "compress-gzip",
@@ -28,111 +28,108 @@ actix-web = { version = "4.4.1", default-features = false, features = [
"rustls", "rustls",
] } ] }
actix-web-static-files = { git = "https://github.com/kilork/actix-web-static-files.git", rev = "2d3b6160", optional = true } actix-web-static-files = { git = "https://github.com/kilork/actix-web-static-files.git", rev = "2d3b6160", optional = true }
anyhow = { version = "1.0.79", features = ["backtrace"] } anyhow = { version = "1.0.70", features = ["backtrace"] }
async-stream = "0.3.5" async-stream = "0.3.5"
async-trait = "0.1.77" async-trait = "0.1.68"
bstr = "1.9.0" bstr = "1.4.0"
byte-unit = { version = "4.0.19", default-features = false, features = [ byte-unit = { version = "4.0.19", default-features = false, features = [
"std", "std",
"serde", "serde",
] } ] }
bytes = "1.5.0" bytes = "1.4.0"
clap = { version = "4.4.17", features = ["derive", "env"] } clap = { version = "4.2.1", features = ["derive", "env"] }
crossbeam-channel = "0.5.11" crossbeam-channel = "0.5.8"
deserr = { version = "0.6.1", features = ["actix-web"] } deserr = { version = "0.6.0", features = ["actix-web"] }
dump = { path = "../dump" } dump = { path = "../dump" }
either = "1.9.0" either = "1.8.1"
env_logger = "0.10.0"
file-store = { path = "../file-store" } file-store = { path = "../file-store" }
flate2 = "1.0.28" flate2 = "1.0.25"
fst = "0.4.7" fst = "0.4.7"
futures = "0.3.30" futures = "0.3.28"
futures-util = "0.3.30" futures-util = "0.3.28"
http = "0.2.11" http = "0.2.9"
index-scheduler = { path = "../index-scheduler" } index-scheduler = { path = "../index-scheduler" }
indexmap = { version = "2.1.0", features = ["serde"] } indexmap = { version = "2.0.0", features = ["serde"] }
is-terminal = "0.4.10" is-terminal = "0.4.8"
itertools = "0.11.0" itertools = "0.11.0"
jsonwebtoken = "8.3.0" jsonwebtoken = "8.3.0"
lazy_static = "1.4.0" lazy_static = "1.4.0"
log = "0.4.17"
meilisearch-auth = { path = "../meilisearch-auth" } meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-types = { path = "../meilisearch-types" } meilisearch-types = { path = "../meilisearch-types" }
mimalloc = { version = "0.1.39", default-features = false } mimalloc = { version = "0.1.37", default-features = false }
mime = "0.3.17" mime = "0.3.17"
num_cpus = "1.16.0" num_cpus = "1.15.0"
obkv = "0.2.1" obkv = "0.2.0"
once_cell = "1.19.0" once_cell = "1.17.1"
ordered-float = "4.2.0" ordered-float = "3.7.0"
parking_lot = "0.12.1" parking_lot = "0.12.1"
permissive-json-pointer = { path = "../permissive-json-pointer" } permissive-json-pointer = { path = "../permissive-json-pointer" }
pin-project-lite = "0.2.13" pin-project-lite = "0.2.9"
platform-dirs = "0.3.0" platform-dirs = "0.3.0"
prometheus = { version = "0.13.3", features = ["process"] } prometheus = { version = "0.13.3", features = ["process"] }
puffin = { version = "0.16.0", features = ["serialization"] } puffin = { version = "0.16.0", features = ["serialization"] }
rand = "0.8.5" rand = "0.8.5"
rayon = "1.8.0" rayon = "1.7.0"
regex = "1.10.2" regex = "1.7.3"
reqwest = { version = "0.11.23", features = [ reqwest = { version = "0.11.16", features = [
"rustls-tls", "rustls-tls",
"json", "json",
], default-features = false } ], default-features = false }
rustls = "0.20.8" rustls = "0.20.8"
rustls-pemfile = "1.0.2" rustls-pemfile = "1.0.2"
segment = { version = "0.2.3", optional = true } segment = { version = "0.2.2", optional = true }
serde = { version = "1.0.195", features = ["derive"] } serde = { version = "1.0.160", features = ["derive"] }
serde_json = { version = "1.0.111", features = ["preserve_order"] } serde_json = { version = "1.0.95", features = ["preserve_order"] }
sha2 = "0.10.8" sha2 = "0.10.6"
siphasher = "1.0.0" siphasher = "0.3.10"
slice-group-by = "0.3.1" slice-group-by = "0.3.0"
static-files = { version = "0.2.3", optional = true } static-files = { version = "0.2.3", optional = true }
sysinfo = "0.30.5" sysinfo = "0.29.7"
tar = "0.4.40" tar = "0.4.38"
tempfile = "3.9.0" tempfile = "3.5.0"
thiserror = "1.0.56" thiserror = "1.0.40"
time = { version = "0.3.31", features = [ time = { version = "0.3.20", features = [
"serde-well-known", "serde-well-known",
"formatting", "formatting",
"parsing", "parsing",
"macros", "macros",
] } ] }
tokio = { version = "1.35.1", features = ["full"] } tokio = { version = "1.27.0", features = ["full"] }
tokio-stream = "0.1.14" tokio-stream = "0.1.12"
toml = "0.8.8" toml = "0.7.3"
uuid = { version = "1.6.1", features = ["serde", "v4"] } uuid = { version = "1.3.1", features = ["serde", "v4"] }
walkdir = "2.4.0" walkdir = "2.3.3"
yaup = "0.2.1" yaup = "0.2.1"
serde_urlencoded = "0.7.1" serde_urlencoded = "0.7.1"
termcolor = "1.4.1" termcolor = "1.2.0"
url = { version = "2.5.0", features = ["serde"] }
tracing = "0.1.40"
tracing-subscriber = { version = "0.3.18", features = ["json"] }
tracing-trace = { version = "0.1.0", path = "../tracing-trace" }
tracing-actix-web = "0.7.9"
[dev-dependencies] [dev-dependencies]
actix-rt = "2.9.0" actix-rt = "2.8.0"
assert-json-diff = "2.0.2" assert-json-diff = "2.0.2"
brotli = "3.4.0" brotli = "3.3.4"
insta = "1.34.0" insta = "1.29.0"
manifest-dir-macros = "0.1.18" manifest-dir-macros = "0.1.16"
maplit = "1.0.2" maplit = "1.0.2"
meili-snap = { path = "../meili-snap" } meili-snap = { path = "../meili-snap" }
temp-env = "0.3.6" temp-env = "0.3.3"
urlencoding = "2.1.3" urlencoding = "2.1.2"
yaup = "0.2.1" yaup = "0.2.1"
[build-dependencies] [build-dependencies]
anyhow = { version = "1.0.79", optional = true } anyhow = { version = "1.0.70", optional = true }
cargo_toml = { version = "0.18.0", optional = true } cargo_toml = { version = "0.15.2", optional = true }
hex = { version = "0.4.3", optional = true } hex = { version = "0.4.3", optional = true }
reqwest = { version = "0.11.23", features = [ reqwest = { version = "0.11.16", features = [
"blocking", "blocking",
"rustls-tls", "rustls-tls",
], default-features = false, optional = true } ], default-features = false, optional = true }
sha-1 = { version = "0.10.1", optional = true } sha-1 = { version = "0.10.1", optional = true }
static-files = { version = "0.2.3", optional = true } static-files = { version = "0.2.3", optional = true }
tempfile = { version = "3.9.0", optional = true } tempfile = { version = "3.5.0", optional = true }
vergen = { version = "7.5.1", default-features = false, features = ["git"] } vergen = { version = "7.5.1", default-features = false, features = ["git"] }
zip = { version = "0.6.6", optional = true } zip = { version = "0.6.4", optional = true }
[features] [features]
default = ["analytics", "meilisearch-types/all-tokenizations", "mini-dashboard"] default = ["analytics", "meilisearch-types/all-tokenizations", "mini-dashboard"]
@@ -154,8 +151,7 @@ japanese = ["meilisearch-types/japanese"]
thai = ["meilisearch-types/thai"] thai = ["meilisearch-types/thai"]
greek = ["meilisearch-types/greek"] greek = ["meilisearch-types/greek"]
khmer = ["meilisearch-types/khmer"] khmer = ["meilisearch-types/khmer"]
vietnamese = ["meilisearch-types/vietnamese"]
[package.metadata.mini-dashboard] [package.metadata.mini-dashboard]
assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.13/build.zip" assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.11/build.zip"
sha1 = "e20cc9b390003c6c844f4b8bcc5c5013191a77ff" sha1 = "83cd44ed1e5f97ecb581dc9f958a63f4ccc982d9"

View File

@@ -18,7 +18,7 @@ use segment::message::{Identify, Track, User};
use segment::{AutoBatcher, Batcher, HttpClient}; use segment::{AutoBatcher, Batcher, HttpClient};
use serde::Serialize; use serde::Serialize;
use serde_json::{json, Value}; use serde_json::{json, Value};
use sysinfo::{Disks, System}; use sysinfo::{DiskExt, System, SystemExt};
use time::OffsetDateTime; use time::OffsetDateTime;
use tokio::select; use tokio::select;
use tokio::sync::mpsc::{self, Receiver, Sender}; use tokio::sync::mpsc::{self, Receiver, Sender};
@@ -28,9 +28,7 @@ use super::{
config_user_id_path, DocumentDeletionKind, DocumentFetchKind, MEILISEARCH_CONFIG_PATH, config_user_id_path, DocumentDeletionKind, DocumentFetchKind, MEILISEARCH_CONFIG_PATH,
}; };
use crate::analytics::Analytics; use crate::analytics::Analytics;
use crate::option::{
    default_http_addr, IndexerOpts, LogMode, MaxMemory, MaxThreads, ScheduleSnapshot,
};
use crate::option::{default_http_addr, IndexerOpts, MaxMemory, MaxThreads, ScheduleSnapshot};
use crate::routes::indexes::documents::UpdateDocumentsQuery; use crate::routes::indexes::documents::UpdateDocumentsQuery;
use crate::routes::indexes::facet_search::FacetSearchQuery; use crate::routes::indexes::facet_search::FacetSearchQuery;
use crate::routes::tasks::TasksFilterQuery; use crate::routes::tasks::TasksFilterQuery;
@@ -252,8 +250,6 @@ impl super::Analytics for SegmentAnalytics {
struct Infos { struct Infos {
env: String, env: String,
experimental_enable_metrics: bool, experimental_enable_metrics: bool,
experimental_logs_mode: LogMode,
experimental_enable_logs_route: bool,
experimental_reduce_indexing_memory_usage: bool, experimental_reduce_indexing_memory_usage: bool,
experimental_max_number_of_batched_tasks: usize, experimental_max_number_of_batched_tasks: usize,
db_path: bool, db_path: bool,
@@ -268,8 +264,6 @@ struct Infos {
ignore_snapshot_if_db_exists: bool, ignore_snapshot_if_db_exists: bool,
http_addr: bool, http_addr: bool,
http_payload_size_limit: Byte, http_payload_size_limit: Byte,
task_queue_webhook: bool,
task_webhook_authorization_header: bool,
log_level: String, log_level: String,
max_indexing_memory: MaxMemory, max_indexing_memory: MaxMemory,
max_indexing_threads: MaxThreads, max_indexing_threads: MaxThreads,
@@ -291,15 +285,11 @@ impl From<Opt> for Infos {
let Opt { let Opt {
db_path, db_path,
experimental_enable_metrics, experimental_enable_metrics,
experimental_logs_mode,
experimental_enable_logs_route,
experimental_reduce_indexing_memory_usage, experimental_reduce_indexing_memory_usage,
experimental_max_number_of_batched_tasks, experimental_max_number_of_batched_tasks,
http_addr, http_addr,
master_key: _, master_key: _,
env, env,
task_webhook_url,
task_webhook_authorization_header,
max_index_size: _, max_index_size: _,
max_task_db_size: _, max_task_db_size: _,
http_payload_size_limit, http_payload_size_limit,
@@ -339,8 +329,6 @@ impl From<Opt> for Infos {
Self { Self {
env, env,
experimental_enable_metrics, experimental_enable_metrics,
experimental_logs_mode,
experimental_enable_logs_route,
experimental_reduce_indexing_memory_usage, experimental_reduce_indexing_memory_usage,
db_path: db_path != PathBuf::from("./data.ms"), db_path: db_path != PathBuf::from("./data.ms"),
import_dump: import_dump.is_some(), import_dump: import_dump.is_some(),
@@ -355,8 +343,6 @@ impl From<Opt> for Infos {
http_addr: http_addr != default_http_addr(), http_addr: http_addr != default_http_addr(),
http_payload_size_limit, http_payload_size_limit,
experimental_max_number_of_batched_tasks, experimental_max_number_of_batched_tasks,
task_queue_webhook: task_webhook_url.is_some(),
task_webhook_authorization_header: task_webhook_authorization_header.is_some(),
log_level: log_level.to_string(), log_level: log_level.to_string(),
max_indexing_memory, max_indexing_memory,
max_indexing_threads, max_indexing_threads,
@@ -394,17 +380,16 @@ impl Segment {
fn compute_traits(opt: &Opt, stats: Stats) -> Value { fn compute_traits(opt: &Opt, stats: Stats) -> Value {
static FIRST_START_TIMESTAMP: Lazy<Instant> = Lazy::new(Instant::now); static FIRST_START_TIMESTAMP: Lazy<Instant> = Lazy::new(Instant::now);
static SYSTEM: Lazy<Value> = Lazy::new(|| { static SYSTEM: Lazy<Value> = Lazy::new(|| {
let disks = Disks::new_with_refreshed_list();
let mut sys = System::new_all(); let mut sys = System::new_all();
sys.refresh_all(); sys.refresh_all();
let kernel_version = System::kernel_version()
    .and_then(|k| k.split_once('-').map(|(k, _)| k.to_string()));
let kernel_version =
    sys.kernel_version().and_then(|k| k.split_once('-').map(|(k, _)| k.to_string()));
json!({ json!({
"distribution": System::name(), "distribution": sys.name(),
"kernel_version": kernel_version, "kernel_version": kernel_version,
"cores": sys.cpus().len(), "cores": sys.cpus().len(),
"ram_size": sys.total_memory(), "ram_size": sys.total_memory(),
"disk_size": disks.iter().map(|disk| disk.total_space()).max(), "disk_size": sys.disks().iter().map(|disk| disk.total_space()).max(),
"server_provider": std::env::var("MEILI_SERVER_PROVIDER").ok(), "server_provider": std::env::var("MEILI_SERVER_PROVIDER").ok(),
}) })
}); });
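The left side reflects sysinfo 0.30, where disks moved out of `System` into a dedicated `Disks` list and several accessors (`name`, `kernel_version`) became associated functions. A minimal sketch of the newer API as it appears above, assuming sysinfo 0.30:

use sysinfo::{Disks, System};

fn main() {
    let mut sys = System::new_all();
    sys.refresh_all();
    // Disks are now a standalone list rather than a method on `System`.
    let disks = Disks::new_with_refreshed_list();
    println!(
        "distribution: {:?}, cores: {}, largest disk: {:?}",
        System::name(), // associated function in 0.30, was `sys.name()`
        sys.cpus().len(),
        disks.iter().map(|disk| disk.total_space()).max(),
    );
}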

View File

@@ -12,8 +12,6 @@ pub enum MeilisearchHttpError {
#[error("A Content-Type header is missing. Accepted values for the Content-Type header are: {}", #[error("A Content-Type header is missing. Accepted values for the Content-Type header are: {}",
.0.iter().map(|s| format!("`{}`", s)).collect::<Vec<_>>().join(", "))] .0.iter().map(|s| format!("`{}`", s)).collect::<Vec<_>>().join(", "))]
MissingContentType(Vec<String>), MissingContentType(Vec<String>),
#[error("The `/logs/stream` route is currently in use by someone else.")]
AlreadyUsedLogRoute,
#[error("The Content-Type `{0}` does not support the use of a csv delimiter. The csv delimiter can only be used with the Content-Type `text/csv`.")] #[error("The Content-Type `{0}` does not support the use of a csv delimiter. The csv delimiter can only be used with the Content-Type `text/csv`.")]
CsvDelimiterWithWrongContentType(String), CsvDelimiterWithWrongContentType(String),
#[error( #[error(
@@ -61,7 +59,6 @@ impl ErrorCode for MeilisearchHttpError {
fn error_code(&self) -> Code { fn error_code(&self) -> Code {
match self { match self {
MeilisearchHttpError::MissingContentType(_) => Code::MissingContentType, MeilisearchHttpError::MissingContentType(_) => Code::MissingContentType,
MeilisearchHttpError::AlreadyUsedLogRoute => Code::BadRequest,
MeilisearchHttpError::CsvDelimiterWithWrongContentType(_) => Code::InvalidContentType, MeilisearchHttpError::CsvDelimiterWithWrongContentType(_) => Code::InvalidContentType,
MeilisearchHttpError::MissingPayload(_) => Code::MissingPayload, MeilisearchHttpError::MissingPayload(_) => Code::MissingPayload,
MeilisearchHttpError::InvalidContentType(_, _) => Code::InvalidContentType, MeilisearchHttpError::InvalidContentType(_, _) => Code::InvalidContentType,

View File

@@ -29,6 +29,7 @@ use error::PayloadError;
use extractors::payload::PayloadConfig; use extractors::payload::PayloadConfig;
use http::header::CONTENT_TYPE; use http::header::CONTENT_TYPE;
use index_scheduler::{IndexScheduler, IndexSchedulerOptions}; use index_scheduler::{IndexScheduler, IndexSchedulerOptions};
use log::error;
use meilisearch_auth::AuthController; use meilisearch_auth::AuthController;
use meilisearch_types::milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader}; use meilisearch_types::milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
use meilisearch_types::milli::update::{IndexDocumentsConfig, IndexDocumentsMethod}; use meilisearch_types::milli::update::{IndexDocumentsConfig, IndexDocumentsMethod};
@@ -38,8 +39,6 @@ use meilisearch_types::versioning::{check_version_file, create_version_file};
use meilisearch_types::{compression, milli, VERSION_FILE_NAME}; use meilisearch_types::{compression, milli, VERSION_FILE_NAME};
pub use option::Opt; pub use option::Opt;
use option::ScheduleSnapshot; use option::ScheduleSnapshot;
use tracing::{error, info_span};
use tracing_subscriber::filter::Targets;
use crate::error::MeilisearchHttpError; use crate::error::MeilisearchHttpError;
@@ -87,35 +86,10 @@ fn is_empty_db(db_path: impl AsRef<Path>) -> bool {
} }
} }
/// The handle used to update the logs at runtime. Must be accessible from the `main.rs` and the `route/logs.rs`.
pub type LogRouteHandle =
tracing_subscriber::reload::Handle<LogRouteType, tracing_subscriber::Registry>;
pub type LogRouteType = tracing_subscriber::filter::Filtered<
Option<Box<dyn tracing_subscriber::Layer<tracing_subscriber::Registry> + Send + Sync>>,
Targets,
tracing_subscriber::Registry,
>;
pub type SubscriberForSecondLayer = tracing_subscriber::layer::Layered<
tracing_subscriber::reload::Layer<LogRouteType, tracing_subscriber::Registry>,
tracing_subscriber::Registry,
>;
pub type LogStderrHandle =
tracing_subscriber::reload::Handle<LogStderrType, SubscriberForSecondLayer>;
pub type LogStderrType = tracing_subscriber::filter::Filtered<
Box<dyn tracing_subscriber::Layer<SubscriberForSecondLayer> + Send + Sync>,
Targets,
SubscriberForSecondLayer,
>;
pub fn create_app( pub fn create_app(
index_scheduler: Data<IndexScheduler>, index_scheduler: Data<IndexScheduler>,
auth_controller: Data<AuthController>, auth_controller: Data<AuthController>,
opt: Opt, opt: Opt,
logs: (LogRouteHandle, LogStderrHandle),
analytics: Arc<dyn Analytics>, analytics: Arc<dyn Analytics>,
enable_dashboard: bool, enable_dashboard: bool,
) -> actix_web::App< ) -> actix_web::App<
@@ -134,7 +108,6 @@ pub fn create_app(
index_scheduler.clone(), index_scheduler.clone(),
auth_controller.clone(), auth_controller.clone(),
&opt, &opt,
logs,
analytics.clone(), analytics.clone(),
) )
}) })
@@ -150,49 +123,11 @@ pub fn create_app(
.allow_any_method() .allow_any_method()
.max_age(86_400), // 24h .max_age(86_400), // 24h
) )
.wrap(tracing_actix_web::TracingLogger::<AwebTracingLogger>::new()) .wrap(actix_web::middleware::Logger::default())
.wrap(actix_web::middleware::Compress::default()) .wrap(actix_web::middleware::Compress::default())
.wrap(actix_web::middleware::NormalizePath::new(actix_web::middleware::TrailingSlash::Trim)) .wrap(actix_web::middleware::NormalizePath::new(actix_web::middleware::TrailingSlash::Trim))
} }
struct AwebTracingLogger;
impl tracing_actix_web::RootSpanBuilder for AwebTracingLogger {
fn on_request_start(request: &actix_web::dev::ServiceRequest) -> tracing::Span {
use tracing::field::Empty;
let conn_info = request.connection_info();
let headers = request.headers();
let user_agent = headers
.get(http::header::USER_AGENT)
.map(|value| String::from_utf8_lossy(value.as_bytes()).into_owned())
.unwrap_or_default();
info_span!("HTTP request", method = %request.method(), host = conn_info.host(), route = %request.path(), query_parameters = %request.query_string(), %user_agent, status_code = Empty, error = Empty)
}
fn on_request_end<B: MessageBody>(
span: tracing::Span,
outcome: &Result<ServiceResponse<B>, actix_web::Error>,
) {
match &outcome {
Ok(response) => {
let code: i32 = response.response().status().as_u16().into();
span.record("status_code", code);
if let Some(error) = response.response().error() {
// use the status code already constructed for the outgoing HTTP response
span.record("error", &tracing::field::display(error.as_response_error()));
}
}
Err(error) => {
let code: i32 = error.error_response().status().as_u16().into();
span.record("status_code", code);
span.record("error", &tracing::field::display(error.as_response_error()));
}
};
}
}
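The removed `AwebTracingLogger` shows the deferred-field pattern from `tracing`: the request span is opened with `Empty` placeholders, and `record` fills in `status_code` and `error` once the outcome is known. A standalone sketch of that pattern, assuming the `tracing` and `tracing-subscriber` crates:

use tracing::field::Empty;

fn handle_request() {
    // Declare `status_code` up front with an Empty placeholder...
    let span = tracing::info_span!("HTTP request", method = "GET", status_code = Empty);
    let _guard = span.enter();
    // ... run the handler, then fill the field in once the outcome is known.
    span.record("status_code", 200);
}

fn main() {
    tracing_subscriber::fmt().init();
    handle_request();
}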
enum OnFailure { enum OnFailure {
RemoveDb, RemoveDb,
KeepDb, KeepDb,
@@ -293,8 +228,6 @@ fn open_or_create_database_unchecked(
indexes_path: opt.db_path.join("indexes"), indexes_path: opt.db_path.join("indexes"),
snapshots_path: opt.snapshot_dir.clone(), snapshots_path: opt.snapshot_dir.clone(),
dumps_path: opt.dump_dir.clone(), dumps_path: opt.dump_dir.clone(),
webhook_url: opt.task_webhook_url.as_ref().map(|url| url.to_string()),
webhook_authorization_header: opt.task_webhook_authorization_header.clone(),
task_db_size: opt.max_task_db_size.get_bytes() as usize, task_db_size: opt.max_task_db_size.get_bytes() as usize,
index_base_map_size: opt.max_index_size.get_bytes() as usize, index_base_map_size: opt.max_index_size.get_bytes() as usize,
enable_mdb_writemap: opt.experimental_reduce_indexing_memory_usage, enable_mdb_writemap: opt.experimental_reduce_indexing_memory_usage,
@@ -345,15 +278,15 @@ fn import_dump(
let mut dump_reader = dump::DumpReader::open(reader)?; let mut dump_reader = dump::DumpReader::open(reader)?;
if let Some(date) = dump_reader.date() { if let Some(date) = dump_reader.date() {
tracing::info!(
    version = ?dump_reader.version(), // TODO: get the meilisearch version instead of the dump version
    %date,
    "Importing a dump of meilisearch"
);
log::info!(
    "Importing a dump of meilisearch `{:?}` from the {}",
    dump_reader.version(), // TODO: get the meilisearch version instead of the dump version
    date
);
} else { } else {
tracing::info!(
    version = ?dump_reader.version(), // TODO: get the meilisearch version instead of the dump version
    "Importing a dump of meilisearch",
);
log::info!(
    "Importing a dump of meilisearch `{:?}`",
    dump_reader.version(), // TODO: get the meilisearch version instead of the dump version
);
} }
@@ -387,7 +320,7 @@ fn import_dump(
for index_reader in dump_reader.indexes()? { for index_reader in dump_reader.indexes()? {
let mut index_reader = index_reader?; let mut index_reader = index_reader?;
let metadata = index_reader.metadata(); let metadata = index_reader.metadata();
tracing::info!("Importing index `{}`.", metadata.uid); log::info!("Importing index `{}`.", metadata.uid);
let date = Some((metadata.created_at, metadata.updated_at)); let date = Some((metadata.created_at, metadata.updated_at));
let index = index_scheduler.create_raw_index(&metadata.uid, date)?; let index = index_scheduler.create_raw_index(&metadata.uid, date)?;
@@ -401,15 +334,14 @@ fn import_dump(
} }
// 4.2 Import the settings. // 4.2 Import the settings.
tracing::info!("Importing the settings."); log::info!("Importing the settings.");
let settings = index_reader.settings()?; let settings = index_reader.settings()?;
apply_settings_to_builder(&settings, &mut builder); apply_settings_to_builder(&settings, &mut builder);
builder builder.execute(|indexing_step| log::debug!("update: {:?}", indexing_step), || false)?;
.execute(|indexing_step| tracing::debug!("update: {:?}", indexing_step), || false)?;
// 4.3 Import the documents. // 4.3 Import the documents.
// 4.3.1 We need to recreate the grenad+obkv format accepted by the index. // 4.3.1 We need to recreate the grenad+obkv format accepted by the index.
tracing::info!("Importing the documents."); log::info!("Importing the documents.");
let file = tempfile::tempfile()?; let file = tempfile::tempfile()?;
let mut builder = DocumentsBatchBuilder::new(BufWriter::new(file)); let mut builder = DocumentsBatchBuilder::new(BufWriter::new(file));
for document in index_reader.documents()? { for document in index_reader.documents()? {
@@ -431,16 +363,15 @@ fn import_dump(
update_method: IndexDocumentsMethod::ReplaceDocuments, update_method: IndexDocumentsMethod::ReplaceDocuments,
..Default::default() ..Default::default()
}, },
|indexing_step| tracing::trace!("update: {:?}", indexing_step), |indexing_step| log::trace!("update: {:?}", indexing_step),
|| false, || false,
)?; )?;
let (builder, user_result) = builder.add_documents(reader)?; let (builder, user_result) = builder.add_documents(reader)?;
let user_result = user_result?; log::info!("{} documents found.", user_result?);
tracing::info!(documents_found = user_result, "{} documents found.", user_result);
builder.execute()?; builder.execute()?;
wtxn.commit()?; wtxn.commit()?;
tracing::info!("All documents successfully imported."); log::info!("All documents successfully imported.");
} }
let mut index_scheduler_dump = index_scheduler.register_dumped_task()?; let mut index_scheduler_dump = index_scheduler.register_dumped_task()?;
@@ -458,7 +389,6 @@ pub fn configure_data(
index_scheduler: Data<IndexScheduler>, index_scheduler: Data<IndexScheduler>,
auth: Data<AuthController>, auth: Data<AuthController>,
opt: &Opt, opt: &Opt,
(logs_route, logs_stderr): (LogRouteHandle, LogStderrHandle),
analytics: Arc<dyn Analytics>, analytics: Arc<dyn Analytics>,
) { ) {
let http_payload_size_limit = opt.http_payload_size_limit.get_bytes() as usize; let http_payload_size_limit = opt.http_payload_size_limit.get_bytes() as usize;
@@ -466,8 +396,6 @@ pub fn configure_data(
.app_data(index_scheduler) .app_data(index_scheduler)
.app_data(auth) .app_data(auth)
.app_data(web::Data::from(analytics)) .app_data(web::Data::from(analytics))
.app_data(web::Data::new(logs_route))
.app_data(web::Data::new(logs_stderr))
.app_data( .app_data(
web::JsonConfig::default() web::JsonConfig::default()
.limit(http_payload_size_limit) .limit(http_payload_size_limit)
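
The hunks above swap structured `tracing` events for flat `log` format strings. A hedged side-by-side sketch of the difference (assumes both crates as dependencies; the function is illustrative only):

fn log_dump_import(version: &str, date: &str) {
    // `log`: everything is interpolated into one opaque string.
    log::info!("Importing a dump of meilisearch `{}` from the {}", version, date);

    // `tracing`: version and date travel as typed key-value fields,
    // so a JSON subscriber can index and filter on them individually.
    tracing::info!(version, date, "Importing a dump of meilisearch");
}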

View File

@@ -1,7 +1,6 @@
use std::env;
-use std::io::{stderr, LineWriter, Write};
+use std::io::{stderr, Write};
use std::path::PathBuf;
-use std::str::FromStr;
use std::sync::Arc;

use actix_web::http::KeepAlive;
@@ -10,78 +9,37 @@ use actix_web::HttpServer;
use index_scheduler::IndexScheduler;
use is_terminal::IsTerminal;
use meilisearch::analytics::Analytics;
-use meilisearch::option::LogMode;
-use meilisearch::{
-    analytics, create_app, prototype_name, setup_meilisearch, LogRouteHandle, LogRouteType,
-    LogStderrHandle, LogStderrType, Opt, SubscriberForSecondLayer,
-};
+use meilisearch::{analytics, create_app, prototype_name, setup_meilisearch, Opt};
use meilisearch_auth::{generate_master_key, AuthController, MASTER_KEY_MIN_SIZE};
-use mimalloc::MiMalloc;
use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor};
-use tracing::level_filters::LevelFilter;
-use tracing_subscriber::layer::SubscriberExt as _;
-use tracing_subscriber::Layer;

#[global_allocator]
-static ALLOC: MiMalloc = MiMalloc;
+static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;

-fn default_log_route_layer() -> LogRouteType {
-    None.with_filter(tracing_subscriber::filter::Targets::new().with_target("", LevelFilter::OFF))
-}
-
-fn default_log_stderr_layer(opt: &Opt) -> LogStderrType {
-    let layer = tracing_subscriber::fmt::layer()
-        .with_writer(|| LineWriter::new(std::io::stderr()))
-        .with_span_events(tracing_subscriber::fmt::format::FmtSpan::CLOSE);
-
-    let layer = match opt.experimental_logs_mode {
-        LogMode::Human => Box::new(layer)
-            as Box<dyn tracing_subscriber::Layer<SubscriberForSecondLayer> + Send + Sync>,
-        LogMode::Json => Box::new(layer.json())
-            as Box<dyn tracing_subscriber::Layer<SubscriberForSecondLayer> + Send + Sync>,
-    };
-
-    layer.with_filter(
-        tracing_subscriber::filter::Targets::new()
-            .with_target("", LevelFilter::from_str(&opt.log_level.to_string()).unwrap()),
-    )
-}
-
/// does all the setup before meilisearch is launched
-fn setup(opt: &Opt) -> anyhow::Result<(LogRouteHandle, LogStderrHandle)> {
-    let (route_layer, route_layer_handle) =
-        tracing_subscriber::reload::Layer::new(default_log_route_layer());
-    let route_layer: tracing_subscriber::reload::Layer<_, _> = route_layer;
-
-    let (stderr_layer, stderr_layer_handle) =
-        tracing_subscriber::reload::Layer::new(default_log_stderr_layer(opt));
-    let route_layer: tracing_subscriber::reload::Layer<_, _> = route_layer;
-
-    let subscriber = tracing_subscriber::registry().with(route_layer).with(stderr_layer);
-
-    // set the subscriber as the default for the application
-    tracing::subscriber::set_global_default(subscriber).unwrap();
-
-    Ok((route_layer_handle, stderr_layer_handle))
-}
-
-fn on_panic(info: &std::panic::PanicInfo) {
-    let info = info.to_string().replace('\n', " ");
-    tracing::error!(%info);
+fn setup(opt: &Opt) -> anyhow::Result<()> {
+    let mut log_builder = env_logger::Builder::new();
+    let log_filters = format!(
+        "{},h2=warn,hyper=warn,tokio_util=warn,tracing=warn,rustls=warn,mio=warn,reqwest=warn",
+        opt.log_level
+    );
+    log_builder.parse_filters(&log_filters);
+
+    log_builder.init();
+
+    Ok(())
}

#[actix_web::main]
async fn main() -> anyhow::Result<()> {
    let (opt, config_read_from) = Opt::try_build()?;

-   std::panic::set_hook(Box::new(on_panic));
-
    anyhow::ensure!(
        !(cfg!(windows) && opt.experimental_reduce_indexing_memory_usage),
        "The `experimental-reduce-indexing-memory-usage` flag is not supported on Windows"
    );

-   let log_handle = setup(&opt)?;
+   setup(&opt)?;

    match (opt.env.as_ref(), &opt.master_key) {
        ("production", Some(master_key)) if master_key.len() < MASTER_KEY_MIN_SIZE => {
@@ -119,7 +77,7 @@ async fn main() -> anyhow::Result<()> {
    print_launch_resume(&opt, analytics.clone(), config_read_from);

-   run_http(index_scheduler, auth_controller, opt, log_handle, analytics).await?;
+   run_http(index_scheduler, auth_controller, opt, analytics).await?;

    Ok(())
}
@@ -128,7 +86,6 @@ async fn run_http(
    index_scheduler: Arc<IndexScheduler>,
    auth_controller: Arc<AuthController>,
    opt: Opt,
-   logs: (LogRouteHandle, LogStderrHandle),
    analytics: Arc<dyn Analytics>,
) -> anyhow::Result<()> {
    let enable_dashboard = &opt.env == "development";
@@ -141,7 +98,6 @@ async fn run_http(
        index_scheduler.clone(),
        auth_controller.clone(),
        opt.clone(),
-       logs.clone(),
        analytics.clone(),
        enable_dashboard,
    )
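
The `setup` variant on the left builds its `LogRouteHandle`/`LogStderrHandle` on `tracing_subscriber`'s reload machinery. A minimal sketch of that mechanism in isolation, assuming the `env-filter` feature of `tracing-subscriber` is enabled:

use tracing_subscriber::layer::SubscriberExt;
use tracing_subscriber::{reload, EnvFilter, Registry};

fn main() {
    // The reload layer hands back a `Handle` that can swap the wrapped
    // layer at runtime, which is what the log routes rely on.
    let (filter, handle) = reload::Layer::new(EnvFilter::new("info"));
    let subscriber = Registry::default().with(filter).with(tracing_subscriber::fmt::layer());
    tracing::subscriber::set_global_default(subscriber).unwrap();

    tracing::info!("visible while the filter is `info`");
    // Tighten the filter without restarting the process.
    handle.modify(|f| *f = EnvFilter::new("warn")).unwrap();
    tracing::info!("now filtered out");
}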

View File

@@ -20,8 +20,7 @@ use rustls::server::{
use rustls::RootCertStore;
use rustls_pemfile::{certs, pkcs8_private_keys, rsa_private_keys};
use serde::{Deserialize, Serialize};
-use sysinfo::{MemoryRefreshKind, RefreshKind, System};
-use url::Url;
+use sysinfo::{RefreshKind, System, SystemExt};

const POSSIBLE_ENV: [&str; 2] = ["development", "production"];
@@ -29,8 +28,6 @@ const MEILI_DB_PATH: &str = "MEILI_DB_PATH";
const MEILI_HTTP_ADDR: &str = "MEILI_HTTP_ADDR";
const MEILI_MASTER_KEY: &str = "MEILI_MASTER_KEY";
const MEILI_ENV: &str = "MEILI_ENV";
-const MEILI_TASK_WEBHOOK_URL: &str = "MEILI_TASK_WEBHOOK_URL";
-const MEILI_TASK_WEBHOOK_AUTHORIZATION_HEADER: &str = "MEILI_TASK_WEBHOOK_AUTHORIZATION_HEADER";
#[cfg(feature = "analytics")]
const MEILI_NO_ANALYTICS: &str = "MEILI_NO_ANALYTICS";
const MEILI_HTTP_PAYLOAD_SIZE_LIMIT: &str = "MEILI_HTTP_PAYLOAD_SIZE_LIMIT";
@@ -51,8 +48,6 @@ const MEILI_IGNORE_MISSING_DUMP: &str = "MEILI_IGNORE_MISSING_DUMP";
const MEILI_IGNORE_DUMP_IF_DB_EXISTS: &str = "MEILI_IGNORE_DUMP_IF_DB_EXISTS";
const MEILI_DUMP_DIR: &str = "MEILI_DUMP_DIR";
const MEILI_LOG_LEVEL: &str = "MEILI_LOG_LEVEL";
-const MEILI_EXPERIMENTAL_LOGS_MODE: &str = "MEILI_EXPERIMENTAL_LOGS_MODE";
-const MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE: &str = "MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE";
const MEILI_EXPERIMENTAL_ENABLE_METRICS: &str = "MEILI_EXPERIMENTAL_ENABLE_METRICS";
const MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE: &str =
    "MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE";
@@ -80,39 +75,6 @@ const DEFAULT_LOG_EVERY_N: usize = 100_000;
pub const INDEX_SIZE: u64 = 2 * 1024 * 1024 * 1024 * 1024; // 2 TiB
pub const TASK_DB_SIZE: u64 = 20 * 1024 * 1024 * 1024; // 20 GiB

-#[derive(Debug, Default, Clone, Copy, Serialize, Deserialize)]
-#[serde(rename_all = "UPPERCASE")]
-pub enum LogMode {
-    #[default]
-    Human,
-    Json,
-}
-
-impl Display for LogMode {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        match self {
-            LogMode::Human => Display::fmt("HUMAN", f),
-            LogMode::Json => Display::fmt("JSON", f),
-        }
-    }
-}
-
-impl FromStr for LogMode {
-    type Err = LogModeError;
-
-    fn from_str(s: &str) -> Result<Self, Self::Err> {
-        match s.trim().to_lowercase().as_str() {
-            "human" => Ok(LogMode::Human),
-            "json" => Ok(LogMode::Json),
-            _ => Err(LogModeError(s.to_owned())),
-        }
-    }
-}
-
-#[derive(Debug, thiserror::Error)]
-#[error("Unsupported log mode level `{0}`. Supported values are `HUMAN` and `JSON`.")]
-pub struct LogModeError(String);
-
#[derive(Debug, Default, Clone, Copy, Serialize, Deserialize)]
#[serde(rename_all = "UPPERCASE")]
pub enum LogLevel {
@@ -194,14 +156,6 @@ pub struct Opt {
    #[serde(default = "default_env")]
    pub env: String,

-   /// Called whenever a task finishes so a third party can be notified.
-   #[clap(long, env = MEILI_TASK_WEBHOOK_URL)]
-   pub task_webhook_url: Option<Url>,
-
-   /// The Authorization header to send on the webhook URL whenever a task finishes so a third party can be notified.
-   #[clap(long, env = MEILI_TASK_WEBHOOK_AUTHORIZATION_HEADER)]
-   pub task_webhook_authorization_header: Option<String>,
-
    /// Deactivates Meilisearch's built-in telemetry when provided.
    ///
    /// Meilisearch automatically collects data from all instances that do not opt out using this flag.
@@ -344,20 +298,6 @@ pub struct Opt {
    #[serde(default)]
    pub experimental_enable_metrics: bool,

-   /// Experimental logs mode feature. For more information, see: <https://github.com/orgs/meilisearch/discussions/723>
-   ///
-   /// Change the mode of the logs on the console.
-   #[clap(long, env = MEILI_EXPERIMENTAL_LOGS_MODE, default_value_t)]
-   #[serde(default)]
-   pub experimental_logs_mode: LogMode,
-
-   /// Experimental logs route feature. For more information, see: <https://github.com/orgs/meilisearch/discussions/721>
-   ///
-   /// Enables the log routes on the `POST /logs/stream`, `POST /logs/stderr` endpoints, and the `DELETE /logs/stream` to stop receiving logs.
-   #[clap(long, env = MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE)]
-   #[serde(default)]
-   pub experimental_enable_logs_route: bool,
-
    /// Experimental RAM reduction during indexing, do not use in production, see: <https://github.com/meilisearch/product/discussions/652>
    #[clap(long, env = MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE)]
    #[serde(default)]
@@ -435,8 +375,6 @@ impl Opt {
            http_addr,
            master_key,
            env,
-           task_webhook_url,
-           task_webhook_authorization_header,
            max_index_size: _,
            max_task_db_size: _,
            http_payload_size_limit,
@@ -463,8 +401,6 @@ impl Opt {
            #[cfg(feature = "analytics")]
            no_analytics,
            experimental_enable_metrics,
-           experimental_logs_mode,
-           experimental_enable_logs_route,
            experimental_reduce_indexing_memory_usage,
        } = self;
        export_to_env_if_not_present(MEILI_DB_PATH, db_path);
@@ -473,16 +409,6 @@ impl Opt {
            export_to_env_if_not_present(MEILI_MASTER_KEY, master_key);
        }
        export_to_env_if_not_present(MEILI_ENV, env);
-       if let Some(task_webhook_url) = task_webhook_url {
-           export_to_env_if_not_present(MEILI_TASK_WEBHOOK_URL, task_webhook_url.to_string());
-       }
-       if let Some(task_webhook_authorization_header) = task_webhook_authorization_header {
-           export_to_env_if_not_present(
-               MEILI_TASK_WEBHOOK_AUTHORIZATION_HEADER,
-               task_webhook_authorization_header,
-           );
-       }
        #[cfg(feature = "analytics")]
        {
            export_to_env_if_not_present(MEILI_NO_ANALYTICS, no_analytics.to_string());
@@ -521,14 +447,6 @@ impl Opt {
            MEILI_EXPERIMENTAL_ENABLE_METRICS,
            experimental_enable_metrics.to_string(),
        );
-       export_to_env_if_not_present(
-           MEILI_EXPERIMENTAL_LOGS_MODE,
-           experimental_logs_mode.to_string(),
-       );
-       export_to_env_if_not_present(
-           MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE,
-           experimental_enable_logs_route.to_string(),
-       );
        export_to_env_if_not_present(
            MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE,
            experimental_reduce_indexing_memory_usage.to_string(),
@@ -583,10 +501,7 @@ impl Opt {
    }

    pub(crate) fn to_instance_features(&self) -> InstanceTogglableFeatures {
-       InstanceTogglableFeatures {
-           metrics: self.experimental_enable_metrics,
-           logs_route: self.experimental_enable_logs_route,
-       }
+       InstanceTogglableFeatures { metrics: self.experimental_enable_metrics }
    }
}
@@ -695,8 +610,8 @@ impl MaxMemory {
/// Returns the total amount of bytes available or `None` if this system isn't supported.
fn total_memory_bytes() -> Option<u64> {
-   if sysinfo::IS_SUPPORTED_SYSTEM {
-       let memory_kind = RefreshKind::new().with_memory(MemoryRefreshKind::new().with_ram());
+   if System::IS_SUPPORTED {
+       let memory_kind = RefreshKind::new().with_memory();
        let mut system = System::new_with_specifics(memory_kind);
        system.refresh_memory();
        Some(system.total_memory())
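
For reference, the left-hand side of that last hunk as a standalone probe, in the newer sysinfo 0.30-style API where memory refreshing is opted into explicitly; on the 0.29-style API shown on the right, the `SystemExt` trait and `System::IS_SUPPORTED` are required instead:

use sysinfo::{MemoryRefreshKind, RefreshKind, System};

/// Returns the total amount of memory in bytes, or `None` on unsupported systems.
fn total_memory_bytes() -> Option<u64> {
    if sysinfo::IS_SUPPORTED_SYSTEM {
        // Only refresh RAM figures; skip CPU, disks, and processes.
        let kind = RefreshKind::new().with_memory(MemoryRefreshKind::new().with_ram());
        let mut system = System::new_with_specifics(kind);
        system.refresh_memory();
        Some(system.total_memory())
    } else {
        None
    }
}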

View File

@@ -1,11 +1,11 @@
use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse};
use index_scheduler::IndexScheduler;
+use log::debug;
use meilisearch_auth::AuthController;
use meilisearch_types::error::ResponseError;
use meilisearch_types::tasks::KindWithContent;
use serde_json::json;
-use tracing::debug;

use crate::analytics::Analytics;
use crate::extractors::authentication::policies::*;
@@ -32,6 +32,6 @@ pub async fn create_dump(
    let task: SummarizedTaskView =
        tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();

-   debug!(returns = ?task, "Create dump");
+   debug!("returns: {:?}", task);
    Ok(HttpResponse::Accepted().json(task))
}

View File

@@ -3,11 +3,11 @@ use actix_web::{HttpRequest, HttpResponse};
use deserr::actix_web::AwebJson;
use deserr::Deserr;
use index_scheduler::IndexScheduler;
+use log::debug;
use meilisearch_types::deserr::DeserrJsonError;
use meilisearch_types::error::ResponseError;
use meilisearch_types::keys::actions;
use serde_json::json;
-use tracing::debug;

use crate::analytics::Analytics;
use crate::extractors::authentication::policies::ActionPolicy;
@@ -33,21 +33,20 @@ async fn get_features(
    let features = index_scheduler.features();

    analytics.publish("Experimental features Seen".to_string(), json!(null), Some(&req));
-   let features = features.runtime_features();
-   debug!(returns = ?features, "Get features");
-   HttpResponse::Ok().json(features)
+   debug!("returns: {:?}", features.runtime_features());
+   HttpResponse::Ok().json(features.runtime_features())
}

#[derive(Debug, Deserr)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
pub struct RuntimeTogglableFeatures {
-   #[deserr(default)]
-   pub score_details: Option<bool>,
    #[deserr(default)]
    pub vector_store: Option<bool>,
    #[deserr(default)]
    pub metrics: Option<bool>,
    #[deserr(default)]
-   pub logs_route: Option<bool>,
-   #[deserr(default)]
    pub export_puffin_reports: Option<bool>,
}
@@ -61,13 +60,12 @@ async fn patch_features(
    analytics: Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
    let features = index_scheduler.features();
-   debug!(parameters = ?new_features, "Patch features");

    let old_features = features.runtime_features();
    let new_features = meilisearch_types::features::RuntimeTogglableFeatures {
-       score_details: new_features.0.score_details.unwrap_or(old_features.score_details),
        vector_store: new_features.0.vector_store.unwrap_or(old_features.vector_store),
        metrics: new_features.0.metrics.unwrap_or(old_features.metrics),
-       logs_route: new_features.0.logs_route.unwrap_or(old_features.logs_route),
        export_puffin_reports: new_features
            .0
            .export_puffin_reports
@@ -78,23 +76,22 @@ async fn patch_features(
    // the it renames to camelCase, which we don't want for analytics.
    // **Do not** ignore fields with `..` or `_` here, because we want to add them in the future.
    let meilisearch_types::features::RuntimeTogglableFeatures {
-       score_details,
        vector_store,
        metrics,
-       logs_route,
        export_puffin_reports,
    } = new_features;

    analytics.publish(
        "Experimental features Updated".to_string(),
        json!({
-           "score_details": score_details,
            "vector_store": vector_store,
            "metrics": metrics,
-           "logs_route": logs_route,
            "export_puffin_reports": export_puffin_reports,
        }),
        Some(&req),
    );

    index_scheduler.put_runtime_features(new_features)?;
-   debug!(returns = ?new_features, "Patch features");
    Ok(HttpResponse::Ok().json(new_features))
}
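
The merge step in `patch_features` implements PATCH semantics: every field the caller omits falls back to the currently stored value. A reduced, self-contained sketch of that pattern with hypothetical types:

#[derive(Clone, Copy, Debug, Default)]
struct Features {
    vector_store: bool,
    metrics: bool,
}

// Every `None` keeps the old value; every `Some` overrides it.
fn merge(old: Features, vector_store: Option<bool>, metrics: Option<bool>) -> Features {
    Features {
        vector_store: vector_store.unwrap_or(old.vector_store),
        metrics: metrics.unwrap_or(old.metrics),
    }
}

fn main() {
    let old = Features { vector_store: false, metrics: true };
    let new = merge(old, Some(true), None);
    assert!(new.vector_store); // overridden by the caller
    assert!(new.metrics); // preserved from the stored state
}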

View File

@@ -8,6 +8,7 @@ use deserr::actix_web::{AwebJson, AwebQueryParameter};
use deserr::Deserr;
use futures::StreamExt;
use index_scheduler::IndexScheduler;
+use log::debug;
use meilisearch_types::deserr::query_params::Param;
use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError};
use meilisearch_types::document_formats::{read_csv, read_json, read_ndjson, PayloadType};
@@ -27,7 +28,6 @@ use serde_json::Value;
use tempfile::tempfile;
use tokio::fs::File;
use tokio::io::{AsyncSeekExt, AsyncWriteExt, BufWriter};
-use tracing::debug;

use crate::analytics::{Analytics, DocumentDeletionKind, DocumentFetchKind};
use crate::error::MeilisearchHttpError;
@@ -101,7 +101,6 @@ pub async fn get_document(
    analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
    let DocumentParam { index_uid, document_id } = document_param.into_inner();
-   debug!(parameters = ?params, "Get document");
    let index_uid = IndexUid::try_from(index_uid)?;

    analytics.get_fetch_documents(&DocumentFetchKind::PerDocumentId, &req);
@@ -111,7 +110,7 @@ pub async fn get_document(
    let index = index_scheduler.index(&index_uid)?;
    let document = retrieve_document(&index, &document_id, attributes_to_retrieve)?;
-   debug!(returns = ?document, "Get document");
+   debug!("returns: {:?}", document);
    Ok(HttpResponse::Ok().json(document))
}
@@ -132,7 +131,7 @@ pub async fn delete_document(
    };
    let task: SummarizedTaskView =
        tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
-   debug!(returns = ?task, "Delete document");
+   debug!("returns: {:?}", task);
    Ok(HttpResponse::Accepted().json(task))
}
@@ -169,8 +168,9 @@ pub async fn documents_by_query_post(
    req: HttpRequest,
    analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
+   debug!("called with body: {:?}", body);
    let body = body.into_inner();
-   debug!(parameters = ?body, "Get documents POST");

    analytics.post_fetch_documents(
        &DocumentFetchKind::Normal {
@@ -191,7 +191,7 @@ pub async fn get_documents(
    req: HttpRequest,
    analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
-   debug!(parameters = ?params, "Get documents GET");
+   debug!("called with params: {:?}", params);

    let BrowseQueryGet { limit, offset, fields, filter } = params.into_inner();
@@ -235,7 +235,7 @@ fn documents_by_query(
    let ret = PaginationView::new(offset, limit, total as usize, documents);

-   debug!(returns = ?ret, "Get documents");
+   debug!("returns: {:?}", ret);
    Ok(HttpResponse::Ok().json(ret))
}
@@ -271,7 +271,7 @@ pub async fn replace_documents(
) -> Result<HttpResponse, ResponseError> {
    let index_uid = IndexUid::try_from(index_uid.into_inner())?;

-   debug!(parameters = ?params, "Replace documents");
+   debug!("called with params: {:?}", params);
    let params = params.into_inner();

    analytics.add_documents(&params, index_scheduler.index(&index_uid).is_err(), &req);
@@ -288,7 +288,6 @@ pub async fn replace_documents(
        allow_index_creation,
    )
    .await?;
-   debug!(returns = ?task, "Replace documents");

    Ok(HttpResponse::Accepted().json(task))
}
@@ -303,8 +302,8 @@ pub async fn update_documents(
) -> Result<HttpResponse, ResponseError> {
    let index_uid = IndexUid::try_from(index_uid.into_inner())?;

+   debug!("called with params: {:?}", params);
    let params = params.into_inner();
-   debug!(parameters = ?params, "Update documents");

    analytics.update_documents(&params, index_scheduler.index(&index_uid).is_err(), &req);
@@ -320,7 +319,6 @@ pub async fn update_documents(
        allow_index_creation,
    )
    .await?;
-   debug!(returns = ?task, "Update documents");

    Ok(HttpResponse::Accepted().json(task))
}
@@ -429,10 +427,7 @@ async fn document_addition(
        Err(index_scheduler::Error::FileStore(file_store::Error::IoError(e)))
            if e.kind() == ErrorKind::NotFound => {}
        Err(e) => {
-           tracing::warn!(
-               index_uuid = %uuid,
-               "Unknown error happened while deleting a malformed update file: {e}"
-           );
+           log::warn!("Unknown error happened while deleting a malformed update file with uuid {uuid}: {e}");
        }
    }
    // We still want to return the original error to the end user.
@@ -458,6 +453,7 @@ async fn document_addition(
        }
    };

+   debug!("returns: {:?}", task);
    Ok(task.into())
}
@@ -468,7 +464,7 @@ pub async fn delete_documents_batch(
    req: HttpRequest,
    analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
-   debug!(parameters = ?body, "Delete documents by batch");
+   debug!("called with params: {:?}", body);
    let index_uid = IndexUid::try_from(index_uid.into_inner())?;

    analytics.delete_documents(DocumentDeletionKind::PerBatch, &req);
@@ -483,7 +479,7 @@ pub async fn delete_documents_batch(
    let task: SummarizedTaskView =
        tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();

-   debug!(returns = ?task, "Delete documents by batch");
+   debug!("returns: {:?}", task);
    Ok(HttpResponse::Accepted().json(task))
}
@@ -501,7 +497,7 @@ pub async fn delete_documents_by_filter(
    req: HttpRequest,
    analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
-   debug!(parameters = ?body, "Delete documents by filter");
+   debug!("called with params: {:?}", body);
    let index_uid = IndexUid::try_from(index_uid.into_inner())?;
    let index_uid = index_uid.into_inner();
    let filter = body.into_inner().filter;
@@ -519,7 +515,7 @@ pub async fn delete_documents_by_filter(
    let task: SummarizedTaskView =
        tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();

-   debug!(returns = ?task, "Delete documents by filter");
+   debug!("returns: {:?}", task);
    Ok(HttpResponse::Accepted().json(task))
}
@@ -536,7 +532,7 @@ pub async fn clear_all_documents(
    let task: SummarizedTaskView =
        tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();

-   debug!(returns = ?task, "Delete all documents");
+   debug!("returns: {:?}", task);
    Ok(HttpResponse::Accepted().json(task))
}
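
A note on the recurring `await??` in these handlers: `spawn_blocking` yields a `Result<Result<_, _>, JoinError>`, so the first `?` unwraps the join and the second unwraps the registration itself. A hedged, self-contained sketch (tokio and anyhow assumed as dependencies):

// Stand-in for the `index_scheduler.register(task)` calls above.
async fn register_blocking() -> anyhow::Result<u32> {
    let task_id = tokio::task::spawn_blocking(|| -> anyhow::Result<u32> {
        Ok(42) // hypothetical task id
    })
    .await??; // first `?`: the JoinError; second `?`: the inner registration Result
    Ok(task_id)
}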

View File

@@ -2,12 +2,12 @@ use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse};
use deserr::actix_web::AwebJson;
use index_scheduler::IndexScheduler;
+use log::debug;
use meilisearch_types::deserr::DeserrJsonError;
use meilisearch_types::error::deserr_codes::*;
use meilisearch_types::error::ResponseError;
use meilisearch_types::index_uid::IndexUid;
use serde_json::Value;
-use tracing::debug;

use crate::analytics::{Analytics, FacetSearchAggregator};
use crate::extractors::authentication::policies::*;
@@ -56,7 +56,7 @@ pub async fn search(
    let index_uid = IndexUid::try_from(index_uid.into_inner())?;

    let query = params.into_inner();
-   debug!(parameters = ?query, "Facet search");
+   debug!("facet search called with params: {:?}", query);

    let mut aggregate = FacetSearchAggregator::from_query(&query, &req);
@@ -83,7 +83,7 @@ pub async fn search(
    let search_result = search_result?;

-   debug!(returns = ?search_result, "Facet search");
+   debug!("returns: {:?}", search_result);
    Ok(HttpResponse::Ok().json(search_result))
}

View File

@@ -5,6 +5,7 @@ use actix_web::{web, HttpRequest, HttpResponse};
use deserr::actix_web::{AwebJson, AwebQueryParameter};
use deserr::{DeserializeError, Deserr, ValuePointerRef};
use index_scheduler::IndexScheduler;
+use log::debug;
use meilisearch_types::deserr::query_params::Param;
use meilisearch_types::deserr::{immutable_field_error, DeserrJsonError, DeserrQueryParamError};
use meilisearch_types::error::deserr_codes::*;
@@ -15,7 +16,6 @@ use meilisearch_types::tasks::KindWithContent;
use serde::Serialize;
use serde_json::json;
use time::OffsetDateTime;
-use tracing::debug;

use super::{Pagination, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT};
use crate::analytics::Analytics;
@@ -93,7 +93,6 @@ pub async fn list_indexes(
    index_scheduler: GuardedData<ActionPolicy<{ actions::INDEXES_GET }>, Data<IndexScheduler>>,
    paginate: AwebQueryParameter<ListIndexes, DeserrQueryParamError>,
) -> Result<HttpResponse, ResponseError> {
-   debug!(parameters = ?paginate, "List indexes");
    let filters = index_scheduler.filters();
    let indexes: Vec<Option<IndexView>> =
        index_scheduler.try_for_each_index(|uid, index| -> Result<Option<IndexView>, _> {
@@ -106,7 +105,7 @@ pub async fn list_indexes(
    let indexes: Vec<IndexView> = indexes.into_iter().flatten().collect();
    let ret = paginate.as_pagination().auto_paginate_sized(indexes.into_iter());

-   debug!(returns = ?ret, "List indexes");
+   debug!("returns: {:?}", ret);
    Ok(HttpResponse::Ok().json(ret))
}
@@ -125,7 +124,6 @@ pub async fn create_index(
    req: HttpRequest,
    analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
-   debug!(parameters = ?body, "Create index");
    let IndexCreateRequest { primary_key, uid } = body.into_inner();

    let allow_index_creation = index_scheduler.filters().allow_index_creation(&uid);
@@ -139,7 +137,6 @@ pub async fn create_index(
        let task = KindWithContent::IndexCreation { index_uid: uid.to_string(), primary_key };
        let task: SummarizedTaskView =
            tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
-       debug!(returns = ?task, "Create index");

        Ok(HttpResponse::Accepted().json(task))
    } else {
@@ -180,7 +177,7 @@ pub async fn get_index(
    let index = index_scheduler.index(&index_uid)?;
    let index_view = IndexView::new(index_uid.into_inner(), &index)?;

-   debug!(returns = ?index_view, "Get index");
+   debug!("returns: {:?}", index_view);
    Ok(HttpResponse::Ok().json(index_view))
}
@@ -192,7 +189,7 @@ pub async fn update_index(
    req: HttpRequest,
    analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
-   debug!(parameters = ?body, "Update index");
+   debug!("called with params: {:?}", body);
    let index_uid = IndexUid::try_from(index_uid.into_inner())?;
    let body = body.into_inner();
    analytics.publish(
@@ -209,7 +206,7 @@ pub async fn update_index(
    let task: SummarizedTaskView =
        tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();

-   debug!(returns = ?task, "Update index");
+   debug!("returns: {:?}", task);
    Ok(HttpResponse::Accepted().json(task))
}
@@ -221,7 +218,6 @@ pub async fn delete_index(
    let task = KindWithContent::IndexDeletion { index_uid: index_uid.into_inner() };
    let task: SummarizedTaskView =
        tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
-   debug!(returns = ?task, "Delete index");

    Ok(HttpResponse::Accepted().json(task))
}
@@ -259,6 +255,6 @@ pub async fn get_index_stats(
    let stats = IndexStats::from(index_scheduler.index_stats(&index_uid)?);

-   debug!(returns = ?stats, "Get index stats");
+   debug!("returns: {:?}", stats);
    Ok(HttpResponse::Ok().json(stats))
}

View File

@@ -2,6 +2,7 @@ use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse};
use deserr::actix_web::{AwebJson, AwebQueryParameter};
use index_scheduler::IndexScheduler;
+use log::{debug, warn};
use meilisearch_types::deserr::query_params::Param;
use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError};
use meilisearch_types::error::deserr_codes::*;
@@ -11,7 +12,6 @@ use meilisearch_types::milli;
use meilisearch_types::milli::vector::DistributionShift;
use meilisearch_types::serde_cs::vec::CS;
use serde_json::Value;
-use tracing::{debug, warn};

use crate::analytics::{Analytics, SearchAggregator};
use crate::extractors::authentication::policies::*;
@@ -186,7 +186,7 @@ pub async fn search_with_url_query(
    req: HttpRequest,
    analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
-   debug!(parameters = ?params, "Search get");
+   debug!("called with params: {:?}", params);
    let index_uid = IndexUid::try_from(index_uid.into_inner())?;

    let mut query: SearchQuery = params.into_inner().into();
@@ -213,7 +213,7 @@ pub async fn search_with_url_query(
    let search_result = search_result?;

-   debug!(returns = ?search_result, "Search get");
+   debug!("returns: {:?}", search_result);
    Ok(HttpResponse::Ok().json(search_result))
}
@@ -227,7 +227,7 @@ pub async fn search_with_post(
    let index_uid = IndexUid::try_from(index_uid.into_inner())?;

    let mut query = params.into_inner();
-   debug!(parameters = ?query, "Search post");
+   debug!("search called with params: {:?}", query);

    // Tenant token search_rules.
    if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid) {
@@ -252,7 +252,7 @@ pub async fn search_with_post(
    let search_result = search_result?;

-   debug!(returns = ?search_result, "Search post");
+   debug!("returns: {:?}", search_result);
    Ok(HttpResponse::Ok().json(search_result))
}
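
`search_with_url_query` converts the GET parameters into the same `SearchQuery` the POST route consumes via a `From` impl (`params.into_inner().into()`). A reduced sketch of that conversion pattern with hypothetical types and a hypothetical default limit, not the actual Meilisearch definitions:

struct SearchQueryGet {
    q: Option<String>,
    limit: Option<usize>,
}

struct SearchQuery {
    q: Option<String>,
    limit: usize,
}

// GET parameters are all optional; the conversion fills in defaults so both
// the GET and POST handlers can share one search path.
impl From<SearchQueryGet> for SearchQuery {
    fn from(other: SearchQueryGet) -> Self {
        SearchQuery { q: other.q, limit: other.limit.unwrap_or(20) }
    }
}

fn main() {
    let query: SearchQuery = SearchQueryGet { q: Some("prefix".into()), limit: None }.into();
    assert_eq!(query.limit, 20);
}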

View File

@@ -2,6 +2,7 @@ use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse};
use deserr::actix_web::AwebJson;
use index_scheduler::IndexScheduler;
+use log::debug;
use meilisearch_types::deserr::DeserrJsonError;
use meilisearch_types::error::ResponseError;
use meilisearch_types::facet_values_sort::FacetValuesSort;
@@ -10,7 +11,6 @@ use meilisearch_types::milli::update::Setting;
use meilisearch_types::settings::{settings, RankingRuleView, Settings, Unchecked};
use meilisearch_types::tasks::KindWithContent;
use serde_json::json;
-use tracing::debug;

use crate::analytics::Analytics;
use crate::extractors::authentication::policies::*;
@@ -24,12 +24,12 @@ macro_rules! make_setting_route {
        use actix_web::web::Data;
        use actix_web::{web, HttpRequest, HttpResponse, Resource};
        use index_scheduler::IndexScheduler;
+       use log::debug;
        use meilisearch_types::error::ResponseError;
        use meilisearch_types::index_uid::IndexUid;
        use meilisearch_types::milli::update::Setting;
        use meilisearch_types::settings::{settings, Settings};
        use meilisearch_types::tasks::KindWithContent;
-       use tracing::debug;
        use $crate::analytics::Analytics;
        use $crate::extractors::authentication::policies::*;
        use $crate::extractors::authentication::GuardedData;
@@ -61,7 +61,7 @@ macro_rules! make_setting_route {
                .await??
                .into();

-           debug!(returns = ?task, "Delete settings");
+           debug!("returns: {:?}", task);
            Ok(HttpResponse::Accepted().json(task))
        }
@@ -78,7 +78,6 @@ macro_rules! make_setting_route {
            let index_uid = IndexUid::try_from(index_uid.into_inner())?;

            let body = body.into_inner();
-           debug!(parameters = ?body, "Update settings");

            #[allow(clippy::redundant_closure_call)]
            $analytics(&body, &req);
@@ -91,11 +90,6 @@ macro_rules! make_setting_route {
                ..Default::default()
            };

-           let new_settings = $crate::routes::indexes::settings::validate_settings(
-               new_settings,
-               &index_scheduler,
-           )?;
-
            let allow_index_creation =
                index_scheduler.filters().allow_index_creation(&index_uid);
@@ -110,7 +104,7 @@ macro_rules! make_setting_route {
                .await??
                .into();

-           debug!(returns = ?task, "Update settings");
+           debug!("returns: {:?}", task);
            Ok(HttpResponse::Accepted().json(task))
        }
@@ -127,7 +121,7 @@ macro_rules! make_setting_route {
            let rtxn = index.read_txn()?;
            let settings = settings(&index, &rtxn)?;

-           debug!(returns = ?settings, "Update settings");
+           debug!("returns: {:?}", settings);
            let mut json = serde_json::json!(&settings);
            let val = json[$camelcase_attr].take();
@@ -459,7 +453,7 @@ make_setting_route!(
            json!({
                "proximity_precision": {
                    "set": precision.is_some(),
-                   "value": precision.unwrap_or_default(),
+                   "value": precision,
                }
            }),
            Some(req),
@@ -588,13 +582,13 @@ fn embedder_analytics(
        for source in s
            .values()
            .filter_map(|config| config.clone().set())
-           .filter_map(|config| config.source.set())
+           .filter_map(|config| config.embedder_options.set())
        {
-           use meilisearch_types::milli::vector::settings::EmbedderSource;
+           use meilisearch_types::milli::vector::settings::EmbedderSettings;
            match source {
-               EmbedderSource::OpenAi => sources.insert("openAi"),
-               EmbedderSource::HuggingFace => sources.insert("huggingFace"),
-               EmbedderSource::UserProvided => sources.insert("userProvided"),
+               EmbedderSettings::OpenAi(_) => sources.insert("openAi"),
+               EmbedderSettings::HuggingFace(_) => sources.insert("huggingFace"),
+               EmbedderSettings::UserProvided(_) => sources.insert("userProvided"),
            };
        }
    };
@@ -657,8 +651,6 @@ pub async fn update_all(
    let index_uid = IndexUid::try_from(index_uid.into_inner())?;

    let new_settings = body.into_inner();
-   debug!(parameters = ?new_settings, "Update all settings");
-   let new_settings = validate_settings(new_settings, &index_scheduler)?;

    analytics.publish(
        "Settings Updated".to_string(),
@@ -692,8 +684,7 @@ pub async fn update_all(
                "set": new_settings.distinct_attribute.as_ref().set().is_some()
            },
            "proximity_precision": {
-               "set": new_settings.proximity_precision.as_ref().set().is_some(),
-               "value": new_settings.proximity_precision.as_ref().set().copied().unwrap_or_default()
+               "set": new_settings.proximity_precision.as_ref().set().is_some()
            },
            "typo_tolerance": {
                "enabled": new_settings.typo_tolerance
@@ -770,7 +761,7 @@ pub async fn update_all(
    let task: SummarizedTaskView =
        tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();

-   debug!(returns = ?task, "Update all settings");
+   debug!("returns: {:?}", task);
    Ok(HttpResponse::Accepted().json(task))
}
@@ -783,7 +774,7 @@ pub async fn get_all(
    let index = index_scheduler.index(&index_uid)?;
    let rtxn = index.read_txn()?;
    let new_settings = settings(&index, &rtxn)?;
-   debug!(returns = ?new_settings, "Get all settings");
+   debug!("returns: {:?}", new_settings);
    Ok(HttpResponse::Ok().json(new_settings))
}
@@ -806,16 +797,6 @@ pub async fn delete_all(
    let task: SummarizedTaskView =
        tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();

-   debug!(returns = ?task, "Delete all settings");
+   debug!("returns: {:?}", task);
    Ok(HttpResponse::Accepted().json(task))
}
-
-fn validate_settings(
-    settings: Settings<Unchecked>,
-    index_scheduler: &IndexScheduler,
-) -> Result<Settings<Unchecked>, ResponseError> {
-    if matches!(settings.embedders, Setting::Set(_)) {
-        index_scheduler.features().check_vector("Passing `embedders` in settings")?
-    }
-    Ok(settings.validate()?)
-}
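
The removed `validate_settings` gate follows a simple pattern: reject a payload up front when it touches a disabled experimental feature. A reduced sketch with hypothetical types modeled on the diff, not the real `IndexScheduler` API:

struct RuntimeFeatures {
    vector_store: bool,
}

impl RuntimeFeatures {
    // Mirrors `check_vector`: error out unless the feature is enabled.
    fn check_vector(&self, context: &str) -> Result<(), String> {
        if self.vector_store {
            Ok(())
        } else {
            Err(format!("{context} requires enabling the `vectorStore` experimental feature"))
        }
    }
}

fn main() {
    let features = RuntimeFeatures { vector_store: false };
    assert!(features.check_vector("Passing `embedders` in settings").is_err());
}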

View File

@@ -1,318 +0,0 @@
use std::convert::Infallible;
use std::io::Write;
use std::ops::ControlFlow;
use std::pin::Pin;
use std::str::FromStr;
use std::sync::Arc;
use actix_web::web::{Bytes, Data};
use actix_web::{web, HttpResponse};
use deserr::actix_web::AwebJson;
use deserr::{DeserializeError, Deserr, ErrorKind, MergeWithError, ValuePointerRef};
use futures_util::Stream;
use index_scheduler::IndexScheduler;
use meilisearch_types::deserr::DeserrJsonError;
use meilisearch_types::error::deserr_codes::*;
use meilisearch_types::error::{Code, ResponseError};
use tokio::sync::mpsc;
use tracing_subscriber::filter::Targets;
use tracing_subscriber::Layer;
use crate::error::MeilisearchHttpError;
use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::GuardedData;
use crate::extractors::sequential_extractor::SeqHandler;
use crate::{LogRouteHandle, LogStderrHandle};
pub fn configure(cfg: &mut web::ServiceConfig) {
cfg.service(
web::resource("stream")
.route(web::post().to(SeqHandler(get_logs)))
.route(web::delete().to(SeqHandler(cancel_logs))),
)
.service(web::resource("stderr").route(web::post().to(SeqHandler(update_stderr_target))));
}
#[derive(Debug, Default, Clone, Copy, Deserr, PartialEq, Eq)]
#[deserr(rename_all = camelCase)]
pub enum LogMode {
#[default]
Human,
Json,
Profile,
}
/// Simple wrapper around the `Targets` from `tracing_subscriber` to implement `MergeWithError` on it.
#[derive(Clone, Debug)]
struct MyTargets(Targets);
/// Simple wrapper around the `ParseError` from `tracing_subscriber` to implement `MergeWithError` on it.
#[derive(Debug, thiserror::Error)]
enum MyParseError {
#[error(transparent)]
ParseError(#[from] tracing_subscriber::filter::ParseError),
#[error(
"Empty string is not a valid target. If you want to get no logs use `OFF`. Usage: `info`, `meilisearch=info`, or you can write multiple filters in one target: `index_scheduler=info,milli=trace`"
)]
Example,
}
impl FromStr for MyTargets {
type Err = MyParseError;
fn from_str(s: &str) -> Result<Self, Self::Err> {
if s.is_empty() {
Err(MyParseError::Example)
} else {
Ok(MyTargets(Targets::from_str(s).map_err(MyParseError::ParseError)?))
}
}
}
impl MergeWithError<MyParseError> for DeserrJsonError<BadRequest> {
fn merge(
_self_: Option<Self>,
other: MyParseError,
merge_location: ValuePointerRef,
) -> ControlFlow<Self, Self> {
Self::error::<Infallible>(
None,
ErrorKind::Unexpected { msg: other.to_string() },
merge_location,
)
}
}
#[derive(Debug, Deserr)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields, validate = validate_get_logs -> DeserrJsonError<InvalidSettingsTypoTolerance>)]
pub struct GetLogs {
#[deserr(default = "info".parse().unwrap(), try_from(&String) = MyTargets::from_str -> DeserrJsonError<BadRequest>)]
target: MyTargets,
#[deserr(default, error = DeserrJsonError<BadRequest>)]
mode: LogMode,
#[deserr(default = false, error = DeserrJsonError<BadRequest>)]
profile_memory: bool,
}
fn validate_get_logs<E: DeserializeError>(
logs: GetLogs,
location: ValuePointerRef,
) -> Result<GetLogs, E> {
if logs.profile_memory && logs.mode != LogMode::Profile {
Err(deserr::take_cf_content(E::error::<Infallible>(
None,
ErrorKind::Unexpected {
msg: format!("`profile_memory` can only be used while profiling code and is not compatible with the {:?} mode.", logs.mode),
},
location,
)))
} else {
Ok(logs)
}
}
struct LogWriter {
sender: mpsc::UnboundedSender<Vec<u8>>,
}
impl Write for LogWriter {
fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
self.sender.send(buf.to_vec()).map_err(std::io::Error::other)?;
Ok(buf.len())
}
fn flush(&mut self) -> std::io::Result<()> {
Ok(())
}
}
struct HandleGuard {
/// We need to keep an handle on the logs to make it available again when the streamer is dropped
logs: Arc<LogRouteHandle>,
}
impl Drop for HandleGuard {
fn drop(&mut self) {
if let Err(e) = self.logs.modify(|layer| *layer.inner_mut() = None) {
tracing::error!("Could not free the logs route: {e}");
}
}
}
fn byte_stream(
receiver: mpsc::UnboundedReceiver<Vec<u8>>,
guard: HandleGuard,
) -> impl futures_util::Stream<Item = Result<Bytes, ResponseError>> {
futures_util::stream::unfold((receiver, guard), move |(mut receiver, guard)| async move {
let vec = receiver.recv().await;
vec.map(From::from).map(Ok).map(|a| (a, (receiver, guard)))
})
}
type PinnedByteStream = Pin<Box<dyn Stream<Item = Result<Bytes, ResponseError>>>>;
fn make_layer<
S: tracing::Subscriber + for<'span> tracing_subscriber::registry::LookupSpan<'span>,
>(
opt: &GetLogs,
logs: Data<LogRouteHandle>,
) -> (Box<dyn Layer<S> + Send + Sync>, PinnedByteStream) {
let guard = HandleGuard { logs: logs.into_inner() };
match opt.mode {
LogMode::Human => {
let (sender, receiver) = tokio::sync::mpsc::unbounded_channel();
let fmt_layer = tracing_subscriber::fmt::layer()
.with_writer(move || LogWriter { sender: sender.clone() })
.with_span_events(tracing_subscriber::fmt::format::FmtSpan::CLOSE);
let stream = byte_stream(receiver, guard);
(Box::new(fmt_layer) as Box<dyn Layer<S> + Send + Sync>, Box::pin(stream))
}
LogMode::Json => {
let (sender, receiver) = tokio::sync::mpsc::unbounded_channel();
let fmt_layer = tracing_subscriber::fmt::layer()
.with_writer(move || LogWriter { sender: sender.clone() })
.json()
.with_span_events(tracing_subscriber::fmt::format::FmtSpan::CLOSE);
let stream = byte_stream(receiver, guard);
(Box::new(fmt_layer) as Box<dyn Layer<S> + Send + Sync>, Box::pin(stream))
}
LogMode::Profile => {
let (trace, layer) = tracing_trace::Trace::new(opt.profile_memory);
let stream = entry_stream(trace, guard);
(Box::new(layer) as Box<dyn Layer<S> + Send + Sync>, Box::pin(stream))
}
}
}
fn entry_stream(
trace: tracing_trace::Trace,
guard: HandleGuard,
) -> impl Stream<Item = Result<Bytes, ResponseError>> {
let receiver = trace.into_receiver();
let entry_buf = Vec::new();
futures_util::stream::unfold(
(receiver, entry_buf, guard),
move |(mut receiver, mut entry_buf, guard)| async move {
let mut bytes = Vec::new();
while bytes.len() < 8192 {
entry_buf.clear();
let Ok(count) = tokio::time::timeout(
std::time::Duration::from_secs(1),
receiver.recv_many(&mut entry_buf, 100),
)
.await
else {
break;
};
if count == 0 {
if !bytes.is_empty() {
break;
}
// channel closed, exit
return None;
}
for entry in &entry_buf {
if let Err(error) = serde_json::to_writer(&mut bytes, entry) {
tracing::error!(
error = &error as &dyn std::error::Error,
"deserializing entry"
);
return Some((
Err(ResponseError::from_msg(
format!("error deserializing entry: {error}"),
Code::Internal,
)),
(receiver, entry_buf, guard),
));
}
}
}
Some((Ok(bytes.into()), (receiver, entry_buf, guard)))
},
)
}
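// The loop above trades latency for throughput: it drains up to 100 entries at a
// time and only yields a chunk once roughly 8 KiB is buffered or the channel has
// been quiet for a second. The timeout-plus-recv_many shape in isolation (editor's
// sketch with an assumed String payload, not part of this file):
async fn _sketch_drain_batch(rx: &mut mpsc::UnboundedReceiver<String>) -> Vec<String> {
    let mut batch = Vec::new();
    // recv_many appends up to `limit` queued items and returns 0 once the channel
    // is closed; on timeout we simply flush whatever was gathered so far.
    let _ = tokio::time::timeout(
        std::time::Duration::from_secs(1),
        rx.recv_many(&mut batch, 100),
    )
    .await;
    batch
}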
pub async fn get_logs(
index_scheduler: GuardedData<ActionPolicy<{ actions::METRICS_GET }>, Data<IndexScheduler>>,
logs: Data<LogRouteHandle>,
body: AwebJson<GetLogs, DeserrJsonError>,
) -> Result<HttpResponse, ResponseError> {
index_scheduler.features().check_logs_route()?;
let opt = body.into_inner();
let mut stream = None;
logs.modify(|layer| match layer.inner_mut() {
None => {
// there is no one getting logs
*layer.filter_mut() = opt.target.0.clone();
let (new_layer, new_stream) = make_layer(&opt, logs.clone());
*layer.inner_mut() = Some(new_layer);
stream = Some(new_stream);
}
Some(_) => {
// there is already someone getting logs
}
})
.unwrap();
if let Some(stream) = stream {
Ok(HttpResponse::Ok().streaming(stream))
} else {
Err(MeilisearchHttpError::AlreadyUsedLogRoute.into())
}
}
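// Usage sketch in the style of the test helpers that appear later in this diff
// (the `server` handle is assumed): the first POST owns the stream, a concurrent
// POST gets AlreadyUsedLogRoute, and DELETE /logs/stream (cancel_logs below)
// frees the slot again with a 204.
//
//     let (_stream, _code) = server.service
//         .post("/logs/stream", json!({ "mode": "human", "target": "info" }))
//         .await;
//     let (_, _code) = server.service.delete("/logs/stream").await;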
pub async fn cancel_logs(
index_scheduler: GuardedData<ActionPolicy<{ actions::METRICS_GET }>, Data<IndexScheduler>>,
logs: Data<LogRouteHandle>,
) -> Result<HttpResponse, ResponseError> {
index_scheduler.features().check_logs_route()?;
if let Err(e) = logs.modify(|layer| *layer.inner_mut() = None) {
tracing::error!("Could not free the logs route: {e}");
}
Ok(HttpResponse::NoContent().finish())
}
#[derive(Debug, Deserr)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
pub struct UpdateStderrLogs {
#[deserr(default = "info".parse().unwrap(), try_from(&String) = MyTargets::from_str -> DeserrJsonError<BadRequest>)]
target: MyTargets,
}
pub async fn update_stderr_target(
index_scheduler: GuardedData<ActionPolicy<{ actions::METRICS_GET }>, Data<IndexScheduler>>,
logs: Data<LogStderrHandle>,
body: AwebJson<UpdateStderrLogs, DeserrJsonError>,
) -> Result<HttpResponse, ResponseError> {
index_scheduler.features().check_logs_route()?;
let opt = body.into_inner();
logs.modify(|layer| {
*layer.filter_mut() = opt.target.0.clone();
})
.unwrap();
Ok(HttpResponse::NoContent().finish())
}

View File

@@ -3,6 +3,7 @@ use std::collections::BTreeMap;
use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse};
use index_scheduler::IndexScheduler;
+use log::debug;
use meilisearch_auth::AuthController;
use meilisearch_types::error::ResponseError;
use meilisearch_types::settings::{Settings, Unchecked};
@@ -10,7 +11,6 @@ use meilisearch_types::tasks::{Kind, Status, Task, TaskId};
use serde::{Deserialize, Serialize};
use serde_json::json;
use time::OffsetDateTime;
-use tracing::debug;
use crate::analytics::Analytics;
use crate::extractors::authentication::policies::*;
@@ -22,7 +22,6 @@ mod api_key;
mod dump;
pub mod features;
pub mod indexes;
-mod logs;
mod metrics;
mod multi_search;
mod snapshot;
@@ -32,7 +31,6 @@ pub mod tasks;
pub fn configure(cfg: &mut web::ServiceConfig) {
cfg.service(web::scope("/tasks").configure(tasks::configure))
.service(web::resource("/health").route(web::get().to(get_health)))
-.service(web::scope("/logs").configure(logs::configure))
.service(web::scope("/keys").configure(api_key::configure))
.service(web::scope("/dumps").configure(dump::configure))
.service(web::scope("/snapshots").configure(snapshot::configure))
@@ -252,7 +250,7 @@ async fn get_stats(
let stats = create_all_stats((*index_scheduler).clone(), (*auth_controller).clone(), filters)?;
-debug!(returns = ?stats, "Get stats");
+debug!("returns: {:?}", stats);
Ok(HttpResponse::Ok().json(stats))
}

View File

@@ -3,11 +3,11 @@ use actix_web::web::{self, Data};
use actix_web::{HttpRequest, HttpResponse};
use deserr::actix_web::AwebJson;
use index_scheduler::IndexScheduler;
+use log::debug;
use meilisearch_types::deserr::DeserrJsonError;
use meilisearch_types::error::ResponseError;
use meilisearch_types::keys::actions;
use serde::Serialize;
-use tracing::debug;
use crate::analytics::{Analytics, MultiSearchAggregator};
use crate::extractors::authentication::policies::ActionPolicy;
@@ -52,7 +52,7 @@ pub async fn multi_search_with_post(
for (query_index, (index_uid, mut query)) in
queries.into_iter().map(SearchQueryWithIndex::into_index_query).enumerate()
{
-debug!(on_index = query_index, parameters = ?query, "Multi-search");
+debug!("multi-search #{query_index}: called with params: {:?}", query);
// Check index from API key
if !index_scheduler.filters().is_index_authorized(&index_uid) {
@@ -107,7 +107,7 @@ pub async fn multi_search_with_post(
err
})?;
-debug!(returns = ?search_results, "Multi-search");
+debug!("returns: {:?}", search_results);
Ok(HttpResponse::Ok().json(SearchResults { results: search_results }))
}

View File

@@ -1,10 +1,10 @@
use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse};
use index_scheduler::IndexScheduler;
+use log::debug;
use meilisearch_types::error::ResponseError;
use meilisearch_types::tasks::KindWithContent;
use serde_json::json;
-use tracing::debug;
use crate::analytics::Analytics;
use crate::extractors::authentication::policies::*;
@@ -27,6 +27,6 @@ pub async fn create_snapshot(
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
-debug!(returns = ?task, "Create snapshot");
+debug!("returns: {:?}", task);
Ok(HttpResponse::Accepted().json(task))
}

View File

@@ -8,9 +8,11 @@ use meilisearch_types::deserr::DeserrQueryParamError;
use meilisearch_types::error::deserr_codes::*;
use meilisearch_types::error::{InvalidTaskDateError, ResponseError};
use meilisearch_types::index_uid::IndexUid;
+use meilisearch_types::settings::{Settings, Unchecked};
use meilisearch_types::star_or::{OptionStarOr, OptionStarOrList};
-use meilisearch_types::task_view::TaskView;
-use meilisearch_types::tasks::{Kind, KindWithContent, Status};
+use meilisearch_types::tasks::{
+serialize_duration, Details, IndexSwap, Kind, KindWithContent, Status, Task,
+};
use serde::Serialize;
use serde_json::json;
use time::format_description::well_known::Rfc3339;
@@ -35,6 +37,140 @@
pub fn configure(cfg: &mut web::ServiceConfig) {
.service(web::resource("/cancel").route(web::post().to(SeqHandler(cancel_tasks))))
.service(web::resource("/{task_id}").route(web::get().to(SeqHandler(get_task))));
}
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct TaskView {
pub uid: TaskId,
#[serde(default)]
pub index_uid: Option<String>,
pub status: Status,
#[serde(rename = "type")]
pub kind: Kind,
pub canceled_by: Option<TaskId>,
#[serde(skip_serializing_if = "Option::is_none")]
pub details: Option<DetailsView>,
pub error: Option<ResponseError>,
#[serde(serialize_with = "serialize_duration", default)]
pub duration: Option<Duration>,
#[serde(with = "time::serde::rfc3339")]
pub enqueued_at: OffsetDateTime,
#[serde(with = "time::serde::rfc3339::option", default)]
pub started_at: Option<OffsetDateTime>,
#[serde(with = "time::serde::rfc3339::option", default)]
pub finished_at: Option<OffsetDateTime>,
}
impl TaskView {
pub fn from_task(task: &Task) -> TaskView {
TaskView {
uid: task.uid,
index_uid: task.index_uid().map(ToOwned::to_owned),
status: task.status,
kind: task.kind.as_kind(),
canceled_by: task.canceled_by,
details: task.details.clone().map(DetailsView::from),
error: task.error.clone(),
duration: task.started_at.zip(task.finished_at).map(|(start, end)| end - start),
enqueued_at: task.enqueued_at,
started_at: task.started_at,
finished_at: task.finished_at,
}
}
}
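// Note the Option::zip above: `duration` stays None (serialized as null) until a
// task has both timestamps. In isolation (editor's sketch, not part of this file):
fn _sketch_duration_zip() {
    assert_eq!(None::<i32>.zip(Some(3)), None);
    assert_eq!(Some(1).zip(Some(3)).map(|(start, end)| end - start), Some(2));
}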
#[derive(Default, Debug, PartialEq, Eq, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct DetailsView {
#[serde(skip_serializing_if = "Option::is_none")]
pub received_documents: Option<u64>,
#[serde(skip_serializing_if = "Option::is_none")]
pub indexed_documents: Option<Option<u64>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub primary_key: Option<Option<String>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub provided_ids: Option<usize>,
#[serde(skip_serializing_if = "Option::is_none")]
pub deleted_documents: Option<Option<u64>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub matched_tasks: Option<u64>,
#[serde(skip_serializing_if = "Option::is_none")]
pub canceled_tasks: Option<Option<u64>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub deleted_tasks: Option<Option<u64>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub original_filter: Option<Option<String>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub dump_uid: Option<Option<String>>,
#[serde(skip_serializing_if = "Option::is_none")]
#[serde(flatten)]
pub settings: Option<Box<Settings<Unchecked>>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub swaps: Option<Vec<IndexSwap>>,
}
impl From<Details> for DetailsView {
fn from(details: Details) -> Self {
match details {
Details::DocumentAdditionOrUpdate { received_documents, indexed_documents } => {
DetailsView {
received_documents: Some(received_documents),
indexed_documents: Some(indexed_documents),
..DetailsView::default()
}
}
Details::SettingsUpdate { settings } => {
DetailsView { settings: Some(settings), ..DetailsView::default() }
}
Details::IndexInfo { primary_key } => {
DetailsView { primary_key: Some(primary_key), ..DetailsView::default() }
}
Details::DocumentDeletion {
provided_ids: received_document_ids,
deleted_documents,
} => DetailsView {
provided_ids: Some(received_document_ids),
deleted_documents: Some(deleted_documents),
original_filter: Some(None),
..DetailsView::default()
},
Details::DocumentDeletionByFilter { original_filter, deleted_documents } => {
DetailsView {
provided_ids: Some(0),
original_filter: Some(Some(original_filter)),
deleted_documents: Some(deleted_documents),
..DetailsView::default()
}
}
Details::ClearAll { deleted_documents } => {
DetailsView { deleted_documents: Some(deleted_documents), ..DetailsView::default() }
}
Details::TaskCancelation { matched_tasks, canceled_tasks, original_filter } => {
DetailsView {
matched_tasks: Some(matched_tasks),
canceled_tasks: Some(canceled_tasks),
original_filter: Some(Some(original_filter)),
..DetailsView::default()
}
}
Details::TaskDeletion { matched_tasks, deleted_tasks, original_filter } => {
DetailsView {
matched_tasks: Some(matched_tasks),
deleted_tasks: Some(deleted_tasks),
original_filter: Some(Some(original_filter)),
..DetailsView::default()
}
}
Details::Dump { dump_uid } => {
DetailsView { dump_uid: Some(dump_uid), ..DetailsView::default() }
}
Details::IndexSwap { swaps } => {
DetailsView { swaps: Some(swaps), ..Default::default() }
}
}
}
}
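// Because every field is skip_serializing_if = "Option::is_none" (and the settings
// are flattened), a converted Details serializes to just its populated keys.
// Editor's sketch of the expected shape for a document addition, matching the task
// snapshots further down in this diff (not part of this file):
fn _sketch_details_view_shape() -> DetailsView {
    // serializes as {"receivedDocuments": 1, "indexedDocuments": 0}
    DetailsView {
        received_documents: Some(1),
        indexed_documents: Some(Some(0)),
        ..DetailsView::default()
    }
}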
#[derive(Debug, Deserr)]
#[deserr(error = DeserrQueryParamError, rename_all = camelCase, deny_unknown_fields)]
pub struct TasksFilterQuery {

View File

@@ -441,6 +441,10 @@ fn prepare_search<'t>(
ScoringStrategy::Skip
});
+if query.show_ranking_score_details {
+features.check_score_details()?;
+}
if let Some(HybridQuery { embedder: Some(embedder), .. }) = &query.hybrid {
search.embedder_name(embedder);
}
@@ -731,9 +735,6 @@ pub fn perform_facet_search(
if let Some(facet_query) = &facet_query {
facet_search.query(facet_query);
}
-if let Some(max_facets) = index.max_values_per_facet(&rtxn)? {
-facet_search.max_values(max_facets as usize);
-}
Ok(FacetSearchResult {
facet_hits: facet_search.execute()?,
@@ -896,14 +897,6 @@ fn format_fields<'a>(
let mut matches_position = compute_matches.then(BTreeMap::new);
let mut document = document.clone();
-// reduce the formatted option list to the attributes that should be formatted,
-// instead of all the attributes to display.
-let formatting_fields_options: Vec<_> = formatted_options
-.iter()
-.filter(|(_, option)| option.should_format())
-.map(|(fid, option)| (field_ids_map.name(*fid).unwrap(), option))
-.collect();
// select the attributes to retrieve
let displayable_names =
displayable_ids.iter().map(|&fid| field_ids_map.name(fid).expect("Missing field name"));
@@ -912,15 +905,13 @@ fn format_fields<'a>(
// to the value and merge them together. e.g. if a user said he wanted to highlight `doggo`
// and crop `doggo.name`, `doggo.name` needs to be highlighted + cropped while `doggo.age` is only
// highlighted.
-// Warn: The time to compute the format list scales with the number of fields to format;
-// cumulated with map_leaf_values that iterates over all the nested fields, it gives a quadratic complexity:
-// d*f where d is the total number of fields to display and f is the total number of fields to format.
-let format = formatting_fields_options
+let format = formatted_options
.iter()
-.filter(|(name, _option)| {
+.filter(|(field, _option)| {
+let name = field_ids_map.name(**field).unwrap();
milli::is_faceted_by(name, key) || milli::is_faceted_by(key, name)
})
-.map(|(_, option)| **option)
+.map(|(_, option)| *option)
.reduce(|acc, option| acc.merge(option));
let mut infos = Vec::new();
@@ -1017,7 +1008,7 @@
let value = matcher.format(format_options);
Value::String(value.into_owned())
}
-None => Value::String(s),
+None => Value::Number(number),
}
}
value => value,

View File

@@ -59,8 +59,6 @@ pub static AUTHORIZATIONS: Lazy<HashMap<(&'static str, &'static str), HashSet<&'
("POST", "/snapshots") => hashset!{"snapshots.create", "snapshots.*", "*"},
("GET", "/version") => hashset!{"version", "*"},
("GET", "/metrics") => hashset!{"metrics.get", "metrics.*", "*"},
-("POST", "/logs/stream") => hashset!{"metrics.get", "metrics.*", "*"},
-("DELETE", "/logs/stream") => hashset!{"metrics.get", "metrics.*", "*"},
("PATCH", "/keys/mykey/") => hashset!{"keys.update", "*"},
("GET", "/keys/mykey/") => hashset!{"keys.get", "*"},
("DELETE", "/keys/mykey/") => hashset!{"keys.delete", "*"},

View File

@@ -64,7 +64,7 @@ impl Display for Value {
write!(
f,
"{}",
-json_string!(self, { ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]" })
+json_string!(self, { ".enqueuedAt" => "[date]", ".processedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]" })
)
}
}

View File

@@ -9,12 +9,10 @@ use actix_web::http::StatusCode;
use byte_unit::{Byte, ByteUnit};
use clap::Parser;
use meilisearch::option::{IndexerOpts, MaxMemory, Opt};
-use meilisearch::{analytics, create_app, setup_meilisearch, SubscriberForSecondLayer};
+use meilisearch::{analytics, create_app, setup_meilisearch};
use once_cell::sync::Lazy;
use tempfile::TempDir;
use tokio::time::sleep;
-use tracing::level_filters::LevelFilter;
-use tracing_subscriber::Layer;
use super::index::Index;
use super::service::Service;
@@ -83,24 +81,10 @@ impl Server {
Response = ServiceResponse<impl MessageBody>,
Error = actix_web::Error,
> {
-let (_route_layer, route_layer_handle) =
-tracing_subscriber::reload::Layer::new(None.with_filter(
-tracing_subscriber::filter::Targets::new().with_target("", LevelFilter::OFF),
-));
-let (_stderr_layer, stderr_layer_handle) = tracing_subscriber::reload::Layer::new(
-(Box::new(
-tracing_subscriber::fmt::layer()
-.with_span_events(tracing_subscriber::fmt::format::FmtSpan::CLOSE),
-)
-as Box<dyn tracing_subscriber::Layer<SubscriberForSecondLayer> + Send + Sync>)
-.with_filter(tracing_subscriber::filter::Targets::new()),
-);
actix_web::test::init_service(create_app(
self.service.index_scheduler.clone().into(),
self.service.auth.clone().into(),
self.service.options.clone(),
-(route_layer_handle, stderr_layer_handle),
analytics::MockAnalytics::new(&self.service.options),
true,
))

View File

@@ -5,10 +5,8 @@ use actix_web::http::StatusCode;
use actix_web::test;
use actix_web::test::TestRequest;
use index_scheduler::IndexScheduler;
-use meilisearch::{analytics, create_app, Opt, SubscriberForSecondLayer};
+use meilisearch::{analytics, create_app, Opt};
use meilisearch_auth::AuthController;
-use tracing::level_filters::LevelFilter;
-use tracing_subscriber::Layer;
use crate::common::encoder::Encoder;
use crate::common::Value;
@@ -107,24 +105,10 @@ impl Service {
}
pub async fn request(&self, mut req: test::TestRequest) -> (Value, StatusCode) {
-let (_route_layer, route_layer_handle) =
-tracing_subscriber::reload::Layer::new(None.with_filter(
-tracing_subscriber::filter::Targets::new().with_target("", LevelFilter::OFF),
-));
-let (_stderr_layer, stderr_layer_handle) = tracing_subscriber::reload::Layer::new(
-(Box::new(
-tracing_subscriber::fmt::layer()
-.with_span_events(tracing_subscriber::fmt::format::FmtSpan::CLOSE),
-)
-as Box<dyn tracing_subscriber::Layer<SubscriberForSecondLayer> + Send + Sync>)
-.with_filter(tracing_subscriber::filter::Targets::new()),
-);
let app = test::init_service(create_app(
self.index_scheduler.clone().into(),
self.auth.clone().into(),
self.options.clone(),
-(route_layer_handle, stderr_layer_handle),
analytics::MockAnalytics::new(&self.options),
true,
))

View File

@@ -1760,181 +1760,6 @@ async fn add_documents_invalid_geo_field() {
"finishedAt": "[date]"
}
"###);
// The three next tests are related to #4333
// _geo has a lat and lng but set to `null`
let documents = json!([
{
"id": "12",
"_geo": { "lng": null, "lat": 67}
}
]);
let (response, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let response = index.wait_task(response.uid()).await;
snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }),
@r###"
{
"uid": 14,
"indexUid": "test",
"status": "failed",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 0
},
"error": {
"message": "Could not parse longitude in the document with the id: `12`. Was expecting a finite number but instead got `null`.",
"code": "invalid_document_geo_field",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"
},
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
// _geo has a lat and lng but set to `null`
let documents = json!([
{
"id": "12",
"_geo": { "lng": 35, "lat": null }
}
]);
let (response, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let response = index.wait_task(response.uid()).await;
snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }),
@r###"
{
"uid": 15,
"indexUid": "test",
"status": "failed",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 0
},
"error": {
"message": "Could not parse latitude in the document with the id: `12`. Was expecting a finite number but instead got `null`.",
"code": "invalid_document_geo_field",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"
},
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
// _geo has a lat and lng but set to `null`
let documents = json!([
{
"id": "13",
"_geo": { "lng": null, "lat": null }
}
]);
let (response, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let response = index.wait_task(response.uid()).await;
snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }),
@r###"
{
"uid": 16,
"indexUid": "test",
"status": "failed",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 0
},
"error": {
"message": "Could not parse latitude nor longitude in the document with the id: `13`. Was expecting finite numbers but instead got `null` and `null`.",
"code": "invalid_document_geo_field",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"
},
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
}
// Related to #4333
#[actix_rt::test]
async fn add_invalid_geo_and_then_settings() {
let server = Server::new().await;
let index = server.index("test");
index.create(Some("id")).await;
// _geo has null coordinates; the addition succeeds because _geo is not yet indexed
let documents = json!([
{
"id": "11",
"_geo": { "lat": null, "lng": null },
}
]);
let (ret, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let ret = index.wait_task(ret.uid()).await;
snapshot!(ret, @r###"
{
"uid": 1,
"indexUid": "test",
"status": "succeeded",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 1
},
"error": null,
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
let (ret, code) = index.update_settings(json!({"sortableAttributes": ["_geo"]})).await;
snapshot!(code, @"202 Accepted");
let ret = index.wait_task(ret.uid()).await;
snapshot!(ret, @r###"
{
"uid": 2,
"indexUid": "test",
"status": "failed",
"type": "settingsUpdate",
"canceledBy": null,
"details": {
"sortableAttributes": [
"_geo"
]
},
"error": {
"message": "Could not parse latitude in the document with the id: `\"11\"`. Was expecting a finite number but instead got `null`.",
"code": "invalid_document_geo_field",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"
},
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
}

#[actix_rt::test]

View File

@@ -59,7 +59,7 @@ async fn import_dump_v1_movie_raw() {
"dictionary": [],
"synonyms": {},
"distinctAttribute": null,
-"proximityPrecision": "byWord",
+"proximityPrecision": null,
"typoTolerance": {
"enabled": true,
"minWordSizeForTypos": {
@@ -77,7 +77,8 @@
},
"pagination": {
"maxTotalHits": 1000
-}
+},
+"embedders": {}
}
"###
);
@@ -220,7 +221,7 @@ async fn import_dump_v1_movie_with_settings() {
"dictionary": [],
"synonyms": {},
"distinctAttribute": null,
-"proximityPrecision": "byWord",
+"proximityPrecision": null,
"typoTolerance": {
"enabled": true,
"minWordSizeForTypos": {
@@ -238,7 +239,8 @@
},
"pagination": {
"maxTotalHits": 1000
-}
+},
+"embedders": {}
}
"###
);
@@ -367,7 +369,7 @@ async fn import_dump_v1_rubygems_with_settings() {
"dictionary": [],
"synonyms": {},
"distinctAttribute": null,
-"proximityPrecision": "byWord",
+"proximityPrecision": null,
"typoTolerance": {
"enabled": true,
"minWordSizeForTypos": {
@@ -385,7 +387,8 @@
},
"pagination": {
"maxTotalHits": 1000
-}
+},
+"embedders": {}
}
"###
);
@@ -500,7 +503,7 @@ async fn import_dump_v2_movie_raw() {
"dictionary": [],
"synonyms": {},
"distinctAttribute": null,
-"proximityPrecision": "byWord",
+"proximityPrecision": null,
"typoTolerance": {
"enabled": true,
"minWordSizeForTypos": {
@@ -518,7 +521,8 @@
},
"pagination": {
"maxTotalHits": 1000
-}
+},
+"embedders": {}
}
"###
);
@@ -645,7 +649,7 @@ async fn import_dump_v2_movie_with_settings() {
"dictionary": [],
"synonyms": {},
"distinctAttribute": null,
-"proximityPrecision": "byWord",
+"proximityPrecision": null,
"typoTolerance": {
"enabled": true,
"minWordSizeForTypos": {
@@ -663,7 +667,8 @@
},
"pagination": {
"maxTotalHits": 1000
-}
+},
+"embedders": {}
}
"###
);
@@ -789,7 +794,7 @@ async fn import_dump_v2_rubygems_with_settings() {
"dictionary": [],
"synonyms": {},
"distinctAttribute": null,
-"proximityPrecision": "byWord",
+"proximityPrecision": null,
"typoTolerance": {
"enabled": true,
"minWordSizeForTypos": {
@@ -807,7 +812,8 @@
},
"pagination": {
"maxTotalHits": 1000
-}
+},
+"embedders": {}
}
"###
);
@@ -922,7 +928,7 @@ async fn import_dump_v3_movie_raw() {
"dictionary": [],
"synonyms": {},
"distinctAttribute": null,
-"proximityPrecision": "byWord",
+"proximityPrecision": null,
"typoTolerance": {
"enabled": true,
"minWordSizeForTypos": {
@@ -940,7 +946,8 @@
},
"pagination": {
"maxTotalHits": 1000
-}
+},
+"embedders": {}
}
"###
);
@@ -1067,7 +1074,7 @@ async fn import_dump_v3_movie_with_settings() {
"dictionary": [],
"synonyms": {},
"distinctAttribute": null,
-"proximityPrecision": "byWord",
+"proximityPrecision": null,
"typoTolerance": {
"enabled": true,
"minWordSizeForTypos": {
@@ -1085,7 +1092,8 @@
},
"pagination": {
"maxTotalHits": 1000
-}
+},
+"embedders": {}
}
"###
);
@@ -1211,7 +1219,7 @@ async fn import_dump_v3_rubygems_with_settings() {
"dictionary": [],
"synonyms": {},
"distinctAttribute": null,
-"proximityPrecision": "byWord",
+"proximityPrecision": null,
"typoTolerance": {
"enabled": true,
"minWordSizeForTypos": {
@@ -1229,7 +1237,8 @@
},
"pagination": {
"maxTotalHits": 1000
-}
+},
+"embedders": {}
}
"###
);
@@ -1344,7 +1353,7 @@ async fn import_dump_v4_movie_raw() {
"dictionary": [],
"synonyms": {},
"distinctAttribute": null,
-"proximityPrecision": "byWord",
+"proximityPrecision": null,
"typoTolerance": {
"enabled": true,
"minWordSizeForTypos": {
@@ -1362,7 +1371,8 @@
},
"pagination": {
"maxTotalHits": 1000
-}
+},
+"embedders": {}
}
"###
);
@@ -1489,7 +1499,7 @@ async fn import_dump_v4_movie_with_settings() {
"dictionary": [],
"synonyms": {},
"distinctAttribute": null,
-"proximityPrecision": "byWord",
+"proximityPrecision": null,
"typoTolerance": {
"enabled": true,
"minWordSizeForTypos": {
@@ -1507,7 +1517,8 @@
},
"pagination": {
"maxTotalHits": 1000
-}
+},
+"embedders": {}
}
"###
);
@@ -1633,7 +1644,7 @@ async fn import_dump_v4_rubygems_with_settings() {
"dictionary": [],
"synonyms": {},
"distinctAttribute": null,
-"proximityPrecision": "byWord",
+"proximityPrecision": null,
"typoTolerance": {
"enabled": true,
"minWordSizeForTypos": {
@@ -1651,7 +1662,8 @@
},
"pagination": {
"maxTotalHits": 1000
-}
+},
+"embedders": {}
}
"###
);
@@ -1845,9 +1857,9 @@ async fn import_dump_v6_containing_experimental_features() {
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
+"scoreDetails": false,
"vectorStore": false,
"metrics": false,
-"logsRoute": false,
"exportPuffinReports": false
}
"###);
@@ -1895,7 +1907,8 @@
},
"pagination": {
"maxTotalHits": 1000
-}
+},
+"embedders": {}
}
"###);

View File

@@ -18,9 +18,9 @@ async fn experimental_features() {
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
+"scoreDetails": false,
"vectorStore": false,
"metrics": false,
-"logsRoute": false,
"exportPuffinReports": false
}
"###);
@@ -30,9 +30,9 @@
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
+"scoreDetails": false,
"vectorStore": true,
"metrics": false,
-"logsRoute": false,
"exportPuffinReports": false
}
"###);
@@ -42,9 +42,9 @@
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
+"scoreDetails": false,
"vectorStore": true,
"metrics": false,
-"logsRoute": false,
"exportPuffinReports": false
}
"###);
@@ -55,9 +55,9 @@
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
+"scoreDetails": false,
"vectorStore": true,
"metrics": false,
-"logsRoute": false,
"exportPuffinReports": false
}
"###);
@@ -68,9 +68,9 @@
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
+"scoreDetails": false,
"vectorStore": true,
"metrics": false,
-"logsRoute": false,
"exportPuffinReports": false
}
"###);
@@ -88,9 +88,9 @@ async fn experimental_feature_metrics() {
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
+"scoreDetails": false,
"vectorStore": false,
"metrics": true,
-"logsRoute": false,
"exportPuffinReports": false
}
"###);
@@ -146,7 +146,7 @@
meili_snap::snapshot!(code, @"400 Bad Request");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
-"message": "Unknown field `NotAFeature`: expected one of `vectorStore`, `metrics`, `logsRoute`, `exportPuffinReports`",
+"message": "Unknown field `NotAFeature`: expected one of `scoreDetails`, `vectorStore`, `metrics`, `exportPuffinReports`",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"

View File

@@ -5,7 +5,6 @@ mod documents;
mod dumps;
mod features;
mod index;
-mod logs;
mod search;
mod settings;
mod snapshot;

View File

@@ -1,193 +0,0 @@
use meili_snap::*;
use crate::common::Server;
use crate::json;
#[actix_rt::test]
async fn logs_stream_bad_target() {
let server = Server::new().await;
// Wrong type
let (response, code) = server.service.post("/logs/stream", json!({ "target": true })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Invalid value type at `.target`: expected a string, but found a boolean: `true`",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"
}
"###);
// Wrong type
let (response, code) = server.service.post("/logs/stream", json!({ "target": [] })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Invalid value type at `.target`: expected a string, but found an array: `[]`",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"
}
"###);
// Our help message
let (response, code) = server.service.post("/logs/stream", json!({ "target": "" })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Invalid value at `.target`: Empty string is not a valid target. If you want to get no logs use `OFF`. Usage: `info`, `meilisearch=info`, or you can write multiple filters in one target: `index_scheduler=info,milli=trace`",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"
}
"###);
// An error from the target parser
let (response, code) = server.service.post("/logs/stream", json!({ "target": "==" })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Invalid value at `.target`: invalid filter directive: too many '=' in filter directive, expected 0 or 1",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"
}
"###);
}
#[actix_rt::test]
async fn logs_stream_bad_mode() {
let server = Server::new().await;
// Wrong type
let (response, code) = server.service.post("/logs/stream", json!({ "mode": true })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Invalid value type at `.mode`: expected a string, but found a boolean: `true`",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"
}
"###);
// Wrong type
let (response, code) = server.service.post("/logs/stream", json!({ "mode": [] })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Invalid value type at `.mode`: expected a string, but found an array: `[]`",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"
}
"###);
// Wrong value
let (response, code) = server.service.post("/logs/stream", json!({ "mode": "tamo" })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Unknown value `tamo` at `.mode`: expected one of `human`, `json`, `profile`",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"
}
"###);
}
#[actix_rt::test]
async fn logs_stream_bad_profile_memory() {
let server = Server::new().await;
// Wrong type
let (response, code) =
server.service.post("/logs/stream", json!({ "profileMemory": "tamo" })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Invalid value type at `.profileMemory`: expected a boolean, but found a string: `\"tamo\"`",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"
}
"###);
// Wrong type
let (response, code) =
server.service.post("/logs/stream", json!({ "profileMemory": ["hello", "kefir"] })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Invalid value type at `.profileMemory`: expected a boolean, but found an array: `[\"hello\",\"kefir\"]`",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"
}
"###);
// Used with default parameters
let (response, code) =
server.service.post("/logs/stream", json!({ "profileMemory": true })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Invalid value: `profile_memory` can only be used while profiling code and is not compatible with the Human mode.",
"code": "invalid_settings_typo_tolerance",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_settings_typo_tolerance"
}
"###);
// Used with an unsupported mode
let (response, code) =
server.service.post("/logs/stream", json!({ "mode": "fmt", "profileMemory": true })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Unknown value `fmt` at `.mode`: expected one of `human`, `json`, `profile`",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"
}
"###);
}
#[actix_rt::test]
async fn logs_stream_without_enabling_the_route() {
let server = Server::new().await;
let (response, code) = server.service.post("/logs/stream", json!({})).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Modifying logs through the `/logs/*` routes requires enabling the `logs route` experimental feature. See https://github.com/orgs/meilisearch/discussions/721",
"code": "feature_not_enabled",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#feature_not_enabled"
}
"###);
let (response, code) = server.service.delete("/logs/stream").await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Modifying logs through the `/logs/*` routes requires enabling the `logs route` experimental feature. See https://github.com/orgs/meilisearch/discussions/721",
"code": "feature_not_enabled",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#feature_not_enabled"
}
"###);
let (response, code) = server.service.post("/logs/stderr", json!({})).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Modifying logs through the `/logs/*` routes requires enabling the `logs route` experimental feature. See https://github.com/orgs/meilisearch/discussions/721",
"code": "feature_not_enabled",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#feature_not_enabled"
}
"###);
}

View File

@@ -1,99 +0,0 @@
mod error;
use std::rc::Rc;
use std::str::FromStr;
use actix_web::http::header::ContentType;
use meili_snap::snapshot;
use meilisearch::{analytics, create_app, Opt, SubscriberForSecondLayer};
use tracing::level_filters::LevelFilter;
use tracing_subscriber::layer::SubscriberExt;
use tracing_subscriber::Layer;
use crate::common::{default_settings, Server};
use crate::json;
#[actix_web::test]
async fn basic_test_log_stream_route() {
let db_path = tempfile::tempdir().unwrap();
let server = Server::new_with_options(Opt {
experimental_enable_logs_route: true,
..default_settings(db_path.path())
})
.await
.unwrap();
let (route_layer, route_layer_handle) =
tracing_subscriber::reload::Layer::new(None.with_filter(
tracing_subscriber::filter::Targets::new().with_target("", LevelFilter::OFF),
));
let (_stderr_layer, stderr_layer_handle) = tracing_subscriber::reload::Layer::new(
(Box::new(
tracing_subscriber::fmt::layer()
.with_span_events(tracing_subscriber::fmt::format::FmtSpan::CLOSE),
) as Box<dyn tracing_subscriber::Layer<SubscriberForSecondLayer> + Send + Sync>)
.with_filter(tracing_subscriber::filter::Targets::new()),
);
let subscriber = tracing_subscriber::registry().with(route_layer).with(
tracing_subscriber::fmt::layer()
.with_span_events(tracing_subscriber::fmt::format::FmtSpan::ACTIVE)
.with_filter(tracing_subscriber::filter::LevelFilter::from_str("OFF").unwrap()),
);
let app = actix_web::test::init_service(create_app(
server.service.index_scheduler.clone().into(),
server.service.auth.clone().into(),
server.service.options.clone(),
(route_layer_handle, stderr_layer_handle),
analytics::MockAnalytics::new(&server.service.options),
true,
))
.await;
// set the subscriber as the default for the application
tracing::subscriber::set_global_default(subscriber).unwrap();
let app = Rc::new(app);
// First, we start listening on the `/logs/stream` route
let handle_app = app.clone();
let handle = tokio::task::spawn_local(async move {
let req = actix_web::test::TestRequest::post()
.uri("/logs/stream")
.insert_header(ContentType::json())
.set_payload(
serde_json::to_vec(&json!({
"mode": "human",
"target": "info",
}))
.unwrap(),
);
let req = req.to_request();
let ret = actix_web::test::call_service(&*handle_app, req).await;
actix_web::test::read_body(ret).await
});
// We're going to create an index to get at least one info log saying we processed a batch of task
let (ret, _code) = server.create_index(json!({ "uid": "tamo" })).await;
snapshot!(ret, @r###"
{
"taskUid": 0,
"indexUid": "tamo",
"status": "enqueued",
"type": "indexCreation",
"enqueuedAt": "[date]"
}
"###);
server.wait_task(ret.uid()).await;
let req = actix_web::test::TestRequest::delete().uri("/logs/stream");
let req = req.to_request();
let ret = actix_web::test::call_service(&*app, req).await;
let code = ret.status();
snapshot!(code, @"204 No Content");
let logs = handle.await.unwrap();
let logs = String::from_utf8(logs.to_vec()).unwrap();
assert!(logs.contains("INFO"), "{logs}");
}

View File

@@ -105,24 +105,6 @@ async fn more_advanced_facet_search() {
snapshot!(response["facetHits"].as_array().unwrap().len(), @"1");
}
-#[actix_rt::test]
-async fn simple_facet_search_with_max_values() {
-let server = Server::new().await;
-let index = server.index("test");
-let documents = DOCUMENTS.clone();
-index.update_settings_faceting(json!({ "maxValuesPerFacet": 1 })).await;
-index.update_settings_filterable_attributes(json!(["genres"])).await;
-index.add_documents(documents, None).await;
-index.wait_task(2).await;
-let (response, code) =
-index.facet_search(json!({"facetName": "genres", "facetQuery": "a"})).await;
-assert_eq!(code, 200, "{}", response);
-assert_eq!(dbg!(response)["facetHits"].as_array().unwrap().len(), 1);
-}
#[actix_rt::test]
async fn non_filterable_facet_search_error() {
let server = Server::new().await;

View File

@@ -13,17 +13,17 @@ async fn index_with_documents<'a>(server: &'a Server, documents: &Value) -> Inde
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
+"scoreDetails": false,
"vectorStore": true,
"metrics": false,
-"logsRoute": false,
"exportPuffinReports": false
}
"###);
let (response, code) = index
-.update_settings(json!({ "embedders": {"default": {
-"source": "userProvided",
-"dimensions": 2}}} ))
+.update_settings(
+json!({ "embedders": {"default": {"source": {"userProvided": {"dimensions": 2}}}} }),
+)
.await;
assert_eq!(202, code, "{:?}", response);
index.wait_task(response.uid()).await;
@@ -56,15 +56,6 @@ static SIMPLE_SEARCH_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
}])
});
-static SINGLE_DOCUMENT: Lazy<Value> = Lazy::new(|| {
-json!([{
-"title": "Shazam!",
-"desc": "a Captain Marvel ersatz",
-"id": "1",
-"_vectors": {"default": [1.0, 3.0]},
-}])
-});
#[actix_rt::test]
async fn simple_search() {
let server = Server::new().await;
@@ -87,52 +78,6 @@ async fn simple_search() {
snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_semanticScore":0.99029034},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_semanticScore":0.97434163},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_semanticScore":0.9472136}]"###);
}
#[actix_rt::test]
async fn highlighter() {
let server = Server::new().await;
let index = index_with_documents(&server, &SIMPLE_SEARCH_DOCUMENTS).await;
let (response, code) = index
.search_post(json!({"q": "Captain Marvel", "vector": [1.0, 1.0],
"hybrid": {"semanticRatio": 0.2},
"attributesToHighlight": [
"desc"
],
"highlightPreTag": "**BEGIN**",
"highlightPostTag": "**END**"
}))
.await;
snapshot!(code, @"200 OK");
snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_formatted":{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":["2.0","3.0"]}}},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_formatted":{"title":"Shazam!","desc":"a **BEGIN**Captain**END** **BEGIN**Marvel**END** ersatz","id":"1","_vectors":{"default":["1.0","3.0"]}}},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_formatted":{"title":"Captain Planet","desc":"He's not part of the **BEGIN**Marvel**END** Cinematic Universe","id":"2","_vectors":{"default":["1.0","2.0"]}}}]"###);
let (response, code) = index
.search_post(json!({"q": "Captain Marvel", "vector": [1.0, 1.0],
"hybrid": {"semanticRatio": 0.8},
"attributesToHighlight": [
"desc"
],
"highlightPreTag": "**BEGIN**",
"highlightPostTag": "**END**"
}))
.await;
snapshot!(code, @"200 OK");
snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_formatted":{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":["2.0","3.0"]}},"_semanticScore":0.99029034},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_formatted":{"title":"Captain Planet","desc":"He's not part of the **BEGIN**Marvel**END** Cinematic Universe","id":"2","_vectors":{"default":["1.0","2.0"]}},"_semanticScore":0.97434163},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_formatted":{"title":"Shazam!","desc":"a **BEGIN**Captain**END** **BEGIN**Marvel**END** ersatz","id":"1","_vectors":{"default":["1.0","3.0"]}},"_semanticScore":0.9472136}]"###);
// no highlighting on full semantic
let (response, code) = index
.search_post(json!({"q": "Captain Marvel", "vector": [1.0, 1.0],
"hybrid": {"semanticRatio": 1.0},
"attributesToHighlight": [
"desc"
],
"highlightPreTag": "**BEGIN**",
"highlightPostTag": "**END**"
}))
.await;
snapshot!(code, @"200 OK");
snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_formatted":{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":["2.0","3.0"]}},"_semanticScore":0.99029034},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_formatted":{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":["1.0","2.0"]}},"_semanticScore":0.97434163},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_formatted":{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":["1.0","3.0"]}}}]"###);
}
#[actix_rt::test]
async fn invalid_semantic_ratio() {
let server = Server::new().await;
@@ -204,18 +149,3 @@ async fn invalid_semantic_ratio() {
}
"###);
}
-#[actix_rt::test]
-async fn single_document() {
-let server = Server::new().await;
-let index = index_with_documents(&server, &SINGLE_DOCUMENT).await;
-let (response, code) = index
-.search_post(
-json!({"vector": [1.0, 3.0], "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true}),
-)
-.await;
-snapshot!(code, @"200 OK");
-snapshot!(response["hits"][0], @r###"{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_rankingScore":1.0,"_semanticScore":1.0}"###);
-}

View File

@@ -766,14 +766,38 @@ async fn faceting_max_values_per_facet() {
}
#[actix_rt::test]
-async fn test_score_details() {
+async fn experimental_feature_score_details() {
let server = Server::new().await;
let index = server.index("test");
let documents = DOCUMENTS.clone();
-let res = index.add_documents(json!(documents), None).await;
-index.wait_task(res.0.uid()).await;
+index.add_documents(json!(documents), None).await;
+index.wait_task(0).await;
+index
+.search(
+json!({
+"q": "train dragon",
+"showRankingScoreDetails": true,
+}),
+|response, code| {
+meili_snap::snapshot!(code, @"400 Bad Request");
+meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
+{
+"message": "Computing score details requires enabling the `score details` experimental feature. See https://github.com/meilisearch/product/discussions/674",
+"code": "feature_not_enabled",
+"type": "invalid_request",
+"link": "https://docs.meilisearch.com/errors#feature_not_enabled"
+}
+"###);
+},
+)
+.await;
+let (response, code) = server.set_features(json!({"scoreDetails": true})).await;
+meili_snap::snapshot!(code, @"200 OK");
+meili_snap::snapshot!(response["scoreDetails"], @"true");
index
.search(
@@ -866,21 +890,13 @@ async fn experimental_feature_vector_store() {
let (response, code) = index
.update_settings(json!({"embedders": {
"manual": {
-"source": "userProvided",
-"dimensions": 3,
+"source": {
+"userProvided": {"dimensions": 3}
+}
}
}}))
.await;
-meili_snap::snapshot!(response, @r###"
-{
-"taskUid": 1,
-"indexUid": "test",
-"status": "enqueued",
-"type": "settingsUpdate",
-"enqueuedAt": "[date]"
-}
-"###);
meili_snap::snapshot!(code, @"202 Accepted");
let response = index.wait_task(response.uid()).await;

View File

@@ -54,7 +54,7 @@ async fn get_settings() {
let (response, code) = index.settings().await;
assert_eq!(code, 200);
let settings = response.as_object().unwrap();
-assert_eq!(settings.keys().len(), 15);
+assert_eq!(settings.keys().len(), 16);
assert_eq!(settings["displayedAttributes"], json!(["*"]));
assert_eq!(settings["searchableAttributes"], json!(["*"]));
assert_eq!(settings["filterableAttributes"], json!([]));
@@ -83,7 +83,7 @@ async fn get_settings() {
"maxTotalHits": 1000,
})
);
-assert_eq!(settings["proximityPrecision"], json!("byWord"));
+assert_eq!(settings["embedders"], json!({}));
}

#[actix_rt::test]

View File

@@ -1,5 +1,4 @@
mod errors;
-mod webhook;

use meili_snap::insta::assert_json_snapshot;
use time::format_description::well_known::Rfc3339;

View File

@@ -1,119 +0,0 @@
-//! To test the webhook, we need to spawn a new server with a URL listening for
-//! post requests. The webhook handle starts a server and forwards all the
-//! received requests into a channel for you to handle.
-
-use std::sync::Arc;
-
-use actix_http::body::MessageBody;
-use actix_web::dev::{ServiceFactory, ServiceResponse};
-use actix_web::web::{Bytes, Data};
-use actix_web::{post, App, HttpResponse, HttpServer};
-use meili_snap::{json_string, snapshot};
-use meilisearch::Opt;
-use tokio::sync::mpsc;
-use url::Url;
-
-use crate::common::{default_settings, Server};
-use crate::json;
-
-#[post("/")]
-async fn forward_body(sender: Data<mpsc::UnboundedSender<Vec<u8>>>, body: Bytes) -> HttpResponse {
-    let body = body.to_vec();
-    sender.send(body).unwrap();
-    HttpResponse::Ok().into()
-}
-
-fn create_app(
-    sender: Arc<mpsc::UnboundedSender<Vec<u8>>>,
-) -> actix_web::App<
-    impl ServiceFactory<
-        actix_web::dev::ServiceRequest,
-        Config = (),
-        Response = ServiceResponse<impl MessageBody>,
-        Error = actix_web::Error,
-        InitError = (),
-    >,
-> {
-    App::new().service(forward_body).app_data(Data::from(sender))
-}
-
-struct WebhookHandle {
-    pub server_handle: tokio::task::JoinHandle<Result<(), std::io::Error>>,
-    pub url: String,
-    pub receiver: mpsc::UnboundedReceiver<Vec<u8>>,
-}
-
-async fn create_webhook_server() -> WebhookHandle {
-    let (sender, receiver) = mpsc::unbounded_channel();
-    let sender = Arc::new(sender);
-
-    // By listening on the port 0, the system will give us any available port.
-    let server =
-        HttpServer::new(move || create_app(sender.clone())).bind(("127.0.0.1", 0)).unwrap();
-    let (ip, scheme) = server.addrs_with_scheme()[0];
-    let url = format!("{scheme}://{ip}/");
-
-    let server_handle = tokio::spawn(server.run());
-    WebhookHandle { server_handle, url, receiver }
-}
-
-#[actix_web::test]
-async fn test_basic_webhook() {
-    let WebhookHandle { server_handle, url, mut receiver } = create_webhook_server().await;
-
-    let db_path = tempfile::tempdir().unwrap();
-    let server = Server::new_with_options(Opt {
-        task_webhook_url: Some(Url::parse(&url).unwrap()),
-        ..default_settings(db_path.path())
-    })
-    .await
-    .unwrap();
-
-    let index = server.index("tamo");
-    // May be flaky: we're relying on the fact that while the first document addition is processed, the other
-    // operations will be received and will be batched together. If it doesn't happen it's not a problem
-    // the rest of the test won't assume anything about the number of tasks per batch.
-    for i in 0..5 {
-        let (_, _status) = index.add_documents(json!({ "id": i, "doggo": "bone" }), None).await;
-    }
-
-    let mut nb_tasks = 0;
-    while let Some(payload) = receiver.recv().await {
-        let payload = String::from_utf8(payload).unwrap();
-        let jsonl = payload.split('\n');
-        for json in jsonl {
-            if json.is_empty() {
-                break; // we reached EOF
-            }
-            nb_tasks += 1;
-            let json: serde_json::Value = serde_json::from_str(json).unwrap();
-            snapshot!(
-                json_string!(json, { ".uid" => "[uid]", ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }),
-                @r###"
-            {
-              "uid": "[uid]",
-              "indexUid": "tamo",
-              "status": "succeeded",
-              "type": "documentAdditionOrUpdate",
-              "canceledBy": null,
-              "details": {
-                "receivedDocuments": 1,
-                "indexedDocuments": 1
-              },
-              "error": null,
-              "duration": "[duration]",
-              "enqueuedAt": "[date]",
-              "startedAt": "[date]",
-              "finishedAt": "[date]"
-            }
-            "###);
-        }
-        if nb_tasks == 5 {
-            break;
-        }
-    }
-
-    assert!(nb_tasks == 5, "We should have received the 5 tasks but only received {nb_tasks}");
-    server_handle.abort();
-}

View File

@@ -9,11 +9,11 @@ edition.workspace = true
license.workspace = true

[dependencies]
-anyhow = "1.0.79"
-clap = { version = "4.4.17", features = ["derive"] }
+anyhow = "1.0.75"
+clap = { version = "4.2.1", features = ["derive"] }
dump = { path = "../dump" }
file-store = { path = "../file-store" }
meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-types = { path = "../meilisearch-types" }
-time = { version = "0.3.31", features = ["formatting"] }
-uuid = { version = "1.6.1", features = ["v4"], default-features = false }
+time = { version = "0.3.30", features = ["formatting"] }
+uuid = { version = "1.5.0", features = ["v4"], default-features = false }

View File

@@ -14,14 +14,14 @@ license.workspace = true

[dependencies]
bimap = { version = "0.6.3", features = ["serde"] }
bincode = "1.3.3"
-bstr = "1.9.0"
-bytemuck = { version = "1.14.0", features = ["extern_crate_alloc"] }
-byteorder = "1.5.0"
-charabia = { version = "0.8.7", default-features = false }
+bstr = "1.4.0"
+bytemuck = { version = "1.13.1", features = ["extern_crate_alloc"] }
+byteorder = "1.4.3"
+charabia = { version = "0.8.5", default-features = false }
concat-arrays = "0.1.2"
-crossbeam-channel = "0.5.11"
-deserr = "0.6.1"
-either = { version = "1.9.0", features = ["serde"] }
+crossbeam-channel = "0.5.8"
+deserr = "0.6.0"
+either = { version = "1.8.1", features = ["serde"] }
flatten-serde-json = { path = "../flatten-serde-json" }
fst = "0.4.7"
fxhash = "0.2.1"
@@ -35,32 +35,32 @@ heed = { version = "0.20.0-alpha.9", default-features = false, features = [
    "serde-bincode",
    "read-txn-no-tls",
] }
-indexmap = { version = "2.1.0", features = ["serde"] }
+indexmap = { version = "2.0.0", features = ["serde"] }
json-depth-checker = { path = "../json-depth-checker" }
levenshtein_automata = { version = "0.2.1", features = ["fst_automaton"] }
memmap2 = "0.7.1"
-obkv = "0.2.1"
-once_cell = "1.19.0"
-ordered-float = "4.2.0"
+obkv = "0.2.0"
+once_cell = "1.17.1"
+ordered-float = "3.6.0"
rand_pcg = { version = "0.3.1", features = ["serde1"] }
-rayon = "1.8.0"
-roaring = "0.10.2"
+rayon = "1.7.0"
+roaring = "0.10.1"
rstar = { version = "0.11.0", features = ["serde"] }
-serde = { version = "1.0.195", features = ["derive"] }
-serde_json = { version = "1.0.111", features = ["preserve_order"] }
-slice-group-by = "0.3.1"
+serde = { version = "1.0.160", features = ["derive"] }
+serde_json = { version = "1.0.95", features = ["preserve_order"] }
+slice-group-by = "0.3.0"
smallstr = { version = "0.3.0", features = ["serde"] }
-smallvec = "1.12.0"
+smallvec = "1.10.0"
smartstring = "1.0.1"
-tempfile = "3.9.0"
-thiserror = "1.0.56"
-time = { version = "0.3.31", features = [
+tempfile = "3.5.0"
+thiserror = "1.0.40"
+time = { version = "0.3.20", features = [
    "serde-well-known",
    "formatting",
    "parsing",
    "macros",
] }
-uuid = { version = "1.6.1", features = ["v4"] }
+uuid = { version = "1.3.1", features = ["v4"] }

filter-parser = { path = "../filter-parser" }
@@ -71,8 +71,9 @@ itertools = "0.11.0"
puffin = "0.16.0"

# logging
+log = "0.4.17"
logging_timer = "1.1.0"
-csv = "1.3.0"
+csv = "1.2.1"

candle-core = { git = "https://github.com/huggingface/candle.git", version = "0.3.1" }
candle-transformers = { git = "https://github.com/huggingface/candle.git", version = "0.3.1" }
candle-nn = { git = "https://github.com/huggingface/candle.git", version = "0.3.1" }
@@ -80,22 +81,21 @@ tokenizers = { git = "https://github.com/huggingface/tokenizers.git", tag = "v0.
hf-hub = { git = "https://github.com/dureuill/hf-hub.git", branch = "rust_tls", default_features = false, features = [
    "online",
] }
-tokio = { version = "1.35.1", features = ["rt"] }
-futures = "0.3.30"
-reqwest = { version = "0.11.23", features = [
+tokio = { version = "1.34.0", features = ["rt"] }
+futures = "0.3.29"
+reqwest = { version = "0.11.16", features = [
    "rustls-tls",
    "json",
], default-features = false }
-tiktoken-rs = "0.5.8"
+tiktoken-rs = "0.5.7"
liquid = "0.26.4"
-arroy = "0.2.0"
+arroy = { git = "https://github.com/meilisearch/arroy.git", version = "0.1.0" }
rand = "0.8.5"
-tracing = "0.1.40"

[dev-dependencies]
-mimalloc = { version = "0.1.39", default-features = false }
+mimalloc = { version = "0.1.37", default-features = false }
big_s = "1.0.2"
-insta = "1.34.0"
+insta = "1.29.0"
maplit = "1.0.2"
md5 = "0.7.0"
meili-snap = { path = "../meili-snap" }
@@ -110,7 +110,6 @@ all-tokenizations = [
    "charabia/korean",
    "charabia/greek",
    "charabia/khmer",
-    "charabia/vietnamese",
]

# Use POSIX semaphores instead of SysV semaphores in LMDB
@@ -138,8 +137,3 @@ greek = ["charabia/greek"]

# allow khmer specialized tokenization
khmer = ["charabia/khmer"]
-vietnamese = ["charabia/vietnamese"]
-
-# allow CUDA support, see <https://github.com/meilisearch/meilisearch/issues/4306>
-cuda = ["candle-core/cuda"]

View File

@@ -25,7 +25,6 @@ impl<R: io::Read + io::Seek> DocumentsBatchReader<R> {
    ///
    /// It first retrieves the index, then moves to the first document. Use the `into_cursor`
    /// method to iterator over the documents, from the first to the last.
-    #[tracing::instrument(level = "trace", skip_all, target = "indexing::documents")]
    pub fn from_reader(reader: R) -> Result<Self, Error> {
        let reader = grenad::Reader::new(reader)?;
        let mut cursor = reader.into_cursor()?;

View File

@@ -172,7 +172,7 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco
    TooManyDocumentIds { primary_key: String, document: Object },
    #[error("The primary key inference failed as the engine did not find any field ending with `id` in its name. Please specify the primary key manually using the `primaryKey` query parameter.")]
    NoPrimaryKeyCandidateFound,
-    #[error("The primary key inference failed as the engine found {} fields ending with `id` in their names: '{}' and '{}'. Please specify the primary key manually using the `primaryKey` query parameter.", .candidates.len(), .candidates.first().unwrap(), .candidates.get(1).unwrap())]
+    #[error("The primary key inference failed as the engine found {} fields ending with `id` in their names: '{}' and '{}'. Please specify the primary key manually using the `primaryKey` query parameter.", .candidates.len(), .candidates.get(0).unwrap(), .candidates.get(1).unwrap())]
    MultiplePrimaryKeyCandidatesFound { candidates: Vec<String> },
    #[error("There is no more space left on the device. Consider increasing the size of the disk/partition.")]
    NoSpaceLeftOnDevice,
@@ -192,7 +192,7 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco
    MissingDocumentField(#[from] crate::prompt::error::RenderPromptError),
    #[error(transparent)]
    InvalidPrompt(#[from] crate::prompt::error::NewPromptError),
-    #[error("`.embedders.{0}.documentTemplate`: Invalid template: {1}.")]
+    #[error("Invalid prompt in for embeddings with name '{0}': {1}.")]
    InvalidPromptForEmbeddings(String, crate::prompt::error::NewPromptError),
    #[error("Too many embedders in the configuration. Found {0}, but limited to 256.")]
    TooManyEmbedders(usize),
@@ -200,49 +200,6 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco
    InvalidEmbedder(String),
    #[error("Too many vectors for document with id {0}: found {1}, but limited to 256.")]
    TooManyVectors(String, usize),
-    #[error("`.embedders.{embedder_name}`: Field `{field}` unavailable for source `{source_}` (only available for sources: {}). Available fields: {}",
-        allowed_sources_for_field
-            .iter()
-            .map(|accepted| format!("`{}`", accepted))
-            .collect::<Vec<String>>()
-            .join(", "),
-        allowed_fields_for_source
-            .iter()
-            .map(|accepted| format!("`{}`", accepted))
-            .collect::<Vec<String>>()
-            .join(", ")
-    )]
-    InvalidFieldForSource {
-        embedder_name: String,
-        source_: crate::vector::settings::EmbedderSource,
-        field: &'static str,
-        allowed_fields_for_source: &'static [&'static str],
-        allowed_sources_for_field: &'static [crate::vector::settings::EmbedderSource],
-    },
-    #[error("`.embedders.{embedder_name}.model`: Invalid model `{model}` for OpenAI. Supported models: {:?}", crate::vector::openai::EmbeddingModel::supported_models())]
-    InvalidOpenAiModel { embedder_name: String, model: String },
-    #[error("`.embedders.{embedder_name}`: Missing field `{field}` (note: this field is mandatory for source {source_})")]
-    MissingFieldForSource {
-        field: &'static str,
-        source_: crate::vector::settings::EmbedderSource,
-        embedder_name: String,
-    },
-    #[error("`.embedders.{embedder_name}.dimensions`: Model `{model}` does not support overriding its native dimensions of {expected_dimensions}. Found {dimensions}")]
-    InvalidOpenAiModelDimensions {
-        embedder_name: String,
-        model: &'static str,
-        dimensions: usize,
-        expected_dimensions: usize,
-    },
-    #[error("`.embedders.{embedder_name}.dimensions`: Model `{model}` does not support overriding its dimensions to a value higher than {max_dimensions}. Found {dimensions}")]
-    InvalidOpenAiModelDimensionsMax {
-        embedder_name: String,
-        model: &'static str,
-        dimensions: usize,
-        max_dimensions: usize,
-    },
-    #[error("`.embedders.{embedder_name}.dimensions`: `dimensions` cannot be zero")]
-    InvalidSettingsDimensions { embedder_name: String },
}

impl From<crate::vector::Error> for Error {
@@ -267,7 +224,6 @@ impl From<arroy::Error> for Error {
            arroy::Error::DatabaseFull
            | arroy::Error::InvalidItemAppend
            | arroy::Error::UnmatchingDistance { .. }
-            | arroy::Error::MissingNode
            | arroy::Error::MissingMetadata => {
                Error::InternalError(InternalError::ArroyError(value))
            }
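The removed variants above all follow the same thiserror pattern: named fields interpolated into a dotted settings path. A minimal standalone sketch (hypothetical embedder name "default", not taken from the repository) of how such a format string renders:

    use thiserror::Error;

    #[derive(Debug, Error)]
    enum DemoError {
        // same shape as the `InvalidSettingsDimensions` variant above
        #[error("`.embedders.{embedder_name}.dimensions`: `dimensions` cannot be zero")]
        InvalidSettingsDimensions { embedder_name: String },
    }

    fn main() {
        let err = DemoError::InvalidSettingsDimensions { embedder_name: "default".into() };
        assert_eq!(
            err.to_string(),
            "`.embedders.default.dimensions`: `dimensions` cannot be zero"
        );
    }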

View File

@@ -181,7 +181,7 @@ mod tests {
        get_simple_string_index_with_multiple_field_ids(),
        get_random_looking_string_index_with_multiple_field_ids(),
    ];
-    for index in indexes {
+    for (_i, index) in indexes.iter().enumerate() {
        let txn = index.env.read_txn().unwrap();
        let candidates = RoaringBitmap::new();
        let mut results = String::new();
@@ -212,7 +212,7 @@ mod tests {
        get_simple_string_index_with_multiple_field_ids(),
        get_random_looking_string_index_with_multiple_field_ids(),
    ];
-    for index in indexes {
+    for (_i, index) in indexes.iter().enumerate() {
        let txn = index.env.read_txn().unwrap();
        let candidates = RoaringBitmap::new();
        let mut results = String::new();

View File

@@ -195,7 +195,7 @@ mod tests {
        get_simple_string_index_with_multiple_field_ids(),
        get_random_looking_string_index_with_multiple_field_ids(),
    ];
-    for index in indexes {
+    for (_i, index) in indexes.iter().enumerate() {
        let txn = index.env.read_txn().unwrap();
        let candidates = RoaringBitmap::new();
        let mut results = String::new();
@@ -226,7 +226,7 @@ mod tests {
        get_simple_string_index_with_multiple_field_ids(),
        get_random_looking_string_index_with_multiple_field_ids(),
    ];
-    for index in indexes {
+    for (_i, index) in indexes.iter().enumerate() {
        let txn = index.env.read_txn().unwrap();
        let candidates = RoaringBitmap::new();
        let mut results = String::new();

View File

@@ -142,7 +142,7 @@ pub(crate) mod tests {
        let mut txn = index.env.write_txn().unwrap();

        let mut rng = rand::rngs::SmallRng::from_seed([0; 32]);
-        for key in std::iter::from_fn(|| Some(rng.gen_range(0..256))).take(128) {
+        for (_i, key) in std::iter::from_fn(|| Some(rng.gen_range(0..256))).take(128).enumerate() {
            let mut bitmap = RoaringBitmap::new();
            bitmap.insert(key);
            bitmap.insert(key + 100);
@@ -172,7 +172,7 @@ pub(crate) mod tests {
        let keys =
            std::iter::from_fn(|| Some(rng.gen_range(0..256))).take(128).collect::<Vec<u32>>();
        for fid in 0..2 {
-            for &key in &keys {
+            for (_i, &key) in keys.iter().enumerate() {
                let mut bitmap = RoaringBitmap::new();
                bitmap.insert(key);
                bitmap.insert(key + 100);
@@ -207,7 +207,7 @@ pub(crate) mod tests {
        let keys =
            std::iter::from_fn(|| Some(rng.gen_range(0..256))).take(128).collect::<Vec<u32>>();
        for fid in 0..2 {
-            for &key in &keys {
+            for (_i, &key) in keys.iter().enumerate() {
                let mut bitmap = RoaringBitmap::new();
                bitmap.insert(key);
                bitmap.insert(key + 100);

View File

@@ -102,7 +102,7 @@ impl ScoreWithRatioResult {
        }

        SearchResult {
-            matching_words: right.matching_words,
+            matching_words: left.matching_words,
            candidates: left.candidates | right.candidates,
            documents_ids,
            document_scores,
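The merged `SearchResult` above unions the two candidate sets with `|`. A standalone sketch of that roaring-bitmap union, separate from the hybrid-search code itself:

    use roaring::RoaringBitmap;

    fn main() {
        let left = RoaringBitmap::from_iter([1u32, 2, 3]);
        let right = RoaringBitmap::from_iter([3u32, 4]);
        // same `left.candidates | right.candidates` shape as in the hunk above
        let both = &left | &right;
        assert_eq!(both.len(), 4);
    }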

View File

@@ -6,9 +6,9 @@ use charabia::Normalize;
use fst::automaton::{Automaton, Str};
use fst::{IntoStreamer, Streamer};
use levenshtein_automata::{LevenshteinAutomatonBuilder as LevBuilder, DFA};
+use log::error;
use once_cell::sync::Lazy;
use roaring::bitmap::RoaringBitmap;
-use tracing::error;

pub use self::facet::{FacetDistribution, Filter, OrderBy, DEFAULT_VALUES_PER_FACET};
pub use self::new::matches::{FormatOptions, MatchBounds, MatcherBuilder, MatchingWords};
@@ -27,8 +27,8 @@ static LEVDIST0: Lazy<LevBuilder> = Lazy::new(|| LevBuilder::new(0, true));
static LEVDIST1: Lazy<LevBuilder> = Lazy::new(|| LevBuilder::new(1, true));
static LEVDIST2: Lazy<LevBuilder> = Lazy::new(|| LevBuilder::new(2, true));

-/// The maximum number of values per facet returned by the facet search route.
-const DEFAULT_MAX_NUMBER_OF_VALUES_PER_FACET: usize = 100;
+/// The maximum number of facets returned by the facet search route.
+const MAX_NUMBER_OF_FACETS: usize = 100;

pub mod facet;
mod fst_utils;
@@ -306,7 +306,6 @@ pub struct SearchForFacetValues<'a> {
    query: Option<String>,
    facet: String,
    search_query: Search<'a>,
-    max_values: usize,
    is_hybrid: bool,
}

@@ -316,13 +315,7 @@ impl<'a> SearchForFacetValues<'a> {
        search_query: Search<'a>,
        is_hybrid: bool,
    ) -> SearchForFacetValues<'a> {
-        SearchForFacetValues {
-            query: None,
-            facet,
-            search_query,
-            max_values: DEFAULT_MAX_NUMBER_OF_VALUES_PER_FACET,
-            is_hybrid,
-        }
+        SearchForFacetValues { query: None, facet, search_query, is_hybrid }
    }

    pub fn query(&mut self, query: impl Into<String>) -> &mut Self {
@@ -330,11 +323,6 @@ impl<'a> SearchForFacetValues<'a> {
        self
    }

-    pub fn max_values(&mut self, max: usize) -> &mut Self {
-        self.max_values = max;
-        self
-    }
-
    fn one_original_value_of(
        &self,
        field_id: FieldId,
@@ -474,7 +462,7 @@ impl<'a> SearchForFacetValues<'a> {
                        .unwrap_or_else(|| left_bound.to_string());
                    results.push(FacetValueHit { value, count });
                }
-                if results.len() >= self.max_values {
+                if results.len() >= MAX_NUMBER_OF_FACETS {
                    break;
                }
            }
@@ -519,7 +507,7 @@ impl<'a> SearchForFacetValues<'a> {
                            .unwrap_or_else(|| query.to_string());
                        results.push(FacetValueHit { value, count });
                    }
-                    if results.len() >= self.max_values {
+                    if results.len() >= MAX_NUMBER_OF_FACETS {
                        return Ok(ControlFlow::Break(()));
                    }
                }
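The side of this diff that keeps `max_values` exposes it as a chainable builder method. A hedged usage sketch; the constructor's exact argument order and the surrounding `search` value are assumptions for illustration, not taken from the diff:

    // assumed: an `Index`, a read txn, and a `Search` built elsewhere
    let mut facet_search = SearchForFacetValues::new("genres".to_string(), search, false);
    facet_search.query("act").max_values(10);
    let hits = facet_search.execute()?;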

View File

@@ -15,7 +15,6 @@ pub struct BucketSortOutput {
// TODO: would probably be good to regroup some of these inside of a struct?
#[allow(clippy::too_many_arguments)]
-#[logging_timer::time]
pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
    ctx: &mut SearchContext<'ctx>,
    mut ranking_rules: Vec<BoxRankingRule<'ctx, Q>>,
@@ -166,9 +165,6 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
            continue;
        }

-        let span = tracing::trace_span!(target: "search::bucket_sort", "next_bucket", id = ranking_rules[cur_ranking_rule_index].id());
-        let entered = span.enter();
-
        let Some(next_bucket) = ranking_rules[cur_ranking_rule_index].next_bucket(
            ctx,
            logger,
@@ -178,7 +174,6 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
            back!();
            continue;
        };
-        drop(entered);

        ranking_rule_scores.push(next_bucket.score);

View File

@@ -72,7 +72,7 @@ impl<'m> MatcherBuilder<'m> {
    }
}

-#[derive(Copy, Clone, Default, Debug)]
+#[derive(Copy, Clone, Default)]
pub struct FormatOptions {
    pub highlight: bool,
    pub crop: Option<usize>,
@@ -82,10 +82,6 @@ impl FormatOptions {
    pub fn merge(self, other: Self) -> Self {
        Self { highlight: self.highlight || other.highlight, crop: self.crop.or(other.crop) }
    }
-
-    pub fn should_format(&self) -> bool {
-        self.highlight || self.crop.is_some()
-    }
}

#[derive(Clone, Debug)]
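A quick sketch of the two helpers above, as a fragment using this file's `FormatOptions`: `merge` ORs `highlight` and keeps the first available `crop`, while `should_format` (on the side of the diff that has it) reports whether any formatting work remains:

    let query_level = FormatOptions { highlight: true, crop: None };
    let field_level = FormatOptions { highlight: false, crop: Some(10) };
    let merged = query_level.merge(field_level);
    assert!(merged.highlight);
    assert_eq!(merged.crop, Some(10));
    assert!(merged.should_format());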

View File

@@ -191,7 +191,6 @@ fn resolve_maximally_reduced_query_graph(
    Ok(docids)
}

-#[logging_timer::time]
fn resolve_universe(
    ctx: &mut SearchContext,
    initial_universe: &RoaringBitmap,
@@ -557,7 +556,6 @@ pub fn execute_vector_search(
}

#[allow(clippy::too_many_arguments)]
-#[logging_timer::time]
pub fn execute_search(
    ctx: &mut SearchContext,
    query: Option<&str>,

View File

@@ -5,7 +5,6 @@ use super::*;
use crate::{Result, SearchContext, MAX_WORD_LENGTH};

/// Convert the tokenised search query into a list of located query terms.
-#[logging_timer::time]
pub fn located_query_terms_from_tokens(
    ctx: &mut SearchContext,
    query: NormalizedTokenIter,

View File

@@ -371,7 +371,7 @@ fn test_proximity_prefix_db() {
    s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
    s.query("best s");
    let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
-    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 9, 6, 7, 8, 11, 12, 13, 15]");
+    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 13, 9, 12, 6, 7, 8, 11, 15]");
    insta::assert_snapshot!(format!("{document_scores:#?}"));
    let texts = collect_field_values(&index, &txn, "text", &documents_ids);

@@ -379,13 +379,13 @@ fn test_proximity_prefix_db() {
    insta::assert_debug_snapshot!(texts, @r###"
    [
        "\"this is the best summer meal\"",
+        "\"summer best\"",
        "\"this is the best meal of summer\"",
+        "\"summer x best\"",
        "\"this is the best meal I have ever had in such a beautiful summer day\"",
        "\"this is the best cooked meal of the summer\"",
        "\"this is the best meal of the summer\"",
        "\"summer x y best\"",
-        "\"summer x best\"",
-        "\"summer best\"",
        "\"this is the best meal I have ever had in such a beautiful winter day\"",
    ]
    "###);
@@ -423,20 +423,20 @@ fn test_proximity_prefix_db() {
    s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
    s.query("best win");
    let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
-    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[19, 18, 15, 16, 17, 20, 21, 22]");
+    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[19, 22, 18, 21, 15, 16, 17, 20]");
    insta::assert_snapshot!(format!("{document_scores:#?}"));
    let texts = collect_field_values(&index, &txn, "text", &documents_ids);

    insta::assert_debug_snapshot!(texts, @r###"
    [
        "\"this is the best winter meal\"",
+        "\"winter best\"",
        "\"this is the best meal of winter\"",
+        "\"winter x best\"",
        "\"this is the best meal I have ever had in such a beautiful winter day\"",
        "\"this is the best cooked meal of the winter\"",
        "\"this is the best meal of the winter\"",
        "\"winter x y best\"",
-        "\"winter x best\"",
-        "\"winter best\"",
    ]
    "###);
@@ -471,20 +471,20 @@ fn test_proximity_prefix_db() {
    s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
    s.query("best wi");
    let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
-    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[19, 18, 15, 16, 17, 20, 21, 22]");
+    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[19, 22, 18, 21, 15, 16, 17, 20]");
    insta::assert_snapshot!(format!("{document_scores:#?}"));
    let texts = collect_field_values(&index, &txn, "text", &documents_ids);

    insta::assert_debug_snapshot!(texts, @r###"
    [
        "\"this is the best winter meal\"",
+        "\"winter best\"",
        "\"this is the best meal of winter\"",
+        "\"winter x best\"",
        "\"this is the best meal I have ever had in such a beautiful winter day\"",
        "\"this is the best cooked meal of the winter\"",
        "\"this is the best meal of the winter\"",
        "\"winter x y best\"",
-        "\"winter x best\"",
-        "\"winter best\"",
    ]
    "###);
}

View File

@@ -11,6 +11,22 @@ expression: "format!(\"{document_scores:#?}\")"
            },
        ),
    ],
+    [
+        Proximity(
+            Rank {
+                rank: 3,
+                max_rank: 4,
+            },
+        ),
+    ],
+    [
+        Proximity(
+            Rank {
+                rank: 2,
+                max_rank: 4,
+            },
+        ),
+    ],
    [
        Proximity(
            Rank {
@@ -51,20 +67,4 @@ expression: "format!(\"{document_scores:#?}\")"
            },
        ),
    ],
-    [
-        Proximity(
-            Rank {
-                rank: 1,
-                max_rank: 4,
-            },
-        ),
-    ],
-    [
-        Proximity(
-            Rank {
-                rank: 1,
-                max_rank: 4,
-            },
-        ),
-    ],
]

View File

@@ -11,6 +11,22 @@ expression: "format!(\"{document_scores:#?}\")"
            },
        ),
    ],
+    [
+        Proximity(
+            Rank {
+                rank: 3,
+                max_rank: 4,
+            },
+        ),
+    ],
+    [
+        Proximity(
+            Rank {
+                rank: 2,
+                max_rank: 4,
+            },
+        ),
+    ],
    [
        Proximity(
            Rank {
@@ -59,20 +75,4 @@ expression: "format!(\"{document_scores:#?}\")"
            },
        ),
    ],
-    [
-        Proximity(
-            Rank {
-                rank: 1,
-                max_rank: 4,
-            },
-        ),
-    ],
-    [
-        Proximity(
-            Rank {
-                rank: 1,
-                max_rank: 4,
-            },
-        ),
-    ],
]

View File

@@ -11,6 +11,22 @@ expression: "format!(\"{document_scores:#?}\")"
            },
        ),
    ],
+    [
+        Proximity(
+            Rank {
+                rank: 3,
+                max_rank: 4,
+            },
+        ),
+    ],
+    [
+        Proximity(
+            Rank {
+                rank: 2,
+                max_rank: 4,
+            },
+        ),
+    ],
    [
        Proximity(
            Rank {
@@ -51,20 +67,4 @@ expression: "format!(\"{document_scores:#?}\")"
            },
        ),
    ],
-    [
-        Proximity(
-            Rank {
-                rank: 1,
-                max_rank: 4,
-            },
-        ),
-    ],
-    [
-        Proximity(
-            Rank {
-                rank: 1,
-                max_rank: 4,
-            },
-        ),
-    ],
]
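For reference, a `Rank { rank, max_rank }` detail in these snapshots normalizes to a score in (0, 1]. A sketch of that normalization; the linear rank-to-score rule is an assumption about the scoring, not shown in this diff:

    fn rank_score(rank: u32, max_rank: u32) -> f64 {
        rank as f64 / max_rank as f64
    }

    fn main() {
        // Rank { rank: 3, max_rank: 4 } from the snapshots above
        assert_eq!(rank_score(3, 4), 0.75);
    }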

View File

@@ -14,12 +14,6 @@ impl<'t, 'i> ClearDocuments<'t, 'i> {
        ClearDocuments { wtxn, index }
    }

-    #[tracing::instrument(
-        level = "trace",
-        skip(self),
-        target = "indexing::documents",
-        name = "clear_documents"
-    )]
    pub fn execute(self) -> Result<u64> {
        puffin::profile_function!();

View File

@@ -1,7 +1,7 @@
use std::fs::File;
use std::io::BufReader;

-use grenad::{CompressionType, Merger};
+use grenad::CompressionType;
use heed::types::Bytes;
use heed::{BytesDecode, BytesEncode, Error, PutFlags, RoTxn, RwTxn};
use roaring::RoaringBitmap;
@@ -14,7 +14,6 @@ use crate::heed_codec::facet::{
use crate::heed_codec::BytesRefCodec;
use crate::update::del_add::{DelAdd, KvReaderDelAdd};
use crate::update::index_documents::{create_writer, valid_lmdb_key, writer_into_reader};
-use crate::update::MergeFn;
use crate::{CboRoaringBitmapCodec, CboRoaringBitmapLenCodec, FieldId, Index, Result};

/// Algorithm to insert elememts into the `facet_id_(string/f64)_docids` databases
@@ -29,7 +28,7 @@ pub struct FacetsUpdateBulk<'i> {
    facet_type: FacetType,
    field_ids: Vec<FieldId>,
    // None if level 0 does not need to be updated
-    delta_data: Option<Merger<BufReader<File>, MergeFn>>,
+    delta_data: Option<grenad::Reader<BufReader<File>>>,
}

impl<'i> FacetsUpdateBulk<'i> {
@@ -37,7 +36,7 @@ impl<'i> FacetsUpdateBulk<'i> {
        index: &'i Index,
        field_ids: Vec<FieldId>,
        facet_type: FacetType,
-        delta_data: Merger<BufReader<File>, MergeFn>,
+        delta_data: grenad::Reader<BufReader<File>>,
        group_size: u8,
        min_level_size: u8,
    ) -> FacetsUpdateBulk<'i> {
@@ -66,7 +65,7 @@ impl<'i> FacetsUpdateBulk<'i> {
        }
    }

-    #[tracing::instrument(level = "trace", skip_all, target = "indexing::facets::bulk")]
+    #[logging_timer::time("FacetsUpdateBulk::{}")]
    pub fn execute(self, wtxn: &mut heed::RwTxn) -> Result<()> {
        let Self { index, field_ids, group_size, min_level_size, facet_type, delta_data } = self;
@@ -90,7 +89,7 @@ impl<'i> FacetsUpdateBulk<'i> {
/// Implementation of `FacetsUpdateBulk` that is independent of milli's `Index` type
pub(crate) struct FacetsUpdateBulkInner<R: std::io::Read + std::io::Seek> {
    pub db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
-    pub delta_data: Option<Merger<R, MergeFn>>,
+    pub delta_data: Option<grenad::Reader<R>>,
    pub group_size: u8,
    pub min_level_size: u8,
}
@@ -130,8 +129,8 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
        if self.db.is_empty(wtxn)? {
            let mut buffer = Vec::new();
            let mut database = self.db.iter_mut(wtxn)?.remap_types::<Bytes, Bytes>();
-            let mut iter = delta_data.into_stream_merger_iter()?;
-            while let Some((key, value)) = iter.next()? {
+            let mut cursor = delta_data.into_cursor()?;
+            while let Some((key, value)) = cursor.move_on_next()? {
                if !valid_lmdb_key(key) {
                    continue;
                }
@@ -155,8 +154,8 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
            let mut buffer = Vec::new();
            let database = self.db.remap_types::<Bytes, Bytes>();

-            let mut iter = delta_data.into_stream_merger_iter()?;
-            while let Some((key, value)) = iter.next()? {
+            let mut cursor = delta_data.into_cursor()?;
+            while let Some((key, value)) = cursor.move_on_next()? {
                if !valid_lmdb_key(key) {
                    continue;
                }
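The hunks above swap a merging `Merger` stream for a plain grenad `Reader` walked through a cursor. A self-contained sketch of the cursor style, using only the calls visible in this diff (`into_cursor` / `move_on_next`):

    use std::io;

    // walk every (key, value) entry of a grenad reader and total the value sizes
    fn sum_value_lens<R: io::Read + io::Seek>(
        reader: grenad::Reader<R>,
    ) -> Result<u64, grenad::Error> {
        let mut cursor = reader.into_cursor()?;
        let mut total = 0;
        while let Some((_key, value)) = cursor.move_on_next()? {
            total += value.len() as u64;
        }
        Ok(total)
    }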

View File

@@ -1,7 +1,6 @@
use std::fs::File;
use std::io::BufReader;

-use grenad::Merger;
use heed::types::{Bytes, DecodeIgnore};
use heed::{BytesDecode, Error, RoTxn, RwTxn};
use obkv::KvReader;
@@ -15,7 +14,6 @@ use crate::heed_codec::BytesRefCodec;
use crate::search::facet::get_highest_level;
use crate::update::del_add::DelAdd;
use crate::update::index_documents::valid_lmdb_key;
-use crate::update::MergeFn;
use crate::{CboRoaringBitmapCodec, Index, Result};

enum InsertionResult {
@@ -33,14 +31,14 @@ enum DeletionResult {
/// `facet_id_(string/f64)_docids` databases.
pub struct FacetsUpdateIncremental {
    inner: FacetsUpdateIncrementalInner,
-    delta_data: Merger<BufReader<File>, MergeFn>,
+    delta_data: grenad::Reader<BufReader<File>>,
}

impl FacetsUpdateIncremental {
    pub fn new(
        index: &Index,
        facet_type: FacetType,
-        delta_data: Merger<BufReader<File>, MergeFn>,
+        delta_data: grenad::Reader<BufReader<File>>,
        group_size: u8,
        min_level_size: u8,
        max_group_size: u8,
@@ -63,18 +61,16 @@ impl FacetsUpdateIncremental {
        }
    }

-    #[tracing::instrument(level = "trace", skip_all, target = "indexing::facets::incremental")]
    pub fn execute(self, wtxn: &mut RwTxn) -> crate::Result<()> {
-        let mut iter = self.delta_data.into_stream_merger_iter()?;
-
-        while let Some((key, value)) = iter.next()? {
+        let mut cursor = self.delta_data.into_cursor()?;
+        while let Some((key, value)) = cursor.move_on_next()? {
            if !valid_lmdb_key(key) {
                continue;
            }
            let key = FacetGroupKeyCodec::<BytesRefCodec>::bytes_decode(key)
                .map_err(heed::Error::Encoding)?;
            let value = KvReader::new(value);
            let docids_to_delete = value
                .get(DelAdd::Deletion)
                .map(CboRoaringBitmapCodec::bytes_decode)
@@ -755,7 +751,7 @@ mod tests {
        let mut rng = rand::rngs::SmallRng::from_seed([0; 32]);
        keys.shuffle(&mut rng);

-        for key in keys {
+        for (_i, key) in keys.into_iter().enumerate() {
            let mut bitmap = RoaringBitmap::new();
            bitmap.insert(key);
            index.insert(&mut txn, 0, &(key as f64), &bitmap);
@@ -774,7 +770,7 @@ mod tests {
        let mut rng = rand::rngs::SmallRng::from_seed([0; 32]);
        keys.shuffle(&mut rng);

-        for key in keys {
+        for (_i, key) in keys.into_iter().enumerate() {
            let mut bitmap = RoaringBitmap::new();
            bitmap.insert(key);
            bitmap.insert(rng.gen_range(256..512));

View File

@@ -79,20 +79,23 @@ pub const FACET_MIN_LEVEL_SIZE: u8 = 5;
use std::collections::BTreeSet;
use std::fs::File;
use std::io::BufReader;
+use std::iter::FromIterator;

-use grenad::Merger;
-use heed::types::{Bytes, DecodeIgnore};
+use charabia::normalizer::{Normalize, NormalizerOption};
+use grenad::{CompressionType, SortAlgorithm};
+use heed::types::{Bytes, DecodeIgnore, SerdeJson};
+use heed::BytesEncode;
+use log::debug;
use time::OffsetDateTime;
-use tracing::debug;

use self::incremental::FacetsUpdateIncremental;
use super::FacetsUpdateBulk;
use crate::facet::FacetType;
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec};
use crate::heed_codec::BytesRefCodec;
-use crate::update::del_add::{DelAdd, KvReaderDelAdd};
-use crate::update::MergeFn;
-use crate::{try_split_array_at, FieldId, Index, Result};
+use crate::update::index_documents::create_sorter;
+use crate::update::merge_btreeset_string;
+use crate::{BEU16StrCodec, Index, Result, MAX_FACET_VALUE_LENGTH};

pub mod bulk;
pub mod incremental;
@@ -105,20 +108,16 @@ pub struct FacetsUpdate<'i> {
    index: &'i Index,
    database: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
    facet_type: FacetType,
-    delta_data: Merger<BufReader<File>, MergeFn>,
-    normalized_delta_data: Option<Merger<BufReader<File>, MergeFn>>,
+    delta_data: grenad::Reader<BufReader<File>>,
    group_size: u8,
    max_group_size: u8,
    min_level_size: u8,
-    data_size: u64,
}

impl<'i> FacetsUpdate<'i> {
    pub fn new(
        index: &'i Index,
        facet_type: FacetType,
-        delta_data: Merger<BufReader<File>, MergeFn>,
-        normalized_delta_data: Option<Merger<BufReader<File>, MergeFn>>,
-        data_size: u64,
+        delta_data: grenad::Reader<BufReader<File>>,
    ) -> Self {
        let database = match facet_type {
            FacetType::String => {
@@ -136,20 +135,18 @@ impl<'i> FacetsUpdate<'i> {
            min_level_size: FACET_MIN_LEVEL_SIZE,
            facet_type,
            delta_data,
-            normalized_delta_data,
-            data_size,
        }
    }

    pub fn execute(self, wtxn: &mut heed::RwTxn) -> Result<()> {
-        if self.data_size == 0 {
+        if self.delta_data.is_empty() {
            return Ok(());
        }
        debug!("Computing and writing the facet values levels docids into LMDB on disk...");
        self.index.set_updated_at(wtxn, &OffsetDateTime::now_utc())?;

        // See self::comparison_bench::benchmark_facet_indexing
-        if self.data_size >= (self.database.len(wtxn)? / 50) {
+        if self.delta_data.len() >= (self.database.len(wtxn)? / 50) {
            let field_ids =
                self.index.faceted_fields_ids(wtxn)?.iter().copied().collect::<Vec<_>>();
            let bulk_update = FacetsUpdateBulk::new(
@@ -173,108 +170,94 @@ impl<'i> FacetsUpdate<'i> {
            incremental_update.execute(wtxn)?;
        }

-        match self.normalized_delta_data {
-            Some(data) => index_facet_search(wtxn, data, self.index),
-            None => Ok(()),
-        }
-    }
-}
-
-fn index_facet_search(
-    wtxn: &mut heed::RwTxn,
-    normalized_delta_data: Merger<BufReader<File>, MergeFn>,
-    index: &Index,
-) -> Result<()> {
-    let mut iter = normalized_delta_data.into_stream_merger_iter()?;
-    while let Some((key_bytes, delta_bytes)) = iter.next()? {
-        let deladd_reader = KvReaderDelAdd::new(delta_bytes);
-
-        let database_set = index
-            .facet_id_normalized_string_strings
-            .remap_key_type::<Bytes>()
-            .get(wtxn, key_bytes)?
-            .unwrap_or_default();
-
-        let add_set = deladd_reader
-            .get(DelAdd::Addition)
-            .and_then(|bytes| serde_json::from_slice::<BTreeSet<String>>(bytes).ok())
-            .unwrap_or_default();
-
-        let del_set = match deladd_reader
-            .get(DelAdd::Deletion)
-            .and_then(|bytes| serde_json::from_slice::<BTreeSet<String>>(bytes).ok())
-        {
-            Some(del_set) => {
-                let (field_id_bytes, _) = try_split_array_at(key_bytes).unwrap();
-                let field_id = FieldId::from_be_bytes(field_id_bytes);
-                let mut set = BTreeSet::new();
-                for facet in del_set {
-                    let key = FacetGroupKey { field_id, level: 0, left_bound: facet.as_str() };
-                    // Check if the referenced value doesn't exist anymore before deleting it.
-                    if index
-                        .facet_id_string_docids
-                        .remap_data_type::<DecodeIgnore>()
-                        .get(wtxn, &key)?
-                        .is_none()
-                    {
-                        set.insert(facet);
-                    }
-                }
-                set
-            }
-            None => BTreeSet::new(),
-        };
-
-        let set: BTreeSet<_> =
-            database_set.difference(&del_set).chain(add_set.iter()).cloned().collect();
-
-        if set.is_empty() {
-            index
-                .facet_id_normalized_string_strings
-                .remap_key_type::<Bytes>()
-                .delete(wtxn, key_bytes)?;
-        } else {
-            index
-                .facet_id_normalized_string_strings
-                .remap_key_type::<Bytes>()
-                .put(wtxn, key_bytes, &set)?;
-        }
-    }
-
-    // We clear the FST of normalized-for-search to compute everything from scratch.
-    index.facet_id_string_fst.clear(wtxn)?;
-    // We compute one FST by string facet
-    let mut text_fsts = vec![];
-    let mut current_fst: Option<(u16, fst::SetBuilder<Vec<u8>>)> = None;
-    let database = index.facet_id_normalized_string_strings.remap_data_type::<DecodeIgnore>();
-    for result in database.iter(wtxn)? {
-        let ((field_id, normalized_facet), _) = result?;
-        current_fst = match current_fst.take() {
-            Some((fid, fst_builder)) if fid != field_id => {
-                let fst = fst_builder.into_set();
-                text_fsts.push((fid, fst));
-                Some((field_id, fst::SetBuilder::memory()))
-            }
-            Some((field_id, fst_builder)) => Some((field_id, fst_builder)),
-            None => Some((field_id, fst::SetBuilder::memory())),
-        };
-        if let Some((_, fst_builder)) = current_fst.as_mut() {
-            fst_builder.insert(normalized_facet)?;
-        }
-    }
-
-    if let Some((field_id, fst_builder)) = current_fst {
-        let fst = fst_builder.into_set();
-        text_fsts.push((field_id, fst));
-    }
-
-    // We write those FSTs in LMDB now
-    for (field_id, fst) in text_fsts {
-        index.facet_id_string_fst.put(wtxn, &field_id, &fst)?;
-    }
-
-    Ok(())
-}
+        // We clear the list of normalized-for-search facets
+        // and the previous FSTs to compute everything from scratch
+        self.index.facet_id_normalized_string_strings.clear(wtxn)?;
+        self.index.facet_id_string_fst.clear(wtxn)?;
+
+        // As we can't use the same write transaction to read and write in two different databases
+        // we must create a temporary sorter that we will write into LMDB afterward.
+        // As multiple unnormalized facet values can become the same normalized facet value
+        // we must merge them together.
+        let mut sorter = create_sorter(
+            SortAlgorithm::Unstable,
+            merge_btreeset_string,
+            CompressionType::None,
+            None,
+            None,
+            None,
+        );
+
+        // We iterate on the list of original, semi-normalized, facet values
+        // and normalize them for search, inserting them in LMDB in any given order.
+        let options = NormalizerOption { lossy: true, ..Default::default() };
+        let database = self.index.facet_id_string_docids.remap_data_type::<DecodeIgnore>();
+        for result in database.iter(wtxn)? {
+            let (facet_group_key, ()) = result?;
+            if let FacetGroupKey { field_id, level: 0, left_bound } = facet_group_key {
+                let mut normalized_facet = left_bound.normalize(&options);
+                let normalized_truncated_facet: String;
+                if normalized_facet.len() > MAX_FACET_VALUE_LENGTH {
+                    normalized_truncated_facet = normalized_facet
+                        .char_indices()
+                        .take_while(|(idx, _)| *idx < MAX_FACET_VALUE_LENGTH)
+                        .map(|(_, c)| c)
+                        .collect();
+                    normalized_facet = normalized_truncated_facet.into();
+                }
+                let set = BTreeSet::from_iter(std::iter::once(left_bound));
+                let key = (field_id, normalized_facet.as_ref());
+                let key = BEU16StrCodec::bytes_encode(&key).map_err(heed::Error::Encoding)?;
+                let val = SerdeJson::bytes_encode(&set).map_err(heed::Error::Encoding)?;
+                sorter.insert(key, val)?;
+            }
+        }
+
+        // In this loop we don't need to take care of merging bitmaps
+        // as the grenad sorter already merged them for us.
+        let mut merger_iter = sorter.into_stream_merger_iter()?;
+        while let Some((key_bytes, btreeset_bytes)) = merger_iter.next()? {
+            self.index.facet_id_normalized_string_strings.remap_types::<Bytes, Bytes>().put(
+                wtxn,
+                key_bytes,
+                btreeset_bytes,
+            )?;
+        }
+
+        // We compute one FST by string facet
+        let mut text_fsts = vec![];
+        let mut current_fst: Option<(u16, fst::SetBuilder<Vec<u8>>)> = None;
+        let database =
+            self.index.facet_id_normalized_string_strings.remap_data_type::<DecodeIgnore>();
+        for result in database.iter(wtxn)? {
+            let ((field_id, normalized_facet), _) = result?;
+            current_fst = match current_fst.take() {
+                Some((fid, fst_builder)) if fid != field_id => {
+                    let fst = fst_builder.into_set();
+                    text_fsts.push((fid, fst));
+                    Some((field_id, fst::SetBuilder::memory()))
+                }
+                Some((field_id, fst_builder)) => Some((field_id, fst_builder)),
+                None => Some((field_id, fst::SetBuilder::memory())),
+            };
+            if let Some((_, fst_builder)) = current_fst.as_mut() {
+                fst_builder.insert(normalized_facet)?;
+            }
+        }
+
+        if let Some((field_id, fst_builder)) = current_fst {
+            let fst = fst_builder.into_set();
+            text_fsts.push((field_id, fst));
+        }
+
+        // We write those FSTs in LMDB now
+        for (field_id, fst) in text_fsts {
+            self.index.facet_id_string_fst.put(wtxn, &field_id, &fst)?;
+        }
+
+        Ok(())
+    }
+}

#[cfg(test)]
@@ -285,7 +268,6 @@ pub(crate) mod test_helpers {
    use std::marker::PhantomData;
    use std::rc::Rc;

-    use grenad::MergerBuilder;
    use heed::types::Bytes;
    use heed::{BytesDecode, BytesEncode, Env, RoTxn, RwTxn};
    use roaring::RoaringBitmap;
@@ -298,8 +280,7 @@ pub(crate) mod test_helpers {
    use crate::search::facet::get_highest_level;
    use crate::snapshot_tests::display_bitmap;
    use crate::update::del_add::{DelAdd, KvWriterDelAdd};
-    use crate::update::index_documents::merge_deladd_cbo_roaring_bitmaps;
-    use crate::update::{FacetsUpdateIncrementalInner, MergeFn};
+    use crate::update::FacetsUpdateIncrementalInner;
    use crate::CboRoaringBitmapCodec;

    /// Utility function to generate a string whose position in a lexicographically
@@ -482,13 +463,10 @@ pub(crate) mod test_helpers {
            }
            writer.finish().unwrap();
            let reader = grenad::Reader::new(std::io::Cursor::new(new_data)).unwrap();
-            let mut builder = MergerBuilder::new(merge_deladd_cbo_roaring_bitmaps as MergeFn);
-            builder.push(reader.into_cursor().unwrap());
-            let merger = builder.build();

            let update = FacetsUpdateBulkInner {
                db: self.content,
-                delta_data: Some(merger),
+                delta_data: Some(reader),
                group_size: self.group_size.get(),
                min_level_size: self.min_level_size.get(),
            };
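Both sides of this file keep the same update-strategy heuristic in `execute` above: the bulk (from-scratch) algorithm is preferred once the delta reaches roughly 2% of the database size (see the `comparison_bench::benchmark_facet_indexing` note). A sketch of that decision rule with hypothetical helper naming:

    // bulk update once the delta is at least 1/50th (2%) of the database
    fn should_use_bulk(delta_len: u64, database_len: u64) -> bool {
        delta_len >= database_len / 50
    }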

View File

@@ -22,7 +22,6 @@ use crate::{FieldId, Index, Result};
/// # Panics
///
/// - if reader.is_empty(), this function may panic in some cases
-#[tracing::instrument(level = "trace", skip_all, target = "indexing::documents")]
pub fn enrich_documents_batch<R: Read + Seek>(
    rtxn: &heed::RoTxn,
    index: &Index,
@@ -78,7 +77,7 @@ pub fn enrich_documents_batch<R: Read + Seek>(
        },
        [] => return Ok(Err(UserError::NoPrimaryKeyCandidateFound)),
        [(field_id, name)] => {
-            tracing::info!("Primary key was not specified in index. Inferred to '{name}'");
+            log::info!("Primary key was not specified in index. Inferred to '{name}'");
            PrimaryKey::Flat { name, field_id: *field_id }
        }
        multiple => {
@@ -144,8 +143,6 @@ pub fn enrich_documents_batch<R: Read + Seek>(

/// Retrieve the document id after validating it, returning a `UserError`
/// if the id is invalid or can't be guessed.
-#[tracing::instrument(level = "trace", skip(uuid_buffer, documents_batch_index, document)
-    target = "indexing::documents")]
fn fetch_or_generate_document_id(
    document: &obkv::KvReader<FieldId>,
    documents_batch_index: &DocumentsBatchIndex,

View File

@@ -21,12 +21,12 @@ pub type ScriptLanguageDocidsMap = HashMap<(Script, Language), (RoaringBitmap, R
///
/// Returns the generated internal documents ids and a grenad reader
/// with the list of extracted words from the given chunk of documents.
-#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")]
+#[logging_timer::time]
pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
    obkv_documents: grenad::Reader<R>,
    indexer: GrenadParameters,
    searchable_fields: &Option<HashSet<FieldId>>,
-    stop_words: Option<&fst::Set<Vec<u8>>>,
+    stop_words: Option<&fst::Set<&[u8]>>,
    allowed_separators: Option<&[&str]>,
    dictionary: Option<&[&str]>,
    max_positions_per_attributes: Option<u32>,
@@ -181,11 +181,11 @@ fn searchable_fields_changed(

/// Factorize tokenizer building.
fn tokenizer_builder<'a>(
-    stop_words: Option<&'a fst::Set<Vec<u8>>>,
+    stop_words: Option<&'a fst::Set<&[u8]>>,
    allowed_separators: Option<&'a [&str]>,
    dictionary: Option<&'a [&str]>,
    script_language: Option<&'a HashMap<Script, Vec<Language>>>,
-) -> TokenizerBuilder<'a, Vec<u8>> {
+) -> TokenizerBuilder<'a, &'a [u8]> {
    let mut tokenizer_builder = TokenizerBuilder::new();
    if let Some(stop_words) = stop_words {
        tokenizer_builder.stop_words(stop_words);
    }
@@ -211,7 +211,7 @@ fn lang_safe_tokens_from_document<'a>(
    obkv: &KvReader<FieldId>,
    searchable_fields: &Option<HashSet<FieldId>>,
    tokenizer: &Tokenizer,
-    stop_words: Option<&fst::Set<Vec<u8>>>,
+    stop_words: Option<&fst::Set<&[u8]>>,
    allowed_separators: Option<&[&str]>,
    dictionary: Option<&[&str]>,
    max_positions_per_attributes: u32,
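One side of this signature change stores stop words as an owned `fst::Set<Vec<u8>>` instead of one borrowing a byte slice. A minimal sketch of building such an owned set; the word list is hypothetical, and `Set::from_iter` requires lexicographically sorted input:

    // builds an owned fst set from sorted words
    fn stop_words_set() -> Result<fst::Set<Vec<u8>>, fst::Error> {
        fst::Set::from_iter(["a", "an", "the"])
    }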

View File

@@ -16,7 +16,7 @@ use crate::Result;
///
/// Returns a grenad reader with the list of extracted facet numbers and
/// documents ids from the given chunk of docid facet number positions.
-#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")]
+#[logging_timer::time]
pub fn extract_facet_number_docids<R: io::Read + io::Seek>(
    fid_docid_facet_number: grenad::Reader<R>,
    indexer: GrenadParameters,

Some files were not shown because too many files have changed in this diff.