Compare commits

..

2 Commits

Author            SHA1         Message                                                          Date
Clément Renault   bceaf4f981   Add a log on the time taken by the incremental facet updating   2024-01-25 17:48:31 +01:00
Clément Renault   d29b301618   Disable the facet search                                         2024-01-25 17:47:33 +01:00
120 changed files with 1170 additions and 4228 deletions

View File

@@ -1,2 +0,0 @@
[alias]
xtask = "run --package xtask --"

View File

@@ -27,23 +27,6 @@ Related spec: WIP
- [ ] If prototype validated, merge changes into `main`
- [ ] Update the spec
### Reminders when modifying the Setting API
<!--- Special steps to remind when adding a new index setting -->
- [ ] Ensure the new setting route is at least tested by the [`test_setting_routes` macro](https://github.com/meilisearch/meilisearch/blob/5204c0b60b384cbc79621b6b2176fca086069e8e/meilisearch/tests/settings/get_settings.rs#L276)
- [ ] Ensure Analytics are fully implemented
- [ ] `/settings/my-new-setting` configured in the [`make_setting_routes` macro](https://github.com/meilisearch/meilisearch/blob/5204c0b60b384cbc79621b6b2176fca086069e8e/meilisearch/src/routes/indexes/settings.rs#L141-L165)
- [ ] global `/settings` route configured in the [`update_all` function](https://github.com/meilisearch/meilisearch/blob/5204c0b60b384cbc79621b6b2176fca086069e8e/meilisearch/src/routes/indexes/settings.rs#L655-L751)
- [ ] Ensure the dump serializing is consistent with the `/settings` route serializing, e.g., enums case can be different (`camelCase` in route and `PascalCase` in the dump)
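A minimal sketch of the case mismatch this item warns about, using a hypothetical setting enum (serde's `rename_all` attribute drives each serializer):

```rust
use serde::Serialize;

// Hypothetical setting enum: the `/settings` route serializes variants in
// camelCase while the dump keeps PascalCase, so both sides must stay in sync.
#[derive(Serialize)]
#[serde(rename_all = "camelCase")]
enum RouteSideSetting {
    ByWord,
}

#[derive(Serialize)]
#[serde(rename_all = "PascalCase")]
enum DumpSideSetting {
    ByWord,
}

fn main() {
    // Prints "byWord" then "ByWord" for the same logical variant.
    println!("{}", serde_json::to_string(&RouteSideSetting::ByWord).unwrap());
    println!("{}", serde_json::to_string(&DumpSideSetting::ByWord).unwrap());
}
```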
#### Special cases when adding a setting for an experimental feature
- [ ] ⚠️ API stability: The setting does not appear on the main settings route when the feature has never been enabled (e.g. mark it `Unset` when returned from the index in this situation. See [an example](https://github.com/meilisearch/meilisearch/blob/7a89abd2a025606a42f8b219e539117eb2eb029f/meilisearch-types/src/settings.rs#L608))
- [ ] The setting cannot be set when the feature is disabled, either by the main settings route or the subroute (see [`validate_settings` function](https://github.com/meilisearch/meilisearch/blob/7a89abd2a025606a42f8b219e539117eb2eb029f/meilisearch/src/routes/indexes/settings.rs#L811))
- [ ] If possible, the setting is reset when the feature is disabled (hard if it requires reindexing)
## Impacted teams
<!---Ping the related teams. Ask for the engine manager if any hesitation-->
<!---@meilisearch/docs-team when there is any API change, e.g. settings addition-->

View File

@@ -97,7 +97,7 @@ jobs:
- name: Send CI information to Cloud team
# Do not send if nightly build (i.e. 'schedule' or 'workflow_dispatch' event)
if: github.event_name == 'push'
uses: peter-evans/repository-dispatch@v3
uses: peter-evans/repository-dispatch@v2
with:
token: ${{ secrets.MEILI_BOT_GH_PAT }}
repository: meilisearch/meilisearch-cloud

View File

@@ -22,7 +22,7 @@ jobs:
outputs:
docker-image: ${{ steps.define-image.outputs.docker-image }}
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v3
- name: Define the Docker image we need to use
id: define-image
run: |
@@ -46,11 +46,11 @@ jobs:
MEILISEARCH_VERSION: ${{ needs.define-docker-image.outputs.docker-image }}
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v3
with:
repository: meilisearch/meilisearch-dotnet
- name: Setup .NET Core
uses: actions/setup-dotnet@v4
uses: actions/setup-dotnet@v3
with:
dotnet-version: "6.0.x"
- name: Install dependencies
@@ -75,12 +75,12 @@ jobs:
ports:
- '7700:7700'
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v3
with:
repository: meilisearch/meilisearch-dart
- uses: dart-lang/setup-dart@v1
with:
sdk: 'latest'
sdk: 3.1.1
- name: Install dependencies
run: dart pub get
- name: Run integration tests
@@ -100,10 +100,10 @@ jobs:
- '7700:7700'
steps:
- name: Set up Go
uses: actions/setup-go@v5
uses: actions/setup-go@v4
with:
go-version: stable
- uses: actions/checkout@v4
- uses: actions/checkout@v3
with:
repository: meilisearch/meilisearch-go
- name: Get dependencies
@@ -129,11 +129,11 @@ jobs:
ports:
- '7700:7700'
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v3
with:
repository: meilisearch/meilisearch-java
- name: Set up Java
uses: actions/setup-java@v4
uses: actions/setup-java@v3
with:
java-version: 8
distribution: 'zulu'
@@ -156,7 +156,7 @@ jobs:
ports:
- '7700:7700'
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v3
with:
repository: meilisearch/meilisearch-js
- name: Setup node
@@ -191,7 +191,7 @@ jobs:
ports:
- '7700:7700'
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v3
with:
repository: meilisearch/meilisearch-php
- name: Install PHP
@@ -220,11 +220,11 @@ jobs:
ports:
- '7700:7700'
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v3
with:
repository: meilisearch/meilisearch-python
- name: Set up Python
uses: actions/setup-python@v5
uses: actions/setup-python@v4
- name: Install pipenv
uses: dschep/install-pipenv-action@v1
- name: Install dependencies
@@ -245,7 +245,7 @@ jobs:
ports:
- '7700:7700'
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v3
with:
repository: meilisearch/meilisearch-ruby
- name: Set up Ruby 3
@@ -270,7 +270,7 @@ jobs:
ports:
- '7700:7700'
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v3
with:
repository: meilisearch/meilisearch-rust
- name: Build
@@ -291,7 +291,7 @@ jobs:
ports:
- '7700:7700'
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v3
with:
repository: meilisearch/meilisearch-swift
- name: Run tests
@@ -314,7 +314,7 @@ jobs:
ports:
- '7700:7700'
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v3
with:
repository: meilisearch/meilisearch-js-plugins
- name: Setup node
@@ -345,7 +345,7 @@ jobs:
ports:
- '7700:7700'
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v3
with:
repository: meilisearch/meilisearch-rails
- name: Set up Ruby 3
@@ -369,7 +369,7 @@ jobs:
ports:
- '7700:7700'
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v3
with:
repository: meilisearch/meilisearch-symfony
- name: Install PHP

View File

@@ -66,10 +66,6 @@ jobs:
- uses: actions/checkout@v3
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.7.1
- uses: actions-rs/toolchain@v1
with:
toolchain: stable
override: true
- name: Run cargo check without any default features
uses: actions-rs/cargo@v1
with:
@@ -82,7 +78,7 @@ jobs:
args: --locked --release --all
test-all-features:
name: Tests almost all features
name: Tests all features
runs-on: ubuntu-latest
container:
# Use ubuntu-18.04 to compile with glibc 2.27, which are the production expectations
@@ -98,12 +94,16 @@ jobs:
with:
toolchain: stable
override: true
- name: Run cargo build with almost all features
run: |
cargo build --workspace --locked --release --features "$(cargo xtask list-features --exclude-feature cuda)"
- name: Run cargo test with almost all features
run: |
cargo test --workspace --locked --release --features "$(cargo xtask list-features --exclude-feature cuda)"
- name: Run cargo build with all features
uses: actions-rs/cargo@v1
with:
command: build
args: --workspace --locked --release --all-features
- name: Run cargo test with all features
uses: actions-rs/cargo@v1
with:
command: test
args: --workspace --locked --release --all-features
test-disabled-tokenization:
name: Test disabled tokenization
@@ -164,7 +164,7 @@ jobs:
- uses: actions-rs/toolchain@v1
with:
profile: minimal
toolchain: 1.75.0
toolchain: 1.71.1
override: true
components: clippy
- name: Cache dependencies

View File

@@ -75,12 +75,6 @@ If you get a "Too many open files" error you might want to increase the open fil
ulimit -Sn 3000
```
#### Build tools
Meilisearch follows the [cargo xtask](https://github.com/matklad/cargo-xtask) workflow to provide some build tools.
Run `cargo xtask --help` from the root of the repository to find out what is available.
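A rough sketch of the xtask pattern: the `[alias]` entry at the top of this diff makes `cargo xtask` run a plain workspace binary that dispatches on its first argument. Only `list-features` is shown here, with made-up output:

```rust
// Invoked through the `.cargo/config.toml` alias shown at the top of this diff:
// `cargo xtask <command>` runs this workspace binary with `<command>` as argv[1].
use std::env;

fn main() {
    match env::args().nth(1).as_deref() {
        // Made-up output: the real command inspects the workspace feature list.
        Some("list-features") => println!("all-tokenizations chinese japanese cuda"),
        Some(other) => eprintln!("unknown xtask command: {other}"),
        None => eprintln!("usage: cargo xtask <command>"),
    }
}
```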
## Git Guidelines
### Git Branches

Cargo.lock (generated): 1258 changes, file diff suppressed because it is too large

View File

@@ -16,16 +16,11 @@ members = [
"json-depth-checker",
"benchmarks",
"fuzzers",
"tracing-trace",
"xtask",
]
[workspace.package]
version = "1.7.0"
authors = [
"Quentin de Quelen <quentin@dequelen.me>",
"Clément Renault <clement@meilisearch.com>",
]
version = "1.6.0"
authors = ["Quentin de Quelen <quentin@dequelen.me>", "Clément Renault <clement@meilisearch.com>"]
description = "Meilisearch HTTP server"
homepage = "https://meilisearch.com"
readme = "README.md"

View File

@@ -1,5 +1,5 @@
# Compile
FROM rust:1.75.0-alpine3.18 AS compiler
FROM rust:1.71.1-alpine3.18 AS compiler
RUN apk add -q --update-cache --no-cache build-base openssl-dev

View File

@@ -1,6 +1,6 @@
MIT License
Copyright (c) 2019-2024 Meili SAS
Copyright (c) 2019-2022 Meili SAS
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal

View File

@@ -41,10 +41,10 @@ Meilisearch helps you shape a delightful search experience in a snap, offering f
## ✨ Features
- **Search-as-you-type:** find search results in less than 50 milliseconds
- **[Typo tolerance](https://www.meilisearch.com/docs/learn/configuration/typo_tolerance?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** get relevant matches even when queries contain typos and misspellings
- **[Filtering](https://www.meilisearch.com/docs/learn/fine_tuning_results/filtering?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features) and [faceted search](https://www.meilisearch.com/docs/learn/fine_tuning_results/faceted_search?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** enhance your users' search experience with custom filters and build a faceted search interface in a few lines of code
- **[Typo tolerance](https://www.meilisearch.com/docs/learn/getting_started/customizing_relevancy?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features#typo-tolerance):** get relevant matches even when queries contain typos and misspellings
- **[Filtering](https://www.meilisearch.com/docs/learn/fine_tuning_results/filtering?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features) and [faceted search](https://www.meilisearch.com/docs/learn/fine_tuning_results/faceted_search?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** enhance your user's search experience with custom filters and build a faceted search interface in a few lines of code
- **[Sorting](https://www.meilisearch.com/docs/learn/fine_tuning_results/sorting?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** sort results based on price, date, or pretty much anything else your users need
- **[Synonym support](https://www.meilisearch.com/docs/learn/configuration/synonyms?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** configure synonyms to include more relevant content in your search results
- **[Synonym support](https://www.meilisearch.com/docs/learn/getting_started/customizing_relevancy?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features#synonyms):** configure synonyms to include more relevant content in your search results
- **[Geosearch](https://www.meilisearch.com/docs/learn/fine_tuning_results/geosearch?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** filter and sort documents based on geographic data
- **[Extensive language support](https://www.meilisearch.com/docs/learn/what_is_meilisearch/language?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** search datasets in any language, with optimized support for Chinese, Japanese, Hebrew, and languages using the Latin alphabet
- **[Security management](https://www.meilisearch.com/docs/learn/security/master_api_keys?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** control which users can access what data with API keys that allow fine-grained permissions handling
@@ -61,6 +61,8 @@ You can consult Meilisearch's documentation at [https://www.meilisearch.com/docs
For basic instructions on how to set up Meilisearch, add documents to an index, and search for documents, take a look at our [Quick Start](https://www.meilisearch.com/docs/learn/getting_started/quick_start?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=get-started) guide.
You may also want to check out [Meilisearch 101](https://www.meilisearch.com/docs/learn/getting_started/filtering_and_sorting?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=get-started) for an introduction to some of Meilisearch's most popular features.
## ⚡ Supercharge your Meilisearch experience
Say goodbye to server deployment and manual updates with [Meilisearch Cloud](https://www.meilisearch.com/cloud?utm_campaign=oss&utm_source=github&utm_medium=meilisearch). No credit card required.
@@ -99,7 +101,7 @@ Meilisearch is a search engine created by [Meili](https://www.welcometothejungle
- For feature requests, please visit our [product repository](https://github.com/meilisearch/product/discussions)
- Found a bug? Open an [issue](https://github.com/meilisearch/meilisearch/issues)!
- Want to be part of our Discord community? [Join us!](https://discord.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=contact)
- Want to be part of our Discord community? [Join us!](https://discord.gg/meilisearch)
Thank you for your support!

View File

@@ -106,7 +106,7 @@
},
"editorMode": "builder",
"exemplar": true,
"expr": "meilisearch_index_count{job=\"$job\", instance=\"$instance\"}",
"expr": "meilisearch_index_count{job=\"meilisearch\", instance=\"$instance\"}",
"interval": "",
"legendFormat": "",
"range": true,
@@ -165,7 +165,7 @@
"type": "prometheus"
},
"editorMode": "builder",
"expr": "meilisearch_index_docs_count{job=\"$job\", index=\"$Index\", instance=\"$instance\"}",
"expr": "meilisearch_index_docs_count{job=\"meilisearch\", index=\"$Index\", instance=\"$instance\"}",
"hide": false,
"range": true,
"refId": "A"
@@ -228,7 +228,7 @@
},
"editorMode": "builder",
"exemplar": true,
"expr": "round(increase(meilisearch_http_requests_total{method=\"POST\", path=\"/indexes/$Index/search\", job=\"$job\"}[1h]))",
"expr": "round(increase(meilisearch_http_requests_total{method=\"POST\", path=\"/indexes/$Index/search\", job=\"meilisearch\"}[1h]))",
"interval": "",
"legendFormat": "",
"range": true,
@@ -288,7 +288,7 @@
},
"editorMode": "builder",
"exemplar": true,
"expr": "round(increase(meilisearch_http_requests_total{method=\"POST\", path=\"/indexes/$Index/search\", job=\"$job\"}[24h]))",
"expr": "round(increase(meilisearch_http_requests_total{method=\"POST\", path=\"/indexes/$Index/search\", job=\"meilisearch\"}[24h]))",
"interval": "",
"legendFormat": "",
"range": true,
@@ -348,7 +348,7 @@
},
"editorMode": "builder",
"exemplar": true,
"expr": "round(increase(meilisearch_http_requests_total{method=\"POST\", path=\"/indexes/$Index/search\", job=\"$job\"}[30d]))",
"expr": "round(increase(meilisearch_http_requests_total{method=\"POST\", path=\"/indexes/$Index/search\", job=\"meilisearch\"}[30d]))",
"interval": "",
"legendFormat": "",
"range": true,
@@ -447,7 +447,7 @@
},
"editorMode": "builder",
"exemplar": true,
"expr": "meilisearch_db_size_bytes{job=\"$job\", instance=\"$instance\"}",
"expr": "meilisearch_db_size_bytes{job=\"meilisearch\", instance=\"$instance\"}",
"interval": "",
"legendFormat": "Database size on disk",
"range": true,
@@ -458,7 +458,7 @@
"type": "prometheus"
},
"editorMode": "builder",
"expr": "meilisearch_used_db_size_bytes{job=\"$job\", instance=\"$instance\"}",
"expr": "meilisearch_used_db_size_bytes{job=\"meilisearch\", instance=\"$instance\"}",
"hide": false,
"legendFormat": "Used bytes",
"range": true,
@@ -553,7 +553,7 @@
},
"editorMode": "builder",
"exemplar": true,
"expr": "rate(meilisearch_http_response_time_seconds_sum{instance=\"$instance\", job=\"$job\"}[5m]) / rate(meilisearch_http_response_time_seconds_count[5m])",
"expr": "rate(meilisearch_http_response_time_seconds_sum{instance=\"$instance\", job=\"meilisearch\"}[5m]) / rate(meilisearch_http_response_time_seconds_count[5m])",
"interval": "",
"legendFormat": "{{method}} {{path}}",
"range": true,
@@ -646,7 +646,7 @@
},
"editorMode": "builder",
"exemplar": true,
"expr": "rate(meilisearch_http_requests_total{instance=\"$instance\", job=\"$job\"}[5m])",
"expr": "rate(meilisearch_http_requests_total{instance=\"$instance\", job=\"meilisearch\"}[5m])",
"interval": "",
"legendFormat": "{{method}} {{path}}",
"range": true,
@@ -744,7 +744,7 @@
},
"editorMode": "builder",
"exemplar": true,
"expr": "sum by(le) (increase(meilisearch_http_response_time_seconds_bucket{path=\"/indexes/$Index/search\", instance=\"$instance\", job=\"$job\"}[30s]))",
"expr": "sum by(le) (increase(meilisearch_http_response_time_seconds_bucket{path=\"/indexes/$Index/search\", instance=\"$instance\", job=\"meilisearch\"}[30s]))",
"format": "heatmap",
"interval": "",
"legendFormat": "{{le}}",
@@ -854,7 +854,7 @@
},
"editorMode": "builder",
"exemplar": true,
"expr": "meilisearch_nb_tasks{instance=\"$instance\", job=\"$job\", kind=\"statuses\"}",
"expr": "meilisearch_nb_tasks{instance=\"$instance\", job=\"meilisearch\", kind=\"statuses\"}",
"interval": "",
"legendFormat": "{{value}} ",
"range": true,
@@ -947,7 +947,7 @@
},
"editorMode": "builder",
"exemplar": true,
"expr": "meilisearch_nb_tasks{instance=\"$instance\", job=\"$job\", kind=\"types\"}",
"expr": "meilisearch_nb_tasks{instance=\"$instance\", job=\"meilisearch\", kind=\"types\"}",
"interval": "",
"legendFormat": "{{value}} ",
"range": true,
@@ -1040,7 +1040,7 @@
},
"editorMode": "builder",
"exemplar": true,
"expr": "meilisearch_nb_tasks{instance=\"$instance\", job=\"$job\", kind=\"indexes\"}",
"expr": "meilisearch_nb_tasks{instance=\"$instance\", job=\"meilisearch\", kind=\"indexes\"}",
"interval": "",
"legendFormat": "{{value}} ",
"range": true,
@@ -1161,7 +1161,7 @@
},
"editorMode": "builder",
"exemplar": true,
"expr": "rate(process_cpu_seconds_total{job=\"$job\", instance=\"$instance\"}[1m])",
"expr": "rate(process_cpu_seconds_total{job=\"meilisearch\", instance=\"$instance\"}[1m])",
"interval": "",
"legendFormat": "process",
"range": true,
@@ -1264,7 +1264,7 @@
},
"editorMode": "builder",
"exemplar": true,
"expr": "process_resident_memory_bytes{job=\"$job\", instance=\"$instance\"} / 1024 / 1024",
"expr": "process_resident_memory_bytes{job=\"meilisearch\", instance=\"$instance\"} / 1024 / 1024",
"interval": "",
"legendFormat": "process",
"range": true,
@@ -1342,33 +1342,6 @@
"skipUrlSync": false,
"sort": 0,
"type": "query"
},
{
"current": {
"selected": true,
"text": "meilisearch",
"value": "meilisearch"
},
"datasource": {
"type": "prometheus"
},
"definition": "label_values(job)",
"description": "Prometheus job_name from scrape config (default is meilisearch)",
"hide": 0,
"includeAll": false,
"label": "Job",
"multi": false,
"name": "job",
"options": [],
"query": {
"query": "label_values(job)",
"refId": "StandardVariableQuery"
},
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 0,
"type": "query"
}
]
},
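The dashboard panels above query metrics that the server exposes through the `prometheus` crate; a hedged sketch of how such a labelled counter is declared and incremented (wiring simplified; the `job` label seen in the queries is added by Prometheus at scrape time, not by the application):

```rust
use prometheus::{Encoder, IntCounterVec, Opts, Registry, TextEncoder};

fn main() {
    let registry = Registry::new();
    // Counter and label names taken from the PromQL queries above.
    let http_requests = IntCounterVec::new(
        Opts::new("meilisearch_http_requests_total", "Number of HTTP requests"),
        &["method", "path"],
    )
    .unwrap();
    registry.register(Box::new(http_requests.clone())).unwrap();

    // One POST search request recorded against its route.
    http_requests.with_label_values(&["POST", "/indexes/movies/search"]).inc();

    // Render the exposition format that a /metrics route would return.
    let mut buffer = Vec::new();
    TextEncoder::new().encode(&registry.gather(), &mut buffer).unwrap();
    println!("{}", String::from_utf8(buffer).unwrap());
}
```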

View File

@@ -11,24 +11,24 @@ edition.workspace = true
license.workspace = true
[dependencies]
anyhow = "1.0.79"
csv = "1.3.0"
anyhow = "1.0.70"
csv = "1.2.1"
milli = { path = "../milli" }
mimalloc = { version = "0.1.39", default-features = false }
serde_json = { version = "1.0.111", features = ["preserve_order"] }
mimalloc = { version = "0.1.37", default-features = false }
serde_json = { version = "1.0.95", features = ["preserve_order"] }
[dev-dependencies]
criterion = { version = "0.5.1", features = ["html_reports"] }
rand = "0.8.5"
rand_chacha = "0.3.1"
roaring = "0.10.2"
roaring = "0.10.1"
[build-dependencies]
anyhow = "1.0.79"
bytes = "1.5.0"
anyhow = "1.0.70"
bytes = "1.4.0"
convert_case = "0.6.0"
flate2 = "1.0.28"
reqwest = { version = "0.11.23", features = ["blocking", "rustls-tls"], default-features = false }
flate2 = "1.0.25"
reqwest = { version = "0.11.16", features = ["blocking", "rustls-tls"], default-features = false }
[features]
default = ["milli/all-tokenizations"]

View File

@@ -11,22 +11,22 @@ readme.workspace = true
license.workspace = true
[dependencies]
anyhow = "1.0.79"
flate2 = "1.0.28"
http = "0.2.11"
anyhow = "1.0.70"
flate2 = "1.0.25"
http = "0.2.9"
log = "0.4.17"
meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-types = { path = "../meilisearch-types" }
once_cell = "1.19.0"
regex = "1.10.2"
roaring = { version = "0.10.2", features = ["serde"] }
serde = { version = "1.0.195", features = ["derive"] }
serde_json = { version = "1.0.111", features = ["preserve_order"] }
tar = "0.4.40"
tempfile = "3.9.0"
thiserror = "1.0.56"
time = { version = "0.3.31", features = ["serde-well-known", "formatting", "parsing", "macros"] }
tracing = "0.1.40"
uuid = { version = "1.6.1", features = ["serde", "v4"] }
once_cell = "1.17.1"
regex = "1.7.3"
roaring = { version = "0.10.1", features = ["serde"] }
serde = { version = "1.0.160", features = ["derive"] }
serde_json = { version = "1.0.95", features = ["preserve_order"] }
tar = "0.4.38"
tempfile = "3.5.0"
thiserror = "1.0.40"
time = { version = "0.3.20", features = ["serde-well-known", "formatting", "parsing", "macros"] }
uuid = { version = "1.3.1", features = ["serde", "v4"] }
[dev-dependencies]
big_s = "1.0.2"

View File

@@ -120,7 +120,7 @@ impl From<v1::settings::Settings> for v2::Settings<v2::Unchecked> {
criterion.as_ref().map(ToString::to_string)
}
Err(()) => {
tracing::warn!(
log::warn!(
"Could not import the following ranking rule: `{}`.",
ranking_rule
);
@@ -152,11 +152,11 @@ impl From<v1::update::UpdateStatus> for Option<v2::updates::UpdateStatus> {
use v2::updates::UpdateStatus as UpdateStatusV2;
Some(match source {
UpdateStatusV1::Enqueued { content } => {
tracing::warn!(
log::warn!(
"Cannot import task {} (importing enqueued tasks from v1 dumps is unsupported)",
content.update_id
);
tracing::warn!("Task will be skipped in the queue of imported tasks.");
log::warn!("Task will be skipped in the queue of imported tasks.");
return None;
}
@@ -229,7 +229,7 @@ impl From<v1::update::UpdateType> for Option<v2::updates::UpdateMeta> {
Some(match source {
v1::update::UpdateType::ClearAll => v2::updates::UpdateMeta::ClearDocuments,
v1::update::UpdateType::Customs => {
tracing::warn!("Ignoring task with type 'Customs' that is no longer supported");
log::warn!("Ignoring task with type 'Customs' that is no longer supported");
return None;
}
v1::update::UpdateType::DocumentsAddition { .. } => {
@@ -296,7 +296,7 @@ impl From<v1::settings::RankingRule> for Option<v2::settings::Criterion> {
v1::settings::RankingRule::Proximity => Some(v2::settings::Criterion::Proximity),
v1::settings::RankingRule::Attribute => Some(v2::settings::Criterion::Attribute),
v1::settings::RankingRule::WordsPosition => {
tracing::warn!("Removing the 'WordsPosition' ranking rule that is no longer supported, please check the resulting ranking rules of your indexes");
log::warn!("Removing the 'WordsPosition' ranking rule that is no longer supported, please check the resulting ranking rules of your indexes");
None
}
v1::settings::RankingRule::Exactness => Some(v2::settings::Criterion::Exactness),
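The substitutions in this file (and in the compat modules below) swap between the `log` and `tracing` macro families; a small illustrative comparison of the two call styles:

```rust
// Illustrative only: the same warning through both façades. `tracing` can carry
// structured fields alongside the message, `log` takes format arguments only.
fn warn_unknown_ranking_rule(ranking_rule: &str) {
    log::warn!("Could not import the following ranking rule: `{}`.", ranking_rule);
    tracing::warn!(%ranking_rule, "Could not import this ranking rule");
}
```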

View File

@@ -146,8 +146,8 @@ impl From<v2::updates::UpdateStatus> for v3::updates::UpdateStatus {
started_processing_at: processing.started_processing_at,
}),
Err(e) => {
tracing::warn!("Error with task {}: {}", processing.from.update_id, e);
tracing::warn!("Task will be marked as `Failed`.");
log::warn!("Error with task {}: {}", processing.from.update_id, e);
log::warn!("Task will be marked as `Failed`.");
v3::updates::UpdateStatus::Failed(v3::updates::Failed {
from: v3::updates::Processing {
from: v3::updates::Enqueued {
@@ -172,8 +172,8 @@ impl From<v2::updates::UpdateStatus> for v3::updates::UpdateStatus {
enqueued_at: enqueued.enqueued_at,
}),
Err(e) => {
tracing::warn!("Error with task {}: {}", enqueued.update_id, e);
tracing::warn!("Task will be marked as `Failed`.");
log::warn!("Error with task {}: {}", enqueued.update_id, e);
log::warn!("Task will be marked as `Failed`.");
v3::updates::UpdateStatus::Failed(v3::updates::Failed {
from: v3::updates::Processing {
from: v3::updates::Enqueued {
@@ -353,7 +353,7 @@ impl From<String> for v3::Code {
"malformed_payload" => v3::Code::MalformedPayload,
"missing_payload" => v3::Code::MissingPayload,
other => {
tracing::warn!("Unknown error code {}", other);
log::warn!("Unknown error code {}", other);
v3::Code::UnretrievableErrorCode
}
}

View File

@@ -76,20 +76,20 @@ impl CompatV3ToV4 {
let index_uid = match index_uid {
Some(uid) => uid,
None => {
tracing::warn!(
log::warn!(
"Error while importing the update {}.",
task.update.id()
);
tracing::warn!(
log::warn!(
"The index associated to the uuid `{}` could not be retrieved.",
task.uuid.to_string()
);
if task.update.is_finished() {
// we're fucking with his history but not his data, that's ok-ish.
tracing::warn!("The index-uuid will be set as `unknown`.");
log::warn!("The index-uuid will be set as `unknown`.");
String::from("unknown")
} else {
tracing::warn!("The task will be ignored.");
log::warn!("The task will be ignored.");
return None;
}
}

View File

@@ -305,7 +305,7 @@ impl From<v4::ResponseError> for v5::ResponseError {
"invalid_api_key_expires_at" => v5::Code::InvalidApiKeyExpiresAt,
"invalid_api_key_description" => v5::Code::InvalidApiKeyDescription,
other => {
tracing::warn!("Unknown error code {}", other);
log::warn!("Unknown error code {}", other);
v5::Code::UnretrievableErrorCode
}
};

View File

@@ -304,7 +304,7 @@ impl From<v5::ResponseError> for v6::ResponseError {
"immutable_field" => v6::Code::BadRequest,
"api_key_already_exists" => v6::Code::ApiKeyAlreadyExists,
other => {
tracing::warn!("Unknown error code {}", other);
log::warn!("Unknown error code {}", other);
v6::Code::UnretrievableErrorCode
}
};
@@ -329,7 +329,7 @@ impl<T> From<v5::Settings<T>> for v6::Settings<v6::Unchecked> {
new_ranking_rules.push(new_rule);
}
Err(_) => {
tracing::warn!("Error while importing settings. The ranking rule `{rule}` does not exist anymore.")
log::warn!("Error while importing settings. The ranking rule `{rule}` does not exist anymore.")
}
}
}

View File

@@ -1,6 +1,5 @@
use serde::{Deserialize, Serialize};
#[allow(clippy::enum_variant_names)]
#[derive(Serialize, Deserialize, Debug, Clone, Copy)]
pub enum Code {
// index related error

View File

@@ -95,7 +95,6 @@ impl fmt::Display for ErrorType {
}
}
#[allow(clippy::enum_variant_names)]
#[derive(Serialize, Deserialize, Debug, Clone, Copy)]
pub enum Code {
// index related error

View File

@@ -31,7 +31,6 @@ impl ResponseError {
}
}
#[allow(clippy::enum_variant_names)]
#[derive(Deserialize, Debug, Clone, Copy)]
#[cfg_attr(test, derive(serde::Serialize))]
pub enum Code {

View File

@@ -2,10 +2,10 @@ use std::fs::{self, File};
use std::io::{BufRead, BufReader, ErrorKind};
use std::path::Path;
use log::debug;
pub use meilisearch_types::milli;
use tempfile::TempDir;
use time::OffsetDateTime;
use tracing::debug;
use uuid::Uuid;
use super::Document;

View File

@@ -11,9 +11,9 @@ edition.workspace = true
license.workspace = true
[dependencies]
tempfile = "3.9.0"
thiserror = "1.0.56"
uuid = { version = "1.6.1", features = ["serde", "v4"] }
tempfile = "3.5.0"
thiserror = "1.0.40"
uuid = { version = "1.3.1", features = ["serde", "v4"] }
[dev-dependencies]
faux = "0.1.10"
faux = "0.1.9"

View File

@@ -13,8 +13,8 @@ license.workspace = true
[dependencies]
nom = "7.1.3"
nom_locate = "4.2.0"
unescaper = "0.1.3"
nom_locate = "4.1.0"
unescaper = "0.1.2"
[dev-dependencies]
insta = "1.34.0"
insta = "1.29.0"

View File

@@ -11,10 +11,10 @@ edition.workspace = true
license.workspace = true
[dependencies]
arbitrary = { version = "1.3.2", features = ["derive"] }
clap = { version = "4.4.17", features = ["derive"] }
fastrand = "2.0.1"
arbitrary = { version = "1.3.0", features = ["derive"] }
clap = { version = "4.3.0", features = ["derive"] }
fastrand = "2.0.0"
milli = { path = "../milli" }
serde = { version = "1.0.195", features = ["derive"] }
serde_json = { version = "1.0.111", features = ["preserve_order"] }
tempfile = "3.9.0"
serde = { version = "1.0.160", features = ["derive"] }
serde_json = { version = "1.0.95", features = ["preserve_order"] }
tempfile = "3.5.0"

View File

@@ -11,36 +11,32 @@ edition.workspace = true
license.workspace = true
[dependencies]
anyhow = "1.0.79"
anyhow = "1.0.70"
bincode = "1.3.3"
csv = "1.3.0"
csv = "1.2.1"
derive_builder = "0.12.0"
dump = { path = "../dump" }
enum-iterator = "1.5.0"
enum-iterator = "1.4.0"
file-store = { path = "../file-store" }
flate2 = "1.0.28"
log = "0.4.17"
meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-types = { path = "../meilisearch-types" }
page_size = "0.5.0"
puffin = { version = "0.16.0", features = ["serialization"] }
roaring = { version = "0.10.2", features = ["serde"] }
serde = { version = "1.0.195", features = ["derive"] }
serde_json = { version = "1.0.111", features = ["preserve_order"] }
roaring = { version = "0.10.1", features = ["serde"] }
serde = { version = "1.0.160", features = ["derive"] }
serde_json = { version = "1.0.95", features = ["preserve_order"] }
synchronoise = "1.0.1"
tempfile = "3.9.0"
thiserror = "1.0.56"
time = { version = "0.3.31", features = [
"serde-well-known",
"formatting",
"parsing",
"macros",
] }
tracing = "0.1.40"
tempfile = "3.5.0"
thiserror = "1.0.40"
time = { version = "0.3.20", features = ["serde-well-known", "formatting", "parsing", "macros"] }
ureq = "2.9.1"
uuid = { version = "1.6.1", features = ["serde", "v4"] }
uuid = { version = "1.3.1", features = ["serde", "v4"] }
[dev-dependencies]
big_s = "1.0.2"
crossbeam = "0.8.4"
insta = { version = "1.34.0", features = ["json", "redactions"] }
crossbeam = "0.8.2"
insta = { version = "1.29.0", features = ["json", "redactions"] }
meili-snap = { path = "../meili-snap" }
nelson = { git = "https://github.com/meilisearch/nelson.git", rev = "675f13885548fb415ead8fbb447e9e6d9314000a"}

View File

@@ -24,6 +24,7 @@ use std::fs::{self, File};
use std::io::BufWriter;
use dump::IndexMetadata;
use log::{debug, error, info, trace};
use meilisearch_types::error::Code;
use meilisearch_types::heed::{RoTxn, RwTxn};
use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader};
@@ -59,7 +60,7 @@ pub(crate) enum Batch {
/// The list of tasks that were processing when this task cancelation appeared.
previous_processing_tasks: RoaringBitmap,
},
TaskDeletions(Vec<Task>),
TaskDeletion(Task),
SnapshotCreation(Vec<Task>),
Dump(Task),
IndexOperation {
@@ -145,12 +146,13 @@ impl Batch {
pub fn ids(&self) -> Vec<TaskId> {
match self {
Batch::TaskCancelation { task, .. }
| Batch::TaskDeletion(task)
| Batch::Dump(task)
| Batch::IndexCreation { task, .. }
| Batch::IndexUpdate { task, .. } => vec![task.uid],
Batch::SnapshotCreation(tasks)
| Batch::TaskDeletions(tasks)
| Batch::IndexDeletion { tasks, .. } => tasks.iter().map(|task| task.uid).collect(),
Batch::SnapshotCreation(tasks) | Batch::IndexDeletion { tasks, .. } => {
tasks.iter().map(|task| task.uid).collect()
}
Batch::IndexOperation { op, .. } => match op {
IndexOperation::DocumentOperation { tasks, .. }
| IndexOperation::Settings { tasks, .. }
@@ -178,7 +180,7 @@ impl Batch {
use Batch::*;
match self {
TaskCancelation { .. }
| TaskDeletions(_)
| TaskDeletion(_)
| SnapshotCreation(_)
| Dump(_)
| IndexSwap { .. } => None,
@@ -197,7 +199,7 @@ impl fmt::Display for Batch {
let tasks = self.ids();
match self {
Batch::TaskCancelation { .. } => f.write_str("TaskCancelation")?,
Batch::TaskDeletions(_) => f.write_str("TaskDeletion")?,
Batch::TaskDeletion(_) => f.write_str("TaskDeletion")?,
Batch::SnapshotCreation(_) => f.write_str("SnapshotCreation")?,
Batch::Dump(_) => f.write_str("Dump")?,
Batch::IndexOperation { op, .. } => write!(f, "{op}")?,
@@ -513,7 +515,6 @@ impl IndexScheduler {
/// 3. We get the *next* snapshot to process.
/// 4. We get the *next* dump to process.
/// 5. We get the *next* tasks to process for a specific index.
#[tracing::instrument(level = "trace", skip(self, rtxn), target = "indexing::scheduler")]
pub(crate) fn create_next_batch(&self, rtxn: &RoTxn) -> Result<Option<Batch>> {
#[cfg(test)]
self.maybe_fail(crate::tests::FailureLocation::InsideCreateBatch)?;
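The attribute toggled above wraps the whole method in a trace span; a minimal sketch of the same pattern on a stand-in function (names are illustrative):

```rust
// Every call opens a span named after the function, under the given target,
// skipping the large argument and recording `batch` as a span field.
#[tracing::instrument(level = "trace", skip(scheduler_state), target = "indexing::scheduler", fields(batch = %batch_name))]
fn create_next_batch_demo(scheduler_state: &[u8], batch_name: &str) {
    tracing::trace!(state_len = scheduler_state.len(), "creating the next batch");
}
```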
@@ -538,9 +539,9 @@ impl IndexScheduler {
// 2. we get the next task to delete
let to_delete = self.get_kind(rtxn, Kind::TaskDeletion)? & enqueued;
if !to_delete.is_empty() {
let tasks = self.get_existing_tasks(rtxn, to_delete)?;
return Ok(Some(Batch::TaskDeletions(tasks)));
if let Some(task_id) = to_delete.min() {
let task = self.get_task(rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?;
return Ok(Some(Batch::TaskDeletion(task)));
}
// 3. we batch the snapshot.
@@ -619,7 +620,6 @@ impl IndexScheduler {
/// The list of tasks that were processed. The metadata of each task in the returned
/// list is updated accordingly, with the exception of its date fields
/// [`finished_at`](meilisearch_types::tasks::Task::finished_at) and [`started_at`](meilisearch_types::tasks::Task::started_at).
#[tracing::instrument(level = "trace", skip(self, batch), target = "indexing::scheduler", fields(batch=batch.to_string()))]
pub(crate) fn process_batch(&self, batch: Batch) -> Result<Vec<Task>> {
#[cfg(test)]
{
@@ -669,10 +669,9 @@ impl IndexScheduler {
Ok(()) => {
for content_uuid in canceled_tasks_content_uuids {
if let Err(error) = self.delete_update_file(content_uuid) {
tracing::error!(
file_content_uuid = %content_uuid,
%error,
"Failed deleting content file"
error!(
"We failed deleting the content file indentified as {}: {}",
content_uuid, error
)
}
}
@@ -682,43 +681,31 @@ impl IndexScheduler {
Ok(vec![task])
}
Batch::TaskDeletions(mut tasks) => {
Batch::TaskDeletion(mut task) => {
// 1. Retrieve the tasks that matched the query at enqueue-time.
let mut matched_tasks = RoaringBitmap::new();
for task in tasks.iter() {
let matched_tasks =
if let KindWithContent::TaskDeletion { tasks, query: _ } = &task.kind {
matched_tasks |= tasks;
tasks
} else {
unreachable!()
}
}
let mut wtxn = self.env.write_txn()?;
let mut deleted_tasks = self.delete_matched_tasks(&mut wtxn, &matched_tasks)?;
wtxn.commit()?;
for task in tasks.iter_mut() {
task.status = Status::Succeeded;
let KindWithContent::TaskDeletion { tasks, query: _ } = &task.kind else {
unreachable!()
};
let deleted_tasks_count = deleted_tasks.intersection_len(tasks);
deleted_tasks -= tasks;
let mut wtxn = self.env.write_txn()?;
let deleted_tasks_count = self.delete_matched_tasks(&mut wtxn, matched_tasks)?;
match &mut task.details {
Some(Details::TaskDeletion {
matched_tasks: _,
deleted_tasks,
original_filter: _,
}) => {
*deleted_tasks = Some(deleted_tasks_count);
}
_ => unreachable!(),
task.status = Status::Succeeded;
match &mut task.details {
Some(Details::TaskDeletion {
matched_tasks: _,
deleted_tasks,
original_filter: _,
}) => {
*deleted_tasks = Some(deleted_tasks_count);
}
_ => unreachable!(),
}
Ok(tasks)
wtxn.commit()?;
Ok(vec![task])
}
Batch::SnapshotCreation(mut tasks) => {
fs::create_dir_all(&self.snapshots_path)?;
@@ -971,10 +958,7 @@ impl IndexScheduler {
match res {
Ok(_) => (),
Err(e) => tracing::error!(
error = &e as &dyn std::error::Error,
"Could not write the stats of the index"
),
Err(e) => error!("Could not write the stats of the index {}", e),
}
Ok(tasks)
@@ -1002,7 +986,7 @@ impl IndexScheduler {
builder.set_primary_key(primary_key);
let must_stop_processing = self.must_stop_processing.clone();
builder.execute(
|indexing_step| tracing::debug!(update = ?indexing_step),
|indexing_step| debug!("update: {:?}", indexing_step),
|| must_stop_processing.get(),
)?;
index_wtxn.commit()?;
@@ -1029,10 +1013,7 @@ impl IndexScheduler {
match res {
Ok(_) => (),
Err(e) => tracing::error!(
error = &e as &dyn std::error::Error,
"Could not write the stats of the index"
),
Err(e) => error!("Could not write the stats of the index {}", e),
}
Ok(vec![task])
@@ -1151,11 +1132,6 @@ impl IndexScheduler {
///
/// ## Return
/// The list of processed tasks.
#[tracing::instrument(
level = "trace",
skip(self, index_wtxn, index),
target = "indexing::scheduler"
)]
fn apply_index_operation<'i>(
&self,
index_wtxn: &mut RwTxn<'i>,
@@ -1216,7 +1192,7 @@ impl IndexScheduler {
milli::update::Settings::new(index_wtxn, index, indexer_config);
builder.set_primary_key(primary_key);
builder.execute(
|indexing_step| tracing::debug!(update = ?indexing_step),
|indexing_step| debug!("update: {:?}", indexing_step),
|| must_stop_processing.clone().get(),
)?;
primary_key_has_been_set = true;
@@ -1235,7 +1211,7 @@ impl IndexScheduler {
index,
indexer_config,
config,
|indexing_step| tracing::trace!(?indexing_step, "Update"),
|indexing_step| trace!("update: {:?}", indexing_step),
|| must_stop_processing.get(),
)?;
@@ -1307,7 +1283,7 @@ impl IndexScheduler {
if !tasks.iter().all(|res| res.error.is_some()) {
let addition = builder.execute()?;
tracing::info!(indexing_result = ?addition, "document indexing done");
info!("document addition done: {:?}", addition);
} else if primary_key_has_been_set {
// Everything failed but we've set a primary key.
// We need to remove it.
@@ -1315,7 +1291,7 @@ impl IndexScheduler {
milli::update::Settings::new(index_wtxn, index, indexer_config);
builder.reset_primary_key();
builder.execute(
|indexing_step| tracing::trace!(update = ?indexing_step),
|indexing_step| trace!("update: {:?}", indexing_step),
|| must_stop_processing.clone().get(),
)?;
}
@@ -1385,7 +1361,7 @@ impl IndexScheduler {
let must_stop_processing = self.must_stop_processing.clone();
builder.execute(
|indexing_step| tracing::debug!(update = ?indexing_step),
|indexing_step| debug!("update: {:?}", indexing_step),
|| must_stop_processing.get(),
)?;
@@ -1459,11 +1435,7 @@ impl IndexScheduler {
/// Delete each given task from all the databases (if it is deleteable).
///
/// Return the number of tasks that were actually deleted.
fn delete_matched_tasks(
&self,
wtxn: &mut RwTxn,
matched_tasks: &RoaringBitmap,
) -> Result<RoaringBitmap> {
fn delete_matched_tasks(&self, wtxn: &mut RwTxn, matched_tasks: &RoaringBitmap) -> Result<u64> {
// 1. Remove from this list the tasks that we are not allowed to delete
let enqueued_tasks = self.get_status(wtxn, Status::Enqueued)?;
let processing_tasks = &self.processing_tasks.read().unwrap().processing.clone();
@@ -1528,7 +1500,7 @@ impl IndexScheduler {
}
}
Ok(to_delete_tasks)
Ok(to_delete_tasks.len())
}
/// Cancel each given task from all the databases (if it is cancelable).
@@ -1597,7 +1569,7 @@ fn delete_document_by_filter<'a>(
index,
indexer_config,
config,
|indexing_step| tracing::debug!(update = ?indexing_step),
|indexing_step| debug!("update: {:?}", indexing_step),
|| must_stop_processing.get(),
)?;
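The batched `TaskDeletions` arm earlier in this file attributes the deleted ids back to each deletion task through roaring-bitmap intersection; a small sketch of that accounting with made-up ids:

```rust
use roaring::RoaringBitmap;

fn main() {
    // Ids that were actually removed from the queue by the batched deletion.
    let mut deleted: RoaringBitmap = (0..10u32).collect();
    // Ids each TaskDeletion task had matched at enqueue time.
    let per_task_matches: Vec<RoaringBitmap> =
        vec![(0..4u32).collect(), (4..10u32).collect()];

    for matched in &per_task_matches {
        // Count how many of this task's matches were really deleted...
        let deleted_tasks_count = deleted.intersection_len(matched);
        // ...then remove them so they are not attributed to a later task too.
        deleted -= matched;
        println!("this deletion task removed {deleted_tasks_count} tasks");
    }
}
```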

View File

@@ -30,6 +30,19 @@ impl RoFeatures {
self.runtime
}
pub fn check_score_details(&self) -> Result<()> {
if self.runtime.score_details {
Ok(())
} else {
Err(FeatureNotEnabledError {
disabled_action: "Computing score details",
feature: "score details",
issue_link: "https://github.com/meilisearch/product/discussions/674",
}
.into())
}
}
pub fn check_metrics(&self) -> Result<()> {
if self.runtime.metrics {
Ok(())
@@ -43,19 +56,6 @@ impl RoFeatures {
}
}
pub fn check_logs_route(&self) -> Result<()> {
if self.runtime.logs_route {
Ok(())
} else {
Err(FeatureNotEnabledError {
disabled_action: "getting logs through the `/logs/stream` route",
feature: "logs route",
issue_link: "https://github.com/orgs/meilisearch/discussions/721",
}
.into())
}
}
pub fn check_vector(&self, disabled_action: &'static str) -> Result<()> {
if self.runtime.vector_store {
Ok(())
@@ -94,7 +94,6 @@ impl FeatureData {
runtime_features_db.get(&txn, EXPERIMENTAL_FEATURES)?.unwrap_or_default();
let runtime = Arc::new(RwLock::new(RuntimeTogglableFeatures {
metrics: instance_features.metrics || persisted_features.metrics,
logs_route: instance_features.logs_route || persisted_features.logs_route,
..persisted_features
}));
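Both `check_*` methods above follow the same gate-or-error shape; a hypothetical caller would look roughly like this (handler name made up):

```rust
// Hypothetical handler: bail out with FeatureNotEnabledError unless the
// runtime flag for score details has been switched on.
fn search_with_score_details(features: &RoFeatures) -> Result<()> {
    features.check_score_details()?;
    // ... run the search and attach the score details to the response ...
    Ok(())
}
```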

View File

@@ -3,13 +3,13 @@ use std::sync::{Arc, RwLock};
use std::time::Duration;
use std::{fs, thread};
use log::error;
use meilisearch_types::heed::types::{SerdeJson, Str};
use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn};
use meilisearch_types::milli::update::IndexerConfig;
use meilisearch_types::milli::{FieldDistribution, Index};
use serde::{Deserialize, Serialize};
use time::OffsetDateTime;
use tracing::error;
use uuid::Uuid;
use self::index_map::IndexMap;

View File

@@ -535,17 +535,17 @@ impl IndexScheduler {
let budget = if Self::is_good_heed(tasks_path, DEFAULT_BUDGET) {
DEFAULT_BUDGET
} else {
tracing::debug!("determining budget with dichotomic search");
log::debug!("determining budget with dichotomic search");
utils::dichotomic_search(DEFAULT_BUDGET / 2, |map_size| {
Self::is_good_heed(tasks_path, map_size)
})
};
tracing::debug!("memmap budget: {budget}B");
log::debug!("memmap budget: {budget}B");
let mut budget = budget / 2;
if task_db_size > (budget / 2) {
task_db_size = clamp_to_page_size(budget * 2 / 5);
tracing::debug!(
log::debug!(
"Decreasing max size of task DB to {task_db_size}B due to constrained memory space"
);
}
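`dichotomic_search` probes for the largest budget the system still accepts; a rough sketch of the idea (not the actual `utils` implementation):

```rust
// Rough sketch: grow the candidate until the predicate fails, then binary-search
// the boundary. Returns the largest value for which `is_good` held.
fn dichotomic_search(start: usize, is_good: impl Fn(usize) -> bool) -> usize {
    let mut lo = 0;
    let mut hi = start.max(1);
    while is_good(hi) {
        lo = hi;
        hi *= 2;
    }
    while hi - lo > 1 {
        let mid = lo + (hi - lo) / 2;
        if is_good(mid) {
            lo = mid;
        } else {
            hi = mid;
        }
    }
    lo
}
```

Here the predicate would wrap `Self::is_good_heed(tasks_path, map_size)`, as in the call above.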
@@ -555,13 +555,13 @@ impl IndexScheduler {
let budget = budget;
let task_db_size = task_db_size;
tracing::debug!("index budget: {budget}B");
log::debug!("index budget: {budget}B");
let mut index_count = budget / base_map_size;
if index_count < 2 {
// take a bit less than half than the budget to make sure we can always afford to open an index
let map_size = (budget * 2) / 5;
// single index of max budget
tracing::debug!("1 index of {map_size}B can be opened simultaneously.");
log::debug!("1 index of {map_size}B can be opened simultaneously.");
return IndexBudget { map_size, index_count: 1, task_db_size };
}
// give us some space for an additional index when the cache is already full
@@ -570,7 +570,7 @@ impl IndexScheduler {
if index_count > max_index_count {
index_count = max_index_count;
}
tracing::debug!("Up to {index_count} indexes of {base_map_size}B opened simultaneously.");
log::debug!("Up to {index_count} indexes of {base_map_size}B opened simultaneously.");
IndexBudget { map_size: base_map_size, index_count, task_db_size }
}
@@ -617,7 +617,7 @@ impl IndexScheduler {
Ok(TickOutcome::TickAgain(_)) => (),
Ok(TickOutcome::WaitForSignal) => run.wake_up.wait(),
Err(e) => {
tracing::error!("{e}");
log::error!("{e}");
// Wait one second when an irrecoverable error occurs.
if !e.is_recoverable() {
std::thread::sleep(Duration::from_secs(1));
@@ -634,15 +634,15 @@ impl IndexScheduler {
let mut file = match File::create(format!("{}.puffin", now)) {
Ok(file) => file,
Err(e) => {
tracing::error!("{e}");
log::error!("{e}");
continue;
}
};
if let Err(e) = frame_view.save_to_writer(&mut file) {
tracing::error!("{e}");
log::error!("{e}");
}
if let Err(e) = file.sync_all() {
tracing::error!("{e}");
log::error!("{e}");
}
// We erase this frame view as it is no more useful. We want to
// measure the new frames now that we exported the previous ones.
@@ -1190,10 +1190,10 @@ impl IndexScheduler {
self.update_task(&mut wtxn, &task)
.map_err(|e| Error::TaskDatabaseUpdate(Box::new(e)))?;
if let Err(e) = self.delete_persisted_task_data(&task) {
tracing::error!("Failure to delete the content files associated with task {}. Error: {e}", task.uid);
log::error!("Failure to delete the content files associated with task {}. Error: {e}", task.uid);
}
}
tracing::info!("A batch of tasks was successfully completed.");
log::info!("A batch of tasks was successfully completed.");
}
// If we have an abortion error we must stop the tick here and re-schedule tasks.
Err(Error::Milli(milli::Error::InternalError(
@@ -1247,7 +1247,7 @@ impl IndexScheduler {
self.maybe_fail(tests::FailureLocation::UpdatingTaskAfterProcessBatchFailure)?;
if let Err(e) = self.delete_persisted_task_data(&task) {
tracing::error!("Failure to delete the content files associated with task {}. Error: {e}", task.uid);
log::error!("Failure to delete the content files associated with task {}. Error: {e}", task.uid);
}
self.update_task(&mut wtxn, &task)
.map_err(|e| Error::TaskDatabaseUpdate(Box::new(e)))?;
@@ -1341,7 +1341,7 @@ impl IndexScheduler {
};
if let Err(e) = request.send(reader) {
tracing::error!("While sending data to the webhook: {e}");
log::error!("While sending data to the webhook: {e}");
}
}
@@ -1367,12 +1367,12 @@ impl IndexScheduler {
// /!\ the len must be at least 2 or else we might enter an infinite loop where we only delete
// the deletion tasks we enqueued ourselves.
if to_delete.len() < 2 {
tracing::warn!("The task queue is almost full, but no task can be deleted yet.");
log::warn!("The task queue is almost full, but no task can be deleted yet.");
// the only thing we can do is hope that the user tasks are going to finish
return Ok(());
}
tracing::info!(
log::info!(
"The task queue is almost full. Deleting the oldest {} finished tasks.",
to_delete.len()
);
@@ -2244,7 +2244,10 @@ mod tests {
.unwrap();
index_scheduler.assert_internally_consistent();
}
handle.advance_one_successful_batch();
for _ in 0..2 {
handle.advance_one_successful_batch();
index_scheduler.assert_internally_consistent();
}
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "task_deletion_processed");
}

View File

@@ -34,10 +34,12 @@ catto: { number_of_documents: 1, field_distribution: {"id": 1} }
[timestamp] [3,]
----------------------------------------------------------------------
### Started At:
[timestamp] [2,3,]
[timestamp] [2,]
[timestamp] [3,]
----------------------------------------------------------------------
### Finished At:
[timestamp] [2,3,]
[timestamp] [2,]
[timestamp] [3,]
----------------------------------------------------------------------
### File Store:
00000000-0000-0000-0000-000000000001

View File

@@ -11,6 +11,6 @@ edition.workspace = true
license.workspace = true
[dependencies]
insta = { version = "^1.34.0", features = ["json", "redactions"] }
insta = { version = "^1.29.0", features = ["json", "redactions"] }
md5 = "0.7.0"
once_cell = "1.19"
once_cell = "1.17"

View File

@@ -11,16 +11,16 @@ edition.workspace = true
license.workspace = true
[dependencies]
base64 = "0.21.7"
enum-iterator = "1.5.0"
base64 = "0.21.0"
enum-iterator = "1.4.0"
hmac = "0.12.1"
maplit = "1.0.2"
meilisearch-types = { path = "../meilisearch-types" }
rand = "0.8.5"
roaring = { version = "0.10.2", features = ["serde"] }
serde = { version = "1.0.195", features = ["derive"] }
serde_json = { version = "1.0.111", features = ["preserve_order"] }
sha2 = "0.10.8"
thiserror = "1.0.56"
time = { version = "0.3.31", features = ["serde-well-known", "formatting", "parsing", "macros"] }
uuid = { version = "1.6.1", features = ["serde", "v4"] }
roaring = { version = "0.10.1", features = ["serde"] }
serde = { version = "1.0.160", features = ["derive"] }
serde_json = { version = "1.0.95", features = ["preserve_order"] }
sha2 = "0.10.6"
thiserror = "1.0.40"
time = { version = "0.3.20", features = ["serde-well-known", "formatting", "parsing", "macros"] }
uuid = { version = "1.3.1", features = ["serde", "v4"] }

View File

@@ -11,31 +11,31 @@ edition.workspace = true
license.workspace = true
[dependencies]
actix-web = { version = "4.5.1", default-features = false }
anyhow = "1.0.79"
actix-web = { version = "4.3.1", default-features = false }
anyhow = "1.0.70"
convert_case = "0.6.0"
csv = "1.3.0"
deserr = { version = "0.6.1", features = ["actix-web"] }
either = { version = "1.9.0", features = ["serde"] }
enum-iterator = "1.5.0"
csv = "1.2.1"
deserr = { version = "0.6.0", features = ["actix-web"] }
either = { version = "1.8.1", features = ["serde"] }
enum-iterator = "1.4.0"
file-store = { path = "../file-store" }
flate2 = "1.0.28"
flate2 = "1.0.25"
fst = "0.4.7"
memmap2 = "0.7.1"
milli = { path = "../milli" }
roaring = { version = "0.10.2", features = ["serde"] }
serde = { version = "1.0.195", features = ["derive"] }
roaring = { version = "0.10.1", features = ["serde"] }
serde = { version = "1.0.160", features = ["derive"] }
serde-cs = "0.2.4"
serde_json = "1.0.111"
tar = "0.4.40"
tempfile = "3.9.0"
thiserror = "1.0.56"
time = { version = "0.3.31", features = ["serde-well-known", "formatting", "parsing", "macros"] }
tokio = "1.35"
uuid = { version = "1.6.1", features = ["serde", "v4"] }
serde_json = "1.0.95"
tar = "0.4.38"
tempfile = "3.5.0"
thiserror = "1.0.40"
time = { version = "0.3.20", features = ["serde-well-known", "formatting", "parsing", "macros"] }
tokio = "1.27"
uuid = { version = "1.3.1", features = ["serde", "v4"] }
[dev-dependencies]
insta = "1.34.0"
insta = "1.29.0"
meili-snap = { path = "../meili-snap" }
[features]

View File

@@ -310,8 +310,6 @@ TooManyVectors , InvalidRequest , BAD_REQUEST ;
UnretrievableDocument , Internal , BAD_REQUEST ;
UnretrievableErrorCode , InvalidRequest , BAD_REQUEST ;
UnsupportedMediaType , InvalidRequest , UNSUPPORTED_MEDIA_TYPE ;
// Experimental features
VectorEmbeddingError , InvalidRequest , BAD_REQUEST
}
@@ -349,9 +347,6 @@ impl ErrorCode for milli::Error {
UserError::InvalidFieldForSource { .. }
| UserError::MissingFieldForSource { .. }
| UserError::InvalidOpenAiModel { .. }
| UserError::InvalidOpenAiModelDimensions { .. }
| UserError::InvalidOpenAiModelDimensionsMax { .. }
| UserError::InvalidSettingsDimensions { .. }
| UserError::InvalidPrompt(_) => Code::InvalidSettingsEmbedders,
UserError::TooManyEmbedders(_) => Code::InvalidSettingsEmbedders,
UserError::InvalidPromptForEmbeddings(..) => Code::InvalidSettingsEmbedders,

View File

@@ -3,14 +3,13 @@ use serde::{Deserialize, Serialize};
#[derive(Serialize, Deserialize, Debug, Clone, Copy, Default, PartialEq, Eq)]
#[serde(rename_all = "camelCase", default)]
pub struct RuntimeTogglableFeatures {
pub score_details: bool,
pub vector_store: bool,
pub metrics: bool,
pub logs_route: bool,
pub export_puffin_reports: bool,
}
#[derive(Default, Debug, Clone, Copy)]
pub struct InstanceTogglableFeatures {
pub metrics: bool,
pub logs_route: bool,
}

View File

@@ -13,126 +13,124 @@ license.workspace = true
default-run = "meilisearch"
[dependencies]
actix-cors = "0.7.0"
actix-http = { version = "3.6.0", default-features = false, features = [
actix-cors = "0.6.4"
actix-http = { version = "3.3.1", default-features = false, features = [
"compress-brotli",
"compress-gzip",
"rustls-0_21",
"rustls",
] }
actix-utils = "3.0.1"
actix-web = { version = "4.5.1", default-features = false, features = [
actix-web = { version = "4.3.1", default-features = false, features = [
"macros",
"compress-brotli",
"compress-gzip",
"cookies",
"rustls-0_21",
"rustls",
] }
actix-web-static-files = { git = "https://github.com/kilork/actix-web-static-files.git", rev = "2d3b6160", optional = true }
anyhow = { version = "1.0.79", features = ["backtrace"] }
anyhow = { version = "1.0.70", features = ["backtrace"] }
async-stream = "0.3.5"
async-trait = "0.1.77"
bstr = "1.9.0"
async-trait = "0.1.68"
bstr = "1.4.0"
byte-unit = { version = "4.0.19", default-features = false, features = [
"std",
"serde",
] }
bytes = "1.5.0"
clap = { version = "4.4.17", features = ["derive", "env"] }
crossbeam-channel = "0.5.11"
deserr = { version = "0.6.1", features = ["actix-web"] }
bytes = "1.4.0"
clap = { version = "4.2.1", features = ["derive", "env"] }
crossbeam-channel = "0.5.8"
deserr = { version = "0.6.0", features = ["actix-web"] }
dump = { path = "../dump" }
either = "1.9.0"
either = "1.8.1"
env_logger = "0.10.0"
file-store = { path = "../file-store" }
flate2 = "1.0.28"
flate2 = "1.0.25"
fst = "0.4.7"
futures = "0.3.30"
futures-util = "0.3.30"
http = "0.2.11"
futures = "0.3.28"
futures-util = "0.3.28"
http = "0.2.9"
index-scheduler = { path = "../index-scheduler" }
indexmap = { version = "2.1.0", features = ["serde"] }
is-terminal = "0.4.10"
indexmap = { version = "2.0.0", features = ["serde"] }
is-terminal = "0.4.8"
itertools = "0.11.0"
jsonwebtoken = "9.2.0"
jsonwebtoken = "8.3.0"
lazy_static = "1.4.0"
log = "0.4.17"
meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-types = { path = "../meilisearch-types" }
mimalloc = { version = "0.1.39", default-features = false }
mimalloc = { version = "0.1.37", default-features = false }
mime = "0.3.17"
num_cpus = "1.16.0"
obkv = "0.2.1"
once_cell = "1.19.0"
ordered-float = "4.2.0"
num_cpus = "1.15.0"
obkv = "0.2.0"
once_cell = "1.17.1"
ordered-float = "3.7.0"
parking_lot = "0.12.1"
permissive-json-pointer = { path = "../permissive-json-pointer" }
pin-project-lite = "0.2.13"
pin-project-lite = "0.2.9"
platform-dirs = "0.3.0"
prometheus = { version = "0.13.3", features = ["process"] }
puffin = { version = "0.16.0", features = ["serialization"] }
rand = "0.8.5"
rayon = "1.8.0"
regex = "1.10.2"
reqwest = { version = "0.11.23", features = [
rayon = "1.7.0"
regex = "1.7.3"
reqwest = { version = "0.11.16", features = [
"rustls-tls",
"json",
], default-features = false }
rustls = "0.21.6"
rustls = "0.20.8"
rustls-pemfile = "1.0.2"
segment = { version = "0.2.3", optional = true }
serde = { version = "1.0.195", features = ["derive"] }
serde_json = { version = "1.0.111", features = ["preserve_order"] }
sha2 = "0.10.8"
siphasher = "1.0.0"
slice-group-by = "0.3.1"
segment = { version = "0.2.2", optional = true }
serde = { version = "1.0.160", features = ["derive"] }
serde_json = { version = "1.0.95", features = ["preserve_order"] }
sha2 = "0.10.6"
siphasher = "0.3.10"
slice-group-by = "0.3.0"
static-files = { version = "0.2.3", optional = true }
sysinfo = "0.30.5"
tar = "0.4.40"
tempfile = "3.9.0"
thiserror = "1.0.56"
time = { version = "0.3.31", features = [
sysinfo = "0.29.7"
tar = "0.4.38"
tempfile = "3.5.0"
thiserror = "1.0.40"
time = { version = "0.3.20", features = [
"serde-well-known",
"formatting",
"parsing",
"macros",
] }
tokio = { version = "1.35.1", features = ["full"] }
tokio-stream = "0.1.14"
toml = "0.8.8"
uuid = { version = "1.6.1", features = ["serde", "v4"] }
walkdir = "2.4.0"
tokio = { version = "1.27.0", features = ["full"] }
tokio-stream = "0.1.12"
toml = "0.7.3"
uuid = { version = "1.3.1", features = ["serde", "v4"] }
walkdir = "2.3.3"
yaup = "0.2.1"
serde_urlencoded = "0.7.1"
termcolor = "1.4.1"
termcolor = "1.2.0"
url = { version = "2.5.0", features = ["serde"] }
tracing = "0.1.40"
tracing-subscriber = "0.3.18"
tracing-trace = { version = "0.1.0", path = "../tracing-trace" }
tracing-actix-web = "0.7.9"
[dev-dependencies]
actix-rt = "2.9.0"
actix-rt = "2.8.0"
assert-json-diff = "2.0.2"
brotli = "3.4.0"
insta = "1.34.0"
manifest-dir-macros = "0.1.18"
brotli = "3.3.4"
insta = "1.29.0"
manifest-dir-macros = "0.1.16"
maplit = "1.0.2"
meili-snap = { path = "../meili-snap" }
temp-env = "0.3.6"
urlencoding = "2.1.3"
temp-env = "0.3.3"
urlencoding = "2.1.2"
yaup = "0.2.1"
[build-dependencies]
anyhow = { version = "1.0.79", optional = true }
cargo_toml = { version = "0.18.0", optional = true }
anyhow = { version = "1.0.70", optional = true }
cargo_toml = { version = "0.15.2", optional = true }
hex = { version = "0.4.3", optional = true }
reqwest = { version = "0.11.23", features = [
reqwest = { version = "0.11.16", features = [
"blocking",
"rustls-tls",
], default-features = false, optional = true }
sha-1 = { version = "0.10.1", optional = true }
static-files = { version = "0.2.3", optional = true }
tempfile = { version = "3.9.0", optional = true }
tempfile = { version = "3.5.0", optional = true }
vergen = { version = "7.5.1", default-features = false, features = ["git"] }
zip = { version = "0.6.6", optional = true }
zip = { version = "0.6.4", optional = true }
[features]
default = ["analytics", "meilisearch-types/all-tokenizations", "mini-dashboard"]
@@ -156,5 +154,5 @@ greek = ["meilisearch-types/greek"]
khmer = ["meilisearch-types/khmer"]
[package.metadata.mini-dashboard]
assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.13/build.zip"
sha1 = "e20cc9b390003c6c844f4b8bcc5c5013191a77ff"
assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.12/build.zip"
sha1 = "acfe9a018c93eb0604ea87ee87bff7df5474e18e"

View File

@@ -18,7 +18,7 @@ use segment::message::{Identify, Track, User};
use segment::{AutoBatcher, Batcher, HttpClient};
use serde::Serialize;
use serde_json::{json, Value};
use sysinfo::{Disks, System};
use sysinfo::{DiskExt, System, SystemExt};
use time::OffsetDateTime;
use tokio::select;
use tokio::sync::mpsc::{self, Receiver, Sender};
@@ -250,7 +250,6 @@ impl super::Analytics for SegmentAnalytics {
struct Infos {
env: String,
experimental_enable_metrics: bool,
experimental_enable_logs_route: bool,
experimental_reduce_indexing_memory_usage: bool,
experimental_max_number_of_batched_tasks: usize,
db_path: bool,
@@ -288,7 +287,6 @@ impl From<Opt> for Infos {
let Opt {
db_path,
experimental_enable_metrics,
experimental_enable_logs_route,
experimental_reduce_indexing_memory_usage,
experimental_max_number_of_batched_tasks,
http_addr,
@@ -335,7 +333,6 @@ impl From<Opt> for Infos {
Self {
env,
experimental_enable_metrics,
experimental_enable_logs_route,
experimental_reduce_indexing_memory_usage,
db_path: db_path != PathBuf::from("./data.ms"),
import_dump: import_dump.is_some(),
@@ -389,17 +386,16 @@ impl Segment {
fn compute_traits(opt: &Opt, stats: Stats) -> Value {
static FIRST_START_TIMESTAMP: Lazy<Instant> = Lazy::new(Instant::now);
static SYSTEM: Lazy<Value> = Lazy::new(|| {
let disks = Disks::new_with_refreshed_list();
let mut sys = System::new_all();
sys.refresh_all();
let kernel_version = System::kernel_version()
.and_then(|k| k.split_once('-').map(|(k, _)| k.to_string()));
let kernel_version =
sys.kernel_version().and_then(|k| k.split_once('-').map(|(k, _)| k.to_string()));
json!({
"distribution": System::name(),
"distribution": sys.name(),
"kernel_version": kernel_version,
"cores": sys.cpus().len(),
"ram_size": sys.total_memory(),
"disk_size": disks.iter().map(|disk| disk.total_space()).max(),
"disk_size": sys.disks().iter().map(|disk| disk.total_space()).max(),
"server_provider": std::env::var("MEILI_SERVER_PROVIDER").ok(),
})
});
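As context for the hunk above, here is a minimal self-contained sketch of the trait-based sysinfo 0.29 API this branch switches back to (the `system_report` helper name is illustrative, and `serde_json` is assumed to be in scope as it is in this file): in 0.29 the accessors live on the `SystemExt`/`DiskExt` extension traits, whereas 0.30 exposes `System::name()`/`System::kernel_version()` as associated functions and moves disks into a separate `Disks` collection.

// Sketch only: sysinfo 0.29-style system report, mirroring the restored code above.
use sysinfo::{DiskExt, System, SystemExt};

fn system_report() -> serde_json::Value {
    let mut sys = System::new_all();
    sys.refresh_all();
    serde_json::json!({
        "distribution": sys.name(),
        "kernel_version": sys.kernel_version(),
        "cores": sys.cpus().len(),
        "ram_size": sys.total_memory(),
        "disk_size": sys.disks().iter().map(|disk| disk.total_space()).max(),
    })
}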

View File

@@ -12,8 +12,6 @@ pub enum MeilisearchHttpError {
#[error("A Content-Type header is missing. Accepted values for the Content-Type header are: {}",
.0.iter().map(|s| format!("`{}`", s)).collect::<Vec<_>>().join(", "))]
MissingContentType(Vec<String>),
#[error("The `/logs/stream` route is currently in use by someone else.")]
AlreadyUsedLogRoute,
#[error("The Content-Type `{0}` does not support the use of a csv delimiter. The csv delimiter can only be used with the Content-Type `text/csv`.")]
CsvDelimiterWithWrongContentType(String),
#[error(
@@ -61,7 +59,6 @@ impl ErrorCode for MeilisearchHttpError {
fn error_code(&self) -> Code {
match self {
MeilisearchHttpError::MissingContentType(_) => Code::MissingContentType,
MeilisearchHttpError::AlreadyUsedLogRoute => Code::BadRequest,
MeilisearchHttpError::CsvDelimiterWithWrongContentType(_) => Code::InvalidContentType,
MeilisearchHttpError::MissingPayload(_) => Code::MissingPayload,
MeilisearchHttpError::InvalidContentType(_, _) => Code::InvalidContentType,

View File

@@ -29,6 +29,7 @@ use error::PayloadError;
use extractors::payload::PayloadConfig;
use http::header::CONTENT_TYPE;
use index_scheduler::{IndexScheduler, IndexSchedulerOptions};
use log::error;
use meilisearch_auth::AuthController;
use meilisearch_types::milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
use meilisearch_types::milli::update::{IndexDocumentsConfig, IndexDocumentsMethod};
@@ -38,8 +39,6 @@ use meilisearch_types::versioning::{check_version_file, create_version_file};
use meilisearch_types::{compression, milli, VERSION_FILE_NAME};
pub use option::Opt;
use option::ScheduleSnapshot;
use tracing::{error, info_span};
use tracing_subscriber::filter::Targets;
use crate::error::MeilisearchHttpError;
@@ -87,21 +86,10 @@ fn is_empty_db(db_path: impl AsRef<Path>) -> bool {
}
}
/// The handle used to update the logs at runtime. Must be accessible from the `main.rs` and the `route/logs.rs`.
pub type LogRouteHandle =
tracing_subscriber::reload::Handle<LogRouteType, tracing_subscriber::Registry>;
pub type LogRouteType = tracing_subscriber::filter::Filtered<
Option<Box<dyn tracing_subscriber::Layer<tracing_subscriber::Registry> + Send + Sync>>,
Targets,
tracing_subscriber::Registry,
>;
pub fn create_app(
index_scheduler: Data<IndexScheduler>,
auth_controller: Data<AuthController>,
opt: Opt,
logs: LogRouteHandle,
analytics: Arc<dyn Analytics>,
enable_dashboard: bool,
) -> actix_web::App<
@@ -120,7 +108,6 @@ pub fn create_app(
index_scheduler.clone(),
auth_controller.clone(),
&opt,
logs,
analytics.clone(),
)
})
@@ -136,49 +123,11 @@ pub fn create_app(
.allow_any_method()
.max_age(86_400), // 24h
)
.wrap(tracing_actix_web::TracingLogger::<AwebTracingLogger>::new())
.wrap(actix_web::middleware::Logger::default())
.wrap(actix_web::middleware::Compress::default())
.wrap(actix_web::middleware::NormalizePath::new(actix_web::middleware::TrailingSlash::Trim))
}
struct AwebTracingLogger;
impl tracing_actix_web::RootSpanBuilder for AwebTracingLogger {
fn on_request_start(request: &actix_web::dev::ServiceRequest) -> tracing::Span {
use tracing::field::Empty;
let conn_info = request.connection_info();
let headers = request.headers();
let user_agent = headers
.get(http::header::USER_AGENT)
.map(|value| String::from_utf8_lossy(value.as_bytes()).into_owned())
.unwrap_or_default();
info_span!("HTTP request", method = %request.method(), host = conn_info.host(), route = %request.path(), query_parameters = %request.query_string(), %user_agent, status_code = Empty, error = Empty)
}
fn on_request_end<B: MessageBody>(
span: tracing::Span,
outcome: &Result<ServiceResponse<B>, actix_web::Error>,
) {
match &outcome {
Ok(response) => {
let code: i32 = response.response().status().as_u16().into();
span.record("status_code", code);
if let Some(error) = response.response().error() {
// use the status code already constructed for the outgoing HTTP response
span.record("error", &tracing::field::display(error.as_response_error()));
}
}
Err(error) => {
let code: i32 = error.error_response().status().as_u16().into();
span.record("status_code", code);
span.record("error", &tracing::field::display(error.as_response_error()));
}
};
}
}
enum OnFailure {
RemoveDb,
KeepDb,
@@ -331,15 +280,15 @@ fn import_dump(
let mut dump_reader = dump::DumpReader::open(reader)?;
if let Some(date) = dump_reader.date() {
tracing::info!(
version = ?dump_reader.version(), // TODO: get the meilisearch version instead of the dump version
%date,
"Importing a dump of meilisearch"
log::info!(
"Importing a dump of meilisearch `{:?}` from the {}",
dump_reader.version(), // TODO: get the meilisearch version instead of the dump version
date
);
} else {
tracing::info!(
version = ?dump_reader.version(), // TODO: get the meilisearch version instead of the dump version
"Importing a dump of meilisearch",
log::info!(
"Importing a dump of meilisearch `{:?}`",
dump_reader.version(), // TODO: get the meilisearch version instead of the dump version
);
}
@@ -373,7 +322,7 @@ fn import_dump(
for index_reader in dump_reader.indexes()? {
let mut index_reader = index_reader?;
let metadata = index_reader.metadata();
tracing::info!("Importing index `{}`.", metadata.uid);
log::info!("Importing index `{}`.", metadata.uid);
let date = Some((metadata.created_at, metadata.updated_at));
let index = index_scheduler.create_raw_index(&metadata.uid, date)?;
@@ -387,15 +336,14 @@ fn import_dump(
}
// 4.2 Import the settings.
tracing::info!("Importing the settings.");
log::info!("Importing the settings.");
let settings = index_reader.settings()?;
apply_settings_to_builder(&settings, &mut builder);
builder
.execute(|indexing_step| tracing::debug!("update: {:?}", indexing_step), || false)?;
builder.execute(|indexing_step| log::debug!("update: {:?}", indexing_step), || false)?;
// 4.3 Import the documents.
// 4.3.1 We need to recreate the grenad+obkv format accepted by the index.
tracing::info!("Importing the documents.");
log::info!("Importing the documents.");
let file = tempfile::tempfile()?;
let mut builder = DocumentsBatchBuilder::new(BufWriter::new(file));
for document in index_reader.documents()? {
@@ -417,16 +365,15 @@ fn import_dump(
update_method: IndexDocumentsMethod::ReplaceDocuments,
..Default::default()
},
|indexing_step| tracing::trace!("update: {:?}", indexing_step),
|indexing_step| log::trace!("update: {:?}", indexing_step),
|| false,
)?;
let (builder, user_result) = builder.add_documents(reader)?;
let user_result = user_result?;
tracing::info!(documents_found = user_result, "{} documents found.", user_result);
log::info!("{} documents found.", user_result?);
builder.execute()?;
wtxn.commit()?;
tracing::info!("All documents successfully imported.");
log::info!("All documents successfully imported.");
}
let mut index_scheduler_dump = index_scheduler.register_dumped_task()?;
@@ -444,7 +391,6 @@ pub fn configure_data(
index_scheduler: Data<IndexScheduler>,
auth: Data<AuthController>,
opt: &Opt,
logs: LogRouteHandle,
analytics: Arc<dyn Analytics>,
) {
let http_payload_size_limit = opt.http_payload_size_limit.get_bytes() as usize;
@@ -452,7 +398,6 @@ pub fn configure_data(
.app_data(index_scheduler)
.app_data(auth)
.app_data(web::Data::from(analytics))
.app_data(web::Data::new(logs))
.app_data(
web::JsonConfig::default()
.limit(http_payload_size_limit)

View File

@@ -1,7 +1,6 @@
use std::env;
use std::io::{stderr, Write};
use std::path::PathBuf;
use std::str::FromStr;
use std::sync::Arc;
use actix_web::http::KeepAlive;
@@ -10,60 +9,37 @@ use actix_web::HttpServer;
use index_scheduler::IndexScheduler;
use is_terminal::IsTerminal;
use meilisearch::analytics::Analytics;
use meilisearch::{
analytics, create_app, prototype_name, setup_meilisearch, LogRouteHandle, LogRouteType, Opt,
};
use meilisearch::{analytics, create_app, prototype_name, setup_meilisearch, Opt};
use meilisearch_auth::{generate_master_key, AuthController, MASTER_KEY_MIN_SIZE};
use mimalloc::MiMalloc;
use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor};
use tracing::level_filters::LevelFilter;
use tracing_subscriber::layer::SubscriberExt as _;
use tracing_subscriber::Layer;
#[global_allocator]
static ALLOC: MiMalloc = MiMalloc;
fn default_layer() -> LogRouteType {
None.with_filter(tracing_subscriber::filter::Targets::new().with_target("", LevelFilter::OFF))
}
static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;
/// does all the setup before meilisearch is launched
fn setup(opt: &Opt) -> anyhow::Result<LogRouteHandle> {
let (route_layer, route_layer_handle) = tracing_subscriber::reload::Layer::new(default_layer());
let route_layer: tracing_subscriber::reload::Layer<_, _> = route_layer;
let subscriber = tracing_subscriber::registry().with(route_layer).with(
tracing_subscriber::fmt::layer()
.with_span_events(tracing_subscriber::fmt::format::FmtSpan::CLOSE)
.with_filter(
tracing_subscriber::filter::LevelFilter::from_str(&opt.log_level.to_string())
.unwrap(),
),
fn setup(opt: &Opt) -> anyhow::Result<()> {
let mut log_builder = env_logger::Builder::new();
let log_filters = format!(
"{},h2=warn,hyper=warn,tokio_util=warn,tracing=warn,rustls=warn,mio=warn,reqwest=warn",
opt.log_level
);
log_builder.parse_filters(&log_filters);
// set the subscriber as the default for the application
tracing::subscriber::set_global_default(subscriber).unwrap();
log_builder.init();
Ok(route_layer_handle)
}
fn on_panic(info: &std::panic::PanicInfo) {
let info = info.to_string().replace('\n', " ");
tracing::error!(%info);
Ok(())
}
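For reference, a minimal sketch of the env_logger-based setup this hunk returns to (the `setup_logs` function name is illustrative; the filter string is the one visible above): noisy transitive crates are pinned to `warn` while Meilisearch's own logs follow the configured level.

// Sketch only: env_logger initialisation with per-crate filters.
fn setup_logs(log_level: &str) {
    let filters = format!(
        "{},h2=warn,hyper=warn,tokio_util=warn,tracing=warn,rustls=warn,mio=warn,reqwest=warn",
        log_level
    );
    let mut builder = env_logger::Builder::new();
    builder.parse_filters(&filters);
    builder.init();
}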
#[actix_web::main]
async fn main() -> anyhow::Result<()> {
let (opt, config_read_from) = Opt::try_build()?;
std::panic::set_hook(Box::new(on_panic));
anyhow::ensure!(
!(cfg!(windows) && opt.experimental_reduce_indexing_memory_usage),
"The `experimental-reduce-indexing-memory-usage` flag is not supported on Windows"
);
let log_handle = setup(&opt)?;
setup(&opt)?;
match (opt.env.as_ref(), &opt.master_key) {
("production", Some(master_key)) if master_key.len() < MASTER_KEY_MIN_SIZE => {
@@ -101,7 +77,7 @@ async fn main() -> anyhow::Result<()> {
print_launch_resume(&opt, analytics.clone(), config_read_from);
run_http(index_scheduler, auth_controller, opt, log_handle, analytics).await?;
run_http(index_scheduler, auth_controller, opt, analytics).await?;
Ok(())
}
@@ -110,7 +86,6 @@ async fn run_http(
index_scheduler: Arc<IndexScheduler>,
auth_controller: Arc<AuthController>,
opt: Opt,
logs: LogRouteHandle,
analytics: Arc<dyn Analytics>,
) -> anyhow::Result<()> {
let enable_dashboard = &opt.env == "development";
@@ -123,7 +98,6 @@ async fn run_http(
index_scheduler.clone(),
auth_controller.clone(),
opt.clone(),
logs.clone(),
analytics.clone(),
enable_dashboard,
)
@@ -133,7 +107,7 @@ async fn run_http(
.keep_alive(KeepAlive::Os);
if let Some(config) = opt_clone.get_ssl_config()? {
http_server.bind_rustls_021(opt_clone.http_addr, config)?.run().await?;
http_server.bind_rustls(opt_clone.http_addr, config)?.run().await?;
} else {
http_server.bind(&opt_clone.http_addr)?.run().await?;
}

View File

@@ -20,7 +20,7 @@ use rustls::server::{
use rustls::RootCertStore;
use rustls_pemfile::{certs, pkcs8_private_keys, rsa_private_keys};
use serde::{Deserialize, Serialize};
use sysinfo::{MemoryRefreshKind, RefreshKind, System};
use sysinfo::{RefreshKind, System, SystemExt};
use url::Url;
const POSSIBLE_ENV: [&str; 2] = ["development", "production"];
@@ -51,7 +51,6 @@ const MEILI_IGNORE_MISSING_DUMP: &str = "MEILI_IGNORE_MISSING_DUMP";
const MEILI_IGNORE_DUMP_IF_DB_EXISTS: &str = "MEILI_IGNORE_DUMP_IF_DB_EXISTS";
const MEILI_DUMP_DIR: &str = "MEILI_DUMP_DIR";
const MEILI_LOG_LEVEL: &str = "MEILI_LOG_LEVEL";
const MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE: &str = "MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE";
const MEILI_EXPERIMENTAL_ENABLE_METRICS: &str = "MEILI_EXPERIMENTAL_ENABLE_METRICS";
const MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE: &str =
"MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE";
@@ -310,13 +309,6 @@ pub struct Opt {
#[serde(default)]
pub experimental_enable_metrics: bool,
/// Experimental logs route feature. For more information, see: <https://github.com/orgs/meilisearch/discussions/721>
///
/// Enables the log route on the `POST /logs/stream` endpoint and the `DELETE /logs/stream` to stop receiving logs.
#[clap(long, env = MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE)]
#[serde(default)]
pub experimental_enable_logs_route: bool,
/// Experimental RAM reduction during indexing, do not use in production, see: <https://github.com/meilisearch/product/discussions/652>
#[clap(long, env = MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE)]
#[serde(default)]
@@ -422,7 +414,6 @@ impl Opt {
#[cfg(feature = "analytics")]
no_analytics,
experimental_enable_metrics,
experimental_enable_logs_route,
experimental_reduce_indexing_memory_usage,
} = self;
export_to_env_if_not_present(MEILI_DB_PATH, db_path);
@@ -479,10 +470,6 @@ impl Opt {
MEILI_EXPERIMENTAL_ENABLE_METRICS,
experimental_enable_metrics.to_string(),
);
export_to_env_if_not_present(
MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE,
experimental_enable_logs_route.to_string(),
);
export_to_env_if_not_present(
MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE,
experimental_reduce_indexing_memory_usage.to_string(),
@@ -503,11 +490,11 @@ impl Opt {
}
if self.ssl_require_auth {
let verifier = AllowAnyAuthenticatedClient::new(client_auth_roots);
config.with_client_cert_verifier(Arc::from(verifier))
config.with_client_cert_verifier(verifier)
} else {
let verifier =
AllowAnyAnonymousOrAuthenticatedClient::new(client_auth_roots);
config.with_client_cert_verifier(Arc::from(verifier))
config.with_client_cert_verifier(verifier)
}
}
None => config.with_no_client_auth(),
@@ -537,10 +524,7 @@ impl Opt {
}
pub(crate) fn to_instance_features(&self) -> InstanceTogglableFeatures {
InstanceTogglableFeatures {
metrics: self.experimental_enable_metrics,
logs_route: self.experimental_enable_logs_route,
}
InstanceTogglableFeatures { metrics: self.experimental_enable_metrics }
}
}
@@ -649,8 +633,8 @@ impl MaxMemory {
/// Returns the total amount of bytes available or `None` if this system isn't supported.
fn total_memory_bytes() -> Option<u64> {
if sysinfo::IS_SUPPORTED_SYSTEM {
let memory_kind = RefreshKind::new().with_memory(MemoryRefreshKind::new().with_ram());
if System::IS_SUPPORTED {
let memory_kind = RefreshKind::new().with_memory();
let mut system = System::new_with_specifics(memory_kind);
system.refresh_memory();
Some(system.total_memory())
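A self-contained version of the sysinfo 0.29 form shown above (the `else` branch is an assumption, since the hunk ends before it): in 0.29 `IS_SUPPORTED` is an associated constant brought in by the `SystemExt` trait and `RefreshKind::with_memory()` takes no argument, replacing 0.30's `sysinfo::IS_SUPPORTED_SYSTEM` constant and `MemoryRefreshKind`.

// Sketch only (sysinfo 0.29): query total RAM without refreshing CPUs, disks, or processes.
use sysinfo::{RefreshKind, System, SystemExt};

fn total_memory_bytes() -> Option<u64> {
    if System::IS_SUPPORTED {
        let mut system = System::new_with_specifics(RefreshKind::new().with_memory());
        system.refresh_memory();
        Some(system.total_memory())
    } else {
        None
    }
}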

View File

@@ -1,11 +1,11 @@
use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse};
use index_scheduler::IndexScheduler;
use log::debug;
use meilisearch_auth::AuthController;
use meilisearch_types::error::ResponseError;
use meilisearch_types::tasks::KindWithContent;
use serde_json::json;
use tracing::debug;
use crate::analytics::Analytics;
use crate::extractors::authentication::policies::*;
@@ -32,6 +32,6 @@ pub async fn create_dump(
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
debug!(returns = ?task, "Create dump");
debug!("returns: {:?}", task);
Ok(HttpResponse::Accepted().json(task))
}

View File

@@ -3,11 +3,11 @@ use actix_web::{HttpRequest, HttpResponse};
use deserr::actix_web::AwebJson;
use deserr::Deserr;
use index_scheduler::IndexScheduler;
use log::debug;
use meilisearch_types::deserr::DeserrJsonError;
use meilisearch_types::error::ResponseError;
use meilisearch_types::keys::actions;
use serde_json::json;
use tracing::debug;
use crate::analytics::Analytics;
use crate::extractors::authentication::policies::ActionPolicy;
@@ -33,21 +33,20 @@ async fn get_features(
let features = index_scheduler.features();
analytics.publish("Experimental features Seen".to_string(), json!(null), Some(&req));
let features = features.runtime_features();
debug!(returns = ?features, "Get features");
HttpResponse::Ok().json(features)
debug!("returns: {:?}", features.runtime_features());
HttpResponse::Ok().json(features.runtime_features())
}
#[derive(Debug, Deserr)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
pub struct RuntimeTogglableFeatures {
#[deserr(default)]
pub score_details: Option<bool>,
#[deserr(default)]
pub vector_store: Option<bool>,
#[deserr(default)]
pub metrics: Option<bool>,
#[deserr(default)]
pub logs_route: Option<bool>,
#[deserr(default)]
pub export_puffin_reports: Option<bool>,
}
@@ -61,13 +60,12 @@ async fn patch_features(
analytics: Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
let features = index_scheduler.features();
debug!(parameters = ?new_features, "Patch features");
let old_features = features.runtime_features();
let new_features = meilisearch_types::features::RuntimeTogglableFeatures {
score_details: new_features.0.score_details.unwrap_or(old_features.score_details),
vector_store: new_features.0.vector_store.unwrap_or(old_features.vector_store),
metrics: new_features.0.metrics.unwrap_or(old_features.metrics),
logs_route: new_features.0.logs_route.unwrap_or(old_features.logs_route),
export_puffin_reports: new_features
.0
.export_puffin_reports
@@ -78,23 +76,22 @@ async fn patch_features(
// then it renames to camelCase, which we don't want for analytics.
// **Do not** ignore fields with `..` or `_` here, because we want to add them in the future.
let meilisearch_types::features::RuntimeTogglableFeatures {
score_details,
vector_store,
metrics,
logs_route,
export_puffin_reports,
} = new_features;
analytics.publish(
"Experimental features Updated".to_string(),
json!({
"score_details": score_details,
"vector_store": vector_store,
"metrics": metrics,
"logs_route": logs_route,
"export_puffin_reports": export_puffin_reports,
}),
Some(&req),
);
index_scheduler.put_runtime_features(new_features)?;
debug!(returns = ?new_features, "Patch features");
Ok(HttpResponse::Ok().json(new_features))
}

View File

@@ -8,6 +8,7 @@ use deserr::actix_web::{AwebJson, AwebQueryParameter};
use deserr::Deserr;
use futures::StreamExt;
use index_scheduler::IndexScheduler;
use log::debug;
use meilisearch_types::deserr::query_params::Param;
use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError};
use meilisearch_types::document_formats::{read_csv, read_json, read_ndjson, PayloadType};
@@ -27,7 +28,6 @@ use serde_json::Value;
use tempfile::tempfile;
use tokio::fs::File;
use tokio::io::{AsyncSeekExt, AsyncWriteExt, BufWriter};
use tracing::debug;
use crate::analytics::{Analytics, DocumentDeletionKind, DocumentFetchKind};
use crate::error::MeilisearchHttpError;
@@ -101,7 +101,6 @@ pub async fn get_document(
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
let DocumentParam { index_uid, document_id } = document_param.into_inner();
debug!(parameters = ?params, "Get document");
let index_uid = IndexUid::try_from(index_uid)?;
analytics.get_fetch_documents(&DocumentFetchKind::PerDocumentId, &req);
@@ -111,7 +110,7 @@ pub async fn get_document(
let index = index_scheduler.index(&index_uid)?;
let document = retrieve_document(&index, &document_id, attributes_to_retrieve)?;
debug!(returns = ?document, "Get document");
debug!("returns: {:?}", document);
Ok(HttpResponse::Ok().json(document))
}
@@ -132,7 +131,7 @@ pub async fn delete_document(
};
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
debug!(returns = ?task, "Delete document");
debug!("returns: {:?}", task);
Ok(HttpResponse::Accepted().json(task))
}
@@ -169,8 +168,9 @@ pub async fn documents_by_query_post(
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
debug!("called with body: {:?}", body);
let body = body.into_inner();
debug!(parameters = ?body, "Get documents POST");
analytics.post_fetch_documents(
&DocumentFetchKind::Normal {
@@ -191,7 +191,7 @@ pub async fn get_documents(
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
debug!(parameters = ?params, "Get documents GET");
debug!("called with params: {:?}", params);
let BrowseQueryGet { limit, offset, fields, filter } = params.into_inner();
@@ -235,7 +235,7 @@ fn documents_by_query(
let ret = PaginationView::new(offset, limit, total as usize, documents);
debug!(returns = ?ret, "Get documents");
debug!("returns: {:?}", ret);
Ok(HttpResponse::Ok().json(ret))
}
@@ -271,7 +271,7 @@ pub async fn replace_documents(
) -> Result<HttpResponse, ResponseError> {
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
debug!(parameters = ?params, "Replace documents");
debug!("called with params: {:?}", params);
let params = params.into_inner();
analytics.add_documents(&params, index_scheduler.index(&index_uid).is_err(), &req);
@@ -288,7 +288,6 @@ pub async fn replace_documents(
allow_index_creation,
)
.await?;
debug!(returns = ?task, "Replace documents");
Ok(HttpResponse::Accepted().json(task))
}
@@ -303,8 +302,8 @@ pub async fn update_documents(
) -> Result<HttpResponse, ResponseError> {
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
debug!("called with params: {:?}", params);
let params = params.into_inner();
debug!(parameters = ?params, "Update documents");
analytics.update_documents(&params, index_scheduler.index(&index_uid).is_err(), &req);
@@ -320,7 +319,6 @@ pub async fn update_documents(
allow_index_creation,
)
.await?;
debug!(returns = ?task, "Update documents");
Ok(HttpResponse::Accepted().json(task))
}
@@ -429,10 +427,7 @@ async fn document_addition(
Err(index_scheduler::Error::FileStore(file_store::Error::IoError(e)))
if e.kind() == ErrorKind::NotFound => {}
Err(e) => {
tracing::warn!(
index_uuid = %uuid,
"Unknown error happened while deleting a malformed update file: {e}"
);
log::warn!("Unknown error happened while deleting a malformed update file with uuid {uuid}: {e}");
}
}
// We still want to return the original error to the end user.
@@ -458,6 +453,7 @@ async fn document_addition(
}
};
debug!("returns: {:?}", task);
Ok(task.into())
}
@@ -468,7 +464,7 @@ pub async fn delete_documents_batch(
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
debug!(parameters = ?body, "Delete documents by batch");
debug!("called with params: {:?}", body);
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
analytics.delete_documents(DocumentDeletionKind::PerBatch, &req);
@@ -483,7 +479,7 @@ pub async fn delete_documents_batch(
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
debug!(returns = ?task, "Delete documents by batch");
debug!("returns: {:?}", task);
Ok(HttpResponse::Accepted().json(task))
}
@@ -501,7 +497,7 @@ pub async fn delete_documents_by_filter(
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
debug!(parameters = ?body, "Delete documents by filter");
debug!("called with params: {:?}", body);
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let index_uid = index_uid.into_inner();
let filter = body.into_inner().filter;
@@ -519,7 +515,7 @@ pub async fn delete_documents_by_filter(
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
debug!(returns = ?task, "Delete documents by filter");
debug!("returns: {:?}", task);
Ok(HttpResponse::Accepted().json(task))
}
@@ -536,7 +532,7 @@ pub async fn clear_all_documents(
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
debug!(returns = ?task, "Delete all documents");
debug!("returns: {:?}", task);
Ok(HttpResponse::Accepted().json(task))
}

View File

@@ -2,12 +2,12 @@ use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse};
use deserr::actix_web::AwebJson;
use index_scheduler::IndexScheduler;
use log::debug;
use meilisearch_types::deserr::DeserrJsonError;
use meilisearch_types::error::deserr_codes::*;
use meilisearch_types::error::ResponseError;
use meilisearch_types::index_uid::IndexUid;
use serde_json::Value;
use tracing::debug;
use crate::analytics::{Analytics, FacetSearchAggregator};
use crate::extractors::authentication::policies::*;
@@ -56,7 +56,7 @@ pub async fn search(
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let query = params.into_inner();
debug!(parameters = ?query, "Facet search");
debug!("facet search called with params: {:?}", query);
let mut aggregate = FacetSearchAggregator::from_query(&query, &req);
@@ -83,7 +83,7 @@ pub async fn search(
let search_result = search_result?;
debug!(returns = ?search_result, "Facet search");
debug!("returns: {:?}", search_result);
Ok(HttpResponse::Ok().json(search_result))
}

View File

@@ -5,6 +5,7 @@ use actix_web::{web, HttpRequest, HttpResponse};
use deserr::actix_web::{AwebJson, AwebQueryParameter};
use deserr::{DeserializeError, Deserr, ValuePointerRef};
use index_scheduler::IndexScheduler;
use log::debug;
use meilisearch_types::deserr::query_params::Param;
use meilisearch_types::deserr::{immutable_field_error, DeserrJsonError, DeserrQueryParamError};
use meilisearch_types::error::deserr_codes::*;
@@ -15,7 +16,6 @@ use meilisearch_types::tasks::KindWithContent;
use serde::Serialize;
use serde_json::json;
use time::OffsetDateTime;
use tracing::debug;
use super::{Pagination, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT};
use crate::analytics::Analytics;
@@ -93,7 +93,6 @@ pub async fn list_indexes(
index_scheduler: GuardedData<ActionPolicy<{ actions::INDEXES_GET }>, Data<IndexScheduler>>,
paginate: AwebQueryParameter<ListIndexes, DeserrQueryParamError>,
) -> Result<HttpResponse, ResponseError> {
debug!(parameters = ?paginate, "List indexes");
let filters = index_scheduler.filters();
let indexes: Vec<Option<IndexView>> =
index_scheduler.try_for_each_index(|uid, index| -> Result<Option<IndexView>, _> {
@@ -106,7 +105,7 @@ pub async fn list_indexes(
let indexes: Vec<IndexView> = indexes.into_iter().flatten().collect();
let ret = paginate.as_pagination().auto_paginate_sized(indexes.into_iter());
debug!(returns = ?ret, "List indexes");
debug!("returns: {:?}", ret);
Ok(HttpResponse::Ok().json(ret))
}
@@ -125,7 +124,6 @@ pub async fn create_index(
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
debug!(parameters = ?body, "Create index");
let IndexCreateRequest { primary_key, uid } = body.into_inner();
let allow_index_creation = index_scheduler.filters().allow_index_creation(&uid);
@@ -139,7 +137,6 @@ pub async fn create_index(
let task = KindWithContent::IndexCreation { index_uid: uid.to_string(), primary_key };
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
debug!(returns = ?task, "Create index");
Ok(HttpResponse::Accepted().json(task))
} else {
@@ -180,7 +177,7 @@ pub async fn get_index(
let index = index_scheduler.index(&index_uid)?;
let index_view = IndexView::new(index_uid.into_inner(), &index)?;
debug!(returns = ?index_view, "Get index");
debug!("returns: {:?}", index_view);
Ok(HttpResponse::Ok().json(index_view))
}
@@ -192,7 +189,7 @@ pub async fn update_index(
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
debug!(parameters = ?body, "Update index");
debug!("called with params: {:?}", body);
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let body = body.into_inner();
analytics.publish(
@@ -209,7 +206,7 @@ pub async fn update_index(
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
debug!(returns = ?task, "Update index");
debug!("returns: {:?}", task);
Ok(HttpResponse::Accepted().json(task))
}
@@ -221,7 +218,6 @@ pub async fn delete_index(
let task = KindWithContent::IndexDeletion { index_uid: index_uid.into_inner() };
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
debug!(returns = ?task, "Delete index");
Ok(HttpResponse::Accepted().json(task))
}
@@ -259,6 +255,6 @@ pub async fn get_index_stats(
let stats = IndexStats::from(index_scheduler.index_stats(&index_uid)?);
debug!(returns = ?stats, "Get index stats");
debug!("returns: {:?}", stats);
Ok(HttpResponse::Ok().json(stats))
}

View File

@@ -2,6 +2,7 @@ use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse};
use deserr::actix_web::{AwebJson, AwebQueryParameter};
use index_scheduler::IndexScheduler;
use log::{debug, warn};
use meilisearch_types::deserr::query_params::Param;
use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError};
use meilisearch_types::error::deserr_codes::*;
@@ -11,7 +12,6 @@ use meilisearch_types::milli;
use meilisearch_types::milli::vector::DistributionShift;
use meilisearch_types::serde_cs::vec::CS;
use serde_json::Value;
use tracing::{debug, warn};
use crate::analytics::{Analytics, SearchAggregator};
use crate::extractors::authentication::policies::*;
@@ -186,7 +186,7 @@ pub async fn search_with_url_query(
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
debug!(parameters = ?params, "Search get");
debug!("called with params: {:?}", params);
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let mut query: SearchQuery = params.into_inner().into();
@@ -213,7 +213,7 @@ pub async fn search_with_url_query(
let search_result = search_result?;
debug!(returns = ?search_result, "Search get");
debug!("returns: {:?}", search_result);
Ok(HttpResponse::Ok().json(search_result))
}
@@ -227,7 +227,7 @@ pub async fn search_with_post(
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let mut query = params.into_inner();
debug!(parameters = ?query, "Search post");
debug!("search called with params: {:?}", query);
// Tenant token search_rules.
if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid) {
@@ -252,7 +252,7 @@ pub async fn search_with_post(
let search_result = search_result?;
debug!(returns = ?search_result, "Search post");
debug!("returns: {:?}", search_result);
Ok(HttpResponse::Ok().json(search_result))
}

View File

@@ -2,6 +2,7 @@ use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse};
use deserr::actix_web::AwebJson;
use index_scheduler::IndexScheduler;
use log::debug;
use meilisearch_types::deserr::DeserrJsonError;
use meilisearch_types::error::ResponseError;
use meilisearch_types::facet_values_sort::FacetValuesSort;
@@ -10,7 +11,6 @@ use meilisearch_types::milli::update::Setting;
use meilisearch_types::settings::{settings, RankingRuleView, Settings, Unchecked};
use meilisearch_types::tasks::KindWithContent;
use serde_json::json;
use tracing::debug;
use crate::analytics::Analytics;
use crate::extractors::authentication::policies::*;
@@ -24,12 +24,12 @@ macro_rules! make_setting_route {
use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse, Resource};
use index_scheduler::IndexScheduler;
use log::debug;
use meilisearch_types::error::ResponseError;
use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::milli::update::Setting;
use meilisearch_types::settings::{settings, Settings};
use meilisearch_types::tasks::KindWithContent;
use tracing::debug;
use $crate::analytics::Analytics;
use $crate::extractors::authentication::policies::*;
use $crate::extractors::authentication::GuardedData;
@@ -61,7 +61,7 @@ macro_rules! make_setting_route {
.await??
.into();
debug!(returns = ?task, "Delete settings");
debug!("returns: {:?}", task);
Ok(HttpResponse::Accepted().json(task))
}
@@ -78,7 +78,6 @@ macro_rules! make_setting_route {
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let body = body.into_inner();
debug!(parameters = ?body, "Update settings");
#[allow(clippy::redundant_closure_call)]
$analytics(&body, &req);
@@ -110,7 +109,7 @@ macro_rules! make_setting_route {
.await??
.into();
debug!(returns = ?task, "Update settings");
debug!("returns: {:?}", task);
Ok(HttpResponse::Accepted().json(task))
}
@@ -127,7 +126,7 @@ macro_rules! make_setting_route {
let rtxn = index.read_txn()?;
let settings = settings(&index, &rtxn)?;
debug!(returns = ?settings, "Update settings");
debug!("returns: {:?}", settings);
let mut json = serde_json::json!(&settings);
let val = json[$camelcase_attr].take();
@@ -657,7 +656,6 @@ pub async fn update_all(
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let new_settings = body.into_inner();
debug!(parameters = ?new_settings, "Update all settings");
let new_settings = validate_settings(new_settings, &index_scheduler)?;
analytics.publish(
@@ -770,7 +768,7 @@ pub async fn update_all(
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
debug!(returns = ?task, "Update all settings");
debug!("returns: {:?}", task);
Ok(HttpResponse::Accepted().json(task))
}
@@ -783,7 +781,7 @@ pub async fn get_all(
let index = index_scheduler.index(&index_uid)?;
let rtxn = index.read_txn()?;
let new_settings = settings(&index, &rtxn)?;
debug!(returns = ?new_settings, "Get all settings");
debug!("returns: {:?}", new_settings);
Ok(HttpResponse::Ok().json(new_settings))
}
@@ -806,7 +804,7 @@ pub async fn delete_all(
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
debug!(returns = ?task, "Delete all settings");
debug!("returns: {:?}", task);
Ok(HttpResponse::Accepted().json(task))
}

View File

@@ -1,281 +0,0 @@
use std::convert::Infallible;
use std::io::Write;
use std::ops::ControlFlow;
use std::pin::Pin;
use std::str::FromStr;
use std::sync::Arc;
use actix_web::web::{Bytes, Data};
use actix_web::{web, HttpResponse};
use deserr::actix_web::AwebJson;
use deserr::{DeserializeError, Deserr, ErrorKind, MergeWithError, ValuePointerRef};
use futures_util::Stream;
use index_scheduler::IndexScheduler;
use meilisearch_types::deserr::DeserrJsonError;
use meilisearch_types::error::deserr_codes::*;
use meilisearch_types::error::{Code, ResponseError};
use tokio::sync::mpsc;
use tracing_subscriber::filter::Targets;
use tracing_subscriber::Layer;
use crate::error::MeilisearchHttpError;
use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::GuardedData;
use crate::extractors::sequential_extractor::SeqHandler;
use crate::LogRouteHandle;
pub fn configure(cfg: &mut web::ServiceConfig) {
cfg.service(
web::resource("stream")
.route(web::post().to(SeqHandler(get_logs)))
.route(web::delete().to(SeqHandler(cancel_logs))),
);
}
#[derive(Debug, Default, Clone, Copy, Deserr, PartialEq, Eq)]
#[deserr(rename_all = camelCase)]
pub enum LogMode {
#[default]
Human,
Profile,
}
/// Simple wrapper around the `Targets` from `tracing_subscriber` to implement `MergeWithError` on it.
#[derive(Clone, Debug)]
struct MyTargets(Targets);
/// Simple wrapper around the `ParseError` from `tracing_subscriber` to implement `MergeWithError` on it.
#[derive(Debug, thiserror::Error)]
enum MyParseError {
#[error(transparent)]
ParseError(#[from] tracing_subscriber::filter::ParseError),
#[error(
"Empty string is not a valid target. If you want to get no logs use `OFF`. Usage: `info`, `meilisearch=info`, or you can write multiple filters in one target: `index_scheduler=info,milli=trace`"
)]
Example,
}
impl FromStr for MyTargets {
type Err = MyParseError;
fn from_str(s: &str) -> Result<Self, Self::Err> {
if s.is_empty() {
Err(MyParseError::Example)
} else {
Ok(MyTargets(Targets::from_str(s).map_err(MyParseError::ParseError)?))
}
}
}
impl MergeWithError<MyParseError> for DeserrJsonError<BadRequest> {
fn merge(
_self_: Option<Self>,
other: MyParseError,
merge_location: ValuePointerRef,
) -> ControlFlow<Self, Self> {
Self::error::<Infallible>(
None,
ErrorKind::Unexpected { msg: other.to_string() },
merge_location,
)
}
}
#[derive(Debug, Deserr)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields, validate = validate_get_logs -> DeserrJsonError<InvalidSettingsTypoTolerance>)]
pub struct GetLogs {
#[deserr(default = "info".parse().unwrap(), try_from(&String) = MyTargets::from_str -> DeserrJsonError<BadRequest>)]
target: MyTargets,
#[deserr(default, error = DeserrJsonError<BadRequest>)]
mode: LogMode,
#[deserr(default = false, error = DeserrJsonError<BadRequest>)]
profile_memory: bool,
}
fn validate_get_logs<E: DeserializeError>(
logs: GetLogs,
location: ValuePointerRef,
) -> Result<GetLogs, E> {
if logs.profile_memory && logs.mode != LogMode::Profile {
Err(deserr::take_cf_content(E::error::<Infallible>(
None,
ErrorKind::Unexpected {
msg: format!("`profile_memory` can only be used while profiling code and is not compatible with the {:?} mode.", logs.mode),
},
location,
)))
} else {
Ok(logs)
}
}
struct LogWriter {
sender: mpsc::UnboundedSender<Vec<u8>>,
}
impl Write for LogWriter {
fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
self.sender.send(buf.to_vec()).map_err(std::io::Error::other)?;
Ok(buf.len())
}
fn flush(&mut self) -> std::io::Result<()> {
Ok(())
}
}
struct HandleGuard {
/// We need to keep a handle on the logs to make it available again when the streamer is dropped
logs: Arc<LogRouteHandle>,
}
impl Drop for HandleGuard {
fn drop(&mut self) {
if let Err(e) = self.logs.modify(|layer| *layer.inner_mut() = None) {
tracing::error!("Could not free the logs route: {e}");
}
}
}
fn byte_stream(
receiver: mpsc::UnboundedReceiver<Vec<u8>>,
guard: HandleGuard,
) -> impl futures_util::Stream<Item = Result<Bytes, ResponseError>> {
futures_util::stream::unfold((receiver, guard), move |(mut receiver, guard)| async move {
let vec = receiver.recv().await;
vec.map(From::from).map(Ok).map(|a| (a, (receiver, guard)))
})
}
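The `byte_stream` helper above (and `entry_stream` below) rely on `futures_util::stream::unfold`, which threads state, here the receiver plus the `HandleGuard` that resets the log layer on drop, through an async closure; the stream ends when the closure returns `None`. A minimal stand-alone sketch of the same pattern, with illustrative names:

// Sketch only: turn a tokio mpsc receiver into a Stream by carrying it as unfold state.
use futures_util::StreamExt;
use tokio::sync::mpsc;

async fn demo_unfold() {
    let (sender, receiver) = mpsc::unbounded_channel::<Vec<u8>>();
    sender.send(b"hello".to_vec()).unwrap();
    drop(sender); // closing the channel makes recv() return None and ends the stream
    let stream = futures_util::stream::unfold(receiver, |mut receiver| async move {
        receiver.recv().await.map(|buf| (buf, receiver))
    });
    futures_util::pin_mut!(stream);
    while let Some(chunk) = stream.next().await {
        println!("received {} bytes", chunk.len());
    }
}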
type PinnedByteStream = Pin<Box<dyn Stream<Item = Result<Bytes, ResponseError>>>>;
fn make_layer<
S: tracing::Subscriber + for<'span> tracing_subscriber::registry::LookupSpan<'span>,
>(
opt: &GetLogs,
logs: Data<LogRouteHandle>,
) -> (Box<dyn Layer<S> + Send + Sync>, PinnedByteStream) {
let guard = HandleGuard { logs: logs.into_inner() };
match opt.mode {
LogMode::Human => {
let (sender, receiver) = tokio::sync::mpsc::unbounded_channel();
let fmt_layer = tracing_subscriber::fmt::layer()
.with_writer(move || LogWriter { sender: sender.clone() })
.with_span_events(tracing_subscriber::fmt::format::FmtSpan::ACTIVE);
let stream = byte_stream(receiver, guard);
(Box::new(fmt_layer) as Box<dyn Layer<S> + Send + Sync>, Box::pin(stream))
}
LogMode::Profile => {
let (trace, layer) = tracing_trace::Trace::new(opt.profile_memory);
let stream = entry_stream(trace, guard);
(Box::new(layer) as Box<dyn Layer<S> + Send + Sync>, Box::pin(stream))
}
}
}
fn entry_stream(
trace: tracing_trace::Trace,
guard: HandleGuard,
) -> impl Stream<Item = Result<Bytes, ResponseError>> {
let receiver = trace.into_receiver();
let entry_buf = Vec::new();
futures_util::stream::unfold(
(receiver, entry_buf, guard),
move |(mut receiver, mut entry_buf, guard)| async move {
let mut bytes = Vec::new();
while bytes.len() < 8192 {
entry_buf.clear();
let Ok(count) = tokio::time::timeout(
std::time::Duration::from_secs(1),
receiver.recv_many(&mut entry_buf, 100),
)
.await
else {
break;
};
if count == 0 {
if !bytes.is_empty() {
break;
}
// channel closed, exit
return None;
}
for entry in &entry_buf {
if let Err(error) = serde_json::to_writer(&mut bytes, entry) {
tracing::error!(
error = &error as &dyn std::error::Error,
"deserializing entry"
);
return Some((
Err(ResponseError::from_msg(
format!("error deserializing entry: {error}"),
Code::Internal,
)),
(receiver, entry_buf, guard),
));
}
}
}
Some((Ok(bytes.into()), (receiver, entry_buf, guard)))
},
)
}
pub async fn get_logs(
index_scheduler: GuardedData<ActionPolicy<{ actions::METRICS_GET }>, Data<IndexScheduler>>,
logs: Data<LogRouteHandle>,
body: AwebJson<GetLogs, DeserrJsonError>,
) -> Result<HttpResponse, ResponseError> {
index_scheduler.features().check_logs_route()?;
let opt = body.into_inner();
let mut stream = None;
logs.modify(|layer| match layer.inner_mut() {
None => {
// there is no one getting logs
*layer.filter_mut() = opt.target.0.clone();
let (new_layer, new_stream) = make_layer(&opt, logs.clone());
*layer.inner_mut() = Some(new_layer);
stream = Some(new_stream);
}
Some(_) => {
// there is already someone getting logs
}
})
.unwrap();
if let Some(stream) = stream {
Ok(HttpResponse::Ok().streaming(stream))
} else {
Err(MeilisearchHttpError::AlreadyUsedLogRoute.into())
}
}
pub async fn cancel_logs(
index_scheduler: GuardedData<ActionPolicy<{ actions::METRICS_GET }>, Data<IndexScheduler>>,
logs: Data<LogRouteHandle>,
) -> Result<HttpResponse, ResponseError> {
index_scheduler.features().check_logs_route()?;
if let Err(e) = logs.modify(|layer| *layer.inner_mut() = None) {
tracing::error!("Could not free the logs route: {e}");
}
Ok(HttpResponse::NoContent().finish())
}

View File

@@ -3,6 +3,7 @@ use std::collections::BTreeMap;
use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse};
use index_scheduler::IndexScheduler;
use log::debug;
use meilisearch_auth::AuthController;
use meilisearch_types::error::ResponseError;
use meilisearch_types::settings::{Settings, Unchecked};
@@ -10,7 +11,6 @@ use meilisearch_types::tasks::{Kind, Status, Task, TaskId};
use serde::{Deserialize, Serialize};
use serde_json::json;
use time::OffsetDateTime;
use tracing::debug;
use crate::analytics::Analytics;
use crate::extractors::authentication::policies::*;
@@ -22,7 +22,6 @@ mod api_key;
mod dump;
pub mod features;
pub mod indexes;
mod logs;
mod metrics;
mod multi_search;
mod snapshot;
@@ -32,7 +31,6 @@ pub mod tasks;
pub fn configure(cfg: &mut web::ServiceConfig) {
cfg.service(web::scope("/tasks").configure(tasks::configure))
.service(web::resource("/health").route(web::get().to(get_health)))
.service(web::scope("/logs").configure(logs::configure))
.service(web::scope("/keys").configure(api_key::configure))
.service(web::scope("/dumps").configure(dump::configure))
.service(web::scope("/snapshots").configure(snapshot::configure))
@@ -252,7 +250,7 @@ async fn get_stats(
let stats = create_all_stats((*index_scheduler).clone(), (*auth_controller).clone(), filters)?;
debug!(returns = ?stats, "Get stats");
debug!("returns: {:?}", stats);
Ok(HttpResponse::Ok().json(stats))
}

View File

@@ -3,11 +3,11 @@ use actix_web::web::{self, Data};
use actix_web::{HttpRequest, HttpResponse};
use deserr::actix_web::AwebJson;
use index_scheduler::IndexScheduler;
use log::debug;
use meilisearch_types::deserr::DeserrJsonError;
use meilisearch_types::error::ResponseError;
use meilisearch_types::keys::actions;
use serde::Serialize;
use tracing::debug;
use crate::analytics::{Analytics, MultiSearchAggregator};
use crate::extractors::authentication::policies::ActionPolicy;
@@ -52,7 +52,7 @@ pub async fn multi_search_with_post(
for (query_index, (index_uid, mut query)) in
queries.into_iter().map(SearchQueryWithIndex::into_index_query).enumerate()
{
debug!(on_index = query_index, parameters = ?query, "Multi-search");
debug!("multi-search #{query_index}: called with params: {:?}", query);
// Check index from API key
if !index_scheduler.filters().is_index_authorized(&index_uid) {
@@ -107,7 +107,7 @@ pub async fn multi_search_with_post(
err
})?;
debug!(returns = ?search_results, "Multi-search");
debug!("returns: {:?}", search_results);
Ok(HttpResponse::Ok().json(SearchResults { results: search_results }))
}

View File

@@ -1,10 +1,10 @@
use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse};
use index_scheduler::IndexScheduler;
use log::debug;
use meilisearch_types::error::ResponseError;
use meilisearch_types::tasks::KindWithContent;
use serde_json::json;
use tracing::debug;
use crate::analytics::Analytics;
use crate::extractors::authentication::policies::*;
@@ -27,6 +27,6 @@ pub async fn create_snapshot(
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
debug!(returns = ?task, "Create snapshot");
debug!("returns: {:?}", task);
Ok(HttpResponse::Accepted().json(task))
}

View File

@@ -441,6 +441,10 @@ fn prepare_search<'t>(
ScoringStrategy::Skip
});
if query.show_ranking_score_details {
features.check_score_details()?;
}
if let Some(HybridQuery { embedder: Some(embedder), .. }) = &query.hybrid {
search.embedder_name(embedder);
}

View File

@@ -59,8 +59,6 @@ pub static AUTHORIZATIONS: Lazy<HashMap<(&'static str, &'static str), HashSet<&'
("POST", "/snapshots") => hashset!{"snapshots.create", "snapshots.*", "*"},
("GET", "/version") => hashset!{"version", "*"},
("GET", "/metrics") => hashset!{"metrics.get", "metrics.*", "*"},
("POST", "/logs/stream") => hashset!{"metrics.get", "metrics.*", "*"},
("DELETE", "/logs/stream") => hashset!{"metrics.get", "metrics.*", "*"},
("PATCH", "/keys/mykey/") => hashset!{"keys.update", "*"},
("GET", "/keys/mykey/") => hashset!{"keys.get", "*"},
("DELETE", "/keys/mykey/") => hashset!{"keys.delete", "*"},

View File

@@ -64,7 +64,7 @@ impl Display for Value {
write!(
f,
"{}",
json_string!(self, { ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]" })
json_string!(self, { ".enqueuedAt" => "[date]", ".processedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]" })
)
}
}

View File

@@ -13,8 +13,6 @@ use meilisearch::{analytics, create_app, setup_meilisearch};
use once_cell::sync::Lazy;
use tempfile::TempDir;
use tokio::time::sleep;
use tracing::level_filters::LevelFilter;
use tracing_subscriber::Layer;
use super::index::Index;
use super::service::Service;
@@ -83,16 +81,10 @@ impl Server {
Response = ServiceResponse<impl MessageBody>,
Error = actix_web::Error,
> {
let (_route_layer, route_layer_handle) =
tracing_subscriber::reload::Layer::new(None.with_filter(
tracing_subscriber::filter::Targets::new().with_target("", LevelFilter::OFF),
));
actix_web::test::init_service(create_app(
self.service.index_scheduler.clone().into(),
self.service.auth.clone().into(),
self.service.options.clone(),
route_layer_handle,
analytics::MockAnalytics::new(&self.service.options),
true,
))

View File

@@ -7,8 +7,6 @@ use actix_web::test::TestRequest;
use index_scheduler::IndexScheduler;
use meilisearch::{analytics, create_app, Opt};
use meilisearch_auth::AuthController;
use tracing::level_filters::LevelFilter;
use tracing_subscriber::Layer;
use crate::common::encoder::Encoder;
use crate::common::Value;
@@ -107,16 +105,10 @@ impl Service {
}
pub async fn request(&self, mut req: test::TestRequest) -> (Value, StatusCode) {
let (_route_layer, route_layer_handle) =
tracing_subscriber::reload::Layer::new(None.with_filter(
tracing_subscriber::filter::Targets::new().with_target("", LevelFilter::OFF),
));
let app = test::init_service(create_app(
self.index_scheduler.clone().into(),
self.auth.clone().into(),
self.options.clone(),
route_layer_handle,
analytics::MockAnalytics::new(&self.options),
true,
))

View File

@@ -1760,181 +1760,6 @@ async fn add_documents_invalid_geo_field() {
"finishedAt": "[date]"
}
"###);
// The three next tests are related to #4333
// _geo has a valid lat but a lng set to `null`
let documents = json!([
{
"id": "12",
"_geo": { "lng": null, "lat": 67}
}
]);
let (response, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let response = index.wait_task(response.uid()).await;
snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }),
@r###"
{
"uid": 14,
"indexUid": "test",
"status": "failed",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 0
},
"error": {
"message": "Could not parse longitude in the document with the id: `12`. Was expecting a finite number but instead got `null`.",
"code": "invalid_document_geo_field",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"
},
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
// _geo has a valid lng but a lat set to `null`
let documents = json!([
{
"id": "12",
"_geo": { "lng": 35, "lat": null }
}
]);
let (response, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let response = index.wait_task(response.uid()).await;
snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }),
@r###"
{
"uid": 15,
"indexUid": "test",
"status": "failed",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 0
},
"error": {
"message": "Could not parse latitude in the document with the id: `12`. Was expecting a finite number but instead got `null`.",
"code": "invalid_document_geo_field",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"
},
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
// _geo has a lat and lng but set to `null`
let documents = json!([
{
"id": "13",
"_geo": { "lng": null, "lat": null }
}
]);
let (response, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let response = index.wait_task(response.uid()).await;
snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }),
@r###"
{
"uid": 16,
"indexUid": "test",
"status": "failed",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 0
},
"error": {
"message": "Could not parse latitude nor longitude in the document with the id: `13`. Was expecting finite numbers but instead got `null` and `null`.",
"code": "invalid_document_geo_field",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"
},
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
}
// Related to #4333
#[actix_rt::test]
async fn add_invalid_geo_and_then_settings() {
let server = Server::new().await;
let index = server.index("test");
index.create(Some("id")).await;
// _geo has both lat and lng set to `null`
let documents = json!([
{
"id": "11",
"_geo": { "lat": null, "lng": null },
}
]);
let (ret, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let ret = index.wait_task(ret.uid()).await;
snapshot!(ret, @r###"
{
"uid": 1,
"indexUid": "test",
"status": "succeeded",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 1
},
"error": null,
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
let (ret, code) = index.update_settings(json!({"sortableAttributes": ["_geo"]})).await;
snapshot!(code, @"202 Accepted");
let ret = index.wait_task(ret.uid()).await;
snapshot!(ret, @r###"
{
"uid": 2,
"indexUid": "test",
"status": "failed",
"type": "settingsUpdate",
"canceledBy": null,
"details": {
"sortableAttributes": [
"_geo"
]
},
"error": {
"message": "Could not parse latitude in the document with the id: `\"11\"`. Was expecting a finite number but instead got `null`.",
"code": "invalid_document_geo_field",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"
},
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
}
#[actix_rt::test]

View File

@@ -1845,9 +1845,9 @@ async fn import_dump_v6_containing_experimental_features() {
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"scoreDetails": false,
"vectorStore": false,
"metrics": false,
"logsRoute": false,
"exportPuffinReports": false
}
"###);

View File

@@ -18,9 +18,9 @@ async fn experimental_features() {
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"scoreDetails": false,
"vectorStore": false,
"metrics": false,
"logsRoute": false,
"exportPuffinReports": false
}
"###);
@@ -30,9 +30,9 @@ async fn experimental_features() {
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"scoreDetails": false,
"vectorStore": true,
"metrics": false,
"logsRoute": false,
"exportPuffinReports": false
}
"###);
@@ -42,9 +42,9 @@ async fn experimental_features() {
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"scoreDetails": false,
"vectorStore": true,
"metrics": false,
"logsRoute": false,
"exportPuffinReports": false
}
"###);
@@ -55,9 +55,9 @@ async fn experimental_features() {
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"scoreDetails": false,
"vectorStore": true,
"metrics": false,
"logsRoute": false,
"exportPuffinReports": false
}
"###);
@@ -68,9 +68,9 @@ async fn experimental_features() {
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"scoreDetails": false,
"vectorStore": true,
"metrics": false,
"logsRoute": false,
"exportPuffinReports": false
}
"###);
@@ -88,9 +88,9 @@ async fn experimental_feature_metrics() {
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"scoreDetails": false,
"vectorStore": false,
"metrics": true,
"logsRoute": false,
"exportPuffinReports": false
}
"###);
@@ -146,7 +146,7 @@ async fn errors() {
meili_snap::snapshot!(code, @"400 Bad Request");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"message": "Unknown field `NotAFeature`: expected one of `vectorStore`, `metrics`, `logsRoute`, `exportPuffinReports`",
"message": "Unknown field `NotAFeature`: expected one of `scoreDetails`, `vectorStore`, `metrics`, `exportPuffinReports`",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"

View File

@@ -5,7 +5,6 @@ mod documents;
mod dumps;
mod features;
mod index;
mod logs;
mod search;
mod settings;
mod snapshot;

View File

@@ -1,182 +0,0 @@
use meili_snap::*;
use crate::common::Server;
use crate::json;
#[actix_rt::test]
async fn logs_stream_bad_target() {
let server = Server::new().await;
// Wrong type
let (response, code) = server.service.post("/logs/stream", json!({ "target": true })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Invalid value type at `.target`: expected a string, but found a boolean: `true`",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"
}
"###);
// Wrong type
let (response, code) = server.service.post("/logs/stream", json!({ "target": [] })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Invalid value type at `.target`: expected a string, but found an array: `[]`",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"
}
"###);
// Our help message
let (response, code) = server.service.post("/logs/stream", json!({ "target": "" })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Invalid value at `.target`: Empty string is not a valid target. If you want to get no logs use `OFF`. Usage: `info`, `meilisearch=info`, or you can write multiple filters in one target: `index_scheduler=info,milli=trace`",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"
}
"###);
// An error from the target parser
let (response, code) = server.service.post("/logs/stream", json!({ "target": "==" })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Invalid value at `.target`: invalid filter directive: too many '=' in filter directive, expected 0 or 1",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"
}
"###);
}
#[actix_rt::test]
async fn logs_stream_bad_mode() {
let server = Server::new().await;
// Wrong type
let (response, code) = server.service.post("/logs/stream", json!({ "mode": true })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Invalid value type at `.mode`: expected a string, but found a boolean: `true`",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"
}
"###);
// Wrong type
let (response, code) = server.service.post("/logs/stream", json!({ "mode": [] })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Invalid value type at `.mode`: expected a string, but found an array: `[]`",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"
}
"###);
// Wrong value
let (response, code) = server.service.post("/logs/stream", json!({ "mode": "tamo" })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Unknown value `tamo` at `.mode`: expected one of `human`, `profile`",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"
}
"###);
}
#[actix_rt::test]
async fn logs_stream_bad_profile_memory() {
let server = Server::new().await;
// Wrong type
let (response, code) =
server.service.post("/logs/stream", json!({ "profileMemory": "tamo" })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Invalid value type at `.profileMemory`: expected a boolean, but found a string: `\"tamo\"`",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"
}
"###);
// Wrong type
let (response, code) =
server.service.post("/logs/stream", json!({ "profileMemory": ["hello", "kefir"] })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Invalid value type at `.profileMemory`: expected a boolean, but found an array: `[\"hello\",\"kefir\"]`",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"
}
"###);
// Used with default parameters
let (response, code) =
server.service.post("/logs/stream", json!({ "profileMemory": true })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Invalid value: `profile_memory` can only be used while profiling code and is not compatible with the Human mode.",
"code": "invalid_settings_typo_tolerance",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_settings_typo_tolerance"
}
"###);
// Used with an unsupported mode
let (response, code) =
server.service.post("/logs/stream", json!({ "mode": "fmt", "profileMemory": true })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Unknown value `fmt` at `.mode`: expected one of `human`, `profile`",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"
}
"###);
}
#[actix_rt::test]
async fn logs_stream_without_enabling_the_route() {
let server = Server::new().await;
let (response, code) = server.service.post("/logs/stream", json!({})).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "getting logs through the `/logs/stream` route requires enabling the `logs route` experimental feature. See https://github.com/orgs/meilisearch/discussions/721",
"code": "feature_not_enabled",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#feature_not_enabled"
}
"###);
let (response, code) = server.service.delete("/logs/stream").await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "getting logs through the `/logs/stream` route requires enabling the `logs route` experimental feature. See https://github.com/orgs/meilisearch/discussions/721",
"code": "feature_not_enabled",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#feature_not_enabled"
}
"###);
}

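The error messages in the deleted tests above document the `target` directive syntax accepted by `/logs/stream`: a bare level, a `crate=level` pair, or several comma-separated filters. A minimal sketch of parsing such directives with `tracing_subscriber::filter::Targets` (assuming the `tracing-subscriber` crate; the directive strings below are taken from the messages above):

use std::str::FromStr;
use tracing_subscriber::filter::Targets;

fn main() {
    // Same directive syntax as the `target` field: a bare level, a
    // `crate=level` pair, or several comma-separated filters.
    for target in ["info", "meilisearch=info", "index_scheduler=info,milli=trace", "=="] {
        match Targets::from_str(target) {
            Ok(_filter) => println!("{target:?} is a valid filter"),
            Err(err) => println!("{target:?} rejected: {err}"),
        }
    }
}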
View File

@@ -1,92 +0,0 @@
mod error;
use std::rc::Rc;
use std::str::FromStr;
use actix_web::http::header::ContentType;
use meili_snap::snapshot;
use meilisearch::{analytics, create_app, Opt};
use tracing::level_filters::LevelFilter;
use tracing_subscriber::layer::SubscriberExt;
use tracing_subscriber::Layer;
use crate::common::{default_settings, Server};
use crate::json;
#[actix_web::test]
async fn basic_test_log_stream_route() {
let db_path = tempfile::tempdir().unwrap();
let server = Server::new_with_options(Opt {
experimental_enable_logs_route: true,
..default_settings(db_path.path())
})
.await
.unwrap();
let (route_layer, route_layer_handle) =
tracing_subscriber::reload::Layer::new(None.with_filter(
tracing_subscriber::filter::Targets::new().with_target("", LevelFilter::OFF),
));
let subscriber = tracing_subscriber::registry().with(route_layer).with(
tracing_subscriber::fmt::layer()
.with_span_events(tracing_subscriber::fmt::format::FmtSpan::ACTIVE)
.with_filter(tracing_subscriber::filter::LevelFilter::from_str("INFO").unwrap()),
);
let app = actix_web::test::init_service(create_app(
server.service.index_scheduler.clone().into(),
server.service.auth.clone().into(),
server.service.options.clone(),
route_layer_handle,
analytics::MockAnalytics::new(&server.service.options),
true,
))
.await;
// set the subscriber as the default for the application
tracing::subscriber::set_global_default(subscriber).unwrap();
let app = Rc::new(app);
// First, we start listening on the `/logs/stream` route
let handle_app = app.clone();
let handle = tokio::task::spawn_local(async move {
let req = actix_web::test::TestRequest::post()
.uri("/logs/stream")
.insert_header(ContentType::json())
.set_payload(
serde_json::to_vec(&json!({
"mode": "human",
"target": "info",
}))
.unwrap(),
);
let req = req.to_request();
let ret = actix_web::test::call_service(&*handle_app, req).await;
actix_web::test::read_body(ret).await
});
// We're going to create an index to get at least one info log saying we processed a batch of task
let (ret, _code) = server.create_index(json!({ "uid": "tamo" })).await;
snapshot!(ret, @r###"
{
"taskUid": 0,
"indexUid": "tamo",
"status": "enqueued",
"type": "indexCreation",
"enqueuedAt": "[date]"
}
"###);
server.wait_task(ret.uid()).await;
let req = actix_web::test::TestRequest::delete().uri("/logs/stream");
let req = req.to_request();
let ret = actix_web::test::call_service(&*app, req).await;
let code = ret.status();
snapshot!(code, @"204 No Content");
let logs = handle.await.unwrap();
let logs = String::from_utf8(logs.to_vec()).unwrap();
assert!(logs.contains("INFO"), "{logs}");
}

View File

@@ -13,9 +13,9 @@ async fn index_with_documents<'a>(server: &'a Server, documents: &Value) -> Inde
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"scoreDetails": false,
"vectorStore": true,
"metrics": false,
"logsRoute": false,
"exportPuffinReports": false
}
"###);
@@ -87,52 +87,6 @@ async fn simple_search() {
snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_semanticScore":0.99029034},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_semanticScore":0.97434163},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_semanticScore":0.9472136}]"###);
}
#[actix_rt::test]
async fn highlighter() {
let server = Server::new().await;
let index = index_with_documents(&server, &SIMPLE_SEARCH_DOCUMENTS).await;
let (response, code) = index
.search_post(json!({"q": "Captain Marvel", "vector": [1.0, 1.0],
"hybrid": {"semanticRatio": 0.2},
"attributesToHighlight": [
"desc"
],
"highlightPreTag": "**BEGIN**",
"highlightPostTag": "**END**"
}))
.await;
snapshot!(code, @"200 OK");
snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_formatted":{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":["2.0","3.0"]}}},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_formatted":{"title":"Shazam!","desc":"a **BEGIN**Captain**END** **BEGIN**Marvel**END** ersatz","id":"1","_vectors":{"default":["1.0","3.0"]}}},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_formatted":{"title":"Captain Planet","desc":"He's not part of the **BEGIN**Marvel**END** Cinematic Universe","id":"2","_vectors":{"default":["1.0","2.0"]}}}]"###);
let (response, code) = index
.search_post(json!({"q": "Captain Marvel", "vector": [1.0, 1.0],
"hybrid": {"semanticRatio": 0.8},
"attributesToHighlight": [
"desc"
],
"highlightPreTag": "**BEGIN**",
"highlightPostTag": "**END**"
}))
.await;
snapshot!(code, @"200 OK");
snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_formatted":{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":["2.0","3.0"]}},"_semanticScore":0.99029034},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_formatted":{"title":"Captain Planet","desc":"He's not part of the **BEGIN**Marvel**END** Cinematic Universe","id":"2","_vectors":{"default":["1.0","2.0"]}},"_semanticScore":0.97434163},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_formatted":{"title":"Shazam!","desc":"a **BEGIN**Captain**END** **BEGIN**Marvel**END** ersatz","id":"1","_vectors":{"default":["1.0","3.0"]}},"_semanticScore":0.9472136}]"###);
// no highlighting on full semantic
let (response, code) = index
.search_post(json!({"q": "Captain Marvel", "vector": [1.0, 1.0],
"hybrid": {"semanticRatio": 1.0},
"attributesToHighlight": [
"desc"
],
"highlightPreTag": "**BEGIN**",
"highlightPostTag": "**END**"
}))
.await;
snapshot!(code, @"200 OK");
snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_formatted":{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":["2.0","3.0"]}},"_semanticScore":0.99029034},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_formatted":{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":["1.0","2.0"]}},"_semanticScore":0.97434163},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_formatted":{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":["1.0","3.0"]}}}]"###);
}
#[actix_rt::test]
async fn invalid_semantic_ratio() {
let server = Server::new().await;

View File

@@ -766,14 +766,38 @@ async fn faceting_max_values_per_facet() {
}
#[actix_rt::test]
async fn test_score_details() {
async fn experimental_feature_score_details() {
let server = Server::new().await;
let index = server.index("test");
let documents = DOCUMENTS.clone();
let res = index.add_documents(json!(documents), None).await;
index.wait_task(res.0.uid()).await;
index.add_documents(json!(documents), None).await;
index.wait_task(0).await;
index
.search(
json!({
"q": "train dragon",
"showRankingScoreDetails": true,
}),
|response, code| {
meili_snap::snapshot!(code, @"400 Bad Request");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"message": "Computing score details requires enabling the `score details` experimental feature. See https://github.com/meilisearch/product/discussions/674",
"code": "feature_not_enabled",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#feature_not_enabled"
}
"###);
},
)
.await;
let (response, code) = server.set_features(json!({"scoreDetails": true})).await;
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(response["scoreDetails"], @"true");
index
.search(

View File

@@ -44,6 +44,10 @@ struct WebhookHandle {
}
async fn create_webhook_server() -> WebhookHandle {
let mut log_builder = env_logger::Builder::new();
log_builder.parse_filters("info");
log_builder.init();
let (sender, receiver) = mpsc::unbounded_channel();
let sender = Arc::new(sender);

View File

@@ -9,11 +9,11 @@ edition.workspace = true
license.workspace = true
[dependencies]
anyhow = "1.0.79"
clap = { version = "4.4.17", features = ["derive"] }
anyhow = "1.0.75"
clap = { version = "4.2.1", features = ["derive"] }
dump = { path = "../dump" }
file-store = { path = "../file-store" }
meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-types = { path = "../meilisearch-types" }
time = { version = "0.3.31", features = ["formatting"] }
uuid = { version = "1.6.1", features = ["v4"], default-features = false }
time = { version = "0.3.30", features = ["formatting"] }
uuid = { version = "1.5.0", features = ["v4"], default-features = false }

View File

@@ -14,14 +14,14 @@ license.workspace = true
[dependencies]
bimap = { version = "0.6.3", features = ["serde"] }
bincode = "1.3.3"
bstr = "1.9.0"
bytemuck = { version = "1.14.0", features = ["extern_crate_alloc"] }
byteorder = "1.5.0"
bstr = "1.4.0"
bytemuck = { version = "1.13.1", features = ["extern_crate_alloc"] }
byteorder = "1.4.3"
charabia = { version = "0.8.5", default-features = false }
concat-arrays = "0.1.2"
crossbeam-channel = "0.5.11"
deserr = "0.6.1"
either = { version = "1.9.0", features = ["serde"] }
crossbeam-channel = "0.5.8"
deserr = "0.6.0"
either = { version = "1.8.1", features = ["serde"] }
flatten-serde-json = { path = "../flatten-serde-json" }
fst = "0.4.7"
fxhash = "0.2.1"
@@ -35,32 +35,32 @@ heed = { version = "0.20.0-alpha.9", default-features = false, features = [
"serde-bincode",
"read-txn-no-tls",
] }
indexmap = { version = "2.1.0", features = ["serde"] }
indexmap = { version = "2.0.0", features = ["serde"] }
json-depth-checker = { path = "../json-depth-checker" }
levenshtein_automata = { version = "0.2.1", features = ["fst_automaton"] }
memmap2 = "0.7.1"
obkv = "0.2.1"
once_cell = "1.19.0"
ordered-float = "4.2.0"
obkv = "0.2.0"
once_cell = "1.17.1"
ordered-float = "3.6.0"
rand_pcg = { version = "0.3.1", features = ["serde1"] }
rayon = "1.8.0"
roaring = "0.10.2"
rayon = "1.7.0"
roaring = "0.10.1"
rstar = { version = "0.11.0", features = ["serde"] }
serde = { version = "1.0.195", features = ["derive"] }
serde_json = { version = "1.0.111", features = ["preserve_order"] }
slice-group-by = "0.3.1"
serde = { version = "1.0.160", features = ["derive"] }
serde_json = { version = "1.0.95", features = ["preserve_order"] }
slice-group-by = "0.3.0"
smallstr = { version = "0.3.0", features = ["serde"] }
smallvec = "1.12.0"
smallvec = "1.10.0"
smartstring = "1.0.1"
tempfile = "3.9.0"
thiserror = "1.0.56"
time = { version = "0.3.31", features = [
tempfile = "3.5.0"
thiserror = "1.0.40"
time = { version = "0.3.20", features = [
"serde-well-known",
"formatting",
"parsing",
"macros",
] }
uuid = { version = "1.6.1", features = ["v4"] }
uuid = { version = "1.3.1", features = ["v4"] }
filter-parser = { path = "../filter-parser" }
@@ -71,8 +71,9 @@ itertools = "0.11.0"
puffin = "0.16.0"
# logging
log = "0.4.17"
logging_timer = "1.1.0"
csv = "1.3.0"
csv = "1.2.1"
candle-core = { git = "https://github.com/huggingface/candle.git", version = "0.3.1" }
candle-transformers = { git = "https://github.com/huggingface/candle.git", version = "0.3.1" }
candle-nn = { git = "https://github.com/huggingface/candle.git", version = "0.3.1" }
@@ -80,29 +81,36 @@ tokenizers = { git = "https://github.com/huggingface/tokenizers.git", tag = "v0.
hf-hub = { git = "https://github.com/dureuill/hf-hub.git", branch = "rust_tls", default_features = false, features = [
"online",
] }
tokio = { version = "1.35.1", features = ["rt"] }
futures = "0.3.30"
reqwest = { version = "0.11.23", features = [
tokio = { version = "1.34.0", features = ["rt"] }
futures = "0.3.29"
reqwest = { version = "0.11.16", features = [
"rustls-tls",
"json",
], default-features = false }
tiktoken-rs = "0.5.8"
tiktoken-rs = "0.5.7"
liquid = "0.26.4"
arroy = "0.2.0"
arroy = { git = "https://github.com/meilisearch/arroy.git", version = "0.1.0" }
rand = "0.8.5"
tracing = "0.1.40"
[dev-dependencies]
mimalloc = { version = "0.1.39", default-features = false }
mimalloc = { version = "0.1.37", default-features = false }
big_s = "1.0.2"
insta = "1.34.0"
insta = "1.29.0"
maplit = "1.0.2"
md5 = "0.7.0"
meili-snap = { path = "../meili-snap" }
rand = { version = "0.8.5", features = ["small_rng"] }
[features]
all-tokenizations = ["charabia/chinese", "charabia/hebrew", "charabia/japanese", "charabia/thai", "charabia/korean", "charabia/greek", "charabia/khmer"]
all-tokenizations = [
"charabia/chinese",
"charabia/hebrew",
"charabia/japanese",
"charabia/thai",
"charabia/korean",
"charabia/greek",
"charabia/khmer",
]
# Use POSIX semaphores instead of SysV semaphores in LMDB
# For more information on this feature, see heed's Cargo.toml
@@ -129,6 +137,3 @@ greek = ["charabia/greek"]
# allow khmer specialized tokenization
khmer = ["charabia/khmer"]
# allow CUDA support, see <https://github.com/meilisearch/meilisearch/issues/4306>
cuda = ["candle-core/cuda"]

View File

@@ -25,7 +25,6 @@ impl<R: io::Read + io::Seek> DocumentsBatchReader<R> {
///
/// It first retrieves the index, then moves to the first document. Use the `into_cursor`
/// method to iterator over the documents, from the first to the last.
#[tracing::instrument(level = "trace", skip_all, target = "indexing::documents")]
pub fn from_reader(reader: R) -> Result<Self, Error> {
let reader = grenad::Reader::new(reader)?;
let mut cursor = reader.into_cursor()?;

View File

@@ -172,7 +172,7 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco
TooManyDocumentIds { primary_key: String, document: Object },
#[error("The primary key inference failed as the engine did not find any field ending with `id` in its name. Please specify the primary key manually using the `primaryKey` query parameter.")]
NoPrimaryKeyCandidateFound,
#[error("The primary key inference failed as the engine found {} fields ending with `id` in their names: '{}' and '{}'. Please specify the primary key manually using the `primaryKey` query parameter.", .candidates.len(), .candidates.first().unwrap(), .candidates.get(1).unwrap())]
#[error("The primary key inference failed as the engine found {} fields ending with `id` in their names: '{}' and '{}'. Please specify the primary key manually using the `primaryKey` query parameter.", .candidates.len(), .candidates.get(0).unwrap(), .candidates.get(1).unwrap())]
MultiplePrimaryKeyCandidatesFound { candidates: Vec<String> },
#[error("There is no more space left on the device. Consider increasing the size of the disk/partition.")]
NoSpaceLeftOnDevice,
@@ -227,22 +227,6 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco
source_: crate::vector::settings::EmbedderSource,
embedder_name: String,
},
#[error("`.embedders.{embedder_name}.dimensions`: Model `{model}` does not support overriding its native dimensions of {expected_dimensions}. Found {dimensions}")]
InvalidOpenAiModelDimensions {
embedder_name: String,
model: &'static str,
dimensions: usize,
expected_dimensions: usize,
},
#[error("`.embedders.{embedder_name}.dimensions`: Model `{model}` does not support overriding its dimensions to a value higher than {max_dimensions}. Found {dimensions}")]
InvalidOpenAiModelDimensionsMax {
embedder_name: String,
model: &'static str,
dimensions: usize,
max_dimensions: usize,
},
#[error("`.embedders.{embedder_name}.dimensions`: `dimensions` cannot be zero")]
InvalidSettingsDimensions { embedder_name: String },
}
impl From<crate::vector::Error> for Error {
@@ -267,7 +251,6 @@ impl From<arroy::Error> for Error {
arroy::Error::DatabaseFull
| arroy::Error::InvalidItemAppend
| arroy::Error::UnmatchingDistance { .. }
| arroy::Error::MissingNode
| arroy::Error::MissingMetadata => {
Error::InternalError(InternalError::ArroyError(value))
}

View File

@@ -181,7 +181,7 @@ mod tests {
get_simple_string_index_with_multiple_field_ids(),
get_random_looking_string_index_with_multiple_field_ids(),
];
for index in indexes {
for (_i, index) in indexes.iter().enumerate() {
let txn = index.env.read_txn().unwrap();
let candidates = RoaringBitmap::new();
let mut results = String::new();
@@ -212,7 +212,7 @@ mod tests {
get_simple_string_index_with_multiple_field_ids(),
get_random_looking_string_index_with_multiple_field_ids(),
];
for index in indexes {
for (_i, index) in indexes.iter().enumerate() {
let txn = index.env.read_txn().unwrap();
let candidates = RoaringBitmap::new();
let mut results = String::new();

View File

@@ -195,7 +195,7 @@ mod tests {
get_simple_string_index_with_multiple_field_ids(),
get_random_looking_string_index_with_multiple_field_ids(),
];
for index in indexes {
for (_i, index) in indexes.iter().enumerate() {
let txn = index.env.read_txn().unwrap();
let candidates = RoaringBitmap::new();
let mut results = String::new();
@@ -226,7 +226,7 @@ mod tests {
get_simple_string_index_with_multiple_field_ids(),
get_random_looking_string_index_with_multiple_field_ids(),
];
for index in indexes {
for (_i, index) in indexes.iter().enumerate() {
let txn = index.env.read_txn().unwrap();
let candidates = RoaringBitmap::new();
let mut results = String::new();

View File

@@ -142,7 +142,7 @@ pub(crate) mod tests {
let mut txn = index.env.write_txn().unwrap();
let mut rng = rand::rngs::SmallRng::from_seed([0; 32]);
for key in std::iter::from_fn(|| Some(rng.gen_range(0..256))).take(128) {
for (_i, key) in std::iter::from_fn(|| Some(rng.gen_range(0..256))).take(128).enumerate() {
let mut bitmap = RoaringBitmap::new();
bitmap.insert(key);
bitmap.insert(key + 100);
@@ -172,7 +172,7 @@ pub(crate) mod tests {
let keys =
std::iter::from_fn(|| Some(rng.gen_range(0..256))).take(128).collect::<Vec<u32>>();
for fid in 0..2 {
for &key in &keys {
for (_i, &key) in keys.iter().enumerate() {
let mut bitmap = RoaringBitmap::new();
bitmap.insert(key);
bitmap.insert(key + 100);
@@ -207,7 +207,7 @@ pub(crate) mod tests {
let keys =
std::iter::from_fn(|| Some(rng.gen_range(0..256))).take(128).collect::<Vec<u32>>();
for fid in 0..2 {
for &key in &keys {
for (_i, &key) in keys.iter().enumerate() {
let mut bitmap = RoaringBitmap::new();
bitmap.insert(key);
bitmap.insert(key + 100);

View File

@@ -102,7 +102,7 @@ impl ScoreWithRatioResult {
}
SearchResult {
matching_words: right.matching_words,
matching_words: left.matching_words,
candidates: left.candidates | right.candidates,
documents_ids,
document_scores,

View File

@@ -6,9 +6,9 @@ use charabia::Normalize;
use fst::automaton::{Automaton, Str};
use fst::{IntoStreamer, Streamer};
use levenshtein_automata::{LevenshteinAutomatonBuilder as LevBuilder, DFA};
use log::error;
use once_cell::sync::Lazy;
use roaring::bitmap::RoaringBitmap;
use tracing::error;
pub use self::facet::{FacetDistribution, Filter, OrderBy, DEFAULT_VALUES_PER_FACET};
pub use self::new::matches::{FormatOptions, MatchBounds, MatcherBuilder, MatchingWords};

View File

@@ -166,9 +166,6 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
continue;
}
let span = tracing::trace_span!(target: "search::bucket_sort", "next_bucket", id = ranking_rules[cur_ranking_rule_index].id());
let entered = span.enter();
let Some(next_bucket) = ranking_rules[cur_ranking_rule_index].next_bucket(
ctx,
logger,
@@ -178,7 +175,6 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
back!();
continue;
};
drop(entered);
ranking_rule_scores.push(next_bucket.score);

View File

@@ -14,12 +14,6 @@ impl<'t, 'i> ClearDocuments<'t, 'i> {
ClearDocuments { wtxn, index }
}
#[tracing::instrument(
level = "trace",
skip(self),
target = "indexing::documents",
name = "clear_documents"
)]
pub fn execute(self) -> Result<u64> {
puffin::profile_function!();

View File

@@ -61,6 +61,7 @@ impl FacetsUpdateIncremental {
}
}
#[logging_timer::time("FacetsUpdateIncremental::{}")]
pub fn execute(self, wtxn: &mut RwTxn) -> crate::Result<()> {
let mut cursor = self.delta_data.into_cursor()?;
while let Some((key, value)) = cursor.move_on_next()? {
@@ -751,7 +752,7 @@ mod tests {
let mut rng = rand::rngs::SmallRng::from_seed([0; 32]);
keys.shuffle(&mut rng);
for key in keys {
for (_i, key) in keys.into_iter().enumerate() {
let mut bitmap = RoaringBitmap::new();
bitmap.insert(key);
index.insert(&mut txn, 0, &(key as f64), &bitmap);
@@ -770,7 +771,7 @@ mod tests {
let mut rng = rand::rngs::SmallRng::from_seed([0; 32]);
keys.shuffle(&mut rng);
for key in keys {
for (_i, key) in keys.into_iter().enumerate() {
let mut bitmap = RoaringBitmap::new();
bitmap.insert(key);
bitmap.insert(rng.gen_range(256..512));

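The `#[logging_timer::time("FacetsUpdateIncremental::{}")]` attribute added in the hunk above wraps `execute` so the elapsed time of the incremental facet update is logged when the function returns. A dependency-free sketch of the same idea, hand-rolled with `std::time::Instant` (`execute_with_timing` and the dummy workload are illustrative, not part of the codebase):

use std::time::Instant;

// Measure how long a closure takes and report the elapsed time on the way
// out, which is roughly what the timing attribute does around `execute`.
fn execute_with_timing<T>(label: &str, f: impl FnOnce() -> T) -> T {
    let start = Instant::now();
    let result = f();
    println!("{label} took {:.2?}", start.elapsed());
    result
}

fn main() {
    // Stand-in workload for `FacetsUpdateIncremental::execute`; the real call
    // walks the delta data and patches the facet level databases in LMDB.
    let total = execute_with_timing("FacetsUpdateIncremental::execute", || {
        (0..1_000_000u64).sum::<u64>()
    });
    println!("checksum: {total}");
}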
View File

@@ -85,8 +85,8 @@ use charabia::normalizer::{Normalize, NormalizerOption};
use grenad::{CompressionType, SortAlgorithm};
use heed::types::{Bytes, DecodeIgnore, SerdeJson};
use heed::BytesEncode;
use log::debug;
use time::OffsetDateTime;
use tracing::debug;
use self::incremental::FacetsUpdateIncremental;
use super::FacetsUpdateBulk;
@@ -170,91 +170,91 @@ impl<'i> FacetsUpdate<'i> {
incremental_update.execute(wtxn)?;
}
// We clear the list of normalized-for-search facets
// and the previous FSTs to compute everything from scratch
self.index.facet_id_normalized_string_strings.clear(wtxn)?;
self.index.facet_id_string_fst.clear(wtxn)?;
// // We clear the list of normalized-for-search facets
// // and the previous FSTs to compute everything from scratch
// self.index.facet_id_normalized_string_strings.clear(wtxn)?;
// self.index.facet_id_string_fst.clear(wtxn)?;
// As we can't use the same write transaction to read and write in two different databases
// we must create a temporary sorter that we will write into LMDB afterward.
// As multiple unnormalized facet values can become the same normalized facet value
// we must merge them together.
let mut sorter = create_sorter(
SortAlgorithm::Unstable,
merge_btreeset_string,
CompressionType::None,
None,
None,
None,
);
// // As we can't use the same write transaction to read and write in two different databases
// // we must create a temporary sorter that we will write into LMDB afterward.
// // As multiple unnormalized facet values can become the same normalized facet value
// // we must merge them together.
// let mut sorter = create_sorter(
// SortAlgorithm::Unstable,
// merge_btreeset_string,
// CompressionType::None,
// None,
// None,
// None,
// );
// We iterate on the list of original, semi-normalized, facet values
// and normalize them for search, inserting them in LMDB in any given order.
let options = NormalizerOption { lossy: true, ..Default::default() };
let database = self.index.facet_id_string_docids.remap_data_type::<DecodeIgnore>();
for result in database.iter(wtxn)? {
let (facet_group_key, ()) = result?;
if let FacetGroupKey { field_id, level: 0, left_bound } = facet_group_key {
let mut normalized_facet = left_bound.normalize(&options);
let normalized_truncated_facet: String;
if normalized_facet.len() > MAX_FACET_VALUE_LENGTH {
normalized_truncated_facet = normalized_facet
.char_indices()
.take_while(|(idx, _)| *idx < MAX_FACET_VALUE_LENGTH)
.map(|(_, c)| c)
.collect();
normalized_facet = normalized_truncated_facet.into();
}
let set = BTreeSet::from_iter(std::iter::once(left_bound));
let key = (field_id, normalized_facet.as_ref());
let key = BEU16StrCodec::bytes_encode(&key).map_err(heed::Error::Encoding)?;
let val = SerdeJson::bytes_encode(&set).map_err(heed::Error::Encoding)?;
sorter.insert(key, val)?;
}
}
// // We iterate on the list of original, semi-normalized, facet values
// // and normalize them for search, inserting them in LMDB in any given order.
// let options = NormalizerOption { lossy: true, ..Default::default() };
// let database = self.index.facet_id_string_docids.remap_data_type::<DecodeIgnore>();
// for result in database.iter(wtxn)? {
// let (facet_group_key, ()) = result?;
// if let FacetGroupKey { field_id, level: 0, left_bound } = facet_group_key {
// let mut normalized_facet = left_bound.normalize(&options);
// let normalized_truncated_facet: String;
// if normalized_facet.len() > MAX_FACET_VALUE_LENGTH {
// normalized_truncated_facet = normalized_facet
// .char_indices()
// .take_while(|(idx, _)| *idx < MAX_FACET_VALUE_LENGTH)
// .map(|(_, c)| c)
// .collect();
// normalized_facet = normalized_truncated_facet.into();
// }
// let set = BTreeSet::from_iter(std::iter::once(left_bound));
// let key = (field_id, normalized_facet.as_ref());
// let key = BEU16StrCodec::bytes_encode(&key).map_err(heed::Error::Encoding)?;
// let val = SerdeJson::bytes_encode(&set).map_err(heed::Error::Encoding)?;
// sorter.insert(key, val)?;
// }
// }
// In this loop we don't need to take care of merging bitmaps
// as the grenad sorter already merged them for us.
let mut merger_iter = sorter.into_stream_merger_iter()?;
while let Some((key_bytes, btreeset_bytes)) = merger_iter.next()? {
self.index.facet_id_normalized_string_strings.remap_types::<Bytes, Bytes>().put(
wtxn,
key_bytes,
btreeset_bytes,
)?;
}
// // In this loop we don't need to take care of merging bitmaps
// // as the grenad sorter already merged them for us.
// let mut merger_iter = sorter.into_stream_merger_iter()?;
// while let Some((key_bytes, btreeset_bytes)) = merger_iter.next()? {
// self.index.facet_id_normalized_string_strings.remap_types::<Bytes, Bytes>().put(
// wtxn,
// key_bytes,
// btreeset_bytes,
// )?;
// }
// We compute one FST by string facet
let mut text_fsts = vec![];
let mut current_fst: Option<(u16, fst::SetBuilder<Vec<u8>>)> = None;
let database =
self.index.facet_id_normalized_string_strings.remap_data_type::<DecodeIgnore>();
for result in database.iter(wtxn)? {
let ((field_id, normalized_facet), _) = result?;
current_fst = match current_fst.take() {
Some((fid, fst_builder)) if fid != field_id => {
let fst = fst_builder.into_set();
text_fsts.push((fid, fst));
Some((field_id, fst::SetBuilder::memory()))
}
Some((field_id, fst_builder)) => Some((field_id, fst_builder)),
None => Some((field_id, fst::SetBuilder::memory())),
};
// // We compute one FST by string facet
// let mut text_fsts = vec![];
// let mut current_fst: Option<(u16, fst::SetBuilder<Vec<u8>>)> = None;
// let database =
// self.index.facet_id_normalized_string_strings.remap_data_type::<DecodeIgnore>();
// for result in database.iter(wtxn)? {
// let ((field_id, normalized_facet), _) = result?;
// current_fst = match current_fst.take() {
// Some((fid, fst_builder)) if fid != field_id => {
// let fst = fst_builder.into_set();
// text_fsts.push((fid, fst));
// Some((field_id, fst::SetBuilder::memory()))
// }
// Some((field_id, fst_builder)) => Some((field_id, fst_builder)),
// None => Some((field_id, fst::SetBuilder::memory())),
// };
if let Some((_, fst_builder)) = current_fst.as_mut() {
fst_builder.insert(normalized_facet)?;
}
}
// if let Some((_, fst_builder)) = current_fst.as_mut() {
// fst_builder.insert(normalized_facet)?;
// }
// }
if let Some((field_id, fst_builder)) = current_fst {
let fst = fst_builder.into_set();
text_fsts.push((field_id, fst));
}
// if let Some((field_id, fst_builder)) = current_fst {
// let fst = fst_builder.into_set();
// text_fsts.push((field_id, fst));
// }
// We write those FSTs in LMDB now
for (field_id, fst) in text_fsts {
self.index.facet_id_string_fst.put(wtxn, &field_id, &fst)?;
}
// // We write those FSTs in LMDB now
// for (field_id, fst) in text_fsts {
// self.index.facet_id_string_fst.put(wtxn, &field_id, &fst)?;
// }
Ok(())
}

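The commented-out block above is the facet-search maintenance step being disabled: it normalized each level-0 facet string and rebuilt one `fst::Set` per field id so facet values can be matched by prefix. A minimal sketch of that FST-building step in isolation, using the same `fst::SetBuilder::memory()` / `into_set()` calls as the original code (the facet values below are made up):

use fst::{Set, Streamer};

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // fst::SetBuilder requires keys in lexicographic order; the original code
    // gets that ordering from iterating the LMDB facet databases.
    let normalized_facets = ["blue", "green", "red"];

    let mut builder = fst::SetBuilder::memory();
    for value in normalized_facets {
        builder.insert(value)?;
    }
    let set: Set<Vec<u8>> = builder.into_set();

    // The resulting set answers the membership and prefix queries that
    // facet search relies on.
    assert!(set.contains("red"));
    let mut stream = set.stream();
    while let Some(value) = stream.next() {
        println!("{}", String::from_utf8_lossy(value));
    }
    Ok(())
}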
View File

@@ -22,7 +22,6 @@ use crate::{FieldId, Index, Result};
/// # Panics
///
/// - if reader.is_empty(), this function may panic in some cases
#[tracing::instrument(level = "trace", skip_all, target = "indexing::documents")]
pub fn enrich_documents_batch<R: Read + Seek>(
rtxn: &heed::RoTxn,
index: &Index,
@@ -78,7 +77,7 @@ pub fn enrich_documents_batch<R: Read + Seek>(
},
[] => return Ok(Err(UserError::NoPrimaryKeyCandidateFound)),
[(field_id, name)] => {
tracing::info!("Primary key was not specified in index. Inferred to '{name}'");
log::info!("Primary key was not specified in index. Inferred to '{name}'");
PrimaryKey::Flat { name, field_id: *field_id }
}
multiple => {
@@ -144,8 +143,6 @@ pub fn enrich_documents_batch<R: Read + Seek>(
/// Retrieve the document id after validating it, returning a `UserError`
/// if the id is invalid or can't be guessed.
#[tracing::instrument(level = "trace", skip(uuid_buffer, documents_batch_index, document)
target = "indexing::documents")]
fn fetch_or_generate_document_id(
document: &obkv::KvReader<FieldId>,
documents_batch_index: &DocumentsBatchIndex,

View File

@@ -21,7 +21,7 @@ pub type ScriptLanguageDocidsMap = HashMap<(Script, Language), (RoaringBitmap, R
///
/// Returns the generated internal documents ids and a grenad reader
/// with the list of extracted words from the given chunk of documents.
#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")]
#[logging_timer::time]
pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
obkv_documents: grenad::Reader<R>,
indexer: GrenadParameters,

View File

@@ -16,7 +16,7 @@ use crate::Result;
///
/// Returns a grenad reader with the list of extracted facet numbers and
/// documents ids from the given chunk of docid facet number positions.
#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")]
#[logging_timer::time]
pub fn extract_facet_number_docids<R: io::Read + io::Seek>(
fid_docid_facet_number: grenad::Reader<R>,
indexer: GrenadParameters,

View File

@@ -15,7 +15,7 @@ use crate::{FieldId, Result};
///
/// Returns a grenad reader with the list of extracted facet strings and
/// documents ids from the given chunk of docid facet string positions.
#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")]
#[logging_timer::time]
pub fn extract_facet_string_docids<R: io::Read + io::Seek>(
docid_fid_facet_string: grenad::Reader<R>,
indexer: GrenadParameters,

View File

@@ -39,7 +39,7 @@ pub struct ExtractedFacetValues {
/// Returns the generated grenad reader containing the docid, the fid, and the original value as key
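    /// As sketched after this file's hunks, the `_geo` error path builds its context lazily.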
/// and the normalized value as value extracted from the given chunk of documents.
/// We need the fid of the geofields to correctly parse them as numbers if they were sent as strings initially.
#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")]
#[logging_timer::time]
pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
obkv_documents: grenad::Reader<R>,
indexer: GrenadParameters,
@@ -431,7 +431,7 @@ fn extract_facet_values(value: &Value, geo_field: bool) -> FilterableValues {
if let Ok(float) = original.parse() {
output_numbers.push(float);
} else {
tracing::warn!(
log::warn!(
"Internal error, could not parse a geofield that has been validated. Please open an issue."
)
}

View File

@@ -19,7 +19,7 @@ const MAX_COUNTED_WORDS: usize = 30;
///
/// Returns a grenad reader with the list of extracted field id word counts
/// and documents ids from the given chunk of docid word positions.
#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")]
#[logging_timer::time]
pub fn extract_fid_word_count_docids<R: io::Read + io::Seek>(
docid_word_positions: grenad::Reader<R>,
indexer: GrenadParameters,

View File

@@ -13,7 +13,7 @@ use crate::{FieldId, InternalError, Result};
/// Extracts the geographical coordinates contained in each document under the `_geo` field.
///
/// Returns the generated grenad reader containing the docid as key associated to the (latitude, longitude)
#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")]
#[logging_timer::time]
pub fn extract_geo_points<R: io::Read + io::Seek>(
obkv_documents: grenad::Reader<R>,
indexer: GrenadParameters,
@@ -34,9 +34,7 @@ pub fn extract_geo_points<R: io::Read + io::Seek>(
// since we only need the primary key when we throw an error
// we create this getter to lazily get it when needed
let document_id = || -> Value {
let reader = KvReaderDelAdd::new(obkv.get(primary_key_id).unwrap());
let document_id =
reader.get(DelAdd::Deletion).or(reader.get(DelAdd::Addition)).unwrap();
let document_id = obkv.get(primary_key_id).unwrap();
serde_json::from_slice(document_id).unwrap()
};

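The comment in the hunk above explains a small pattern: the primary key is only needed when an error is reported, so it is wrapped in a getter closure and computed lazily. A short sketch of that pattern (assuming `serde_json`, which is already a dependency; the raw id and the validation flag are made up):

fn main() {
    let raw_id = br#""movie-42""#;
    // The closure is only evaluated on the error path, so the (potentially
    // expensive) JSON parsing is skipped for valid documents.
    let document_id = || -> serde_json::Value { serde_json::from_slice(raw_id).unwrap() };

    let latitude_is_valid = false; // pretend validation of the `_geo` field failed
    if !latitude_is_valid {
        eprintln!("could not parse the `_geo` field of document {}", document_id());
    }
}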
View File

@@ -67,7 +67,7 @@ impl VectorStateDelta {
/// Extracts the embedding vector contained in each document under the `_vectors` field.
///
/// Returns the generated grenad reader containing the docid as key associated to the Vec<f32>
#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")]
#[logging_timer::time]
pub fn extract_vector_points<R: io::Read + io::Seek>(
obkv_documents: grenad::Reader<R>,
indexer: GrenadParameters,
@@ -135,7 +135,7 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
let del_vectors = extract_vectors(old, document_id, embedder_name)?;
let add_vectors = extract_vectors(new, document_id, embedder_name)?;
if add_vectors.len() > usize::from(u8::MAX) {
if add_vectors.len() > u8::MAX.into() {
return Err(crate::Error::UserError(crate::UserError::TooManyVectors(
document_id().to_string(),
add_vectors.len(),
@@ -164,7 +164,7 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
(None, Some(new)) => {
// was possibly autogenerated, remove all vectors for that document
let add_vectors = extract_vectors(new, document_id, embedder_name)?;
if add_vectors.len() > usize::from(u8::MAX) {
if add_vectors.len() > u8::MAX.into() {
return Err(crate::Error::UserError(crate::UserError::TooManyVectors(
document_id().to_string(),
add_vectors.len(),
@@ -186,12 +186,12 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
prompt.render(obkv, DelAdd::Deletion, field_id_map).unwrap_or_default();
let new_prompt = prompt.render(obkv, DelAdd::Addition, field_id_map)?;
if old_prompt != new_prompt {
tracing::trace!(
log::trace!(
"🚀 Changing prompt from\n{old_prompt}\n===to===\n{new_prompt}"
);
VectorStateDelta::NowGenerated(new_prompt)
} else {
tracing::trace!("⏭️ Prompt unmodified, skipping");
log::trace!("⏭️ Prompt unmodified, skipping");
VectorStateDelta::NoChange
}
} else {
@@ -339,7 +339,9 @@ pub fn extract_embeddings<R: io::Read + io::Seek>(
indexer: GrenadParameters,
embedder: Arc<Embedder>,
) -> Result<grenad::Reader<BufReader<File>>> {
let n_chunks = embedder.chunk_count_hint(); // chunk level parallelism
let rt = tokio::runtime::Builder::new_current_thread().enable_io().enable_time().build()?;
let n_chunks = embedder.chunk_count_hint(); // chunk level parallelism
let n_vectors_per_chunk = embedder.prompt_count_in_chunk_hint(); // number of vectors in a single chunk
// docid, state with embedding
@@ -373,8 +375,11 @@ pub fn extract_embeddings<R: io::Read + io::Seek>(
current_chunk_ids.push(docid);
if chunks.len() == chunks.capacity() {
let chunked_embeds = embedder
.embed_chunks(std::mem::replace(&mut chunks, Vec::with_capacity(n_chunks)))
let chunked_embeds = rt
.block_on(
embedder
.embed_chunks(std::mem::replace(&mut chunks, Vec::with_capacity(n_chunks))),
)
.map_err(crate::vector::Error::from)
.map_err(crate::Error::from)?;
@@ -391,8 +396,8 @@ pub fn extract_embeddings<R: io::Read + io::Seek>(
// send last chunk
if !chunks.is_empty() {
let chunked_embeds = embedder
.embed_chunks(std::mem::take(&mut chunks))
let chunked_embeds = rt
.block_on(embedder.embed_chunks(std::mem::take(&mut chunks)))
.map_err(crate::vector::Error::from)
.map_err(crate::Error::from)?;
for (docid, embeddings) in chunks_ids
@@ -405,15 +410,13 @@ pub fn extract_embeddings<R: io::Read + io::Seek>(
}
if !current_chunk.is_empty() {
let embeds = embedder
.embed_chunks(vec![std::mem::take(&mut current_chunk)])
let embeds = rt
.block_on(embedder.embed(std::mem::take(&mut current_chunk)))
.map_err(crate::vector::Error::from)
.map_err(crate::Error::from)?;
if let Some(embeds) = embeds.first() {
for (docid, embeddings) in current_chunk_ids.iter().zip(embeds.iter()) {
state_writer.insert(docid.to_be_bytes(), cast_slice(embeddings.as_inner()))?;
}
for (docid, embeddings) in current_chunk_ids.iter().zip(embeds.iter()) {
state_writer.insert(docid.to_be_bytes(), cast_slice(embeddings.as_inner()))?;
}
}

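The hunk above drives the async `embed_chunks` call from synchronous extraction code by building a current-thread Tokio runtime and blocking on the future. A small self-contained sketch of that pattern (assuming the `tokio` crate with the `rt` feature, as in the Cargo.toml above; `fake_embed` is a stand-in for the real embedder call):

// Stand-in for `Embedder::embed_chunks`: async, returns one count per chunk.
async fn fake_embed(chunks: Vec<Vec<String>>) -> Vec<usize> {
    chunks.into_iter().map(|chunk| chunk.len()).collect()
}

fn main() -> std::io::Result<()> {
    // A single-threaded runtime is enough to drive the future from non-async
    // (e.g. rayon) code; the real extractor also enables the IO and time
    // drivers because the embedders may issue network requests.
    let rt = tokio::runtime::Builder::new_current_thread().build()?;

    let chunks = vec![vec!["first prompt".to_string()], vec!["second prompt".to_string()]];
    // block_on runs the future to completion on the calling thread.
    let counts = rt.block_on(fake_embed(chunks));
    println!("{counts:?}");
    Ok(())
}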
View File

@@ -1,17 +1,14 @@
use std::collections::{BTreeSet, HashSet};
use std::fs::File;
use std::io::{self, BufReader};
use std::sync::Mutex;
use heed::BytesDecode;
use obkv::KvReaderU16;
use once_cell::sync::Lazy;
use super::helpers::{
create_sorter, create_writer, merge_deladd_cbo_roaring_bitmaps, sorter_into_reader,
try_split_array_at, writer_into_reader, GrenadParameters,
};
use super::RawKVWriter;
use crate::error::SerializationError;
use crate::heed_codec::StrBEU16Codec;
use crate::index::db_name::DOCID_WORD_POSITIONS;
@@ -19,11 +16,6 @@ use crate::update::del_add::{is_noop_del_add_obkv, DelAdd, KvReaderDelAdd, KvWri
use crate::update::MergeFn;
use crate::{DocumentId, FieldId, Result};
static WORD_FID_DOCIDS_RAW_KV: Lazy<Mutex<RawKVWriter>> =
Lazy::new(|| Mutex::new(RawKVWriter::new("extract_word_fid_docids").unwrap()));
static WORD_DOCIDS_RAW_KV: Lazy<Mutex<RawKVWriter>> =
Lazy::new(|| Mutex::new(RawKVWriter::new("extract_word_docids").unwrap()));
/// Extracts the word and the documents ids where this word appear.
///
/// Returns a grenad reader with the list of extracted words and
@@ -31,7 +23,7 @@ static WORD_DOCIDS_RAW_KV: Lazy<Mutex<RawKVWriter>> =
///
/// The first returned reader is the one for normal word_docids, and the second one is for
/// exact_word_docids
#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")]
#[logging_timer::time]
pub fn extract_word_docids<R: io::Read + io::Seek>(
docid_word_positions: grenad::Reader<R>,
indexer: GrenadParameters,
@@ -117,7 +109,6 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(
tempfile::tempfile()?,
);
let mut word_docids_raw_kv = WORD_DOCIDS_RAW_KV.lock().unwrap();
let mut iter = word_fid_docids_sorter.into_stream_merger_iter()?;
// TODO: replace sorters by writers by accumulating values into a buffer before inserting them.
while let Some((key, value)) = iter.next()? {
@@ -133,14 +124,10 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(
if exact_attributes.contains(&fid) {
exact_word_docids_sorter.insert(word.as_bytes(), value)?;
} else {
word_docids_raw_kv.push(word.as_bytes(), value).unwrap();
word_docids_sorter.insert(word.as_bytes(), value)?;
}
}
WORD_FID_DOCIDS_RAW_KV.lock().unwrap().flush().unwrap();
word_docids_raw_kv.flush().unwrap();
Ok((
sorter_into_reader(word_docids_sorter, indexer)?,
sorter_into_reader(exact_word_docids_sorter, indexer)?,
@@ -148,7 +135,6 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(
))
}
#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")]
fn words_into_sorter(
document_id: DocumentId,
fid: FieldId,
@@ -159,8 +145,6 @@ fn words_into_sorter(
) -> Result<()> {
puffin::profile_function!();
let mut raw_kv_word_fid_docids = WORD_FID_DOCIDS_RAW_KV.lock().unwrap();
use itertools::merge_join_by;
use itertools::EitherOrBoth::{Both, Left, Right};
@@ -188,9 +172,7 @@ fn words_into_sorter(
key_buffer.extend_from_slice(word_bytes);
key_buffer.push(0);
key_buffer.extend_from_slice(&fid.to_be_bytes());
let value_buffer = value_writer.into_inner().unwrap();
raw_kv_word_fid_docids.push(key_buffer, value_buffer).unwrap();
word_fid_docids_sorter.insert(&key_buffer, value_buffer)?;
word_fid_docids_sorter.insert(&key_buffer, value_writer.into_inner().unwrap())?;
}
Ok(())

View File

@@ -1,31 +1,25 @@
use std::collections::{BTreeMap, VecDeque};
use std::fs::File;
use std::io::BufReader;
use std::sync::Mutex;
use std::{cmp, io};
use obkv::KvReaderU16;
use once_cell::sync::Lazy;
use super::helpers::{
create_sorter, create_writer, merge_deladd_cbo_roaring_bitmaps, try_split_array_at,
writer_into_reader, GrenadParameters, MergeFn,
};
use super::RawKVWriter;
use crate::error::SerializationError;
use crate::index::db_name::DOCID_WORD_POSITIONS;
use crate::proximity::{index_proximity, MAX_DISTANCE};
use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd};
use crate::{DocumentId, Result};
static WORD_PAIR_PROXIMITY_DOCIDS_RAW_KV: Lazy<Mutex<RawKVWriter>> =
Lazy::new(|| Mutex::new(RawKVWriter::new("extract_word_pair_proximity_docids").unwrap()));
/// Extracts the best proximity between pairs of words and the documents ids where this pair appear.
///
/// Returns a grenad reader with the list of extracted word pairs proximities and
/// documents ids from the given chunk of docid word positions.
#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")]
#[logging_timer::time]
pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
docid_word_positions: grenad::Reader<R>,
indexer: GrenadParameters,
@@ -65,10 +59,6 @@ pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
if current_document_id.map_or(false, |id| id != document_id) {
puffin::profile_scope!("Document into sorter");
// FIXME: span inside of a hot loop might degrade performance and create big reports
let span = tracing::trace_span!(target: "indexing::details", "document_into_sorter");
let _entered = span.enter();
document_word_positions_into_sorter(
current_document_id.unwrap(),
&del_word_pair_proximity,
@@ -87,7 +77,7 @@ pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
if let Some(deletion) = KvReaderDelAdd::new(value).get(DelAdd::Deletion) {
for (position, word) in KvReaderU16::new(deletion).iter() {
// drain the proximity window until the head word is considered close to the word we are inserting.
while del_word_positions.front().map_or(false, |(_w, p)| {
while del_word_positions.get(0).map_or(false, |(_w, p)| {
index_proximity(*p as u32, position as u32) >= MAX_DISTANCE
}) {
word_positions_into_word_pair_proximity(
@@ -116,7 +106,7 @@ pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
if let Some(addition) = KvReaderDelAdd::new(value).get(DelAdd::Addition) {
for (position, word) in KvReaderU16::new(addition).iter() {
// drain the proximity window until the head word is considered close to the word we are inserting.
while add_word_positions.front().map_or(false, |(_w, p)| {
while add_word_positions.get(0).map_or(false, |(_w, p)| {
index_proximity(*p as u32, position as u32) >= MAX_DISTANCE
}) {
word_positions_into_word_pair_proximity(
@@ -148,10 +138,6 @@ pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
if let Some(document_id) = current_document_id {
puffin::profile_scope!("Final document into sorter");
// FIXME: span inside of a hot loop might degrade performance and create big reports
let span = tracing::trace_span!(target: "indexing::details", "final_document_into_sorter");
let _entered = span.enter();
document_word_positions_into_sorter(
document_id,
&del_word_pair_proximity,
@@ -159,15 +145,8 @@ pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
&mut word_pair_proximity_docids_sorters,
)?;
}
WORD_PAIR_PROXIMITY_DOCIDS_RAW_KV.lock().unwrap().flush().unwrap();
{
puffin::profile_scope!("sorter_into_reader");
// FIXME: span inside of a hot loop might degrade performance and create big reports
let span = tracing::trace_span!(target: "indexing::details", "sorter_into_reader");
let _entered = span.enter();
let mut writer = create_writer(
indexer.chunk_compression_type,
indexer.chunk_compression_level,
@@ -195,8 +174,6 @@ fn document_word_positions_into_sorter(
use itertools::merge_join_by;
use itertools::EitherOrBoth::{Both, Left, Right};
let mut word_pair_proximity_docids_raw_kv = WORD_PAIR_PROXIMITY_DOCIDS_RAW_KV.lock().unwrap();
let mut buffer = Vec::new();
let mut key_buffer = Vec::new();
for eob in
@@ -228,9 +205,8 @@ fn document_word_positions_into_sorter(
key_buffer.push(0);
key_buffer.extend_from_slice(w2.as_bytes());
let value_buffer = value_writer.into_inner().unwrap();
word_pair_proximity_docids_raw_kv.push(&key_buffer, value_buffer).unwrap();
word_pair_proximity_docids_sorters[*prox as usize - 1].insert(&key_buffer, value_buffer)?;
word_pair_proximity_docids_sorters[*prox as usize - 1]
.insert(&key_buffer, value_writer.into_inner().unwrap())?;
}
Ok(())

View File

@@ -1,30 +1,24 @@
use std::collections::BTreeSet;
use std::fs::File;
use std::io::{self, BufReader};
use std::sync::Mutex;
use obkv::KvReaderU16;
use once_cell::sync::Lazy;
use super::helpers::{
create_sorter, merge_deladd_cbo_roaring_bitmaps, sorter_into_reader, try_split_array_at,
GrenadParameters,
};
use super::RawKVWriter;
use crate::error::SerializationError;
use crate::index::db_name::DOCID_WORD_POSITIONS;
use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd};
use crate::update::MergeFn;
use crate::{bucketed_position, DocumentId, Result};
static WORD_POSITION_DOCIDS_RAW_KV: Lazy<Mutex<RawKVWriter>> =
Lazy::new(|| Mutex::new(RawKVWriter::new("extract_word_position_docids").unwrap()));
/// Extracts the word positions and the documents ids where this word appear.
///
/// Returns a grenad reader with the list of extracted words at positions and
/// documents ids from the given chunk of docid word positions.
#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")]
#[logging_timer::time]
pub fn extract_word_position_docids<R: io::Read + io::Seek>(
docid_word_positions: grenad::Reader<R>,
indexer: GrenadParameters,
@@ -94,15 +88,12 @@ pub fn extract_word_position_docids<R: io::Read + io::Seek>(
)?;
}
WORD_POSITION_DOCIDS_RAW_KV.lock().unwrap().flush().unwrap();
// TODO remove noop DelAdd OBKV
let word_position_docids_reader = sorter_into_reader(word_position_docids_sorter, indexer)?;
Ok(word_position_docids_reader)
}
#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")]
fn words_position_into_sorter(
document_id: DocumentId,
key_buffer: &mut Vec<u8>,
@@ -115,8 +106,6 @@ fn words_position_into_sorter(
use itertools::merge_join_by;
use itertools::EitherOrBoth::{Both, Left, Right};
let mut word_position_docids_raw_kv = WORD_POSITION_DOCIDS_RAW_KV.lock().unwrap();
let mut buffer = Vec::new();
for eob in merge_join_by(del_word_positions.iter(), add_word_positions.iter(), |d, a| d.cmp(a))
{
@@ -143,10 +132,7 @@ fn words_position_into_sorter(
key_buffer.extend_from_slice(word_bytes);
key_buffer.push(0);
key_buffer.extend_from_slice(&position.to_be_bytes());
let value_buffer = value_writer.into_inner().unwrap();
word_position_docids_raw_kv.push(key_buffer, value_buffer).unwrap();
word_position_docids_sorter.insert(&key_buffer, &value_buffer)?;
word_position_docids_sorter.insert(&key_buffer, value_writer.into_inner().unwrap())?;
}
Ok(())

View File

@@ -11,11 +11,11 @@ mod extract_word_position_docids;
use std::collections::HashSet;
use std::fs::File;
use std::io::{self, BufReader, Write};
use std::io::BufReader;
use crossbeam_channel::Sender;
use log::debug;
use rayon::prelude::*;
use tracing::debug;
use self::extract_docid_word_positions::extract_docid_word_positions;
use self::extract_facet_number_docids::extract_facet_number_docids;
@@ -38,34 +38,9 @@ use crate::proximity::ProximityPrecision;
use crate::vector::EmbeddingConfigs;
use crate::{FieldId, FieldsIdsMap, Result};
pub struct RawKVWriter {
file: io::BufWriter<File>,
}
impl RawKVWriter {
pub fn new(path: &str) -> io::Result<Self> {
Ok(Self { file: File::create(path).map(io::BufWriter::new)? })
}
pub fn push(&mut self, key: &[u8], value: &[u8]) -> io::Result<()> {
let key_len = key.len().to_be_bytes();
let value_len = value.len().to_be_bytes();
self.file.write_all(&key_len)?;
self.file.write_all(key)?;
self.file.write_all(&value_len)?;
self.file.write_all(value)?;
Ok(())
}
pub fn flush(&mut self) -> io::Result<()> {
self.file.flush()
}
}
/// Extract data for each databases from obkv documents in parallel.
/// Send data in grenad file over provided Sender.
#[allow(clippy::too_many_arguments)]
#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")]
pub(crate) fn data_from_obkv_documents(
original_obkv_chunks: impl Iterator<Item = Result<grenad::Reader<BufReader<File>>>> + Send,
flattened_obkv_chunks: impl Iterator<Item = Result<grenad::Reader<BufReader<File>>>> + Send,
@@ -138,7 +113,7 @@ pub(crate) fn data_from_obkv_documents(
{
let lmdb_writer_sx = lmdb_writer_sx.clone();
rayon::spawn(move || {
debug!(database = "facet-id-exists-docids", "merge");
debug!("merge {} database", "facet-id-exists-docids");
match facet_exists_docids_chunks.merge(merge_deladd_cbo_roaring_bitmaps, &indexer) {
Ok(reader) => {
let _ = lmdb_writer_sx.send(Ok(TypedChunk::FieldIdFacetExistsDocids(reader)));
@@ -154,7 +129,7 @@ pub(crate) fn data_from_obkv_documents(
{
let lmdb_writer_sx = lmdb_writer_sx.clone();
rayon::spawn(move || {
debug!(database = "facet-id-is-null-docids", "merge");
debug!("merge {} database", "facet-id-is-null-docids");
match facet_is_null_docids_chunks.merge(merge_deladd_cbo_roaring_bitmaps, &indexer) {
Ok(reader) => {
let _ = lmdb_writer_sx.send(Ok(TypedChunk::FieldIdFacetIsNullDocids(reader)));
@@ -170,7 +145,7 @@ pub(crate) fn data_from_obkv_documents(
{
let lmdb_writer_sx = lmdb_writer_sx.clone();
rayon::spawn(move || {
debug!(database = "facet-id-is-empty-docids", "merge");
debug!("merge {} database", "facet-id-is-empty-docids");
match facet_is_empty_docids_chunks.merge(merge_deladd_cbo_roaring_bitmaps, &indexer) {
Ok(reader) => {
let _ = lmdb_writer_sx.send(Ok(TypedChunk::FieldIdFacetIsEmptyDocids(reader)));
@@ -282,22 +257,13 @@ fn spawn_extraction_task<FE, FS, M>(
M: MergeableReader + FromParallelIterator<M::Output> + Send + 'static,
M::Output: Send,
{
let current_span = tracing::Span::current();
rayon::spawn(move || {
let child_span =
tracing::trace_span!(target: "", parent: &current_span, "extract_multiple_chunks");
let _entered = child_span.enter();
puffin::profile_scope!("extract_multiple_chunksdexing::details, ", name);
puffin::profile_scope!("extract_multiple_chunks", name);
let chunks: Result<M> =
chunks.into_par_iter().map(|chunk| extract_fn(chunk, indexer)).collect();
let current_span = tracing::Span::current();
rayon::spawn(move || match chunks {
Ok(chunks) => {
let child_span = tracing::trace_span!(target: "", parent: &current_span, "merge_multiple_chunks");
let _entered = child_span.enter();
debug!(database = name, "merge");
debug!("merge {} database", name);
puffin::profile_scope!("merge_multiple_chunks", name);
let reader = chunks.merge(merge_fn, &indexer);
let _ = lmdb_writer_sx.send(reader.map(serialize_fn));

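The `RawKVWriter` shown in the diff above dumps each key/value pair in a simple length-prefixed layout: a big-endian `usize` key length, the key bytes, a big-endian `usize` value length, then the value bytes. A self-contained sketch of writing and reading that layout (`push` mirrors the struct's method; `pull` is an illustrative reader that is not in the codebase):

use std::io::{self, Read, Write};

// Length-prefixed layout:
// [key_len: usize BE][key bytes][value_len: usize BE][value bytes], repeated.
fn push(out: &mut impl Write, key: &[u8], value: &[u8]) -> io::Result<()> {
    out.write_all(&key.len().to_be_bytes())?;
    out.write_all(key)?;
    out.write_all(&value.len().to_be_bytes())?;
    out.write_all(value)
}

// Illustrative reader for the same layout.
fn pull(input: &mut impl Read) -> io::Result<(Vec<u8>, Vec<u8>)> {
    let mut len = [0u8; std::mem::size_of::<usize>()];
    input.read_exact(&mut len)?;
    let mut key = vec![0u8; usize::from_be_bytes(len)];
    input.read_exact(&mut key)?;
    input.read_exact(&mut len)?;
    let mut value = vec![0u8; usize::from_be_bytes(len)];
    input.read_exact(&mut value)?;
    Ok((key, value))
}

fn main() -> io::Result<()> {
    let mut buffer = Vec::new();
    push(&mut buffer, b"word", b"docids")?;
    let (key, value) = pull(&mut buffer.as_slice())?;
    assert_eq!(key.as_slice(), &b"word"[..]);
    assert_eq!(value.as_slice(), &b"docids"[..]);
    Ok(())
}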
View File

@@ -9,10 +9,6 @@ use super::{ClonableMmap, MergeFn};
use crate::update::index_documents::valid_lmdb_key;
use crate::Result;
/// This is something reasonable given the fact
/// that there is one grenad sorter by thread.
const MAX_GRENAD_SORTER_USAGE: usize = 500 * 1024 * 1024; // 500 MiB
pub type CursorClonableMmap = io::Cursor<ClonableMmap>;
pub fn create_writer<R: io::Write>(
@@ -28,9 +24,6 @@ pub fn create_writer<R: io::Write>(
builder.build(BufWriter::new(file))
}
/// A helper function that creates a grenad sorter
/// with the given parameters. The max memory is
/// clamped to something reasonable.
pub fn create_sorter(
sort_algorithm: grenad::SortAlgorithm,
merge: MergeFn,
@@ -48,7 +41,7 @@ pub fn create_sorter(
builder.max_nb_chunks(nb_chunks);
}
if let Some(memory) = max_memory {
builder.dump_threshold(memory.min(MAX_GRENAD_SORTER_USAGE));
builder.dump_threshold(memory);
builder.allow_realloc(false);
}
builder.sort_algorithm(sort_algorithm);
@@ -56,7 +49,6 @@ pub fn create_sorter(
builder.build()
}
#[tracing::instrument(level = "trace", skip_all, target = "indexing::grenad")]
pub fn sorter_into_reader(
sorter: grenad::Sorter<MergeFn>,
indexer: GrenadParameters,
@@ -195,15 +187,10 @@ impl Default for GrenadParameters {
impl GrenadParameters {
/// This function uses the number of threads in the current threadpool to compute the value.
///
/// This should be called inside of a rayon thread pool,
/// otherwise, it will take the global number of threads.
///
/// The max memory cannot exceed a given reasonable value.
/// Otherwise, it will take the global number of threads.
pub fn max_memory_by_thread(&self) -> Option<usize> {
self.max_memory.map(|max_memory| {
(max_memory / rayon::current_num_threads()).min(MAX_GRENAD_SORTER_USAGE)
})
self.max_memory.map(|max_memory| max_memory / rayon::current_num_threads())
}
}
@@ -253,7 +240,6 @@ pub fn grenad_obkv_into_chunks<R: io::Read + io::Seek>(
/// Write provided sorter in database using serialize_value function.
/// merge_values function is used if an entry already exists in the database.
#[tracing::instrument(level = "trace", skip_all, target = "indexing::grenad")]
pub fn write_sorter_into_database<K, V, FS, FM>(
sorter: Sorter<MergeFn>,
database: &heed::Database<K, V>,

View File

@@ -13,11 +13,11 @@ use std::result::Result as StdResult;
use crossbeam_channel::{Receiver, Sender};
use heed::types::Str;
use heed::Database;
use log::debug;
use rand::SeedableRng;
use roaring::RoaringBitmap;
use serde::{Deserialize, Serialize};
use slice_group_by::GroupBy;
use tracing::debug_span;
use typed_chunk::{write_typed_chunk_into_index, TypedChunk};
use self::enrich::enrich_documents_batch;
@@ -134,7 +134,6 @@ where
/// return an error and not the `IndexDocuments` struct as it is invalid to use it afterward.
///
/// Returns the number of documents added to the builder.
#[tracing::instrument(level = "trace", skip_all, target = "indexing::documents")]
pub fn add_documents<R: Read + Seek>(
mut self,
reader: DocumentsBatchReader<R>,
@@ -180,7 +179,6 @@ where
/// Remove a batch of documents from the current builder.
///
/// Returns the number of documents deleted from the builder.
#[tracing::instrument(level = "trace", skip_all, target = "indexing::documents")]
pub fn remove_documents(
mut self,
to_delete: Vec<String>,
@@ -216,7 +214,6 @@ where
/// - No batching using the standard `remove_documents` and `add_documents` took place
///
/// TODO: make it impossible to call `remove_documents` or `add_documents` on an instance that calls this function.
#[tracing::instrument(level = "trace", skip_all, target = "indexing::details")]
pub fn remove_documents_from_db_no_batch(
mut self,
to_delete: &RoaringBitmap,
@@ -240,12 +237,7 @@ where
Ok((self, deleted_documents))
}
#[tracing::instrument(
level = "trace"
skip_all,
target = "indexing::documents",
name = "index_documents"
)]
#[logging_timer::time("IndexDocuments::{}")]
pub fn execute(mut self) -> Result<DocumentAdditionResult> {
puffin::profile_function!();
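The attribute swap above repeats throughout this compare: a `tracing::instrument` span (explicit level, target and name, with `skip_all` keeping the arguments out of the span fields) is replaced by the older `logging_timer` timing attribute. A minimal sketch of the two forms, assuming the `tracing` and `logging_timer` crates; `index_batch` is a made-up function name:

// Removed side: opens a trace span for the duration of the call; `skip_all`
// keeps the (potentially large) arguments out of the span fields.
#[tracing::instrument(level = "trace", skip_all, target = "indexing::documents", name = "index_batch")]
fn index_batch_traced(_documents: &[Vec<u8>]) { /* ... */ }

// Kept side: logs the elapsed time of the call through the `log` facade.
#[logging_timer::time("Example::{}")]
fn index_batch_timed(_documents: &[Vec<u8>]) { /* ... */ }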
@@ -281,12 +273,7 @@ where
}
/// Returns the total number of documents in the index after the update.
#[tracing::instrument(
level = "trace",
skip_all,
target = "profile::indexing::details",
name = "index_documents_raw"
)]
#[logging_timer::time("IndexDocuments::{}")]
pub fn execute_raw(self, output: TransformOutput) -> Result<u64>
where
FP: Fn(UpdateIndexingStep) + Sync,
@@ -387,12 +374,8 @@ where
let cloned_embedder = self.embedders.clone();
let current_span = tracing::Span::current();
// Run extraction pipeline in parallel.
pool.install(|| {
let child_span = tracing::trace_span!(target: "indexing::details", parent: &current_span, "extract_and_send_grenad_chunks");
let _enter = child_span.enter();
puffin::profile_scope!("extract_and_send_grenad_chunks");
// split obkv file into several chunks
let original_chunk_iter =
@@ -506,7 +489,10 @@ where
documents_seen: documents_seen_count as usize,
total_documents: documents_count,
});
debug_span!("Seen", documents = documents_seen_count, total = documents_count);
debug!(
"We have seen {} documents on {} total document so far",
documents_seen_count, documents_count
);
}
if is_merged_database {
databases_seen += 1;
@@ -536,8 +522,12 @@ where
pool.install(|| {
let writer_index = (embedder_index as u16) << 8;
for k in 0..=u8::MAX {
let writer =
arroy::Writer::new(vector_arroy, writer_index | (k as u16), dimension)?;
let writer = arroy::Writer::prepare(
wtxn,
vector_arroy,
writer_index | (k as u16),
dimension,
)?;
if writer.is_empty(wtxn)? {
break;
}
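The `writer_index | (k as u16)` expression above (and again in the vector-points chunk later in this diff) packs two small ids into one u16: the embedder's category id in the high byte and the arroy tree index `k` in the low byte. A std-only sketch of that arithmetic; the helper name is made up:

// Sketch of the id layout used for the arroy writers above.
fn arroy_store_id(embedder_index: u8, k: u8) -> u16 {
    ((embedder_index as u16) << 8) | (k as u16)
}

fn main() {
    // embedder 1, tree 3 -> 0x0103
    assert_eq!(arroy_store_id(1, 3), 0x0103);
    // all 256 trees of one embedder occupy a contiguous range of ids
    assert_eq!(arroy_store_id(1, 0), 0x0100);
    assert_eq!(arroy_store_id(1, u8::MAX), 0x01FF);
}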
@@ -557,12 +547,7 @@ where
Ok(number_of_documents)
}
#[tracing::instrument(
level = "trace",
skip_all,
target = "indexing::prefix",
name = "index_documents_prefix_databases"
)]
#[logging_timer::time("IndexDocuments::{}")]
pub fn execute_prefix_databases(
self,
word_docids: Option<grenad::Reader<CursorClonableMmap>>,
@@ -617,8 +602,6 @@ where
let del_prefix_fst_words;
{
let span = tracing::trace_span!(target: "indexing::details", "compute_prefix_diffs");
let _entered = span.enter();
puffin::profile_scope!("compute_prefix_diffs");
current_prefix_fst = self.index.words_prefixes_fst(self.wtxn)?;
@@ -743,12 +726,6 @@ where
/// Run the word prefix docids update operation.
#[allow(clippy::too_many_arguments)]
#[tracing::instrument(
level = "trace",
skip_all,
target = "indexing::prefix",
name = "index_documents_word_prefix_docids"
)]
fn execute_word_prefix_docids(
txn: &mut heed::RwTxn,
reader: grenad::Reader<Cursor<ClonableMmap>>,

View File

@@ -146,7 +146,7 @@ impl<'a, 'i> Transform<'a, 'i> {
})
}
#[tracing::instrument(level = "trace", skip_all, target = "indexing::documents")]
#[logging_timer::time]
pub fn read_documents<R, FP, FA>(
&mut self,
reader: EnrichedDocumentsBatchReader<R>,
@@ -359,7 +359,7 @@ impl<'a, 'i> Transform<'a, 'i> {
/// - If the document to remove was inserted by the `read_documents` method before but was NOT present in the db,
/// it's added into the grenad to ensure we don't insert it + removed from the list of new documents ids.
/// - If the document to remove was not present in either the db or the transform we do nothing.
#[tracing::instrument(level = "trace", skip_all, target = "indexing::documents")]
#[logging_timer::time]
pub fn remove_documents<FA>(
&mut self,
mut to_remove: Vec<String>,
@@ -450,7 +450,7 @@ impl<'a, 'i> Transform<'a, 'i> {
/// - No batching using the standard `remove_documents` and `add_documents` took place
///
/// TODO: make it impossible to call `remove_documents` or `add_documents` on an instance that calls this function.
#[tracing::instrument(level = "trace", skip_all, target = "indexing::details")]
#[logging_timer::time]
pub fn remove_documents_from_db_no_batch<FA>(
&mut self,
to_remove: &RoaringBitmap,
@@ -541,7 +541,6 @@ impl<'a, 'i> Transform<'a, 'i> {
// Flatten a document from the fields ids map contained in self and insert the new
// created fields. Returns `None` if the document doesn't need to be flattened.
#[tracing::instrument(level = "trace", skip(self, obkv), target = "indexing::transform")]
fn flatten_from_fields_ids_map(&mut self, obkv: KvReader<FieldId>) -> Result<Option<Vec<u8>>> {
if obkv
.iter()
@@ -662,7 +661,7 @@ impl<'a, 'i> Transform<'a, 'i> {
/// Generate the `TransformOutput` based on the given sorter that can be generated from any
/// format like CSV, JSON or JSON stream. This sorter must contain a key that is the document
/// id for the user side and the value must be an obkv where keys are valid fields ids.
#[tracing::instrument(level = "trace", skip_all, target = "indexing::transform")]
#[logging_timer::time]
pub(crate) fn output_from_sorter<F>(
self,
wtxn: &mut heed::RwTxn,

View File

@@ -115,7 +115,6 @@ impl TypedChunk {
/// Write typed chunk in the corresponding LMDB database of the provided index.
/// Return new documents seen.
#[tracing::instrument(level = "trace", skip_all, target = "indexing::write_db")]
pub(crate) fn write_typed_chunk_into_index(
typed_chunk: TypedChunk,
index: &Index,
@@ -127,8 +126,6 @@ pub(crate) fn write_typed_chunk_into_index(
let mut is_merged_database = false;
match typed_chunk {
TypedChunk::Documents(obkv_documents_iter) => {
let span = tracing::trace_span!(target: "indexing::write_db", "documents");
let _entered = span.enter();
let mut operations: Vec<DocumentOperation> = Default::default();
let mut docids = index.documents_ids(wtxn)?;
@@ -175,9 +172,6 @@ pub(crate) fn write_typed_chunk_into_index(
index.put_documents_ids(wtxn, &docids)?;
}
TypedChunk::FieldIdWordCountDocids(fid_word_count_docids_iter) => {
let span =
tracing::trace_span!(target: "indexing::write_db", "field_id_word_count_docids");
let _entered = span.enter();
append_entries_into_database(
fid_word_count_docids_iter,
&index.field_id_word_count_docids,
@@ -193,8 +187,6 @@ pub(crate) fn write_typed_chunk_into_index(
exact_word_docids_reader,
word_fid_docids_reader,
} => {
let span = tracing::trace_span!(target: "indexing::write_db", "word_docids");
let _entered = span.enter();
let word_docids_iter = unsafe { as_cloneable_grenad(&word_docids_reader) }?;
append_entries_into_database(
word_docids_iter.clone(),
@@ -238,8 +230,6 @@ pub(crate) fn write_typed_chunk_into_index(
is_merged_database = true;
}
TypedChunk::WordPositionDocids(word_position_docids_iter) => {
let span = tracing::trace_span!(target: "indexing::write_db", "word_position_docids");
let _entered = span.enter();
append_entries_into_database(
word_position_docids_iter,
&index.word_position_docids,
@@ -251,25 +241,16 @@ pub(crate) fn write_typed_chunk_into_index(
is_merged_database = true;
}
TypedChunk::FieldIdFacetNumberDocids(facet_id_number_docids_iter) => {
let span =
tracing::trace_span!(target: "indexing::write_db","field_id_facet_number_docids");
let _entered = span.enter();
let indexer = FacetsUpdate::new(index, FacetType::Number, facet_id_number_docids_iter);
indexer.execute(wtxn)?;
is_merged_database = true;
}
TypedChunk::FieldIdFacetStringDocids(facet_id_string_docids_iter) => {
let span =
tracing::trace_span!(target: "indexing::write_db", "field_id_facet_string_docids");
let _entered = span.enter();
let indexer = FacetsUpdate::new(index, FacetType::String, facet_id_string_docids_iter);
indexer.execute(wtxn)?;
is_merged_database = true;
}
TypedChunk::FieldIdFacetExistsDocids(facet_id_exists_docids) => {
let span =
tracing::trace_span!(target: "indexing::write_db", "field_id_facet_exists_docids");
let _entered = span.enter();
append_entries_into_database(
facet_id_exists_docids,
&index.facet_id_exists_docids,
@@ -281,9 +262,6 @@ pub(crate) fn write_typed_chunk_into_index(
is_merged_database = true;
}
TypedChunk::FieldIdFacetIsNullDocids(facet_id_is_null_docids) => {
let span =
tracing::trace_span!(target: "indexing::write_db", "field_id_facet_is_null_docids");
let _entered = span.enter();
append_entries_into_database(
facet_id_is_null_docids,
&index.facet_id_is_null_docids,
@@ -295,8 +273,6 @@ pub(crate) fn write_typed_chunk_into_index(
is_merged_database = true;
}
TypedChunk::FieldIdFacetIsEmptyDocids(facet_id_is_empty_docids) => {
let span = tracing::trace_span!(target: "profile::indexing::write_db", "field_id_facet_is_empty_docids");
let _entered = span.enter();
append_entries_into_database(
facet_id_is_empty_docids,
&index.facet_id_is_empty_docids,
@@ -308,9 +284,6 @@ pub(crate) fn write_typed_chunk_into_index(
is_merged_database = true;
}
TypedChunk::WordPairProximityDocids(word_pair_proximity_docids_iter) => {
let span =
tracing::trace_span!(target: "indexing::write_db", "word_pair_proximity_docids");
let _entered = span.enter();
append_entries_into_database(
word_pair_proximity_docids_iter,
&index.word_pair_proximity_docids,
@@ -322,9 +295,6 @@ pub(crate) fn write_typed_chunk_into_index(
is_merged_database = true;
}
TypedChunk::FieldIdDocidFacetNumbers(fid_docid_facet_number) => {
let span =
tracing::trace_span!(target: "indexing::write_db", "field_id_docid_facet_numbers");
let _entered = span.enter();
let index_fid_docid_facet_numbers =
index.field_id_docid_facet_f64s.remap_types::<Bytes, Bytes>();
let mut cursor = fid_docid_facet_number.into_cursor()?;
@@ -345,9 +315,6 @@ pub(crate) fn write_typed_chunk_into_index(
}
}
TypedChunk::FieldIdDocidFacetStrings(fid_docid_facet_string) => {
let span =
tracing::trace_span!(target: "indexing::write_db", "field_id_docid_facet_strings");
let _entered = span.enter();
let index_fid_docid_facet_strings =
index.field_id_docid_facet_strings.remap_types::<Bytes, Bytes>();
let mut cursor = fid_docid_facet_string.into_cursor()?;
@@ -368,8 +335,6 @@ pub(crate) fn write_typed_chunk_into_index(
}
}
TypedChunk::GeoPoints(geo_points) => {
let span = tracing::trace_span!(target: "indexing::write_db", "geo_points");
let _entered = span.enter();
let mut rtree = index.geo_rtree(wtxn)?.unwrap_or_default();
let mut geo_faceted_docids = index.geo_faceted_documents_ids(wtxn)?;
@@ -400,8 +365,6 @@ pub(crate) fn write_typed_chunk_into_index(
expected_dimension,
embedder_name,
} => {
let span = tracing::trace_span!(target: "indexing::write_db", "vector_points");
let _entered = span.enter();
let embedder_index = index.embedder_category_id.get(wtxn, &embedder_name)?.ok_or(
InternalError::DatabaseMissingEntry { db_name: "embedder_category_id", key: None },
)?;
@@ -409,7 +372,8 @@ pub(crate) fn write_typed_chunk_into_index(
// FIXME: allow customizing distance
let writers: std::result::Result<Vec<_>, _> = (0..=u8::MAX)
.map(|k| {
arroy::Writer::new(
arroy::Writer::prepare(
wtxn,
index.vector_arroy,
writer_index | (k as u16),
expected_dimension,
@@ -443,7 +407,7 @@ pub(crate) fn write_typed_chunk_into_index(
// code error if we somehow got the wrong dimension
.unwrap();
if embeddings.embedding_count() > usize::from(u8::MAX) {
if embeddings.embedding_count() > u8::MAX.into() {
let external_docid = if let Ok(Some(Ok(index))) = index
.external_id_of(wtxn, std::iter::once(docid))
.map(|it| it.into_iter().next())
@@ -517,11 +481,9 @@ pub(crate) fn write_typed_chunk_into_index(
}
}
tracing::debug!("Finished vector chunk for {}", embedder_name);
log::debug!("Finished vector chunk for {}", embedder_name);
}
TypedChunk::ScriptLanguageDocids(sl_map) => {
let span = tracing::trace_span!(target: "indexing::write_db", "script_language_docids");
let _entered = span.enter();
for (key, (deletion, addition)) in sl_map {
let mut db_key_exists = false;
let final_value = match index.script_language_docids.get(wtxn, &key)? {
@@ -575,7 +537,6 @@ fn merge_word_docids_reader_into_fst(
/// Write provided entries in database using serialize_value function.
/// merge_values function is used if an entry already exists in the database.
#[tracing::instrument(level = "trace", skip_all, target = "indexing::write_db")]
fn write_entries_into_database<R, K, V, FS, FM>(
data: grenad::Reader<R>,
database: &heed::Database<K, V>,
@@ -622,7 +583,6 @@ where
/// merge_values function is used if an entry already exists in the database.
/// All provided entries must be ordered.
/// If the index is not empty, write_entries_into_database is called instead.
#[tracing::instrument(level = "trace", skip_all, target = "indexing::write_db")]
fn append_entries_into_database<R, K, V, FS, FM>(
data: grenad::Reader<R>,
database: &heed::Database<K, V>,
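The two helpers in the hunks above share one contract: stream sorted key/value entries into an LMDB database, merging with `merge_values` whenever a key is already present, and only take the plain-append fast path when the database is empty and the entries are ordered. A conceptual, std-only sketch of the merge-on-conflict part, with a `BTreeMap` standing in for the heed database; the names and closures are illustrative:

use std::collections::BTreeMap;

// Conceptual sketch: the BTreeMap plays the role of the LMDB database, and
// `merge_values` plays the role of the merge function described above.
fn write_entries(
    db: &mut BTreeMap<Vec<u8>, Vec<u8>>,
    entries: impl IntoIterator<Item = (Vec<u8>, Vec<u8>)>,
    merge_values: impl Fn(&[u8], &[u8]) -> Vec<u8>,
) {
    for (key, value) in entries {
        let merged = match db.get(&key) {
            // an entry already exists for this key: merge old and new values
            Some(old) => merge_values(old, &value),
            None => value,
        };
        db.insert(key, merged);
    }
}

// The "append" fast path is only valid when the database is empty and the keys
// arrive in order; otherwise the caller falls back to the merge-on-conflict path.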

View File

@@ -372,11 +372,6 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
self.embedder_settings = Setting::Reset;
}
#[tracing::instrument(
level = "trace"
skip(self, progress_callback, should_abort, old_fields_ids_map),
target = "indexing::documents"
)]
fn reindex<FP, FA>(
&mut self,
progress_callback: &FP,
@@ -979,9 +974,6 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
crate::vector::settings::EmbeddingSettings::apply_default_source(
&mut setting,
);
crate::vector::settings::EmbeddingSettings::apply_default_openai_model(
&mut setting,
);
let setting = validate_embedding_settings(setting, &name)?;
changed = true;
new_configs.insert(name, setting);
@@ -1127,14 +1119,6 @@ pub fn validate_embedding_settings(
let Setting::Set(settings) = settings else { return Ok(settings) };
let EmbeddingSettings { source, model, revision, api_key, dimensions, document_template } =
settings;
if let Some(0) = dimensions.set() {
return Err(crate::error::UserError::InvalidSettingsDimensions {
embedder_name: name.to_owned(),
}
.into());
}
let Some(inferred_source) = source.set() else {
return Ok(Setting::Set(EmbeddingSettings {
source,
@@ -1148,34 +1132,14 @@ pub fn validate_embedding_settings(
match inferred_source {
EmbedderSource::OpenAi => {
check_unset(&revision, "revision", inferred_source, name)?;
check_unset(&dimensions, "dimensions", inferred_source, name)?;
if let Setting::Set(model) = &model {
let model = crate::vector::openai::EmbeddingModel::from_name(model.as_str())
.ok_or(crate::error::UserError::InvalidOpenAiModel {
crate::vector::openai::EmbeddingModel::from_name(model.as_str()).ok_or(
crate::error::UserError::InvalidOpenAiModel {
embedder_name: name.to_owned(),
model: model.clone(),
})?;
if let Setting::Set(dimensions) = dimensions {
if !model.supports_overriding_dimensions()
&& dimensions != model.default_dimensions()
{
return Err(crate::error::UserError::InvalidOpenAiModelDimensions {
embedder_name: name.to_owned(),
model: model.name(),
dimensions,
expected_dimensions: model.default_dimensions(),
}
.into());
}
if dimensions > model.default_dimensions() {
return Err(crate::error::UserError::InvalidOpenAiModelDimensionsMax {
embedder_name: name.to_owned(),
model: model.name(),
dimensions,
max_dimensions: model.default_dimensions(),
}
.into());
}
}
},
)?;
}
}
EmbedderSource::HuggingFace => {

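The removed hunks above implement the dimension checks for OpenAI embedders: `dimensions` must be strictly positive, and an override is only accepted when the model supports overriding and the requested value does not exceed the model's default. A simplified, self-contained model of that logic; these are not the real meilisearch types or error variants:

// Simplified model of the dimension checks removed by this diff.
struct OpenAiModel {
    default_dimensions: usize,
    supports_overriding_dimensions: bool,
}

fn validate_dimensions(model: &OpenAiModel, requested: Option<usize>) -> Result<(), String> {
    match requested {
        // no override requested: always fine
        None => Ok(()),
        // a zero-dimension embedding is rejected up front
        Some(0) => Err("`dimensions` must be strictly positive".into()),
        // overriding is only allowed for models that support it
        Some(d) if !model.supports_overriding_dimensions && d != model.default_dimensions => {
            Err(format!("this model only accepts {} dimensions", model.default_dimensions))
        }
        // and never above the model's default
        Some(d) if d > model.default_dimensions => {
            Err(format!("`dimensions` may not exceed {}", model.default_dimensions))
        }
        Some(_) => Ok(()),
    }
}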
View File

@@ -39,12 +39,7 @@ impl<'t, 'i> WordPrefixDocids<'t, 'i> {
}
}
#[tracing::instrument(
level = "trace",
skip_all,
target = "indexing::prefix",
name = "word_prefix_docids"
)]
#[logging_timer::time("WordPrefixDocids::{}")]
pub fn execute(
self,
mut new_word_docids_iter: grenad::ReaderCursor<CursorClonableMmap>,

View File

@@ -4,7 +4,7 @@ use std::str;
use grenad::CompressionType;
use heed::types::Bytes;
use heed::{BytesDecode, BytesEncode, Database};
use tracing::debug;
use log::debug;
use crate::error::SerializationError;
use crate::heed_codec::StrBEU16Codec;
@@ -44,12 +44,7 @@ impl<'t, 'i> WordPrefixIntegerDocids<'t, 'i> {
}
}
#[tracing::instrument(
level = "trace",
skip_all,
target = "indexing::prefix",
name = "words_prefix_integer_docids"
)]
#[logging_timer::time("WordPrefixIntegerDocids::{}")]
pub fn execute(
self,
new_word_integer_docids: grenad::Reader<CursorClonableMmap>,

Some files were not shown because too many files have changed in this diff.