Mirror of https://github.com/meilisearch/meilisearch.git
Synced 2025-12-15 08:56:56 +00:00

Compare commits — 33 commits: v1.8.0-rc. ... downgrade-
| Author | SHA1 | Date |
|---|---|---|
| | 036251dad5 | |
| | ba75d23bfe | |
| | 7fbb3bf8e8 | |
| | 9066a446a3 | |
| | f762307838 | |
| | 3e94a90722 | |
| | fc7e817221 | |
| | 0f78703b85 | |
| | c668043c4f | |
| | 5a305bfdea | |
| | f4dd73ec8c | |
| | 66dce4600d | |
| | fe51ceca6d | |
| | 88174b8ae4 | |
| | ebca29f3de | |
| | c793b6ef6d | |
| | cbbfff3594 | |
| | dbcf50589b | |
| | 3e5cd027a5 | |
| | 7468c1cf8d | |
| | d4aeff92d0 | |
| | e87cb373de | |
| | 9b76501875 | |
| | 6247e95dc3 | |
| | b3173d0423 | |
| | 96cc5319c8 | |
| | 0c7003c5df | |
| | a1aa999026 | |
| | aa0bbbb246 | |
| | 2dd9dd6d0a | |
| | e1f27de51a | |
| | abae31aee0 | |
| | 70ce0095ea | |
Cargo.lock (generated) — 38 changed lines
```diff
@@ -494,7 +494,7 @@ checksum = "8c3c1a368f70d6cf7302d78f8f7093da241fb8e8807c05cc9e51a125895a6d5b"
 
 [[package]]
 name = "benchmarks"
-version = "1.8.0"
+version = "1.8.1"
 dependencies = [
  "anyhow",
  "bytes",
@@ -639,7 +639,7 @@ dependencies = [
 
 [[package]]
 name = "build-info"
-version = "1.8.0"
+version = "1.8.1"
 dependencies = [
  "anyhow",
  "time",
@@ -889,9 +889,9 @@ dependencies = [
 
 [[package]]
 name = "charabia"
-version = "0.8.9"
+version = "0.8.10"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f6a65052f308636e5d5e1777f0dbc07919f5fbac24b6c8ad3e140472e5520de9"
+checksum = "933f20f2269b24d32fd5503e7b3c268af902190daf8d9d2b73ed2e75d77c00b4"
 dependencies = [
  "aho-corasick",
  "cow-utils",
@@ -1539,7 +1539,7 @@ dependencies = [
 
 [[package]]
 name = "dump"
-version = "1.8.0"
+version = "1.8.1"
 dependencies = [
  "anyhow",
  "big_s",
@@ -1787,7 +1787,7 @@ dependencies = [
 
 [[package]]
 name = "file-store"
-version = "1.8.0"
+version = "1.8.1"
 dependencies = [
  "faux",
  "tempfile",
@@ -1810,7 +1810,7 @@ dependencies = [
 
 [[package]]
 name = "filter-parser"
-version = "1.8.0"
+version = "1.8.1"
 dependencies = [
  "insta",
  "nom",
@@ -1830,7 +1830,7 @@ dependencies = [
 
 [[package]]
 name = "flatten-serde-json"
-version = "1.8.0"
+version = "1.8.1"
 dependencies = [
  "criterion",
  "serde_json",
@@ -1948,7 +1948,7 @@ dependencies = [
 
 [[package]]
 name = "fuzzers"
-version = "1.8.0"
+version = "1.8.1"
 dependencies = [
  "arbitrary",
  "clap",
@@ -2442,7 +2442,7 @@ checksum = "206ca75c9c03ba3d4ace2460e57b189f39f43de612c2f85836e65c929701bb2d"
 
 [[package]]
 name = "index-scheduler"
-version = "1.8.0"
+version = "1.8.1"
 dependencies = [
  "anyhow",
  "big_s",
@@ -2638,7 +2638,7 @@ dependencies = [
 
 [[package]]
 name = "json-depth-checker"
-version = "1.8.0"
+version = "1.8.1"
 dependencies = [
  "criterion",
  "serde_json",
@@ -3275,7 +3275,7 @@ checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771"
 
 [[package]]
 name = "meili-snap"
-version = "1.8.0"
+version = "1.8.1"
 dependencies = [
  "insta",
  "md5",
@@ -3284,7 +3284,7 @@ dependencies = [
 
 [[package]]
 name = "meilisearch"
-version = "1.8.0"
+version = "1.8.1"
 dependencies = [
  "actix-cors",
  "actix-http",
@@ -3377,7 +3377,7 @@ dependencies = [
 
 [[package]]
 name = "meilisearch-auth"
-version = "1.8.0"
+version = "1.8.1"
 dependencies = [
  "base64 0.21.7",
  "enum-iterator",
@@ -3396,7 +3396,7 @@ dependencies = [
 
 [[package]]
 name = "meilisearch-types"
-version = "1.8.0"
+version = "1.8.1"
 dependencies = [
  "actix-web",
  "anyhow",
@@ -3426,7 +3426,7 @@ dependencies = [
 
 [[package]]
 name = "meilitool"
-version = "1.8.0"
+version = "1.8.1"
 dependencies = [
  "anyhow",
  "clap",
@@ -3465,7 +3465,7 @@ dependencies = [
 
 [[package]]
 name = "milli"
-version = "1.8.0"
+version = "1.8.1"
 dependencies = [
  "arroy",
  "big_s",
@@ -3906,7 +3906,7 @@ checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e"
 
 [[package]]
 name = "permissive-json-pointer"
-version = "1.8.0"
+version = "1.8.1"
 dependencies = [
  "big_s",
  "serde_json",
@@ -6074,7 +6074,7 @@ dependencies = [
 
 [[package]]
 name = "xtask"
-version = "1.8.0"
+version = "1.8.1"
 dependencies = [
  "anyhow",
  "build-info",
```
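Across the lockfile, every workspace crate moves from 1.8.0 to 1.8.1, and the only external dependency that changes is charabia (0.8.9 → 0.8.10, with the matching checksum update). The manifest hunks below carry the same version bump plus two dependency changes: the charabia upgrade and a mimalloc downgrade (0.1.39 → 0.1.37).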
```diff
@@ -22,7 +22,7 @@ members = [
 ]
 
 [workspace.package]
-version = "1.8.0"
+version = "1.8.1"
 authors = [
   "Quentin de Quelen <quentin@dequelen.me>",
   "Clément Renault <clement@meilisearch.com>",
```
```diff
@@ -14,7 +14,7 @@ license.workspace = true
 anyhow = "1.0.79"
 csv = "1.3.0"
 milli = { path = "../milli" }
-mimalloc = { version = "0.1.39", default-features = false }
+mimalloc = { version = "0.1.37", default-features = false }
 serde_json = { version = "1.0.111", features = ["preserve_order"] }
 
 [dev-dependencies]
```
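The same mimalloc pin from 0.1.39 back to 0.1.37 appears in every manifest that lists it (here, in the `meilisearch` crate further down, and in milli's dev-dependencies), which matches the `downgrade-` ref this compare targets; the diff itself doesn't state the motivation for the downgrade.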
```diff
@@ -256,8 +256,8 @@ pub(crate) mod test {
 
     pub fn create_test_settings() -> Settings<Checked> {
         let settings = Settings {
-            displayed_attributes: Setting::Set(vec![S("race"), S("name")]),
-            searchable_attributes: Setting::Set(vec![S("name"), S("race")]),
+            displayed_attributes: Setting::Set(vec![S("race"), S("name")]).into(),
+            searchable_attributes: Setting::Set(vec![S("name"), S("race")]).into(),
             filterable_attributes: Setting::Set(btreeset! { S("race"), S("age") }),
             sortable_attributes: Setting::Set(btreeset! { S("age") }),
             ranking_rules: Setting::NotSet,
```
```diff
@@ -315,8 +315,8 @@ impl From<v5::ResponseError> for v6::ResponseError {
 impl<T> From<v5::Settings<T>> for v6::Settings<v6::Unchecked> {
     fn from(settings: v5::Settings<T>) -> Self {
         v6::Settings {
-            displayed_attributes: settings.displayed_attributes.into(),
-            searchable_attributes: settings.searchable_attributes.into(),
+            displayed_attributes: v6::Setting::from(settings.displayed_attributes).into(),
+            searchable_attributes: v6::Setting::from(settings.searchable_attributes).into(),
             filterable_attributes: settings.filterable_attributes.into(),
             sortable_attributes: settings.sortable_attributes.into(),
             ranking_rules: {
```
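Because the target fields are now the `WildcardSetting` wrapper introduced below, a bare `.into()` can no longer infer the whole conversion chain from the v5 value; the dump importer converts to `v6::Setting` explicitly first and then wraps the result.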
```diff
@@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs
 []
 ----------------------------------------------------------------------
 ### All Tasks:
-0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: NotSet, searchable_attributes: NotSet, filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: NotSet, searchable_attributes: NotSet, filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
+0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
 ----------------------------------------------------------------------
 ### Status:
 enqueued [0,]
@@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs
 []
 ----------------------------------------------------------------------
 ### All Tasks:
-0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: NotSet, searchable_attributes: NotSet, filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: NotSet, searchable_attributes: NotSet, filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
+0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
 ----------------------------------------------------------------------
 ### Status:
 enqueued []
```
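The only change in these insta snapshots is the Debug representation of the two wildcard-capable fields: `displayed_attributes` and `searchable_attributes` now print as `WildcardSetting(NotSet)` instead of `NotSet`, reflecting the new wrapper type.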
```diff
@@ -57,3 +57,5 @@ greek = ["milli/greek"]
 khmer = ["milli/khmer"]
 # allow vietnamese specialized tokenization
 vietnamese = ["milli/vietnamese"]
+# force swedish character recomposition
+swedish-recomposition = ["milli/swedish-recomposition"]
```
```diff
@@ -3,7 +3,7 @@ use std::convert::Infallible;
 use std::fmt;
 use std::marker::PhantomData;
 use std::num::NonZeroUsize;
-use std::ops::ControlFlow;
+use std::ops::{ControlFlow, Deref};
 use std::str::FromStr;
 
 use deserr::{DeserializeError, Deserr, ErrorKind, MergeWithError, ValuePointerRef};
```
```diff
@@ -143,21 +143,13 @@ impl MergeWithError<milli::CriterionError> for DeserrJsonError<InvalidSettingsRa
 )]
 #[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
 pub struct Settings<T> {
-    #[serde(
-        default,
-        serialize_with = "serialize_with_wildcard",
-        skip_serializing_if = "Setting::is_not_set"
-    )]
+    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
     #[deserr(default, error = DeserrJsonError<InvalidSettingsDisplayedAttributes>)]
-    pub displayed_attributes: Setting<Vec<String>>,
+    pub displayed_attributes: WildcardSetting,
 
-    #[serde(
-        default,
-        serialize_with = "serialize_with_wildcard",
-        skip_serializing_if = "Setting::is_not_set"
-    )]
+    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
     #[deserr(default, error = DeserrJsonError<InvalidSettingsSearchableAttributes>)]
-    pub searchable_attributes: Setting<Vec<String>>,
+    pub searchable_attributes: WildcardSetting,
 
     #[serde(default, skip_serializing_if = "Setting::is_not_set")]
     #[deserr(default, error = DeserrJsonError<InvalidSettingsFilterableAttributes>)]
```
```diff
@@ -251,8 +243,8 @@ impl<T> Settings<T> {
 impl Settings<Checked> {
     pub fn cleared() -> Settings<Checked> {
         Settings {
-            displayed_attributes: Setting::Reset,
-            searchable_attributes: Setting::Reset,
+            displayed_attributes: Setting::Reset.into(),
+            searchable_attributes: Setting::Reset.into(),
             filterable_attributes: Setting::Reset,
             sortable_attributes: Setting::Reset,
             ranking_rules: Setting::Reset,
```
```diff
@@ -319,7 +311,7 @@ impl Settings<Checked> {
 
 impl Settings<Unchecked> {
     pub fn check(self) -> Settings<Checked> {
-        let displayed_attributes = match self.displayed_attributes {
+        let displayed_attributes = match self.displayed_attributes.0 {
             Setting::Set(fields) => {
                 if fields.iter().any(|f| f == "*") {
                     Setting::Reset
@@ -330,7 +322,7 @@ impl Settings<Unchecked> {
             otherwise => otherwise,
         };
 
-        let searchable_attributes = match self.searchable_attributes {
+        let searchable_attributes = match self.searchable_attributes.0 {
             Setting::Set(fields) => {
                 if fields.iter().any(|f| f == "*") {
                     Setting::Reset
@@ -342,8 +334,8 @@ impl Settings<Unchecked> {
         };
 
         Settings {
-            displayed_attributes,
-            searchable_attributes,
+            displayed_attributes: displayed_attributes.into(),
+            searchable_attributes: searchable_attributes.into(),
             filterable_attributes: self.filterable_attributes,
             sortable_attributes: self.sortable_attributes,
             ranking_rules: self.ranking_rules,
```
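`check()` keeps its wildcard rule — a field list containing `"*"` collapses to `Setting::Reset` — but now reaches the inner value through the wrapper's `.0` on the way in and re-wraps the result with `.into()` on the way out; see the sketch after the `WildcardSetting` definition below.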
```diff
@@ -412,13 +404,13 @@ pub fn apply_settings_to_builder(
         _kind,
     } = settings;
 
-    match searchable_attributes {
+    match searchable_attributes.deref() {
         Setting::Set(ref names) => builder.set_searchable_fields(names.clone()),
         Setting::Reset => builder.reset_searchable_fields(),
         Setting::NotSet => (),
     }
 
-    match displayed_attributes {
+    match displayed_attributes.deref() {
         Setting::Set(ref names) => builder.set_displayed_fields(names.clone()),
         Setting::Reset => builder.reset_displayed_fields(),
         Setting::NotSet => (),
```
```diff
@@ -690,11 +682,13 @@ pub fn settings(
         displayed_attributes: match displayed_attributes {
             Some(attrs) => Setting::Set(attrs),
             None => Setting::Reset,
-        },
+        }
+        .into(),
         searchable_attributes: match searchable_attributes {
             Some(attrs) => Setting::Set(attrs),
             None => Setting::Reset,
-        },
+        }
+        .into(),
         filterable_attributes: Setting::Set(filterable_attributes),
         sortable_attributes: Setting::Set(sortable_attributes),
         ranking_rules: Setting::Set(criteria.iter().map(|c| c.clone().into()).collect()),
```
```diff
@@ -848,6 +842,41 @@ impl From<ProximityPrecisionView> for ProximityPrecision {
     }
 }
 
+#[derive(Debug, Clone, Default, Deserialize, PartialEq, Eq)]
+pub struct WildcardSetting(Setting<Vec<String>>);
+
+impl From<Setting<Vec<String>>> for WildcardSetting {
+    fn from(setting: Setting<Vec<String>>) -> Self {
+        Self(setting)
+    }
+}
+
+impl Serialize for WildcardSetting {
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: Serializer,
+    {
+        serialize_with_wildcard(&self.0, serializer)
+    }
+}
+
+impl<E: deserr::DeserializeError> Deserr<E> for WildcardSetting {
+    fn deserialize_from_value<V: deserr::IntoValue>(
+        value: deserr::Value<V>,
+        location: ValuePointerRef<'_>,
+    ) -> Result<Self, E> {
+        Ok(Self(Setting::deserialize_from_value(value, location)?))
+    }
+}
+
+impl std::ops::Deref for WildcardSetting {
+    type Target = Setting<Vec<String>>;
+
+    fn deref(&self) -> &Self::Target {
+        &self.0
+    }
+}
+
 #[cfg(test)]
 pub(crate) mod test {
     use super::*;
```
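This newtype is the heart of the settings change: the wildcard-aware serialization that was previously repeated as a `serialize_with` attribute on each field now lives on the type itself, deserr support is delegated to the inner `Setting`, and `Deref` keeps match-based call sites working. Below is a self-contained sketch of the same pattern, with simplified stand-ins for `Setting` and `serialize_with_wildcard` (the real definitions live in meilisearch-types/milli; rendering `Reset` as `["*"]` mirrors the documented API default rather than the exact internal behavior):

```rust
use std::ops::Deref;

use serde::{Serialize, Serializer};

// Simplified stand-in for meilisearch's `Setting<T>`; illustrative only.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
enum Setting<T> {
    Set(T),
    Reset,
    #[default]
    NotSet,
}

// The wrapper owns the wildcard-aware serialization, so a `Settings` struct
// no longer needs a repeated field-level `serialize_with` attribute.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
struct WildcardSetting(Setting<Vec<String>>);

impl From<Setting<Vec<String>>> for WildcardSetting {
    fn from(setting: Setting<Vec<String>>) -> Self {
        Self(setting)
    }
}

// `Deref` is what lets call sites like `apply_settings_to_builder` keep
// matching on the inner `Setting` with only a `.deref()` added.
impl Deref for WildcardSetting {
    type Target = Setting<Vec<String>>;
    fn deref(&self) -> &Self::Target {
        &self.0
    }
}

impl Serialize for WildcardSetting {
    fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
        // Stand-in for `serialize_with_wildcard`: render `Reset` as the
        // `["*"]` wildcard these two settings fields expose over the API.
        match &self.0 {
            Setting::Set(fields) => fields.serialize(serializer),
            Setting::Reset => ["*"].serialize(serializer),
            Setting::NotSet => serializer.serialize_none(),
        }
    }
}

fn main() {
    let displayed: WildcardSetting = Setting::Reset.into();
    assert!(matches!(displayed.deref(), Setting::Reset));
    // Prints ["*"]: a reset wildcard field serializes as the wildcard list.
    println!("{}", serde_json::to_string(&displayed).unwrap());
}
```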
```diff
@@ -856,8 +885,8 @@ pub(crate) mod test {
     fn test_setting_check() {
         // test no changes
         let settings = Settings {
-            displayed_attributes: Setting::Set(vec![String::from("hello")]),
-            searchable_attributes: Setting::Set(vec![String::from("hello")]),
+            displayed_attributes: Setting::Set(vec![String::from("hello")]).into(),
+            searchable_attributes: Setting::Set(vec![String::from("hello")]).into(),
             filterable_attributes: Setting::NotSet,
             sortable_attributes: Setting::NotSet,
             ranking_rules: Setting::NotSet,
@@ -883,8 +912,9 @@ pub(crate) mod test {
         // test wildcard
         // test no changes
         let settings = Settings {
-            displayed_attributes: Setting::Set(vec![String::from("*")]),
-            searchable_attributes: Setting::Set(vec![String::from("hello"), String::from("*")]),
+            displayed_attributes: Setting::Set(vec![String::from("*")]).into(),
+            searchable_attributes: Setting::Set(vec![String::from("hello"), String::from("*")])
+                .into(),
             filterable_attributes: Setting::NotSet,
             sortable_attributes: Setting::NotSet,
             ranking_rules: Setting::NotSet,
@@ -904,7 +934,7 @@ pub(crate) mod test {
         };
 
         let checked = settings.check();
-        assert_eq!(checked.displayed_attributes, Setting::Reset);
-        assert_eq!(checked.searchable_attributes, Setting::Reset);
+        assert_eq!(checked.displayed_attributes, Setting::Reset.into());
+        assert_eq!(checked.searchable_attributes, Setting::Reset.into());
     }
 }
```
```diff
@@ -56,7 +56,7 @@ jsonwebtoken = "9.2.0"
 lazy_static = "1.4.0"
 meilisearch-auth = { path = "../meilisearch-auth" }
 meilisearch-types = { path = "../meilisearch-types" }
-mimalloc = { version = "0.1.39", default-features = false }
+mimalloc = { version = "0.1.37", default-features = false }
 mime = "0.3.17"
 num_cpus = "1.16.0"
 obkv = "0.2.1"
@@ -156,6 +156,7 @@ thai = ["meilisearch-types/thai"]
 greek = ["meilisearch-types/greek"]
 khmer = ["meilisearch-types/khmer"]
 vietnamese = ["meilisearch-types/vietnamese"]
+swedish-recomposition = ["meilisearch-types/swedish-recomposition"]
 
 [package.metadata.mini-dashboard]
 assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.13/build.zip"
```
```diff
@@ -7,7 +7,6 @@ use serde_json::Value;
 
 use super::{find_user_id, Analytics, DocumentDeletionKind, DocumentFetchKind};
 use crate::routes::indexes::documents::UpdateDocumentsQuery;
-use crate::routes::tasks::TasksFilterQuery;
 use crate::Opt;
 
 pub struct MockAnalytics {
@@ -86,6 +85,4 @@ impl Analytics for MockAnalytics {
     }
     fn get_fetch_documents(&self, _documents_query: &DocumentFetchKind, _request: &HttpRequest) {}
     fn post_fetch_documents(&self, _documents_query: &DocumentFetchKind, _request: &HttpRequest) {}
-    fn get_tasks(&self, _query: &TasksFilterQuery, _request: &HttpRequest) {}
-    fn health_seen(&self, _request: &HttpRequest) {}
 }
```
```diff
@@ -14,7 +14,6 @@ use platform_dirs::AppDirs;
 use serde_json::Value;
 
 use crate::routes::indexes::documents::UpdateDocumentsQuery;
-use crate::routes::tasks::TasksFilterQuery;
 
 // if the analytics feature is disabled
 // the `SegmentAnalytics` point to the mock instead of the real analytics
@@ -117,10 +116,4 @@ pub trait Analytics: Sync + Send {
         index_creation: bool,
         request: &HttpRequest,
     );
-
-    // this method should be called to aggregate the get tasks requests.
-    fn get_tasks(&self, query: &TasksFilterQuery, request: &HttpRequest);
-
-    // this method should be called to aggregate a add documents request
-    fn health_seen(&self, request: &HttpRequest);
 }
```
```diff
@@ -33,7 +33,6 @@ use crate::option::{
 };
 use crate::routes::indexes::documents::UpdateDocumentsQuery;
 use crate::routes::indexes::facet_search::FacetSearchQuery;
-use crate::routes::tasks::TasksFilterQuery;
 use crate::routes::{create_all_stats, Stats};
 use crate::search::{
     FacetSearchResult, MatchingStrategy, SearchQuery, SearchQueryWithIndex, SearchResult,
@@ -81,8 +80,6 @@ pub enum AnalyticsMsg {
     AggregateUpdateDocuments(DocumentsAggregator),
     AggregateGetFetchDocuments(DocumentsFetchAggregator),
     AggregatePostFetchDocuments(DocumentsFetchAggregator),
-    AggregateTasks(TasksAggregator),
-    AggregateHealth(HealthAggregator),
 }
 
 pub struct SegmentAnalytics {
@@ -152,8 +149,6 @@ impl SegmentAnalytics {
             update_documents_aggregator: DocumentsAggregator::default(),
             get_fetch_documents_aggregator: DocumentsFetchAggregator::default(),
             post_fetch_documents_aggregator: DocumentsFetchAggregator::default(),
-            get_tasks_aggregator: TasksAggregator::default(),
-            health_aggregator: HealthAggregator::default(),
         });
         tokio::spawn(segment.run(index_scheduler.clone(), auth_controller.clone()));
 
@@ -231,16 +226,6 @@ impl super::Analytics for SegmentAnalytics {
         let aggregate = DocumentsFetchAggregator::from_query(documents_query, request);
         let _ = self.sender.try_send(AnalyticsMsg::AggregatePostFetchDocuments(aggregate));
     }
-
-    fn get_tasks(&self, query: &TasksFilterQuery, request: &HttpRequest) {
-        let aggregate = TasksAggregator::from_query(query, request);
-        let _ = self.sender.try_send(AnalyticsMsg::AggregateTasks(aggregate));
-    }
-
-    fn health_seen(&self, request: &HttpRequest) {
-        let aggregate = HealthAggregator::from_query(request);
-        let _ = self.sender.try_send(AnalyticsMsg::AggregateHealth(aggregate));
-    }
 }
 
 /// This structure represent the `infos` field we send in the analytics.
@@ -394,8 +379,6 @@ pub struct Segment {
     update_documents_aggregator: DocumentsAggregator,
     get_fetch_documents_aggregator: DocumentsFetchAggregator,
     post_fetch_documents_aggregator: DocumentsFetchAggregator,
-    get_tasks_aggregator: TasksAggregator,
-    health_aggregator: HealthAggregator,
 }
 
 impl Segment {
@@ -458,8 +441,6 @@ impl Segment {
                 Some(AnalyticsMsg::AggregateUpdateDocuments(agreg)) => self.update_documents_aggregator.aggregate(agreg),
                 Some(AnalyticsMsg::AggregateGetFetchDocuments(agreg)) => self.get_fetch_documents_aggregator.aggregate(agreg),
                 Some(AnalyticsMsg::AggregatePostFetchDocuments(agreg)) => self.post_fetch_documents_aggregator.aggregate(agreg),
-                Some(AnalyticsMsg::AggregateTasks(agreg)) => self.get_tasks_aggregator.aggregate(agreg),
-                Some(AnalyticsMsg::AggregateHealth(agreg)) => self.health_aggregator.aggregate(agreg),
                 None => (),
             }
         }
@@ -513,8 +494,6 @@ impl Segment {
             update_documents_aggregator,
             get_fetch_documents_aggregator,
             post_fetch_documents_aggregator,
-            get_tasks_aggregator,
-            health_aggregator,
         } = self;
 
         if let Some(get_search) =
@@ -562,12 +541,6 @@ impl Segment {
         {
             let _ = self.batcher.push(post_fetch_documents).await;
         }
-        if let Some(get_tasks) = take(get_tasks_aggregator).into_event(user, "Tasks Seen") {
-            let _ = self.batcher.push(get_tasks).await;
-        }
-        if let Some(health) = take(health_aggregator).into_event(user, "Health Seen") {
-            let _ = self.batcher.push(health).await;
-        }
         let _ = self.batcher.flush().await;
     }
 }
@@ -1503,176 +1476,6 @@ impl DocumentsDeletionAggregator {
     }
 }
 
-#[derive(Default, Serialize)]
-pub struct TasksAggregator {
-    #[serde(skip)]
-    timestamp: Option<OffsetDateTime>,
-
-    // context
-    #[serde(rename = "user-agent")]
-    user_agents: HashSet<String>,
-
-    filtered_by_uid: bool,
-    filtered_by_index_uid: bool,
-    filtered_by_type: bool,
-    filtered_by_status: bool,
-    filtered_by_canceled_by: bool,
-    filtered_by_before_enqueued_at: bool,
-    filtered_by_after_enqueued_at: bool,
-    filtered_by_before_started_at: bool,
-    filtered_by_after_started_at: bool,
-    filtered_by_before_finished_at: bool,
-    filtered_by_after_finished_at: bool,
-    total_received: usize,
-}
-
-impl TasksAggregator {
-    pub fn from_query(query: &TasksFilterQuery, request: &HttpRequest) -> Self {
-        let TasksFilterQuery {
-            limit: _,
-            from: _,
-            uids,
-            index_uids,
-            types,
-            statuses,
-            canceled_by,
-            before_enqueued_at,
-            after_enqueued_at,
-            before_started_at,
-            after_started_at,
-            before_finished_at,
-            after_finished_at,
-        } = query;
-
-        Self {
-            timestamp: Some(OffsetDateTime::now_utc()),
-            user_agents: extract_user_agents(request).into_iter().collect(),
-            filtered_by_uid: uids.is_some(),
-            filtered_by_index_uid: index_uids.is_some(),
-            filtered_by_type: types.is_some(),
-            filtered_by_status: statuses.is_some(),
-            filtered_by_canceled_by: canceled_by.is_some(),
-            filtered_by_before_enqueued_at: before_enqueued_at.is_some(),
-            filtered_by_after_enqueued_at: after_enqueued_at.is_some(),
-            filtered_by_before_started_at: before_started_at.is_some(),
-            filtered_by_after_started_at: after_started_at.is_some(),
-            filtered_by_before_finished_at: before_finished_at.is_some(),
-            filtered_by_after_finished_at: after_finished_at.is_some(),
-            total_received: 1,
-        }
-    }
-
-    /// Aggregate one [TasksAggregator] into another.
-    pub fn aggregate(&mut self, other: Self) {
-        let Self {
-            timestamp,
-            user_agents,
-            total_received,
-            filtered_by_uid,
-            filtered_by_index_uid,
-            filtered_by_type,
-            filtered_by_status,
-            filtered_by_canceled_by,
-            filtered_by_before_enqueued_at,
-            filtered_by_after_enqueued_at,
-            filtered_by_before_started_at,
-            filtered_by_after_started_at,
-            filtered_by_before_finished_at,
-            filtered_by_after_finished_at,
-        } = other;
-
-        if self.timestamp.is_none() {
-            self.timestamp = timestamp;
-        }
-
-        // we can't create a union because there is no `into_union` method
-        for user_agent in user_agents {
-            self.user_agents.insert(user_agent);
-        }
-
-        self.filtered_by_uid |= filtered_by_uid;
-        self.filtered_by_index_uid |= filtered_by_index_uid;
-        self.filtered_by_type |= filtered_by_type;
-        self.filtered_by_status |= filtered_by_status;
-        self.filtered_by_canceled_by |= filtered_by_canceled_by;
-        self.filtered_by_before_enqueued_at |= filtered_by_before_enqueued_at;
-        self.filtered_by_after_enqueued_at |= filtered_by_after_enqueued_at;
-        self.filtered_by_before_started_at |= filtered_by_before_started_at;
-        self.filtered_by_after_started_at |= filtered_by_after_started_at;
-        self.filtered_by_before_finished_at |= filtered_by_before_finished_at;
-        self.filtered_by_after_finished_at |= filtered_by_after_finished_at;
-        self.filtered_by_after_finished_at |= filtered_by_after_finished_at;
-
-        self.total_received = self.total_received.saturating_add(total_received);
-    }
-
-    pub fn into_event(self, user: &User, event_name: &str) -> Option<Track> {
-        // if we had no timestamp it means we never encountered any events and
-        // thus we don't need to send this event.
-        let timestamp = self.timestamp?;
-
-        Some(Track {
-            timestamp: Some(timestamp),
-            user: user.clone(),
-            event: event_name.to_string(),
-            properties: serde_json::to_value(self).ok()?,
-            ..Default::default()
-        })
-    }
-}
-
-#[derive(Default, Serialize)]
-pub struct HealthAggregator {
-    #[serde(skip)]
-    timestamp: Option<OffsetDateTime>,
-
-    // context
-    #[serde(rename = "user-agent")]
-    user_agents: HashSet<String>,
-
-    #[serde(rename = "requests.total_received")]
-    total_received: usize,
-}
-
-impl HealthAggregator {
-    pub fn from_query(request: &HttpRequest) -> Self {
-        Self {
-            timestamp: Some(OffsetDateTime::now_utc()),
-            user_agents: extract_user_agents(request).into_iter().collect(),
-            total_received: 1,
-        }
-    }
-
-    /// Aggregate one [HealthAggregator] into another.
-    pub fn aggregate(&mut self, other: Self) {
-        let Self { timestamp, user_agents, total_received } = other;
-
-        if self.timestamp.is_none() {
-            self.timestamp = timestamp;
-        }
-
-        // we can't create a union because there is no `into_union` method
-        for user_agent in user_agents {
-            self.user_agents.insert(user_agent);
-        }
-        self.total_received = self.total_received.saturating_add(total_received);
-    }
-
-    pub fn into_event(self, user: &User, event_name: &str) -> Option<Track> {
-        // if we had no timestamp it means we never encountered any events and
-        // thus we don't need to send this event.
-        let timestamp = self.timestamp?;
-
-        Some(Track {
-            timestamp: Some(timestamp),
-            user: user.clone(),
-            event: event_name.to_string(),
-            properties: serde_json::to_value(self).ok()?,
-            ..Default::default()
-        })
-    }
-}
-
 #[derive(Default, Serialize)]
 pub struct DocumentsFetchAggregator {
     #[serde(skip)]
```
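Taken together, the analytics hunks strip out the "Tasks Seen" and "Health Seen" telemetry end-to-end: the `get_tasks` and `health_seen` trait methods and their mock implementations, the `AggregateTasks`/`AggregateHealth` message variants, the `TasksAggregator` and `HealthAggregator` types, and — in the route files below — the now-unused `req`/`analytics` handler arguments along with the "Stats Seen", "Version Seen", and per-task "Tasks Seen" publish calls. The unused `KeysResponse` struct is removed along the way.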
```diff
@@ -13,6 +13,7 @@ use byte_unit::{Byte, ByteError};
 use clap::Parser;
 use meilisearch_types::features::InstanceTogglableFeatures;
 use meilisearch_types::milli::update::IndexerConfig;
+use meilisearch_types::milli::ThreadPoolNoAbortBuilder;
 use rustls::server::{
     AllowAnyAnonymousOrAuthenticatedClient, AllowAnyAuthenticatedClient, ServerSessionMemoryCache,
 };
@@ -666,7 +667,7 @@ impl TryFrom<&IndexerOpts> for IndexerConfig {
     type Error = anyhow::Error;
 
     fn try_from(other: &IndexerOpts) -> Result<Self, Self::Error> {
-        let thread_pool = rayon::ThreadPoolBuilder::new()
+        let thread_pool = ThreadPoolNoAbortBuilder::new()
            .thread_name(|index| format!("indexing-thread:{index}"))
             .num_threads(*other.max_indexing_threads)
             .build()?;
```
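The indexer's pool switches from a bare `rayon::ThreadPoolBuilder` to milli's `ThreadPoolNoAbortBuilder`, whose purpose — judging by the `PanicCatched` error variant added to milli at the end of this compare — is to turn worker panics into a reportable error instead of rayon's default of aborting the process when a panic escapes `spawn`. Here is a rough, self-contained sketch of that mechanism using rayon's real `panic_handler` hook; `ThreadPoolNoAbort` itself is internal to milli, so this is illustrative rather than its actual implementation:

```rust
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Arc;

fn main() {
    // Record that a worker panicked instead of letting the process abort.
    let panicked = Arc::new(AtomicBool::new(false));
    let flag = Arc::clone(&panicked);

    let pool = rayon::ThreadPoolBuilder::new()
        .thread_name(|index| format!("indexing-thread:{index}"))
        .num_threads(2)
        // With a panic handler installed, rayon invokes it for panics that
        // escape `spawn` rather than aborting the whole process.
        .panic_handler(move |_payload| flag.store(true, Ordering::SeqCst))
        .build()
        .unwrap();

    pool.spawn(|| panic!("boom"));

    // Crude synchronization for the sketch; a real wrapper would check the
    // flag after `install`/`join` and return an error like `PanicCatched`.
    std::thread::sleep(std::time::Duration::from_millis(50));

    if panicked.load(Ordering::SeqCst) {
        eprintln!("indexing thread panicked: report an error, don't abort");
    }
}
```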
```diff
@@ -269,12 +269,8 @@ impl From<index_scheduler::IndexStats> for IndexStats {
 pub async fn get_index_stats(
     index_scheduler: GuardedData<ActionPolicy<{ actions::STATS_GET }>, Data<IndexScheduler>>,
     index_uid: web::Path<String>,
-    req: HttpRequest,
-    analytics: web::Data<dyn Analytics>,
 ) -> Result<HttpResponse, ResponseError> {
     let index_uid = IndexUid::try_from(index_uid.into_inner())?;
-    analytics.publish("Stats Seen".to_string(), json!({ "per_index_uid": true }), Some(&req));
 
     let stats = IndexStats::from(index_scheduler.index_stats(&index_uid)?);
 
     debug!(returns = ?stats, "Get index stats");
```
```diff
@@ -137,10 +137,8 @@ macro_rules! make_setting_route {
             let settings = settings(&index, &rtxn, meilisearch_types::settings::SecretPolicy::HideSecrets)?;
 
             debug!(returns = ?settings, "Update settings");
-            let mut json = serde_json::json!(&settings);
-            let val = json[$camelcase_attr].take();
 
-            Ok(HttpResponse::Ok().json(val))
+            Ok(HttpResponse::Ok().json(settings.$attr))
         }
 
         pub fn resources() -> Resource {
```
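With the wildcard handling now living on `WildcardSetting`'s own `Serialize` impl, every field of `Settings` serializes itself correctly on its own, so the generated per-setting route can return `settings.$attr` directly rather than serializing the whole settings object to JSON and extracting one key.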
```diff
@@ -8,11 +8,9 @@ use meilisearch_types::error::{Code, ResponseError};
 use meilisearch_types::settings::{Settings, Unchecked};
 use meilisearch_types::tasks::{Kind, Status, Task, TaskId};
 use serde::{Deserialize, Serialize};
-use serde_json::json;
 use time::OffsetDateTime;
 use tracing::debug;
 
-use crate::analytics::Analytics;
 use crate::extractors::authentication::policies::*;
 use crate::extractors::authentication::GuardedData;
 use crate::search_queue::SearchQueue;
@@ -296,10 +294,7 @@ pub struct Stats {
 async fn get_stats(
     index_scheduler: GuardedData<ActionPolicy<{ actions::STATS_GET }>, Data<IndexScheduler>>,
     auth_controller: GuardedData<ActionPolicy<{ actions::STATS_GET }>, Data<AuthController>>,
-    req: HttpRequest,
-    analytics: web::Data<dyn Analytics>,
 ) -> Result<HttpResponse, ResponseError> {
-    analytics.publish("Stats Seen".to_string(), json!({ "per_index_uid": false }), Some(&req));
     let filters = index_scheduler.filters();
 
     let stats = create_all_stats((*index_scheduler).clone(), (*auth_controller).clone(), filters)?;
@@ -355,11 +350,7 @@ struct VersionResponse {
 
 async fn get_version(
     _index_scheduler: GuardedData<ActionPolicy<{ actions::VERSION }>, Data<IndexScheduler>>,
-    req: HttpRequest,
-    analytics: web::Data<dyn Analytics>,
 ) -> HttpResponse {
-    analytics.publish("Version Seen".to_string(), json!(null), Some(&req));
-
     let build_info = build_info::BuildInfo::from_build();
 
     HttpResponse::Ok().json(VersionResponse {
@@ -376,21 +367,11 @@ async fn get_version(
     })
 }
 
-#[derive(Serialize)]
-struct KeysResponse {
-    private: Option<String>,
-    public: Option<String>,
-}
-
 pub async fn get_health(
-    req: HttpRequest,
     index_scheduler: Data<IndexScheduler>,
     auth_controller: Data<AuthController>,
     search_queue: Data<SearchQueue>,
-    analytics: web::Data<dyn Analytics>,
 ) -> Result<HttpResponse, ResponseError> {
-    analytics.health_seen(&req);
-
     search_queue.health().unwrap();
     index_scheduler.health().unwrap();
     auth_controller.health().unwrap();
```
```diff
@@ -270,12 +270,8 @@ pub struct AllTasks {
 async fn get_tasks(
     index_scheduler: GuardedData<ActionPolicy<{ actions::TASKS_GET }>, Data<IndexScheduler>>,
     params: AwebQueryParameter<TasksFilterQuery, DeserrQueryParamError>,
-    req: HttpRequest,
-    analytics: web::Data<dyn Analytics>,
 ) -> Result<HttpResponse, ResponseError> {
     let mut params = params.into_inner();
-    analytics.get_tasks(&params, &req);
-
     // We +1 just to know if there is more after this "page" or not.
     params.limit.0 = params.limit.0.saturating_add(1);
     let limit = params.limit.0;
@@ -298,8 +294,6 @@ async fn get_tasks(
 async fn get_task(
     index_scheduler: GuardedData<ActionPolicy<{ actions::TASKS_GET }>, Data<IndexScheduler>>,
     task_uid: web::Path<String>,
-    req: HttpRequest,
-    analytics: web::Data<dyn Analytics>,
 ) -> Result<HttpResponse, ResponseError> {
     let task_uid_string = task_uid.into_inner();
 
@@ -310,8 +304,6 @@ async fn get_task(
         }
     };
 
-    analytics.publish("Tasks Seen".to_string(), json!({ "per_task_uid": true }), Some(&req));
-
     let query = index_scheduler::Query { uids: Some(vec![task_uid]), ..Query::default() };
     let filters = index_scheduler.filters();
     let (tasks, _) = index_scheduler.get_tasks_from_authorized_indexes(query, filters)?;
```
```diff
@@ -117,3 +117,69 @@ async fn geo_bounding_box_with_string_and_number() {
     )
     .await;
 }
+
+#[actix_rt::test]
+async fn bug_4640() {
+    // https://github.com/meilisearch/meilisearch/issues/4640
+    let server = Server::new().await;
+    let index = server.index("test");
+
+    let documents = DOCUMENTS.clone();
+    index.add_documents(documents, None).await;
+    index.update_settings_filterable_attributes(json!(["_geo"])).await;
+    let (ret, _code) = index.update_settings_sortable_attributes(json!(["_geo"])).await;
+    index.wait_task(ret.uid()).await;
+
+    // Sort the document with the second one first
+    index
+        .search(
+            json!({
+                "sort": ["_geoPoint(45.4777599, 9.1967508):asc"],
+            }),
+            |response, code| {
+                assert_eq!(code, 200, "{}", response);
+                snapshot!(json_string!(response, { ".processingTimeMs" => "[time]" }), @r###"
+                {
+                  "hits": [
+                    {
+                      "id": 2,
+                      "name": "La Bella Italia",
+                      "address": "456 Elm Street, Townsville",
+                      "type": "Italian",
+                      "rating": 9,
+                      "_geo": {
+                        "lat": "45.4777599",
+                        "lng": "9.1967508"
+                      }
+                    },
+                    {
+                      "id": 1,
+                      "name": "Taco Truck",
+                      "address": "444 Salsa Street, Burritoville",
+                      "type": "Mexican",
+                      "rating": 9,
+                      "_geo": {
+                        "lat": 34.0522,
+                        "lng": -118.2437
+                      },
+                      "_geoDistance": 9714063
+                    },
+                    {
+                      "id": 3,
+                      "name": "Crêpe Truck",
+                      "address": "2 Billig Avenue, Rouenville",
+                      "type": "French",
+                      "rating": 10
+                    }
+                  ],
+                  "query": "",
+                  "processingTimeMs": "[time]",
+                  "limit": 20,
+                  "offset": 0,
+                  "estimatedTotalHits": 3
+                }
+                "###);
+            },
+        )
+        .await;
+}
```
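The new regression test references issue 4640 and exercises `_geoPoint` sorting over documents whose `_geo` coordinates are strings (document 2), numbers (document 1), or absent (document 3); the snapshot locks in the expected order, with the string-coordinate hit sorting first at the exact sort point and carrying no `_geoDistance`.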
```diff
@@ -17,7 +17,7 @@ bincode = "1.3.3"
 bstr = "1.9.0"
 bytemuck = { version = "1.14.0", features = ["extern_crate_alloc"] }
 byteorder = "1.5.0"
-charabia = { version = "0.8.9", default-features = false }
+charabia = { version = "0.8.10", default-features = false }
 concat-arrays = "0.1.2"
 crossbeam-channel = "0.5.11"
 deserr = "0.6.1"
@@ -89,7 +89,7 @@ ureq = { version = "2.9.6", features = ["json"] }
 url = "2.5.0"
 
 [dev-dependencies]
-mimalloc = { version = "0.1.39", default-features = false }
+mimalloc = { version = "0.1.37", default-features = false }
 big_s = "1.0.2"
 insta = "1.34.0"
 maplit = "1.0.2"
@@ -136,7 +136,11 @@ greek = ["charabia/greek"]
 # allow khmer specialized tokenization
 khmer = ["charabia/khmer"]
 
+# allow vietnamese specialized tokenization
 vietnamese = ["charabia/vietnamese"]
 
+# force swedish character recomposition
+swedish-recomposition = ["charabia/swedish-recomposition"]
+
 # allow CUDA support, see <https://github.com/meilisearch/meilisearch/issues/4306>
 cuda = ["candle-core/cuda"]
```
@@ -9,6 +9,7 @@ use serde_json::Value;
 use thiserror::Error;
 
 use crate::documents::{self, DocumentsBatchCursorError};
+use crate::thread_pool_no_abort::PanicCatched;
 use crate::{CriterionError, DocumentId, FieldId, Object, SortError};
 
 pub fn is_reserved_keyword(keyword: &str) -> bool {
@@ -39,17 +40,19 @@ pub enum InternalError {
     Fst(#[from] fst::Error),
     #[error(transparent)]
     DocumentsError(#[from] documents::Error),
-    #[error("Invalid compression type have been specified to grenad.")]
+    #[error("Invalid compression type have been specified to grenad")]
     GrenadInvalidCompressionType,
-    #[error("Invalid grenad file with an invalid version format.")]
+    #[error("Invalid grenad file with an invalid version format")]
     GrenadInvalidFormatVersion,
-    #[error("Invalid merge while processing {process}.")]
+    #[error("Invalid merge while processing {process}")]
     IndexingMergingKeys { process: &'static str },
     #[error("{}", HeedError::InvalidDatabaseTyping)]
     InvalidDatabaseTyping,
     #[error(transparent)]
     RayonThreadPool(#[from] ThreadPoolBuildError),
     #[error(transparent)]
+    PanicInThreadPool(#[from] PanicCatched),
+    #[error(transparent)]
     SerdeJson(#[from] serde_json::Error),
     #[error(transparent)]
     Serialization(#[from] SerializationError),
@@ -57,9 +60,9 @@ pub enum InternalError {
     Store(#[from] MdbError),
     #[error(transparent)]
     Utf8(#[from] str::Utf8Error),
-    #[error("An indexation process was explicitly aborted.")]
+    #[error("An indexation process was explicitly aborted")]
     AbortedIndexation,
-    #[error("The matching words list contains at least one invalid member.")]
+    #[error("The matching words list contains at least one invalid member")]
     InvalidMatchingWords,
     #[error(transparent)]
     ArroyError(#[from] arroy::Error),
@@ -21,6 +21,7 @@ pub mod prompt;
 pub mod proximity;
 pub mod score_details;
 mod search;
+mod thread_pool_no_abort;
 pub mod update;
 pub mod vector;
 
@@ -42,6 +43,7 @@ pub use search::new::{
     SearchLogger, VisualSearchLogger,
 };
 use serde_json::Value;
+pub use thread_pool_no_abort::{PanicCatched, ThreadPoolNoAbort, ThreadPoolNoAbortBuilder};
 pub use {charabia as tokenizer, heed};
 
 pub use self::asc_desc::{AscDesc, AscDescError, Member, SortError};
milli/src/thread_pool_no_abort.rs (new file, 69 lines)
@@ -0,0 +1,69 @@
+use std::sync::atomic::{AtomicBool, Ordering};
+use std::sync::Arc;
+
+use rayon::{ThreadPool, ThreadPoolBuilder};
+use thiserror::Error;
+
+/// A rayon ThreadPool wrapper that can catch panics in the pool
+/// and modifies the install function accordingly.
+#[derive(Debug)]
+pub struct ThreadPoolNoAbort {
+    thread_pool: ThreadPool,
+    /// Set to true if the thread pool catched a panic.
+    pool_catched_panic: Arc<AtomicBool>,
+}
+
+impl ThreadPoolNoAbort {
+    pub fn install<OP, R>(&self, op: OP) -> Result<R, PanicCatched>
+    where
+        OP: FnOnce() -> R + Send,
+        R: Send,
+    {
+        let output = self.thread_pool.install(op);
+        // While reseting the pool panic catcher we return an error if we catched one.
+        if self.pool_catched_panic.swap(false, Ordering::SeqCst) {
+            Err(PanicCatched)
+        } else {
+            Ok(output)
+        }
+    }
+
+    pub fn current_num_threads(&self) -> usize {
+        self.thread_pool.current_num_threads()
+    }
+}
+
+#[derive(Error, Debug)]
+#[error("A panic occured. Read the logs to find more information about it")]
+pub struct PanicCatched;
+
+#[derive(Default)]
+pub struct ThreadPoolNoAbortBuilder(ThreadPoolBuilder);
+
+impl ThreadPoolNoAbortBuilder {
+    pub fn new() -> ThreadPoolNoAbortBuilder {
+        ThreadPoolNoAbortBuilder::default()
+    }
+
+    pub fn thread_name<F>(mut self, closure: F) -> Self
+    where
+        F: FnMut(usize) -> String + 'static,
+    {
+        self.0 = self.0.thread_name(closure);
+        self
+    }
+
+    pub fn num_threads(mut self, num_threads: usize) -> ThreadPoolNoAbortBuilder {
+        self.0 = self.0.num_threads(num_threads);
+        self
+    }
+
+    pub fn build(mut self) -> Result<ThreadPoolNoAbort, rayon::ThreadPoolBuildError> {
+        let pool_catched_panic = Arc::new(AtomicBool::new(false));
+        self.0 = self.0.panic_handler({
+            let catched_panic = pool_catched_panic.clone();
+            move |_result| catched_panic.store(true, Ordering::SeqCst)
+        });
+        Ok(ThreadPoolNoAbort { thread_pool: self.0.build()?, pool_catched_panic })
+    }
+}
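ThreadPoolNoAbort exists because rayon's default panic handler aborts the whole process when a panic escapes work running on a pool. The builder above installs a handler that merely records the panic in an AtomicBool, and install checks (and resets) that flag, reporting Err(PanicCatched) instead of crashing. A short usage sketch built only from the API shown above (the workload itself is illustrative):

    use milli::{PanicCatched, ThreadPoolNoAbortBuilder};

    fn sum_on_pool() -> Result<u64, PanicCatched> {
        let pool = ThreadPoolNoAbortBuilder::new()
            .thread_name(|i| format!("example-{i}"))
            .num_threads(2)
            .build()
            .expect("failed to build the pool");
        // A panic recorded by the pool's panic handler while this closure
        // runs surfaces here as Err(PanicCatched) instead of an abort.
        pool.install(|| (0..1_000u64).sum())
    }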
@@ -45,7 +45,6 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
     obkv_documents: grenad::Reader<R>,
     indexer: GrenadParameters,
     settings_diff: &InnerIndexSettingsDiff,
-    geo_fields_ids: Option<(FieldId, FieldId)>,
 ) -> Result<ExtractedFacetValues> {
     puffin::profile_function!();
 
@@ -127,12 +126,18 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
                 add_exists.insert(document);
             }
 
-            let geo_support =
-                geo_fields_ids.map_or(false, |(lat, lng)| field_id == lat || field_id == lng);
+            let del_geo_support = settings_diff
+                .old
+                .geo_fields_ids
+                .map_or(false, |(lat, lng)| field_id == lat || field_id == lng);
+            let add_geo_support = settings_diff
+                .new
+                .geo_fields_ids
+                .map_or(false, |(lat, lng)| field_id == lat || field_id == lng);
             let del_filterable_values =
-                del_value.map(|value| extract_facet_values(&value, geo_support));
+                del_value.map(|value| extract_facet_values(&value, del_geo_support));
             let add_filterable_values =
-                add_value.map(|value| extract_facet_values(&value, geo_support));
+                add_value.map(|value| extract_facet_values(&value, add_geo_support));
 
             // Those closures are just here to simplify things a bit.
             let mut insert_numbers_diff = |del_numbers, add_numbers| {
@@ -8,6 +8,7 @@ use super::helpers::{create_writer, writer_into_reader, GrenadParameters};
 use crate::error::GeoError;
 use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd};
 use crate::update::index_documents::extract_finite_float_from_value;
+use crate::update::settings::{InnerIndexSettings, InnerIndexSettingsDiff};
 use crate::{FieldId, InternalError, Result};
 
 /// Extracts the geographical coordinates contained in each document under the `_geo` field.
@@ -18,7 +19,7 @@ pub fn extract_geo_points<R: io::Read + io::Seek>(
     obkv_documents: grenad::Reader<R>,
     indexer: GrenadParameters,
     primary_key_id: FieldId,
-    (lat_fid, lng_fid): (FieldId, FieldId),
+    settings_diff: &InnerIndexSettingsDiff,
 ) -> Result<grenad::Reader<BufReader<File>>> {
     puffin::profile_function!();
 
@@ -40,47 +41,27 @@ pub fn extract_geo_points<R: io::Read + io::Seek>(
             serde_json::from_slice(document_id).unwrap()
         };
 
-        // first we get the two fields
-        match (obkv.get(lat_fid), obkv.get(lng_fid)) {
-            (Some(lat), Some(lng)) => {
-                let deladd_lat_obkv = KvReaderDelAdd::new(lat);
-                let deladd_lng_obkv = KvReaderDelAdd::new(lng);
-
-                // then we extract the values
-                let del_lat_lng = deladd_lat_obkv
-                    .get(DelAdd::Deletion)
-                    .zip(deladd_lng_obkv.get(DelAdd::Deletion))
-                    .map(|(lat, lng)| extract_lat_lng(lat, lng, document_id))
-                    .transpose()?;
-                let add_lat_lng = deladd_lat_obkv
-                    .get(DelAdd::Addition)
-                    .zip(deladd_lng_obkv.get(DelAdd::Addition))
-                    .map(|(lat, lng)| extract_lat_lng(lat, lng, document_id))
-                    .transpose()?;
-
-                if del_lat_lng != add_lat_lng {
-                    let mut obkv = KvWriterDelAdd::memory();
-                    if let Some([lat, lng]) = del_lat_lng {
-                        #[allow(clippy::drop_non_drop)]
-                        let bytes: [u8; 16] = concat_arrays![lat.to_ne_bytes(), lng.to_ne_bytes()];
-                        obkv.insert(DelAdd::Deletion, bytes)?;
-                    }
-                    if let Some([lat, lng]) = add_lat_lng {
-                        #[allow(clippy::drop_non_drop)]
-                        let bytes: [u8; 16] = concat_arrays![lat.to_ne_bytes(), lng.to_ne_bytes()];
-                        obkv.insert(DelAdd::Addition, bytes)?;
-                    }
-                    let bytes = obkv.into_inner()?;
-                    writer.insert(docid_bytes, bytes)?;
-                }
+        // extract old version
+        let del_lat_lng =
+            extract_lat_lng(&obkv, &settings_diff.old, DelAdd::Deletion, document_id)?;
+        // extract new version
+        let add_lat_lng =
+            extract_lat_lng(&obkv, &settings_diff.new, DelAdd::Addition, document_id)?;
 
+        if del_lat_lng != add_lat_lng {
+            let mut obkv = KvWriterDelAdd::memory();
+            if let Some([lat, lng]) = del_lat_lng {
+                #[allow(clippy::drop_non_drop)]
+                let bytes: [u8; 16] = concat_arrays![lat.to_ne_bytes(), lng.to_ne_bytes()];
+                obkv.insert(DelAdd::Deletion, bytes)?;
             }
-            (None, Some(_)) => {
-                return Err(GeoError::MissingLatitude { document_id: document_id() }.into())
+            if let Some([lat, lng]) = add_lat_lng {
+                #[allow(clippy::drop_non_drop)]
+                let bytes: [u8; 16] = concat_arrays![lat.to_ne_bytes(), lng.to_ne_bytes()];
+                obkv.insert(DelAdd::Addition, bytes)?;
             }
-            (Some(_), None) => {
-                return Err(GeoError::MissingLongitude { document_id: document_id() }.into())
-            }
-            (None, None) => (),
+            let bytes = obkv.into_inner()?;
+            writer.insert(docid_bytes, bytes)?;
         }
     }
 
@@ -88,16 +69,37 @@ pub fn extract_geo_points<R: io::Read + io::Seek>(
 }
 
 /// Extract the finite floats lat and lng from two bytes slices.
-fn extract_lat_lng(lat: &[u8], lng: &[u8], document_id: impl Fn() -> Value) -> Result<[f64; 2]> {
-    let lat = extract_finite_float_from_value(
-        serde_json::from_slice(lat).map_err(InternalError::SerdeJson)?,
-    )
-    .map_err(|lat| GeoError::BadLatitude { document_id: document_id(), value: lat })?;
-
-    let lng = extract_finite_float_from_value(
-        serde_json::from_slice(lng).map_err(InternalError::SerdeJson)?,
-    )
-    .map_err(|lng| GeoError::BadLongitude { document_id: document_id(), value: lng })?;
-
-    Ok([lat, lng])
+fn extract_lat_lng(
+    document: &obkv::KvReader<FieldId>,
+    settings: &InnerIndexSettings,
+    deladd: DelAdd,
+    document_id: impl Fn() -> Value,
+) -> Result<Option<[f64; 2]>> {
+    match settings.geo_fields_ids {
+        Some((lat_fid, lng_fid)) => {
+            let lat = document.get(lat_fid).map(KvReaderDelAdd::new).and_then(|r| r.get(deladd));
+            let lng = document.get(lng_fid).map(KvReaderDelAdd::new).and_then(|r| r.get(deladd));
+            let (lat, lng) = match (lat, lng) {
+                (Some(lat), Some(lng)) => (lat, lng),
+                (Some(_), None) => {
+                    return Err(GeoError::MissingLatitude { document_id: document_id() }.into())
+                }
+                (None, Some(_)) => {
+                    return Err(GeoError::MissingLongitude { document_id: document_id() }.into())
+                }
+                (None, None) => return Ok(None),
+            };
+            let lat = extract_finite_float_from_value(
+                serde_json::from_slice(lat).map_err(InternalError::SerdeJson)?,
+            )
+            .map_err(|lat| GeoError::BadLatitude { document_id: document_id(), value: lat })?;
+
+            let lng = extract_finite_float_from_value(
+                serde_json::from_slice(lng).map_err(InternalError::SerdeJson)?,
+            )
+            .map_err(|lng| GeoError::BadLongitude { document_id: document_id(), value: lng })?;
+            Ok(Some([lat, lng]))
+        }
+        None => Ok(None),
+    }
 }
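The rewritten extract_lat_lng resolves the `_geo.lat` / `_geo.lng` field ids from the settings snapshot it is given (old or new) and returns Ok(None) when geo indexing is disabled on that side, so the caller only has to compare two Option<[f64; 2]> values. A minimal sketch of that comparison, with an illustrative name and plain values standing in for the obkv writer:

    // Emit a (deletion, addition) pair only when the coordinates changed.
    fn geo_delta(
        del: Option<[f64; 2]>,
        add: Option<[f64; 2]>,
    ) -> Option<(Option<[f64; 2]>, Option<[f64; 2]>)> {
        (del != add).then_some((del, add))
    }

    // geo_delta(None, None)                         -> None: nothing to write
    // geo_delta(Some([45.0, 9.0]), None)            -> deletion only (geo removed)
    // geo_delta(None, Some([45.0, 9.0]))            -> addition only (geo enabled)
    // geo_delta(Some([1.0, 2.0]), Some([3.0, 4.0])) -> both sides written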
@@ -19,7 +19,7 @@ use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd};
 use crate::update::index_documents::helpers::try_split_at;
 use crate::update::settings::InnerIndexSettingsDiff;
 use crate::vector::Embedder;
-use crate::{DocumentId, InternalError, Result, VectorOrArrayOfVectors};
+use crate::{DocumentId, InternalError, Result, ThreadPoolNoAbort, VectorOrArrayOfVectors};
 
 /// The length of the elements that are always in the buffer when inserting new values.
 const TRUNCATE_SIZE: usize = size_of::<DocumentId>();
@@ -198,11 +198,16 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
 
             if document_is_kept {
                 // Don't give up if the old prompt was failing
-                let old_prompt = prompt
-                    .render(obkv, DelAdd::Deletion, old_fields_ids_map)
-                    .unwrap_or_default();
+                let old_prompt = Some(prompt)
+                    // TODO: this filter works because we erase the vec database when a embedding setting changes.
+                    // When vector pipeline will be optimized, this should be removed.
+                    .filter(|_| !settings_diff.reindex_vectors())
+                    .map(|p| {
+                        p.render(obkv, DelAdd::Deletion, old_fields_ids_map).unwrap_or_default()
+                    });
                 let new_prompt = prompt.render(obkv, DelAdd::Addition, new_fields_ids_map)?;
-                if old_prompt != new_prompt {
+                if old_prompt.as_ref() != Some(&new_prompt) {
+                    let old_prompt = old_prompt.unwrap_or_default();
                     tracing::trace!(
                         "🚀 Changing prompt from\n{old_prompt}\n===to===\n{new_prompt}"
                     );
@@ -224,6 +229,7 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
                     &mut manual_vectors_writer,
                     &mut key_buffer,
                     delta,
+                    settings_diff,
                 )?;
             }
 
@@ -264,10 +270,15 @@ fn push_vectors_diff(
     manual_vectors_writer: &mut Writer<BufWriter<File>>,
     key_buffer: &mut Vec<u8>,
     delta: VectorStateDelta,
+    settings_diff: &InnerIndexSettingsDiff,
 ) -> Result<()> {
     puffin::profile_function!();
     let (must_remove, prompt, (mut del_vectors, mut add_vectors)) = delta.into_values();
-    if must_remove {
+    if must_remove
+        // TODO: the below condition works because we erase the vec database when a embedding setting changes.
+        // When vector pipeline will be optimized, this should be removed.
+        && !settings_diff.reindex_vectors()
+    {
         key_buffer.truncate(TRUNCATE_SIZE);
         remove_vectors_writer.insert(&key_buffer, [])?;
     }
@@ -295,12 +306,16 @@ fn push_vectors_diff(
         match eob {
             EitherOrBoth::Both(_, _) => (), // no need to touch anything
             EitherOrBoth::Left(vector) => {
-                // We insert only the Del part of the Obkv to inform
-                // that we only want to remove all those vectors.
-                let mut obkv = KvWriterDelAdd::memory();
-                obkv.insert(DelAdd::Deletion, cast_slice(&vector))?;
-                let bytes = obkv.into_inner()?;
-                manual_vectors_writer.insert(&key_buffer, bytes)?;
+                // TODO: the below condition works because we erase the vec database when a embedding setting changes.
+                // When vector pipeline will be optimized, this should be removed.
+                if !settings_diff.reindex_vectors() {
+                    // We insert only the Del part of the Obkv to inform
+                    // that we only want to remove all those vectors.
+                    let mut obkv = KvWriterDelAdd::memory();
+                    obkv.insert(DelAdd::Deletion, cast_slice(&vector))?;
+                    let bytes = obkv.into_inner()?;
+                    manual_vectors_writer.insert(&key_buffer, bytes)?;
+                }
             }
             EitherOrBoth::Right(vector) => {
                 // We insert only the Add part of the Obkv to inform
@@ -347,7 +362,7 @@ pub fn extract_embeddings<R: io::Read + io::Seek>(
     prompt_reader: grenad::Reader<R>,
     indexer: GrenadParameters,
     embedder: Arc<Embedder>,
-    request_threads: &rayon::ThreadPool,
+    request_threads: &ThreadPoolNoAbort,
 ) -> Result<grenad::Reader<BufReader<File>>> {
     puffin::profile_function!();
     let n_chunks = embedder.chunk_count_hint(); // chunk level parallelism
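All three edits in this file lean on the same observation: when settings_diff.reindex_vectors() is true, an embedder setting changed and the vector database is erased and rebuilt anyway, so rendering the old prompt or emitting vector deletions is wasted work. The prompt comparison then relies on Option semantics; a reduced sketch:

    // None means "old prompt deliberately not rendered" (vectors will be
    // rebuilt), and None != Some(_) forces the document to be re-embedded.
    fn prompt_changed(old_prompt: Option<&str>, new_prompt: &str) -> bool {
        old_prompt != Some(new_prompt)
    }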
@@ -31,7 +31,7 @@ use self::extract_word_position_docids::extract_word_position_docids;
 use super::helpers::{as_cloneable_grenad, CursorClonableMmap, GrenadParameters};
 use super::{helpers, TypedChunk};
 use crate::update::settings::InnerIndexSettingsDiff;
-use crate::{FieldId, Result};
+use crate::{FieldId, Result, ThreadPoolNoAbortBuilder};
 
 /// Extract data for each databases from obkv documents in parallel.
 /// Send data in grenad file over provided Sender.
@@ -43,7 +43,6 @@ pub(crate) fn data_from_obkv_documents(
     indexer: GrenadParameters,
     lmdb_writer_sx: Sender<Result<TypedChunk>>,
     primary_key_id: FieldId,
-    geo_fields_ids: Option<(FieldId, FieldId)>,
     settings_diff: Arc<InnerIndexSettingsDiff>,
     max_positions_per_attributes: Option<u32>,
 ) -> Result<()> {
@@ -72,7 +71,6 @@ pub(crate) fn data_from_obkv_documents(
                 indexer,
                 lmdb_writer_sx.clone(),
                 primary_key_id,
-                geo_fields_ids,
                 settings_diff.clone(),
                 max_positions_per_attributes,
             )
@@ -229,7 +227,7 @@ fn send_original_documents_data(
     let documents_chunk_cloned = original_documents_chunk.clone();
     let lmdb_writer_sx_cloned = lmdb_writer_sx.clone();
 
-    let request_threads = rayon::ThreadPoolBuilder::new()
+    let request_threads = ThreadPoolNoAbortBuilder::new()
         .num_threads(crate::vector::REQUEST_PARALLELISM)
         .thread_name(|index| format!("embedding-request-{index}"))
         .build()?;
@@ -300,7 +298,6 @@ fn send_and_extract_flattened_documents_data(
     indexer: GrenadParameters,
     lmdb_writer_sx: Sender<Result<TypedChunk>>,
     primary_key_id: FieldId,
-    geo_fields_ids: Option<(FieldId, FieldId)>,
     settings_diff: Arc<InnerIndexSettingsDiff>,
     max_positions_per_attributes: Option<u32>,
 ) -> Result<(
@@ -310,12 +307,13 @@ fn send_and_extract_flattened_documents_data(
     let flattened_documents_chunk =
         flattened_documents_chunk.and_then(|c| unsafe { as_cloneable_grenad(&c) })?;
 
-    if let Some(geo_fields_ids) = geo_fields_ids {
+    if settings_diff.run_geo_indexing() {
         let documents_chunk_cloned = flattened_documents_chunk.clone();
         let lmdb_writer_sx_cloned = lmdb_writer_sx.clone();
+        let settings_diff = settings_diff.clone();
         rayon::spawn(move || {
             let result =
-                extract_geo_points(documents_chunk_cloned, indexer, primary_key_id, geo_fields_ids);
+                extract_geo_points(documents_chunk_cloned, indexer, primary_key_id, &settings_diff);
             let _ = match result {
                 Ok(geo_points) => lmdb_writer_sx_cloned.send(Ok(TypedChunk::GeoPoints(geo_points))),
                 Err(error) => lmdb_writer_sx_cloned.send(Err(error)),
@@ -354,7 +352,6 @@ fn send_and_extract_flattened_documents_data(
             flattened_documents_chunk.clone(),
             indexer,
             &settings_diff,
-            geo_fields_ids,
         )?;
 
         // send fid_docid_facet_numbers_chunk to DB writer
@@ -33,6 +33,7 @@ use self::helpers::{grenad_obkv_into_chunks, GrenadParameters};
 pub use self::transform::{Transform, TransformOutput};
 use crate::documents::{obkv_to_object, DocumentsBatchReader};
 use crate::error::{Error, InternalError, UserError};
+use crate::thread_pool_no_abort::ThreadPoolNoAbortBuilder;
 pub use crate::update::index_documents::helpers::CursorClonableMmap;
 use crate::update::{
     IndexerConfig, UpdateIndexingStep, WordPrefixDocids, WordPrefixIntegerDocids, WordsPrefixesFst,
@@ -298,18 +299,18 @@ where
         let backup_pool;
         let pool = match self.indexer_config.thread_pool {
             Some(ref pool) => pool,
-            #[cfg(not(test))]
             None => {
-                // We initialize a bakcup pool with the default
+                // We initialize a backup pool with the default
                 // settings if none have already been set.
-                backup_pool = rayon::ThreadPoolBuilder::new().build()?;
-                &backup_pool
-            }
-            #[cfg(test)]
-            None => {
-                // We initialize a bakcup pool with the default
-                // settings if none have already been set.
-                backup_pool = rayon::ThreadPoolBuilder::new().num_threads(1).build()?;
+                #[allow(unused_mut)]
+                let mut pool_builder = ThreadPoolNoAbortBuilder::new();
+
+                #[cfg(test)]
+                {
+                    pool_builder = pool_builder.num_threads(1);
+                }
+
+                backup_pool = pool_builder.build()?;
                 &backup_pool
             }
         };
@@ -323,28 +324,6 @@ where
         // get the primary key field id
         let primary_key_id = settings_diff.new.fields_ids_map.id(&primary_key).unwrap();
 
-        // get the fid of the `_geo.lat` and `_geo.lng` fields.
-        let mut field_id_map = self.index.fields_ids_map(self.wtxn)?;
-
-        // self.index.fields_ids_map($a)? ==>> field_id_map
-        let geo_fields_ids = match field_id_map.id("_geo") {
-            Some(gfid) => {
-                let is_sortable = self.index.sortable_fields_ids(self.wtxn)?.contains(&gfid);
-                let is_filterable = self.index.filterable_fields_ids(self.wtxn)?.contains(&gfid);
-                // if `_geo` is faceted then we get the `lat` and `lng`
-                if is_sortable || is_filterable {
-                    let field_ids = field_id_map
-                        .insert("_geo.lat")
-                        .zip(field_id_map.insert("_geo.lng"))
-                        .ok_or(UserError::AttributeLimitReached)?;
-                    Some(field_ids)
-                } else {
-                    None
-                }
-            }
-            None => None,
-        };
-
         let pool_params = GrenadParameters {
             chunk_compression_type: self.indexer_config.chunk_compression_type,
             chunk_compression_level: self.indexer_config.chunk_compression_level,
@@ -411,7 +390,6 @@ where
                     pool_params,
                     lmdb_writer_sx.clone(),
                     primary_key_id,
-                    geo_fields_ids,
                     settings_diff.clone(),
                     max_positions_per_attributes,
                 )
@@ -533,7 +511,7 @@ where
             }
 
             Ok(())
-        })?;
+        }).map_err(InternalError::from)??;
 
         // We write the field distribution into the main database
         self.index.put_field_distribution(self.wtxn, &field_distribution)?;
@@ -562,7 +540,8 @@ where
                     writer.build(wtxn, &mut rng, None)?;
                 }
                 Result::Ok(())
-            })?;
+            })
+            .map_err(InternalError::from)??;
         }
 
         self.execute_prefix_databases(
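The two closing sites change from `})?;` to `}).map_err(InternalError::from)??;` because pool.install now returns Result<R, PanicCatched> where R is itself a Result: the outer `?` propagates a pool panic (converted into the new InternalError::PanicInThreadPool variant), and the inner `?` propagates the original indexing error. A condensed in-crate sketch of the nesting, using the same crate::error paths as the imports above and an empty workload:

    use crate::error::{Error, InternalError};
    use crate::thread_pool_no_abort::ThreadPoolNoAbort;

    fn run(pool: &ThreadPoolNoAbort) -> Result<(), Error> {
        // install(..) yields Result<Result<(), Error>, PanicCatched> here.
        pool.install(|| -> Result<(), Error> {
            // ... the original indexing work ...
            Ok(())
        })
        .map_err(InternalError::from)??; // outer ?: pool panic; inner ?: work error
        Ok(())
    }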
@@ -1,5 +1,6 @@
 use grenad::CompressionType;
-use rayon::ThreadPool;
+
+use crate::thread_pool_no_abort::ThreadPoolNoAbort;
 
 #[derive(Debug)]
 pub struct IndexerConfig {
@@ -9,7 +10,7 @@ pub struct IndexerConfig {
     pub max_memory: Option<usize>,
     pub chunk_compression_type: CompressionType,
     pub chunk_compression_level: Option<u32>,
-    pub thread_pool: Option<ThreadPool>,
+    pub thread_pool: Option<ThreadPoolNoAbort>,
     pub max_positions_per_attributes: Option<u32>,
     pub skip_index_budget: bool,
 }
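With the field type switched to ThreadPoolNoAbort, callers build their pool through ThreadPoolNoAbortBuilder instead of rayon's builder. A sketch of wiring a custom pool into IndexerConfig (this assumes the struct's Default impl, which is not shown in this diff):

    use milli::update::IndexerConfig;
    use milli::ThreadPoolNoAbortBuilder;

    fn indexer_config() -> Result<IndexerConfig, rayon::ThreadPoolBuildError> {
        let pool = ThreadPoolNoAbortBuilder::new()
            .thread_name(|i| format!("indexing-thread:{i}"))
            .build()?;
        // All other fields keep their defaults (assumed Default impl).
        Ok(IndexerConfig { thread_pool: Some(pool), ..Default::default() })
    }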
@@ -1161,6 +1161,11 @@ impl InnerIndexSettingsDiff {
     pub fn settings_update_only(&self) -> bool {
         self.settings_update_only
     }
+
+    pub fn run_geo_indexing(&self) -> bool {
+        self.old.geo_fields_ids != self.new.geo_fields_ids
+            || (!self.settings_update_only && self.new.geo_fields_ids.is_some())
+    }
 }
 
 #[derive(Clone)]
@@ -1177,6 +1182,7 @@ pub(crate) struct InnerIndexSettings {
     pub proximity_precision: ProximityPrecision,
     pub embedding_configs: EmbeddingConfigs,
     pub existing_fields: HashSet<String>,
+    pub geo_fields_ids: Option<(FieldId, FieldId)>,
 }
 
 impl InnerIndexSettings {
@@ -1185,7 +1191,7 @@ impl InnerIndexSettings {
         let stop_words = stop_words.map(|sw| sw.map_data(Vec::from).unwrap());
         let allowed_separators = index.allowed_separators(rtxn)?;
         let dictionary = index.dictionary(rtxn)?;
-        let fields_ids_map = index.fields_ids_map(rtxn)?;
+        let mut fields_ids_map = index.fields_ids_map(rtxn)?;
         let user_defined_searchable_fields = index.user_defined_searchable_fields(rtxn)?;
         let user_defined_searchable_fields =
             user_defined_searchable_fields.map(|sf| sf.into_iter().map(String::from).collect());
@@ -1200,6 +1206,24 @@ impl InnerIndexSettings {
             .into_iter()
            .filter_map(|(field, count)| (count != 0).then_some(field))
             .collect();
+        // index.fields_ids_map($a)? ==>> fields_ids_map
+        let geo_fields_ids = match fields_ids_map.id("_geo") {
+            Some(gfid) => {
+                let is_sortable = index.sortable_fields_ids(rtxn)?.contains(&gfid);
+                let is_filterable = index.filterable_fields_ids(rtxn)?.contains(&gfid);
+                // if `_geo` is faceted then we get the `lat` and `lng`
+                if is_sortable || is_filterable {
+                    let field_ids = fields_ids_map
+                        .insert("_geo.lat")
+                        .zip(fields_ids_map.insert("_geo.lng"))
+                        .ok_or(UserError::AttributeLimitReached)?;
+                    Some(field_ids)
+                } else {
+                    None
+                }
+            }
+            None => None,
+        };
 
         Ok(Self {
             stop_words,
@@ -1214,6 +1238,7 @@ impl InnerIndexSettings {
             proximity_precision,
             embedding_configs,
             existing_fields,
+            geo_fields_ids,
         })
     }
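run_geo_indexing is what gates the geo extraction spawned in send_and_extract_flattened_documents_data above: it is true when the resolved `_geo` field ids differ between the old and new settings, or when documents are being (re)indexed while geo is enabled. Its truth table, restated as a self-contained sketch (FieldId is a u16 in milli):

    fn run_geo_indexing(
        old: Option<(u16, u16)>,
        new: Option<(u16, u16)>,
        settings_update_only: bool,
    ) -> bool {
        // true when geo is enabled/disabled by a settings change,
        // or when documents are indexed while geo is enabled
        old != new || (!settings_update_only && new.is_some())
    }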
@@ -3,6 +3,7 @@ use std::path::PathBuf;
 use hf_hub::api::sync::ApiError;
 
 use crate::error::FaultSource;
+use crate::PanicCatched;
 
 #[derive(Debug, thiserror::Error)]
 #[error("Error while generating embeddings: {inner}")]
@@ -80,6 +81,8 @@ pub enum EmbedErrorKind {
     OpenAiUnexpectedDimension(usize, usize),
     #[error("no embedding was produced")]
     MissingEmbedding,
+    #[error(transparent)]
+    PanicInThreadPool(#[from] PanicCatched),
 }
 
 impl EmbedError {
@@ -7,6 +7,7 @@ use serde::{Deserialize, Serialize};
 
 use self::error::{EmbedError, NewEmbedderError};
 use crate::prompt::{Prompt, PromptData};
+use crate::ThreadPoolNoAbort;
 
 pub mod error;
 pub mod hf;
@@ -254,7 +255,7 @@ impl Embedder {
     pub fn embed_chunks(
         &self,
         text_chunks: Vec<Vec<String>>,
-        threads: &rayon::ThreadPool,
+        threads: &ThreadPoolNoAbort,
     ) -> std::result::Result<Vec<Vec<Embeddings<f32>>>, EmbedError> {
         match self {
             Embedder::HuggingFace(embedder) => embedder.embed_chunks(text_chunks),
@@ -3,6 +3,8 @@ use rayon::iter::{IntoParallelIterator as _, ParallelIterator as _};
 use super::error::{EmbedError, EmbedErrorKind, NewEmbedderError, NewEmbedderErrorKind};
 use super::rest::{Embedder as RestEmbedder, EmbedderOptions as RestEmbedderOptions};
 use super::{DistributionShift, Embeddings};
+use crate::error::FaultSource;
+use crate::ThreadPoolNoAbort;
 
 #[derive(Debug)]
 pub struct Embedder {
@@ -71,11 +73,16 @@ impl Embedder {
     pub fn embed_chunks(
         &self,
         text_chunks: Vec<Vec<String>>,
-        threads: &rayon::ThreadPool,
+        threads: &ThreadPoolNoAbort,
     ) -> Result<Vec<Vec<Embeddings<f32>>>, EmbedError> {
-        threads.install(move || {
-            text_chunks.into_par_iter().map(move |chunk| self.embed(chunk)).collect()
-        })
+        threads
+            .install(move || {
+                text_chunks.into_par_iter().map(move |chunk| self.embed(chunk)).collect()
+            })
+            .map_err(|error| EmbedError {
+                kind: EmbedErrorKind::PanicInThreadPool(error),
+                fault: FaultSource::Bug,
+            })?
     }
 
     pub fn chunk_count_hint(&self) -> usize {
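The same embed_chunks rewrite appears in the ollama, openai, and rest embedders: threads.install now returns Result<Result<_, EmbedError>, PanicCatched>, the pool error is mapped to EmbedErrorKind::PanicInThreadPool, and the trailing `?` unwraps back to the inner result. A reduced, self-contained sketch of the two-layer unwrapping, with a stand-in error type and workload:

    use milli::ThreadPoolNoAbort;

    fn embed_all(
        threads: &ThreadPoolNoAbort,
        chunks: Vec<Vec<String>>,
    ) -> Result<Vec<usize>, String> {
        threads
            .install(move || {
                // Stand-in for the parallel embedding work.
                Ok(chunks.iter().map(|chunk| chunk.len()).collect())
            })
            // Outer layer: a panic in the pool becomes a regular error;
            // the `?` then leaves the inner Result as the return value.
            .map_err(|panic| format!("panic in thread pool: {panic}"))?
    }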
@@ -4,7 +4,9 @@ use rayon::iter::{IntoParallelIterator, ParallelIterator as _};
 use super::error::{EmbedError, NewEmbedderError};
 use super::rest::{Embedder as RestEmbedder, EmbedderOptions as RestEmbedderOptions};
 use super::{DistributionShift, Embeddings};
+use crate::error::FaultSource;
 use crate::vector::error::EmbedErrorKind;
+use crate::ThreadPoolNoAbort;
 
 #[derive(Debug, Clone, Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize)]
 pub struct EmbedderOptions {
@@ -241,11 +243,16 @@ impl Embedder {
     pub fn embed_chunks(
         &self,
         text_chunks: Vec<Vec<String>>,
-        threads: &rayon::ThreadPool,
+        threads: &ThreadPoolNoAbort,
     ) -> Result<Vec<Vec<Embeddings<f32>>>, EmbedError> {
-        threads.install(move || {
-            text_chunks.into_par_iter().map(move |chunk| self.embed(chunk)).collect()
-        })
+        threads
+            .install(move || {
+                text_chunks.into_par_iter().map(move |chunk| self.embed(chunk)).collect()
+            })
+            .map_err(|error| EmbedError {
+                kind: EmbedErrorKind::PanicInThreadPool(error),
+                fault: FaultSource::Bug,
+            })?
     }
 
     pub fn chunk_count_hint(&self) -> usize {
@@ -2,9 +2,12 @@ use deserr::Deserr;
 use rayon::iter::{IntoParallelIterator as _, ParallelIterator as _};
 use serde::{Deserialize, Serialize};
 
+use super::error::EmbedErrorKind;
 use super::{
     DistributionShift, EmbedError, Embedding, Embeddings, NewEmbedderError, REQUEST_PARALLELISM,
 };
+use crate::error::FaultSource;
+use crate::ThreadPoolNoAbort;
 
 // retrying in case of failure
 
@@ -158,11 +161,16 @@ impl Embedder {
     pub fn embed_chunks(
         &self,
         text_chunks: Vec<Vec<String>>,
-        threads: &rayon::ThreadPool,
+        threads: &ThreadPoolNoAbort,
     ) -> Result<Vec<Vec<Embeddings<f32>>>, EmbedError> {
-        threads.install(move || {
-            text_chunks.into_par_iter().map(move |chunk| self.embed(chunk)).collect()
-        })
+        threads
+            .install(move || {
+                text_chunks.into_par_iter().map(move |chunk| self.embed(chunk)).collect()
+            })
+            .map_err(|error| EmbedError {
+                kind: EmbedErrorKind::PanicInThreadPool(error),
+                fault: FaultSource::Bug,
+            })?
    }
 
     pub fn chunk_count_hint(&self) -> usize {
@@ -301,10 +301,14 @@ impl From<EmbeddingConfig> for EmbeddingSettings {
     fn from(value: EmbeddingConfig) -> Self {
         let EmbeddingConfig { embedder_options, prompt } = value;
         match embedder_options {
-            super::EmbedderOptions::HuggingFace(options) => Self {
+            super::EmbedderOptions::HuggingFace(super::hf::EmbedderOptions {
+                model,
+                revision,
+                distribution,
+            }) => Self {
                 source: Setting::Set(EmbedderSource::HuggingFace),
-                model: Setting::Set(options.model),
-                revision: options.revision.map(Setting::Set).unwrap_or_default(),
+                model: Setting::Set(model),
+                revision: revision.map(Setting::Set).unwrap_or_default(),
                 api_key: Setting::NotSet,
                 dimensions: Setting::NotSet,
                 document_template: Setting::Set(prompt.template),
@@ -314,14 +318,19 @@ impl From<EmbeddingConfig> for EmbeddingSettings {
                 path_to_embeddings: Setting::NotSet,
                 embedding_object: Setting::NotSet,
                 input_type: Setting::NotSet,
-                distribution: options.distribution.map(Setting::Set).unwrap_or_default(),
+                distribution: distribution.map(Setting::Set).unwrap_or_default(),
             },
-            super::EmbedderOptions::OpenAi(options) => Self {
+            super::EmbedderOptions::OpenAi(super::openai::EmbedderOptions {
+                api_key,
+                embedding_model,
+                dimensions,
+                distribution,
+            }) => Self {
                 source: Setting::Set(EmbedderSource::OpenAi),
-                model: Setting::Set(options.embedding_model.name().to_owned()),
+                model: Setting::Set(embedding_model.name().to_owned()),
                 revision: Setting::NotSet,
-                api_key: options.api_key.map(Setting::Set).unwrap_or_default(),
-                dimensions: options.dimensions.map(Setting::Set).unwrap_or_default(),
+                api_key: api_key.map(Setting::Set).unwrap_or_default(),
+                dimensions: dimensions.map(Setting::Set).unwrap_or_default(),
                 document_template: Setting::Set(prompt.template),
                 url: Setting::NotSet,
                 query: Setting::NotSet,
@@ -329,29 +338,37 @@ impl From<EmbeddingConfig> for EmbeddingSettings {
                 path_to_embeddings: Setting::NotSet,
                 embedding_object: Setting::NotSet,
                 input_type: Setting::NotSet,
-                distribution: options.distribution.map(Setting::Set).unwrap_or_default(),
+                distribution: distribution.map(Setting::Set).unwrap_or_default(),
             },
-            super::EmbedderOptions::Ollama(options) => Self {
+            super::EmbedderOptions::Ollama(super::ollama::EmbedderOptions {
+                embedding_model,
+                url,
+                api_key,
+                distribution,
+            }) => Self {
                 source: Setting::Set(EmbedderSource::Ollama),
-                model: Setting::Set(options.embedding_model.to_owned()),
+                model: Setting::Set(embedding_model),
                 revision: Setting::NotSet,
-                api_key: Setting::NotSet,
+                api_key: api_key.map(Setting::Set).unwrap_or_default(),
                 dimensions: Setting::NotSet,
                 document_template: Setting::Set(prompt.template),
-                url: Setting::NotSet,
+                url: url.map(Setting::Set).unwrap_or_default(),
                 query: Setting::NotSet,
                 input_field: Setting::NotSet,
                 path_to_embeddings: Setting::NotSet,
                 embedding_object: Setting::NotSet,
                 input_type: Setting::NotSet,
-                distribution: options.distribution.map(Setting::Set).unwrap_or_default(),
+                distribution: distribution.map(Setting::Set).unwrap_or_default(),
             },
-            super::EmbedderOptions::UserProvided(options) => Self {
+            super::EmbedderOptions::UserProvided(super::manual::EmbedderOptions {
+                dimensions,
+                distribution,
+            }) => Self {
                 source: Setting::Set(EmbedderSource::UserProvided),
                 model: Setting::NotSet,
                 revision: Setting::NotSet,
                 api_key: Setting::NotSet,
-                dimensions: Setting::Set(options.dimensions),
+                dimensions: Setting::Set(dimensions),
                 document_template: Setting::NotSet,
                 url: Setting::NotSet,
                 query: Setting::NotSet,
@@ -359,7 +376,7 @@ impl From<EmbeddingConfig> for EmbeddingSettings {
                 path_to_embeddings: Setting::NotSet,
                 embedding_object: Setting::NotSet,
                 input_type: Setting::NotSet,
-                distribution: options.distribution.map(Setting::Set).unwrap_or_default(),
+                distribution: distribution.map(Setting::Set).unwrap_or_default(),
             },
             super::EmbedderOptions::Rest(super::rest::EmbedderOptions {
                 api_key,
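Each arm now exhaustively destructures its options struct instead of reading options.field: if a field is later added to one of these EmbedderOptions structs, this conversion stops compiling until the new field is mapped, rather than silently dropping it. The pattern in miniature, with hypothetical types:

    struct Options { model: String, revision: Option<String> }
    struct Settings { model: String, revision: Option<String> }

    impl From<Options> for Settings {
        fn from(value: Options) -> Self {
            // Exhaustive destructuring: adding a field to Options makes
            // this `let` a compile error until the field is handled here.
            let Options { model, revision } = value;
            Settings { model, revision }
        }
    }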
@@ -217,9 +217,7 @@ fn add_memory_samples(
     memory_counters: &mut Option<MemoryCounterHandles>,
     last_memory: &mut MemoryStats,
 ) -> Option<MemoryStats> {
-    let Some(stats) = memory else {
-        return None;
-    };
+    let stats = memory?;
 
     let memory_counters =
         memory_counters.get_or_insert_with(|| MemoryCounterHandles::new(profile, main));
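The `let ... else { return None; }` block collapses into the `?` operator, which performs the same early return on None. The equivalence in miniature:

    fn first_sample(memory: Option<u64>) -> Option<u64> {
        // let Some(stats) = memory else { return None; };
        let stats = memory?; // identical behavior, one line
        Some(stats)
    }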
workloads/movies-subset-hf-embeddings.json (new file, 68 lines)
@@ -0,0 +1,68 @@
+{
+    "name": "movies-subset-hf-embeddings",
+    "run_count": 5,
+    "extra_cli_args": [
+        "--max-indexing-threads=4"
+    ],
+    "assets": {
+        "movies-100.json": {
+            "local_location": null,
+            "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/movies-100.json",
+            "sha256": "d215e395e4240f12f03b8f1f68901eac82d9e7ded5b462cbf4a6b8efde76c6c6"
+        }
+    },
+    "commands": [
+        {
+            "route": "experimental-features",
+            "method": "PATCH",
+            "body": {
+                "inline": {
+                    "vectorStore": true
+                }
+            },
+            "synchronous": "DontWait"
+        },
+        {
+            "route": "indexes/movies/settings",
+            "method": "PATCH",
+            "body": {
+                "inline": {
+                    "searchableAttributes": [
+                        "title",
+                        "overview"
+                    ],
+                    "filterableAttributes": [
+                        "genres",
+                        "release_date"
+                    ],
+                    "sortableAttributes": [
+                        "release_date"
+                    ]
+                }
+            },
+            "synchronous": "WaitForTask"
+        },
+        {
+            "route": "indexes/movies/settings",
+            "method": "PATCH",
+            "body": {
+                "inline": {
+                    "embedders": {
+                        "default": {
+                            "source": "huggingFace"
+                        }
+                    }
+                }
+            },
+            "synchronous": "WaitForTask"
+        },
+        {
+            "route": "indexes/movies/documents",
+            "method": "POST",
+            "body": {
+                "asset": "movies-100.json"
+            },
+            "synchronous": "WaitForTask"
+        }
+    ]
+}
workloads/settings-add-embeddings.json (new file, 72 lines)
@@ -0,0 +1,72 @@
+{
+    "name": "settings-add-embeddings-hf",
+    "run_count": 5,
+    "extra_cli_args": [
+        "--max-indexing-threads=4"
+    ],
+    "assets": {
+        "movies-100.json": {
+            "local_location": null,
+            "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/movies-100.json",
+            "sha256": "d215e395e4240f12f03b8f1f68901eac82d9e7ded5b462cbf4a6b8efde76c6c6"
+        }
+    },
+    "commands": [
+        {
+            "route": "experimental-features",
+            "method": "PATCH",
+            "body": {
+                "inline": {
+                    "vectorStore": true
+                }
+            },
+            "synchronous": "DontWait"
+        },
+        {
+            "route": "indexes/movies/settings",
+            "method": "PATCH",
+            "body": {
+                "inline": {
+                    "searchableAttributes": [
+                        "title",
+                        "overview"
+                    ],
+                    "filterableAttributes": [
+                        "genres",
+                        "release_date"
+                    ],
+                    "sortableAttributes": [
+                        "release_date"
+                    ]
+                }
+            },
+            "synchronous": "DontWait"
+        },
+        {
+            "route": "indexes/movies/documents",
+            "method": "POST",
+            "body": {
+                "asset": "movies-100.json"
+            },
+            "synchronous": "WaitForTask"
+        },
+        {
+            "route": "indexes/movies/settings",
+            "method": "PATCH",
+            "body": {
+                "inline": {
+                    "embedders": {
+                        "default": {
+                            "source": "huggingFace",
+                            "model": null,
+                            "revision": null,
+                            "documentTemplate": null,
+                            "distribution": null
+                        }
+                    }
+                }
+            },
+            "synchronous": "WaitForTask"
+        }
+    ]
+}