mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-07-18 04:11:07 +00:00
Compare commits
255 Commits
prototype-
...
prototype-
Author | SHA1 | Date | |
---|---|---|---|
fa2b96b9a5 | |||
19736cefe8 | |||
4fb25b8782 | |||
c83a33017e | |||
be72326c0a | |||
547379abb0 | |||
0b2fff27f2 | |||
3adbc2b942 | |||
fbea721378 | |||
391eb72137 | |||
d78ad51082 | |||
1956045a06 | |||
248aaa6d45 | |||
50d6317ec0 | |||
b734bd9891 | |||
9800d5a103 | |||
7c4ed07617 | |||
3a99a555a2 | |||
9e1b458010 | |||
2aede03bc2 | |||
e741bc1c62 | |||
6425996e36 | |||
eb5cb91da2 | |||
87bba98bd8 | |||
217105b7da | |||
1b7c164a55 | |||
f3f3944469 | |||
93dcbf598d | |||
ac68f33194 | |||
9991152bbe | |||
a4536b1381 | |||
5b51cb04af | |||
3c1a14f1cd | |||
b8e4709dfa | |||
806e5b6899 | |||
61bd2fb7a9 | |||
e0cc775dc4 | |||
12940d79a9 | |||
922a640188 | |||
abbe131084 | |||
d4715e0c4d | |||
11e2a2c1aa | |||
65e49b7092 | |||
e56f160032 | |||
687d92f217 | |||
fb539f61fe | |||
cb4ebe163e | |||
dde3a04679 | |||
13c2c6c16b | |||
21bcf32109 | |||
35e1981488 | |||
e0f712b9d3 | |||
56571f762a | |||
005800634d | |||
976af4fa8f | |||
99fec27788 | |||
afa8f273a8 | |||
4b644f6bc0 | |||
7e259cb0d2 | |||
0fbc1511d7 | |||
c9860c7913 | |||
03ffabe889 | |||
1f4fc9c229 | |||
8cc3c54117 | |||
467b49153d | |||
0c3fa8cbc4 | |||
bddc168d83 | |||
84a36002d7 | |||
c95d68e244 | |||
3b3fa38f27 | |||
170e063b80 | |||
d6c2ee15a9 | |||
6376c342c1 | |||
5b563f872b | |||
ec9b52d608 | |||
34c67ac389 | |||
d050c9b4ae | |||
7dd1226faf | |||
1575456594 | |||
add2ceef67 | |||
548c8247c2 | |||
181ca48482 | |||
5751f5c640 | |||
d32eb11329 | |||
dc07790133 | |||
3d23b388bc | |||
85626cff8e | |||
58dac8af42 | |||
0dbf1a16ff | |||
462b4c0080 | |||
0d4482625a | |||
56a0d91ecd | |||
b366acdae6 | |||
7cb7e37ba8 | |||
33b7c574ea | |||
d3575fb028 | |||
39cbb499c2 | |||
ebef6bc24d | |||
d59b7db8d0 | |||
263e825619 | |||
69354a6144 | |||
b0adc73ce6 | |||
2b5d9042d1 | |||
5b57fbab08 | |||
72d3fa4898 | |||
772964125d | |||
378deb0bef | |||
1f36410541 | |||
b11f85a635 | |||
a2d6dc8571 | |||
ee1701157f | |||
8c649d8061 | |||
492fc086f0 | |||
a2d0c73b41 | |||
264b10ec20 | |||
825257da76 | |||
f8289cd974 | |||
3053e01c05 | |||
b11c2afac0 | |||
9cef800b2a | |||
db2fb86b8b | |||
882ab9cc85 | |||
5a9c96e1db | |||
70ce40828c | |||
688266c83e | |||
6dab826908 | |||
1e2fbc6a42 | |||
523519fdbf | |||
ef6fa10f7a | |||
620fee35f9 | |||
cbaa54cafd | |||
1bccf2079e | |||
1b2ea6cf19 | |||
1ad1fcc8c8 | |||
87610a5f98 | |||
2544bc1416 | |||
ff522c919d | |||
1c39459cf4 | |||
bf0651f23c | |||
5b20e625f3 | |||
bc51d6157a | |||
1b4ff991c0 | |||
4b64c33aa2 | |||
12323d610e | |||
44e9033b3a | |||
4d864f0702 | |||
b10c060bf7 | |||
e507ef5932 | |||
c71b1d33ae | |||
0fc446c62f | |||
0fb6acefc3 | |||
b1d1355b69 | |||
f19332466e | |||
03ddb4f310 | |||
c855cc2721 | |||
da0503ef80 | |||
54f0ee1ed2 | |||
94206b0055 | |||
b40253bf18 | |||
d8bf3f3fc2 | |||
9d59e8011a | |||
dad78cbf8d | |||
4e91707a06 | |||
de10f20732 | |||
ce5647e730 | |||
b57b818b67 | |||
f7ea94e5f4 | |||
be395c7944 | |||
9fedd8101a | |||
54d07a8da3 | |||
53382bb1b8 | |||
5b004a2583 | |||
13416ccbf7 | |||
58690dfb19 | |||
abf424ebfc | |||
dfab6293c9 | |||
fdf3f7f627 | |||
6260cff65f | |||
8e0d9c9a5e | |||
ae4ec8ea55 | |||
652ac3052d | |||
9a2dccc3bc | |||
a35988550c | |||
e78281785c | |||
3c15881818 | |||
73c06d31d9 | |||
290e773d23 | |||
fa6c7f65ca | |||
113527f466 | |||
c534a1b687 | |||
2263dff02b | |||
d651b3ef01 | |||
762b0b47e6 | |||
01d5eedf2f | |||
073f89db79 | |||
8370fbc92b | |||
85f42fbc03 | |||
c6b3c18c85 | |||
bafeb892a7 | |||
8fb221dae3 | |||
5be569e3e2 | |||
946c762d28 | |||
cda6ca1ee6 | |||
696fcf4d18 | |||
476e4d3dbe | |||
576fa9c6da | |||
77dcbff6b2 | |||
544440c363 | |||
a3dae4db9b | |||
ba90a5ec0e | |||
b26dc9aabe | |||
66abac9364 | |||
59f88c14b3 | |||
14832cb324 | |||
04ec293024 | |||
f67ff3a738 | |||
560e8f5613 | |||
2d3f15f82c | |||
40186bf403 | |||
87e3d27878 | |||
6bcf8b4f8c | |||
46aa75abdb | |||
2597bbd107 | |||
e2bc054604 | |||
fcd3a1434d | |||
a82dee21e0 | |||
bc45c1206d | |||
6ae4100f07 | |||
0c47defeee | |||
313b16bec2 | |||
1dd97578a8 | |||
f5ef69293b | |||
1c5705c164 | |||
66c2c82a18 | |||
28a8d0ccda | |||
96be85396d | |||
df9e5c8651 | |||
b541d48847 | |||
8ccf32d1a0 | |||
db1ca21231 | |||
11ea5acff9 | |||
8d77736a67 | |||
748b333161 | |||
17b647dfe5 | |||
2614e7d9ca | |||
e7244aa485 | |||
9cacc82307 | |||
4c6fddb1cb | |||
ca52021079 | |||
ee6f79d60b | |||
e4c24ca6a3 | |||
2bae9550c8 | |||
32c78ac8b1 | |||
5fe7c4545a | |||
2042229927 |
3
.github/workflows/benchmarks-pr.yml
vendored
3
.github/workflows/benchmarks-pr.yml
vendored
@ -90,7 +90,8 @@ jobs:
|
||||
set -x
|
||||
export base_ref=$(git merge-base origin/main ${{ steps.comment-branch.outputs.head_ref }} | head -c8)
|
||||
export base_filename=$(echo ${{ steps.command.outputs.command-arguments }}_main_${base_ref}.json)
|
||||
echo 'Here are your benchmarks diff 👊' >> body.txt
|
||||
export bench_name=$(echo ${{ steps.command.outputs.command-arguments }})
|
||||
echo "Here are your $bench_name benchmarks diff 👊" >> body.txt
|
||||
echo '```' >> body.txt
|
||||
./benchmarks/scripts/compare.sh $base_filename ${{ steps.file.outputs.basename }}.json >> body.txt
|
||||
echo '```' >> body.txt
|
||||
|
2087
Cargo.lock
generated
2087
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
@ -2,6 +2,7 @@
|
||||
resolver = "2"
|
||||
members = [
|
||||
"meilisearch",
|
||||
"meilitool",
|
||||
"meilisearch-types",
|
||||
"meilisearch-auth",
|
||||
"meili-snap",
|
||||
@ -18,7 +19,7 @@ members = [
|
||||
]
|
||||
|
||||
[workspace.package]
|
||||
version = "1.4.1"
|
||||
version = "1.6.0"
|
||||
authors = ["Quentin de Quelen <quentin@dequelen.me>", "Clément Renault <clement@meilisearch.com>"]
|
||||
description = "Meilisearch HTTP server"
|
||||
homepage = "https://meilisearch.com"
|
||||
|
13
Dockerfile
13
Dockerfile
@ -1,9 +1,9 @@
|
||||
# Compile
|
||||
FROM rust:alpine3.16 AS compiler
|
||||
FROM rust:1.71.1-alpine3.18 AS compiler
|
||||
|
||||
RUN apk add -q --update-cache --no-cache build-base openssl-dev
|
||||
|
||||
WORKDIR /meilisearch
|
||||
WORKDIR /
|
||||
|
||||
ARG COMMIT_SHA
|
||||
ARG COMMIT_DATE
|
||||
@ -17,7 +17,7 @@ RUN set -eux; \
|
||||
if [ "$apkArch" = "aarch64" ]; then \
|
||||
export JEMALLOC_SYS_WITH_LG_PAGE=16; \
|
||||
fi && \
|
||||
cargo build --release
|
||||
cargo build --release -p meilisearch -p meilitool
|
||||
|
||||
# Run
|
||||
FROM alpine:3.16
|
||||
@ -28,9 +28,10 @@ ENV MEILI_SERVER_PROVIDER docker
|
||||
RUN apk update --quiet \
|
||||
&& apk add -q --no-cache libgcc tini curl
|
||||
|
||||
# add meilisearch to the `/bin` so you can run it from anywhere and it's easy
|
||||
# to find.
|
||||
COPY --from=compiler /meilisearch/target/release/meilisearch /bin/meilisearch
|
||||
# add meilisearch and meilitool to the `/bin` so you can run it from anywhere
|
||||
# and it's easy to find.
|
||||
COPY --from=compiler /target/release/meilisearch /bin/meilisearch
|
||||
COPY --from=compiler /target/release/meilitool /bin/meilitool
|
||||
# To stay compatible with the older version of the container (pre v0.27.0) we're
|
||||
# going to symlink the meilisearch binary in the path to `/meilisearch`
|
||||
RUN ln -s /bin/meilisearch /meilisearch
|
||||
|
@ -6,9 +6,7 @@ use std::path::Path;
|
||||
|
||||
use criterion::{criterion_group, criterion_main, Criterion};
|
||||
use milli::heed::{EnvOpenOptions, RwTxn};
|
||||
use milli::update::{
|
||||
DeleteDocuments, IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings,
|
||||
};
|
||||
use milli::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings};
|
||||
use milli::Index;
|
||||
use rand::seq::SliceRandom;
|
||||
use rand_chacha::rand_core::SeedableRng;
|
||||
@ -38,7 +36,7 @@ fn setup_index() -> Index {
|
||||
}
|
||||
|
||||
fn setup_settings<'t>(
|
||||
wtxn: &mut RwTxn<'t, '_>,
|
||||
wtxn: &mut RwTxn<'t>,
|
||||
index: &'t Index,
|
||||
primary_key: &str,
|
||||
searchable_fields: &[&str],
|
||||
@ -266,17 +264,7 @@ fn deleting_songs_in_batches_default(c: &mut Criterion) {
|
||||
(index, document_ids_to_delete)
|
||||
},
|
||||
move |(index, document_ids_to_delete)| {
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
|
||||
for ids in document_ids_to_delete {
|
||||
let mut builder = DeleteDocuments::new(&mut wtxn, &index).unwrap();
|
||||
builder.delete_documents(&ids);
|
||||
builder.execute().unwrap();
|
||||
}
|
||||
|
||||
wtxn.commit().unwrap();
|
||||
|
||||
index.prepare_for_closing().wait();
|
||||
delete_documents_from_ids(index, document_ids_to_delete)
|
||||
},
|
||||
)
|
||||
});
|
||||
@ -613,17 +601,7 @@ fn deleting_wiki_in_batches_default(c: &mut Criterion) {
|
||||
(index, document_ids_to_delete)
|
||||
},
|
||||
move |(index, document_ids_to_delete)| {
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
|
||||
for ids in document_ids_to_delete {
|
||||
let mut builder = DeleteDocuments::new(&mut wtxn, &index).unwrap();
|
||||
builder.delete_documents(&ids);
|
||||
builder.execute().unwrap();
|
||||
}
|
||||
|
||||
wtxn.commit().unwrap();
|
||||
|
||||
index.prepare_for_closing().wait();
|
||||
delete_documents_from_ids(index, document_ids_to_delete)
|
||||
},
|
||||
)
|
||||
});
|
||||
@ -875,22 +853,31 @@ fn deleting_movies_in_batches_default(c: &mut Criterion) {
|
||||
(index, document_ids_to_delete)
|
||||
},
|
||||
move |(index, document_ids_to_delete)| {
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
|
||||
for ids in document_ids_to_delete {
|
||||
let mut builder = DeleteDocuments::new(&mut wtxn, &index).unwrap();
|
||||
builder.delete_documents(&ids);
|
||||
builder.execute().unwrap();
|
||||
}
|
||||
|
||||
wtxn.commit().unwrap();
|
||||
|
||||
index.prepare_for_closing().wait();
|
||||
delete_documents_from_ids(index, document_ids_to_delete)
|
||||
},
|
||||
)
|
||||
});
|
||||
}
|
||||
|
||||
fn delete_documents_from_ids(index: Index, document_ids_to_delete: Vec<RoaringBitmap>) {
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
|
||||
let indexer_config = IndexerConfig::default();
|
||||
for ids in document_ids_to_delete {
|
||||
let config = IndexDocumentsConfig::default();
|
||||
|
||||
let mut builder =
|
||||
IndexDocuments::new(&mut wtxn, &index, &indexer_config, config, |_| (), || false)
|
||||
.unwrap();
|
||||
(builder, _) = builder.remove_documents_from_db_no_batch(&ids).unwrap();
|
||||
builder.execute().unwrap();
|
||||
}
|
||||
|
||||
wtxn.commit().unwrap();
|
||||
|
||||
index.prepare_for_closing().wait();
|
||||
}
|
||||
|
||||
fn indexing_movies_in_three_batches(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("indexing");
|
||||
group.sample_size(BENCHMARK_ITERATION);
|
||||
@ -1112,17 +1099,7 @@ fn deleting_nested_movies_in_batches_default(c: &mut Criterion) {
|
||||
(index, document_ids_to_delete)
|
||||
},
|
||||
move |(index, document_ids_to_delete)| {
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
|
||||
for ids in document_ids_to_delete {
|
||||
let mut builder = DeleteDocuments::new(&mut wtxn, &index).unwrap();
|
||||
builder.delete_documents(&ids);
|
||||
builder.execute().unwrap();
|
||||
}
|
||||
|
||||
wtxn.commit().unwrap();
|
||||
|
||||
index.prepare_for_closing().wait();
|
||||
delete_documents_from_ids(index, document_ids_to_delete)
|
||||
},
|
||||
)
|
||||
});
|
||||
@ -1338,17 +1315,7 @@ fn deleting_geo_in_batches_default(c: &mut Criterion) {
|
||||
(index, document_ids_to_delete)
|
||||
},
|
||||
move |(index, document_ids_to_delete)| {
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
|
||||
for ids in document_ids_to_delete {
|
||||
let mut builder = DeleteDocuments::new(&mut wtxn, &index).unwrap();
|
||||
builder.delete_documents(&ids);
|
||||
builder.execute().unwrap();
|
||||
}
|
||||
|
||||
wtxn.commit().unwrap();
|
||||
|
||||
index.prepare_for_closing().wait();
|
||||
delete_documents_from_ids(index, document_ids_to_delete)
|
||||
},
|
||||
)
|
||||
});
|
||||
|
@ -129,3 +129,6 @@ experimental_enable_metrics = false
|
||||
|
||||
# Experimental RAM reduction during indexing, do not use in production, see: <https://github.com/meilisearch/product/discussions/652>
|
||||
experimental_reduce_indexing_memory_usage = false
|
||||
|
||||
# Experimentally reduces the maximum number of tasks that will be processed at once, see: <https://github.com/orgs/meilisearch/discussions/713>
|
||||
# experimental_max_number_of_batched_tasks = 100
|
||||
|
@ -267,6 +267,7 @@ pub(crate) mod test {
|
||||
dictionary: Setting::NotSet,
|
||||
synonyms: Setting::NotSet,
|
||||
distinct_attribute: Setting::NotSet,
|
||||
proximity_precision: Setting::NotSet,
|
||||
typo_tolerance: Setting::NotSet,
|
||||
faceting: Setting::Set(FacetingSettings {
|
||||
max_values_per_facet: Setting::Set(111),
|
||||
@ -275,6 +276,7 @@ pub(crate) mod test {
|
||||
),
|
||||
}),
|
||||
pagination: Setting::NotSet,
|
||||
embedders: Setting::NotSet,
|
||||
_kind: std::marker::PhantomData,
|
||||
};
|
||||
settings.check()
|
||||
|
@ -345,6 +345,7 @@ impl<T> From<v5::Settings<T>> for v6::Settings<v6::Unchecked> {
|
||||
dictionary: v6::Setting::NotSet,
|
||||
synonyms: settings.synonyms.into(),
|
||||
distinct_attribute: settings.distinct_attribute.into(),
|
||||
proximity_precision: v6::Setting::NotSet,
|
||||
typo_tolerance: match settings.typo_tolerance {
|
||||
v5::Setting::Set(typo) => v6::Setting::Set(v6::TypoTolerance {
|
||||
enabled: typo.enabled.into(),
|
||||
@ -377,6 +378,7 @@ impl<T> From<v5::Settings<T>> for v6::Settings<v6::Unchecked> {
|
||||
v5::Setting::Reset => v6::Setting::Reset,
|
||||
v5::Setting::NotSet => v6::Setting::NotSet,
|
||||
},
|
||||
embedders: v6::Setting::NotSet,
|
||||
_kind: std::marker::PhantomData,
|
||||
}
|
||||
}
|
||||
|
@ -13,12 +13,12 @@ use crate::{Result, Version};
|
||||
|
||||
mod compat;
|
||||
|
||||
pub(self) mod v1;
|
||||
pub(self) mod v2;
|
||||
pub(self) mod v3;
|
||||
pub(self) mod v4;
|
||||
pub(self) mod v5;
|
||||
pub(self) mod v6;
|
||||
mod v1;
|
||||
mod v2;
|
||||
mod v3;
|
||||
mod v4;
|
||||
mod v5;
|
||||
mod v6;
|
||||
|
||||
pub type Document = serde_json::Map<String, serde_json::Value>;
|
||||
pub type UpdateFile = dyn Iterator<Item = Result<Document>>;
|
||||
|
@ -1,24 +0,0 @@
|
||||
---
|
||||
source: dump/src/reader/mod.rs
|
||||
expression: spells.settings().unwrap()
|
||||
---
|
||||
{
|
||||
"displayedAttributes": [
|
||||
"*"
|
||||
],
|
||||
"searchableAttributes": [
|
||||
"*"
|
||||
],
|
||||
"filterableAttributes": [],
|
||||
"sortableAttributes": [],
|
||||
"rankingRules": [
|
||||
"typo",
|
||||
"words",
|
||||
"proximity",
|
||||
"attribute",
|
||||
"exactness"
|
||||
],
|
||||
"stopWords": [],
|
||||
"synonyms": {},
|
||||
"distinctAttribute": null
|
||||
}
|
@ -1,38 +0,0 @@
|
||||
---
|
||||
source: dump/src/reader/mod.rs
|
||||
expression: products.settings().unwrap()
|
||||
---
|
||||
{
|
||||
"displayedAttributes": [
|
||||
"*"
|
||||
],
|
||||
"searchableAttributes": [
|
||||
"*"
|
||||
],
|
||||
"filterableAttributes": [],
|
||||
"sortableAttributes": [],
|
||||
"rankingRules": [
|
||||
"typo",
|
||||
"words",
|
||||
"proximity",
|
||||
"attribute",
|
||||
"exactness"
|
||||
],
|
||||
"stopWords": [],
|
||||
"synonyms": {
|
||||
"android": [
|
||||
"phone",
|
||||
"smartphone"
|
||||
],
|
||||
"iphone": [
|
||||
"phone",
|
||||
"smartphone"
|
||||
],
|
||||
"phone": [
|
||||
"android",
|
||||
"iphone",
|
||||
"smartphone"
|
||||
]
|
||||
},
|
||||
"distinctAttribute": null
|
||||
}
|
@ -1,31 +0,0 @@
|
||||
---
|
||||
source: dump/src/reader/mod.rs
|
||||
expression: movies.settings().unwrap()
|
||||
---
|
||||
{
|
||||
"displayedAttributes": [
|
||||
"*"
|
||||
],
|
||||
"searchableAttributes": [
|
||||
"*"
|
||||
],
|
||||
"filterableAttributes": [
|
||||
"genres",
|
||||
"id"
|
||||
],
|
||||
"sortableAttributes": [
|
||||
"genres",
|
||||
"id"
|
||||
],
|
||||
"rankingRules": [
|
||||
"typo",
|
||||
"words",
|
||||
"proximity",
|
||||
"attribute",
|
||||
"exactness",
|
||||
"release_date:asc"
|
||||
],
|
||||
"stopWords": [],
|
||||
"synonyms": {},
|
||||
"distinctAttribute": null
|
||||
}
|
@ -56,8 +56,7 @@ pub enum RankingRule {
|
||||
Desc(String),
|
||||
}
|
||||
|
||||
static ASC_DESC_REGEX: Lazy<Regex> =
|
||||
Lazy::new(|| Regex::new(r#"(asc|desc)\(([\w_-]+)\)"#).unwrap());
|
||||
static ASC_DESC_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"(asc|desc)\(([\w_-]+)\)").unwrap());
|
||||
|
||||
impl FromStr for RankingRule {
|
||||
type Err = ();
|
||||
|
@ -564,10 +564,10 @@ pub mod tests {
|
||||
|
||||
#[test]
|
||||
fn parse_escaped() {
|
||||
insta::assert_display_snapshot!(p(r#"title = 'foo\\'"#), @r#"{title} = {foo\}"#);
|
||||
insta::assert_display_snapshot!(p(r#"title = 'foo\\\\'"#), @r#"{title} = {foo\\}"#);
|
||||
insta::assert_display_snapshot!(p(r#"title = 'foo\\\\\\'"#), @r#"{title} = {foo\\\}"#);
|
||||
insta::assert_display_snapshot!(p(r#"title = 'foo\\\\\\\\'"#), @r#"{title} = {foo\\\\}"#);
|
||||
insta::assert_display_snapshot!(p(r"title = 'foo\\'"), @r#"{title} = {foo\}"#);
|
||||
insta::assert_display_snapshot!(p(r"title = 'foo\\\\'"), @r#"{title} = {foo\\}"#);
|
||||
insta::assert_display_snapshot!(p(r"title = 'foo\\\\\\'"), @r#"{title} = {foo\\\}"#);
|
||||
insta::assert_display_snapshot!(p(r"title = 'foo\\\\\\\\'"), @r#"{title} = {foo\\\\}"#);
|
||||
// but it also works with other sequencies
|
||||
insta::assert_display_snapshot!(p(r#"title = 'foo\x20\n\t\"\'"'"#), @"{title} = {foo \n\t\"\'\"}");
|
||||
}
|
||||
|
@ -270,8 +270,8 @@ pub mod test {
|
||||
("aaaa", "", rtok("", "aaaa"), "aaaa"),
|
||||
(r#"aa"aa"#, r#""aa"#, rtok("", "aa"), "aa"),
|
||||
(r#"aa\"aa"#, r#""#, rtok("", r#"aa\"aa"#), r#"aa"aa"#),
|
||||
(r#"aa\\\aa"#, r#""#, rtok("", r#"aa\\\aa"#), r#"aa\\\aa"#),
|
||||
(r#"aa\\"\aa"#, r#""\aa"#, rtok("", r#"aa\\"#), r#"aa\\"#),
|
||||
(r"aa\\\aa", r#""#, rtok("", r"aa\\\aa"), r"aa\\\aa"),
|
||||
(r#"aa\\"\aa"#, r#""\aa"#, rtok("", r"aa\\"), r"aa\\"),
|
||||
(r#"aa\\\"\aa"#, r#""#, rtok("", r#"aa\\\"\aa"#), r#"aa\\"\aa"#),
|
||||
(r#"\"\""#, r#""#, rtok("", r#"\"\""#), r#""""#),
|
||||
];
|
||||
@ -301,12 +301,12 @@ pub mod test {
|
||||
);
|
||||
// simple quote
|
||||
assert_eq!(
|
||||
unescape(Span::new_extra(r#"Hello \'World\'"#, ""), '\''),
|
||||
unescape(Span::new_extra(r"Hello \'World\'", ""), '\''),
|
||||
r#"Hello 'World'"#.to_string()
|
||||
);
|
||||
assert_eq!(
|
||||
unescape(Span::new_extra(r#"Hello \\\'World\\\'"#, ""), '\''),
|
||||
r#"Hello \\'World\\'"#.to_string()
|
||||
unescape(Span::new_extra(r"Hello \\\'World\\\'", ""), '\''),
|
||||
r"Hello \\'World\\'".to_string()
|
||||
);
|
||||
}
|
||||
|
||||
@ -335,19 +335,19 @@ pub mod test {
|
||||
("\"cha'nnel\"", "cha'nnel", false),
|
||||
("I'm tamo", "I", false),
|
||||
// escaped thing but not quote
|
||||
(r#""\\""#, r#"\"#, true),
|
||||
(r#""\\\\\\""#, r#"\\\"#, true),
|
||||
(r#""aa\\aa""#, r#"aa\aa"#, true),
|
||||
(r#""\\""#, r"\", true),
|
||||
(r#""\\\\\\""#, r"\\\", true),
|
||||
(r#""aa\\aa""#, r"aa\aa", true),
|
||||
// with double quote
|
||||
(r#""Hello \"world\"""#, r#"Hello "world""#, true),
|
||||
(r#""Hello \\\"world\\\"""#, r#"Hello \"world\""#, true),
|
||||
(r#""I'm \"super\" tamo""#, r#"I'm "super" tamo"#, true),
|
||||
(r#""\"\"""#, r#""""#, true),
|
||||
// with simple quote
|
||||
(r#"'Hello \'world\''"#, r#"Hello 'world'"#, true),
|
||||
(r#"'Hello \\\'world\\\''"#, r#"Hello \'world\'"#, true),
|
||||
(r"'Hello \'world\''", r#"Hello 'world'"#, true),
|
||||
(r"'Hello \\\'world\\\''", r"Hello \'world\'", true),
|
||||
(r#"'I\'m "super" tamo'"#, r#"I'm "super" tamo"#, true),
|
||||
(r#"'\'\''"#, r#"''"#, true),
|
||||
(r"'\'\''", r#"''"#, true),
|
||||
];
|
||||
|
||||
for (input, expected, escaped) in test_case {
|
||||
|
@ -113,7 +113,7 @@ fn main() {
|
||||
index.documents(&wtxn, res.documents_ids).unwrap();
|
||||
progression.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
wtxn.abort().unwrap();
|
||||
wtxn.abort();
|
||||
});
|
||||
if let err @ Err(_) = handle.join() {
|
||||
stop.store(true, Ordering::Relaxed);
|
||||
|
@ -18,11 +18,12 @@ derive_builder = "0.12.0"
|
||||
dump = { path = "../dump" }
|
||||
enum-iterator = "1.4.0"
|
||||
file-store = { path = "../file-store" }
|
||||
flate2 = "1.0.28"
|
||||
log = "0.4.17"
|
||||
meilisearch-auth = { path = "../meilisearch-auth" }
|
||||
meilisearch-types = { path = "../meilisearch-types" }
|
||||
page_size = "0.5.0"
|
||||
puffin = "0.16.0"
|
||||
puffin = { version = "0.16.0", features = ["serialization"] }
|
||||
roaring = { version = "0.10.1", features = ["serde"] }
|
||||
serde = { version = "1.0.160", features = ["derive"] }
|
||||
serde_json = { version = "1.0.95", features = ["preserve_order"] }
|
||||
@ -30,6 +31,7 @@ synchronoise = "1.0.1"
|
||||
tempfile = "3.5.0"
|
||||
thiserror = "1.0.40"
|
||||
time = { version = "0.3.20", features = ["serde-well-known", "formatting", "parsing", "macros"] }
|
||||
ureq = "2.9.1"
|
||||
uuid = { version = "1.3.1", features = ["serde", "v4"] }
|
||||
|
||||
[dev-dependencies]
|
||||
|
@ -24,16 +24,15 @@ use std::fs::{self, File};
|
||||
use std::io::BufWriter;
|
||||
|
||||
use dump::IndexMetadata;
|
||||
use log::{debug, error, info};
|
||||
use log::{debug, error, info, trace};
|
||||
use meilisearch_types::error::Code;
|
||||
use meilisearch_types::heed::{RoTxn, RwTxn};
|
||||
use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader};
|
||||
use meilisearch_types::milli::heed::CompactionOption;
|
||||
use meilisearch_types::milli::update::{
|
||||
DeleteDocuments, DocumentDeletionResult, IndexDocumentsConfig, IndexDocumentsMethod,
|
||||
Settings as MilliSettings,
|
||||
IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig, Settings as MilliSettings,
|
||||
};
|
||||
use meilisearch_types::milli::{self, Filter, BEU32};
|
||||
use meilisearch_types::milli::{self, Filter};
|
||||
use meilisearch_types::settings::{apply_settings_to_builder, Settings, Unchecked};
|
||||
use meilisearch_types::tasks::{Details, IndexSwap, Kind, KindWithContent, Status, Task};
|
||||
use meilisearch_types::{compression, Index, VERSION_FILE_NAME};
|
||||
@ -44,7 +43,7 @@ use uuid::Uuid;
|
||||
|
||||
use crate::autobatcher::{self, BatchKind};
|
||||
use crate::utils::{self, swap_index_uid_in_task};
|
||||
use crate::{Error, IndexScheduler, ProcessingTasks, Result, TaskId};
|
||||
use crate::{Error, IndexScheduler, MustStopProcessing, ProcessingTasks, Result, TaskId};
|
||||
|
||||
/// Represents a combination of tasks that can all be processed at the same time.
|
||||
///
|
||||
@ -105,12 +104,6 @@ pub(crate) enum IndexOperation {
|
||||
operations: Vec<DocumentOperation>,
|
||||
tasks: Vec<Task>,
|
||||
},
|
||||
DocumentDeletion {
|
||||
index_uid: String,
|
||||
// The vec associated with each document deletion tasks.
|
||||
documents: Vec<Vec<String>>,
|
||||
tasks: Vec<Task>,
|
||||
},
|
||||
IndexDocumentDeletionByFilter {
|
||||
index_uid: String,
|
||||
task: Task,
|
||||
@ -162,7 +155,6 @@ impl Batch {
|
||||
}
|
||||
Batch::IndexOperation { op, .. } => match op {
|
||||
IndexOperation::DocumentOperation { tasks, .. }
|
||||
| IndexOperation::DocumentDeletion { tasks, .. }
|
||||
| IndexOperation::Settings { tasks, .. }
|
||||
| IndexOperation::DocumentClear { tasks, .. } => {
|
||||
tasks.iter().map(|task| task.uid).collect()
|
||||
@ -227,7 +219,6 @@ impl IndexOperation {
|
||||
pub fn index_uid(&self) -> &str {
|
||||
match self {
|
||||
IndexOperation::DocumentOperation { index_uid, .. }
|
||||
| IndexOperation::DocumentDeletion { index_uid, .. }
|
||||
| IndexOperation::IndexDocumentDeletionByFilter { index_uid, .. }
|
||||
| IndexOperation::DocumentClear { index_uid, .. }
|
||||
| IndexOperation::Settings { index_uid, .. }
|
||||
@ -243,9 +234,6 @@ impl fmt::Display for IndexOperation {
|
||||
IndexOperation::DocumentOperation { .. } => {
|
||||
f.write_str("IndexOperation::DocumentOperation")
|
||||
}
|
||||
IndexOperation::DocumentDeletion { .. } => {
|
||||
f.write_str("IndexOperation::DocumentDeletion")
|
||||
}
|
||||
IndexOperation::IndexDocumentDeletionByFilter { .. } => {
|
||||
f.write_str("IndexOperation::IndexDocumentDeletionByFilter")
|
||||
}
|
||||
@ -348,18 +336,27 @@ impl IndexScheduler {
|
||||
BatchKind::DocumentDeletion { deletion_ids } => {
|
||||
let tasks = self.get_existing_tasks(rtxn, deletion_ids)?;
|
||||
|
||||
let mut documents = Vec::new();
|
||||
let mut operations = Vec::with_capacity(tasks.len());
|
||||
let mut documents_counts = Vec::with_capacity(tasks.len());
|
||||
for task in &tasks {
|
||||
match task.kind {
|
||||
KindWithContent::DocumentDeletion { ref documents_ids, .. } => {
|
||||
documents.push(documents_ids.clone())
|
||||
operations.push(DocumentOperation::Delete(documents_ids.clone()));
|
||||
documents_counts.push(documents_ids.len() as u64);
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
|
||||
Ok(Some(Batch::IndexOperation {
|
||||
op: IndexOperation::DocumentDeletion { index_uid, documents, tasks },
|
||||
op: IndexOperation::DocumentOperation {
|
||||
index_uid,
|
||||
primary_key: None,
|
||||
method: IndexDocumentsMethod::ReplaceDocuments,
|
||||
documents_counts,
|
||||
operations,
|
||||
tasks,
|
||||
},
|
||||
must_create_index,
|
||||
}))
|
||||
}
|
||||
@ -587,7 +584,9 @@ impl IndexScheduler {
|
||||
let index_tasks = self.index_tasks(rtxn, index_name)? & enqueued;
|
||||
|
||||
// If autobatching is disabled we only take one task at a time.
|
||||
let tasks_limit = if self.autobatching_enabled { usize::MAX } else { 1 };
|
||||
// Otherwise, we take only a maximum of tasks to create batches.
|
||||
let tasks_limit =
|
||||
if self.autobatching_enabled { self.max_number_of_batched_tasks } else { 1 };
|
||||
|
||||
let enqueued = index_tasks
|
||||
.into_iter()
|
||||
@ -718,7 +717,7 @@ impl IndexScheduler {
|
||||
|
||||
// 2. Snapshot the index-scheduler LMDB env
|
||||
//
|
||||
// When we call copy_to_path, LMDB opens a read transaction by itself,
|
||||
// When we call copy_to_file, LMDB opens a read transaction by itself,
|
||||
// we can't provide our own. It is an issue as we would like to know
|
||||
// the update files to copy but new ones can be enqueued between the copy
|
||||
// of the env and the new transaction we open to retrieve the enqueued tasks.
|
||||
@ -731,7 +730,7 @@ impl IndexScheduler {
|
||||
// 2.1 First copy the LMDB env of the index-scheduler
|
||||
let dst = temp_snapshot_dir.path().join("tasks");
|
||||
fs::create_dir_all(&dst)?;
|
||||
self.env.copy_to_path(dst.join("data.mdb"), CompactionOption::Enabled)?;
|
||||
self.env.copy_to_file(dst.join("data.mdb"), CompactionOption::Enabled)?;
|
||||
|
||||
// 2.2 Create a read transaction on the index-scheduler
|
||||
let rtxn = self.env.read_txn()?;
|
||||
@ -756,7 +755,7 @@ impl IndexScheduler {
|
||||
let index = self.index_mapper.index(&rtxn, name)?;
|
||||
let dst = temp_snapshot_dir.path().join("indexes").join(uuid.to_string());
|
||||
fs::create_dir_all(&dst)?;
|
||||
index.copy_to_path(dst.join("data.mdb"), CompactionOption::Enabled)?;
|
||||
index.copy_to_file(dst.join("data.mdb"), CompactionOption::Enabled)?;
|
||||
}
|
||||
|
||||
drop(rtxn);
|
||||
@ -769,7 +768,7 @@ impl IndexScheduler {
|
||||
.map_size(1024 * 1024 * 1024) // 1 GiB
|
||||
.max_dbs(2)
|
||||
.open(&self.auth_path)?;
|
||||
auth.copy_to_path(dst.join("data.mdb"), CompactionOption::Enabled)?;
|
||||
auth.copy_to_file(dst.join("data.mdb"), CompactionOption::Enabled)?;
|
||||
|
||||
// 5. Copy and tarball the flat snapshot
|
||||
// 5.1 Find the original name of the database
|
||||
@ -825,6 +824,10 @@ impl IndexScheduler {
|
||||
// 2. dump the tasks
|
||||
let mut dump_tasks = dump.create_tasks_queue()?;
|
||||
for ret in self.all_tasks.iter(&rtxn)? {
|
||||
if self.must_stop_processing.get() {
|
||||
return Err(Error::AbortedTask);
|
||||
}
|
||||
|
||||
let (_, mut t) = ret?;
|
||||
let status = t.status;
|
||||
let content_file = t.content_uuid();
|
||||
@ -845,6 +848,9 @@ impl IndexScheduler {
|
||||
|
||||
// 2.1. Dump the `content_file` associated with the task if there is one and the task is not finished yet.
|
||||
if let Some(content_file) = content_file {
|
||||
if self.must_stop_processing.get() {
|
||||
return Err(Error::AbortedTask);
|
||||
}
|
||||
if status == Status::Enqueued {
|
||||
let content_file = self.file_store.get_update(content_file)?;
|
||||
|
||||
@ -884,6 +890,9 @@ impl IndexScheduler {
|
||||
|
||||
// 3.1. Dump the documents
|
||||
for ret in index.all_documents(&rtxn)? {
|
||||
if self.must_stop_processing.get() {
|
||||
return Err(Error::AbortedTask);
|
||||
}
|
||||
let (_id, doc) = ret?;
|
||||
let document = milli::obkv_to_json(&all_fields, &fields_ids_map, doc)?;
|
||||
index_dumper.push_document(&document)?;
|
||||
@ -903,6 +912,9 @@ impl IndexScheduler {
|
||||
"[year repr:full][month repr:numerical][day padding:zero]-[hour padding:zero][minute padding:zero][second padding:zero][subsecond digits:3]"
|
||||
)).unwrap();
|
||||
|
||||
if self.must_stop_processing.get() {
|
||||
return Err(Error::AbortedTask);
|
||||
}
|
||||
let path = self.dumps_path.join(format!("{}.dump", dump_uid));
|
||||
let file = File::create(path)?;
|
||||
dump.persist_to(BufWriter::new(file))?;
|
||||
@ -923,6 +935,10 @@ impl IndexScheduler {
|
||||
self.index_mapper.index(&rtxn, &index_uid)?
|
||||
};
|
||||
|
||||
// the index operation can take a long time, so save this handle to make it available to the search for the duration of the tick
|
||||
*self.currently_updating_index.write().unwrap() =
|
||||
Some((index_uid.clone(), index.clone()));
|
||||
|
||||
let mut index_wtxn = index.write_txn()?;
|
||||
let tasks = self.apply_index_operation(&mut index_wtxn, &index, op)?;
|
||||
index_wtxn.commit()?;
|
||||
@ -1092,7 +1108,7 @@ impl IndexScheduler {
|
||||
for task_id in &index_lhs_task_ids | &index_rhs_task_ids {
|
||||
let mut task = self.get_task(wtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?;
|
||||
swap_index_uid_in_task(&mut task, (lhs, rhs));
|
||||
self.all_tasks.put(wtxn, &BEU32::new(task_id), &task)?;
|
||||
self.all_tasks.put(wtxn, &task_id, &task)?;
|
||||
}
|
||||
|
||||
// 4. remove the task from indexuid = before_name
|
||||
@ -1118,7 +1134,7 @@ impl IndexScheduler {
|
||||
/// The list of processed tasks.
|
||||
fn apply_index_operation<'i>(
|
||||
&self,
|
||||
index_wtxn: &mut RwTxn<'i, '_>,
|
||||
index_wtxn: &mut RwTxn<'i>,
|
||||
index: &'i Index,
|
||||
operation: IndexOperation,
|
||||
) -> Result<Vec<Task>> {
|
||||
@ -1186,12 +1202,16 @@ impl IndexScheduler {
|
||||
|
||||
let config = IndexDocumentsConfig { update_method: method, ..Default::default() };
|
||||
|
||||
let embedder_configs = index.embedding_configs(index_wtxn)?;
|
||||
// TODO: consider Arc'ing the map too (we only need read access + we'll be cloning it multiple times, so really makes sense)
|
||||
let embedders = self.embedders(embedder_configs)?;
|
||||
|
||||
let mut builder = milli::update::IndexDocuments::new(
|
||||
index_wtxn,
|
||||
index,
|
||||
indexer_config,
|
||||
config,
|
||||
|indexing_step| debug!("update: {:?}", indexing_step),
|
||||
|indexing_step| trace!("update: {:?}", indexing_step),
|
||||
|| must_stop_processing.get(),
|
||||
)?;
|
||||
|
||||
@ -1204,6 +1224,8 @@ impl IndexScheduler {
|
||||
let (new_builder, user_result) = builder.add_documents(reader)?;
|
||||
builder = new_builder;
|
||||
|
||||
builder = builder.with_embedders(embedders.clone());
|
||||
|
||||
let received_documents =
|
||||
if let Some(Details::DocumentAdditionOrUpdate {
|
||||
received_documents,
|
||||
@ -1238,7 +1260,8 @@ impl IndexScheduler {
|
||||
let (new_builder, user_result) =
|
||||
builder.remove_documents(document_ids)?;
|
||||
builder = new_builder;
|
||||
|
||||
// Uses Invariant: remove documents actually always returns Ok for the inner result
|
||||
let count = user_result.unwrap();
|
||||
let provided_ids =
|
||||
if let Some(Details::DocumentDeletion { provided_ids, .. }) =
|
||||
task.details
|
||||
@ -1249,23 +1272,11 @@ impl IndexScheduler {
|
||||
unreachable!();
|
||||
};
|
||||
|
||||
match user_result {
|
||||
Ok(count) => {
|
||||
task.status = Status::Succeeded;
|
||||
task.details = Some(Details::DocumentDeletion {
|
||||
provided_ids,
|
||||
deleted_documents: Some(count),
|
||||
});
|
||||
}
|
||||
Err(e) => {
|
||||
task.status = Status::Failed;
|
||||
task.details = Some(Details::DocumentDeletion {
|
||||
provided_ids,
|
||||
deleted_documents: Some(0),
|
||||
});
|
||||
task.error = Some(milli::Error::from(e).into());
|
||||
}
|
||||
}
|
||||
task.status = Status::Succeeded;
|
||||
task.details = Some(Details::DocumentDeletion {
|
||||
provided_ids,
|
||||
deleted_documents: Some(count),
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1280,31 +1291,13 @@ impl IndexScheduler {
|
||||
milli::update::Settings::new(index_wtxn, index, indexer_config);
|
||||
builder.reset_primary_key();
|
||||
builder.execute(
|
||||
|indexing_step| debug!("update: {:?}", indexing_step),
|
||||
|indexing_step| trace!("update: {:?}", indexing_step),
|
||||
|| must_stop_processing.clone().get(),
|
||||
)?;
|
||||
}
|
||||
|
||||
Ok(tasks)
|
||||
}
|
||||
IndexOperation::DocumentDeletion { index_uid: _, documents, mut tasks } => {
|
||||
let mut builder = milli::update::DeleteDocuments::new(index_wtxn, index)?;
|
||||
documents.iter().flatten().for_each(|id| {
|
||||
builder.delete_external_id(id);
|
||||
});
|
||||
|
||||
let DocumentDeletionResult { deleted_documents, .. } = builder.execute()?;
|
||||
|
||||
for (task, documents) in tasks.iter_mut().zip(documents) {
|
||||
task.status = Status::Succeeded;
|
||||
task.details = Some(Details::DocumentDeletion {
|
||||
provided_ids: documents.len(),
|
||||
deleted_documents: Some(deleted_documents.min(documents.len() as u64)),
|
||||
});
|
||||
}
|
||||
|
||||
Ok(tasks)
|
||||
}
|
||||
IndexOperation::IndexDocumentDeletionByFilter { mut task, index_uid: _ } => {
|
||||
let filter =
|
||||
if let KindWithContent::DocumentDeletionByFilter { filter_expr, .. } =
|
||||
@ -1314,7 +1307,13 @@ impl IndexScheduler {
|
||||
} else {
|
||||
unreachable!()
|
||||
};
|
||||
let deleted_documents = delete_document_by_filter(index_wtxn, filter, index);
|
||||
let deleted_documents = delete_document_by_filter(
|
||||
index_wtxn,
|
||||
filter,
|
||||
self.index_mapper.indexer_config(),
|
||||
self.must_stop_processing.clone(),
|
||||
index,
|
||||
);
|
||||
let original_filter = if let Some(Details::DocumentDeletionByFilter {
|
||||
original_filter,
|
||||
deleted_documents: _,
|
||||
@ -1352,6 +1351,9 @@ impl IndexScheduler {
|
||||
|
||||
for (task, (_, settings)) in tasks.iter_mut().zip(settings) {
|
||||
let checked_settings = settings.clone().check();
|
||||
if matches!(checked_settings.embedders, milli::update::Setting::Set(_)) {
|
||||
self.features().check_vector("Passing `embedders` in settings")?
|
||||
}
|
||||
task.details = Some(Details::SettingsUpdate { settings: Box::new(settings) });
|
||||
apply_settings_to_builder(&checked_settings, &mut builder);
|
||||
|
||||
@ -1488,10 +1490,9 @@ impl IndexScheduler {
|
||||
}
|
||||
|
||||
for task in to_delete_tasks.iter() {
|
||||
self.all_tasks.delete(wtxn, &BEU32::new(task))?;
|
||||
self.all_tasks.delete(wtxn, &task)?;
|
||||
}
|
||||
for canceled_by in affected_canceled_by {
|
||||
let canceled_by = BEU32::new(canceled_by);
|
||||
if let Some(mut tasks) = self.canceled_by.get(wtxn, &canceled_by)? {
|
||||
tasks -= &to_delete_tasks;
|
||||
if tasks.is_empty() {
|
||||
@ -1539,15 +1540,17 @@ impl IndexScheduler {
|
||||
task.details = task.details.map(|d| d.to_failed());
|
||||
self.update_task(wtxn, &task)?;
|
||||
}
|
||||
self.canceled_by.put(wtxn, &BEU32::new(cancel_task_id), &tasks_to_cancel)?;
|
||||
self.canceled_by.put(wtxn, &cancel_task_id, &tasks_to_cancel)?;
|
||||
|
||||
Ok(content_files_to_delete)
|
||||
}
|
||||
}
|
||||
|
||||
fn delete_document_by_filter<'a>(
|
||||
wtxn: &mut RwTxn<'a, '_>,
|
||||
wtxn: &mut RwTxn<'a>,
|
||||
filter: &serde_json::Value,
|
||||
indexer_config: &IndexerConfig,
|
||||
must_stop_processing: MustStopProcessing,
|
||||
index: &'a Index,
|
||||
) -> Result<u64> {
|
||||
let filter = Filter::from_json(filter)?;
|
||||
@ -1558,9 +1561,26 @@ fn delete_document_by_filter<'a>(
|
||||
}
|
||||
e => e.into(),
|
||||
})?;
|
||||
let mut delete_operation = DeleteDocuments::new(wtxn, index)?;
|
||||
delete_operation.delete_documents(&candidates);
|
||||
delete_operation.execute().map(|result| result.deleted_documents)?
|
||||
|
||||
let config = IndexDocumentsConfig {
|
||||
update_method: IndexDocumentsMethod::ReplaceDocuments,
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let mut builder = milli::update::IndexDocuments::new(
|
||||
wtxn,
|
||||
index,
|
||||
indexer_config,
|
||||
config,
|
||||
|indexing_step| debug!("update: {:?}", indexing_step),
|
||||
|| must_stop_processing.get(),
|
||||
)?;
|
||||
|
||||
let (new_builder, count) = builder.remove_documents_from_db_no_batch(&candidates)?;
|
||||
builder = new_builder;
|
||||
|
||||
let _ = builder.execute()?;
|
||||
count
|
||||
} else {
|
||||
0
|
||||
})
|
||||
|
@ -108,6 +108,8 @@ pub enum Error {
|
||||
TaskDeletionWithEmptyQuery,
|
||||
#[error("Query parameters to filter the tasks to cancel are missing. Available query parameters are: `uids`, `indexUids`, `statuses`, `types`, `canceledBy`, `beforeEnqueuedAt`, `afterEnqueuedAt`, `beforeStartedAt`, `afterStartedAt`, `beforeFinishedAt`, `afterFinishedAt`.")]
|
||||
TaskCancelationWithEmptyQuery,
|
||||
#[error("Aborted task")]
|
||||
AbortedTask,
|
||||
|
||||
#[error(transparent)]
|
||||
Dump(#[from] dump::Error),
|
||||
@ -175,6 +177,7 @@ impl Error {
|
||||
| Error::TaskNotFound(_)
|
||||
| Error::TaskDeletionWithEmptyQuery
|
||||
| Error::TaskCancelationWithEmptyQuery
|
||||
| Error::AbortedTask
|
||||
| Error::Dump(_)
|
||||
| Error::Heed(_)
|
||||
| Error::Milli(_)
|
||||
@ -236,6 +239,9 @@ impl ErrorCode for Error {
|
||||
Error::TaskDatabaseUpdate(_) => Code::Internal,
|
||||
Error::CreateBatch(_) => Code::Internal,
|
||||
|
||||
// This one should never be seen by the end user
|
||||
Error::AbortedTask => Code::Internal,
|
||||
|
||||
#[cfg(test)]
|
||||
Error::PlannedFailure => Code::Internal,
|
||||
}
|
||||
|
@ -56,12 +56,12 @@ impl RoFeatures {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn check_vector(&self) -> Result<()> {
|
||||
pub fn check_vector(&self, disabled_action: &'static str) -> Result<()> {
|
||||
if self.runtime.vector_store {
|
||||
Ok(())
|
||||
} else {
|
||||
Err(FeatureNotEnabledError {
|
||||
disabled_action: "Passing `vector` as a query parameter",
|
||||
disabled_action,
|
||||
feature: "vector store",
|
||||
issue_link: "https://github.com/meilisearch/product/discussions/677",
|
||||
}
|
||||
|
@ -1,12 +1,8 @@
|
||||
/// the map size to use when we don't succeed in reading it in indexes.
|
||||
const DEFAULT_MAP_SIZE: usize = 10 * 1024 * 1024 * 1024; // 10 GiB
|
||||
|
||||
use std::collections::BTreeMap;
|
||||
use std::path::Path;
|
||||
use std::time::Duration;
|
||||
|
||||
use meilisearch_types::heed::flags::Flags;
|
||||
use meilisearch_types::heed::{EnvClosingEvent, EnvOpenOptions};
|
||||
use meilisearch_types::heed::{EnvClosingEvent, EnvFlags, EnvOpenOptions};
|
||||
use meilisearch_types::milli::Index;
|
||||
use time::OffsetDateTime;
|
||||
use uuid::Uuid;
|
||||
@ -236,7 +232,7 @@ impl IndexMap {
|
||||
enable_mdb_writemap: bool,
|
||||
map_size_growth: usize,
|
||||
) {
|
||||
let map_size = index.map_size().unwrap_or(DEFAULT_MAP_SIZE) + map_size_growth;
|
||||
let map_size = index.map_size() + map_size_growth;
|
||||
let closing_event = index.prepare_for_closing();
|
||||
let generation = self.next_generation();
|
||||
self.unavailable.insert(
|
||||
@ -309,7 +305,7 @@ fn create_or_open_index(
|
||||
options.map_size(clamp_to_page_size(map_size));
|
||||
options.max_readers(1024);
|
||||
if enable_mdb_writemap {
|
||||
unsafe { options.flag(Flags::MdbWriteMap) };
|
||||
unsafe { options.flags(EnvFlags::WRITE_MAP) };
|
||||
}
|
||||
|
||||
if let Some((created, updated)) = date {
|
||||
@ -388,7 +384,7 @@ mod tests {
|
||||
|
||||
fn assert_index_size(index: Index, expected: usize) {
|
||||
let expected = clamp_to_page_size(expected);
|
||||
let index_map_size = index.map_size().unwrap();
|
||||
let index_map_size = index.map_size();
|
||||
assert_eq!(index_map_size, expected);
|
||||
}
|
||||
}
|
||||
|
@ -1,7 +1,7 @@
|
||||
use std::collections::BTreeSet;
|
||||
use std::fmt::Write;
|
||||
|
||||
use meilisearch_types::heed::types::{OwnedType, SerdeBincode, SerdeJson, Str};
|
||||
use meilisearch_types::heed::types::{SerdeBincode, SerdeJson, Str};
|
||||
use meilisearch_types::heed::{Database, RoTxn};
|
||||
use meilisearch_types::milli::{CboRoaringBitmapCodec, RoaringBitmapCodec, BEU32};
|
||||
use meilisearch_types::tasks::{Details, Task};
|
||||
@ -30,15 +30,20 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
|
||||
index_mapper,
|
||||
features: _,
|
||||
max_number_of_tasks: _,
|
||||
max_number_of_batched_tasks: _,
|
||||
puffin_frame: _,
|
||||
wake_up: _,
|
||||
dumps_path: _,
|
||||
snapshots_path: _,
|
||||
auth_path: _,
|
||||
version_file_path: _,
|
||||
webhook_url: _,
|
||||
webhook_authorization_header: _,
|
||||
test_breakpoint_sdr: _,
|
||||
planned_failures: _,
|
||||
run_loop_iteration: _,
|
||||
currently_updating_index: _,
|
||||
embedders: _,
|
||||
} = scheduler;
|
||||
|
||||
let rtxn = env.read_txn().unwrap();
|
||||
@ -114,7 +119,7 @@ pub fn snapshot_bitmap(r: &RoaringBitmap) -> String {
|
||||
snap
|
||||
}
|
||||
|
||||
pub fn snapshot_all_tasks(rtxn: &RoTxn, db: Database<OwnedType<BEU32>, SerdeJson<Task>>) -> String {
|
||||
pub fn snapshot_all_tasks(rtxn: &RoTxn, db: Database<BEU32, SerdeJson<Task>>) -> String {
|
||||
let mut snap = String::new();
|
||||
let iter = db.iter(rtxn).unwrap();
|
||||
for next in iter {
|
||||
@ -124,10 +129,7 @@ pub fn snapshot_all_tasks(rtxn: &RoTxn, db: Database<OwnedType<BEU32>, SerdeJson
|
||||
snap
|
||||
}
|
||||
|
||||
pub fn snapshot_date_db(
|
||||
rtxn: &RoTxn,
|
||||
db: Database<OwnedType<BEI128>, CboRoaringBitmapCodec>,
|
||||
) -> String {
|
||||
pub fn snapshot_date_db(rtxn: &RoTxn, db: Database<BEI128, CboRoaringBitmapCodec>) -> String {
|
||||
let mut snap = String::new();
|
||||
let iter = db.iter(rtxn).unwrap();
|
||||
for next in iter {
|
||||
@ -247,10 +249,7 @@ pub fn snapshot_index_tasks(rtxn: &RoTxn, db: Database<Str, RoaringBitmapCodec>)
|
||||
}
|
||||
snap
|
||||
}
|
||||
pub fn snapshot_canceled_by(
|
||||
rtxn: &RoTxn,
|
||||
db: Database<OwnedType<BEU32>, RoaringBitmapCodec>,
|
||||
) -> String {
|
||||
pub fn snapshot_canceled_by(rtxn: &RoTxn, db: Database<BEU32, RoaringBitmapCodec>) -> String {
|
||||
let mut snap = String::new();
|
||||
let iter = db.iter(rtxn).unwrap();
|
||||
for next in iter {
|
||||
|
@ -27,13 +27,14 @@ mod index_mapper;
|
||||
mod insta_snapshot;
|
||||
mod lru;
|
||||
mod utils;
|
||||
mod uuid_codec;
|
||||
pub mod uuid_codec;
|
||||
|
||||
pub type Result<T> = std::result::Result<T, Error>;
|
||||
pub type TaskId = u32;
|
||||
|
||||
use std::collections::{BTreeMap, HashMap};
|
||||
use std::fs::File;
|
||||
use std::io::{self, BufReader, Read};
|
||||
use std::ops::{Bound, RangeBounds};
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::sync::atomic::AtomicBool;
|
||||
@ -45,13 +46,18 @@ use dump::{KindDump, TaskDump, UpdateFile};
|
||||
pub use error::Error;
|
||||
pub use features::RoFeatures;
|
||||
use file_store::FileStore;
|
||||
use flate2::bufread::GzEncoder;
|
||||
use flate2::Compression;
|
||||
use meilisearch_types::error::ResponseError;
|
||||
use meilisearch_types::features::{InstanceTogglableFeatures, RuntimeTogglableFeatures};
|
||||
use meilisearch_types::heed::types::{OwnedType, SerdeBincode, SerdeJson, Str};
|
||||
use meilisearch_types::heed::{self, Database, Env, RoTxn, RwTxn};
|
||||
use meilisearch_types::heed::byteorder::BE;
|
||||
use meilisearch_types::heed::types::{SerdeBincode, SerdeJson, Str, I128};
|
||||
use meilisearch_types::heed::{self, Database, Env, PutFlags, RoTxn, RwTxn};
|
||||
use meilisearch_types::milli::documents::DocumentsBatchBuilder;
|
||||
use meilisearch_types::milli::update::IndexerConfig;
|
||||
use meilisearch_types::milli::vector::{Embedder, EmbedderOptions, EmbeddingConfigs};
|
||||
use meilisearch_types::milli::{self, CboRoaringBitmapCodec, Index, RoaringBitmapCodec, BEU32};
|
||||
use meilisearch_types::task_view::TaskView;
|
||||
use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task};
|
||||
use puffin::FrameView;
|
||||
use roaring::RoaringBitmap;
|
||||
@ -64,8 +70,7 @@ use uuid::Uuid;
|
||||
use crate::index_mapper::IndexMapper;
|
||||
use crate::utils::{check_index_swap_validity, clamp_to_page_size};
|
||||
|
||||
pub(crate) type BEI128 =
|
||||
meilisearch_types::heed::zerocopy::I128<meilisearch_types::heed::byteorder::BE>;
|
||||
pub(crate) type BEI128 = I128<BE>;
|
||||
|
||||
/// Defines a subset of tasks to be retrieved from the [`IndexScheduler`].
|
||||
///
|
||||
@ -169,8 +174,8 @@ impl ProcessingTasks {
|
||||
}
|
||||
|
||||
/// Set the processing tasks to an empty list
|
||||
fn stop_processing(&mut self) {
|
||||
self.processing = RoaringBitmap::new();
|
||||
fn stop_processing(&mut self) -> RoaringBitmap {
|
||||
std::mem::take(&mut self.processing)
|
||||
}
|
||||
|
||||
/// Returns `true` if there, at least, is one task that is currently processing that we must stop.
|
||||
@ -240,6 +245,10 @@ pub struct IndexSchedulerOptions {
|
||||
pub snapshots_path: PathBuf,
|
||||
/// The path to the folder containing the dumps.
|
||||
pub dumps_path: PathBuf,
|
||||
/// The URL on which we must send the tasks statuses
|
||||
pub webhook_url: Option<String>,
|
||||
/// The value we will send into the Authorization HTTP header on the webhook URL
|
||||
pub webhook_authorization_header: Option<String>,
|
||||
/// The maximum size, in bytes, of the task index.
|
||||
pub task_db_size: usize,
|
||||
/// The size, in bytes, with which a meilisearch index is opened the first time of each meilisearch index.
|
||||
@ -258,6 +267,9 @@ pub struct IndexSchedulerOptions {
|
||||
/// The maximum number of tasks stored in the task queue before starting
|
||||
/// to auto schedule task deletions.
|
||||
pub max_number_of_tasks: usize,
|
||||
/// If the autobatcher is allowed to automatically batch tasks
|
||||
/// it will only batch this defined number of tasks at once.
|
||||
pub max_number_of_batched_tasks: usize,
|
||||
/// The experimental features enabled for this instance.
|
||||
pub instance_features: InstanceTogglableFeatures,
|
||||
}
|
||||
@ -278,7 +290,7 @@ pub struct IndexScheduler {
|
||||
pub(crate) file_store: FileStore,
|
||||
|
||||
// The main database, it contains all the tasks accessible by their Id.
|
||||
pub(crate) all_tasks: Database<OwnedType<BEU32>, SerdeJson<Task>>,
|
||||
pub(crate) all_tasks: Database<BEU32, SerdeJson<Task>>,
|
||||
|
||||
/// All the tasks ids grouped by their status.
|
||||
// TODO we should not be able to serialize a `Status::Processing` in this database.
|
||||
@ -289,16 +301,16 @@ pub struct IndexScheduler {
|
||||
pub(crate) index_tasks: Database<Str, RoaringBitmapCodec>,
|
||||
|
||||
/// Store the tasks that were canceled by a task uid
|
||||
pub(crate) canceled_by: Database<OwnedType<BEU32>, RoaringBitmapCodec>,
|
||||
pub(crate) canceled_by: Database<BEU32, RoaringBitmapCodec>,
|
||||
|
||||
/// Store the task ids of tasks which were enqueued at a specific date
|
||||
pub(crate) enqueued_at: Database<OwnedType<BEI128>, CboRoaringBitmapCodec>,
|
||||
pub(crate) enqueued_at: Database<BEI128, CboRoaringBitmapCodec>,
|
||||
|
||||
/// Store the task ids of finished tasks which started being processed at a specific date
|
||||
pub(crate) started_at: Database<OwnedType<BEI128>, CboRoaringBitmapCodec>,
|
||||
pub(crate) started_at: Database<BEI128, CboRoaringBitmapCodec>,
|
||||
|
||||
/// Store the task ids of tasks which finished at a specific date
|
||||
pub(crate) finished_at: Database<OwnedType<BEI128>, CboRoaringBitmapCodec>,
|
||||
pub(crate) finished_at: Database<BEI128, CboRoaringBitmapCodec>,
|
||||
|
||||
/// In charge of creating, opening, storing and returning indexes.
|
||||
pub(crate) index_mapper: IndexMapper,
|
||||
@ -316,6 +328,14 @@ pub struct IndexScheduler {
|
||||
/// the finished tasks automatically.
|
||||
pub(crate) max_number_of_tasks: usize,
|
||||
|
||||
/// The maximum number of tasks that will be batched together.
|
||||
pub(crate) max_number_of_batched_tasks: usize,
|
||||
|
||||
/// The webhook url we should send tasks to after processing every batches.
|
||||
pub(crate) webhook_url: Option<String>,
|
||||
/// The Authorization header to send to the webhook URL.
|
||||
pub(crate) webhook_authorization_header: Option<String>,
|
||||
|
||||
/// A frame to output the indexation profiling files to disk.
|
||||
pub(crate) puffin_frame: Arc<puffin::GlobalFrameView>,
|
||||
|
||||
@ -331,6 +351,12 @@ pub struct IndexScheduler {
|
||||
/// The path to the version file of Meilisearch.
|
||||
pub(crate) version_file_path: PathBuf,
|
||||
|
||||
/// A few types of long running batches of tasks that act on a single index set this field
|
||||
/// so that a handle to the index is available from other threads (search) in an optimized manner.
|
||||
currently_updating_index: Arc<RwLock<Option<(String, Index)>>>,
|
||||
|
||||
embedders: Arc<RwLock<HashMap<EmbedderOptions, Arc<Embedder>>>>,
|
||||
|
||||
// ================= test
|
||||
// The next entry is dedicated to the tests.
|
||||
/// Provide a way to set a breakpoint in multiple part of the scheduler.
|
||||
@ -369,11 +395,16 @@ impl IndexScheduler {
|
||||
wake_up: self.wake_up.clone(),
|
||||
autobatching_enabled: self.autobatching_enabled,
|
||||
max_number_of_tasks: self.max_number_of_tasks,
|
||||
max_number_of_batched_tasks: self.max_number_of_batched_tasks,
|
||||
puffin_frame: self.puffin_frame.clone(),
|
||||
snapshots_path: self.snapshots_path.clone(),
|
||||
dumps_path: self.dumps_path.clone(),
|
||||
auth_path: self.auth_path.clone(),
|
||||
version_file_path: self.version_file_path.clone(),
|
||||
webhook_url: self.webhook_url.clone(),
|
||||
webhook_authorization_header: self.webhook_authorization_header.clone(),
|
||||
currently_updating_index: self.currently_updating_index.clone(),
|
||||
embedders: self.embedders.clone(),
|
||||
#[cfg(test)]
|
||||
test_breakpoint_sdr: self.test_breakpoint_sdr.clone(),
|
||||
#[cfg(test)]
|
||||
@ -466,10 +497,15 @@ impl IndexScheduler {
|
||||
puffin_frame: Arc::new(puffin::GlobalFrameView::default()),
|
||||
autobatching_enabled: options.autobatching_enabled,
|
||||
max_number_of_tasks: options.max_number_of_tasks,
|
||||
max_number_of_batched_tasks: options.max_number_of_batched_tasks,
|
||||
dumps_path: options.dumps_path,
|
||||
snapshots_path: options.snapshots_path,
|
||||
auth_path: options.auth_path,
|
||||
version_file_path: options.version_file_path,
|
||||
webhook_url: options.webhook_url,
|
||||
webhook_authorization_header: options.webhook_authorization_header,
|
||||
currently_updating_index: Arc::new(RwLock::new(None)),
|
||||
embedders: Default::default(),
|
||||
|
||||
#[cfg(test)]
|
||||
test_breakpoint_sdr,
|
||||
@ -652,6 +688,13 @@ impl IndexScheduler {
|
||||
/// If you need to fetch information from or perform an action on all indexes,
|
||||
/// see the `try_for_each_index` function.
|
||||
pub fn index(&self, name: &str) -> Result<Index> {
|
||||
if let Some((current_name, current_index)) =
|
||||
self.currently_updating_index.read().unwrap().as_ref()
|
||||
{
|
||||
if current_name == name {
|
||||
return Ok(current_index.clone());
|
||||
}
|
||||
}
|
||||
let rtxn = self.env.read_txn()?;
|
||||
self.index_mapper.index(&rtxn, name)
|
||||
}
|
||||
@ -717,9 +760,7 @@ impl IndexScheduler {
|
||||
if let Some(canceled_by) = &query.canceled_by {
|
||||
let mut all_canceled_tasks = RoaringBitmap::new();
|
||||
for cancel_task_uid in canceled_by {
|
||||
if let Some(canceled_by_uid) =
|
||||
self.canceled_by.get(rtxn, &BEU32::new(*cancel_task_uid))?
|
||||
{
|
||||
if let Some(canceled_by_uid) = self.canceled_by.get(rtxn, cancel_task_uid)? {
|
||||
all_canceled_tasks |= canceled_by_uid;
|
||||
}
|
||||
}
|
||||
@ -970,7 +1011,7 @@ impl IndexScheduler {
|
||||
|
||||
// if the task doesn't delete anything and 50% of the task queue is full, we must refuse to enqueue the incomming task
|
||||
if !matches!(&kind, KindWithContent::TaskDeletion { tasks, .. } if !tasks.is_empty())
|
||||
&& (self.env.non_free_pages_size()? * 100) / self.env.map_size()? as u64 > 50
|
||||
&& (self.env.non_free_pages_size()? * 100) / self.env.info().map_size as u64 > 50
|
||||
{
|
||||
return Err(Error::NoSpaceLeftInTaskQueue);
|
||||
}
|
||||
@ -996,7 +1037,7 @@ impl IndexScheduler {
|
||||
// Get rid of the mutability.
|
||||
let task = task;
|
||||
|
||||
self.all_tasks.append(&mut wtxn, &BEU32::new(task.uid), &task)?;
|
||||
self.all_tasks.put_with_flags(&mut wtxn, PutFlags::APPEND, &task.uid, &task)?;
|
||||
|
||||
for index in task.indexes() {
|
||||
self.update_index(&mut wtxn, index, |bitmap| {
|
||||
@ -1133,6 +1174,9 @@ impl IndexScheduler {
|
||||
handle.join().unwrap_or(Err(Error::ProcessBatchPanicked))
|
||||
};
|
||||
|
||||
// Reset the currently updating index to relinquish the index handle
|
||||
*self.currently_updating_index.write().unwrap() = None;
|
||||
|
||||
#[cfg(test)]
|
||||
self.maybe_fail(tests::FailureLocation::AcquiringWtxn)?;
|
||||
|
||||
@ -1167,10 +1211,11 @@ impl IndexScheduler {
|
||||
// If we have an abortion error we must stop the tick here and re-schedule tasks.
|
||||
Err(Error::Milli(milli::Error::InternalError(
|
||||
milli::InternalError::AbortedIndexation,
|
||||
))) => {
|
||||
)))
|
||||
| Err(Error::AbortedTask) => {
|
||||
#[cfg(test)]
|
||||
self.breakpoint(Breakpoint::AbortedIndexation);
|
||||
wtxn.abort().map_err(Error::HeedTransaction)?;
|
||||
wtxn.abort();
|
||||
|
||||
// We make sure that we don't call `stop_processing` on the `processing_tasks`,
|
||||
// this is because we want to let the next tick call `create_next_batch` and keep
|
||||
@ -1191,7 +1236,7 @@ impl IndexScheduler {
|
||||
let index_uid = index_uid.unwrap();
|
||||
// fixme: handle error more gracefully? not sure when this could happen
|
||||
self.index_mapper.resize_index(&wtxn, &index_uid)?;
|
||||
wtxn.abort().map_err(Error::HeedTransaction)?;
|
||||
wtxn.abort();
|
||||
|
||||
return Ok(TickOutcome::TickAgain(0));
|
||||
}
|
||||
@ -1223,19 +1268,99 @@ impl IndexScheduler {
|
||||
}
|
||||
}
|
||||
|
||||
self.processing_tasks.write().unwrap().stop_processing();
|
||||
let processed = self.processing_tasks.write().unwrap().stop_processing();
|
||||
|
||||
#[cfg(test)]
|
||||
self.maybe_fail(tests::FailureLocation::CommittingWtxn)?;
|
||||
|
||||
wtxn.commit().map_err(Error::HeedTransaction)?;
|
||||
|
||||
// We shouldn't crash the tick function if we can't send data to the webhook.
|
||||
let _ = self.notify_webhook(&processed);
|
||||
|
||||
#[cfg(test)]
|
||||
self.breakpoint(Breakpoint::AfterProcessing);
|
||||
|
||||
Ok(TickOutcome::TickAgain(processed_tasks))
|
||||
}
|
||||
|
||||
/// Once the tasks changes have been commited we must send all the tasks that were updated to our webhook if there is one.
|
||||
fn notify_webhook(&self, updated: &RoaringBitmap) -> Result<()> {
|
||||
if let Some(ref url) = self.webhook_url {
|
||||
struct TaskReader<'a, 'b> {
|
||||
rtxn: &'a RoTxn<'a>,
|
||||
index_scheduler: &'a IndexScheduler,
|
||||
tasks: &'b mut roaring::bitmap::Iter<'b>,
|
||||
buffer: Vec<u8>,
|
||||
written: usize,
|
||||
}
|
||||
|
||||
impl<'a, 'b> Read for TaskReader<'a, 'b> {
|
||||
fn read(&mut self, mut buf: &mut [u8]) -> std::io::Result<usize> {
|
||||
if self.buffer.is_empty() {
|
||||
match self.tasks.next() {
|
||||
None => return Ok(0),
|
||||
Some(task_id) => {
|
||||
let task = self
|
||||
.index_scheduler
|
||||
.get_task(self.rtxn, task_id)
|
||||
.map_err(|err| io::Error::new(io::ErrorKind::Other, err))?
|
||||
.ok_or_else(|| {
|
||||
io::Error::new(
|
||||
io::ErrorKind::Other,
|
||||
Error::CorruptedTaskQueue,
|
||||
)
|
||||
})?;
|
||||
|
||||
serde_json::to_writer(
|
||||
&mut self.buffer,
|
||||
&TaskView::from_task(&task),
|
||||
)?;
|
||||
self.buffer.push(b'\n');
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let mut to_write = &self.buffer[self.written..];
|
||||
let wrote = io::copy(&mut to_write, &mut buf)?;
|
||||
self.written += wrote as usize;
|
||||
|
||||
// we wrote everything and must refresh our buffer on the next call
|
||||
if self.written == self.buffer.len() {
|
||||
self.written = 0;
|
||||
self.buffer.clear();
|
||||
}
|
||||
|
||||
Ok(wrote as usize)
|
||||
}
|
||||
}
|
||||
|
||||
let rtxn = self.env.read_txn()?;
|
||||
|
||||
let task_reader = TaskReader {
|
||||
rtxn: &rtxn,
|
||||
index_scheduler: self,
|
||||
tasks: &mut updated.into_iter(),
|
||||
buffer: Vec::with_capacity(50), // on average a task is around ~100 bytes
|
||||
written: 0,
|
||||
};
|
||||
|
||||
// let reader = GzEncoder::new(BufReader::new(task_reader), Compression::default());
|
||||
let reader = GzEncoder::new(BufReader::new(task_reader), Compression::default());
|
||||
let request = ureq::post(url).set("Content-Encoding", "gzip");
|
||||
let request = match &self.webhook_authorization_header {
|
||||
Some(header) => request.set("Authorization", header),
|
||||
None => request,
|
||||
};
|
||||
|
||||
if let Err(e) = request.send(reader) {
|
||||
log::error!("While sending data to the webhook: {e}");
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Register a task to cleanup the task queue if needed
|
||||
fn cleanup_task_queue(&self) -> Result<()> {
|
||||
let rtxn = self.env.read_txn().map_err(Error::HeedTransaction)?;
|
||||
@ -1310,6 +1435,40 @@ impl IndexScheduler {
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: consider using a type alias or a struct embedder/template
|
||||
pub fn embedders(
|
||||
&self,
|
||||
embedding_configs: Vec<(String, milli::vector::EmbeddingConfig)>,
|
||||
) -> Result<EmbeddingConfigs> {
|
||||
let res: Result<_> = embedding_configs
|
||||
.into_iter()
|
||||
.map(|(name, milli::vector::EmbeddingConfig { embedder_options, prompt })| {
|
||||
let prompt =
|
||||
Arc::new(prompt.try_into().map_err(meilisearch_types::milli::Error::from)?);
|
||||
// optimistically return existing embedder
|
||||
{
|
||||
let embedders = self.embedders.read().unwrap();
|
||||
if let Some(embedder) = embedders.get(&embedder_options) {
|
||||
return Ok((name, (embedder.clone(), prompt)));
|
||||
}
|
||||
}
|
||||
|
||||
// add missing embedder
|
||||
let embedder = Arc::new(
|
||||
Embedder::new(embedder_options.clone())
|
||||
.map_err(meilisearch_types::milli::vector::Error::from)
|
||||
.map_err(meilisearch_types::milli::Error::from)?,
|
||||
);
|
||||
{
|
||||
let mut embedders = self.embedders.write().unwrap();
|
||||
embedders.insert(embedder_options, embedder.clone());
|
||||
}
|
||||
Ok((name, (embedder, prompt)))
|
||||
})
|
||||
.collect();
|
||||
res.map(EmbeddingConfigs::new)
|
||||
}
|
||||
|
||||
/// Blocks the thread until the test handle asks to progress to/through this breakpoint.
|
||||
///
|
||||
/// Two messages are sent through the channel for each breakpoint.
|
||||
@ -1337,7 +1496,7 @@ impl IndexScheduler {
|
||||
|
||||
pub struct Dump<'a> {
|
||||
index_scheduler: &'a IndexScheduler,
|
||||
wtxn: RwTxn<'a, 'a>,
|
||||
wtxn: RwTxn<'a>,
|
||||
|
||||
indexes: HashMap<String, RoaringBitmap>,
|
||||
statuses: HashMap<Status, RoaringBitmap>,
|
||||
@ -1452,7 +1611,7 @@ impl<'a> Dump<'a> {
|
||||
},
|
||||
};
|
||||
|
||||
self.index_scheduler.all_tasks.put(&mut self.wtxn, &BEU32::new(task.uid), &task)?;
|
||||
self.index_scheduler.all_tasks.put(&mut self.wtxn, &task.uid, &task)?;
|
||||
|
||||
for index in task.indexes() {
|
||||
match self.indexes.get_mut(index) {
|
||||
@ -1494,8 +1653,8 @@ impl<'a> Dump<'a> {
|
||||
}
|
||||
}
|
||||
|
||||
self.statuses.entry(task.status).or_insert(RoaringBitmap::new()).insert(task.uid);
|
||||
self.kinds.entry(task.kind.as_kind()).or_insert(RoaringBitmap::new()).insert(task.uid);
|
||||
self.statuses.entry(task.status).or_default().insert(task.uid);
|
||||
self.kinds.entry(task.kind.as_kind()).or_default().insert(task.uid);
|
||||
|
||||
Ok(task)
|
||||
}
|
||||
@ -1615,6 +1774,8 @@ mod tests {
|
||||
indexes_path: tempdir.path().join("indexes"),
|
||||
snapshots_path: tempdir.path().join("snapshots"),
|
||||
dumps_path: tempdir.path().join("dumps"),
|
||||
webhook_url: None,
|
||||
webhook_authorization_header: None,
|
||||
task_db_size: 1000 * 1000, // 1 MB, we don't use MiB on purpose.
|
||||
index_base_map_size: 1000 * 1000, // 1 MB, we don't use MiB on purpose.
|
||||
enable_mdb_writemap: false,
|
||||
@ -1623,6 +1784,7 @@ mod tests {
|
||||
indexer_config,
|
||||
autobatching_enabled: true,
|
||||
max_number_of_tasks: 1_000_000,
|
||||
max_number_of_batched_tasks: usize::MAX,
|
||||
instance_features: Default::default(),
|
||||
};
|
||||
configuration(&mut options);
|
||||
@ -4323,4 +4485,26 @@ mod tests {
|
||||
}
|
||||
"###);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn cancel_processing_dump() {
|
||||
let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]);
|
||||
|
||||
let dump_creation = KindWithContent::DumpCreation { keys: Vec::new(), instance_uid: None };
|
||||
let dump_cancellation = KindWithContent::TaskCancelation {
|
||||
query: "cancel dump".to_owned(),
|
||||
tasks: RoaringBitmap::from_iter([0]),
|
||||
};
|
||||
let _ = index_scheduler.register(dump_creation).unwrap();
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_dump_register");
|
||||
handle.advance_till([Start, BatchCreated, InsideProcessBatch]);
|
||||
|
||||
let _ = index_scheduler.register(dump_cancellation).unwrap();
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "cancel_registered");
|
||||
|
||||
snapshot!(format!("{:?}", handle.advance()), @"AbortedIndexation");
|
||||
|
||||
handle.advance_one_successful_batch();
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "cancel_processed");
|
||||
}
|
||||
}
|
||||
|
@ -0,0 +1,35 @@
|
||||
---
|
||||
source: index-scheduler/src/lib.rs
|
||||
---
|
||||
### Autobatching Enabled = true
|
||||
### Processing Tasks:
|
||||
[]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, status: enqueued, details: { dump_uid: None }, kind: DumpCreation { keys: [], instance_uid: None }}
|
||||
----------------------------------------------------------------------
|
||||
### Status:
|
||||
enqueued [0,]
|
||||
----------------------------------------------------------------------
|
||||
### Kind:
|
||||
"dumpCreation" [0,]
|
||||
----------------------------------------------------------------------
|
||||
### Index Tasks:
|
||||
----------------------------------------------------------------------
|
||||
### Index Mapper:
|
||||
|
||||
----------------------------------------------------------------------
|
||||
### Canceled By:
|
||||
|
||||
----------------------------------------------------------------------
|
||||
### Enqueued At:
|
||||
[timestamp] [0,]
|
||||
----------------------------------------------------------------------
|
||||
### Started At:
|
||||
----------------------------------------------------------------------
|
||||
### Finished At:
|
||||
----------------------------------------------------------------------
|
||||
### File Store:
|
||||
|
||||
----------------------------------------------------------------------
|
||||
|
@ -0,0 +1,45 @@
|
||||
---
|
||||
source: index-scheduler/src/lib.rs
|
||||
---
|
||||
### Autobatching Enabled = true
|
||||
### Processing Tasks:
|
||||
[]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, status: canceled, canceled_by: 1, details: { dump_uid: None }, kind: DumpCreation { keys: [], instance_uid: None }}
|
||||
1 {uid: 1, status: succeeded, details: { matched_tasks: 1, canceled_tasks: Some(0), original_filter: "cancel dump" }, kind: TaskCancelation { query: "cancel dump", tasks: RoaringBitmap<[0]> }}
|
||||
----------------------------------------------------------------------
|
||||
### Status:
|
||||
enqueued []
|
||||
succeeded [1,]
|
||||
canceled [0,]
|
||||
----------------------------------------------------------------------
|
||||
### Kind:
|
||||
"taskCancelation" [1,]
|
||||
"dumpCreation" [0,]
|
||||
----------------------------------------------------------------------
|
||||
### Index Tasks:
|
||||
----------------------------------------------------------------------
|
||||
### Index Mapper:
|
||||
|
||||
----------------------------------------------------------------------
|
||||
### Canceled By:
|
||||
1 [0,]
|
||||
|
||||
----------------------------------------------------------------------
|
||||
### Enqueued At:
|
||||
[timestamp] [0,]
|
||||
[timestamp] [1,]
|
||||
----------------------------------------------------------------------
|
||||
### Started At:
|
||||
[timestamp] [0,]
|
||||
[timestamp] [1,]
|
||||
----------------------------------------------------------------------
|
||||
### Finished At:
|
||||
[timestamp] [0,]
|
||||
[timestamp] [1,]
|
||||
----------------------------------------------------------------------
|
||||
### File Store:
|
||||
|
||||
----------------------------------------------------------------------
|
||||
|
@ -0,0 +1,38 @@
|
||||
---
|
||||
source: index-scheduler/src/lib.rs
|
||||
---
|
||||
### Autobatching Enabled = true
|
||||
### Processing Tasks:
|
||||
[0,]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, status: enqueued, details: { dump_uid: None }, kind: DumpCreation { keys: [], instance_uid: None }}
|
||||
1 {uid: 1, status: enqueued, details: { matched_tasks: 1, canceled_tasks: None, original_filter: "cancel dump" }, kind: TaskCancelation { query: "cancel dump", tasks: RoaringBitmap<[0]> }}
|
||||
----------------------------------------------------------------------
|
||||
### Status:
|
||||
enqueued [0,1,]
|
||||
----------------------------------------------------------------------
|
||||
### Kind:
|
||||
"taskCancelation" [1,]
|
||||
"dumpCreation" [0,]
|
||||
----------------------------------------------------------------------
|
||||
### Index Tasks:
|
||||
----------------------------------------------------------------------
|
||||
### Index Mapper:
|
||||
|
||||
----------------------------------------------------------------------
|
||||
### Canceled By:
|
||||
|
||||
----------------------------------------------------------------------
|
||||
### Enqueued At:
|
||||
[timestamp] [0,]
|
||||
[timestamp] [1,]
|
||||
----------------------------------------------------------------------
|
||||
### Started At:
|
||||
----------------------------------------------------------------------
|
||||
### Finished At:
|
||||
----------------------------------------------------------------------
|
||||
### File Store:
|
||||
|
||||
----------------------------------------------------------------------
|
||||
|
@ -3,9 +3,9 @@
|
||||
use std::collections::{BTreeSet, HashSet};
|
||||
use std::ops::Bound;
|
||||
|
||||
use meilisearch_types::heed::types::{DecodeIgnore, OwnedType};
|
||||
use meilisearch_types::heed::types::DecodeIgnore;
|
||||
use meilisearch_types::heed::{Database, RoTxn, RwTxn};
|
||||
use meilisearch_types::milli::{CboRoaringBitmapCodec, BEU32};
|
||||
use meilisearch_types::milli::CboRoaringBitmapCodec;
|
||||
use meilisearch_types::tasks::{Details, IndexSwap, Kind, KindWithContent, Status};
|
||||
use roaring::{MultiOps, RoaringBitmap};
|
||||
use time::OffsetDateTime;
|
||||
@ -18,7 +18,7 @@ impl IndexScheduler {
|
||||
}
|
||||
|
||||
pub(crate) fn last_task_id(&self, rtxn: &RoTxn) -> Result<Option<TaskId>> {
|
||||
Ok(self.all_tasks.remap_data_type::<DecodeIgnore>().last(rtxn)?.map(|(k, _)| k.get() + 1))
|
||||
Ok(self.all_tasks.remap_data_type::<DecodeIgnore>().last(rtxn)?.map(|(k, _)| k + 1))
|
||||
}
|
||||
|
||||
pub(crate) fn next_task_id(&self, rtxn: &RoTxn) -> Result<TaskId> {
|
||||
@ -26,7 +26,7 @@ impl IndexScheduler {
|
||||
}
|
||||
|
||||
pub(crate) fn get_task(&self, rtxn: &RoTxn, task_id: TaskId) -> Result<Option<Task>> {
|
||||
Ok(self.all_tasks.get(rtxn, &BEU32::new(task_id))?)
|
||||
Ok(self.all_tasks.get(rtxn, &task_id)?)
|
||||
}
|
||||
|
||||
/// Convert an iterator to a `Vec` of tasks. The tasks MUST exist or a
|
||||
@ -88,7 +88,7 @@ impl IndexScheduler {
|
||||
}
|
||||
}
|
||||
|
||||
self.all_tasks.put(wtxn, &BEU32::new(task.uid), task)?;
|
||||
self.all_tasks.put(wtxn, &task.uid, task)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@ -169,11 +169,11 @@ impl IndexScheduler {
|
||||
|
||||
pub(crate) fn insert_task_datetime(
|
||||
wtxn: &mut RwTxn,
|
||||
database: Database<OwnedType<BEI128>, CboRoaringBitmapCodec>,
|
||||
database: Database<BEI128, CboRoaringBitmapCodec>,
|
||||
time: OffsetDateTime,
|
||||
task_id: TaskId,
|
||||
) -> Result<()> {
|
||||
let timestamp = BEI128::new(time.unix_timestamp_nanos());
|
||||
let timestamp = time.unix_timestamp_nanos();
|
||||
let mut task_ids = database.get(wtxn, ×tamp)?.unwrap_or_default();
|
||||
task_ids.insert(task_id);
|
||||
database.put(wtxn, ×tamp, &RoaringBitmap::from_iter(task_ids))?;
|
||||
@ -182,11 +182,11 @@ pub(crate) fn insert_task_datetime(
|
||||
|
||||
pub(crate) fn remove_task_datetime(
|
||||
wtxn: &mut RwTxn,
|
||||
database: Database<OwnedType<BEI128>, CboRoaringBitmapCodec>,
|
||||
database: Database<BEI128, CboRoaringBitmapCodec>,
|
||||
time: OffsetDateTime,
|
||||
task_id: TaskId,
|
||||
) -> Result<()> {
|
||||
let timestamp = BEI128::new(time.unix_timestamp_nanos());
|
||||
let timestamp = time.unix_timestamp_nanos();
|
||||
if let Some(mut existing) = database.get(wtxn, ×tamp)? {
|
||||
existing.remove(task_id);
|
||||
if existing.is_empty() {
|
||||
@ -202,7 +202,7 @@ pub(crate) fn remove_task_datetime(
|
||||
pub(crate) fn keep_tasks_within_datetimes(
|
||||
rtxn: &RoTxn,
|
||||
tasks: &mut RoaringBitmap,
|
||||
database: Database<OwnedType<BEI128>, CboRoaringBitmapCodec>,
|
||||
database: Database<BEI128, CboRoaringBitmapCodec>,
|
||||
after: Option<OffsetDateTime>,
|
||||
before: Option<OffsetDateTime>,
|
||||
) -> Result<()> {
|
||||
@ -213,8 +213,8 @@ pub(crate) fn keep_tasks_within_datetimes(
|
||||
(Some(after), Some(before)) => (Bound::Excluded(*after), Bound::Excluded(*before)),
|
||||
};
|
||||
let mut collected_task_ids = RoaringBitmap::new();
|
||||
let start = map_bound(start, |b| BEI128::new(b.unix_timestamp_nanos()));
|
||||
let end = map_bound(end, |b| BEI128::new(b.unix_timestamp_nanos()));
|
||||
let start = map_bound(start, |b| b.unix_timestamp_nanos());
|
||||
let end = map_bound(end, |b| b.unix_timestamp_nanos());
|
||||
let iter = database.range(rtxn, &(start, end))?;
|
||||
for r in iter {
|
||||
let (_timestamp, task_ids) = r?;
|
||||
@ -337,8 +337,6 @@ impl IndexScheduler {
|
||||
let rtxn = self.env.read_txn().unwrap();
|
||||
for task in self.all_tasks.iter(&rtxn).unwrap() {
|
||||
let (task_id, task) = task.unwrap();
|
||||
let task_id = task_id.get();
|
||||
|
||||
let task_index_uid = task.index_uid().map(ToOwned::to_owned);
|
||||
|
||||
let Task {
|
||||
@ -361,16 +359,13 @@ impl IndexScheduler {
|
||||
.unwrap()
|
||||
.contains(task.uid));
|
||||
}
|
||||
let db_enqueued_at = self
|
||||
.enqueued_at
|
||||
.get(&rtxn, &BEI128::new(enqueued_at.unix_timestamp_nanos()))
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
let db_enqueued_at =
|
||||
self.enqueued_at.get(&rtxn, &enqueued_at.unix_timestamp_nanos()).unwrap().unwrap();
|
||||
assert!(db_enqueued_at.contains(task_id));
|
||||
if let Some(started_at) = started_at {
|
||||
let db_started_at = self
|
||||
.started_at
|
||||
.get(&rtxn, &BEI128::new(started_at.unix_timestamp_nanos()))
|
||||
.get(&rtxn, &started_at.unix_timestamp_nanos())
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
assert!(db_started_at.contains(task_id));
|
||||
@ -378,7 +373,7 @@ impl IndexScheduler {
|
||||
if let Some(finished_at) = finished_at {
|
||||
let db_finished_at = self
|
||||
.finished_at
|
||||
.get(&rtxn, &BEI128::new(finished_at.unix_timestamp_nanos()))
|
||||
.get(&rtxn, &finished_at.unix_timestamp_nanos())
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
assert!(db_finished_at.contains(task_id));
|
||||
|
@ -1,7 +1,7 @@
|
||||
use std::borrow::Cow;
|
||||
use std::convert::TryInto;
|
||||
|
||||
use meilisearch_types::heed::{BytesDecode, BytesEncode};
|
||||
use meilisearch_types::heed::{BoxedError, BytesDecode, BytesEncode};
|
||||
use uuid::Uuid;
|
||||
|
||||
/// A heed codec for value of struct Uuid.
|
||||
@ -10,15 +10,15 @@ pub struct UuidCodec;
|
||||
impl<'a> BytesDecode<'a> for UuidCodec {
|
||||
type DItem = Uuid;
|
||||
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||
bytes.try_into().ok().map(Uuid::from_bytes)
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
|
||||
bytes.try_into().map(Uuid::from_bytes).map_err(Into::into)
|
||||
}
|
||||
}
|
||||
|
||||
impl BytesEncode<'_> for UuidCodec {
|
||||
type EItem = Uuid;
|
||||
|
||||
fn bytes_encode(item: &Self::EItem) -> Option<Cow<[u8]>> {
|
||||
Some(Cow::Borrowed(item.as_bytes()))
|
||||
fn bytes_encode(item: &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
|
||||
Ok(Cow::Borrowed(item.as_bytes()))
|
||||
}
|
||||
}
|
||||
|
@ -4,17 +4,20 @@ use std::collections::HashSet;
|
||||
use std::convert::{TryFrom, TryInto};
|
||||
use std::fs::create_dir_all;
|
||||
use std::path::Path;
|
||||
use std::result::Result as StdResult;
|
||||
use std::str;
|
||||
use std::str::FromStr;
|
||||
use std::sync::Arc;
|
||||
|
||||
use hmac::{Hmac, Mac};
|
||||
use meilisearch_types::heed::BoxedError;
|
||||
use meilisearch_types::index_uid_pattern::IndexUidPattern;
|
||||
use meilisearch_types::keys::KeyId;
|
||||
use meilisearch_types::milli;
|
||||
use meilisearch_types::milli::heed::types::{ByteSlice, DecodeIgnore, SerdeJson};
|
||||
use meilisearch_types::milli::heed::types::{Bytes, DecodeIgnore, SerdeJson};
|
||||
use meilisearch_types::milli::heed::{Database, Env, EnvOpenOptions, RwTxn};
|
||||
use sha2::Sha256;
|
||||
use thiserror::Error;
|
||||
use time::OffsetDateTime;
|
||||
use uuid::fmt::Hyphenated;
|
||||
use uuid::Uuid;
|
||||
@ -30,7 +33,7 @@ const KEY_ID_ACTION_INDEX_EXPIRATION_DB_NAME: &str = "keyid-action-index-expirat
|
||||
#[derive(Clone)]
|
||||
pub struct HeedAuthStore {
|
||||
env: Arc<Env>,
|
||||
keys: Database<ByteSlice, SerdeJson<Key>>,
|
||||
keys: Database<Bytes, SerdeJson<Key>>,
|
||||
action_keyid_index_expiration: Database<KeyIdActionCodec, SerdeJson<Option<OffsetDateTime>>>,
|
||||
should_close_on_drop: bool,
|
||||
}
|
||||
@ -276,7 +279,7 @@ impl HeedAuthStore {
|
||||
fn delete_key_from_inverted_db(&self, wtxn: &mut RwTxn, key: &KeyId) -> Result<()> {
|
||||
let mut iter = self
|
||||
.action_keyid_index_expiration
|
||||
.remap_types::<ByteSlice, DecodeIgnore>()
|
||||
.remap_types::<Bytes, DecodeIgnore>()
|
||||
.prefix_iter_mut(wtxn, key.as_bytes())?;
|
||||
while iter.next().transpose()?.is_some() {
|
||||
// safety: we don't keep references from inside the LMDB database.
|
||||
@ -294,23 +297,24 @@ pub struct KeyIdActionCodec;
|
||||
impl<'a> milli::heed::BytesDecode<'a> for KeyIdActionCodec {
|
||||
type DItem = (KeyId, Action, Option<&'a [u8]>);
|
||||
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||
let (key_id_bytes, action_bytes) = try_split_array_at(bytes)?;
|
||||
let (action_bytes, index) = match try_split_array_at(action_bytes)? {
|
||||
(action, []) => (action, None),
|
||||
(action, index) => (action, Some(index)),
|
||||
};
|
||||
fn bytes_decode(bytes: &'a [u8]) -> StdResult<Self::DItem, BoxedError> {
|
||||
let (key_id_bytes, action_bytes) = try_split_array_at(bytes).ok_or(SliceTooShortError)?;
|
||||
let (&action_byte, index) =
|
||||
match try_split_array_at(action_bytes).ok_or(SliceTooShortError)? {
|
||||
([action], []) => (action, None),
|
||||
([action], index) => (action, Some(index)),
|
||||
};
|
||||
let key_id = Uuid::from_bytes(*key_id_bytes);
|
||||
let action = Action::from_repr(u8::from_be_bytes(*action_bytes))?;
|
||||
let action = Action::from_repr(action_byte).ok_or(InvalidActionError { action_byte })?;
|
||||
|
||||
Some((key_id, action, index))
|
||||
Ok((key_id, action, index))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> milli::heed::BytesEncode<'a> for KeyIdActionCodec {
|
||||
type EItem = (&'a KeyId, &'a Action, Option<&'a [u8]>);
|
||||
|
||||
fn bytes_encode((key_id, action, index): &Self::EItem) -> Option<Cow<[u8]>> {
|
||||
fn bytes_encode((key_id, action, index): &Self::EItem) -> StdResult<Cow<[u8]>, BoxedError> {
|
||||
let mut bytes = Vec::new();
|
||||
|
||||
bytes.extend_from_slice(key_id.as_bytes());
|
||||
@ -320,10 +324,20 @@ impl<'a> milli::heed::BytesEncode<'a> for KeyIdActionCodec {
|
||||
bytes.extend_from_slice(index);
|
||||
}
|
||||
|
||||
Some(Cow::Owned(bytes))
|
||||
Ok(Cow::Owned(bytes))
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Error, Debug)]
|
||||
#[error("the slice is too short")]
|
||||
pub struct SliceTooShortError;
|
||||
|
||||
#[derive(Error, Debug)]
|
||||
#[error("cannot construct a valid Action from {action_byte}")]
|
||||
pub struct InvalidActionError {
|
||||
pub action_byte: u8,
|
||||
}
|
||||
|
||||
pub fn generate_key_as_hexa(uid: Uuid, master_key: &[u8]) -> String {
|
||||
// format uid as hyphenated allowing user to generate their own keys.
|
||||
let mut uid_buffer = [0; Hyphenated::LENGTH];
|
||||
|
@ -15,7 +15,7 @@ actix-web = { version = "4.3.1", default-features = false }
|
||||
anyhow = "1.0.70"
|
||||
convert_case = "0.6.0"
|
||||
csv = "1.2.1"
|
||||
deserr = { version = "0.6.0", features = ["actix-web"]}
|
||||
deserr = { version = "0.6.0", features = ["actix-web"] }
|
||||
either = { version = "1.8.1", features = ["serde"] }
|
||||
enum-iterator = "1.4.0"
|
||||
file-store = { path = "../file-store" }
|
||||
@ -50,6 +50,7 @@ hebrew = ["milli/hebrew"]
|
||||
japanese = ["milli/japanese"]
|
||||
# thai specialized tokenization
|
||||
thai = ["milli/thai"]
|
||||
|
||||
# allow greek specialized tokenization
|
||||
greek = ["milli/greek"]
|
||||
# allow khmer specialized tokenization
|
||||
khmer = ["milli/khmer"]
|
||||
|
@ -188,3 +188,4 @@ merge_with_error_impl_take_error_message!(ParseOffsetDateTimeError);
|
||||
merge_with_error_impl_take_error_message!(ParseTaskKindError);
|
||||
merge_with_error_impl_take_error_message!(ParseTaskStatusError);
|
||||
merge_with_error_impl_take_error_message!(IndexUidFormatError);
|
||||
merge_with_error_impl_take_error_message!(InvalidSearchSemanticRatio);
|
||||
|
@ -222,6 +222,8 @@ InvalidVectorsType , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidDocumentId , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidDocumentLimit , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidDocumentOffset , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidEmbedder , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidHybridQuery , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidIndexLimit , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidIndexOffset , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidIndexPrimaryKey , InvalidRequest , BAD_REQUEST ;
|
||||
@ -233,6 +235,7 @@ InvalidSearchAttributesToRetrieve , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSearchCropLength , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSearchCropMarker , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSearchFacets , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSearchSemanticRatio , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidFacetSearchFacetName , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSearchFilter , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSearchHighlightPostTag , InvalidRequest , BAD_REQUEST ;
|
||||
@ -252,9 +255,11 @@ InvalidSearchShowRankingScoreDetails , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSearchSort , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSettingsDisplayedAttributes , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSettingsDistinctAttribute , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSettingsProximityPrecision , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSettingsFaceting , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSettingsFilterableAttributes , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSettingsPagination , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSettingsEmbedders , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSettingsRankingRules , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSettingsSearchableAttributes , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSettingsSortableAttributes , InvalidRequest , BAD_REQUEST ;
|
||||
@ -294,15 +299,18 @@ MissingFacetSearchFacetName , InvalidRequest , BAD_REQUEST ;
|
||||
MissingIndexUid , InvalidRequest , BAD_REQUEST ;
|
||||
MissingMasterKey , Auth , UNAUTHORIZED ;
|
||||
MissingPayload , InvalidRequest , BAD_REQUEST ;
|
||||
MissingSearchHybrid , InvalidRequest , BAD_REQUEST ;
|
||||
MissingSwapIndexes , InvalidRequest , BAD_REQUEST ;
|
||||
MissingTaskFilters , InvalidRequest , BAD_REQUEST ;
|
||||
NoSpaceLeftOnDevice , System , UNPROCESSABLE_ENTITY;
|
||||
PayloadTooLarge , InvalidRequest , PAYLOAD_TOO_LARGE ;
|
||||
TaskNotFound , InvalidRequest , NOT_FOUND ;
|
||||
TooManyOpenFiles , System , UNPROCESSABLE_ENTITY ;
|
||||
TooManyVectors , InvalidRequest , BAD_REQUEST ;
|
||||
UnretrievableDocument , Internal , BAD_REQUEST ;
|
||||
UnretrievableErrorCode , InvalidRequest , BAD_REQUEST ;
|
||||
UnsupportedMediaType , InvalidRequest , UNSUPPORTED_MEDIA_TYPE
|
||||
UnsupportedMediaType , InvalidRequest , UNSUPPORTED_MEDIA_TYPE ;
|
||||
VectorEmbeddingError , InvalidRequest , BAD_REQUEST
|
||||
}
|
||||
|
||||
impl ErrorCode for JoinError {
|
||||
@ -324,7 +332,6 @@ impl ErrorCode for milli::Error {
|
||||
UserError::SerdeJson(_)
|
||||
| UserError::InvalidLmdbOpenOptions
|
||||
| UserError::DocumentLimitReached
|
||||
| UserError::AccessingSoftDeletedDocument { .. }
|
||||
| UserError::UnknownInternalDocumentId { .. } => Code::Internal,
|
||||
UserError::InvalidStoreFile => Code::InvalidStoreFile,
|
||||
UserError::NoSpaceLeftOnDevice => Code::NoSpaceLeftOnDevice,
|
||||
@ -336,6 +343,10 @@ impl ErrorCode for milli::Error {
|
||||
UserError::InvalidDocumentId { .. } | UserError::TooManyDocumentIds { .. } => {
|
||||
Code::InvalidDocumentId
|
||||
}
|
||||
UserError::MissingDocumentField(_) => Code::InvalidDocumentFields,
|
||||
UserError::InvalidPrompt(_) => Code::InvalidSettingsEmbedders,
|
||||
UserError::TooManyEmbedders(_) => Code::InvalidSettingsEmbedders,
|
||||
UserError::InvalidPromptForEmbeddings(..) => Code::InvalidSettingsEmbedders,
|
||||
UserError::NoPrimaryKeyCandidateFound => Code::IndexPrimaryKeyNoCandidateFound,
|
||||
UserError::MultiplePrimaryKeyCandidatesFound { .. } => {
|
||||
Code::IndexPrimaryKeyMultipleCandidatesFound
|
||||
@ -353,11 +364,15 @@ impl ErrorCode for milli::Error {
|
||||
UserError::CriterionError(_) => Code::InvalidSettingsRankingRules,
|
||||
UserError::InvalidGeoField { .. } => Code::InvalidDocumentGeoField,
|
||||
UserError::InvalidVectorDimensions { .. } => Code::InvalidVectorDimensions,
|
||||
UserError::InvalidVectorsMapType { .. } => Code::InvalidVectorsType,
|
||||
UserError::InvalidVectorsType { .. } => Code::InvalidVectorsType,
|
||||
UserError::TooManyVectors(_, _) => Code::TooManyVectors,
|
||||
UserError::SortError(_) => Code::InvalidSearchSort,
|
||||
UserError::InvalidMinTypoWordLenSetting(_, _) => {
|
||||
Code::InvalidSettingsTypoTolerance
|
||||
}
|
||||
UserError::InvalidEmbedder(_) => Code::InvalidEmbedder,
|
||||
UserError::VectorEmbeddingError(_) => Code::VectorEmbeddingError,
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -387,11 +402,11 @@ impl ErrorCode for HeedError {
|
||||
HeedError::Mdb(MdbError::Invalid) => Code::InvalidStoreFile,
|
||||
HeedError::Io(e) => e.error_code(),
|
||||
HeedError::Mdb(_)
|
||||
| HeedError::Encoding
|
||||
| HeedError::Decoding
|
||||
| HeedError::Encoding(_)
|
||||
| HeedError::Decoding(_)
|
||||
| HeedError::InvalidDatabaseTyping
|
||||
| HeedError::DatabaseClosing
|
||||
| HeedError::BadOpenOptions => Code::Internal,
|
||||
| HeedError::BadOpenOptions { .. } => Code::Internal,
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -445,6 +460,15 @@ impl fmt::Display for DeserrParseIntError {
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for deserr_codes::InvalidSearchSemanticRatio {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(
|
||||
f,
|
||||
"the value of `semanticRatio` is invalid, expected a float between `0.0` and `1.0`."
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
#[macro_export]
|
||||
macro_rules! internal_error {
|
||||
($target:ty : $($other:path), *) => {
|
||||
|
@ -9,6 +9,7 @@ pub mod index_uid_pattern;
|
||||
pub mod keys;
|
||||
pub mod settings;
|
||||
pub mod star_or;
|
||||
pub mod task_view;
|
||||
pub mod tasks;
|
||||
pub mod versioning;
|
||||
pub use milli::{heed, Index};
|
||||
|
@ -8,6 +8,7 @@ use std::str::FromStr;
|
||||
|
||||
use deserr::{DeserializeError, Deserr, ErrorKind, MergeWithError, ValuePointerRef};
|
||||
use fst::IntoStreamer;
|
||||
use milli::proximity::ProximityPrecision;
|
||||
use milli::update::Setting;
|
||||
use milli::{Criterion, CriterionError, Index, DEFAULT_VALUES_PER_FACET};
|
||||
use serde::{Deserialize, Serialize, Serializer};
|
||||
@ -186,6 +187,9 @@ pub struct Settings<T> {
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSettingsDistinctAttribute>)]
|
||||
pub distinct_attribute: Setting<String>,
|
||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSettingsProximityPrecision>)]
|
||||
pub proximity_precision: Setting<ProximityPrecisionView>,
|
||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSettingsTypoTolerance>)]
|
||||
pub typo_tolerance: Setting<TypoSettings>,
|
||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||
@ -195,6 +199,10 @@ pub struct Settings<T> {
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSettingsPagination>)]
|
||||
pub pagination: Setting<PaginationSettings>,
|
||||
|
||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSettingsEmbedders>)]
|
||||
pub embedders: Setting<BTreeMap<String, Setting<milli::vector::settings::EmbeddingSettings>>>,
|
||||
|
||||
#[serde(skip)]
|
||||
#[deserr(skip)]
|
||||
pub _kind: PhantomData<T>,
|
||||
@ -214,9 +222,11 @@ impl Settings<Checked> {
|
||||
separator_tokens: Setting::Reset,
|
||||
dictionary: Setting::Reset,
|
||||
distinct_attribute: Setting::Reset,
|
||||
proximity_precision: Setting::Reset,
|
||||
typo_tolerance: Setting::Reset,
|
||||
faceting: Setting::Reset,
|
||||
pagination: Setting::Reset,
|
||||
embedders: Setting::Reset,
|
||||
_kind: PhantomData,
|
||||
}
|
||||
}
|
||||
@ -234,9 +244,11 @@ impl Settings<Checked> {
|
||||
dictionary,
|
||||
synonyms,
|
||||
distinct_attribute,
|
||||
proximity_precision,
|
||||
typo_tolerance,
|
||||
faceting,
|
||||
pagination,
|
||||
embedders,
|
||||
..
|
||||
} = self;
|
||||
|
||||
@ -252,9 +264,11 @@ impl Settings<Checked> {
|
||||
dictionary,
|
||||
synonyms,
|
||||
distinct_attribute,
|
||||
proximity_precision,
|
||||
typo_tolerance,
|
||||
faceting,
|
||||
pagination,
|
||||
embedders,
|
||||
_kind: PhantomData,
|
||||
}
|
||||
}
|
||||
@ -296,9 +310,11 @@ impl Settings<Unchecked> {
|
||||
separator_tokens: self.separator_tokens,
|
||||
dictionary: self.dictionary,
|
||||
distinct_attribute: self.distinct_attribute,
|
||||
proximity_precision: self.proximity_precision,
|
||||
typo_tolerance: self.typo_tolerance,
|
||||
faceting: self.faceting,
|
||||
pagination: self.pagination,
|
||||
embedders: self.embedders,
|
||||
_kind: PhantomData,
|
||||
}
|
||||
}
|
||||
@ -390,6 +406,12 @@ pub fn apply_settings_to_builder(
|
||||
Setting::NotSet => (),
|
||||
}
|
||||
|
||||
match settings.proximity_precision {
|
||||
Setting::Set(ref precision) => builder.set_proximity_precision((*precision).into()),
|
||||
Setting::Reset => builder.reset_proximity_precision(),
|
||||
Setting::NotSet => (),
|
||||
}
|
||||
|
||||
match settings.typo_tolerance {
|
||||
Setting::Set(ref value) => {
|
||||
match value.enabled {
|
||||
@ -476,6 +498,12 @@ pub fn apply_settings_to_builder(
|
||||
Setting::Reset => builder.reset_pagination_max_total_hits(),
|
||||
Setting::NotSet => (),
|
||||
}
|
||||
|
||||
match settings.embedders.clone() {
|
||||
Setting::Set(value) => builder.set_embedder_settings(value),
|
||||
Setting::Reset => builder.reset_embedder_settings(),
|
||||
Setting::NotSet => (),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn settings(
|
||||
@ -509,6 +537,8 @@ pub fn settings(
|
||||
|
||||
let distinct_field = index.distinct_field(rtxn)?.map(String::from);
|
||||
|
||||
let proximity_precision = index.proximity_precision(rtxn)?.map(ProximityPrecisionView::from);
|
||||
|
||||
let synonyms = index.user_defined_synonyms(rtxn)?;
|
||||
|
||||
let min_typo_word_len = MinWordSizeTyposSetting {
|
||||
@ -532,7 +562,10 @@ pub fn settings(
|
||||
|
||||
let faceting = FacetingSettings {
|
||||
max_values_per_facet: Setting::Set(
|
||||
index.max_values_per_facet(rtxn)?.unwrap_or(DEFAULT_VALUES_PER_FACET),
|
||||
index
|
||||
.max_values_per_facet(rtxn)?
|
||||
.map(|x| x as usize)
|
||||
.unwrap_or(DEFAULT_VALUES_PER_FACET),
|
||||
),
|
||||
sort_facet_values_by: Setting::Set(
|
||||
index
|
||||
@ -545,10 +578,19 @@ pub fn settings(
|
||||
|
||||
let pagination = PaginationSettings {
|
||||
max_total_hits: Setting::Set(
|
||||
index.pagination_max_total_hits(rtxn)?.unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS),
|
||||
index
|
||||
.pagination_max_total_hits(rtxn)?
|
||||
.map(|x| x as usize)
|
||||
.unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS),
|
||||
),
|
||||
};
|
||||
|
||||
let embedders = index
|
||||
.embedding_configs(rtxn)?
|
||||
.into_iter()
|
||||
.map(|(name, config)| (name, Setting::Set(config.into())))
|
||||
.collect();
|
||||
|
||||
Ok(Settings {
|
||||
displayed_attributes: match displayed_attributes {
|
||||
Some(attrs) => Setting::Set(attrs),
|
||||
@ -569,10 +611,15 @@ pub fn settings(
|
||||
Some(field) => Setting::Set(field),
|
||||
None => Setting::Reset,
|
||||
},
|
||||
proximity_precision: match proximity_precision {
|
||||
Some(precision) => Setting::Set(precision),
|
||||
None => Setting::Reset,
|
||||
},
|
||||
synonyms: Setting::Set(synonyms),
|
||||
typo_tolerance: Setting::Set(typo_tolerance),
|
||||
faceting: Setting::Set(faceting),
|
||||
pagination: Setting::Set(pagination),
|
||||
embedders: Setting::Set(embedders),
|
||||
_kind: PhantomData,
|
||||
})
|
||||
}
|
||||
@ -673,6 +720,31 @@ impl From<RankingRuleView> for Criterion {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserr, Serialize, Deserialize)]
|
||||
#[serde(deny_unknown_fields, rename_all = "camelCase")]
|
||||
#[deserr(error = DeserrJsonError<InvalidSettingsProximityPrecision>, rename_all = camelCase, deny_unknown_fields)]
|
||||
pub enum ProximityPrecisionView {
|
||||
ByWord,
|
||||
ByAttribute,
|
||||
}
|
||||
|
||||
impl From<ProximityPrecision> for ProximityPrecisionView {
|
||||
fn from(value: ProximityPrecision) -> Self {
|
||||
match value {
|
||||
ProximityPrecision::ByWord => ProximityPrecisionView::ByWord,
|
||||
ProximityPrecision::ByAttribute => ProximityPrecisionView::ByAttribute,
|
||||
}
|
||||
}
|
||||
}
|
||||
impl From<ProximityPrecisionView> for ProximityPrecision {
|
||||
fn from(value: ProximityPrecisionView) -> Self {
|
||||
match value {
|
||||
ProximityPrecisionView::ByWord => ProximityPrecision::ByWord,
|
||||
ProximityPrecisionView::ByAttribute => ProximityPrecision::ByAttribute,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub(crate) mod test {
|
||||
use super::*;
|
||||
@ -692,9 +764,11 @@ pub(crate) mod test {
|
||||
dictionary: Setting::NotSet,
|
||||
synonyms: Setting::NotSet,
|
||||
distinct_attribute: Setting::NotSet,
|
||||
proximity_precision: Setting::NotSet,
|
||||
typo_tolerance: Setting::NotSet,
|
||||
faceting: Setting::NotSet,
|
||||
pagination: Setting::NotSet,
|
||||
embedders: Setting::NotSet,
|
||||
_kind: PhantomData::<Unchecked>,
|
||||
};
|
||||
|
||||
@ -716,9 +790,11 @@ pub(crate) mod test {
|
||||
dictionary: Setting::NotSet,
|
||||
synonyms: Setting::NotSet,
|
||||
distinct_attribute: Setting::NotSet,
|
||||
proximity_precision: Setting::NotSet,
|
||||
typo_tolerance: Setting::NotSet,
|
||||
faceting: Setting::NotSet,
|
||||
pagination: Setting::NotSet,
|
||||
embedders: Setting::NotSet,
|
||||
_kind: PhantomData::<Unchecked>,
|
||||
};
|
||||
|
||||
|
139
meilisearch-types/src/task_view.rs
Normal file
139
meilisearch-types/src/task_view.rs
Normal file
@ -0,0 +1,139 @@
|
||||
use serde::Serialize;
|
||||
use time::{Duration, OffsetDateTime};
|
||||
|
||||
use crate::error::ResponseError;
|
||||
use crate::settings::{Settings, Unchecked};
|
||||
use crate::tasks::{serialize_duration, Details, IndexSwap, Kind, Status, Task, TaskId};
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct TaskView {
|
||||
pub uid: TaskId,
|
||||
#[serde(default)]
|
||||
pub index_uid: Option<String>,
|
||||
pub status: Status,
|
||||
#[serde(rename = "type")]
|
||||
pub kind: Kind,
|
||||
pub canceled_by: Option<TaskId>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub details: Option<DetailsView>,
|
||||
pub error: Option<ResponseError>,
|
||||
#[serde(serialize_with = "serialize_duration", default)]
|
||||
pub duration: Option<Duration>,
|
||||
#[serde(with = "time::serde::rfc3339")]
|
||||
pub enqueued_at: OffsetDateTime,
|
||||
#[serde(with = "time::serde::rfc3339::option", default)]
|
||||
pub started_at: Option<OffsetDateTime>,
|
||||
#[serde(with = "time::serde::rfc3339::option", default)]
|
||||
pub finished_at: Option<OffsetDateTime>,
|
||||
}
|
||||
|
||||
impl TaskView {
|
||||
pub fn from_task(task: &Task) -> TaskView {
|
||||
TaskView {
|
||||
uid: task.uid,
|
||||
index_uid: task.index_uid().map(ToOwned::to_owned),
|
||||
status: task.status,
|
||||
kind: task.kind.as_kind(),
|
||||
canceled_by: task.canceled_by,
|
||||
details: task.details.clone().map(DetailsView::from),
|
||||
error: task.error.clone(),
|
||||
duration: task.started_at.zip(task.finished_at).map(|(start, end)| end - start),
|
||||
enqueued_at: task.enqueued_at,
|
||||
started_at: task.started_at,
|
||||
finished_at: task.finished_at,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Default, Debug, PartialEq, Eq, Clone, Serialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct DetailsView {
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub received_documents: Option<u64>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub indexed_documents: Option<Option<u64>>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub primary_key: Option<Option<String>>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub provided_ids: Option<usize>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub deleted_documents: Option<Option<u64>>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub matched_tasks: Option<u64>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub canceled_tasks: Option<Option<u64>>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub deleted_tasks: Option<Option<u64>>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub original_filter: Option<Option<String>>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub dump_uid: Option<Option<String>>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
#[serde(flatten)]
|
||||
pub settings: Option<Box<Settings<Unchecked>>>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub swaps: Option<Vec<IndexSwap>>,
|
||||
}
|
||||
|
||||
impl From<Details> for DetailsView {
|
||||
fn from(details: Details) -> Self {
|
||||
match details {
|
||||
Details::DocumentAdditionOrUpdate { received_documents, indexed_documents } => {
|
||||
DetailsView {
|
||||
received_documents: Some(received_documents),
|
||||
indexed_documents: Some(indexed_documents),
|
||||
..DetailsView::default()
|
||||
}
|
||||
}
|
||||
Details::SettingsUpdate { settings } => {
|
||||
DetailsView { settings: Some(settings), ..DetailsView::default() }
|
||||
}
|
||||
Details::IndexInfo { primary_key } => {
|
||||
DetailsView { primary_key: Some(primary_key), ..DetailsView::default() }
|
||||
}
|
||||
Details::DocumentDeletion {
|
||||
provided_ids: received_document_ids,
|
||||
deleted_documents,
|
||||
} => DetailsView {
|
||||
provided_ids: Some(received_document_ids),
|
||||
deleted_documents: Some(deleted_documents),
|
||||
original_filter: Some(None),
|
||||
..DetailsView::default()
|
||||
},
|
||||
Details::DocumentDeletionByFilter { original_filter, deleted_documents } => {
|
||||
DetailsView {
|
||||
provided_ids: Some(0),
|
||||
original_filter: Some(Some(original_filter)),
|
||||
deleted_documents: Some(deleted_documents),
|
||||
..DetailsView::default()
|
||||
}
|
||||
}
|
||||
Details::ClearAll { deleted_documents } => {
|
||||
DetailsView { deleted_documents: Some(deleted_documents), ..DetailsView::default() }
|
||||
}
|
||||
Details::TaskCancelation { matched_tasks, canceled_tasks, original_filter } => {
|
||||
DetailsView {
|
||||
matched_tasks: Some(matched_tasks),
|
||||
canceled_tasks: Some(canceled_tasks),
|
||||
original_filter: Some(Some(original_filter)),
|
||||
..DetailsView::default()
|
||||
}
|
||||
}
|
||||
Details::TaskDeletion { matched_tasks, deleted_tasks, original_filter } => {
|
||||
DetailsView {
|
||||
matched_tasks: Some(matched_tasks),
|
||||
deleted_tasks: Some(deleted_tasks),
|
||||
original_filter: Some(Some(original_filter)),
|
||||
..DetailsView::default()
|
||||
}
|
||||
}
|
||||
Details::Dump { dump_uid } => {
|
||||
DetailsView { dump_uid: Some(dump_uid), ..DetailsView::default() }
|
||||
}
|
||||
Details::IndexSwap { swaps } => {
|
||||
DetailsView { swaps: Some(swaps), ..Default::default() }
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
@ -39,7 +39,7 @@ byte-unit = { version = "4.0.19", default-features = false, features = [
|
||||
bytes = "1.4.0"
|
||||
clap = { version = "4.2.1", features = ["derive", "env"] }
|
||||
crossbeam-channel = "0.5.8"
|
||||
deserr = { version = "0.6.0", features = ["actix-web"]}
|
||||
deserr = { version = "0.6.0", features = ["actix-web"] }
|
||||
dump = { path = "../dump" }
|
||||
either = "1.8.1"
|
||||
env_logger = "0.10.0"
|
||||
@ -104,6 +104,7 @@ walkdir = "2.3.3"
|
||||
yaup = "0.2.1"
|
||||
serde_urlencoded = "0.7.1"
|
||||
termcolor = "1.2.0"
|
||||
url = { version = "2.5.0", features = ["serde"] }
|
||||
|
||||
[dev-dependencies]
|
||||
actix-rt = "2.8.0"
|
||||
@ -150,6 +151,7 @@ hebrew = ["meilisearch-types/hebrew"]
|
||||
japanese = ["meilisearch-types/japanese"]
|
||||
thai = ["meilisearch-types/thai"]
|
||||
greek = ["meilisearch-types/greek"]
|
||||
khmer = ["meilisearch-types/khmer"]
|
||||
|
||||
[package.metadata.mini-dashboard]
|
||||
assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.11/build.zip"
|
||||
|
@ -36,7 +36,7 @@ use crate::routes::{create_all_stats, Stats};
|
||||
use crate::search::{
|
||||
FacetSearchResult, MatchingStrategy, SearchQuery, SearchQueryWithIndex, SearchResult,
|
||||
DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG,
|
||||
DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT,
|
||||
DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, DEFAULT_SEMANTIC_RATIO,
|
||||
};
|
||||
use crate::Opt;
|
||||
|
||||
@ -251,6 +251,7 @@ struct Infos {
|
||||
env: String,
|
||||
experimental_enable_metrics: bool,
|
||||
experimental_reduce_indexing_memory_usage: bool,
|
||||
experimental_max_number_of_batched_tasks: usize,
|
||||
db_path: bool,
|
||||
import_dump: bool,
|
||||
dump_dir: bool,
|
||||
@ -263,6 +264,8 @@ struct Infos {
|
||||
ignore_snapshot_if_db_exists: bool,
|
||||
http_addr: bool,
|
||||
http_payload_size_limit: Byte,
|
||||
task_queue_webhook: bool,
|
||||
task_webhook_authorization_header: bool,
|
||||
log_level: String,
|
||||
max_indexing_memory: MaxMemory,
|
||||
max_indexing_threads: MaxThreads,
|
||||
@ -285,9 +288,12 @@ impl From<Opt> for Infos {
|
||||
db_path,
|
||||
experimental_enable_metrics,
|
||||
experimental_reduce_indexing_memory_usage,
|
||||
experimental_max_number_of_batched_tasks,
|
||||
http_addr,
|
||||
master_key: _,
|
||||
env,
|
||||
task_webhook_url,
|
||||
task_webhook_authorization_header,
|
||||
max_index_size: _,
|
||||
max_task_db_size: _,
|
||||
http_payload_size_limit,
|
||||
@ -340,6 +346,9 @@ impl From<Opt> for Infos {
|
||||
ignore_snapshot_if_db_exists,
|
||||
http_addr: http_addr != default_http_addr(),
|
||||
http_payload_size_limit,
|
||||
experimental_max_number_of_batched_tasks,
|
||||
task_queue_webhook: task_webhook_url.is_some(),
|
||||
task_webhook_authorization_header: task_webhook_authorization_header.is_some(),
|
||||
log_level: log_level.to_string(),
|
||||
max_indexing_memory,
|
||||
max_indexing_threads,
|
||||
@ -583,6 +592,11 @@ pub struct SearchAggregator {
|
||||
// vector
|
||||
// The maximum number of floats in a vector request
|
||||
max_vector_size: usize,
|
||||
// Whether the semantic ratio passed to a hybrid search equals the default ratio.
|
||||
semantic_ratio: bool,
|
||||
// Whether a non-default embedder was specified
|
||||
embedder: bool,
|
||||
hybrid: bool,
|
||||
|
||||
// every time a search is done, we increment the counter linked to the used settings
|
||||
matching_strategy: HashMap<String, usize>,
|
||||
@ -636,6 +650,7 @@ impl SearchAggregator {
|
||||
crop_marker,
|
||||
matching_strategy,
|
||||
attributes_to_search_on,
|
||||
hybrid,
|
||||
} = query;
|
||||
|
||||
let mut ret = Self::default();
|
||||
@ -709,6 +724,12 @@ impl SearchAggregator {
|
||||
ret.show_ranking_score = *show_ranking_score;
|
||||
ret.show_ranking_score_details = *show_ranking_score_details;
|
||||
|
||||
if let Some(hybrid) = hybrid {
|
||||
ret.semantic_ratio = hybrid.semantic_ratio != DEFAULT_SEMANTIC_RATIO();
|
||||
ret.embedder = hybrid.embedder.is_some();
|
||||
ret.hybrid = true;
|
||||
}
|
||||
|
||||
ret
|
||||
}
|
||||
|
||||
@ -762,6 +783,9 @@ impl SearchAggregator {
|
||||
facets_total_number_of_facets,
|
||||
show_ranking_score,
|
||||
show_ranking_score_details,
|
||||
semantic_ratio,
|
||||
embedder,
|
||||
hybrid,
|
||||
} = other;
|
||||
|
||||
if self.timestamp.is_none() {
|
||||
@ -807,6 +831,9 @@ impl SearchAggregator {
|
||||
|
||||
// vector
|
||||
self.max_vector_size = self.max_vector_size.max(max_vector_size);
|
||||
self.semantic_ratio |= semantic_ratio;
|
||||
self.hybrid |= hybrid;
|
||||
self.embedder |= embedder;
|
||||
|
||||
// pagination
|
||||
self.max_limit = self.max_limit.max(max_limit);
|
||||
@ -875,6 +902,9 @@ impl SearchAggregator {
|
||||
facets_total_number_of_facets,
|
||||
show_ranking_score,
|
||||
show_ranking_score_details,
|
||||
semantic_ratio,
|
||||
embedder,
|
||||
hybrid,
|
||||
} = self;
|
||||
|
||||
if total_received == 0 {
|
||||
@ -914,6 +944,11 @@ impl SearchAggregator {
|
||||
"vector": {
|
||||
"max_vector_size": max_vector_size,
|
||||
},
|
||||
"hybrid": {
|
||||
"enabled": hybrid,
|
||||
"semantic_ratio": semantic_ratio,
|
||||
"embedder": embedder,
|
||||
},
|
||||
"pagination": {
|
||||
"max_limit": max_limit,
|
||||
"max_offset": max_offset,
|
||||
@ -1009,6 +1044,7 @@ impl MultiSearchAggregator {
|
||||
crop_marker: _,
|
||||
matching_strategy: _,
|
||||
attributes_to_search_on: _,
|
||||
hybrid: _,
|
||||
} = query;
|
||||
|
||||
index_uid.as_str()
|
||||
@ -1155,6 +1191,7 @@ impl FacetSearchAggregator {
|
||||
filter,
|
||||
matching_strategy,
|
||||
attributes_to_search_on,
|
||||
hybrid,
|
||||
} = query;
|
||||
|
||||
let mut ret = Self::default();
|
||||
@ -1168,7 +1205,8 @@ impl FacetSearchAggregator {
|
||||
|| vector.is_some()
|
||||
|| filter.is_some()
|
||||
|| *matching_strategy != MatchingStrategy::default()
|
||||
|| attributes_to_search_on.is_some();
|
||||
|| attributes_to_search_on.is_some()
|
||||
|| hybrid.is_some();
|
||||
|
||||
ret
|
||||
}
|
||||
|
@ -51,6 +51,8 @@ pub enum MeilisearchHttpError {
|
||||
DocumentFormat(#[from] DocumentFormatError),
|
||||
#[error(transparent)]
|
||||
Join(#[from] JoinError),
|
||||
#[error("Invalid request: missing `hybrid` parameter when both `q` and `vector` are present.")]
|
||||
MissingSearchHybrid,
|
||||
}
|
||||
|
||||
impl ErrorCode for MeilisearchHttpError {
|
||||
@ -74,6 +76,7 @@ impl ErrorCode for MeilisearchHttpError {
|
||||
MeilisearchHttpError::FileStore(_) => Code::Internal,
|
||||
MeilisearchHttpError::DocumentFormat(e) => e.error_code(),
|
||||
MeilisearchHttpError::Join(_) => Code::Internal,
|
||||
MeilisearchHttpError::MissingSearchHybrid => Code::MissingSearchHybrid,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -228,12 +228,15 @@ fn open_or_create_database_unchecked(
|
||||
indexes_path: opt.db_path.join("indexes"),
|
||||
snapshots_path: opt.snapshot_dir.clone(),
|
||||
dumps_path: opt.dump_dir.clone(),
|
||||
webhook_url: opt.task_webhook_url.as_ref().map(|url| url.to_string()),
|
||||
webhook_authorization_header: opt.task_webhook_authorization_header.clone(),
|
||||
task_db_size: opt.max_task_db_size.get_bytes() as usize,
|
||||
index_base_map_size: opt.max_index_size.get_bytes() as usize,
|
||||
enable_mdb_writemap: opt.experimental_reduce_indexing_memory_usage,
|
||||
indexer_config: (&opt.indexer_options).try_into()?,
|
||||
autobatching_enabled: true,
|
||||
max_number_of_tasks: 1_000_000,
|
||||
max_number_of_batched_tasks: opt.experimental_max_number_of_batched_tasks,
|
||||
index_growth_amount: byte_unit::Byte::from_str("10GiB").unwrap().get_bytes() as usize,
|
||||
index_count: DEFAULT_INDEX_COUNT,
|
||||
instance_features,
|
||||
@ -362,7 +365,7 @@ fn import_dump(
|
||||
update_method: IndexDocumentsMethod::ReplaceDocuments,
|
||||
..Default::default()
|
||||
},
|
||||
|indexing_step| log::debug!("update: {:?}", indexing_step),
|
||||
|indexing_step| log::trace!("update: {:?}", indexing_step),
|
||||
|| false,
|
||||
)?;
|
||||
|
||||
@ -397,6 +400,7 @@ pub fn configure_data(
|
||||
.app_data(web::Data::from(analytics))
|
||||
.app_data(
|
||||
web::JsonConfig::default()
|
||||
.limit(http_payload_size_limit)
|
||||
.content_type(|mime| mime == mime::APPLICATION_JSON)
|
||||
.error_handler(|err, req: &HttpRequest| match err {
|
||||
JsonPayloadError::ContentType => match req.headers().get(CONTENT_TYPE) {
|
||||
|
@ -19,7 +19,11 @@ static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;
|
||||
/// does all the setup before meilisearch is launched
|
||||
fn setup(opt: &Opt) -> anyhow::Result<()> {
|
||||
let mut log_builder = env_logger::Builder::new();
|
||||
log_builder.parse_filters(&opt.log_level.to_string());
|
||||
let log_filters = format!(
|
||||
"{},h2=warn,hyper=warn,tokio_util=warn,tracing=warn,rustls=warn,mio=warn,reqwest=warn",
|
||||
opt.log_level
|
||||
);
|
||||
log_builder.parse_filters(&log_filters);
|
||||
|
||||
log_builder.init();
|
||||
|
||||
|
@ -21,6 +21,7 @@ use rustls::RootCertStore;
|
||||
use rustls_pemfile::{certs, pkcs8_private_keys, rsa_private_keys};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use sysinfo::{RefreshKind, System, SystemExt};
|
||||
use url::Url;
|
||||
|
||||
const POSSIBLE_ENV: [&str; 2] = ["development", "production"];
|
||||
|
||||
@ -28,6 +29,8 @@ const MEILI_DB_PATH: &str = "MEILI_DB_PATH";
|
||||
const MEILI_HTTP_ADDR: &str = "MEILI_HTTP_ADDR";
|
||||
const MEILI_MASTER_KEY: &str = "MEILI_MASTER_KEY";
|
||||
const MEILI_ENV: &str = "MEILI_ENV";
|
||||
const MEILI_TASK_WEBHOOK_URL: &str = "MEILI_TASK_WEBHOOK_URL";
|
||||
const MEILI_TASK_WEBHOOK_AUTHORIZATION_HEADER: &str = "MEILI_TASK_WEBHOOK_AUTHORIZATION_HEADER";
|
||||
#[cfg(feature = "analytics")]
|
||||
const MEILI_NO_ANALYTICS: &str = "MEILI_NO_ANALYTICS";
|
||||
const MEILI_HTTP_PAYLOAD_SIZE_LIMIT: &str = "MEILI_HTTP_PAYLOAD_SIZE_LIMIT";
|
||||
@ -51,6 +54,8 @@ const MEILI_LOG_LEVEL: &str = "MEILI_LOG_LEVEL";
|
||||
const MEILI_EXPERIMENTAL_ENABLE_METRICS: &str = "MEILI_EXPERIMENTAL_ENABLE_METRICS";
|
||||
const MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE: &str =
|
||||
"MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE";
|
||||
const MEILI_EXPERIMENTAL_MAX_NUMBER_OF_BATCHED_TASKS: &str =
|
||||
"MEILI_EXPERIMENTAL_MAX_NUMBER_OF_BATCHED_TASKS";
|
||||
|
||||
const DEFAULT_CONFIG_FILE_PATH: &str = "./config.toml";
|
||||
const DEFAULT_DB_PATH: &str = "./data.ms";
|
||||
@ -154,6 +159,14 @@ pub struct Opt {
|
||||
#[serde(default = "default_env")]
|
||||
pub env: String,
|
||||
|
||||
/// Called whenever a task finishes so a third party can be notified.
|
||||
#[clap(long, env = MEILI_TASK_WEBHOOK_URL)]
|
||||
pub task_webhook_url: Option<Url>,
|
||||
|
||||
/// The Authorization header to send on the webhook URL whenever a task finishes so a third party can be notified.
|
||||
#[clap(long, env = MEILI_TASK_WEBHOOK_AUTHORIZATION_HEADER)]
|
||||
pub task_webhook_authorization_header: Option<String>,
|
||||
|
||||
/// Deactivates Meilisearch's built-in telemetry when provided.
|
||||
///
|
||||
/// Meilisearch automatically collects data from all instances that do not opt out using this flag.
|
||||
@ -301,6 +314,11 @@ pub struct Opt {
|
||||
#[serde(default)]
|
||||
pub experimental_reduce_indexing_memory_usage: bool,
|
||||
|
||||
/// Experimentally reduces the maximum number of tasks that will be processed at once, see: <https://github.com/orgs/meilisearch/discussions/713>
|
||||
#[clap(long, env = MEILI_EXPERIMENTAL_MAX_NUMBER_OF_BATCHED_TASKS, default_value_t = default_limit_batched_tasks())]
|
||||
#[serde(default = "default_limit_batched_tasks")]
|
||||
pub experimental_max_number_of_batched_tasks: usize,
|
||||
|
||||
#[serde(flatten)]
|
||||
#[clap(flatten)]
|
||||
pub indexer_options: IndexerOpts,
|
||||
@ -368,9 +386,12 @@ impl Opt {
|
||||
http_addr,
|
||||
master_key,
|
||||
env,
|
||||
task_webhook_url,
|
||||
task_webhook_authorization_header,
|
||||
max_index_size: _,
|
||||
max_task_db_size: _,
|
||||
http_payload_size_limit,
|
||||
experimental_max_number_of_batched_tasks,
|
||||
ssl_cert_path,
|
||||
ssl_key_path,
|
||||
ssl_auth_path,
|
||||
@ -392,8 +413,8 @@ impl Opt {
|
||||
config_file_path: _,
|
||||
#[cfg(feature = "analytics")]
|
||||
no_analytics,
|
||||
experimental_enable_metrics: enable_metrics_route,
|
||||
experimental_reduce_indexing_memory_usage: reduce_indexing_memory_usage,
|
||||
experimental_enable_metrics,
|
||||
experimental_reduce_indexing_memory_usage,
|
||||
} = self;
|
||||
export_to_env_if_not_present(MEILI_DB_PATH, db_path);
|
||||
export_to_env_if_not_present(MEILI_HTTP_ADDR, http_addr);
|
||||
@ -401,6 +422,16 @@ impl Opt {
|
||||
export_to_env_if_not_present(MEILI_MASTER_KEY, master_key);
|
||||
}
|
||||
export_to_env_if_not_present(MEILI_ENV, env);
|
||||
if let Some(task_webhook_url) = task_webhook_url {
|
||||
export_to_env_if_not_present(MEILI_TASK_WEBHOOK_URL, task_webhook_url.to_string());
|
||||
}
|
||||
if let Some(task_webhook_authorization_header) = task_webhook_authorization_header {
|
||||
export_to_env_if_not_present(
|
||||
MEILI_TASK_WEBHOOK_AUTHORIZATION_HEADER,
|
||||
task_webhook_authorization_header,
|
||||
);
|
||||
}
|
||||
|
||||
#[cfg(feature = "analytics")]
|
||||
{
|
||||
export_to_env_if_not_present(MEILI_NO_ANALYTICS, no_analytics.to_string());
|
||||
@ -409,6 +440,10 @@ impl Opt {
|
||||
MEILI_HTTP_PAYLOAD_SIZE_LIMIT,
|
||||
http_payload_size_limit.to_string(),
|
||||
);
|
||||
export_to_env_if_not_present(
|
||||
MEILI_EXPERIMENTAL_MAX_NUMBER_OF_BATCHED_TASKS,
|
||||
experimental_max_number_of_batched_tasks.to_string(),
|
||||
);
|
||||
if let Some(ssl_cert_path) = ssl_cert_path {
|
||||
export_to_env_if_not_present(MEILI_SSL_CERT_PATH, ssl_cert_path);
|
||||
}
|
||||
@ -433,11 +468,11 @@ impl Opt {
|
||||
export_to_env_if_not_present(MEILI_LOG_LEVEL, log_level.to_string());
|
||||
export_to_env_if_not_present(
|
||||
MEILI_EXPERIMENTAL_ENABLE_METRICS,
|
||||
enable_metrics_route.to_string(),
|
||||
experimental_enable_metrics.to_string(),
|
||||
);
|
||||
export_to_env_if_not_present(
|
||||
MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE,
|
||||
reduce_indexing_memory_usage.to_string(),
|
||||
experimental_reduce_indexing_memory_usage.to_string(),
|
||||
);
|
||||
indexer_options.export_to_env();
|
||||
}
|
||||
@ -727,6 +762,10 @@ fn default_http_payload_size_limit() -> Byte {
|
||||
Byte::from_str(DEFAULT_HTTP_PAYLOAD_SIZE_LIMIT).unwrap()
|
||||
}
|
||||
|
||||
fn default_limit_batched_tasks() -> usize {
|
||||
usize::MAX
|
||||
}
|
||||
|
||||
fn default_snapshot_dir() -> PathBuf {
|
||||
PathBuf::from(DEFAULT_SNAPSHOT_DIR)
|
||||
}
|
||||
|
@ -3,7 +3,7 @@ use std::io::ErrorKind;
|
||||
use actix_web::http::header::CONTENT_TYPE;
|
||||
use actix_web::web::Data;
|
||||
use actix_web::{web, HttpMessage, HttpRequest, HttpResponse};
|
||||
use bstr::ByteSlice;
|
||||
use bstr::ByteSlice as _;
|
||||
use deserr::actix_web::{AwebJson, AwebQueryParameter};
|
||||
use deserr::Deserr;
|
||||
use futures::StreamExt;
|
||||
@ -612,8 +612,8 @@ fn retrieve_document<S: AsRef<str>>(
|
||||
let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
|
||||
|
||||
let internal_id = index
|
||||
.external_documents_ids(&txn)?
|
||||
.get(doc_id.as_bytes())
|
||||
.external_documents_ids()
|
||||
.get(&txn, doc_id)?
|
||||
.ok_or_else(|| MeilisearchHttpError::DocumentNotFound(doc_id.to_string()))?;
|
||||
|
||||
let document = index
|
||||
|
@ -13,9 +13,9 @@ use crate::analytics::{Analytics, FacetSearchAggregator};
|
||||
use crate::extractors::authentication::policies::*;
|
||||
use crate::extractors::authentication::GuardedData;
|
||||
use crate::search::{
|
||||
add_search_rules, perform_facet_search, MatchingStrategy, SearchQuery, DEFAULT_CROP_LENGTH,
|
||||
DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG,
|
||||
DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET,
|
||||
add_search_rules, perform_facet_search, HybridQuery, MatchingStrategy, SearchQuery,
|
||||
DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG,
|
||||
DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET,
|
||||
};
|
||||
|
||||
pub fn configure(cfg: &mut web::ServiceConfig) {
|
||||
@ -36,6 +36,8 @@ pub struct FacetSearchQuery {
|
||||
pub q: Option<String>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchVector>)]
|
||||
pub vector: Option<Vec<f32>>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidHybridQuery>)]
|
||||
pub hybrid: Option<HybridQuery>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchFilter>)]
|
||||
pub filter: Option<Value>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchMatchingStrategy>, default)]
|
||||
@ -95,6 +97,7 @@ impl From<FacetSearchQuery> for SearchQuery {
|
||||
filter,
|
||||
matching_strategy,
|
||||
attributes_to_search_on,
|
||||
hybrid,
|
||||
} = value;
|
||||
|
||||
SearchQuery {
|
||||
@ -119,6 +122,7 @@ impl From<FacetSearchQuery> for SearchQuery {
|
||||
matching_strategy,
|
||||
vector,
|
||||
attributes_to_search_on,
|
||||
hybrid,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -2,12 +2,14 @@ use actix_web::web::Data;
|
||||
use actix_web::{web, HttpRequest, HttpResponse};
|
||||
use deserr::actix_web::{AwebJson, AwebQueryParameter};
|
||||
use index_scheduler::IndexScheduler;
|
||||
use log::debug;
|
||||
use log::{debug, warn};
|
||||
use meilisearch_types::deserr::query_params::Param;
|
||||
use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError};
|
||||
use meilisearch_types::error::deserr_codes::*;
|
||||
use meilisearch_types::error::ResponseError;
|
||||
use meilisearch_types::index_uid::IndexUid;
|
||||
use meilisearch_types::milli;
|
||||
use meilisearch_types::milli::vector::DistributionShift;
|
||||
use meilisearch_types::serde_cs::vec::CS;
|
||||
use serde_json::Value;
|
||||
|
||||
@ -16,9 +18,9 @@ use crate::extractors::authentication::policies::*;
|
||||
use crate::extractors::authentication::GuardedData;
|
||||
use crate::extractors::sequential_extractor::SeqHandler;
|
||||
use crate::search::{
|
||||
add_search_rules, perform_search, MatchingStrategy, SearchQuery, DEFAULT_CROP_LENGTH,
|
||||
DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG,
|
||||
DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET,
|
||||
add_search_rules, perform_search, HybridQuery, MatchingStrategy, SearchQuery, SemanticRatio,
|
||||
DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG,
|
||||
DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET, DEFAULT_SEMANTIC_RATIO,
|
||||
};
|
||||
|
||||
pub fn configure(cfg: &mut web::ServiceConfig) {
|
||||
@ -74,6 +76,31 @@ pub struct SearchQueryGet {
|
||||
matching_strategy: MatchingStrategy,
|
||||
#[deserr(default, error = DeserrQueryParamError<InvalidSearchAttributesToSearchOn>)]
|
||||
pub attributes_to_search_on: Option<CS<String>>,
|
||||
#[deserr(default, error = DeserrQueryParamError<InvalidEmbedder>)]
|
||||
pub hybrid_embedder: Option<String>,
|
||||
#[deserr(default, error = DeserrQueryParamError<InvalidSearchSemanticRatio>)]
|
||||
pub hybrid_semantic_ratio: Option<SemanticRatioGet>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, Default, PartialEq, deserr::Deserr)]
|
||||
#[deserr(try_from(String) = TryFrom::try_from -> InvalidSearchSemanticRatio)]
|
||||
pub struct SemanticRatioGet(SemanticRatio);
|
||||
|
||||
impl std::convert::TryFrom<String> for SemanticRatioGet {
|
||||
type Error = InvalidSearchSemanticRatio;
|
||||
|
||||
fn try_from(s: String) -> Result<Self, Self::Error> {
|
||||
let f: f32 = s.parse().map_err(|_| InvalidSearchSemanticRatio)?;
|
||||
Ok(SemanticRatioGet(SemanticRatio::try_from(f)?))
|
||||
}
|
||||
}
|
||||
|
||||
impl std::ops::Deref for SemanticRatioGet {
|
||||
type Target = SemanticRatio;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.0
|
||||
}
|
||||
}
|
||||
|
||||
impl From<SearchQueryGet> for SearchQuery {
|
||||
@ -86,6 +113,20 @@ impl From<SearchQueryGet> for SearchQuery {
|
||||
None => None,
|
||||
};
|
||||
|
||||
let hybrid = match (other.hybrid_embedder, other.hybrid_semantic_ratio) {
|
||||
(None, None) => None,
|
||||
(None, Some(semantic_ratio)) => {
|
||||
Some(HybridQuery { semantic_ratio: *semantic_ratio, embedder: None })
|
||||
}
|
||||
(Some(embedder), None) => Some(HybridQuery {
|
||||
semantic_ratio: DEFAULT_SEMANTIC_RATIO(),
|
||||
embedder: Some(embedder),
|
||||
}),
|
||||
(Some(embedder), Some(semantic_ratio)) => {
|
||||
Some(HybridQuery { semantic_ratio: *semantic_ratio, embedder: Some(embedder) })
|
||||
}
|
||||
};
|
||||
|
||||
Self {
|
||||
q: other.q,
|
||||
vector: other.vector.map(CS::into_inner),
|
||||
@ -108,6 +149,7 @@ impl From<SearchQueryGet> for SearchQuery {
|
||||
crop_marker: other.crop_marker,
|
||||
matching_strategy: other.matching_strategy,
|
||||
attributes_to_search_on: other.attributes_to_search_on.map(|o| o.into_iter().collect()),
|
||||
hybrid,
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -158,8 +200,12 @@ pub async fn search_with_url_query(
|
||||
|
||||
let index = index_scheduler.index(&index_uid)?;
|
||||
let features = index_scheduler.features();
|
||||
|
||||
let distribution = embed(&mut query, index_scheduler.get_ref(), &index).await?;
|
||||
|
||||
let search_result =
|
||||
tokio::task::spawn_blocking(move || perform_search(&index, query, features)).await?;
|
||||
tokio::task::spawn_blocking(move || perform_search(&index, query, features, distribution))
|
||||
.await?;
|
||||
if let Ok(ref search_result) = search_result {
|
||||
aggregate.succeed(search_result);
|
||||
}
|
||||
@ -193,8 +239,12 @@ pub async fn search_with_post(
|
||||
let index = index_scheduler.index(&index_uid)?;
|
||||
|
||||
let features = index_scheduler.features();
|
||||
|
||||
let distribution = embed(&mut query, index_scheduler.get_ref(), &index).await?;
|
||||
|
||||
let search_result =
|
||||
tokio::task::spawn_blocking(move || perform_search(&index, query, features)).await?;
|
||||
tokio::task::spawn_blocking(move || perform_search(&index, query, features, distribution))
|
||||
.await?;
|
||||
if let Ok(ref search_result) = search_result {
|
||||
aggregate.succeed(search_result);
|
||||
}
|
||||
@ -206,6 +256,80 @@ pub async fn search_with_post(
|
||||
Ok(HttpResponse::Ok().json(search_result))
|
||||
}
|
||||
|
||||
pub async fn embed(
|
||||
query: &mut SearchQuery,
|
||||
index_scheduler: &IndexScheduler,
|
||||
index: &milli::Index,
|
||||
) -> Result<Option<DistributionShift>, ResponseError> {
|
||||
match (&query.hybrid, &query.vector, &query.q) {
|
||||
(Some(HybridQuery { semantic_ratio: _, embedder }), None, Some(q))
|
||||
if !q.trim().is_empty() =>
|
||||
{
|
||||
let embedder_configs = index.embedding_configs(&index.read_txn()?)?;
|
||||
let embedders = index_scheduler.embedders(embedder_configs)?;
|
||||
|
||||
let embedder = if let Some(embedder_name) = embedder {
|
||||
embedders.get(embedder_name)
|
||||
} else {
|
||||
embedders.get_default()
|
||||
};
|
||||
|
||||
let embedder = embedder
|
||||
.ok_or(milli::UserError::InvalidEmbedder("default".to_owned()))
|
||||
.map_err(milli::Error::from)?
|
||||
.0;
|
||||
|
||||
let distribution = embedder.distribution();
|
||||
|
||||
let embeddings = embedder
|
||||
.embed(vec![q.to_owned()])
|
||||
.await
|
||||
.map_err(milli::vector::Error::from)
|
||||
.map_err(milli::Error::from)?
|
||||
.pop()
|
||||
.expect("No vector returned from embedding");
|
||||
|
||||
if embeddings.iter().nth(1).is_some() {
|
||||
warn!("Ignoring embeddings past the first one in long search query");
|
||||
query.vector = Some(embeddings.iter().next().unwrap().to_vec());
|
||||
} else {
|
||||
query.vector = Some(embeddings.into_inner());
|
||||
}
|
||||
Ok(distribution)
|
||||
}
|
||||
(Some(hybrid), vector, _) => {
|
||||
let embedder_configs = index.embedding_configs(&index.read_txn()?)?;
|
||||
let embedders = index_scheduler.embedders(embedder_configs)?;
|
||||
|
||||
let embedder = if let Some(embedder_name) = &hybrid.embedder {
|
||||
embedders.get(embedder_name)
|
||||
} else {
|
||||
embedders.get_default()
|
||||
};
|
||||
|
||||
let embedder = embedder
|
||||
.ok_or(milli::UserError::InvalidEmbedder("default".to_owned()))
|
||||
.map_err(milli::Error::from)?
|
||||
.0;
|
||||
|
||||
if let Some(vector) = vector {
|
||||
if vector.len() != embedder.dimensions() {
|
||||
return Err(meilisearch_types::milli::Error::UserError(
|
||||
meilisearch_types::milli::UserError::InvalidVectorDimensions {
|
||||
expected: embedder.dimensions(),
|
||||
found: vector.len(),
|
||||
},
|
||||
)
|
||||
.into());
|
||||
}
|
||||
}
|
||||
|
||||
Ok(embedder.distribution())
|
||||
}
|
||||
_ => Ok(None),
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
|
@ -7,6 +7,7 @@ use meilisearch_types::deserr::DeserrJsonError;
|
||||
use meilisearch_types::error::ResponseError;
|
||||
use meilisearch_types::facet_values_sort::FacetValuesSort;
|
||||
use meilisearch_types::index_uid::IndexUid;
|
||||
use meilisearch_types::milli::update::Setting;
|
||||
use meilisearch_types::settings::{settings, RankingRuleView, Settings, Unchecked};
|
||||
use meilisearch_types::tasks::KindWithContent;
|
||||
use serde_json::json;
|
||||
@ -78,6 +79,7 @@ macro_rules! make_setting_route {
|
||||
|
||||
let body = body.into_inner();
|
||||
|
||||
#[allow(clippy::redundant_closure_call)]
|
||||
$analytics(&body, &req);
|
||||
|
||||
let new_settings = Settings {
|
||||
@ -434,6 +436,31 @@ make_setting_route!(
|
||||
}
|
||||
);
|
||||
|
||||
make_setting_route!(
|
||||
"/proximity-precision",
|
||||
put,
|
||||
meilisearch_types::settings::ProximityPrecisionView,
|
||||
meilisearch_types::deserr::DeserrJsonError<
|
||||
meilisearch_types::error::deserr_codes::InvalidSettingsProximityPrecision,
|
||||
>,
|
||||
proximity_precision,
|
||||
"proximityPrecision",
|
||||
analytics,
|
||||
|precision: &Option<meilisearch_types::settings::ProximityPrecisionView>, req: &HttpRequest| {
|
||||
use serde_json::json;
|
||||
analytics.publish(
|
||||
"ProximityPrecision Updated".to_string(),
|
||||
json!({
|
||||
"proximity_precision": {
|
||||
"set": precision.is_some(),
|
||||
"value": precision,
|
||||
}
|
||||
}),
|
||||
Some(req),
|
||||
);
|
||||
}
|
||||
);
|
||||
|
||||
make_setting_route!(
|
||||
"/ranking-rules",
|
||||
put,
|
||||
@ -520,6 +547,67 @@ make_setting_route!(
|
||||
}
|
||||
);
|
||||
|
||||
make_setting_route!(
|
||||
"/embedders",
|
||||
patch,
|
||||
std::collections::BTreeMap<String, Setting<meilisearch_types::milli::vector::settings::EmbeddingSettings>>,
|
||||
meilisearch_types::deserr::DeserrJsonError<
|
||||
meilisearch_types::error::deserr_codes::InvalidSettingsEmbedders,
|
||||
>,
|
||||
embedders,
|
||||
"embedders",
|
||||
analytics,
|
||||
|setting: &Option<std::collections::BTreeMap<String, Setting<meilisearch_types::milli::vector::settings::EmbeddingSettings>>>, req: &HttpRequest| {
|
||||
|
||||
|
||||
analytics.publish(
|
||||
"Embedders Updated".to_string(),
|
||||
serde_json::json!({"embedders": crate::routes::indexes::settings::embedder_analytics(setting.as_ref())}),
|
||||
Some(req),
|
||||
);
|
||||
}
|
||||
);
|
||||
|
||||
fn embedder_analytics(
|
||||
setting: Option<
|
||||
&std::collections::BTreeMap<
|
||||
String,
|
||||
Setting<meilisearch_types::milli::vector::settings::EmbeddingSettings>,
|
||||
>,
|
||||
>,
|
||||
) -> serde_json::Value {
|
||||
let mut sources = std::collections::HashSet::new();
|
||||
|
||||
if let Some(s) = &setting {
|
||||
for source in s
|
||||
.values()
|
||||
.filter_map(|config| config.clone().set())
|
||||
.filter_map(|config| config.embedder_options.set())
|
||||
{
|
||||
use meilisearch_types::milli::vector::settings::EmbedderSettings;
|
||||
match source {
|
||||
EmbedderSettings::OpenAi(_) => sources.insert("openAi"),
|
||||
EmbedderSettings::HuggingFace(_) => sources.insert("huggingFace"),
|
||||
EmbedderSettings::UserProvided(_) => sources.insert("userProvided"),
|
||||
};
|
||||
}
|
||||
};
|
||||
|
||||
let document_template_used = setting.as_ref().map(|map| {
|
||||
map.values()
|
||||
.filter_map(|config| config.clone().set())
|
||||
.any(|config| config.document_template.set().is_some())
|
||||
});
|
||||
|
||||
json!(
|
||||
{
|
||||
"total": setting.as_ref().map(|s| s.len()),
|
||||
"sources": sources,
|
||||
"document_template_used": document_template_used,
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
macro_rules! generate_configure {
|
||||
($($mod:ident),*) => {
|
||||
pub fn configure(cfg: &mut web::ServiceConfig) {
|
||||
@ -540,6 +628,7 @@ generate_configure!(
|
||||
displayed_attributes,
|
||||
searchable_attributes,
|
||||
distinct_attribute,
|
||||
proximity_precision,
|
||||
stop_words,
|
||||
separator_tokens,
|
||||
non_separator_tokens,
|
||||
@ -548,7 +637,8 @@ generate_configure!(
|
||||
ranking_rules,
|
||||
typo_tolerance,
|
||||
pagination,
|
||||
faceting
|
||||
faceting,
|
||||
embedders
|
||||
);
|
||||
|
||||
pub async fn update_all(
|
||||
@ -593,6 +683,9 @@ pub async fn update_all(
|
||||
"distinct_attribute": {
|
||||
"set": new_settings.distinct_attribute.as_ref().set().is_some()
|
||||
},
|
||||
"proximity_precision": {
|
||||
"set": new_settings.proximity_precision.as_ref().set().is_some()
|
||||
},
|
||||
"typo_tolerance": {
|
||||
"enabled": new_settings.typo_tolerance
|
||||
.as_ref()
|
||||
@ -652,6 +745,7 @@ pub async fn update_all(
|
||||
"synonyms": {
|
||||
"total": new_settings.synonyms.as_ref().set().map(|synonyms| synonyms.len()),
|
||||
},
|
||||
"embedders": crate::routes::indexes::settings::embedder_analytics(new_settings.embedders.as_ref().set())
|
||||
}),
|
||||
Some(&req),
|
||||
);
|
||||
|
@ -13,6 +13,7 @@ use crate::analytics::{Analytics, MultiSearchAggregator};
|
||||
use crate::extractors::authentication::policies::ActionPolicy;
|
||||
use crate::extractors::authentication::{AuthenticationError, GuardedData};
|
||||
use crate::extractors::sequential_extractor::SeqHandler;
|
||||
use crate::routes::indexes::search::embed;
|
||||
use crate::search::{
|
||||
add_search_rules, perform_search, SearchQueryWithIndex, SearchResultWithIndex,
|
||||
};
|
||||
@ -46,49 +47,51 @@ pub async fn multi_search_with_post(
|
||||
// Explicitly expect a `(ResponseError, usize)` for the error type rather than `ResponseError` only,
|
||||
// so that `?` doesn't work if it doesn't use `with_index`, ensuring that it is not forgotten in case of code
|
||||
// changes.
|
||||
let search_results: Result<_, (ResponseError, usize)> = (|| {
|
||||
async {
|
||||
let mut search_results = Vec::with_capacity(queries.len());
|
||||
for (query_index, (index_uid, mut query)) in
|
||||
queries.into_iter().map(SearchQueryWithIndex::into_index_query).enumerate()
|
||||
{
|
||||
debug!("multi-search #{query_index}: called with params: {:?}", query);
|
||||
let search_results: Result<_, (ResponseError, usize)> = async {
|
||||
let mut search_results = Vec::with_capacity(queries.len());
|
||||
for (query_index, (index_uid, mut query)) in
|
||||
queries.into_iter().map(SearchQueryWithIndex::into_index_query).enumerate()
|
||||
{
|
||||
debug!("multi-search #{query_index}: called with params: {:?}", query);
|
||||
|
||||
// Check index from API key
|
||||
if !index_scheduler.filters().is_index_authorized(&index_uid) {
|
||||
return Err(AuthenticationError::InvalidToken).with_index(query_index);
|
||||
}
|
||||
// Apply search rules from tenant token
|
||||
if let Some(search_rules) =
|
||||
index_scheduler.filters().get_index_search_rules(&index_uid)
|
||||
{
|
||||
add_search_rules(&mut query, search_rules);
|
||||
}
|
||||
|
||||
let index = index_scheduler
|
||||
.index(&index_uid)
|
||||
.map_err(|err| {
|
||||
let mut err = ResponseError::from(err);
|
||||
// Patch the HTTP status code to 400 as it defaults to 404 for `index_not_found`, but
|
||||
// here the resource not found is not part of the URL.
|
||||
err.code = StatusCode::BAD_REQUEST;
|
||||
err
|
||||
})
|
||||
.with_index(query_index)?;
|
||||
|
||||
let search_result =
|
||||
tokio::task::spawn_blocking(move || perform_search(&index, query, features))
|
||||
.await
|
||||
.with_index(query_index)?;
|
||||
|
||||
search_results.push(SearchResultWithIndex {
|
||||
index_uid: index_uid.into_inner(),
|
||||
result: search_result.with_index(query_index)?,
|
||||
});
|
||||
// Check index from API key
|
||||
if !index_scheduler.filters().is_index_authorized(&index_uid) {
|
||||
return Err(AuthenticationError::InvalidToken).with_index(query_index);
|
||||
}
|
||||
Ok(search_results)
|
||||
// Apply search rules from tenant token
|
||||
if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid)
|
||||
{
|
||||
add_search_rules(&mut query, search_rules);
|
||||
}
|
||||
|
||||
let index = index_scheduler
|
||||
.index(&index_uid)
|
||||
.map_err(|err| {
|
||||
let mut err = ResponseError::from(err);
|
||||
// Patch the HTTP status code to 400 as it defaults to 404 for `index_not_found`, but
|
||||
// here the resource not found is not part of the URL.
|
||||
err.code = StatusCode::BAD_REQUEST;
|
||||
err
|
||||
})
|
||||
.with_index(query_index)?;
|
||||
|
||||
let distribution = embed(&mut query, index_scheduler.get_ref(), &index)
|
||||
.await
|
||||
.with_index(query_index)?;
|
||||
|
||||
let search_result = tokio::task::spawn_blocking(move || {
|
||||
perform_search(&index, query, features, distribution)
|
||||
})
|
||||
.await
|
||||
.with_index(query_index)?;
|
||||
|
||||
search_results.push(SearchResultWithIndex {
|
||||
index_uid: index_uid.into_inner(),
|
||||
result: search_result.with_index(query_index)?,
|
||||
});
|
||||
}
|
||||
})()
|
||||
Ok(search_results)
|
||||
}
|
||||
.await;
|
||||
|
||||
if search_results.is_ok() {
|
||||
|
@ -8,11 +8,9 @@ use meilisearch_types::deserr::DeserrQueryParamError;
|
||||
use meilisearch_types::error::deserr_codes::*;
|
||||
use meilisearch_types::error::{InvalidTaskDateError, ResponseError};
|
||||
use meilisearch_types::index_uid::IndexUid;
|
||||
use meilisearch_types::settings::{Settings, Unchecked};
|
||||
use meilisearch_types::star_or::{OptionStarOr, OptionStarOrList};
|
||||
use meilisearch_types::tasks::{
|
||||
serialize_duration, Details, IndexSwap, Kind, KindWithContent, Status, Task,
|
||||
};
|
||||
use meilisearch_types::task_view::TaskView;
|
||||
use meilisearch_types::tasks::{Kind, KindWithContent, Status};
|
||||
use serde::Serialize;
|
||||
use serde_json::json;
|
||||
use time::format_description::well_known::Rfc3339;
|
||||
@ -37,140 +35,6 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
|
||||
.service(web::resource("/cancel").route(web::post().to(SeqHandler(cancel_tasks))))
|
||||
.service(web::resource("/{task_id}").route(web::get().to(SeqHandler(get_task))));
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct TaskView {
|
||||
pub uid: TaskId,
|
||||
#[serde(default)]
|
||||
pub index_uid: Option<String>,
|
||||
pub status: Status,
|
||||
#[serde(rename = "type")]
|
||||
pub kind: Kind,
|
||||
pub canceled_by: Option<TaskId>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub details: Option<DetailsView>,
|
||||
pub error: Option<ResponseError>,
|
||||
#[serde(serialize_with = "serialize_duration", default)]
|
||||
pub duration: Option<Duration>,
|
||||
#[serde(with = "time::serde::rfc3339")]
|
||||
pub enqueued_at: OffsetDateTime,
|
||||
#[serde(with = "time::serde::rfc3339::option", default)]
|
||||
pub started_at: Option<OffsetDateTime>,
|
||||
#[serde(with = "time::serde::rfc3339::option", default)]
|
||||
pub finished_at: Option<OffsetDateTime>,
|
||||
}
|
||||
|
||||
impl TaskView {
|
||||
pub fn from_task(task: &Task) -> TaskView {
|
||||
TaskView {
|
||||
uid: task.uid,
|
||||
index_uid: task.index_uid().map(ToOwned::to_owned),
|
||||
status: task.status,
|
||||
kind: task.kind.as_kind(),
|
||||
canceled_by: task.canceled_by,
|
||||
details: task.details.clone().map(DetailsView::from),
|
||||
error: task.error.clone(),
|
||||
duration: task.started_at.zip(task.finished_at).map(|(start, end)| end - start),
|
||||
enqueued_at: task.enqueued_at,
|
||||
started_at: task.started_at,
|
||||
finished_at: task.finished_at,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Default, Debug, PartialEq, Eq, Clone, Serialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct DetailsView {
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub received_documents: Option<u64>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub indexed_documents: Option<Option<u64>>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub primary_key: Option<Option<String>>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub provided_ids: Option<usize>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub deleted_documents: Option<Option<u64>>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub matched_tasks: Option<u64>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub canceled_tasks: Option<Option<u64>>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub deleted_tasks: Option<Option<u64>>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub original_filter: Option<Option<String>>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub dump_uid: Option<Option<String>>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
#[serde(flatten)]
|
||||
pub settings: Option<Box<Settings<Unchecked>>>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub swaps: Option<Vec<IndexSwap>>,
|
||||
}
|
||||
|
||||
impl From<Details> for DetailsView {
|
||||
fn from(details: Details) -> Self {
|
||||
match details {
|
||||
Details::DocumentAdditionOrUpdate { received_documents, indexed_documents } => {
|
||||
DetailsView {
|
||||
received_documents: Some(received_documents),
|
||||
indexed_documents: Some(indexed_documents),
|
||||
..DetailsView::default()
|
||||
}
|
||||
}
|
||||
Details::SettingsUpdate { settings } => {
|
||||
DetailsView { settings: Some(settings), ..DetailsView::default() }
|
||||
}
|
||||
Details::IndexInfo { primary_key } => {
|
||||
DetailsView { primary_key: Some(primary_key), ..DetailsView::default() }
|
||||
}
|
||||
Details::DocumentDeletion {
|
||||
provided_ids: received_document_ids,
|
||||
deleted_documents,
|
||||
} => DetailsView {
|
||||
provided_ids: Some(received_document_ids),
|
||||
deleted_documents: Some(deleted_documents),
|
||||
original_filter: Some(None),
|
||||
..DetailsView::default()
|
||||
},
|
||||
Details::DocumentDeletionByFilter { original_filter, deleted_documents } => {
|
||||
DetailsView {
|
||||
provided_ids: Some(0),
|
||||
original_filter: Some(Some(original_filter)),
|
||||
deleted_documents: Some(deleted_documents),
|
||||
..DetailsView::default()
|
||||
}
|
||||
}
|
||||
Details::ClearAll { deleted_documents } => {
|
||||
DetailsView { deleted_documents: Some(deleted_documents), ..DetailsView::default() }
|
||||
}
|
||||
Details::TaskCancelation { matched_tasks, canceled_tasks, original_filter } => {
|
||||
DetailsView {
|
||||
matched_tasks: Some(matched_tasks),
|
||||
canceled_tasks: Some(canceled_tasks),
|
||||
original_filter: Some(Some(original_filter)),
|
||||
..DetailsView::default()
|
||||
}
|
||||
}
|
||||
Details::TaskDeletion { matched_tasks, deleted_tasks, original_filter } => {
|
||||
DetailsView {
|
||||
matched_tasks: Some(matched_tasks),
|
||||
deleted_tasks: Some(deleted_tasks),
|
||||
original_filter: Some(Some(original_filter)),
|
||||
..DetailsView::default()
|
||||
}
|
||||
}
|
||||
Details::Dump { dump_uid } => {
|
||||
DetailsView { dump_uid: Some(dump_uid), ..DetailsView::default() }
|
||||
}
|
||||
Details::IndexSwap { swaps } => {
|
||||
DetailsView { swaps: Some(swaps), ..Default::default() }
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserr)]
|
||||
#[deserr(error = DeserrQueryParamError, rename_all = camelCase, deny_unknown_fields)]
|
||||
pub struct TasksFilterQuery {
|
||||
|
@ -7,24 +7,21 @@ use deserr::Deserr;
|
||||
use either::Either;
|
||||
use index_scheduler::RoFeatures;
|
||||
use indexmap::IndexMap;
|
||||
use log::warn;
|
||||
use meilisearch_auth::IndexSearchRules;
|
||||
use meilisearch_types::deserr::DeserrJsonError;
|
||||
use meilisearch_types::error::deserr_codes::*;
|
||||
use meilisearch_types::heed::RoTxn;
|
||||
use meilisearch_types::index_uid::IndexUid;
|
||||
use meilisearch_types::milli::score_details::{ScoreDetails, ScoringStrategy};
|
||||
use meilisearch_types::milli::{
|
||||
dot_product_similarity, FacetValueHit, InternalError, OrderBy, SearchForFacetValues,
|
||||
};
|
||||
use meilisearch_types::milli::score_details::{self, ScoreDetails, ScoringStrategy};
|
||||
use meilisearch_types::milli::vector::DistributionShift;
|
||||
use meilisearch_types::milli::{FacetValueHit, OrderBy, SearchForFacetValues};
|
||||
use meilisearch_types::settings::DEFAULT_PAGINATION_MAX_TOTAL_HITS;
|
||||
use meilisearch_types::{milli, Document};
|
||||
use milli::tokenizer::TokenizerBuilder;
|
||||
use milli::{
|
||||
AscDesc, FieldId, FieldsIdsMap, Filter, FormatOptions, Index, MatchBounds, MatcherBuilder,
|
||||
SortError, TermsMatchingStrategy, VectorOrArrayOfVectors, DEFAULT_VALUES_PER_FACET,
|
||||
SortError, TermsMatchingStrategy, DEFAULT_VALUES_PER_FACET,
|
||||
};
|
||||
use ordered_float::OrderedFloat;
|
||||
use regex::Regex;
|
||||
use serde::Serialize;
|
||||
use serde_json::{json, Value};
|
||||
@ -39,6 +36,7 @@ pub const DEFAULT_CROP_LENGTH: fn() -> usize = || 10;
|
||||
pub const DEFAULT_CROP_MARKER: fn() -> String = || "…".to_string();
|
||||
pub const DEFAULT_HIGHLIGHT_PRE_TAG: fn() -> String = || "<em>".to_string();
|
||||
pub const DEFAULT_HIGHLIGHT_POST_TAG: fn() -> String = || "</em>".to_string();
|
||||
pub const DEFAULT_SEMANTIC_RATIO: fn() -> SemanticRatio = || SemanticRatio(0.5);
|
||||
|
||||
#[derive(Debug, Clone, Default, PartialEq, Deserr)]
|
||||
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
|
||||
@ -47,6 +45,8 @@ pub struct SearchQuery {
|
||||
pub q: Option<String>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchVector>)]
|
||||
pub vector: Option<Vec<f32>>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidHybridQuery>)]
|
||||
pub hybrid: Option<HybridQuery>,
|
||||
#[deserr(default = DEFAULT_SEARCH_OFFSET(), error = DeserrJsonError<InvalidSearchOffset>)]
|
||||
pub offset: usize,
|
||||
#[deserr(default = DEFAULT_SEARCH_LIMIT(), error = DeserrJsonError<InvalidSearchLimit>)]
|
||||
@ -87,6 +87,48 @@ pub struct SearchQuery {
|
||||
pub attributes_to_search_on: Option<Vec<String>>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Default, PartialEq, Deserr)]
|
||||
#[deserr(error = DeserrJsonError<InvalidHybridQuery>, rename_all = camelCase, deny_unknown_fields)]
|
||||
pub struct HybridQuery {
|
||||
/// TODO validate that sementic ratio is between 0.0 and 1,0
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchSemanticRatio>, default)]
|
||||
pub semantic_ratio: SemanticRatio,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidEmbedder>, default)]
|
||||
pub embedder: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Deserr)]
|
||||
#[deserr(try_from(f32) = TryFrom::try_from -> InvalidSearchSemanticRatio)]
|
||||
pub struct SemanticRatio(f32);
|
||||
|
||||
impl Default for SemanticRatio {
|
||||
fn default() -> Self {
|
||||
DEFAULT_SEMANTIC_RATIO()
|
||||
}
|
||||
}
|
||||
|
||||
impl std::convert::TryFrom<f32> for SemanticRatio {
|
||||
type Error = InvalidSearchSemanticRatio;
|
||||
|
||||
fn try_from(f: f32) -> Result<Self, Self::Error> {
|
||||
// the suggested "fix" is: `!(0.0..=1.0).contains(&f)`` which is allegedly less readable
|
||||
#[allow(clippy::manual_range_contains)]
|
||||
if f > 1.0 || f < 0.0 {
|
||||
Err(InvalidSearchSemanticRatio)
|
||||
} else {
|
||||
Ok(SemanticRatio(f))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl std::ops::Deref for SemanticRatio {
|
||||
type Target = f32;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.0
|
||||
}
|
||||
}
|
||||
|
||||
impl SearchQuery {
|
||||
pub fn is_finite_pagination(&self) -> bool {
|
||||
self.page.or(self.hits_per_page).is_some()
|
||||
@ -106,6 +148,8 @@ pub struct SearchQueryWithIndex {
|
||||
pub q: Option<String>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchQ>)]
|
||||
pub vector: Option<Vec<f32>>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidHybridQuery>)]
|
||||
pub hybrid: Option<HybridQuery>,
|
||||
#[deserr(default = DEFAULT_SEARCH_OFFSET(), error = DeserrJsonError<InvalidSearchOffset>)]
|
||||
pub offset: usize,
|
||||
#[deserr(default = DEFAULT_SEARCH_LIMIT(), error = DeserrJsonError<InvalidSearchLimit>)]
|
||||
@ -171,6 +215,7 @@ impl SearchQueryWithIndex {
|
||||
crop_marker,
|
||||
matching_strategy,
|
||||
attributes_to_search_on,
|
||||
hybrid,
|
||||
} = self;
|
||||
(
|
||||
index_uid,
|
||||
@ -196,6 +241,7 @@ impl SearchQueryWithIndex {
|
||||
crop_marker,
|
||||
matching_strategy,
|
||||
attributes_to_search_on,
|
||||
hybrid,
|
||||
// do not use ..Default::default() here,
|
||||
// rather add any missing field from `SearchQuery` to `SearchQueryWithIndex`
|
||||
},
|
||||
@ -335,19 +381,44 @@ fn prepare_search<'t>(
|
||||
rtxn: &'t RoTxn,
|
||||
query: &'t SearchQuery,
|
||||
features: RoFeatures,
|
||||
distribution: Option<DistributionShift>,
|
||||
) -> Result<(milli::Search<'t>, bool, usize, usize), MeilisearchHttpError> {
|
||||
let mut search = index.search(rtxn);
|
||||
|
||||
if query.vector.is_some() && query.q.is_some() {
|
||||
warn!("Ignoring the query string `q` when used with the `vector` parameter.");
|
||||
if query.vector.is_some() {
|
||||
features.check_vector("Passing `vector` as a query parameter")?;
|
||||
}
|
||||
|
||||
if query.hybrid.is_some() {
|
||||
features.check_vector("Passing `hybrid` as a query parameter")?;
|
||||
}
|
||||
|
||||
if query.hybrid.is_none() && query.q.is_some() && query.vector.is_some() {
|
||||
return Err(MeilisearchHttpError::MissingSearchHybrid);
|
||||
}
|
||||
|
||||
search.distribution_shift(distribution);
|
||||
|
||||
if let Some(ref vector) = query.vector {
|
||||
search.vector(vector.clone());
|
||||
match &query.hybrid {
|
||||
// If semantic ratio is 0.0, only the query search will impact the search results,
|
||||
// skip the vector
|
||||
Some(hybrid) if *hybrid.semantic_ratio == 0.0 => (),
|
||||
_otherwise => {
|
||||
search.vector(vector.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(ref query) = query.q {
|
||||
search.query(query);
|
||||
if let Some(ref q) = query.q {
|
||||
match &query.hybrid {
|
||||
// If semantic ratio is 1.0, only the vector search will impact the search results,
|
||||
// skip the query
|
||||
Some(hybrid) if *hybrid.semantic_ratio == 1.0 => (),
|
||||
_otherwise => {
|
||||
search.query(q);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(ref searchable) = query.attributes_to_search_on {
|
||||
@ -360,6 +431,7 @@ fn prepare_search<'t>(
|
||||
let max_total_hits = index
|
||||
.pagination_max_total_hits(rtxn)
|
||||
.map_err(milli::Error::from)?
|
||||
.map(|x| x as usize)
|
||||
.unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS);
|
||||
|
||||
search.exhaustive_number_hits(is_finite_pagination);
|
||||
@ -373,8 +445,8 @@ fn prepare_search<'t>(
|
||||
features.check_score_details()?;
|
||||
}
|
||||
|
||||
if query.vector.is_some() {
|
||||
features.check_vector()?;
|
||||
if let Some(HybridQuery { embedder: Some(embedder), .. }) = &query.hybrid {
|
||||
search.embedder_name(embedder);
|
||||
}
|
||||
|
||||
// compute the offset on the limit depending on the pagination mode.
|
||||
@ -420,15 +492,22 @@ pub fn perform_search(
|
||||
index: &Index,
|
||||
query: SearchQuery,
|
||||
features: RoFeatures,
|
||||
distribution: Option<DistributionShift>,
|
||||
) -> Result<SearchResult, MeilisearchHttpError> {
|
||||
let before_search = Instant::now();
|
||||
let rtxn = index.read_txn()?;
|
||||
|
||||
let (search, is_finite_pagination, max_total_hits, offset) =
|
||||
prepare_search(index, &rtxn, &query, features)?;
|
||||
prepare_search(index, &rtxn, &query, features, distribution)?;
|
||||
|
||||
let milli::SearchResult { documents_ids, matching_words, candidates, document_scores, .. } =
|
||||
search.execute()?;
|
||||
match &query.hybrid {
|
||||
Some(hybrid) => match *hybrid.semantic_ratio {
|
||||
ratio if ratio == 0.0 || ratio == 1.0 => search.execute()?,
|
||||
ratio => search.execute_hybrid(ratio)?,
|
||||
},
|
||||
None => search.execute()?,
|
||||
};
|
||||
|
||||
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
|
||||
@ -537,13 +616,17 @@ pub fn perform_search(
|
||||
insert_geo_distance(sort, &mut document);
|
||||
}
|
||||
|
||||
let semantic_score = match query.vector.as_ref() {
|
||||
Some(vector) => match extract_field("_vectors", &fields_ids_map, obkv)? {
|
||||
Some(vectors) => compute_semantic_score(vector, vectors)?,
|
||||
None => None,
|
||||
},
|
||||
None => None,
|
||||
};
|
||||
let mut semantic_score = None;
|
||||
for details in &score {
|
||||
if let ScoreDetails::Vector(score_details::Vector {
|
||||
target_vector: _,
|
||||
value_similarity: Some((_matching_vector, similarity)),
|
||||
}) = details
|
||||
{
|
||||
semantic_score = Some(*similarity);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
let ranking_score =
|
||||
query.show_ranking_score.then(|| ScoreDetails::global_score(score.iter()));
|
||||
@ -586,6 +669,7 @@ pub fn perform_search(
|
||||
let max_values_by_facet = index
|
||||
.max_values_per_facet(&rtxn)
|
||||
.map_err(milli::Error::from)?
|
||||
.map(|x| x as usize)
|
||||
.unwrap_or(DEFAULT_VALUES_PER_FACET);
|
||||
facet_distribution.max_values_per_facet(max_values_by_facet);
|
||||
|
||||
@ -645,8 +729,9 @@ pub fn perform_facet_search(
|
||||
let before_search = Instant::now();
|
||||
let rtxn = index.read_txn()?;
|
||||
|
||||
let (search, _, _, _) = prepare_search(index, &rtxn, &search_query, features)?;
|
||||
let mut facet_search = SearchForFacetValues::new(facet_name, search);
|
||||
let (search, _, _, _) = prepare_search(index, &rtxn, &search_query, features, None)?;
|
||||
let mut facet_search =
|
||||
SearchForFacetValues::new(facet_name, search, search_query.hybrid.is_some());
|
||||
if let Some(facet_query) = &facet_query {
|
||||
facet_search.query(facet_query);
|
||||
}
|
||||
@ -674,18 +759,6 @@ fn insert_geo_distance(sorts: &[String], document: &mut Document) {
|
||||
}
|
||||
}
|
||||
|
||||
fn compute_semantic_score(query: &[f32], vectors: Value) -> milli::Result<Option<f32>> {
|
||||
let vectors = serde_json::from_value(vectors)
|
||||
.map(VectorOrArrayOfVectors::into_array_of_vectors)
|
||||
.map_err(InternalError::SerdeJson)?;
|
||||
Ok(vectors
|
||||
.into_iter()
|
||||
.flatten()
|
||||
.map(|v| OrderedFloat(dot_product_similarity(query, &v)))
|
||||
.max()
|
||||
.map(OrderedFloat::into_inner))
|
||||
}
|
||||
|
||||
fn compute_formatted_options(
|
||||
attr_to_highlight: &HashSet<String>,
|
||||
attr_to_crop: &[String],
|
||||
@ -813,22 +886,6 @@ fn make_document(
|
||||
Ok(document)
|
||||
}
|
||||
|
||||
/// Extract the JSON value under the field name specified
|
||||
/// but doesn't support nested objects.
|
||||
fn extract_field(
|
||||
field_name: &str,
|
||||
field_ids_map: &FieldsIdsMap,
|
||||
obkv: obkv::KvReaderU16,
|
||||
) -> Result<Option<serde_json::Value>, MeilisearchHttpError> {
|
||||
match field_ids_map.id(field_name) {
|
||||
Some(fid) => match obkv.get(fid) {
|
||||
Some(value) => Ok(serde_json::from_slice(value).map(Some)?),
|
||||
None => Ok(None),
|
||||
},
|
||||
None => Ok(None),
|
||||
}
|
||||
}
|
||||
|
||||
fn format_fields<'a>(
|
||||
document: &Document,
|
||||
field_ids_map: &FieldsIdsMap,
|
||||
|
Binary file not shown.
@ -5,9 +5,11 @@ pub mod service;
|
||||
|
||||
use std::fmt::{self, Display};
|
||||
|
||||
#[allow(unused)]
|
||||
pub use index::{GetAllDocumentsOptions, GetDocumentOptions};
|
||||
use meili_snap::json_string;
|
||||
use serde::{Deserialize, Serialize};
|
||||
#[allow(unused)]
|
||||
pub use server::{default_settings, Server};
|
||||
|
||||
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)]
|
||||
|
@ -397,7 +397,7 @@ async fn delete_document_by_complex_filter() {
|
||||
"canceledBy": null,
|
||||
"details": {
|
||||
"providedIds": 0,
|
||||
"deletedDocuments": 4,
|
||||
"deletedDocuments": 2,
|
||||
"originalFilter": "[[\"color = green\",\"color NOT EXISTS\"]]"
|
||||
},
|
||||
"error": null,
|
||||
|
@ -20,6 +20,8 @@ pub enum GetDump {
|
||||
RubyGemsWithSettingsV4,
|
||||
|
||||
TestV5,
|
||||
|
||||
TestV6WithExperimental,
|
||||
}
|
||||
|
||||
impl GetDump {
|
||||
@ -68,6 +70,10 @@ impl GetDump {
|
||||
GetDump::TestV5 => {
|
||||
exist_relative_path!("tests/assets/v5_v0.28.0_test_dump.dump").into()
|
||||
}
|
||||
GetDump::TestV6WithExperimental => exist_relative_path!(
|
||||
"tests/assets/v6_v1.6.0_use_deactivated_experimental_setting.dump"
|
||||
)
|
||||
.into(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -59,6 +59,7 @@ async fn import_dump_v1_movie_raw() {
|
||||
"dictionary": [],
|
||||
"synonyms": {},
|
||||
"distinctAttribute": null,
|
||||
"proximityPrecision": null,
|
||||
"typoTolerance": {
|
||||
"enabled": true,
|
||||
"minWordSizeForTypos": {
|
||||
@ -76,7 +77,8 @@ async fn import_dump_v1_movie_raw() {
|
||||
},
|
||||
"pagination": {
|
||||
"maxTotalHits": 1000
|
||||
}
|
||||
},
|
||||
"embedders": {}
|
||||
}
|
||||
"###
|
||||
);
|
||||
@ -219,6 +221,7 @@ async fn import_dump_v1_movie_with_settings() {
|
||||
"dictionary": [],
|
||||
"synonyms": {},
|
||||
"distinctAttribute": null,
|
||||
"proximityPrecision": null,
|
||||
"typoTolerance": {
|
||||
"enabled": true,
|
||||
"minWordSizeForTypos": {
|
||||
@ -236,7 +239,8 @@ async fn import_dump_v1_movie_with_settings() {
|
||||
},
|
||||
"pagination": {
|
||||
"maxTotalHits": 1000
|
||||
}
|
||||
},
|
||||
"embedders": {}
|
||||
}
|
||||
"###
|
||||
);
|
||||
@ -365,6 +369,7 @@ async fn import_dump_v1_rubygems_with_settings() {
|
||||
"dictionary": [],
|
||||
"synonyms": {},
|
||||
"distinctAttribute": null,
|
||||
"proximityPrecision": null,
|
||||
"typoTolerance": {
|
||||
"enabled": true,
|
||||
"minWordSizeForTypos": {
|
||||
@ -382,7 +387,8 @@ async fn import_dump_v1_rubygems_with_settings() {
|
||||
},
|
||||
"pagination": {
|
||||
"maxTotalHits": 1000
|
||||
}
|
||||
},
|
||||
"embedders": {}
|
||||
}
|
||||
"###
|
||||
);
|
||||
@ -497,6 +503,7 @@ async fn import_dump_v2_movie_raw() {
|
||||
"dictionary": [],
|
||||
"synonyms": {},
|
||||
"distinctAttribute": null,
|
||||
"proximityPrecision": null,
|
||||
"typoTolerance": {
|
||||
"enabled": true,
|
||||
"minWordSizeForTypos": {
|
||||
@ -514,7 +521,8 @@ async fn import_dump_v2_movie_raw() {
|
||||
},
|
||||
"pagination": {
|
||||
"maxTotalHits": 1000
|
||||
}
|
||||
},
|
||||
"embedders": {}
|
||||
}
|
||||
"###
|
||||
);
|
||||
@ -641,6 +649,7 @@ async fn import_dump_v2_movie_with_settings() {
|
||||
"dictionary": [],
|
||||
"synonyms": {},
|
||||
"distinctAttribute": null,
|
||||
"proximityPrecision": null,
|
||||
"typoTolerance": {
|
||||
"enabled": true,
|
||||
"minWordSizeForTypos": {
|
||||
@ -658,7 +667,8 @@ async fn import_dump_v2_movie_with_settings() {
|
||||
},
|
||||
"pagination": {
|
||||
"maxTotalHits": 1000
|
||||
}
|
||||
},
|
||||
"embedders": {}
|
||||
}
|
||||
"###
|
||||
);
|
||||
@ -784,6 +794,7 @@ async fn import_dump_v2_rubygems_with_settings() {
|
||||
"dictionary": [],
|
||||
"synonyms": {},
|
||||
"distinctAttribute": null,
|
||||
"proximityPrecision": null,
|
||||
"typoTolerance": {
|
||||
"enabled": true,
|
||||
"minWordSizeForTypos": {
|
||||
@ -801,7 +812,8 @@ async fn import_dump_v2_rubygems_with_settings() {
|
||||
},
|
||||
"pagination": {
|
||||
"maxTotalHits": 1000
|
||||
}
|
||||
},
|
||||
"embedders": {}
|
||||
}
|
||||
"###
|
||||
);
|
||||
@ -916,6 +928,7 @@ async fn import_dump_v3_movie_raw() {
|
||||
"dictionary": [],
|
||||
"synonyms": {},
|
||||
"distinctAttribute": null,
|
||||
"proximityPrecision": null,
|
||||
"typoTolerance": {
|
||||
"enabled": true,
|
||||
"minWordSizeForTypos": {
|
||||
@ -933,7 +946,8 @@ async fn import_dump_v3_movie_raw() {
|
||||
},
|
||||
"pagination": {
|
||||
"maxTotalHits": 1000
|
||||
}
|
||||
},
|
||||
"embedders": {}
|
||||
}
|
||||
"###
|
||||
);
|
||||
@ -1060,6 +1074,7 @@ async fn import_dump_v3_movie_with_settings() {
|
||||
"dictionary": [],
|
||||
"synonyms": {},
|
||||
"distinctAttribute": null,
|
||||
"proximityPrecision": null,
|
||||
"typoTolerance": {
|
||||
"enabled": true,
|
||||
"minWordSizeForTypos": {
|
||||
@ -1077,7 +1092,8 @@ async fn import_dump_v3_movie_with_settings() {
|
||||
},
|
||||
"pagination": {
|
||||
"maxTotalHits": 1000
|
||||
}
|
||||
},
|
||||
"embedders": {}
|
||||
}
|
||||
"###
|
||||
);
|
||||
@ -1203,6 +1219,7 @@ async fn import_dump_v3_rubygems_with_settings() {
|
||||
"dictionary": [],
|
||||
"synonyms": {},
|
||||
"distinctAttribute": null,
|
||||
"proximityPrecision": null,
|
||||
"typoTolerance": {
|
||||
"enabled": true,
|
||||
"minWordSizeForTypos": {
|
||||
@ -1220,7 +1237,8 @@ async fn import_dump_v3_rubygems_with_settings() {
|
||||
},
|
||||
"pagination": {
|
||||
"maxTotalHits": 1000
|
||||
}
|
||||
},
|
||||
"embedders": {}
|
||||
}
|
||||
"###
|
||||
);
|
||||
@ -1335,6 +1353,7 @@ async fn import_dump_v4_movie_raw() {
|
||||
"dictionary": [],
|
||||
"synonyms": {},
|
||||
"distinctAttribute": null,
|
||||
"proximityPrecision": null,
|
||||
"typoTolerance": {
|
||||
"enabled": true,
|
||||
"minWordSizeForTypos": {
|
||||
@ -1352,7 +1371,8 @@ async fn import_dump_v4_movie_raw() {
|
||||
},
|
||||
"pagination": {
|
||||
"maxTotalHits": 1000
|
||||
}
|
||||
},
|
||||
"embedders": {}
|
||||
}
|
||||
"###
|
||||
);
|
||||
@ -1479,6 +1499,7 @@ async fn import_dump_v4_movie_with_settings() {
|
||||
"dictionary": [],
|
||||
"synonyms": {},
|
||||
"distinctAttribute": null,
|
||||
"proximityPrecision": null,
|
||||
"typoTolerance": {
|
||||
"enabled": true,
|
||||
"minWordSizeForTypos": {
|
||||
@ -1496,7 +1517,8 @@ async fn import_dump_v4_movie_with_settings() {
|
||||
},
|
||||
"pagination": {
|
||||
"maxTotalHits": 1000
|
||||
}
|
||||
},
|
||||
"embedders": {}
|
||||
}
|
||||
"###
|
||||
);
|
||||
@ -1622,6 +1644,7 @@ async fn import_dump_v4_rubygems_with_settings() {
|
||||
"dictionary": [],
|
||||
"synonyms": {},
|
||||
"distinctAttribute": null,
|
||||
"proximityPrecision": null,
|
||||
"typoTolerance": {
|
||||
"enabled": true,
|
||||
"minWordSizeForTypos": {
|
||||
@ -1639,7 +1662,8 @@ async fn import_dump_v4_rubygems_with_settings() {
|
||||
},
|
||||
"pagination": {
|
||||
"maxTotalHits": 1000
|
||||
}
|
||||
},
|
||||
"embedders": {}
|
||||
}
|
||||
"###
|
||||
);
|
||||
@ -1810,3 +1834,108 @@ async fn import_dump_v5() {
|
||||
json_string!(tasks, { ".results[].details.dumpUid" => "[uid]", ".results[].duration" => "[duration]" , ".results[].startedAt" => "[date]" , ".results[].finishedAt" => "[date]" })
|
||||
);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn import_dump_v6_containing_experimental_features() {
|
||||
let temp = tempfile::tempdir().unwrap();
|
||||
|
||||
let options = Opt {
|
||||
import_dump: Some(GetDump::TestV6WithExperimental.path()),
|
||||
..default_settings(temp.path())
|
||||
};
|
||||
let mut server = Server::new_auth_with_options(options, temp).await;
|
||||
server.use_api_key("MASTER_KEY");
|
||||
|
||||
let (indexes, code) = server.list_indexes(None, None).await;
|
||||
assert_eq!(code, 200, "{indexes}");
|
||||
|
||||
assert_eq!(indexes["results"].as_array().unwrap().len(), 1);
|
||||
assert_eq!(indexes["results"][0]["uid"], json!("movies"));
|
||||
assert_eq!(indexes["results"][0]["primaryKey"], json!("id"));
|
||||
|
||||
let (response, code) = server.get_features().await;
|
||||
meili_snap::snapshot!(code, @"200 OK");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"scoreDetails": false,
|
||||
"vectorStore": false,
|
||||
"metrics": false,
|
||||
"exportPuffinReports": false
|
||||
}
|
||||
"###);
|
||||
|
||||
let index = server.index("movies");
|
||||
|
||||
let (response, code) = index.settings().await;
|
||||
meili_snap::snapshot!(code, @"200 OK");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"displayedAttributes": [
|
||||
"*"
|
||||
],
|
||||
"searchableAttributes": [
|
||||
"*"
|
||||
],
|
||||
"filterableAttributes": [],
|
||||
"sortableAttributes": [],
|
||||
"rankingRules": [
|
||||
"words",
|
||||
"typo",
|
||||
"proximity"
|
||||
],
|
||||
"stopWords": [],
|
||||
"nonSeparatorTokens": [],
|
||||
"separatorTokens": [],
|
||||
"dictionary": [],
|
||||
"synonyms": {},
|
||||
"distinctAttribute": null,
|
||||
"proximityPrecision": "byAttribute",
|
||||
"typoTolerance": {
|
||||
"enabled": true,
|
||||
"minWordSizeForTypos": {
|
||||
"oneTypo": 5,
|
||||
"twoTypos": 9
|
||||
},
|
||||
"disableOnWords": [],
|
||||
"disableOnAttributes": []
|
||||
},
|
||||
"faceting": {
|
||||
"maxValuesPerFacet": 100,
|
||||
"sortFacetValuesBy": {
|
||||
"*": "alpha"
|
||||
}
|
||||
},
|
||||
"pagination": {
|
||||
"maxTotalHits": 1000
|
||||
},
|
||||
"embedders": {}
|
||||
}
|
||||
"###);
|
||||
|
||||
// the expected order is [1, 3, 2] instead of [3, 1, 2]
|
||||
// because the attribute scale doesn't make the difference between 1 and 3.
|
||||
index
|
||||
.search(json!({"q": "the soup of day"}), |response, code| {
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response["hits"]), @r###"
|
||||
[
|
||||
{
|
||||
"id": 1,
|
||||
"a": "Soup of the day",
|
||||
"b": "many the fish"
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"a": "the Soup of day",
|
||||
"b": "many the fish"
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"a": "Soup of day",
|
||||
"b": "many the lazy fish"
|
||||
}
|
||||
]
|
||||
"###);
|
||||
})
|
||||
.await;
|
||||
}
|
||||
|
@ -4,23 +4,111 @@ use once_cell::sync::Lazy;
|
||||
use crate::common::{Server, Value};
|
||||
use crate::json;
|
||||
|
||||
pub(self) static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
|
||||
static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
|
||||
json!([
|
||||
{"productId": 1, "shopId": 1},
|
||||
{"productId": 2, "shopId": 1},
|
||||
{"productId": 3, "shopId": 2},
|
||||
{"productId": 4, "shopId": 2},
|
||||
{"productId": 5, "shopId": 3},
|
||||
{"productId": 6, "shopId": 3},
|
||||
{"productId": 7, "shopId": 4},
|
||||
{"productId": 8, "shopId": 4},
|
||||
{"productId": 9, "shopId": 5},
|
||||
{"productId": 10, "shopId": 5}
|
||||
{
|
||||
"id": 1,
|
||||
"description": "Leather Jacket",
|
||||
"brand": "Lee Jeans",
|
||||
"product_id": "123456",
|
||||
"color": "Brown"
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"description": "Leather Jacket",
|
||||
"brand": "Lee Jeans",
|
||||
"product_id": "123456",
|
||||
"color": "Black"
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"description": "Leather Jacket",
|
||||
"brand": "Lee Jeans",
|
||||
"product_id": "123456",
|
||||
"color": "Blue"
|
||||
},
|
||||
{
|
||||
"id": 4,
|
||||
"description": "T-Shirt",
|
||||
"brand": "Nike",
|
||||
"product_id": "789012",
|
||||
"color": "Red"
|
||||
},
|
||||
{
|
||||
"id": 5,
|
||||
"description": "T-Shirt",
|
||||
"brand": "Nike",
|
||||
"product_id": "789012",
|
||||
"color": "Blue"
|
||||
},
|
||||
{
|
||||
"id": 6,
|
||||
"description": "Running Shoes",
|
||||
"brand": "Adidas",
|
||||
"product_id": "456789",
|
||||
"color": "Black"
|
||||
},
|
||||
{
|
||||
"id": 7,
|
||||
"description": "Running Shoes",
|
||||
"brand": "Adidas",
|
||||
"product_id": "456789",
|
||||
"color": "White"
|
||||
},
|
||||
{
|
||||
"id": 8,
|
||||
"description": "Hoodie",
|
||||
"brand": "Puma",
|
||||
"product_id": "987654",
|
||||
"color": "Gray"
|
||||
},
|
||||
{
|
||||
"id": 9,
|
||||
"description": "Sweater",
|
||||
"brand": "Gap",
|
||||
"product_id": "234567",
|
||||
"color": "Green"
|
||||
},
|
||||
{
|
||||
"id": 10,
|
||||
"description": "Sweater",
|
||||
"brand": "Gap",
|
||||
"product_id": "234567",
|
||||
"color": "Red"
|
||||
},
|
||||
{
|
||||
"id": 11,
|
||||
"description": "Sweater",
|
||||
"brand": "Gap",
|
||||
"product_id": "234567",
|
||||
"color": "Blue"
|
||||
},
|
||||
{
|
||||
"id": 12,
|
||||
"description": "Jeans",
|
||||
"brand": "Levi's",
|
||||
"product_id": "345678",
|
||||
"color": "Indigo"
|
||||
},
|
||||
{
|
||||
"id": 13,
|
||||
"description": "Jeans",
|
||||
"brand": "Levi's",
|
||||
"product_id": "345678",
|
||||
"color": "Black"
|
||||
},
|
||||
{
|
||||
"id": 14,
|
||||
"description": "Jeans",
|
||||
"brand": "Levi's",
|
||||
"product_id": "345678",
|
||||
"color": "Stone Wash"
|
||||
}
|
||||
])
|
||||
});
|
||||
|
||||
pub(self) static DOCUMENT_PRIMARY_KEY: &str = "productId";
|
||||
pub(self) static DOCUMENT_DISTINCT_KEY: &str = "shopId";
|
||||
static DOCUMENT_PRIMARY_KEY: &str = "id";
|
||||
static DOCUMENT_DISTINCT_KEY: &str = "product_id";
|
||||
|
||||
/// testing: https://github.com/meilisearch/meilisearch/issues/4078
|
||||
#[actix_rt::test]
|
||||
@ -33,31 +121,121 @@ async fn distinct_search_with_offset_no_ranking() {
|
||||
index.update_distinct_attribute(json!(DOCUMENT_DISTINCT_KEY)).await;
|
||||
index.wait_task(1).await;
|
||||
|
||||
fn get_hits(Value(response): Value) -> Vec<i64> {
|
||||
fn get_hits(response: &Value) -> Vec<&str> {
|
||||
let hits_array = response["hits"].as_array().unwrap();
|
||||
hits_array.iter().map(|h| h[DOCUMENT_DISTINCT_KEY].as_i64().unwrap()).collect::<Vec<_>>()
|
||||
hits_array.iter().map(|h| h[DOCUMENT_DISTINCT_KEY].as_str().unwrap()).collect::<Vec<_>>()
|
||||
}
|
||||
|
||||
let (response, code) = index.search_post(json!({"limit": 2, "offset": 0})).await;
|
||||
let hits = get_hits(response);
|
||||
let (response, code) = index.search_post(json!({"offset": 0, "limit": 2})).await;
|
||||
let hits = get_hits(&response);
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(hits.len(), @"2");
|
||||
snapshot!(format!("{:?}", hits), @"[1, 2]");
|
||||
snapshot!(format!("{:?}", hits), @r#"["123456", "789012"]"#);
|
||||
snapshot!(response["estimatedTotalHits"] , @"11");
|
||||
|
||||
let (response, code) = index.search_post(json!({"limit": 2, "offset": 2})).await;
|
||||
let hits = get_hits(response);
|
||||
let (response, code) = index.search_post(json!({"offset": 2, "limit": 2})).await;
|
||||
let hits = get_hits(&response);
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(hits.len(), @"2");
|
||||
snapshot!(format!("{:?}", hits), @"[3, 4]");
|
||||
snapshot!(format!("{:?}", hits), @r#"["456789", "987654"]"#);
|
||||
snapshot!(response["estimatedTotalHits"], @"10");
|
||||
|
||||
let (response, code) = index.search_post(json!({"limit": 10, "offset": 4})).await;
|
||||
let hits = get_hits(response);
|
||||
let (response, code) = index.search_post(json!({"offset": 4, "limit": 2})).await;
|
||||
let hits = get_hits(&response);
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(hits.len(), @"2");
|
||||
snapshot!(format!("{:?}", hits), @r#"["234567", "345678"]"#);
|
||||
snapshot!(response["estimatedTotalHits"], @"6");
|
||||
|
||||
let (response, code) = index.search_post(json!({"offset": 5, "limit": 2})).await;
|
||||
let hits = get_hits(&response);
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(hits.len(), @"1");
|
||||
snapshot!(format!("{:?}", hits), @"[5]");
|
||||
snapshot!(format!("{:?}", hits), @r#"["345678"]"#);
|
||||
snapshot!(response["estimatedTotalHits"], @"6");
|
||||
|
||||
let (response, code) = index.search_post(json!({"limit": 10, "offset": 5})).await;
|
||||
let hits = get_hits(response);
|
||||
let (response, code) = index.search_post(json!({"offset": 6, "limit": 2})).await;
|
||||
let hits = get_hits(&response);
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(hits.len(), @"0");
|
||||
snapshot!(format!("{:?}", hits), @r#"[]"#);
|
||||
snapshot!(response["estimatedTotalHits"], @"6");
|
||||
|
||||
let (response, code) = index.search_post(json!({"offset": 7, "limit": 2})).await;
|
||||
let hits = get_hits(&response);
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(hits.len(), @"0");
|
||||
snapshot!(format!("{:?}", hits), @r#"[]"#);
|
||||
snapshot!(response["estimatedTotalHits"], @"6");
|
||||
}
|
||||
|
||||
/// testing: https://github.com/meilisearch/meilisearch/issues/4130
|
||||
#[actix_rt::test]
|
||||
async fn distinct_search_with_pagination_no_ranking() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("test");
|
||||
|
||||
let documents = DOCUMENTS.clone();
|
||||
index.add_documents(documents, Some(DOCUMENT_PRIMARY_KEY)).await;
|
||||
index.update_distinct_attribute(json!(DOCUMENT_DISTINCT_KEY)).await;
|
||||
index.wait_task(1).await;
|
||||
|
||||
fn get_hits(response: &Value) -> Vec<&str> {
|
||||
let hits_array = response["hits"].as_array().unwrap();
|
||||
hits_array.iter().map(|h| h[DOCUMENT_DISTINCT_KEY].as_str().unwrap()).collect::<Vec<_>>()
|
||||
}
|
||||
|
||||
let (response, code) = index.search_post(json!({"page": 0, "hitsPerPage": 2})).await;
|
||||
let hits = get_hits(&response);
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(hits.len(), @"0");
|
||||
snapshot!(format!("{:?}", hits), @r#"[]"#);
|
||||
snapshot!(response["page"], @"0");
|
||||
snapshot!(response["totalPages"], @"3");
|
||||
snapshot!(response["totalHits"], @"6");
|
||||
|
||||
let (response, code) = index.search_post(json!({"page": 1, "hitsPerPage": 2})).await;
|
||||
let hits = get_hits(&response);
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(hits.len(), @"2");
|
||||
snapshot!(format!("{:?}", hits), @r#"["123456", "789012"]"#);
|
||||
snapshot!(response["page"], @"1");
|
||||
snapshot!(response["totalPages"], @"3");
|
||||
snapshot!(response["totalHits"], @"6");
|
||||
|
||||
let (response, code) = index.search_post(json!({"page": 2, "hitsPerPage": 2})).await;
|
||||
let hits = get_hits(&response);
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(hits.len(), @"2");
|
||||
snapshot!(format!("{:?}", hits), @r#"["456789", "987654"]"#);
|
||||
snapshot!(response["page"], @"2");
|
||||
snapshot!(response["totalPages"], @"3");
|
||||
snapshot!(response["totalHits"], @"6");
|
||||
|
||||
let (response, code) = index.search_post(json!({"page": 3, "hitsPerPage": 2})).await;
|
||||
let hits = get_hits(&response);
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(hits.len(), @"2");
|
||||
snapshot!(format!("{:?}", hits), @r#"["234567", "345678"]"#);
|
||||
snapshot!(response["page"], @"3");
|
||||
snapshot!(response["totalPages"], @"3");
|
||||
snapshot!(response["totalHits"], @"6");
|
||||
|
||||
let (response, code) = index.search_post(json!({"page": 4, "hitsPerPage": 2})).await;
|
||||
let hits = get_hits(&response);
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(hits.len(), @"0");
|
||||
snapshot!(format!("{:?}", hits), @r#"[]"#);
|
||||
snapshot!(response["page"], @"4");
|
||||
snapshot!(response["totalPages"], @"3");
|
||||
snapshot!(response["totalHits"], @"6");
|
||||
|
||||
let (response, code) = index.search_post(json!({"page": 2, "hitsPerPage": 3})).await;
|
||||
let hits = get_hits(&response);
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(hits.len(), @"3");
|
||||
snapshot!(format!("{:?}", hits), @r#"["987654", "234567", "345678"]"#);
|
||||
snapshot!(response["page"], @"2");
|
||||
snapshot!(response["totalPages"], @"2");
|
||||
snapshot!(response["totalHits"], @"6");
|
||||
}
|
||||
|
@ -4,7 +4,7 @@ use once_cell::sync::Lazy;
|
||||
use crate::common::{Server, Value};
|
||||
use crate::json;
|
||||
|
||||
pub(self) static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
|
||||
static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
|
||||
json!([
|
||||
{
|
||||
"title": "Shazam!",
|
||||
|
@ -4,7 +4,7 @@ use once_cell::sync::Lazy;
|
||||
use crate::common::{Server, Value};
|
||||
use crate::json;
|
||||
|
||||
pub(self) static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
|
||||
static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
|
||||
json!([
|
||||
{
|
||||
"id": 1,
|
||||
|
151
meilisearch/tests/search/hybrid.rs
Normal file
151
meilisearch/tests/search/hybrid.rs
Normal file
@ -0,0 +1,151 @@
|
||||
use meili_snap::snapshot;
|
||||
use once_cell::sync::Lazy;
|
||||
|
||||
use crate::common::index::Index;
|
||||
use crate::common::{Server, Value};
|
||||
use crate::json;
|
||||
|
||||
async fn index_with_documents<'a>(server: &'a Server, documents: &Value) -> Index<'a> {
|
||||
let index = server.index("test");
|
||||
|
||||
let (response, code) = server.set_features(json!({"vectorStore": true})).await;
|
||||
|
||||
meili_snap::snapshot!(code, @"200 OK");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"scoreDetails": false,
|
||||
"vectorStore": true,
|
||||
"metrics": false,
|
||||
"exportPuffinReports": false
|
||||
}
|
||||
"###);
|
||||
|
||||
let (response, code) = index
|
||||
.update_settings(
|
||||
json!({ "embedders": {"default": {"source": {"userProvided": {"dimensions": 2}}}} }),
|
||||
)
|
||||
.await;
|
||||
assert_eq!(202, code, "{:?}", response);
|
||||
index.wait_task(response.uid()).await;
|
||||
|
||||
let (response, code) = index.add_documents(documents.clone(), None).await;
|
||||
assert_eq!(202, code, "{:?}", response);
|
||||
index.wait_task(response.uid()).await;
|
||||
index
|
||||
}
|
||||
|
||||
static SIMPLE_SEARCH_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
|
||||
json!([
|
||||
{
|
||||
"title": "Shazam!",
|
||||
"desc": "a Captain Marvel ersatz",
|
||||
"id": "1",
|
||||
"_vectors": {"default": [1.0, 3.0]},
|
||||
},
|
||||
{
|
||||
"title": "Captain Planet",
|
||||
"desc": "He's not part of the Marvel Cinematic Universe",
|
||||
"id": "2",
|
||||
"_vectors": {"default": [1.0, 2.0]},
|
||||
},
|
||||
{
|
||||
"title": "Captain Marvel",
|
||||
"desc": "a Shazam ersatz",
|
||||
"id": "3",
|
||||
"_vectors": {"default": [2.0, 3.0]},
|
||||
}])
|
||||
});
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn simple_search() {
|
||||
let server = Server::new().await;
|
||||
let index = index_with_documents(&server, &SIMPLE_SEARCH_DOCUMENTS).await;
|
||||
|
||||
let (response, code) = index
|
||||
.search_post(
|
||||
json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.2}}),
|
||||
)
|
||||
.await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(response["hits"], @r###"[{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]}},{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]}},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]}}]"###);
|
||||
|
||||
let (response, code) = index
|
||||
.search_post(
|
||||
json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.8}}),
|
||||
)
|
||||
.await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_semanticScore":0.99029034},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_semanticScore":0.97434163},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_semanticScore":0.9472136}]"###);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn invalid_semantic_ratio() {
|
||||
let server = Server::new().await;
|
||||
let index = index_with_documents(&server, &SIMPLE_SEARCH_DOCUMENTS).await;
|
||||
|
||||
let (response, code) = index
|
||||
.search_post(
|
||||
json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 1.2}}),
|
||||
)
|
||||
.await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
"message": "Invalid value at `.hybrid.semanticRatio`: the value of `semanticRatio` is invalid, expected a float between `0.0` and `1.0`.",
|
||||
"code": "invalid_search_semantic_ratio",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_search_semantic_ratio"
|
||||
}
|
||||
"###);
|
||||
|
||||
let (response, code) = index
|
||||
.search_post(
|
||||
json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": -0.8}}),
|
||||
)
|
||||
.await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
"message": "Invalid value at `.hybrid.semanticRatio`: the value of `semanticRatio` is invalid, expected a float between `0.0` and `1.0`.",
|
||||
"code": "invalid_search_semantic_ratio",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_search_semantic_ratio"
|
||||
}
|
||||
"###);
|
||||
|
||||
let (response, code) = index
|
||||
.search_get(
|
||||
&yaup::to_string(
|
||||
&json!({"q": "Captain", "vector": [1.0, 1.0], "hybridSemanticRatio": 1.2}),
|
||||
)
|
||||
.unwrap(),
|
||||
)
|
||||
.await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
"message": "Invalid value in parameter `hybridSemanticRatio`: the value of `semanticRatio` is invalid, expected a float between `0.0` and `1.0`.",
|
||||
"code": "invalid_search_semantic_ratio",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_search_semantic_ratio"
|
||||
}
|
||||
"###);
|
||||
|
||||
let (response, code) = index
|
||||
.search_get(
|
||||
&yaup::to_string(
|
||||
&json!({"q": "Captain", "vector": [1.0, 1.0], "hybridSemanticRatio": -0.2}),
|
||||
)
|
||||
.unwrap(),
|
||||
)
|
||||
.await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
"message": "Invalid value in parameter `hybridSemanticRatio`: the value of `semanticRatio` is invalid, expected a float between `0.0` and `1.0`.",
|
||||
"code": "invalid_search_semantic_ratio",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_search_semantic_ratio"
|
||||
}
|
||||
"###);
|
||||
}
|
@ -6,6 +6,7 @@ mod errors;
|
||||
mod facet_search;
|
||||
mod formatted;
|
||||
mod geo;
|
||||
mod hybrid;
|
||||
mod multi;
|
||||
mod pagination;
|
||||
mod restrict_searchable;
|
||||
@ -15,32 +16,37 @@ use once_cell::sync::Lazy;
|
||||
use crate::common::{Server, Value};
|
||||
use crate::json;
|
||||
|
||||
pub(self) static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
|
||||
static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
|
||||
json!([
|
||||
{
|
||||
"title": "Shazam!",
|
||||
"id": "287947",
|
||||
"_vectors": { "manual": [1, 2, 3]},
|
||||
},
|
||||
{
|
||||
"title": "Captain Marvel",
|
||||
"id": "299537",
|
||||
"_vectors": { "manual": [1, 2, 54] },
|
||||
},
|
||||
{
|
||||
"title": "Escape Room",
|
||||
"id": "522681",
|
||||
"_vectors": { "manual": [10, -23, 32] },
|
||||
},
|
||||
{
|
||||
"title": "How to Train Your Dragon: The Hidden World",
|
||||
"id": "166428",
|
||||
"_vectors": { "manual": [-100, 231, 32] },
|
||||
},
|
||||
{
|
||||
"title": "Gläss",
|
||||
"id": "450465",
|
||||
"_vectors": { "manual": [-100, 340, 90] },
|
||||
}
|
||||
])
|
||||
});
|
||||
|
||||
pub(self) static NESTED_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
|
||||
static NESTED_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
|
||||
json!([
|
||||
{
|
||||
"id": 852,
|
||||
@ -57,6 +63,7 @@ pub(self) static NESTED_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
|
||||
},
|
||||
],
|
||||
"cattos": "pésti",
|
||||
"_vectors": { "manual": [1, 2, 3]},
|
||||
},
|
||||
{
|
||||
"id": 654,
|
||||
@ -69,12 +76,14 @@ pub(self) static NESTED_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
|
||||
},
|
||||
],
|
||||
"cattos": ["simba", "pestiféré"],
|
||||
"_vectors": { "manual": [1, 2, 54] },
|
||||
},
|
||||
{
|
||||
"id": 750,
|
||||
"father": "romain",
|
||||
"mother": "michelle",
|
||||
"cattos": ["enigma"],
|
||||
"_vectors": { "manual": [10, 23, 32] },
|
||||
},
|
||||
{
|
||||
"id": 951,
|
||||
@ -91,6 +100,7 @@ pub(self) static NESTED_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
|
||||
},
|
||||
],
|
||||
"cattos": ["moumoute", "gomez"],
|
||||
"_vectors": { "manual": [10, 23, 32] },
|
||||
},
|
||||
])
|
||||
});
|
||||
@ -802,6 +812,13 @@ async fn experimental_feature_score_details() {
|
||||
{
|
||||
"title": "How to Train Your Dragon: The Hidden World",
|
||||
"id": "166428",
|
||||
"_vectors": {
|
||||
"manual": [
|
||||
-100,
|
||||
231,
|
||||
32
|
||||
]
|
||||
},
|
||||
"_rankingScoreDetails": {
|
||||
"words": {
|
||||
"order": 0,
|
||||
@ -823,7 +840,7 @@ async fn experimental_feature_score_details() {
|
||||
"order": 3,
|
||||
"attributeRankingOrderScore": 1.0,
|
||||
"queryWordDistanceScore": 0.8095238095238095,
|
||||
"score": 0.9365079365079364
|
||||
"score": 0.9727891156462584
|
||||
},
|
||||
"exactness": {
|
||||
"order": 4,
|
||||
@ -870,13 +887,92 @@ async fn experimental_feature_vector_store() {
|
||||
meili_snap::snapshot!(code, @"200 OK");
|
||||
meili_snap::snapshot!(response["vectorStore"], @"true");
|
||||
|
||||
let (response, code) = index
|
||||
.update_settings(json!({"embedders": {
|
||||
"manual": {
|
||||
"source": {
|
||||
"userProvided": {"dimensions": 3}
|
||||
}
|
||||
}
|
||||
}}))
|
||||
.await;
|
||||
|
||||
meili_snap::snapshot!(code, @"202 Accepted");
|
||||
let response = index.wait_task(response.uid()).await;
|
||||
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response["status"]), @"\"succeeded\"");
|
||||
|
||||
let (response, code) = index
|
||||
.search_post(json!({
|
||||
"vector": [1.0, 2.0, 3.0],
|
||||
}))
|
||||
.await;
|
||||
|
||||
meili_snap::snapshot!(code, @"200 OK");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @"[]");
|
||||
// vector search returns all documents that don't have vectors in the last bucket, like all sorts
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @r###"
|
||||
[
|
||||
{
|
||||
"title": "Shazam!",
|
||||
"id": "287947",
|
||||
"_vectors": {
|
||||
"manual": [
|
||||
1,
|
||||
2,
|
||||
3
|
||||
]
|
||||
},
|
||||
"_semanticScore": 1.0
|
||||
},
|
||||
{
|
||||
"title": "Captain Marvel",
|
||||
"id": "299537",
|
||||
"_vectors": {
|
||||
"manual": [
|
||||
1,
|
||||
2,
|
||||
54
|
||||
]
|
||||
},
|
||||
"_semanticScore": 0.9129112
|
||||
},
|
||||
{
|
||||
"title": "Gläss",
|
||||
"id": "450465",
|
||||
"_vectors": {
|
||||
"manual": [
|
||||
-100,
|
||||
340,
|
||||
90
|
||||
]
|
||||
},
|
||||
"_semanticScore": 0.8106413
|
||||
},
|
||||
{
|
||||
"title": "How to Train Your Dragon: The Hidden World",
|
||||
"id": "166428",
|
||||
"_vectors": {
|
||||
"manual": [
|
||||
-100,
|
||||
231,
|
||||
32
|
||||
]
|
||||
},
|
||||
"_semanticScore": 0.74120104
|
||||
},
|
||||
{
|
||||
"title": "Escape Room",
|
||||
"id": "522681",
|
||||
"_vectors": {
|
||||
"manual": [
|
||||
10,
|
||||
-23,
|
||||
32
|
||||
]
|
||||
}
|
||||
}
|
||||
]
|
||||
"###);
|
||||
}
|
||||
|
||||
#[cfg(feature = "default")]
|
||||
@ -1126,7 +1222,14 @@ async fn simple_search_with_strange_synonyms() {
|
||||
[
|
||||
{
|
||||
"title": "How to Train Your Dragon: The Hidden World",
|
||||
"id": "166428"
|
||||
"id": "166428",
|
||||
"_vectors": {
|
||||
"manual": [
|
||||
-100,
|
||||
231,
|
||||
32
|
||||
]
|
||||
}
|
||||
}
|
||||
]
|
||||
"###);
|
||||
@ -1140,7 +1243,14 @@ async fn simple_search_with_strange_synonyms() {
|
||||
[
|
||||
{
|
||||
"title": "How to Train Your Dragon: The Hidden World",
|
||||
"id": "166428"
|
||||
"id": "166428",
|
||||
"_vectors": {
|
||||
"manual": [
|
||||
-100,
|
||||
231,
|
||||
32
|
||||
]
|
||||
}
|
||||
}
|
||||
]
|
||||
"###);
|
||||
@ -1154,7 +1264,14 @@ async fn simple_search_with_strange_synonyms() {
|
||||
[
|
||||
{
|
||||
"title": "How to Train Your Dragon: The Hidden World",
|
||||
"id": "166428"
|
||||
"id": "166428",
|
||||
"_vectors": {
|
||||
"manual": [
|
||||
-100,
|
||||
231,
|
||||
32
|
||||
]
|
||||
}
|
||||
}
|
||||
]
|
||||
"###);
|
||||
|
@ -72,7 +72,14 @@ async fn simple_search_single_index() {
|
||||
"hits": [
|
||||
{
|
||||
"title": "Gläss",
|
||||
"id": "450465"
|
||||
"id": "450465",
|
||||
"_vectors": {
|
||||
"manual": [
|
||||
-100,
|
||||
340,
|
||||
90
|
||||
]
|
||||
}
|
||||
}
|
||||
],
|
||||
"query": "glass",
|
||||
@ -86,7 +93,14 @@ async fn simple_search_single_index() {
|
||||
"hits": [
|
||||
{
|
||||
"title": "Captain Marvel",
|
||||
"id": "299537"
|
||||
"id": "299537",
|
||||
"_vectors": {
|
||||
"manual": [
|
||||
1,
|
||||
2,
|
||||
54
|
||||
]
|
||||
}
|
||||
}
|
||||
],
|
||||
"query": "captain",
|
||||
@ -177,7 +191,14 @@ async fn simple_search_two_indexes() {
|
||||
"hits": [
|
||||
{
|
||||
"title": "Gläss",
|
||||
"id": "450465"
|
||||
"id": "450465",
|
||||
"_vectors": {
|
||||
"manual": [
|
||||
-100,
|
||||
340,
|
||||
90
|
||||
]
|
||||
}
|
||||
}
|
||||
],
|
||||
"query": "glass",
|
||||
@ -203,7 +224,14 @@ async fn simple_search_two_indexes() {
|
||||
"age": 4
|
||||
}
|
||||
],
|
||||
"cattos": "pésti"
|
||||
"cattos": "pésti",
|
||||
"_vectors": {
|
||||
"manual": [
|
||||
1,
|
||||
2,
|
||||
3
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 654,
|
||||
@ -218,7 +246,14 @@ async fn simple_search_two_indexes() {
|
||||
"cattos": [
|
||||
"simba",
|
||||
"pestiféré"
|
||||
]
|
||||
],
|
||||
"_vectors": {
|
||||
"manual": [
|
||||
1,
|
||||
2,
|
||||
54
|
||||
]
|
||||
}
|
||||
}
|
||||
],
|
||||
"query": "pésti",
|
||||
|
@ -335,3 +335,35 @@ async fn exactness_ranking_rule_order() {
|
||||
})
|
||||
.await;
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn search_on_exact_field() {
|
||||
let server = Server::new().await;
|
||||
let index = index_with_documents(
|
||||
&server,
|
||||
&json!([
|
||||
{
|
||||
"title": "Captain Marvel",
|
||||
"exact": "Captain Marivel",
|
||||
"id": "1",
|
||||
},
|
||||
{
|
||||
"title": "Captain Marivel",
|
||||
"exact": "Captain the Marvel",
|
||||
"id": "2",
|
||||
}]),
|
||||
)
|
||||
.await;
|
||||
|
||||
let (response, code) =
|
||||
index.update_settings_typo_tolerance(json!({ "disableOnAttributes": ["exact"] })).await;
|
||||
assert_eq!(202, code, "{:?}", response);
|
||||
index.wait_task(1).await;
|
||||
// Searching on an exact attribute should only return the document matching without typo.
|
||||
index
|
||||
.search(json!({"q": "Marvel", "attributesToSearchOn": ["exact"]}), |response, code| {
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(response["hits"].as_array().unwrap().len(), @"1");
|
||||
})
|
||||
.await;
|
||||
}
|
||||
|
@ -54,7 +54,7 @@ async fn get_settings() {
|
||||
let (response, code) = index.settings().await;
|
||||
assert_eq!(code, 200);
|
||||
let settings = response.as_object().unwrap();
|
||||
assert_eq!(settings.keys().len(), 14);
|
||||
assert_eq!(settings.keys().len(), 16);
|
||||
assert_eq!(settings["displayedAttributes"], json!(["*"]));
|
||||
assert_eq!(settings["searchableAttributes"], json!(["*"]));
|
||||
assert_eq!(settings["filterableAttributes"], json!([]));
|
||||
@ -83,6 +83,7 @@ async fn get_settings() {
|
||||
"maxTotalHits": 1000,
|
||||
})
|
||||
);
|
||||
assert_eq!(settings["embedders"], json!({}));
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
|
@ -1,4 +1,5 @@
|
||||
mod distinct;
|
||||
mod errors;
|
||||
mod get_settings;
|
||||
mod proximity_settings;
|
||||
mod tokenizer_customization;
|
||||
|
352
meilisearch/tests/settings/proximity_settings.rs
Normal file
352
meilisearch/tests/settings/proximity_settings.rs
Normal file
@ -0,0 +1,352 @@
|
||||
use meili_snap::{json_string, snapshot};
|
||||
use once_cell::sync::Lazy;
|
||||
|
||||
use crate::common::Server;
|
||||
use crate::json;
|
||||
|
||||
static DOCUMENTS: Lazy<crate::common::Value> = Lazy::new(|| {
|
||||
json!([
|
||||
{
|
||||
"id": 1,
|
||||
"a": "Soup of the day",
|
||||
"b": "many the fish",
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"a": "Soup of day",
|
||||
"b": "many the lazy fish",
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"a": "the Soup of day",
|
||||
"b": "many the fish",
|
||||
},
|
||||
])
|
||||
});
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn attribute_scale_search() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("test");
|
||||
|
||||
index.add_documents(DOCUMENTS.clone(), None).await;
|
||||
index.wait_task(0).await;
|
||||
|
||||
let (response, code) = index
|
||||
.update_settings(json!({
|
||||
"proximityPrecision": "byAttribute",
|
||||
"rankingRules": ["words", "typo", "proximity"],
|
||||
}))
|
||||
.await;
|
||||
assert_eq!("202", code.as_str(), "{:?}", response);
|
||||
index.wait_task(1).await;
|
||||
|
||||
// the expected order is [1, 3, 2] instead of [3, 1, 2]
|
||||
// because the attribute scale doesn't make the difference between 1 and 3.
|
||||
index
|
||||
.search(json!({"q": "the soup of day"}), |response, code| {
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response["hits"]), @r###"
|
||||
[
|
||||
{
|
||||
"id": 1,
|
||||
"a": "Soup of the day",
|
||||
"b": "many the fish"
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"a": "the Soup of day",
|
||||
"b": "many the fish"
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"a": "Soup of day",
|
||||
"b": "many the lazy fish"
|
||||
}
|
||||
]
|
||||
"###);
|
||||
})
|
||||
.await;
|
||||
|
||||
// the expected order is [1, 2, 3] instead of [1, 3, 2]
|
||||
// because the attribute scale sees all the word in the same attribute
|
||||
// and so doesn't make the difference between the documents.
|
||||
index
|
||||
.search(json!({"q": "many the fish"}), |response, code| {
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response["hits"]), @r###"
|
||||
[
|
||||
{
|
||||
"id": 1,
|
||||
"a": "Soup of the day",
|
||||
"b": "many the fish"
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"a": "Soup of day",
|
||||
"b": "many the lazy fish"
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"a": "the Soup of day",
|
||||
"b": "many the fish"
|
||||
}
|
||||
]
|
||||
"###);
|
||||
})
|
||||
.await;
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn attribute_scale_phrase_search() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("test");
|
||||
|
||||
index.add_documents(DOCUMENTS.clone(), None).await;
|
||||
index.wait_task(0).await;
|
||||
|
||||
let (_response, _code) = index
|
||||
.update_settings(json!({
|
||||
"proximityPrecision": "byAttribute",
|
||||
"rankingRules": ["words", "typo", "proximity"],
|
||||
}))
|
||||
.await;
|
||||
index.wait_task(1).await;
|
||||
|
||||
// the expected order is [1, 3] instead of [3, 1]
|
||||
// because the attribute scale doesn't make the difference between 1 and 3.
|
||||
// But 2 shouldn't be returned because "the" is not in the same attribute.
|
||||
index
|
||||
.search(json!({"q": "\"the soup of day\""}), |response, code| {
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response["hits"]), @r###"
|
||||
[
|
||||
{
|
||||
"id": 1,
|
||||
"a": "Soup of the day",
|
||||
"b": "many the fish"
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"a": "the Soup of day",
|
||||
"b": "many the fish"
|
||||
}
|
||||
]
|
||||
"###);
|
||||
})
|
||||
.await;
|
||||
|
||||
// the expected order is [1, 2, 3] instead of [1, 3]
|
||||
// because the attribute scale sees all the word in the same attribute
|
||||
// and so doesn't make the difference between the documents.
|
||||
index
|
||||
.search(json!({"q": "\"many the fish\""}), |response, code| {
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response["hits"]), @r###"
|
||||
[
|
||||
{
|
||||
"id": 1,
|
||||
"a": "Soup of the day",
|
||||
"b": "many the fish"
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"a": "Soup of day",
|
||||
"b": "many the lazy fish"
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"a": "the Soup of day",
|
||||
"b": "many the fish"
|
||||
}
|
||||
]
|
||||
"###);
|
||||
})
|
||||
.await;
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn word_scale_set_and_reset() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("test");
|
||||
|
||||
index.add_documents(DOCUMENTS.clone(), None).await;
|
||||
index.wait_task(0).await;
|
||||
|
||||
// Set and reset the setting ensuring the swap between the 2 settings is applied.
|
||||
let (_response, _code) = index
|
||||
.update_settings(json!({
|
||||
"proximityPrecision": "byAttribute",
|
||||
"rankingRules": ["words", "typo", "proximity"],
|
||||
}))
|
||||
.await;
|
||||
index.wait_task(1).await;
|
||||
|
||||
let (_response, _code) = index
|
||||
.update_settings(json!({
|
||||
"proximityPrecision": "byWord",
|
||||
"rankingRules": ["words", "typo", "proximity"],
|
||||
}))
|
||||
.await;
|
||||
index.wait_task(2).await;
|
||||
|
||||
// [3, 1, 2]
|
||||
index
|
||||
.search(json!({"q": "the soup of day"}), |response, code| {
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response["hits"]), @r###"
|
||||
[
|
||||
{
|
||||
"id": 3,
|
||||
"a": "the Soup of day",
|
||||
"b": "many the fish"
|
||||
},
|
||||
{
|
||||
"id": 1,
|
||||
"a": "Soup of the day",
|
||||
"b": "many the fish"
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"a": "Soup of day",
|
||||
"b": "many the lazy fish"
|
||||
}
|
||||
]
|
||||
"###);
|
||||
})
|
||||
.await;
|
||||
|
||||
// [1, 3, 2]
|
||||
index
|
||||
.search(json!({"q": "many the fish"}), |response, code| {
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response["hits"]), @r###"
|
||||
[
|
||||
{
|
||||
"id": 1,
|
||||
"a": "Soup of the day",
|
||||
"b": "many the fish"
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"a": "the Soup of day",
|
||||
"b": "many the fish"
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"a": "Soup of day",
|
||||
"b": "many the lazy fish"
|
||||
}
|
||||
]
|
||||
"###);
|
||||
})
|
||||
.await;
|
||||
|
||||
// [3]
|
||||
index
|
||||
.search(json!({"q": "\"the soup of day\""}), |response, code| {
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response["hits"]), @r###"
|
||||
[
|
||||
{
|
||||
"id": 3,
|
||||
"a": "the Soup of day",
|
||||
"b": "many the fish"
|
||||
}
|
||||
]
|
||||
"###);
|
||||
})
|
||||
.await;
|
||||
|
||||
// [1, 3]
|
||||
index
|
||||
.search(json!({"q": "\"many the fish\""}), |response, code| {
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response["hits"]), @r###"
|
||||
[
|
||||
{
|
||||
"id": 1,
|
||||
"a": "Soup of the day",
|
||||
"b": "many the fish"
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"a": "the Soup of day",
|
||||
"b": "many the fish"
|
||||
}
|
||||
]
|
||||
"###);
|
||||
})
|
||||
.await;
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn attribute_scale_default_ranking_rules() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("test");
|
||||
|
||||
index.add_documents(DOCUMENTS.clone(), None).await;
|
||||
index.wait_task(0).await;
|
||||
|
||||
let (response, code) = index
|
||||
.update_settings(json!({
|
||||
"proximityPrecision": "byAttribute"
|
||||
}))
|
||||
.await;
|
||||
assert_eq!("202", code.as_str(), "{:?}", response);
|
||||
index.wait_task(1).await;
|
||||
|
||||
// the expected order is [3, 1, 2]
|
||||
index
|
||||
.search(json!({"q": "the soup of day"}), |response, code| {
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response["hits"]), @r###"
|
||||
[
|
||||
{
|
||||
"id": 3,
|
||||
"a": "the Soup of day",
|
||||
"b": "many the fish"
|
||||
},
|
||||
{
|
||||
"id": 1,
|
||||
"a": "Soup of the day",
|
||||
"b": "many the fish"
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"a": "Soup of day",
|
||||
"b": "many the lazy fish"
|
||||
}
|
||||
]
|
||||
"###);
|
||||
})
|
||||
.await;
|
||||
|
||||
// the expected order is [1, 3, 2] instead of [1, 3]
|
||||
// because the attribute scale sees all the word in the same attribute
|
||||
// and so doesn't remove the document 2.
|
||||
index
|
||||
.search(json!({"q": "\"many the fish\""}), |response, code| {
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response["hits"]), @r###"
|
||||
[
|
||||
{
|
||||
"id": 1,
|
||||
"a": "Soup of the day",
|
||||
"b": "many the fish"
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"a": "the Soup of day",
|
||||
"b": "many the fish"
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"a": "Soup of day",
|
||||
"b": "many the lazy fish"
|
||||
}
|
||||
]
|
||||
"###);
|
||||
})
|
||||
.await;
|
||||
}
|
@ -1,4 +1,5 @@
|
||||
mod errors;
|
||||
mod webhook;
|
||||
|
||||
use meili_snap::insta::assert_json_snapshot;
|
||||
use time::format_description::well_known::Rfc3339;
|
||||
|
123
meilisearch/tests/tasks/webhook.rs
Normal file
123
meilisearch/tests/tasks/webhook.rs
Normal file
@ -0,0 +1,123 @@
|
||||
//! To test the webhook, we need to spawn a new server with a URL listening for
|
||||
//! post requests. The webhook handle starts a server and forwards all the
|
||||
//! received requests into a channel for you to handle.
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use actix_http::body::MessageBody;
|
||||
use actix_web::dev::{ServiceFactory, ServiceResponse};
|
||||
use actix_web::web::{Bytes, Data};
|
||||
use actix_web::{post, App, HttpResponse, HttpServer};
|
||||
use meili_snap::{json_string, snapshot};
|
||||
use meilisearch::Opt;
|
||||
use tokio::sync::mpsc;
|
||||
use url::Url;
|
||||
|
||||
use crate::common::{default_settings, Server};
|
||||
use crate::json;
|
||||
|
||||
#[post("/")]
|
||||
async fn forward_body(sender: Data<mpsc::UnboundedSender<Vec<u8>>>, body: Bytes) -> HttpResponse {
|
||||
let body = body.to_vec();
|
||||
sender.send(body).unwrap();
|
||||
HttpResponse::Ok().into()
|
||||
}
|
||||
|
||||
fn create_app(
|
||||
sender: Arc<mpsc::UnboundedSender<Vec<u8>>>,
|
||||
) -> actix_web::App<
|
||||
impl ServiceFactory<
|
||||
actix_web::dev::ServiceRequest,
|
||||
Config = (),
|
||||
Response = ServiceResponse<impl MessageBody>,
|
||||
Error = actix_web::Error,
|
||||
InitError = (),
|
||||
>,
|
||||
> {
|
||||
App::new().service(forward_body).app_data(Data::from(sender))
|
||||
}
|
||||
|
||||
struct WebhookHandle {
|
||||
pub server_handle: tokio::task::JoinHandle<Result<(), std::io::Error>>,
|
||||
pub url: String,
|
||||
pub receiver: mpsc::UnboundedReceiver<Vec<u8>>,
|
||||
}
|
||||
|
||||
async fn create_webhook_server() -> WebhookHandle {
|
||||
let mut log_builder = env_logger::Builder::new();
|
||||
log_builder.parse_filters("info");
|
||||
log_builder.init();
|
||||
|
||||
let (sender, receiver) = mpsc::unbounded_channel();
|
||||
let sender = Arc::new(sender);
|
||||
|
||||
// By listening on the port 0, the system will give us any available port.
|
||||
let server =
|
||||
HttpServer::new(move || create_app(sender.clone())).bind(("127.0.0.1", 0)).unwrap();
|
||||
let (ip, scheme) = server.addrs_with_scheme()[0];
|
||||
let url = format!("{scheme}://{ip}/");
|
||||
|
||||
let server_handle = tokio::spawn(server.run());
|
||||
WebhookHandle { server_handle, url, receiver }
|
||||
}
|
||||
|
||||
#[actix_web::test]
|
||||
async fn test_basic_webhook() {
|
||||
let WebhookHandle { server_handle, url, mut receiver } = create_webhook_server().await;
|
||||
|
||||
let db_path = tempfile::tempdir().unwrap();
|
||||
let server = Server::new_with_options(Opt {
|
||||
task_webhook_url: Some(Url::parse(&url).unwrap()),
|
||||
..default_settings(db_path.path())
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let index = server.index("tamo");
|
||||
// May be flaky: we're relying on the fact that while the first document addition is processed, the other
|
||||
// operations will be received and will be batched together. If it doesn't happen it's not a problem
|
||||
// the rest of the test won't assume anything about the number of tasks per batch.
|
||||
for i in 0..5 {
|
||||
let (_, _status) = index.add_documents(json!({ "id": i, "doggo": "bone" }), None).await;
|
||||
}
|
||||
|
||||
let mut nb_tasks = 0;
|
||||
while let Some(payload) = receiver.recv().await {
|
||||
let payload = String::from_utf8(payload).unwrap();
|
||||
let jsonl = payload.split('\n');
|
||||
for json in jsonl {
|
||||
if json.is_empty() {
|
||||
break; // we reached EOF
|
||||
}
|
||||
nb_tasks += 1;
|
||||
let json: serde_json::Value = serde_json::from_str(json).unwrap();
|
||||
snapshot!(
|
||||
json_string!(json, { ".uid" => "[uid]", ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }),
|
||||
@r###"
|
||||
{
|
||||
"uid": "[uid]",
|
||||
"indexUid": "tamo",
|
||||
"status": "succeeded",
|
||||
"type": "documentAdditionOrUpdate",
|
||||
"canceledBy": null,
|
||||
"details": {
|
||||
"receivedDocuments": 1,
|
||||
"indexedDocuments": 1
|
||||
},
|
||||
"error": null,
|
||||
"duration": "[duration]",
|
||||
"enqueuedAt": "[date]",
|
||||
"startedAt": "[date]",
|
||||
"finishedAt": "[date]"
|
||||
}
|
||||
"###);
|
||||
}
|
||||
if nb_tasks == 5 {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
assert!(nb_tasks == 5, "We should have received the 5 tasks but only received {nb_tasks}");
|
||||
|
||||
server_handle.abort();
|
||||
}
|
19
meilitool/Cargo.toml
Normal file
19
meilitool/Cargo.toml
Normal file
@ -0,0 +1,19 @@
|
||||
[package]
|
||||
name = "meilitool"
|
||||
description = "A CLI to edit a Meilisearch database from the command line"
|
||||
version.workspace = true
|
||||
authors.workspace = true
|
||||
homepage.workspace = true
|
||||
readme.workspace = true
|
||||
edition.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1.0.75"
|
||||
clap = { version = "4.2.1", features = ["derive"] }
|
||||
dump = { path = "../dump" }
|
||||
file-store = { path = "../file-store" }
|
||||
meilisearch-auth = { path = "../meilisearch-auth" }
|
||||
meilisearch-types = { path = "../meilisearch-types" }
|
||||
time = { version = "0.3.30", features = ["formatting"] }
|
||||
uuid = { version = "1.5.0", features = ["v4"], default-features = false }
|
314
meilitool/src/main.rs
Normal file
314
meilitool/src/main.rs
Normal file
@ -0,0 +1,314 @@
|
||||
use std::fs::{read_dir, read_to_string, remove_file, File};
|
||||
use std::io::BufWriter;
|
||||
use std::path::PathBuf;
|
||||
|
||||
use anyhow::Context;
|
||||
use clap::{Parser, Subcommand};
|
||||
use dump::{DumpWriter, IndexMetadata};
|
||||
use file_store::FileStore;
|
||||
use meilisearch_auth::AuthController;
|
||||
use meilisearch_types::heed::types::{SerdeJson, Str};
|
||||
use meilisearch_types::heed::{Database, Env, EnvOpenOptions, RoTxn, RwTxn, Unspecified};
|
||||
use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader};
|
||||
use meilisearch_types::milli::{obkv_to_json, BEU32};
|
||||
use meilisearch_types::tasks::{Status, Task};
|
||||
use meilisearch_types::versioning::check_version_file;
|
||||
use meilisearch_types::Index;
|
||||
use time::macros::format_description;
|
||||
use time::OffsetDateTime;
|
||||
use uuid_codec::UuidCodec;
|
||||
|
||||
mod uuid_codec;
|
||||
|
||||
#[derive(Parser)]
|
||||
#[command(author, version, about, long_about = None)]
|
||||
struct Cli {
|
||||
/// The database path where the Meilisearch is running.
|
||||
#[arg(long, default_value = "data.ms/")]
|
||||
db_path: PathBuf,
|
||||
|
||||
#[command(subcommand)]
|
||||
command: Command,
|
||||
}
|
||||
|
||||
#[derive(Subcommand)]
|
||||
enum Command {
|
||||
/// Clears the task queue and make it empty.
|
||||
///
|
||||
/// This command can be safely executed even if Meilisearch is running and processing tasks.
|
||||
/// Once the task queue is empty you can restart Meilisearch and no more tasks must be visible,
|
||||
/// even the ones that were processing. However, it's highly possible that you see the processing
|
||||
/// tasks in the queue again with an associated internal error message.
|
||||
ClearTaskQueue,
|
||||
|
||||
/// Exports a dump from the Meilisearch database.
|
||||
///
|
||||
/// Make sure to run this command when Meilisearch is not running or running but not processing tasks.
|
||||
/// If tasks are being processed while a dump is being exported there are chances for the dump to be
|
||||
/// malformed with missing tasks.
|
||||
///
|
||||
/// TODO Verify this claim or make sure it cannot happen and we can export dumps
|
||||
/// without caring about killing Meilisearch first!
|
||||
ExportADump {
|
||||
/// The directory in which the dump will be created.
|
||||
#[arg(long, default_value = "dumps/")]
|
||||
dump_dir: PathBuf,
|
||||
|
||||
/// Skip dumping the enqueued or processing tasks.
|
||||
///
|
||||
/// Can be useful when there are a lot of them and it is not particularly useful
|
||||
/// to keep them. Note that only the enqueued tasks takes up space so skipping
|
||||
/// the processed ones is not particularly interesting.
|
||||
#[arg(long)]
|
||||
skip_enqueued_tasks: bool,
|
||||
},
|
||||
}
|
||||
|
||||
fn main() -> anyhow::Result<()> {
|
||||
let Cli { db_path, command } = Cli::parse();
|
||||
|
||||
check_version_file(&db_path).context("While checking the version file")?;
|
||||
|
||||
match command {
|
||||
Command::ClearTaskQueue => clear_task_queue(db_path),
|
||||
Command::ExportADump { dump_dir, skip_enqueued_tasks } => {
|
||||
export_a_dump(db_path, dump_dir, skip_enqueued_tasks)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Clears the task queue located at `db_path`.
|
||||
fn clear_task_queue(db_path: PathBuf) -> anyhow::Result<()> {
|
||||
let path = db_path.join("tasks");
|
||||
let env = EnvOpenOptions::new()
|
||||
.max_dbs(100)
|
||||
.open(&path)
|
||||
.with_context(|| format!("While trying to open {:?}", path.display()))?;
|
||||
|
||||
eprintln!("Deleting tasks from the database...");
|
||||
|
||||
let mut wtxn = env.write_txn()?;
|
||||
let all_tasks = try_opening_poly_database(&env, &wtxn, "all-tasks")?;
|
||||
let total = all_tasks.len(&wtxn)?;
|
||||
let status = try_opening_poly_database(&env, &wtxn, "status")?;
|
||||
let kind = try_opening_poly_database(&env, &wtxn, "kind")?;
|
||||
let index_tasks = try_opening_poly_database(&env, &wtxn, "index-tasks")?;
|
||||
let canceled_by = try_opening_poly_database(&env, &wtxn, "canceled_by")?;
|
||||
let enqueued_at = try_opening_poly_database(&env, &wtxn, "enqueued-at")?;
|
||||
let started_at = try_opening_poly_database(&env, &wtxn, "started-at")?;
|
||||
let finished_at = try_opening_poly_database(&env, &wtxn, "finished-at")?;
|
||||
|
||||
try_clearing_poly_database(&mut wtxn, all_tasks, "all-tasks")?;
|
||||
try_clearing_poly_database(&mut wtxn, status, "status")?;
|
||||
try_clearing_poly_database(&mut wtxn, kind, "kind")?;
|
||||
try_clearing_poly_database(&mut wtxn, index_tasks, "index-tasks")?;
|
||||
try_clearing_poly_database(&mut wtxn, canceled_by, "canceled_by")?;
|
||||
try_clearing_poly_database(&mut wtxn, enqueued_at, "enqueued-at")?;
|
||||
try_clearing_poly_database(&mut wtxn, started_at, "started-at")?;
|
||||
try_clearing_poly_database(&mut wtxn, finished_at, "finished-at")?;
|
||||
|
||||
wtxn.commit().context("While committing the transaction")?;
|
||||
|
||||
eprintln!("Successfully deleted {total} tasks from the tasks database!");
|
||||
eprintln!("Deleting the content files from disk...");
|
||||
|
||||
let mut count = 0usize;
|
||||
let update_files = db_path.join("update_files");
|
||||
let entries = read_dir(&update_files).with_context(|| {
|
||||
format!("While trying to read the content of {:?}", update_files.display())
|
||||
})?;
|
||||
for result in entries {
|
||||
match result {
|
||||
Ok(ent) => match remove_file(ent.path()) {
|
||||
Ok(_) => count += 1,
|
||||
Err(e) => eprintln!("Error while deleting {:?}: {}", ent.path().display(), e),
|
||||
},
|
||||
Err(e) => {
|
||||
eprintln!("Error while reading a file in {:?}: {}", update_files.display(), e)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
eprintln!("Sucessfully deleted {count} content files from disk!");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn try_opening_database<KC: 'static, DC: 'static>(
|
||||
env: &Env,
|
||||
rtxn: &RoTxn,
|
||||
db_name: &str,
|
||||
) -> anyhow::Result<Database<KC, DC>> {
|
||||
env.open_database(rtxn, Some(db_name))
|
||||
.with_context(|| format!("While opening the {db_name:?} database"))?
|
||||
.with_context(|| format!("Missing the {db_name:?} database"))
|
||||
}
|
||||
|
||||
fn try_opening_poly_database(
|
||||
env: &Env,
|
||||
rtxn: &RoTxn,
|
||||
db_name: &str,
|
||||
) -> anyhow::Result<Database<Unspecified, Unspecified>> {
|
||||
env.database_options()
|
||||
.name(db_name)
|
||||
.open(rtxn)
|
||||
.with_context(|| format!("While opening the {db_name:?} poly database"))?
|
||||
.with_context(|| format!("Missing the {db_name:?} poly database"))
|
||||
}
|
||||
|
||||
fn try_clearing_poly_database(
|
||||
wtxn: &mut RwTxn,
|
||||
database: Database<Unspecified, Unspecified>,
|
||||
db_name: &str,
|
||||
) -> anyhow::Result<()> {
|
||||
database.clear(wtxn).with_context(|| format!("While clearing the {db_name:?} database"))
|
||||
}
|
||||
|
||||
/// Exports a dump into the dump directory.
|
||||
fn export_a_dump(
|
||||
db_path: PathBuf,
|
||||
dump_dir: PathBuf,
|
||||
skip_enqueued_tasks: bool,
|
||||
) -> Result<(), anyhow::Error> {
|
||||
let started_at = OffsetDateTime::now_utc();
|
||||
|
||||
// 1. Extracts the instance UID from disk
|
||||
let instance_uid_path = db_path.join("instance-uid");
|
||||
let instance_uid = match read_to_string(&instance_uid_path) {
|
||||
Ok(content) => match content.trim().parse() {
|
||||
Ok(uuid) => Some(uuid),
|
||||
Err(e) => {
|
||||
eprintln!("Impossible to parse instance-uid: {e}");
|
||||
None
|
||||
}
|
||||
},
|
||||
Err(e) => {
|
||||
eprintln!("Impossible to read {}: {}", instance_uid_path.display(), e);
|
||||
None
|
||||
}
|
||||
};
|
||||
|
||||
let dump = DumpWriter::new(instance_uid).context("While creating a new dump")?;
|
||||
let file_store =
|
||||
FileStore::new(db_path.join("update_files")).context("While opening the FileStore")?;
|
||||
|
||||
let index_scheduler_path = db_path.join("tasks");
|
||||
let env = EnvOpenOptions::new()
|
||||
.max_dbs(100)
|
||||
.open(&index_scheduler_path)
|
||||
.with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?;
|
||||
|
||||
eprintln!("Dumping the keys...");
|
||||
|
||||
// 2. dump the keys
|
||||
let auth_store = AuthController::new(&db_path, &None)
|
||||
.with_context(|| format!("While opening the auth store at {}", db_path.display()))?;
|
||||
let mut dump_keys = dump.create_keys()?;
|
||||
let mut count = 0;
|
||||
for key in auth_store.list_keys()? {
|
||||
dump_keys.push_key(&key)?;
|
||||
count += 1;
|
||||
}
|
||||
dump_keys.flush()?;
|
||||
|
||||
eprintln!("Successfully dumped {count} keys!");
|
||||
|
||||
let rtxn = env.read_txn()?;
|
||||
let all_tasks: Database<BEU32, SerdeJson<Task>> =
|
||||
try_opening_database(&env, &rtxn, "all-tasks")?;
|
||||
let index_mapping: Database<Str, UuidCodec> =
|
||||
try_opening_database(&env, &rtxn, "index-mapping")?;
|
||||
|
||||
if skip_enqueued_tasks {
|
||||
eprintln!("Skip dumping the enqueued tasks...");
|
||||
} else {
|
||||
eprintln!("Dumping the enqueued tasks...");
|
||||
|
||||
// 3. dump the tasks
|
||||
let mut dump_tasks = dump.create_tasks_queue()?;
|
||||
let mut count = 0;
|
||||
for ret in all_tasks.iter(&rtxn)? {
|
||||
let (_, t) = ret?;
|
||||
let status = t.status;
|
||||
let content_file = t.content_uuid();
|
||||
let mut dump_content_file = dump_tasks.push_task(&t.into())?;
|
||||
|
||||
// 3.1. Dump the `content_file` associated with the task if there is one and the task is not finished yet.
|
||||
if let Some(content_file_uuid) = content_file {
|
||||
if status == Status::Enqueued {
|
||||
let content_file = file_store.get_update(content_file_uuid)?;
|
||||
|
||||
let reader =
|
||||
DocumentsBatchReader::from_reader(content_file).with_context(|| {
|
||||
format!("While reading content file {:?}", content_file_uuid)
|
||||
})?;
|
||||
|
||||
let (mut cursor, documents_batch_index) = reader.into_cursor_and_fields_index();
|
||||
while let Some(doc) = cursor.next_document().with_context(|| {
|
||||
format!("While iterating on content file {:?}", content_file_uuid)
|
||||
})? {
|
||||
dump_content_file
|
||||
.push_document(&obkv_to_object(&doc, &documents_batch_index)?)?;
|
||||
}
|
||||
dump_content_file.flush()?;
|
||||
count += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
dump_tasks.flush()?;
|
||||
|
||||
eprintln!("Successfully dumped {count} enqueued tasks!");
|
||||
}
|
||||
|
||||
eprintln!("Dumping the indexes...");
|
||||
|
||||
// 4. Dump the indexes
|
||||
let mut count = 0;
|
||||
for result in index_mapping.iter(&rtxn)? {
|
||||
let (uid, uuid) = result?;
|
||||
let index_path = db_path.join("indexes").join(uuid.to_string());
|
||||
let index = Index::new(EnvOpenOptions::new(), &index_path).with_context(|| {
|
||||
format!("While trying to open the index at path {:?}", index_path.display())
|
||||
})?;
|
||||
|
||||
let rtxn = index.read_txn()?;
|
||||
let metadata = IndexMetadata {
|
||||
uid: uid.to_owned(),
|
||||
primary_key: index.primary_key(&rtxn)?.map(String::from),
|
||||
created_at: index.created_at(&rtxn)?,
|
||||
updated_at: index.updated_at(&rtxn)?,
|
||||
};
|
||||
let mut index_dumper = dump.create_index(uid, &metadata)?;
|
||||
|
||||
let fields_ids_map = index.fields_ids_map(&rtxn)?;
|
||||
let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
|
||||
|
||||
// 4.1. Dump the documents
|
||||
for ret in index.all_documents(&rtxn)? {
|
||||
let (_id, doc) = ret?;
|
||||
let document = obkv_to_json(&all_fields, &fields_ids_map, doc)?;
|
||||
index_dumper.push_document(&document)?;
|
||||
}
|
||||
|
||||
// 4.2. Dump the settings
|
||||
let settings = meilisearch_types::settings::settings(&index, &rtxn)?;
|
||||
index_dumper.settings(&settings)?;
|
||||
count += 1;
|
||||
}
|
||||
|
||||
eprintln!("Successfully dumped {count} indexes!");
|
||||
// We will not dump experimental feature settings
|
||||
eprintln!("The tool is not dumping experimental features, please set them by hand afterward");
|
||||
|
||||
let dump_uid = started_at.format(format_description!(
|
||||
"[year repr:full][month repr:numerical][day padding:zero]-[hour padding:zero][minute padding:zero][second padding:zero][subsecond digits:3]"
|
||||
)).unwrap();
|
||||
|
||||
let path = dump_dir.join(format!("{}.dump", dump_uid));
|
||||
let file = File::create(&path)?;
|
||||
dump.persist_to(BufWriter::new(file))?;
|
||||
|
||||
eprintln!("Dump exported at path {:?}", path.display());
|
||||
|
||||
Ok(())
|
||||
}
|
24
meilitool/src/uuid_codec.rs
Normal file
24
meilitool/src/uuid_codec.rs
Normal file
@ -0,0 +1,24 @@
|
||||
use std::borrow::Cow;
|
||||
use std::convert::TryInto;
|
||||
|
||||
use meilisearch_types::heed::{BoxedError, BytesDecode, BytesEncode};
|
||||
use uuid::Uuid;
|
||||
|
||||
/// A heed codec for value of struct Uuid.
|
||||
pub struct UuidCodec;
|
||||
|
||||
impl<'a> BytesDecode<'a> for UuidCodec {
|
||||
type DItem = Uuid;
|
||||
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
|
||||
bytes.try_into().map(Uuid::from_bytes).map_err(Into::into)
|
||||
}
|
||||
}
|
||||
|
||||
impl BytesEncode<'_> for UuidCodec {
|
||||
type EItem = Uuid;
|
||||
|
||||
fn bytes_encode(item: &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
|
||||
Ok(Cow::Borrowed(item.as_bytes()))
|
||||
}
|
||||
}
|
@ -17,23 +17,25 @@ bincode = "1.3.3"
|
||||
bstr = "1.4.0"
|
||||
bytemuck = { version = "1.13.1", features = ["extern_crate_alloc"] }
|
||||
byteorder = "1.4.3"
|
||||
charabia = { version = "0.8.3", default-features = false }
|
||||
charabia = { version = "0.8.5", default-features = false }
|
||||
concat-arrays = "0.1.2"
|
||||
crossbeam-channel = "0.5.8"
|
||||
deserr = { version = "0.6.0", features = ["actix-web"]}
|
||||
deserr = "0.6.0"
|
||||
either = { version = "1.8.1", features = ["serde"] }
|
||||
flatten-serde-json = { path = "../flatten-serde-json" }
|
||||
fst = "0.4.7"
|
||||
fxhash = "0.2.1"
|
||||
geoutils = "0.5.1"
|
||||
grenad = { version = "0.4.4", default-features = false, features = [
|
||||
grenad = { version = "0.4.5", default-features = false, features = [
|
||||
"rayon",
|
||||
"tempfile",
|
||||
] }
|
||||
heed = { git = "https://github.com/meilisearch/heed", tag = "v0.12.7", default-features = false, features = [
|
||||
"lmdb", "read-txn-no-tls"
|
||||
heed = { version = "0.20.0-alpha.9", default-features = false, features = [
|
||||
"serde-json",
|
||||
"serde-bincode",
|
||||
"read-txn-no-tls",
|
||||
] }
|
||||
indexmap = { version = "2.0.0", features = ["serde"] }
|
||||
instant-distance = { version = "0.6.1", features = ["with-serde"] }
|
||||
json-depth-checker = { path = "../json-depth-checker" }
|
||||
levenshtein_automata = { version = "0.2.1", features = ["fst_automaton"] }
|
||||
memmap2 = "0.7.1"
|
||||
@ -72,6 +74,23 @@ puffin = "0.16.0"
|
||||
log = "0.4.17"
|
||||
logging_timer = "1.1.0"
|
||||
csv = "1.2.1"
|
||||
candle-core = { git = "https://github.com/huggingface/candle.git", version = "0.3.1" }
|
||||
candle-transformers = { git = "https://github.com/huggingface/candle.git", version = "0.3.1" }
|
||||
candle-nn = { git = "https://github.com/huggingface/candle.git", version = "0.3.1" }
|
||||
tokenizers = { git = "https://github.com/huggingface/tokenizers.git", tag = "v0.14.1", version = "0.14.1" }
|
||||
hf-hub = { git = "https://github.com/dureuill/hf-hub.git", branch = "rust_tls", default_features = false, features = [
|
||||
"online",
|
||||
] }
|
||||
tokio = { version = "1.34.0", features = ["rt"] }
|
||||
futures = "0.3.29"
|
||||
reqwest = { version = "0.11.16", features = [
|
||||
"rustls-tls",
|
||||
"json",
|
||||
], default-features = false }
|
||||
tiktoken-rs = "0.5.7"
|
||||
liquid = "0.26.4"
|
||||
arroy = { git = "https://github.com/meilisearch/arroy.git", version = "0.1.0" }
|
||||
rand = "0.8.5"
|
||||
|
||||
[dev-dependencies]
|
||||
mimalloc = { version = "0.1.37", default-features = false }
|
||||
@ -79,10 +98,19 @@ big_s = "1.0.2"
|
||||
insta = "1.29.0"
|
||||
maplit = "1.0.2"
|
||||
md5 = "0.7.0"
|
||||
meili-snap = { path = "../meili-snap" }
|
||||
rand = { version = "0.8.5", features = ["small_rng"] }
|
||||
|
||||
[features]
|
||||
all-tokenizations = ["charabia/chinese", "charabia/hebrew", "charabia/japanese", "charabia/thai", "charabia/korean", "charabia/greek"]
|
||||
all-tokenizations = [
|
||||
"charabia/chinese",
|
||||
"charabia/hebrew",
|
||||
"charabia/japanese",
|
||||
"charabia/thai",
|
||||
"charabia/korean",
|
||||
"charabia/greek",
|
||||
"charabia/khmer",
|
||||
]
|
||||
|
||||
# Use POSIX semaphores instead of SysV semaphores in LMDB
|
||||
# For more information on this feature, see heed's Cargo.toml
|
||||
@ -106,3 +134,6 @@ thai = ["charabia/thai"]
|
||||
|
||||
# allow greek specialized tokenization
|
||||
greek = ["charabia/greek"]
|
||||
|
||||
# allow khmer specialized tokenization
|
||||
khmer = ["charabia/khmer"]
|
||||
|
@ -5,8 +5,8 @@ use std::time::Instant;
|
||||
|
||||
use heed::EnvOpenOptions;
|
||||
use milli::{
|
||||
execute_search, DefaultSearchLogger, GeoSortStrategy, Index, SearchContext, SearchLogger,
|
||||
TermsMatchingStrategy,
|
||||
execute_search, filtered_universe, DefaultSearchLogger, GeoSortStrategy, Index, SearchContext,
|
||||
SearchLogger, TermsMatchingStrategy,
|
||||
};
|
||||
|
||||
#[global_allocator]
|
||||
@ -49,14 +49,15 @@ fn main() -> Result<(), Box<dyn Error>> {
|
||||
let start = Instant::now();
|
||||
|
||||
let mut ctx = SearchContext::new(&index, &txn);
|
||||
let universe = filtered_universe(&ctx, &None)?;
|
||||
|
||||
let docs = execute_search(
|
||||
&mut ctx,
|
||||
&(!query.trim().is_empty()).then(|| query.trim().to_owned()),
|
||||
&None,
|
||||
(!query.trim().is_empty()).then(|| query.trim()),
|
||||
TermsMatchingStrategy::Last,
|
||||
milli::score_details::ScoringStrategy::Skip,
|
||||
false,
|
||||
&None,
|
||||
universe,
|
||||
&None,
|
||||
GeoSortStrategy::default(),
|
||||
0,
|
||||
|
@ -1,41 +0,0 @@
|
||||
use std::ops;
|
||||
|
||||
use instant_distance::Point;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::normalize_vector;
|
||||
|
||||
#[derive(Debug, Default, Clone, Serialize, Deserialize)]
|
||||
pub struct NDotProductPoint(Vec<f32>);
|
||||
|
||||
impl NDotProductPoint {
|
||||
pub fn new(point: Vec<f32>) -> Self {
|
||||
NDotProductPoint(normalize_vector(point))
|
||||
}
|
||||
|
||||
pub fn into_inner(self) -> Vec<f32> {
|
||||
self.0
|
||||
}
|
||||
}
|
||||
|
||||
impl ops::Deref for NDotProductPoint {
|
||||
type Target = [f32];
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
self.0.as_slice()
|
||||
}
|
||||
}
|
||||
|
||||
impl Point for NDotProductPoint {
|
||||
fn distance(&self, other: &Self) -> f32 {
|
||||
let dist = 1.0 - dot_product_similarity(&self.0, &other.0);
|
||||
debug_assert!(!dist.is_nan());
|
||||
dist
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the dot product similarity score that will between 0.0 and 1.0
|
||||
/// if both vectors are normalized. The higher the more similar the vectors are.
|
||||
pub fn dot_product_similarity(a: &[f32], b: &[f32]) -> f32 {
|
||||
a.iter().zip(b).map(|(a, b)| a * b).sum()
|
||||
}
|
@ -1,5 +1,6 @@
|
||||
mod builder;
|
||||
mod enriched;
|
||||
mod primary_key;
|
||||
mod reader;
|
||||
mod serde_impl;
|
||||
|
||||
@ -11,6 +12,7 @@ use bimap::BiHashMap;
|
||||
pub use builder::DocumentsBatchBuilder;
|
||||
pub use enriched::{EnrichedDocument, EnrichedDocumentsBatchCursor, EnrichedDocumentsBatchReader};
|
||||
use obkv::KvReader;
|
||||
pub use primary_key::{DocumentIdExtractionError, FieldIdMapper, PrimaryKey, DEFAULT_PRIMARY_KEY};
|
||||
pub use reader::{DocumentsBatchCursor, DocumentsBatchCursorError, DocumentsBatchReader};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
@ -87,6 +89,12 @@ impl DocumentsBatchIndex {
|
||||
}
|
||||
}
|
||||
|
||||
impl FieldIdMapper for DocumentsBatchIndex {
|
||||
fn id(&self, name: &str) -> Option<FieldId> {
|
||||
self.id(name)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub enum Error {
|
||||
#[error("Error parsing number {value:?} at line {line}: {error}")]
|
||||
|
172
milli/src/documents/primary_key.rs
Normal file
172
milli/src/documents/primary_key.rs
Normal file
@ -0,0 +1,172 @@
|
||||
use std::iter;
|
||||
use std::result::Result as StdResult;
|
||||
|
||||
use serde_json::Value;
|
||||
|
||||
use crate::{FieldId, InternalError, Object, Result, UserError};
|
||||
|
||||
/// The symbol used to define levels in a nested primary key.
|
||||
const PRIMARY_KEY_SPLIT_SYMBOL: char = '.';
|
||||
|
||||
/// The default primary that is used when not specified.
|
||||
pub const DEFAULT_PRIMARY_KEY: &str = "id";
|
||||
|
||||
/// Trait for objects that can map the name of a field to its [`FieldId`].
|
||||
pub trait FieldIdMapper {
|
||||
/// Attempts to map the passed name to its [`FieldId`].
|
||||
///
|
||||
/// `None` if the field with this name was not found.
|
||||
fn id(&self, name: &str) -> Option<FieldId>;
|
||||
}
|
||||
|
||||
/// A type that represent the type of primary key that has been set
|
||||
/// for this index, a classic flat one or a nested one.
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
pub enum PrimaryKey<'a> {
|
||||
Flat { name: &'a str, field_id: FieldId },
|
||||
Nested { name: &'a str },
|
||||
}
|
||||
|
||||
pub enum DocumentIdExtractionError {
|
||||
InvalidDocumentId(UserError),
|
||||
MissingDocumentId,
|
||||
TooManyDocumentIds(usize),
|
||||
}
|
||||
|
||||
impl<'a> PrimaryKey<'a> {
|
||||
pub fn new(path: &'a str, fields: &impl FieldIdMapper) -> Option<Self> {
|
||||
Some(if path.contains(PRIMARY_KEY_SPLIT_SYMBOL) {
|
||||
Self::Nested { name: path }
|
||||
} else {
|
||||
let field_id = fields.id(path)?;
|
||||
Self::Flat { name: path, field_id }
|
||||
})
|
||||
}
|
||||
|
||||
pub fn name(&self) -> &str {
|
||||
match self {
|
||||
PrimaryKey::Flat { name, .. } => name,
|
||||
PrimaryKey::Nested { name } => name,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn document_id(
|
||||
&self,
|
||||
document: &obkv::KvReader<FieldId>,
|
||||
fields: &impl FieldIdMapper,
|
||||
) -> Result<StdResult<String, DocumentIdExtractionError>> {
|
||||
match self {
|
||||
PrimaryKey::Flat { name: _, field_id } => match document.get(*field_id) {
|
||||
Some(document_id_bytes) => {
|
||||
let document_id = serde_json::from_slice(document_id_bytes)
|
||||
.map_err(InternalError::SerdeJson)?;
|
||||
match validate_document_id_value(document_id)? {
|
||||
Ok(document_id) => Ok(Ok(document_id)),
|
||||
Err(user_error) => {
|
||||
Ok(Err(DocumentIdExtractionError::InvalidDocumentId(user_error)))
|
||||
}
|
||||
}
|
||||
}
|
||||
None => Ok(Err(DocumentIdExtractionError::MissingDocumentId)),
|
||||
},
|
||||
nested @ PrimaryKey::Nested { .. } => {
|
||||
let mut matching_documents_ids = Vec::new();
|
||||
for (first_level_name, right) in nested.possible_level_names() {
|
||||
if let Some(field_id) = fields.id(first_level_name) {
|
||||
if let Some(value_bytes) = document.get(field_id) {
|
||||
let object = serde_json::from_slice(value_bytes)
|
||||
.map_err(InternalError::SerdeJson)?;
|
||||
fetch_matching_values(object, right, &mut matching_documents_ids);
|
||||
|
||||
if matching_documents_ids.len() >= 2 {
|
||||
return Ok(Err(DocumentIdExtractionError::TooManyDocumentIds(
|
||||
matching_documents_ids.len(),
|
||||
)));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
match matching_documents_ids.pop() {
|
||||
Some(document_id) => match validate_document_id_value(document_id)? {
|
||||
Ok(document_id) => Ok(Ok(document_id)),
|
||||
Err(user_error) => {
|
||||
Ok(Err(DocumentIdExtractionError::InvalidDocumentId(user_error)))
|
||||
}
|
||||
},
|
||||
None => Ok(Err(DocumentIdExtractionError::MissingDocumentId)),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns an `Iterator` that gives all the possible fields names the primary key
|
||||
/// can have depending of the first level name and depth of the objects.
|
||||
pub fn possible_level_names(&self) -> impl Iterator<Item = (&str, &str)> + '_ {
|
||||
let name = self.name();
|
||||
name.match_indices(PRIMARY_KEY_SPLIT_SYMBOL)
|
||||
.map(move |(i, _)| (&name[..i], &name[i + PRIMARY_KEY_SPLIT_SYMBOL.len_utf8()..]))
|
||||
.chain(iter::once((name, "")))
|
||||
}
|
||||
}
|
||||
|
||||
fn fetch_matching_values(value: Value, selector: &str, output: &mut Vec<Value>) {
|
||||
match value {
|
||||
Value::Object(object) => fetch_matching_values_in_object(object, selector, "", output),
|
||||
otherwise => output.push(otherwise),
|
||||
}
|
||||
}
|
||||
|
||||
fn fetch_matching_values_in_object(
|
||||
object: Object,
|
||||
selector: &str,
|
||||
base_key: &str,
|
||||
output: &mut Vec<Value>,
|
||||
) {
|
||||
for (key, value) in object {
|
||||
let base_key = if base_key.is_empty() {
|
||||
key.to_string()
|
||||
} else {
|
||||
format!("{}{}{}", base_key, PRIMARY_KEY_SPLIT_SYMBOL, key)
|
||||
};
|
||||
|
||||
if starts_with(selector, &base_key) {
|
||||
match value {
|
||||
Value::Object(object) => {
|
||||
fetch_matching_values_in_object(object, selector, &base_key, output)
|
||||
}
|
||||
value => output.push(value),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn starts_with(selector: &str, key: &str) -> bool {
|
||||
selector.strip_prefix(key).map_or(false, |tail| {
|
||||
tail.chars().next().map(|c| c == PRIMARY_KEY_SPLIT_SYMBOL).unwrap_or(true)
|
||||
})
|
||||
}
|
||||
|
||||
// FIXME: move to a DocumentId struct
|
||||
|
||||
fn validate_document_id(document_id: &str) -> Option<&str> {
|
||||
if !document_id.is_empty()
|
||||
&& document_id.chars().all(|c| matches!(c, 'a'..='z' | 'A'..='Z' | '0'..='9' | '-' | '_'))
|
||||
{
|
||||
Some(document_id)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
pub fn validate_document_id_value(document_id: Value) -> Result<StdResult<String, UserError>> {
|
||||
match document_id {
|
||||
Value::String(string) => match validate_document_id(&string) {
|
||||
Some(s) if s.len() == string.len() => Ok(Ok(string)),
|
||||
Some(s) => Ok(Ok(s.to_string())),
|
||||
None => Ok(Err(UserError::InvalidDocumentId { document_id: Value::String(string) })),
|
||||
},
|
||||
Value::Number(number) if number.is_i64() => Ok(Ok(number.to_string())),
|
||||
content => Ok(Err(UserError::InvalidDocumentId { document_id: content })),
|
||||
}
|
||||
}
|
@ -61,6 +61,10 @@ pub enum InternalError {
|
||||
AbortedIndexation,
|
||||
#[error("The matching words list contains at least one invalid member.")]
|
||||
InvalidMatchingWords,
|
||||
#[error(transparent)]
|
||||
ArroyError(#[from] arroy::Error),
|
||||
#[error(transparent)]
|
||||
VectorEmbeddingError(#[from] crate::vector::Error),
|
||||
}
|
||||
|
||||
#[derive(Error, Debug)]
|
||||
@ -89,8 +93,6 @@ pub enum FieldIdMapMissingEntry {
|
||||
|
||||
#[derive(Error, Debug)]
|
||||
pub enum UserError {
|
||||
#[error("A soft deleted internal document id have been used: `{document_id}`.")]
|
||||
AccessingSoftDeletedDocument { document_id: DocumentId },
|
||||
#[error("A document cannot contain more than 65,535 fields.")]
|
||||
AttributeLimitReached,
|
||||
#[error(transparent)]
|
||||
@ -112,8 +114,10 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco
|
||||
InvalidGeoField(#[from] GeoError),
|
||||
#[error("Invalid vector dimensions: expected: `{}`, found: `{}`.", .expected, .found)]
|
||||
InvalidVectorDimensions { expected: usize, found: usize },
|
||||
#[error("The `_vectors` field in the document with the id: `{document_id}` is not an array. Was expecting an array of floats or an array of arrays of floats but instead got `{value}`.")]
|
||||
InvalidVectorsType { document_id: Value, value: Value },
|
||||
#[error("The `_vectors.{subfield}` field in the document with id: `{document_id}` is not an array. Was expecting an array of floats or an array of arrays of floats but instead got `{value}`.")]
|
||||
InvalidVectorsType { document_id: Value, value: Value, subfield: String },
|
||||
#[error("The `_vectors` field in the document with id: `{document_id}` is not an object. Was expecting an object with a key for each embedder with manually provided vectors, but instead got `{value}`")]
|
||||
InvalidVectorsMapType { document_id: Value, value: Value },
|
||||
#[error("{0}")]
|
||||
InvalidFilter(String),
|
||||
#[error("Invalid type for filter subexpression: expected: {}, found: {1}.", .0.join(", "))]
|
||||
@ -154,7 +158,7 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco
|
||||
valid_fields: BTreeSet<String>,
|
||||
hidden_fields: bool,
|
||||
},
|
||||
#[error("{}", HeedError::BadOpenOptions)]
|
||||
#[error("an environment is already opened with different options")]
|
||||
InvalidLmdbOpenOptions,
|
||||
#[error("You must specify where `sort` is listed in the rankingRules setting to use the sort parameter at search time.")]
|
||||
SortRankingRuleMissing,
|
||||
@ -182,6 +186,49 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco
|
||||
UnknownInternalDocumentId { document_id: DocumentId },
|
||||
#[error("`minWordSizeForTypos` setting is invalid. `oneTypo` and `twoTypos` fields should be between `0` and `255`, and `twoTypos` should be greater or equals to `oneTypo` but found `oneTypo: {0}` and twoTypos: {1}`.")]
|
||||
InvalidMinTypoWordLenSetting(u8, u8),
|
||||
#[error(transparent)]
|
||||
VectorEmbeddingError(#[from] crate::vector::Error),
|
||||
#[error(transparent)]
|
||||
MissingDocumentField(#[from] crate::prompt::error::RenderPromptError),
|
||||
#[error(transparent)]
|
||||
InvalidPrompt(#[from] crate::prompt::error::NewPromptError),
|
||||
#[error("Invalid prompt in for embeddings with name '{0}': {1}.")]
|
||||
InvalidPromptForEmbeddings(String, crate::prompt::error::NewPromptError),
|
||||
#[error("Too many embedders in the configuration. Found {0}, but limited to 256.")]
|
||||
TooManyEmbedders(usize),
|
||||
#[error("Cannot find embedder with name {0}.")]
|
||||
InvalidEmbedder(String),
|
||||
#[error("Too many vectors for document with id {0}: found {1}, but limited to 256.")]
|
||||
TooManyVectors(String, usize),
|
||||
}
|
||||
|
||||
impl From<crate::vector::Error> for Error {
|
||||
fn from(value: crate::vector::Error) -> Self {
|
||||
match value.fault() {
|
||||
FaultSource::User => Error::UserError(value.into()),
|
||||
FaultSource::Runtime => Error::InternalError(value.into()),
|
||||
FaultSource::Bug => Error::InternalError(value.into()),
|
||||
FaultSource::Undecided => Error::InternalError(value.into()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<arroy::Error> for Error {
|
||||
fn from(value: arroy::Error) -> Self {
|
||||
match value {
|
||||
arroy::Error::Heed(heed) => heed.into(),
|
||||
arroy::Error::Io(io) => io.into(),
|
||||
arroy::Error::InvalidVecDimension { expected, received } => {
|
||||
Error::UserError(UserError::InvalidVectorDimensions { expected, found: received })
|
||||
}
|
||||
arroy::Error::DatabaseFull
|
||||
| arroy::Error::InvalidItemAppend
|
||||
| arroy::Error::UnmatchingDistance { .. }
|
||||
| arroy::Error::MissingMetadata => {
|
||||
Error::InternalError(InternalError::ArroyError(value))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Error, Debug)]
|
||||
@ -328,15 +375,36 @@ impl From<HeedError> for Error {
|
||||
HeedError::Mdb(MdbError::MapFull) => UserError(MaxDatabaseSizeReached),
|
||||
HeedError::Mdb(MdbError::Invalid) => UserError(InvalidStoreFile),
|
||||
HeedError::Mdb(error) => InternalError(Store(error)),
|
||||
HeedError::Encoding => InternalError(Serialization(Encoding { db_name: None })),
|
||||
HeedError::Decoding => InternalError(Serialization(Decoding { db_name: None })),
|
||||
// TODO use the encoding
|
||||
HeedError::Encoding(_) => InternalError(Serialization(Encoding { db_name: None })),
|
||||
HeedError::Decoding(_) => InternalError(Serialization(Decoding { db_name: None })),
|
||||
HeedError::InvalidDatabaseTyping => InternalError(InvalidDatabaseTyping),
|
||||
HeedError::DatabaseClosing => InternalError(DatabaseClosing),
|
||||
HeedError::BadOpenOptions => UserError(InvalidLmdbOpenOptions),
|
||||
HeedError::BadOpenOptions { .. } => UserError(InvalidLmdbOpenOptions),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
pub enum FaultSource {
|
||||
User,
|
||||
Runtime,
|
||||
Bug,
|
||||
Undecided,
|
||||
}
|
||||
|
||||
impl std::fmt::Display for FaultSource {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
let s = match self {
|
||||
FaultSource::User => "user error",
|
||||
FaultSource::Runtime => "runtime error",
|
||||
FaultSource::Bug => "coding error",
|
||||
FaultSource::Undecided => "error",
|
||||
};
|
||||
f.write_str(s)
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn conditionally_lookup_for_error_message() {
|
||||
let prefix = "Attribute `name` is not sortable.";
|
||||
|
@ -1,159 +1,75 @@
|
||||
use std::borrow::Cow;
|
||||
use std::collections::HashMap;
|
||||
use std::convert::TryInto;
|
||||
use std::{fmt, str};
|
||||
|
||||
use fst::map::IndexedValue;
|
||||
use fst::{IntoStreamer, Streamer};
|
||||
use roaring::RoaringBitmap;
|
||||
use heed::types::Str;
|
||||
use heed::{Database, RoIter, RoTxn, RwTxn};
|
||||
|
||||
const DELETED_ID: u64 = u64::MAX;
|
||||
use crate::{DocumentId, BEU32};
|
||||
|
||||
pub struct ExternalDocumentsIds<'a> {
|
||||
pub(crate) hard: fst::Map<Cow<'a, [u8]>>,
|
||||
pub(crate) soft: fst::Map<Cow<'a, [u8]>>,
|
||||
soft_deleted_docids: RoaringBitmap,
|
||||
pub enum DocumentOperationKind {
|
||||
Create,
|
||||
Delete,
|
||||
}
|
||||
|
||||
impl<'a> ExternalDocumentsIds<'a> {
|
||||
pub fn new(
|
||||
hard: fst::Map<Cow<'a, [u8]>>,
|
||||
soft: fst::Map<Cow<'a, [u8]>>,
|
||||
soft_deleted_docids: RoaringBitmap,
|
||||
) -> ExternalDocumentsIds<'a> {
|
||||
ExternalDocumentsIds { hard, soft, soft_deleted_docids }
|
||||
}
|
||||
pub struct DocumentOperation {
|
||||
pub external_id: String,
|
||||
pub internal_id: DocumentId,
|
||||
pub kind: DocumentOperationKind,
|
||||
}
|
||||
|
||||
pub fn into_static(self) -> ExternalDocumentsIds<'static> {
|
||||
ExternalDocumentsIds {
|
||||
hard: self.hard.map_data(|c| Cow::Owned(c.into_owned())).unwrap(),
|
||||
soft: self.soft.map_data(|c| Cow::Owned(c.into_owned())).unwrap(),
|
||||
soft_deleted_docids: self.soft_deleted_docids,
|
||||
}
|
||||
pub struct ExternalDocumentsIds(Database<Str, BEU32>);
|
||||
|
||||
impl ExternalDocumentsIds {
|
||||
pub fn new(db: Database<Str, BEU32>) -> ExternalDocumentsIds {
|
||||
ExternalDocumentsIds(db)
|
||||
}
|
||||
|
||||
/// Returns `true` if hard and soft external documents lists are empty.
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.hard.is_empty() && self.soft.is_empty()
|
||||
pub fn is_empty(&self, rtxn: &RoTxn) -> heed::Result<bool> {
|
||||
self.0.is_empty(rtxn).map_err(Into::into)
|
||||
}
|
||||
|
||||
pub fn get<A: AsRef<[u8]>>(&self, external_id: A) -> Option<u32> {
|
||||
let external_id = external_id.as_ref();
|
||||
match self.soft.get(external_id).or_else(|| self.hard.get(external_id)) {
|
||||
Some(id) if id != DELETED_ID && !self.soft_deleted_docids.contains(id as u32) => {
|
||||
Some(id.try_into().unwrap())
|
||||
}
|
||||
_otherwise => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Rebuild the internal FSTs in the ExternalDocumentsIds structure such that they
|
||||
/// don't contain any soft deleted document id.
|
||||
pub fn delete_soft_deleted_documents_ids_from_fsts(&mut self) -> fst::Result<()> {
|
||||
let mut new_hard_builder = fst::MapBuilder::memory();
|
||||
|
||||
let union_op = self.hard.op().add(&self.soft).r#union();
|
||||
let mut iter = union_op.into_stream();
|
||||
while let Some((external_id, docids)) = iter.next() {
|
||||
// prefer selecting the ids from soft, always
|
||||
let id = indexed_last_value(docids).unwrap();
|
||||
if id != DELETED_ID && !self.soft_deleted_docids.contains(id as u32) {
|
||||
new_hard_builder.insert(external_id, id)?;
|
||||
}
|
||||
}
|
||||
drop(iter);
|
||||
|
||||
// Delete soft map completely
|
||||
self.soft = fst::Map::default().map_data(Cow::Owned)?;
|
||||
// We save the new map as the new hard map.
|
||||
self.hard = new_hard_builder.into_map().map_data(Cow::Owned)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn insert_ids<A: AsRef<[u8]>>(&mut self, other: &fst::Map<A>) -> fst::Result<()> {
|
||||
let union_op = self.soft.op().add(other).r#union();
|
||||
|
||||
let mut new_soft_builder = fst::MapBuilder::memory();
|
||||
let mut iter = union_op.into_stream();
|
||||
while let Some((external_id, marked_docids)) = iter.next() {
|
||||
let id = indexed_last_value(marked_docids).unwrap();
|
||||
new_soft_builder.insert(external_id, id)?;
|
||||
}
|
||||
|
||||
drop(iter);
|
||||
|
||||
// We save the new map as the new soft map.
|
||||
self.soft = new_soft_builder.into_map().map_data(Cow::Owned)?;
|
||||
self.merge_soft_into_hard()
|
||||
pub fn get<A: AsRef<str>>(&self, rtxn: &RoTxn, external_id: A) -> heed::Result<Option<u32>> {
|
||||
self.0.get(rtxn, external_id.as_ref())
|
||||
}
|
||||
|
||||
/// An helper function to debug this type, returns an `HashMap` of both,
|
||||
/// soft and hard fst maps, combined.
|
||||
pub fn to_hash_map(&self) -> HashMap<String, u32> {
|
||||
let mut map = HashMap::new();
|
||||
|
||||
let union_op = self.hard.op().add(&self.soft).r#union();
|
||||
let mut iter = union_op.into_stream();
|
||||
while let Some((external_id, marked_docids)) = iter.next() {
|
||||
let id = indexed_last_value(marked_docids).unwrap();
|
||||
if id != DELETED_ID {
|
||||
let external_id = str::from_utf8(external_id).unwrap();
|
||||
map.insert(external_id.to_owned(), id.try_into().unwrap());
|
||||
}
|
||||
pub fn to_hash_map(&self, rtxn: &RoTxn) -> heed::Result<HashMap<String, u32>> {
|
||||
let mut map = HashMap::default();
|
||||
for result in self.0.iter(rtxn)? {
|
||||
let (external, internal) = result?;
|
||||
map.insert(external.to_owned(), internal);
|
||||
}
|
||||
|
||||
map
|
||||
Ok(map)
|
||||
}
|
||||
|
||||
/// Return an fst of the combined hard and soft deleted ID.
|
||||
pub fn to_fst<'b>(&'b self) -> fst::Result<Cow<'b, fst::Map<Cow<'a, [u8]>>>> {
|
||||
if self.soft.is_empty() {
|
||||
return Ok(Cow::Borrowed(&self.hard));
|
||||
}
|
||||
let union_op = self.hard.op().add(&self.soft).r#union();
|
||||
|
||||
let mut iter = union_op.into_stream();
|
||||
let mut new_hard_builder = fst::MapBuilder::memory();
|
||||
while let Some((external_id, marked_docids)) = iter.next() {
|
||||
let value = indexed_last_value(marked_docids).unwrap();
|
||||
if value != DELETED_ID {
|
||||
new_hard_builder.insert(external_id, value)?;
|
||||
/// Applies the list of operations passed as argument, modifying the current external to internal id mapping.
|
||||
///
|
||||
/// If the list contains multiple operations on the same external id, then the result is unspecified.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// - If attempting to delete a document that doesn't exist
|
||||
/// - If attempting to create a document that already exists
|
||||
pub fn apply(&self, wtxn: &mut RwTxn, operations: Vec<DocumentOperation>) -> heed::Result<()> {
|
||||
for DocumentOperation { external_id, internal_id, kind } in operations {
|
||||
match kind {
|
||||
DocumentOperationKind::Create => {
|
||||
self.0.put(wtxn, &external_id, &internal_id)?;
|
||||
}
|
||||
DocumentOperationKind::Delete => {
|
||||
if !self.0.delete(wtxn, &external_id)? {
|
||||
panic!("Attempting to delete a non-existing document")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
drop(iter);
|
||||
|
||||
Ok(Cow::Owned(new_hard_builder.into_map().map_data(Cow::Owned)?))
|
||||
}
|
||||
|
||||
fn merge_soft_into_hard(&mut self) -> fst::Result<()> {
|
||||
if self.soft.len() >= self.hard.len() / 2 {
|
||||
self.hard = self.to_fst()?.into_owned();
|
||||
self.soft = fst::Map::default().map_data(Cow::Owned)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Debug for ExternalDocumentsIds<'_> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
f.debug_tuple("ExternalDocumentsIds").field(&self.to_hash_map()).finish()
|
||||
/// Returns an iterator over all the external ids.
|
||||
pub fn iter<'t>(&self, rtxn: &'t RoTxn) -> heed::Result<RoIter<'t, Str, BEU32>> {
|
||||
self.0.iter(rtxn)
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for ExternalDocumentsIds<'static> {
|
||||
fn default() -> Self {
|
||||
ExternalDocumentsIds {
|
||||
hard: fst::Map::default().map_data(Cow::Owned).unwrap(),
|
||||
soft: fst::Map::default().map_data(Cow::Owned).unwrap(),
|
||||
soft_deleted_docids: RoaringBitmap::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the value of the `IndexedValue` with the highest _index_.
|
||||
fn indexed_last_value(indexed_values: &[IndexedValue]) -> Option<u64> {
|
||||
indexed_values.iter().copied().max_by_key(|iv| iv.index).map(|iv| iv.value)
|
||||
}
|
||||
|
@ -81,6 +81,12 @@ impl Default for FieldsIdsMap {
|
||||
}
|
||||
}
|
||||
|
||||
impl crate::documents::FieldIdMapper for FieldsIdsMap {
|
||||
fn id(&self, name: &str) -> Option<FieldId> {
|
||||
self.id(name)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
@ -2,26 +2,28 @@ use std::borrow::Cow;
|
||||
use std::convert::TryInto;
|
||||
use std::str;
|
||||
|
||||
use heed::BoxedError;
|
||||
|
||||
pub struct BEU16StrCodec;
|
||||
|
||||
impl<'a> heed::BytesDecode<'a> for BEU16StrCodec {
|
||||
type DItem = (u16, &'a str);
|
||||
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
|
||||
let (n_bytes, str_bytes) = bytes.split_at(2);
|
||||
let n = n_bytes.try_into().map(u16::from_be_bytes).ok()?;
|
||||
let s = str::from_utf8(str_bytes).ok()?;
|
||||
Some((n, s))
|
||||
let n = n_bytes.try_into().map(u16::from_be_bytes)?;
|
||||
let s = str::from_utf8(str_bytes)?;
|
||||
Ok((n, s))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> heed::BytesEncode<'a> for BEU16StrCodec {
|
||||
type EItem = (u16, &'a str);
|
||||
|
||||
fn bytes_encode((n, s): &Self::EItem) -> Option<Cow<[u8]>> {
|
||||
fn bytes_encode((n, s): &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
|
||||
let mut bytes = Vec::with_capacity(s.len() + 2);
|
||||
bytes.extend_from_slice(&n.to_be_bytes());
|
||||
bytes.extend_from_slice(s.as_bytes());
|
||||
Some(Cow::Owned(bytes))
|
||||
Ok(Cow::Owned(bytes))
|
||||
}
|
||||
}
|
||||
|
@ -2,26 +2,28 @@ use std::borrow::Cow;
|
||||
use std::convert::TryInto;
|
||||
use std::str;
|
||||
|
||||
use heed::BoxedError;
|
||||
|
||||
pub struct BEU32StrCodec;
|
||||
|
||||
impl<'a> heed::BytesDecode<'a> for BEU32StrCodec {
|
||||
type DItem = (u32, &'a str);
|
||||
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
|
||||
let (n_bytes, str_bytes) = bytes.split_at(4);
|
||||
let n = n_bytes.try_into().map(u32::from_be_bytes).ok()?;
|
||||
let s = str::from_utf8(str_bytes).ok()?;
|
||||
Some((n, s))
|
||||
let n = n_bytes.try_into().map(u32::from_be_bytes)?;
|
||||
let s = str::from_utf8(str_bytes)?;
|
||||
Ok((n, s))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> heed::BytesEncode<'a> for BEU32StrCodec {
|
||||
type EItem = (u32, &'a str);
|
||||
|
||||
fn bytes_encode((n, s): &Self::EItem) -> Option<Cow<[u8]>> {
|
||||
fn bytes_encode((n, s): &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
|
||||
let mut bytes = Vec::with_capacity(s.len() + 4);
|
||||
bytes.extend_from_slice(&n.to_be_bytes());
|
||||
bytes.extend_from_slice(s.as_bytes());
|
||||
Some(Cow::Owned(bytes))
|
||||
Ok(Cow::Owned(bytes))
|
||||
}
|
||||
}
|
||||
|
@ -1,23 +1,23 @@
|
||||
use std::borrow::Cow;
|
||||
|
||||
use heed::{BytesDecode, BytesEncode};
|
||||
use heed::{BoxedError, BytesDecode, BytesEncode};
|
||||
|
||||
/// A codec for values of type `&[u8]`. Unlike `ByteSlice`, its `EItem` and `DItem` associated
|
||||
/// A codec for values of type `&[u8]`. Unlike `Bytes`, its `EItem` and `DItem` associated
|
||||
/// types are equivalent (= `&'a [u8]`) and these values can reside within another structure.
|
||||
pub struct ByteSliceRefCodec;
|
||||
pub struct BytesRefCodec;
|
||||
|
||||
impl<'a> BytesEncode<'a> for ByteSliceRefCodec {
|
||||
impl<'a> BytesEncode<'a> for BytesRefCodec {
|
||||
type EItem = &'a [u8];
|
||||
|
||||
fn bytes_encode(item: &'a Self::EItem) -> Option<Cow<'a, [u8]>> {
|
||||
Some(Cow::Borrowed(item))
|
||||
fn bytes_encode(item: &'a Self::EItem) -> Result<Cow<'a, [u8]>, BoxedError> {
|
||||
Ok(Cow::Borrowed(item))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> BytesDecode<'a> for ByteSliceRefCodec {
|
||||
impl<'a> BytesDecode<'a> for BytesRefCodec {
|
||||
type DItem = &'a [u8];
|
||||
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||
Some(bytes)
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
|
||||
Ok(bytes)
|
||||
}
|
||||
}
|
||||
|
@ -1,8 +1,9 @@
|
||||
use std::borrow::Cow;
|
||||
use std::marker::PhantomData;
|
||||
|
||||
use heed::{BytesDecode, BytesEncode};
|
||||
use heed::{BoxedError, BytesDecode, BytesEncode};
|
||||
|
||||
use crate::heed_codec::SliceTooShortError;
|
||||
use crate::{try_split_array_at, DocumentId, FieldId};
|
||||
|
||||
pub struct FieldDocIdFacetCodec<C>(PhantomData<C>);
|
||||
@ -13,16 +14,16 @@ where
|
||||
{
|
||||
type DItem = (FieldId, DocumentId, C::DItem);
|
||||
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||
let (field_id_bytes, bytes) = try_split_array_at(bytes)?;
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
|
||||
let (field_id_bytes, bytes) = try_split_array_at(bytes).ok_or(SliceTooShortError)?;
|
||||
let field_id = u16::from_be_bytes(field_id_bytes);
|
||||
|
||||
let (document_id_bytes, bytes) = try_split_array_at(bytes)?;
|
||||
let (document_id_bytes, bytes) = try_split_array_at(bytes).ok_or(SliceTooShortError)?;
|
||||
let document_id = u32::from_be_bytes(document_id_bytes);
|
||||
|
||||
let value = C::bytes_decode(bytes)?;
|
||||
|
||||
Some((field_id, document_id, value))
|
||||
Ok((field_id, document_id, value))
|
||||
}
|
||||
}
|
||||
|
||||
@ -32,13 +33,15 @@ where
|
||||
{
|
||||
type EItem = (FieldId, DocumentId, C::EItem);
|
||||
|
||||
fn bytes_encode((field_id, document_id, value): &'a Self::EItem) -> Option<Cow<[u8]>> {
|
||||
fn bytes_encode(
|
||||
(field_id, document_id, value): &'a Self::EItem,
|
||||
) -> Result<Cow<[u8]>, BoxedError> {
|
||||
let mut bytes = Vec::with_capacity(32);
|
||||
bytes.extend_from_slice(&field_id.to_be_bytes()); // 2 bytes
|
||||
bytes.extend_from_slice(&document_id.to_be_bytes()); // 4 bytes
|
||||
let value_bytes = C::bytes_encode(value)?;
|
||||
// variable length, if f64 -> 16 bytes, if string -> large, potentially
|
||||
bytes.extend_from_slice(&value_bytes);
|
||||
Some(Cow::Owned(bytes))
|
||||
Ok(Cow::Owned(bytes))
|
||||
}
|
||||
}
|
||||
|
@ -5,8 +5,8 @@ use std::borrow::Cow;
|
||||
use std::convert::TryFrom;
|
||||
use std::marker::PhantomData;
|
||||
|
||||
use heed::types::{DecodeIgnore, OwnedType};
|
||||
use heed::{BytesDecode, BytesEncode};
|
||||
use heed::types::DecodeIgnore;
|
||||
use heed::{BoxedError, BytesDecode, BytesEncode};
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
pub use self::field_doc_id_facet_codec::FieldDocIdFacetCodec;
|
||||
@ -18,7 +18,7 @@ pub type FieldDocIdFacetF64Codec = FieldDocIdFacetCodec<OrderedF64Codec>;
|
||||
pub type FieldDocIdFacetStringCodec = FieldDocIdFacetCodec<StrRefCodec>;
|
||||
pub type FieldDocIdFacetIgnoreCodec = FieldDocIdFacetCodec<DecodeIgnore>;
|
||||
|
||||
pub type FieldIdCodec = OwnedType<BEU16>;
|
||||
pub type FieldIdCodec = BEU16;
|
||||
|
||||
/// Tries to split a slice in half at the given middle point,
|
||||
/// `None` if the slice is too short.
|
||||
@ -58,7 +58,7 @@ where
|
||||
{
|
||||
type EItem = FacetGroupKey<T::EItem>;
|
||||
|
||||
fn bytes_encode(value: &'a Self::EItem) -> Option<Cow<'a, [u8]>> {
|
||||
fn bytes_encode(value: &'a Self::EItem) -> Result<Cow<'a, [u8]>, BoxedError> {
|
||||
let mut v = vec![];
|
||||
v.extend_from_slice(&value.field_id.to_be_bytes());
|
||||
v.extend_from_slice(&[value.level]);
|
||||
@ -66,7 +66,7 @@ where
|
||||
let bound = T::bytes_encode(&value.left_bound)?;
|
||||
v.extend_from_slice(&bound);
|
||||
|
||||
Some(Cow::Owned(v))
|
||||
Ok(Cow::Owned(v))
|
||||
}
|
||||
}
|
||||
impl<'a, T> heed::BytesDecode<'a> for FacetGroupKeyCodec<T>
|
||||
@ -75,11 +75,11 @@ where
|
||||
{
|
||||
type DItem = FacetGroupKey<T::DItem>;
|
||||
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||
let fid = u16::from_be_bytes(<[u8; 2]>::try_from(&bytes[0..=1]).ok()?);
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
|
||||
let fid = u16::from_be_bytes(<[u8; 2]>::try_from(&bytes[0..=1])?);
|
||||
let level = bytes[2];
|
||||
let bound = T::bytes_decode(&bytes[3..])?;
|
||||
Some(FacetGroupKey { field_id: fid, level, left_bound: bound })
|
||||
Ok(FacetGroupKey { field_id: fid, level, left_bound: bound })
|
||||
}
|
||||
}
|
||||
|
||||
@ -87,17 +87,17 @@ pub struct FacetGroupValueCodec;
|
||||
impl<'a> heed::BytesEncode<'a> for FacetGroupValueCodec {
|
||||
type EItem = FacetGroupValue;
|
||||
|
||||
fn bytes_encode(value: &'a Self::EItem) -> Option<Cow<'a, [u8]>> {
|
||||
fn bytes_encode(value: &'a Self::EItem) -> Result<Cow<'a, [u8]>, BoxedError> {
|
||||
let mut v = vec![value.size];
|
||||
CboRoaringBitmapCodec::serialize_into(&value.bitmap, &mut v);
|
||||
Some(Cow::Owned(v))
|
||||
Ok(Cow::Owned(v))
|
||||
}
|
||||
}
|
||||
impl<'a> heed::BytesDecode<'a> for FacetGroupValueCodec {
|
||||
type DItem = FacetGroupValue;
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
|
||||
let size = bytes[0];
|
||||
let bitmap = CboRoaringBitmapCodec::deserialize_from(&bytes[1..]).ok()?;
|
||||
Some(FacetGroupValue { size, bitmap })
|
||||
let bitmap = CboRoaringBitmapCodec::deserialize_from(&bytes[1..])?;
|
||||
Ok(FacetGroupValue { size, bitmap })
|
||||
}
|
||||
}
|
||||
|
@ -1,37 +1,45 @@
|
||||
use std::borrow::Cow;
|
||||
use std::convert::TryInto;
|
||||
|
||||
use heed::BytesDecode;
|
||||
use heed::{BoxedError, BytesDecode};
|
||||
use thiserror::Error;
|
||||
|
||||
use crate::facet::value_encoding::f64_into_bytes;
|
||||
use crate::heed_codec::SliceTooShortError;
|
||||
|
||||
pub struct OrderedF64Codec;
|
||||
|
||||
impl<'a> BytesDecode<'a> for OrderedF64Codec {
|
||||
type DItem = f64;
|
||||
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
|
||||
if bytes.len() < 16 {
|
||||
return None;
|
||||
Err(SliceTooShortError.into())
|
||||
} else {
|
||||
bytes[8..].try_into().map(f64::from_be_bytes).map_err(Into::into)
|
||||
}
|
||||
let f = bytes[8..].try_into().ok().map(f64::from_be_bytes)?;
|
||||
Some(f)
|
||||
}
|
||||
}
|
||||
|
||||
impl heed::BytesEncode<'_> for OrderedF64Codec {
|
||||
type EItem = f64;
|
||||
|
||||
fn bytes_encode(f: &Self::EItem) -> Option<Cow<[u8]>> {
|
||||
fn bytes_encode(f: &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
|
||||
let mut buffer = [0u8; 16];
|
||||
|
||||
// write the globally ordered float
|
||||
let bytes = f64_into_bytes(*f)?;
|
||||
let bytes = f64_into_bytes(*f).ok_or(InvalidGloballyOrderedFloatError { float: *f })?;
|
||||
buffer[..8].copy_from_slice(&bytes[..]);
|
||||
// Then the f64 value just to be able to read it back
|
||||
let bytes = f.to_be_bytes();
|
||||
buffer[8..16].copy_from_slice(&bytes[..]);
|
||||
|
||||
Some(Cow::Owned(buffer.to_vec()))
|
||||
Ok(Cow::Owned(buffer.to_vec()))
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Error, Debug)]
|
||||
#[error("the float {float} cannot be converted to a globally ordered representation")]
|
||||
pub struct InvalidGloballyOrderedFloatError {
|
||||
float: f64,
|
||||
}
|
||||
|
@ -1,5 +1,8 @@
|
||||
use std::borrow::Cow;
|
||||
|
||||
use heed::BoxedError;
|
||||
|
||||
use super::SliceTooShortError;
|
||||
use crate::{try_split_array_at, FieldId};
|
||||
|
||||
pub struct FieldIdWordCountCodec;
|
||||
@ -7,21 +10,21 @@ pub struct FieldIdWordCountCodec;
|
||||
impl<'a> heed::BytesDecode<'a> for FieldIdWordCountCodec {
|
||||
type DItem = (FieldId, u8);
|
||||
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||
let (field_id_bytes, bytes) = try_split_array_at(bytes)?;
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
|
||||
let (field_id_bytes, bytes) = try_split_array_at(bytes).ok_or(SliceTooShortError)?;
|
||||
let field_id = u16::from_be_bytes(field_id_bytes);
|
||||
let ([word_count], _nothing) = try_split_array_at(bytes)?;
|
||||
Some((field_id, word_count))
|
||||
let ([word_count], _nothing) = try_split_array_at(bytes).ok_or(SliceTooShortError)?;
|
||||
Ok((field_id, word_count))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> heed::BytesEncode<'a> for FieldIdWordCountCodec {
|
||||
type EItem = (FieldId, u8);
|
||||
|
||||
fn bytes_encode((field_id, word_count): &Self::EItem) -> Option<Cow<[u8]>> {
|
||||
fn bytes_encode((field_id, word_count): &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
|
||||
let mut bytes = Vec::with_capacity(2 + 1);
|
||||
bytes.extend_from_slice(&field_id.to_be_bytes());
|
||||
bytes.push(*word_count);
|
||||
Some(Cow::Owned(bytes))
|
||||
Ok(Cow::Owned(bytes))
|
||||
}
|
||||
}
|
||||
|
@ -1,7 +1,7 @@
|
||||
use std::borrow::Cow;
|
||||
|
||||
use fst::Set;
|
||||
use heed::{BytesDecode, BytesEncode};
|
||||
use heed::{BoxedError, BytesDecode, BytesEncode};
|
||||
|
||||
/// A codec for values of type `Set<&[u8]>`.
|
||||
pub struct FstSetCodec;
|
||||
@ -9,15 +9,15 @@ pub struct FstSetCodec;
|
||||
impl<'a> BytesEncode<'a> for FstSetCodec {
|
||||
type EItem = Set<Vec<u8>>;
|
||||
|
||||
fn bytes_encode(item: &'a Self::EItem) -> Option<Cow<'a, [u8]>> {
|
||||
Some(Cow::Borrowed(item.as_fst().as_bytes()))
|
||||
fn bytes_encode(item: &'a Self::EItem) -> Result<Cow<'a, [u8]>, BoxedError> {
|
||||
Ok(Cow::Borrowed(item.as_fst().as_bytes()))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> BytesDecode<'a> for FstSetCodec {
|
||||
type DItem = Set<&'a [u8]>;
|
||||
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||
Set::new(bytes).ok()
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
|
||||
Set::new(bytes).map_err(Into::into)
|
||||
}
|
||||
}
|
||||
|
@ -12,8 +12,10 @@ mod str_beu32_codec;
|
||||
mod str_ref;
|
||||
mod str_str_u8_codec;
|
||||
|
||||
pub use byte_slice_ref::ByteSliceRefCodec;
|
||||
pub use byte_slice_ref::BytesRefCodec;
|
||||
use heed::BoxedError;
|
||||
pub use str_ref::StrRefCodec;
|
||||
use thiserror::Error;
|
||||
|
||||
pub use self::beu16_str_codec::BEU16StrCodec;
|
||||
pub use self::beu32_str_codec::BEU32StrCodec;
|
||||
@ -31,5 +33,9 @@ pub use self::str_str_u8_codec::{U8StrStrCodec, UncheckedU8StrStrCodec};
|
||||
pub trait BytesDecodeOwned {
|
||||
type DItem;
|
||||
|
||||
fn bytes_decode_owned(bytes: &[u8]) -> Option<Self::DItem>;
|
||||
fn bytes_decode_owned(bytes: &[u8]) -> Result<Self::DItem, BoxedError>;
|
||||
}
|
||||
|
||||
#[derive(Error, Debug)]
|
||||
#[error("the slice is too short")]
|
||||
pub struct SliceTooShortError;
|
||||
|
@ -1,5 +1,6 @@
|
||||
use std::borrow::Cow;
|
||||
|
||||
use heed::BoxedError;
|
||||
use obkv::{KvReaderU16, KvWriterU16};
|
||||
|
||||
pub struct ObkvCodec;
|
||||
@ -7,15 +8,15 @@ pub struct ObkvCodec;
|
||||
impl<'a> heed::BytesDecode<'a> for ObkvCodec {
|
||||
type DItem = KvReaderU16<'a>;
|
||||
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||
Some(KvReaderU16::new(bytes))
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
|
||||
Ok(KvReaderU16::new(bytes))
|
||||
}
|
||||
}
|
||||
|
||||
impl heed::BytesEncode<'_> for ObkvCodec {
|
||||
type EItem = KvWriterU16<Vec<u8>>;
|
||||
|
||||
fn bytes_encode(item: &Self::EItem) -> Option<Cow<[u8]>> {
|
||||
item.clone().into_inner().map(Cow::Owned).ok()
|
||||
fn bytes_encode(item: &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
|
||||
item.clone().into_inner().map(Cow::Owned).map_err(Into::into)
|
||||
}
|
||||
}
|
||||
|
@ -2,7 +2,7 @@ use std::borrow::Cow;
|
||||
use std::convert::TryInto;
|
||||
use std::mem::size_of;
|
||||
|
||||
use heed::BytesDecode;
|
||||
use heed::{BoxedError, BytesDecode};
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use crate::heed_codec::BytesDecodeOwned;
|
||||
@ -19,22 +19,22 @@ impl BoRoaringBitmapCodec {
|
||||
impl BytesDecode<'_> for BoRoaringBitmapCodec {
|
||||
type DItem = RoaringBitmap;
|
||||
|
||||
fn bytes_decode(bytes: &[u8]) -> Option<Self::DItem> {
|
||||
fn bytes_decode(bytes: &[u8]) -> Result<Self::DItem, BoxedError> {
|
||||
let mut bitmap = RoaringBitmap::new();
|
||||
|
||||
for chunk in bytes.chunks(size_of::<u32>()) {
|
||||
let bytes = chunk.try_into().ok()?;
|
||||
let bytes = chunk.try_into()?;
|
||||
bitmap.push(u32::from_ne_bytes(bytes));
|
||||
}
|
||||
|
||||
Some(bitmap)
|
||||
Ok(bitmap)
|
||||
}
|
||||
}
|
||||
|
||||
impl BytesDecodeOwned for BoRoaringBitmapCodec {
|
||||
type DItem = RoaringBitmap;
|
||||
|
||||
fn bytes_decode_owned(bytes: &[u8]) -> Option<Self::DItem> {
|
||||
fn bytes_decode_owned(bytes: &[u8]) -> Result<Self::DItem, BoxedError> {
|
||||
Self::bytes_decode(bytes)
|
||||
}
|
||||
}
|
||||
@ -42,9 +42,9 @@ impl BytesDecodeOwned for BoRoaringBitmapCodec {
|
||||
impl heed::BytesEncode<'_> for BoRoaringBitmapCodec {
|
||||
type EItem = RoaringBitmap;
|
||||
|
||||
fn bytes_encode(item: &Self::EItem) -> Option<Cow<[u8]>> {
|
||||
fn bytes_encode(item: &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
|
||||
let mut out = Vec::new();
|
||||
BoRoaringBitmapCodec::serialize_into(item, &mut out);
|
||||
Some(Cow::Owned(out))
|
||||
Ok(Cow::Owned(out))
|
||||
}
|
||||
}
|
||||
|
@ -3,9 +3,11 @@ use std::io;
|
||||
use std::mem::size_of;
|
||||
|
||||
use byteorder::{NativeEndian, ReadBytesExt, WriteBytesExt};
|
||||
use heed::BoxedError;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use crate::heed_codec::BytesDecodeOwned;
|
||||
use crate::update::del_add::{DelAdd, KvReaderDelAdd};
|
||||
|
||||
/// This is the limit where using a byteorder became less size efficient
|
||||
/// than using a direct roaring encoding, it is also the point where we are able
|
||||
@ -60,12 +62,16 @@ impl CboRoaringBitmapCodec {
|
||||
/// if the merged values length is under the threshold, values are directly
|
||||
/// serialized in the buffer else a RoaringBitmap is created from the
|
||||
/// values and is serialized in the buffer.
|
||||
pub fn merge_into(slices: &[Cow<[u8]>], buffer: &mut Vec<u8>) -> io::Result<()> {
|
||||
pub fn merge_into<I, A>(slices: I, buffer: &mut Vec<u8>) -> io::Result<()>
|
||||
where
|
||||
I: IntoIterator<Item = A>,
|
||||
A: AsRef<[u8]>,
|
||||
{
|
||||
let mut roaring = RoaringBitmap::new();
|
||||
let mut vec = Vec::new();
|
||||
|
||||
for bytes in slices {
|
||||
if bytes.len() <= THRESHOLD * size_of::<u32>() {
|
||||
if bytes.as_ref().len() <= THRESHOLD * size_of::<u32>() {
|
||||
let mut reader = bytes.as_ref();
|
||||
while let Ok(integer) = reader.read_u32::<NativeEndian>() {
|
||||
vec.push(integer);
|
||||
@ -85,7 +91,7 @@ impl CboRoaringBitmapCodec {
|
||||
}
|
||||
} else {
|
||||
// We can unwrap safely because the vector is sorted upper.
|
||||
let roaring = RoaringBitmap::from_sorted_iter(vec.into_iter()).unwrap();
|
||||
let roaring = RoaringBitmap::from_sorted_iter(vec).unwrap();
|
||||
roaring.serialize_into(buffer)?;
|
||||
}
|
||||
} else {
|
||||
@ -95,31 +101,58 @@ impl CboRoaringBitmapCodec {
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Merges a DelAdd delta into a CboRoaringBitmap.
|
||||
pub fn merge_deladd_into<'a>(
|
||||
deladd: KvReaderDelAdd<'_>,
|
||||
previous: &[u8],
|
||||
buffer: &'a mut Vec<u8>,
|
||||
) -> io::Result<Option<&'a [u8]>> {
|
||||
// Deserialize the bitmap that is already there
|
||||
let mut previous = Self::deserialize_from(previous)?;
|
||||
|
||||
// Remove integers we no more want in the previous bitmap
|
||||
if let Some(value) = deladd.get(DelAdd::Deletion) {
|
||||
previous -= Self::deserialize_from(value)?;
|
||||
}
|
||||
|
||||
// Insert the new integers we want in the previous bitmap
|
||||
if let Some(value) = deladd.get(DelAdd::Addition) {
|
||||
previous |= Self::deserialize_from(value)?;
|
||||
}
|
||||
|
||||
if previous.is_empty() {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
Self::serialize_into(&previous, buffer);
|
||||
Ok(Some(&buffer[..]))
|
||||
}
|
||||
}
|
||||
|
||||
impl heed::BytesDecode<'_> for CboRoaringBitmapCodec {
|
||||
type DItem = RoaringBitmap;
|
||||
|
||||
fn bytes_decode(bytes: &[u8]) -> Option<Self::DItem> {
|
||||
Self::deserialize_from(bytes).ok()
|
||||
fn bytes_decode(bytes: &[u8]) -> Result<Self::DItem, BoxedError> {
|
||||
Self::deserialize_from(bytes).map_err(Into::into)
|
||||
}
|
||||
}
|
||||
|
||||
impl BytesDecodeOwned for CboRoaringBitmapCodec {
|
||||
type DItem = RoaringBitmap;
|
||||
|
||||
fn bytes_decode_owned(bytes: &[u8]) -> Option<Self::DItem> {
|
||||
Self::deserialize_from(bytes).ok()
|
||||
fn bytes_decode_owned(bytes: &[u8]) -> Result<Self::DItem, BoxedError> {
|
||||
Self::deserialize_from(bytes).map_err(Into::into)
|
||||
}
|
||||
}
|
||||
|
||||
impl heed::BytesEncode<'_> for CboRoaringBitmapCodec {
|
||||
type EItem = RoaringBitmap;
|
||||
|
||||
fn bytes_encode(item: &Self::EItem) -> Option<Cow<[u8]>> {
|
||||
fn bytes_encode(item: &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
|
||||
let mut vec = Vec::with_capacity(Self::serialized_size(item));
|
||||
Self::serialize_into(item, &mut vec);
|
||||
Some(Cow::Owned(vec))
|
||||
Ok(Cow::Owned(vec))
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1,5 +1,6 @@
|
||||
use std::borrow::Cow;
|
||||
|
||||
use heed::BoxedError;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use crate::heed_codec::BytesDecodeOwned;
|
||||
@ -9,25 +10,25 @@ pub struct RoaringBitmapCodec;
|
||||
impl heed::BytesDecode<'_> for RoaringBitmapCodec {
|
||||
type DItem = RoaringBitmap;
|
||||
|
||||
fn bytes_decode(bytes: &[u8]) -> Option<Self::DItem> {
|
||||
RoaringBitmap::deserialize_unchecked_from(bytes).ok()
|
||||
fn bytes_decode(bytes: &[u8]) -> Result<Self::DItem, BoxedError> {
|
||||
RoaringBitmap::deserialize_unchecked_from(bytes).map_err(Into::into)
|
||||
}
|
||||
}
|
||||
|
||||
impl BytesDecodeOwned for RoaringBitmapCodec {
|
||||
type DItem = RoaringBitmap;
|
||||
|
||||
fn bytes_decode_owned(bytes: &[u8]) -> Option<Self::DItem> {
|
||||
RoaringBitmap::deserialize_from(bytes).ok()
|
||||
fn bytes_decode_owned(bytes: &[u8]) -> Result<Self::DItem, BoxedError> {
|
||||
RoaringBitmap::deserialize_from(bytes).map_err(Into::into)
|
||||
}
|
||||
}
|
||||
|
||||
impl heed::BytesEncode<'_> for RoaringBitmapCodec {
|
||||
type EItem = RoaringBitmap;
|
||||
|
||||
fn bytes_encode(item: &Self::EItem) -> Option<Cow<[u8]>> {
|
||||
fn bytes_encode(item: &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
|
||||
let mut bytes = Vec::with_capacity(item.serialized_size());
|
||||
item.serialize_into(&mut bytes).ok()?;
|
||||
Some(Cow::Owned(bytes))
|
||||
item.serialize_into(&mut bytes)?;
|
||||
Ok(Cow::Owned(bytes))
|
||||
}
|
||||
}
|
||||
|
@ -1,6 +1,6 @@
|
||||
use std::mem;
|
||||
|
||||
use heed::BytesDecode;
|
||||
use heed::{BoxedError, BytesDecode};
|
||||
|
||||
use crate::heed_codec::BytesDecodeOwned;
|
||||
|
||||
@ -9,15 +9,15 @@ pub struct BoRoaringBitmapLenCodec;
|
||||
impl BytesDecode<'_> for BoRoaringBitmapLenCodec {
|
||||
type DItem = u64;
|
||||
|
||||
fn bytes_decode(bytes: &[u8]) -> Option<Self::DItem> {
|
||||
Some((bytes.len() / mem::size_of::<u32>()) as u64)
|
||||
fn bytes_decode(bytes: &[u8]) -> Result<Self::DItem, BoxedError> {
|
||||
Ok((bytes.len() / mem::size_of::<u32>()) as u64)
|
||||
}
|
||||
}
|
||||
|
||||
impl BytesDecodeOwned for BoRoaringBitmapLenCodec {
|
||||
type DItem = u64;
|
||||
|
||||
fn bytes_decode_owned(bytes: &[u8]) -> Option<Self::DItem> {
|
||||
fn bytes_decode_owned(bytes: &[u8]) -> Result<Self::DItem, BoxedError> {
|
||||
Self::bytes_decode(bytes)
|
||||
}
|
||||
}
|
||||
|
@ -1,6 +1,6 @@
|
||||
use std::mem;
|
||||
|
||||
use heed::BytesDecode;
|
||||
use heed::{BoxedError, BytesDecode};
|
||||
|
||||
use super::{BoRoaringBitmapLenCodec, RoaringBitmapLenCodec};
|
||||
use crate::heed_codec::roaring_bitmap::cbo_roaring_bitmap_codec::THRESHOLD;
|
||||
@ -11,7 +11,7 @@ pub struct CboRoaringBitmapLenCodec;
|
||||
impl BytesDecode<'_> for CboRoaringBitmapLenCodec {
|
||||
type DItem = u64;
|
||||
|
||||
fn bytes_decode(bytes: &[u8]) -> Option<Self::DItem> {
|
||||
fn bytes_decode(bytes: &[u8]) -> Result<Self::DItem, BoxedError> {
|
||||
if bytes.len() <= THRESHOLD * mem::size_of::<u32>() {
|
||||
// If there is threshold or less than threshold integers that can fit into this array
|
||||
// of bytes it means that we used the ByteOrder codec serializer.
|
||||
@ -27,7 +27,7 @@ impl BytesDecode<'_> for CboRoaringBitmapLenCodec {
|
||||
impl BytesDecodeOwned for CboRoaringBitmapLenCodec {
|
||||
type DItem = u64;
|
||||
|
||||
fn bytes_decode_owned(bytes: &[u8]) -> Option<Self::DItem> {
|
||||
fn bytes_decode_owned(bytes: &[u8]) -> Result<Self::DItem, BoxedError> {
|
||||
Self::bytes_decode(bytes)
|
||||
}
|
||||
}
|
||||
|
@ -2,6 +2,7 @@ use std::io::{self, BufRead, Read};
|
||||
use std::mem;
|
||||
|
||||
use byteorder::{LittleEndian, ReadBytesExt};
|
||||
use heed::BoxedError;
|
||||
|
||||
use crate::heed_codec::BytesDecodeOwned;
|
||||
|
||||
@ -56,16 +57,16 @@ impl RoaringBitmapLenCodec {
|
||||
impl heed::BytesDecode<'_> for RoaringBitmapLenCodec {
|
||||
type DItem = u64;
|
||||
|
||||
fn bytes_decode(bytes: &[u8]) -> Option<Self::DItem> {
|
||||
RoaringBitmapLenCodec::deserialize_from_slice(bytes).ok()
|
||||
fn bytes_decode(bytes: &[u8]) -> Result<Self::DItem, BoxedError> {
|
||||
RoaringBitmapLenCodec::deserialize_from_slice(bytes).map_err(Into::into)
|
||||
}
|
||||
}
|
||||
|
||||
impl BytesDecodeOwned for RoaringBitmapLenCodec {
|
||||
type DItem = u64;
|
||||
|
||||
fn bytes_decode_owned(bytes: &[u8]) -> Option<Self::DItem> {
|
||||
RoaringBitmapLenCodec::deserialize_from_slice(bytes).ok()
|
||||
fn bytes_decode_owned(bytes: &[u8]) -> Result<Self::DItem, BoxedError> {
|
||||
RoaringBitmapLenCodec::deserialize_from_slice(bytes).map_err(Into::into)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1,30 +1,31 @@
|
||||
use std::borrow::Cow;
|
||||
use std::ffi::CStr;
|
||||
use std::str;
|
||||
|
||||
use charabia::{Language, Script};
|
||||
use heed::BoxedError;
|
||||
|
||||
pub struct ScriptLanguageCodec;
|
||||
|
||||
impl<'a> heed::BytesDecode<'a> for ScriptLanguageCodec {
|
||||
type DItem = (Script, Language);
|
||||
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||
let sep = bytes.iter().position(|b| *b == 0)?;
|
||||
let (s_bytes, l_bytes) = bytes.split_at(sep);
|
||||
let script = str::from_utf8(s_bytes).ok()?;
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
|
||||
let cstr = CStr::from_bytes_until_nul(bytes)?;
|
||||
let script = cstr.to_str()?;
|
||||
let script_name = Script::from_name(script);
|
||||
let lan = str::from_utf8(l_bytes).ok()?;
|
||||
// skip '\0' byte between the two strings.
|
||||
let lan_name = Language::from_name(&lan[1..]);
|
||||
let lan = str::from_utf8(&bytes[script.len() + 1..])?;
|
||||
let lan_name = Language::from_name(lan);
|
||||
|
||||
Some((script_name, lan_name))
|
||||
Ok((script_name, lan_name))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> heed::BytesEncode<'a> for ScriptLanguageCodec {
|
||||
type EItem = (Script, Language);
|
||||
|
||||
fn bytes_encode((script, lan): &Self::EItem) -> Option<Cow<[u8]>> {
|
||||
fn bytes_encode((script, lan): &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
|
||||
let script_name = script.name().as_bytes();
|
||||
let lan_name = lan.name().as_bytes();
|
||||
|
||||
@ -33,6 +34,6 @@ impl<'a> heed::BytesEncode<'a> for ScriptLanguageCodec {
|
||||
bytes.push(0);
|
||||
bytes.extend_from_slice(lan_name);
|
||||
|
||||
Some(Cow::Owned(bytes))
|
||||
Ok(Cow::Owned(bytes))
|
||||
}
|
||||
}
|
||||
|
@ -3,37 +3,41 @@ use std::convert::TryInto;
|
||||
use std::mem::size_of;
|
||||
use std::str;
|
||||
|
||||
use heed::BoxedError;
|
||||
|
||||
use super::SliceTooShortError;
|
||||
|
||||
pub struct StrBEU32Codec;
|
||||
|
||||
impl<'a> heed::BytesDecode<'a> for StrBEU32Codec {
|
||||
type DItem = (&'a str, u32);
|
||||
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
|
||||
let footer_len = size_of::<u32>();
|
||||
|
||||
if bytes.len() < footer_len {
|
||||
return None;
|
||||
return Err(SliceTooShortError.into());
|
||||
}
|
||||
|
||||
let (word, bytes) = bytes.split_at(bytes.len() - footer_len);
|
||||
let word = str::from_utf8(word).ok()?;
|
||||
let pos = bytes.try_into().map(u32::from_be_bytes).ok()?;
|
||||
let word = str::from_utf8(word)?;
|
||||
let pos = bytes.try_into().map(u32::from_be_bytes)?;
|
||||
|
||||
Some((word, pos))
|
||||
Ok((word, pos))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> heed::BytesEncode<'a> for StrBEU32Codec {
|
||||
type EItem = (&'a str, u32);
|
||||
|
||||
fn bytes_encode((word, pos): &Self::EItem) -> Option<Cow<[u8]>> {
|
||||
fn bytes_encode((word, pos): &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
|
||||
let pos = pos.to_be_bytes();
|
||||
|
||||
let mut bytes = Vec::with_capacity(word.len() + pos.len());
|
||||
bytes.extend_from_slice(word.as_bytes());
|
||||
bytes.extend_from_slice(&pos[..]);
|
||||
|
||||
Some(Cow::Owned(bytes))
|
||||
Ok(Cow::Owned(bytes))
|
||||
}
|
||||
}
|
||||
|
||||
@ -42,26 +46,27 @@ pub struct StrBEU16Codec;
|
||||
impl<'a> heed::BytesDecode<'a> for StrBEU16Codec {
|
||||
type DItem = (&'a str, u16);
|
||||
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
|
||||
let footer_len = size_of::<u16>();
|
||||
|
||||
if bytes.len() < footer_len + 1 {
|
||||
return None;
|
||||
return Err(SliceTooShortError.into());
|
||||
}
|
||||
|
||||
let (word_plus_nul_byte, bytes) = bytes.split_at(bytes.len() - footer_len);
|
||||
let (_, word) = word_plus_nul_byte.split_last()?;
|
||||
let word = str::from_utf8(word).ok()?;
|
||||
let pos = bytes.try_into().map(u16::from_be_bytes).ok()?;
|
||||
// unwrap: we just checked the footer + 1 above.
|
||||
let (_, word) = word_plus_nul_byte.split_last().unwrap();
|
||||
let word = str::from_utf8(word)?;
|
||||
let pos = bytes.try_into().map(u16::from_be_bytes)?;
|
||||
|
||||
Some((word, pos))
|
||||
Ok((word, pos))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> heed::BytesEncode<'a> for StrBEU16Codec {
|
||||
type EItem = (&'a str, u16);
|
||||
|
||||
fn bytes_encode((word, pos): &Self::EItem) -> Option<Cow<[u8]>> {
|
||||
fn bytes_encode((word, pos): &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
|
||||
let pos = pos.to_be_bytes();
|
||||
|
||||
let mut bytes = Vec::with_capacity(word.len() + 1 + pos.len());
|
||||
@ -69,6 +74,6 @@ impl<'a> heed::BytesEncode<'a> for StrBEU16Codec {
|
||||
bytes.push(0);
|
||||
bytes.extend_from_slice(&pos[..]);
|
||||
|
||||
Some(Cow::Owned(bytes))
|
||||
Ok(Cow::Owned(bytes))
|
||||
}
|
||||
}
|
||||
|
@ -1,6 +1,6 @@
|
||||
use std::borrow::Cow;
|
||||
|
||||
use heed::{BytesDecode, BytesEncode};
|
||||
use heed::{BoxedError, BytesDecode, BytesEncode};
|
||||
|
||||
/// A codec for values of type `&str`. Unlike `Str`, its `EItem` and `DItem` associated
|
||||
/// types are equivalent (= `&'a str`) and these values can reside within another structure.
|
||||
@ -8,15 +8,14 @@ pub struct StrRefCodec;
|
||||
impl<'a> BytesEncode<'a> for StrRefCodec {
|
||||
type EItem = &'a str;
|
||||
|
||||
fn bytes_encode(item: &'a &'a str) -> Option<Cow<'a, [u8]>> {
|
||||
Some(Cow::Borrowed(item.as_bytes()))
|
||||
fn bytes_encode(item: &'a &'a str) -> Result<Cow<'a, [u8]>, BoxedError> {
|
||||
Ok(Cow::Borrowed(item.as_bytes()))
|
||||
}
|
||||
}
|
||||
impl<'a> BytesDecode<'a> for StrRefCodec {
|
||||
type DItem = &'a str;
|
||||
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||
let s = std::str::from_utf8(bytes).ok()?;
|
||||
Some(s)
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
|
||||
std::str::from_utf8(bytes).map_err(Into::into)
|
||||
}
|
||||
}
|
||||
|
@ -1,32 +1,36 @@
|
||||
use std::borrow::Cow;
|
||||
use std::ffi::CStr;
|
||||
use std::str;
|
||||
|
||||
use heed::BoxedError;
|
||||
|
||||
use super::SliceTooShortError;
|
||||
|
||||
pub struct U8StrStrCodec;
|
||||
|
||||
impl<'a> heed::BytesDecode<'a> for U8StrStrCodec {
|
||||
type DItem = (u8, &'a str, &'a str);
|
||||
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||
let (n, bytes) = bytes.split_first()?;
|
||||
let s1_end = bytes.iter().position(|b| *b == 0)?;
|
||||
let (s1_bytes, rest) = bytes.split_at(s1_end);
|
||||
let s2_bytes = &rest[1..];
|
||||
let s1 = str::from_utf8(s1_bytes).ok()?;
|
||||
let s2 = str::from_utf8(s2_bytes).ok()?;
|
||||
Some((*n, s1, s2))
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
|
||||
let (n, bytes) = bytes.split_first().ok_or(SliceTooShortError)?;
|
||||
let cstr = CStr::from_bytes_until_nul(bytes)?;
|
||||
let s1 = cstr.to_str()?;
|
||||
// skip '\0' byte between the two strings.
|
||||
let s2 = str::from_utf8(&bytes[s1.len() + 1..])?;
|
||||
Ok((*n, s1, s2))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> heed::BytesEncode<'a> for U8StrStrCodec {
|
||||
type EItem = (u8, &'a str, &'a str);
|
||||
|
||||
fn bytes_encode((n, s1, s2): &Self::EItem) -> Option<Cow<[u8]>> {
|
||||
fn bytes_encode((n, s1, s2): &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
|
||||
let mut bytes = Vec::with_capacity(s1.len() + s2.len() + 1);
|
||||
bytes.push(*n);
|
||||
bytes.extend_from_slice(s1.as_bytes());
|
||||
bytes.push(0);
|
||||
bytes.extend_from_slice(s2.as_bytes());
|
||||
Some(Cow::Owned(bytes))
|
||||
Ok(Cow::Owned(bytes))
|
||||
}
|
||||
}
|
||||
pub struct UncheckedU8StrStrCodec;
|
||||
@ -34,24 +38,25 @@ pub struct UncheckedU8StrStrCodec;
|
||||
impl<'a> heed::BytesDecode<'a> for UncheckedU8StrStrCodec {
|
||||
type DItem = (u8, &'a [u8], &'a [u8]);
|
||||
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||
let (n, bytes) = bytes.split_first()?;
|
||||
let s1_end = bytes.iter().position(|b| *b == 0)?;
|
||||
let (s1_bytes, rest) = bytes.split_at(s1_end);
|
||||
let s2_bytes = &rest[1..];
|
||||
Some((*n, s1_bytes, s2_bytes))
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
|
||||
let (n, bytes) = bytes.split_first().ok_or(SliceTooShortError)?;
|
||||
let cstr = CStr::from_bytes_until_nul(bytes)?;
|
||||
let s1_bytes = cstr.to_bytes();
|
||||
// skip '\0' byte between the two strings.
|
||||
let s2_bytes = &bytes[s1_bytes.len() + 1..];
|
||||
Ok((*n, s1_bytes, s2_bytes))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> heed::BytesEncode<'a> for UncheckedU8StrStrCodec {
|
||||
type EItem = (u8, &'a [u8], &'a [u8]);
|
||||
|
||||
fn bytes_encode((n, s1, s2): &Self::EItem) -> Option<Cow<[u8]>> {
|
||||
fn bytes_encode((n, s1, s2): &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
|
||||
let mut bytes = Vec::with_capacity(s1.len() + s2.len() + 1);
|
||||
bytes.push(*n);
|
||||
bytes.extend_from_slice(s1);
|
||||
bytes.push(0);
|
||||
bytes.extend_from_slice(s2);
|
||||
Some(Cow::Owned(bytes))
|
||||
Ok(Cow::Owned(bytes))
|
||||
}
|
||||
}
|
||||
|
1170
milli/src/index.rs
1170
milli/src/index.rs
File diff suppressed because it is too large
Load Diff
@ -10,18 +10,18 @@ pub mod documents;
|
||||
|
||||
mod asc_desc;
|
||||
mod criterion;
|
||||
pub mod distance;
|
||||
mod error;
|
||||
mod external_documents_ids;
|
||||
pub mod facet;
|
||||
mod fields_ids_map;
|
||||
pub mod heed_codec;
|
||||
pub mod index;
|
||||
pub mod prompt;
|
||||
pub mod proximity;
|
||||
mod readable_slices;
|
||||
pub mod score_details;
|
||||
mod search;
|
||||
pub mod update;
|
||||
pub mod vector;
|
||||
|
||||
#[cfg(test)]
|
||||
#[macro_use]
|
||||
@ -32,13 +32,12 @@ use std::convert::{TryFrom, TryInto};
|
||||
use std::hash::BuildHasherDefault;
|
||||
|
||||
use charabia::normalizer::{CharNormalizer, CompatibilityDecompositionNormalizer};
|
||||
pub use distance::dot_product_similarity;
|
||||
pub use filter_parser::{Condition, FilterCondition, Span, Token};
|
||||
use fxhash::{FxHasher32, FxHasher64};
|
||||
pub use grenad::CompressionType;
|
||||
pub use search::new::{
|
||||
execute_search, DefaultSearchLogger, GeoSortStrategy, SearchContext, SearchLogger,
|
||||
VisualSearchLogger,
|
||||
execute_search, filtered_universe, DefaultSearchLogger, GeoSortStrategy, SearchContext,
|
||||
SearchLogger, VisualSearchLogger,
|
||||
};
|
||||
use serde_json::Value;
|
||||
pub use {charabia as tokenizer, heed};
|
||||
@ -66,9 +65,9 @@ pub use self::search::{
|
||||
pub type Result<T> = std::result::Result<T, error::Error>;
|
||||
|
||||
pub type Attribute = u32;
|
||||
pub type BEU16 = heed::zerocopy::U16<heed::byteorder::BE>;
|
||||
pub type BEU32 = heed::zerocopy::U32<heed::byteorder::BE>;
|
||||
pub type BEU64 = heed::zerocopy::U64<heed::byteorder::BE>;
|
||||
pub type BEU16 = heed::types::U16<heed::byteorder::BE>;
|
||||
pub type BEU32 = heed::types::U32<heed::byteorder::BE>;
|
||||
pub type BEU64 = heed::types::U64<heed::byteorder::BE>;
|
||||
pub type DocumentId = u32;
|
||||
pub type FastMap4<K, V> = HashMap<K, V, BuildHasherDefault<FxHasher32>>;
|
||||
pub type FastMap8<K, V> = HashMap<K, V, BuildHasherDefault<FxHasher64>>;
|
||||
|
97
milli/src/prompt/context.rs
Normal file
97
milli/src/prompt/context.rs
Normal file
@ -0,0 +1,97 @@
|
||||
use liquid::model::{
|
||||
ArrayView, DisplayCow, KStringCow, ObjectRender, ObjectSource, State, Value as LiquidValue,
|
||||
};
|
||||
use liquid::{ObjectView, ValueView};
|
||||
|
||||
use super::document::Document;
|
||||
use super::fields::Fields;
|
||||
use crate::FieldsIdsMap;
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Context<'a> {
|
||||
document: &'a Document<'a>,
|
||||
fields: Fields<'a>,
|
||||
}
|
||||
|
||||
impl<'a> Context<'a> {
|
||||
pub fn new(document: &'a Document<'a>, field_id_map: &'a FieldsIdsMap) -> Self {
|
||||
Self { document, fields: Fields::new(document, field_id_map) }
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> ObjectView for Context<'a> {
|
||||
fn as_value(&self) -> &dyn ValueView {
|
||||
self
|
||||
}
|
||||
|
||||
fn size(&self) -> i64 {
|
||||
2
|
||||
}
|
||||
|
||||
fn keys<'k>(&'k self) -> Box<dyn Iterator<Item = KStringCow<'k>> + 'k> {
|
||||
Box::new(["doc", "fields"].iter().map(|s| KStringCow::from_static(s)))
|
||||
}
|
||||
|
||||
fn values<'k>(&'k self) -> Box<dyn Iterator<Item = &'k dyn ValueView> + 'k> {
|
||||
Box::new(
|
||||
std::iter::once(self.document.as_value())
|
||||
.chain(std::iter::once(self.fields.as_value())),
|
||||
)
|
||||
}
|
||||
|
||||
fn iter<'k>(&'k self) -> Box<dyn Iterator<Item = (KStringCow<'k>, &'k dyn ValueView)> + 'k> {
|
||||
Box::new(self.keys().zip(self.values()))
|
||||
}
|
||||
|
||||
fn contains_key(&self, index: &str) -> bool {
|
||||
index == "doc" || index == "fields"
|
||||
}
|
||||
|
||||
fn get<'s>(&'s self, index: &str) -> Option<&'s dyn ValueView> {
|
||||
match index {
|
||||
"doc" => Some(self.document.as_value()),
|
||||
"fields" => Some(self.fields.as_value()),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> ValueView for Context<'a> {
|
||||
fn as_debug(&self) -> &dyn std::fmt::Debug {
|
||||
self
|
||||
}
|
||||
|
||||
fn render(&self) -> liquid::model::DisplayCow<'_> {
|
||||
DisplayCow::Owned(Box::new(ObjectRender::new(self)))
|
||||
}
|
||||
|
||||
fn source(&self) -> liquid::model::DisplayCow<'_> {
|
||||
DisplayCow::Owned(Box::new(ObjectSource::new(self)))
|
||||
}
|
||||
|
||||
fn type_name(&self) -> &'static str {
|
||||
"object"
|
||||
}
|
||||
|
||||
fn query_state(&self, state: liquid::model::State) -> bool {
|
||||
match state {
|
||||
State::Truthy => true,
|
||||
State::DefaultValue | State::Empty | State::Blank => false,
|
||||
}
|
||||
}
|
||||
|
||||
fn to_kstr(&self) -> liquid::model::KStringCow<'_> {
|
||||
let s = ObjectRender::new(self).to_string();
|
||||
KStringCow::from_string(s)
|
||||
}
|
||||
|
||||
fn to_value(&self) -> LiquidValue {
|
||||
LiquidValue::Object(
|
||||
self.iter().map(|(k, x)| (k.to_string().into(), x.to_value())).collect(),
|
||||
)
|
||||
}
|
||||
|
||||
fn as_object(&self) -> Option<&dyn ObjectView> {
|
||||
Some(self)
|
||||
}
|
||||
}
|
131
milli/src/prompt/document.rs
Normal file
131
milli/src/prompt/document.rs
Normal file
@ -0,0 +1,131 @@
|
||||
use std::cell::OnceCell;
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
use liquid::model::{
|
||||
DisplayCow, KString, KStringCow, ObjectRender, ObjectSource, State, Value as LiquidValue,
|
||||
};
|
||||
use liquid::{ObjectView, ValueView};
|
||||
|
||||
use crate::update::del_add::{DelAdd, KvReaderDelAdd};
|
||||
use crate::FieldsIdsMap;
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Document<'a>(BTreeMap<&'a str, (&'a [u8], ParsedValue)>);
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
struct ParsedValue(std::cell::OnceCell<LiquidValue>);
|
||||
|
||||
impl ParsedValue {
|
||||
fn empty() -> ParsedValue {
|
||||
ParsedValue(OnceCell::new())
|
||||
}
|
||||
|
||||
fn get(&self, raw: &[u8]) -> &LiquidValue {
|
||||
self.0.get_or_init(|| {
|
||||
let value: serde_json::Value = serde_json::from_slice(raw).unwrap();
|
||||
liquid::model::to_value(&value).unwrap()
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Document<'a> {
|
||||
pub fn new(
|
||||
data: obkv::KvReaderU16<'a>,
|
||||
side: DelAdd,
|
||||
inverted_field_map: &'a FieldsIdsMap,
|
||||
) -> Self {
|
||||
let mut out_data = BTreeMap::new();
|
||||
for (fid, raw) in data {
|
||||
let obkv = KvReaderDelAdd::new(raw);
|
||||
let Some(raw) = obkv.get(side) else {
|
||||
continue;
|
||||
};
|
||||
let Some(name) = inverted_field_map.name(fid) else {
|
||||
continue;
|
||||
};
|
||||
out_data.insert(name, (raw, ParsedValue::empty()));
|
||||
}
|
||||
Self(out_data)
|
||||
}
|
||||
|
||||
fn is_empty(&self) -> bool {
|
||||
self.0.is_empty()
|
||||
}
|
||||
|
||||
fn len(&self) -> usize {
|
||||
self.0.len()
|
||||
}
|
||||
|
||||
fn iter(&self) -> impl Iterator<Item = (KString, LiquidValue)> + '_ {
|
||||
self.0.iter().map(|(&k, (raw, data))| (k.to_owned().into(), data.get(raw).to_owned()))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> ObjectView for Document<'a> {
|
||||
fn as_value(&self) -> &dyn ValueView {
|
||||
self
|
||||
}
|
||||
|
||||
fn size(&self) -> i64 {
|
||||
self.len() as i64
|
||||
}
|
||||
|
||||
fn keys<'k>(&'k self) -> Box<dyn Iterator<Item = KStringCow<'k>> + 'k> {
|
||||
let keys = BTreeMap::keys(&self.0).map(|&s| s.into());
|
||||
Box::new(keys)
|
||||
}
|
||||
|
||||
fn values<'k>(&'k self) -> Box<dyn Iterator<Item = &'k dyn ValueView> + 'k> {
|
||||
Box::new(self.0.values().map(|(raw, v)| v.get(raw) as &dyn ValueView))
|
||||
}
|
||||
|
||||
fn iter<'k>(&'k self) -> Box<dyn Iterator<Item = (KStringCow<'k>, &'k dyn ValueView)> + 'k> {
|
||||
Box::new(self.0.iter().map(|(&k, (raw, data))| (k.into(), data.get(raw) as &dyn ValueView)))
|
||||
}
|
||||
|
||||
fn contains_key(&self, index: &str) -> bool {
|
||||
self.0.contains_key(index)
|
||||
}
|
||||
|
||||
fn get<'s>(&'s self, index: &str) -> Option<&'s dyn ValueView> {
|
||||
self.0.get(index).map(|(raw, v)| v.get(raw) as &dyn ValueView)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> ValueView for Document<'a> {
|
||||
fn as_debug(&self) -> &dyn std::fmt::Debug {
|
||||
self
|
||||
}
|
||||
|
||||
fn render(&self) -> liquid::model::DisplayCow<'_> {
|
||||
DisplayCow::Owned(Box::new(ObjectRender::new(self)))
|
||||
}
|
||||
|
||||
fn source(&self) -> liquid::model::DisplayCow<'_> {
|
||||
DisplayCow::Owned(Box::new(ObjectSource::new(self)))
|
||||
}
|
||||
|
||||
fn type_name(&self) -> &'static str {
|
||||
"object"
|
||||
}
|
||||
|
||||
fn query_state(&self, state: liquid::model::State) -> bool {
|
||||
match state {
|
||||
State::Truthy => true,
|
||||
State::DefaultValue | State::Empty | State::Blank => self.is_empty(),
|
||||
}
|
||||
}
|
||||
|
||||
fn to_kstr(&self) -> liquid::model::KStringCow<'_> {
|
||||
let s = ObjectRender::new(self).to_string();
|
||||
KStringCow::from_string(s)
|
||||
}
|
||||
|
||||
fn to_value(&self) -> LiquidValue {
|
||||
LiquidValue::Object(self.iter().collect())
|
||||
}
|
||||
|
||||
fn as_object(&self) -> Option<&dyn ObjectView> {
|
||||
Some(self)
|
||||
}
|
||||
}
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user