mirror of https://github.com/meilisearch/meilisearch.git
synced 2025-11-30 01:35:36 +00:00

Compare commits: 19 commits, release-v1...document-b
| Author | SHA1 | Date |
|---|---|---|
| | b43edabbcd | |
| | f2041fd78c | |
| | 2495058a6e | |
| | 4cfb48fbb6 | |
| | 67dc0268c5 | |
| | 48865470d7 | |
| | c810df4d9f | |
| | 5e3df76699 | |
| | 02765fb267 | |
| | 841165d529 | |
| | ea4a266f08 | |
| | 49f069ed97 | |
| | be16b99d40 | |
| | ec0c09d17c | |
| | a9230f6e6c | |
| | 62ea81bef6 | |
| | f28f09ae2f | |
| | 62cc97ba70 | |
| | fed59cc1d5 | |
.github/workflows/publish-apt-brew-pkg.yml (vendored): 2 changes

@@ -50,7 +50,7 @@ jobs:
     needs: check-version
     steps:
       - name: Create PR to Homebrew
-        uses: mislav/bump-homebrew-formula-action@v2
+        uses: mislav/bump-homebrew-formula-action@v3
         with:
           formula-name: meilisearch
           formula-path: Formula/m/meilisearch.rb
.github/workflows/publish-docker-images.yml (vendored): 2 changes

@@ -63,7 +63,7 @@ jobs:
         uses: docker/setup-buildx-action@v3

       - name: Login to Docker Hub
-        uses: docker/login-action@v2
+        uses: docker/login-action@v3
         with:
           username: ${{ secrets.DOCKERHUB_USERNAME }}
           password: ${{ secrets.DOCKERHUB_TOKEN }}
.github/workflows/sdks-tests.yml (vendored): 4 changes

@@ -160,7 +160,7 @@ jobs:
         with:
           repository: meilisearch/meilisearch-js
       - name: Setup node
-        uses: actions/setup-node@v3
+        uses: actions/setup-node@v4
         with:
           cache: 'yarn'
       - name: Install dependencies
@@ -318,7 +318,7 @@ jobs:
         with:
           repository: meilisearch/meilisearch-js-plugins
       - name: Setup node
-        uses: actions/setup-node@v3
+        uses: actions/setup-node@v4
         with:
           cache: yarn
       - name: Install dependencies
.github/workflows/test-suite.yml (vendored): 10 changes

@@ -43,7 +43,7 @@ jobs:
           toolchain: nightly
           override: true
       - name: Cache dependencies
-        uses: Swatinem/rust-cache@v2.6.2
+        uses: Swatinem/rust-cache@v2.7.1
       - name: Run cargo check without any default features
         uses: actions-rs/cargo@v1
         with:
@@ -65,7 +65,7 @@ jobs:
     steps:
       - uses: actions/checkout@v3
       - name: Cache dependencies
-        uses: Swatinem/rust-cache@v2.6.2
+        uses: Swatinem/rust-cache@v2.7.1
       - name: Run cargo check without any default features
         uses: actions-rs/cargo@v1
         with:
@@ -149,7 +149,7 @@ jobs:
           toolchain: stable
           override: true
       - name: Cache dependencies
-        uses: Swatinem/rust-cache@v2.6.2
+        uses: Swatinem/rust-cache@v2.7.1
       - name: Run tests in debug
         uses: actions-rs/cargo@v1
         with:
@@ -168,7 +168,7 @@ jobs:
           override: true
           components: clippy
       - name: Cache dependencies
-        uses: Swatinem/rust-cache@v2.6.2
+        uses: Swatinem/rust-cache@v2.7.1
       - name: Run cargo clippy
         uses: actions-rs/cargo@v1
         with:
@@ -187,7 +187,7 @@ jobs:
           override: true
           components: rustfmt
       - name: Cache dependencies
-        uses: Swatinem/rust-cache@v2.6.2
+        uses: Swatinem/rust-cache@v2.7.1
       - name: Run cargo fmt
         # Since we never ran the `build.rs` script in the benchmark directory we are missing one auto-generated import file.
         # Since we want to trigger (and fail) this action as fast as possible, instead of building the benchmark crate
Cargo.lock (generated): 878 changes (diff suppressed because it is too large)
Cargo.toml: 2 changes

@@ -2,7 +2,6 @@
 resolver = "2"
 members = [
     "meilisearch",
-    "meilitool",
     "meilisearch-types",
     "meilisearch-auth",
     "meili-snap",
@@ -19,7 +18,7 @@ members = [
 ]

 [workspace.package]
-version = "1.5.0"
+version = "1.4.1"
 authors = ["Quentin de Quelen <quentin@dequelen.me>", "Clément Renault <clement@meilisearch.com>"]
 description = "Meilisearch HTTP server"
 homepage = "https://meilisearch.com"
Dockerfile: 11 changes

@@ -3,7 +3,7 @@ FROM rust:alpine3.16 AS compiler

 RUN apk add -q --update-cache --no-cache build-base openssl-dev

-WORKDIR /
+WORKDIR /meilisearch

 ARG COMMIT_SHA
 ARG COMMIT_DATE
@@ -17,7 +17,7 @@ RUN set -eux; \
     if [ "$apkArch" = "aarch64" ]; then \
         export JEMALLOC_SYS_WITH_LG_PAGE=16; \
     fi && \
-    cargo build --release -p meilisearch -p meilitool
+    cargo build --release

# Run
FROM alpine:3.16
@@ -28,10 +28,9 @@ ENV MEILI_SERVER_PROVIDER docker

 RUN apk update --quiet \
     && apk add -q --no-cache libgcc tini curl

-# add meilisearch and meilitool to the `/bin` so you can run it from anywhere
-# and it's easy to find.
-COPY --from=compiler /target/release/meilisearch /bin/meilisearch
-COPY --from=compiler /target/release/meilitool /bin/meilitool
+# add meilisearch to the `/bin` so you can run it from anywhere and it's easy
+# to find.
+COPY --from=compiler /meilisearch/target/release/meilisearch /bin/meilisearch
 # To stay compatible with the older version of the container (pre v0.27.0) we're
 # going to symlink the meilisearch binary in the path to `/meilisearch`
 RUN ln -s /bin/meilisearch /meilisearch
README.md: 6 changes

@@ -25,12 +25,6 @@

 <p align="center">⚡ A lightning-fast search engine that fits effortlessly into your apps, websites, and workflow 🔍</p>

----
-
-### 🔥 On November 2nd, we are hosting our first-ever live demo and product updates for [Meilisearch Cloud](https://www.meilisearch.com/cloud?utm_campaign=oss&utm_source=github&utm_medium=meilisearch). Make sure to [register here](https://us06web.zoom.us/meeting/register/tZMlc-mqrjIsH912-HTRe-AaT-pp41bDe81a#/registration) and bring your questions for live Q&A!
-
----
-
 Meilisearch helps you shape a delightful search experience in a snap, offering features that work out-of-the-box to speed up your workflow.

 <p align="center" name="demo">
@@ -12,7 +12,7 @@ use milli::heed::EnvOpenOptions;
 use milli::update::{
     IndexDocuments, IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig, Settings,
 };
-use milli::{Criterion, Filter, Index, Object, TermsMatchingStrategy};
+use milli::{Filter, Index, Object, RankingRule, TermsMatchingStrategy};
 use serde_json::Value;

 pub struct Conf<'a> {
@@ -78,11 +78,11 @@ pub fn base_setup(conf: &Conf) -> Index {

     if let Some(criterion) = conf.criterion {
         builder.reset_filterable_fields();
-        builder.reset_criteria();
+        builder.reset_ranking_rules();
         builder.reset_stop_words();

-        let criterion = criterion.iter().map(|s| Criterion::from_str(s).unwrap()).collect();
-        builder.set_criteria(criterion);
+        let criterion = criterion.iter().map(|s| RankingRule::from_str(s).unwrap()).collect();
+        builder.set_ranking_rules(criterion);
     }

     (conf.configure)(&mut builder);
@@ -526,12 +526,12 @@ pub(crate) mod test {
         assert!(indexes.is_empty());

         // products
-        insta::assert_json_snapshot!(products.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
+        insta::assert_json_snapshot!(products.metadata(), @r###"
         {
           "uid": "products",
           "primaryKey": "sku",
-          "createdAt": "[now]",
-          "updatedAt": "[now]"
+          "createdAt": "2022-10-09T20:27:22.688964637Z",
+          "updatedAt": "2022-10-09T20:27:23.951017769Z"
         }
         "###);
@@ -541,12 +541,12 @@ pub(crate) mod test {
         meili_snap::snapshot_hash!(format!("{:#?}", documents), @"548284a84de510f71e88e6cdea495cf5");

         // movies
-        insta::assert_json_snapshot!(movies.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
+        insta::assert_json_snapshot!(movies.metadata(), @r###"
         {
           "uid": "movies",
           "primaryKey": "id",
-          "createdAt": "[now]",
-          "updatedAt": "[now]"
+          "createdAt": "2022-10-09T20:27:22.197788495Z",
+          "updatedAt": "2022-10-09T20:28:01.93111053Z"
         }
         "###);
@@ -571,12 +571,12 @@ pub(crate) mod test {
         meili_snap::snapshot_hash!(format!("{:#?}", documents), @"d751713988987e9331980363e24189ce");

         // spells
-        insta::assert_json_snapshot!(spells.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
+        insta::assert_json_snapshot!(spells.metadata(), @r###"
         {
           "uid": "dnd_spells",
           "primaryKey": "index",
-          "createdAt": "[now]",
-          "updatedAt": "[now]"
+          "createdAt": "2022-10-09T20:27:24.242683494Z",
+          "updatedAt": "2022-10-09T20:27:24.312809641Z"
         }
         "###);
@@ -617,12 +617,12 @@ pub(crate) mod test {
         assert!(indexes.is_empty());

         // products
-        insta::assert_json_snapshot!(products.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
+        insta::assert_json_snapshot!(products.metadata(), @r###"
         {
           "uid": "products",
           "primaryKey": "sku",
-          "createdAt": "[now]",
-          "updatedAt": "[now]"
+          "createdAt": "2023-01-30T16:25:56.595257Z",
+          "updatedAt": "2023-01-30T16:25:58.70348Z"
         }
         "###);
@@ -632,12 +632,12 @@ pub(crate) mod test {
         meili_snap::snapshot_hash!(format!("{:#?}", documents), @"548284a84de510f71e88e6cdea495cf5");

         // movies
-        insta::assert_json_snapshot!(movies.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
+        insta::assert_json_snapshot!(movies.metadata(), @r###"
         {
           "uid": "movies",
           "primaryKey": "id",
-          "createdAt": "[now]",
-          "updatedAt": "[now]"
+          "createdAt": "2023-01-30T16:25:56.192178Z",
+          "updatedAt": "2023-01-30T16:25:56.455714Z"
         }
         "###);
@@ -647,12 +647,12 @@ pub(crate) mod test {
         meili_snap::snapshot_hash!(format!("{:#?}", documents), @"0227598af846e574139ee0b80e03a720");

         // spells
-        insta::assert_json_snapshot!(spells.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
+        insta::assert_json_snapshot!(spells.metadata(), @r###"
         {
           "uid": "dnd_spells",
           "primaryKey": "index",
-          "createdAt": "[now]",
-          "updatedAt": "[now]"
+          "createdAt": "2023-01-30T16:25:58.876405Z",
+          "updatedAt": "2023-01-30T16:25:59.079906Z"
         }
         "###);
@@ -46,6 +46,7 @@ pub type Checked = settings::Checked;
 pub type Unchecked = settings::Unchecked;

 pub type Task = updates::UpdateEntry;
+pub type Kind = updates::UpdateMeta;

 // everything related to the errors
 pub type ResponseError = errors::ResponseError;
@@ -107,8 +108,11 @@ impl V2Reader {
     pub fn indexes(&self) -> Result<impl Iterator<Item = Result<V2IndexReader>> + '_> {
         Ok(self.index_uuid.iter().map(|index| -> Result<_> {
             V2IndexReader::new(
-                index.uid.clone(),
                 &self.dump.path().join("indexes").join(format!("index-{}", index.uuid)),
+                index,
+                BufReader::new(
+                    File::open(self.dump.path().join("updates").join("data.jsonl")).unwrap(),
+                ),
             )
         }))
     }
@@ -143,16 +147,41 @@ pub struct V2IndexReader {
 }

 impl V2IndexReader {
-    pub fn new(name: String, path: &Path) -> Result<Self> {
+    pub fn new(path: &Path, index_uuid: &IndexUuid, tasks: BufReader<File>) -> Result<Self> {
         let meta = File::open(path.join("meta.json"))?;
         let meta: DumpMeta = serde_json::from_reader(meta)?;

+        let mut created_at = None;
+        let mut updated_at = None;
+
+        for line in tasks.lines() {
+            let task: Task = serde_json::from_str(&line?)?;
+            if !(task.uuid == index_uuid.uuid && task.is_finished()) {
+                continue;
+            }
+
+            let new_created_at = match task.update.meta() {
+                Kind::DocumentsAddition { .. } | Kind::Settings(_) => task.update.finished_at(),
+                _ => None,
+            };
+            let new_updated_at = task.update.finished_at();
+
+            if created_at.is_none() || created_at > new_created_at {
+                created_at = new_created_at;
+            }
+
+            if updated_at.is_none() || updated_at < new_updated_at {
+                updated_at = new_updated_at;
+            }
+        }
+
+        let current_time = OffsetDateTime::now_utc();
+
         let metadata = IndexMetadata {
-            uid: name,
+            uid: index_uuid.uid.clone(),
             primary_key: meta.primary_key,
-            // FIXME: Iterate over the whole task queue to find the creation and last update date.
-            created_at: OffsetDateTime::now_utc(),
-            updated_at: OffsetDateTime::now_utc(),
+            created_at: created_at.unwrap_or(current_time),
+            updated_at: updated_at.unwrap_or(current_time),
         };

         let ret = V2IndexReader {
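The new `V2IndexReader::new` derives the index dates by folding over that index's finished tasks: `created_at` keeps the earliest `finished_at` among documents-addition and settings tasks, `updated_at` keeps the latest `finished_at` of any finished task, and both fall back to the current time when no task qualifies. The `is_none()` guards matter because `Option` orders `None` before `Some`, so a plain comparison would never replace the initial `None`. A self-contained sketch of the fold, with illustrative timestamps:

    // Keep the earliest candidate for created_at, the latest for updated_at.
    let mut created_at: Option<u32> = None;
    let mut updated_at: Option<u32> = None;
    for ts in [Some(7), Some(3), Some(5)] {
        if created_at.is_none() || created_at > ts {
            created_at = ts;
        }
        if updated_at.is_none() || updated_at < ts {
            updated_at = ts;
        }
    }
    assert_eq!((created_at, updated_at), (Some(3), Some(7)));
    // Why the is_none() guard is needed: None sorts before any Some value.
    assert!(None::<u32> < Some(0));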
@@ -248,12 +277,12 @@ pub(crate) mod test {
         assert!(indexes.is_empty());

         // products
-        insta::assert_json_snapshot!(products.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
+        insta::assert_json_snapshot!(products.metadata(), @r###"
         {
           "uid": "products",
           "primaryKey": "sku",
-          "createdAt": "[now]",
-          "updatedAt": "[now]"
+          "createdAt": "2022-10-09T20:27:22.688964637Z",
+          "updatedAt": "2022-10-09T20:27:23.951017769Z"
         }
         "###);
@@ -263,12 +292,12 @@ pub(crate) mod test {
         meili_snap::snapshot_hash!(format!("{:#?}", documents), @"548284a84de510f71e88e6cdea495cf5");

         // movies
-        insta::assert_json_snapshot!(movies.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
+        insta::assert_json_snapshot!(movies.metadata(), @r###"
         {
           "uid": "movies",
           "primaryKey": "id",
-          "createdAt": "[now]",
-          "updatedAt": "[now]"
+          "createdAt": "2022-10-09T20:27:22.197788495Z",
+          "updatedAt": "2022-10-09T20:28:01.93111053Z"
         }
         "###);
@@ -293,12 +322,12 @@ pub(crate) mod test {
         meili_snap::snapshot_hash!(format!("{:#?}", documents), @"d751713988987e9331980363e24189ce");

         // spells
-        insta::assert_json_snapshot!(spells.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
+        insta::assert_json_snapshot!(spells.metadata(), @r###"
         {
           "uid": "dnd_spells",
           "primaryKey": "index",
-          "createdAt": "[now]",
-          "updatedAt": "[now]"
+          "createdAt": "2022-10-09T20:27:24.242683494Z",
+          "updatedAt": "2022-10-09T20:27:24.312809641Z"
         }
         "###);
@@ -340,12 +369,12 @@ pub(crate) mod test {
         assert!(indexes.is_empty());

         // products
-        insta::assert_json_snapshot!(products.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
+        insta::assert_json_snapshot!(products.metadata(), @r###"
         {
           "uid": "products",
           "primaryKey": "sku",
-          "createdAt": "[now]",
-          "updatedAt": "[now]"
+          "createdAt": "2023-01-30T16:25:56.595257Z",
+          "updatedAt": "2023-01-30T16:25:58.70348Z"
         }
         "###);
@@ -355,12 +384,12 @@ pub(crate) mod test {
         meili_snap::snapshot_hash!(format!("{:#?}", documents), @"548284a84de510f71e88e6cdea495cf5");

         // movies
-        insta::assert_json_snapshot!(movies.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
+        insta::assert_json_snapshot!(movies.metadata(), @r###"
         {
           "uid": "movies",
           "primaryKey": "id",
-          "createdAt": "[now]",
-          "updatedAt": "[now]"
+          "createdAt": "2023-01-30T16:25:56.192178Z",
+          "updatedAt": "2023-01-30T16:25:56.455714Z"
         }
         "###);
@@ -370,12 +399,12 @@ pub(crate) mod test {
         meili_snap::snapshot_hash!(format!("{:#?}", documents), @"0227598af846e574139ee0b80e03a720");

         // spells
-        insta::assert_json_snapshot!(spells.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
+        insta::assert_json_snapshot!(spells.metadata(), @r###"
         {
           "uid": "dnd_spells",
           "primaryKey": "index",
-          "createdAt": "[now]",
-          "updatedAt": "[now]"
+          "createdAt": "2023-01-30T16:25:58.876405Z",
+          "updatedAt": "2023-01-30T16:25:59.079906Z"
         }
         "###);
@@ -227,4 +227,14 @@ impl UpdateStatus {
             _ => None,
         }
     }
+
+    pub fn finished_at(&self) -> Option<OffsetDateTime> {
+        match self {
+            UpdateStatus::Processing(_) => None,
+            UpdateStatus::Enqueued(_) => None,
+            UpdateStatus::Processed(u) => Some(u.processed_at),
+            UpdateStatus::Aborted(_) => None,
+            UpdateStatus::Failed(u) => Some(u.failed_at),
+        }
+    }
 }
@@ -923,10 +923,6 @@ impl IndexScheduler {
             self.index_mapper.index(&rtxn, &index_uid)?
         };

-        // the index operation can take a long time, so save this handle to make it available to the search for the duration of the tick
-        *self.currently_updating_index.write().unwrap() =
-            Some((index_uid.clone(), index.clone()));
-
         let mut index_wtxn = index.write_txn()?;
         let tasks = self.apply_index_operation(&mut index_wtxn, &index, op)?;
         index_wtxn.commit()?;
@@ -39,7 +39,6 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
         test_breakpoint_sdr: _,
         planned_failures: _,
         run_loop_iteration: _,
-        currently_updating_index: _,
     } = scheduler;

     let rtxn = env.read_txn().unwrap();
@@ -27,7 +27,7 @@ mod index_mapper;
 mod insta_snapshot;
 mod lru;
 mod utils;
-pub mod uuid_codec;
+mod uuid_codec;

 pub type Result<T> = std::result::Result<T, Error>;
 pub type TaskId = u32;
@@ -331,10 +331,6 @@ pub struct IndexScheduler {
     /// The path to the version file of Meilisearch.
     pub(crate) version_file_path: PathBuf,

-    /// A few types of long running batches of tasks that act on a single index set this field
-    /// so that a handle to the index is available from other threads (search) in an optimized manner.
-    currently_updating_index: Arc<RwLock<Option<(String, Index)>>>,
-
     // ================= test
     // The next entry is dedicated to the tests.
     /// Provide a way to set a breakpoint in multiple part of the scheduler.
@@ -378,7 +374,6 @@ impl IndexScheduler {
             dumps_path: self.dumps_path.clone(),
             auth_path: self.auth_path.clone(),
             version_file_path: self.version_file_path.clone(),
-            currently_updating_index: self.currently_updating_index.clone(),
             #[cfg(test)]
             test_breakpoint_sdr: self.test_breakpoint_sdr.clone(),
             #[cfg(test)]
@@ -475,7 +470,6 @@ impl IndexScheduler {
             snapshots_path: options.snapshots_path,
             auth_path: options.auth_path,
             version_file_path: options.version_file_path,
-            currently_updating_index: Arc::new(RwLock::new(None)),

             #[cfg(test)]
             test_breakpoint_sdr,
@@ -658,13 +652,6 @@ impl IndexScheduler {
     /// If you need to fetch information from or perform an action on all indexes,
     /// see the `try_for_each_index` function.
     pub fn index(&self, name: &str) -> Result<Index> {
-        if let Some((current_name, current_index)) =
-            self.currently_updating_index.read().unwrap().as_ref()
-        {
-            if current_name == name {
-                return Ok(current_index.clone());
-            }
-        }
         let rtxn = self.env.read_txn()?;
         self.index_mapper.index(&rtxn, name)
     }
@@ -1146,9 +1133,6 @@ impl IndexScheduler {
             handle.join().unwrap_or(Err(Error::ProcessBatchPanicked))
         };

-        // Reset the currently updating index to relinquish the index handle
-        *self.currently_updating_index.write().unwrap() = None;
-
         #[cfg(test)]
         self.maybe_fail(tests::FailureLocation::AcquiringWtxn)?;
@@ -50,7 +50,6 @@ hebrew = ["milli/hebrew"]
|
||||
japanese = ["milli/japanese"]
|
||||
# thai specialized tokenization
|
||||
thai = ["milli/thai"]
|
||||
|
||||
# allow greek specialized tokenization
|
||||
greek = ["milli/greek"]
|
||||
# allow khmer specialized tokenization
|
||||
khmer = ["milli/khmer"]
|
||||
|
||||
@@ -235,6 +235,7 @@ InvalidSearchCropMarker               , InvalidRequest , BAD_REQUEST ;
 InvalidSearchFacets                   , InvalidRequest , BAD_REQUEST ;
 InvalidFacetSearchFacetName           , InvalidRequest , BAD_REQUEST ;
 InvalidSearchFilter                   , InvalidRequest , BAD_REQUEST ;
+InvalidSearchBoostingFilter           , InvalidRequest , BAD_REQUEST ;
 InvalidSearchHighlightPostTag         , InvalidRequest , BAD_REQUEST ;
 InvalidSearchHighlightPreTag          , InvalidRequest , BAD_REQUEST ;
 InvalidSearchHitsPerPage              , InvalidRequest , BAD_REQUEST ;
@@ -331,6 +332,7 @@ impl ErrorCode for milli::Error {
             UserError::MaxDatabaseSizeReached => Code::DatabaseSizeLimitReached,
             UserError::AttributeLimitReached => Code::MaxFieldsLimitExceeded,
             UserError::InvalidFilter(_) => Code::InvalidSearchFilter,
+            UserError::InvalidBoostingFilter(_) => Code::InvalidSearchBoostingFilter,
             UserError::InvalidFilterExpression(..) => Code::InvalidSearchFilter,
             UserError::MissingDocumentId { .. } => Code::MissingDocumentId,
             UserError::InvalidDocumentId { .. } | UserError::TooManyDocumentIds { .. } => {
@@ -9,7 +9,7 @@ use std::str::FromStr;
 use deserr::{DeserializeError, Deserr, ErrorKind, MergeWithError, ValuePointerRef};
 use fst::IntoStreamer;
 use milli::update::Setting;
-use milli::{Criterion, CriterionError, Index, DEFAULT_VALUES_PER_FACET};
+use milli::{Index, RankingRule, RankingRuleError, DEFAULT_VALUES_PER_FACET};
 use serde::{Deserialize, Serialize, Serializer};

 use crate::deserr::DeserrJsonError;
@@ -117,10 +117,10 @@ pub struct PaginationSettings {
     pub max_total_hits: Setting<usize>,
 }

-impl MergeWithError<milli::CriterionError> for DeserrJsonError<InvalidSettingsRankingRules> {
+impl MergeWithError<milli::RankingRuleError> for DeserrJsonError<InvalidSettingsRankingRules> {
     fn merge(
         _self_: Option<Self>,
-        other: milli::CriterionError,
+        other: milli::RankingRuleError,
         merge_location: ValuePointerRef,
     ) -> ControlFlow<Self, Self> {
         Self::error::<Infallible>(
@@ -344,9 +344,9 @@ pub fn apply_settings_to_builder(

     match settings.ranking_rules {
         Setting::Set(ref criteria) => {
-            builder.set_criteria(criteria.iter().map(|c| c.clone().into()).collect())
+            builder.set_ranking_rules(criteria.iter().map(|c| c.clone().into()).collect())
         }
-        Setting::Reset => builder.reset_criteria(),
+        Setting::Reset => builder.reset_ranking_rules(),
         Setting::NotSet => (),
     }
@@ -578,11 +578,13 @@ pub fn settings(
 }

 #[derive(Debug, Clone, PartialEq, Eq, Deserr)]
-#[deserr(try_from(&String) = FromStr::from_str -> CriterionError)]
+#[deserr(try_from(&String) = FromStr::from_str -> RankingRuleError)]
 pub enum RankingRuleView {
     /// Sorted by decreasing number of matched query terms.
     /// Query words at the front of an attribute is considered better than if it was at the back.
     Words,
+    /// Sorted by documents matching the given filter and then documents not matching it.
+    Boost(String),
     /// Sorted by increasing number of typos.
     Typo,
     /// Sorted by increasing distance between matched query terms.
@@ -605,7 +607,7 @@ impl Serialize for RankingRuleView {
     where
         S: Serializer,
     {
-        serializer.serialize_str(&format!("{}", Criterion::from(self.clone())))
+        serializer.serialize_str(&format!("{}", RankingRule::from(self.clone())))
     }
 }
 impl<'de> Deserialize<'de> for RankingRuleView {
@@ -623,7 +625,7 @@ impl<'de> Deserialize<'de> for RankingRuleView {
             where
                 E: serde::de::Error,
             {
-                let criterion = Criterion::from_str(v).map_err(|_| {
+                let criterion = RankingRule::from_str(v).map_err(|_| {
                     E::invalid_value(serde::de::Unexpected::Str(v), &"a valid ranking rule")
                 })?;
                 Ok(RankingRuleView::from(criterion))
@@ -633,42 +635,44 @@ impl<'de> Deserialize<'de> for RankingRuleView {
     }
 }
 impl FromStr for RankingRuleView {
-    type Err = <Criterion as FromStr>::Err;
+    type Err = <RankingRule as FromStr>::Err;

     fn from_str(s: &str) -> Result<Self, Self::Err> {
-        Ok(RankingRuleView::from(Criterion::from_str(s)?))
+        Ok(RankingRuleView::from(RankingRule::from_str(s)?))
     }
 }
 impl fmt::Display for RankingRuleView {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        fmt::Display::fmt(&Criterion::from(self.clone()), f)
+        fmt::Display::fmt(&RankingRule::from(self.clone()), f)
     }
 }
-impl From<Criterion> for RankingRuleView {
-    fn from(value: Criterion) -> Self {
+impl From<RankingRule> for RankingRuleView {
+    fn from(value: RankingRule) -> Self {
         match value {
-            Criterion::Words => RankingRuleView::Words,
-            Criterion::Typo => RankingRuleView::Typo,
-            Criterion::Proximity => RankingRuleView::Proximity,
-            Criterion::Attribute => RankingRuleView::Attribute,
-            Criterion::Sort => RankingRuleView::Sort,
-            Criterion::Exactness => RankingRuleView::Exactness,
-            Criterion::Asc(x) => RankingRuleView::Asc(x),
-            Criterion::Desc(x) => RankingRuleView::Desc(x),
+            RankingRule::Words => RankingRuleView::Words,
+            RankingRule::FilterBoosting(filter) => RankingRuleView::Boost(filter),
+            RankingRule::Typo => RankingRuleView::Typo,
+            RankingRule::Proximity => RankingRuleView::Proximity,
+            RankingRule::Attribute => RankingRuleView::Attribute,
+            RankingRule::Sort => RankingRuleView::Sort,
+            RankingRule::Exactness => RankingRuleView::Exactness,
+            RankingRule::Asc(x) => RankingRuleView::Asc(x),
+            RankingRule::Desc(x) => RankingRuleView::Desc(x),
         }
     }
 }
-impl From<RankingRuleView> for Criterion {
+impl From<RankingRuleView> for RankingRule {
     fn from(value: RankingRuleView) -> Self {
         match value {
-            RankingRuleView::Words => Criterion::Words,
-            RankingRuleView::Typo => Criterion::Typo,
-            RankingRuleView::Proximity => Criterion::Proximity,
-            RankingRuleView::Attribute => Criterion::Attribute,
-            RankingRuleView::Sort => Criterion::Sort,
-            RankingRuleView::Exactness => Criterion::Exactness,
-            RankingRuleView::Asc(x) => Criterion::Asc(x),
-            RankingRuleView::Desc(x) => Criterion::Desc(x),
+            RankingRuleView::Words => RankingRule::Words,
+            RankingRuleView::Boost(filter) => RankingRule::FilterBoosting(filter),
+            RankingRuleView::Typo => RankingRule::Typo,
+            RankingRuleView::Proximity => RankingRule::Proximity,
+            RankingRuleView::Attribute => RankingRule::Attribute,
+            RankingRuleView::Sort => RankingRule::Sort,
+            RankingRuleView::Exactness => RankingRule::Exactness,
+            RankingRuleView::Asc(x) => RankingRule::Asc(x),
+            RankingRuleView::Desc(x) => RankingRule::Desc(x),
         }
     }
 }
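The two `From` implementations keep the wire-facing `RankingRuleView` and milli's internal `RankingRule` in lockstep, with the new `Boost(String)` variant mapping to `RankingRule::FilterBoosting`; the filter travels as an opaque string at this layer. A hedged round-trip sketch (the filter expression is illustrative only, and the `Clone`/`PartialEq` derives on `RankingRule` are assumptions):

    let rule = RankingRule::FilterBoosting("brand = nike".to_string());
    let view = RankingRuleView::from(rule.clone());
    assert_eq!(RankingRule::from(view), rule);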
@@ -150,7 +150,6 @@ hebrew = ["meilisearch-types/hebrew"]
 japanese = ["meilisearch-types/japanese"]
 thai = ["meilisearch-types/thai"]
 greek = ["meilisearch-types/greek"]
-khmer = ["meilisearch-types/khmer"]

 [package.metadata.mini-dashboard]
 assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.11/build.zip"
@@ -629,6 +629,7 @@ impl SearchAggregator {
             show_ranking_score,
             show_ranking_score_details,
             filter,
+            boosting_filter,
             sort,
             facets: _,
             highlight_pre_tag,
@@ -1002,6 +1003,7 @@ impl MultiSearchAggregator {
             show_ranking_score_details: _,
             show_matches_position: _,
             filter: _,
+            boosting_filter: _,
             sort: _,
             facets: _,
             highlight_pre_tag: _,
@@ -111,6 +111,7 @@ impl From<FacetSearchQuery> for SearchQuery {
             show_ranking_score: false,
             show_ranking_score_details: false,
             filter,
+            boosting_filter: None,
             sort: None,
             facets: None,
             highlight_pre_tag: DEFAULT_HIGHLIGHT_PRE_TAG(),
@@ -54,6 +54,8 @@ pub struct SearchQueryGet {
     attributes_to_highlight: Option<CS<String>>,
     #[deserr(default, error = DeserrQueryParamError<InvalidSearchFilter>)]
     filter: Option<String>,
+    #[deserr(default, error = DeserrQueryParamError<InvalidSearchBoostingFilter>)]
+    boosting_filter: Option<String>,
     #[deserr(default, error = DeserrQueryParamError<InvalidSearchSort>)]
     sort: Option<String>,
     #[deserr(default, error = DeserrQueryParamError<InvalidSearchShowMatchesPosition>)]
@@ -86,6 +88,14 @@ impl From<SearchQueryGet> for SearchQuery {
             None => None,
         };

+        let boosting_filter = match other.boosting_filter {
+            Some(f) => match serde_json::from_str(&f) {
+                Ok(v) => Some(v),
+                _ => Some(Value::String(f)),
+            },
+            None => None,
+        };
+
         Self {
             q: other.q,
             vector: other.vector.map(CS::into_inner),
@@ -98,6 +108,7 @@ impl From<SearchQueryGet> for SearchQuery {
             crop_length: other.crop_length.0,
             attributes_to_highlight: other.attributes_to_highlight.map(|o| o.into_iter().collect()),
             filter,
+            boosting_filter,
             sort: other.sort.map(|attr| fix_sort_query_parameters(&attr)),
             show_matches_position: other.show_matches_position.0,
             show_ranking_score: other.show_ranking_score.0,
@@ -71,6 +71,8 @@ pub struct SearchQuery {
     pub show_ranking_score_details: bool,
     #[deserr(default, error = DeserrJsonError<InvalidSearchFilter>)]
     pub filter: Option<Value>,
+    #[deserr(default, error = DeserrJsonError<InvalidSearchBoostingFilter>)]
+    pub boosting_filter: Option<Value>,
     #[deserr(default, error = DeserrJsonError<InvalidSearchSort>)]
     pub sort: Option<Vec<String>>,
     #[deserr(default, error = DeserrJsonError<InvalidSearchFacets>)]
@@ -130,6 +132,8 @@ pub struct SearchQueryWithIndex {
     pub show_matches_position: bool,
     #[deserr(default, error = DeserrJsonError<InvalidSearchFilter>)]
     pub filter: Option<Value>,
+    #[deserr(default, error = DeserrJsonError<InvalidSearchBoostingFilter>)]
+    pub boosting_filter: Option<Value>,
     #[deserr(default, error = DeserrJsonError<InvalidSearchSort>)]
     pub sort: Option<Vec<String>>,
     #[deserr(default, error = DeserrJsonError<InvalidSearchFacets>)]
@@ -164,6 +168,7 @@ impl SearchQueryWithIndex {
             show_ranking_score_details,
             show_matches_position,
             filter,
+            boosting_filter,
             sort,
             facets,
             highlight_pre_tag,
@@ -189,6 +194,7 @@ impl SearchQueryWithIndex {
             show_ranking_score_details,
             show_matches_position,
             filter,
+            boosting_filter,
             sort,
             facets,
             highlight_pre_tag,
@@ -397,8 +403,14 @@ fn prepare_search<'t>(
     search.limit(limit);

     if let Some(ref filter) = query.filter {
-        if let Some(facets) = parse_filter(filter)? {
-            search.filter(facets);
+        if let Some(filter) = parse_filter(filter)? {
+            search.filter(filter);
         }
     }

+    if let Some(ref boosting_filter) = query.boosting_filter {
+        if let Some(boosting_filter) = parse_filter(boosting_filter)? {
+            search.boosting_filter(boosting_filter);
+        }
+    }
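Taken together, these changes thread the boosting filter from the HTTP layer down to milli's search. A hypothetical request through the test harness used elsewhere in this diff; the camelCase `boostingFilter` field name is an assumption based on how the sibling `SearchQuery` fields are exposed:

    // Documents matching the boosting filter are expected to rank before
    // documents that do not match it.
    let (response, code) = index
        .search_post(json!({
            "q": "jacket",
            "boostingFilter": "brand = 'Lee Jeans'",
        }))
        .await;
    snapshot!(code, @"200 OK");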
@@ -5,11 +5,9 @@ pub mod service;

 use std::fmt::{self, Display};

-#[allow(unused)]
 pub use index::{GetAllDocumentsOptions, GetDocumentOptions};
 use meili_snap::json_string;
 use serde::{Deserialize, Serialize};
-#[allow(unused)]
 pub use server::{default_settings, Server};

 #[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)]
@@ -6,109 +6,21 @@ use crate::json;

 pub(self) static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
     json!([
-        {
-            "id": 1,
-            "description": "Leather Jacket",
-            "brand": "Lee Jeans",
-            "product_id": "123456",
-            "color": "Brown"
-        },
-        {
-            "id": 2,
-            "description": "Leather Jacket",
-            "brand": "Lee Jeans",
-            "product_id": "123456",
-            "color": "Black"
-        },
-        {
-            "id": 3,
-            "description": "Leather Jacket",
-            "brand": "Lee Jeans",
-            "product_id": "123456",
-            "color": "Blue"
-        },
-        {
-            "id": 4,
-            "description": "T-Shirt",
-            "brand": "Nike",
-            "product_id": "789012",
-            "color": "Red"
-        },
-        {
-            "id": 5,
-            "description": "T-Shirt",
-            "brand": "Nike",
-            "product_id": "789012",
-            "color": "Blue"
-        },
-        {
-            "id": 6,
-            "description": "Running Shoes",
-            "brand": "Adidas",
-            "product_id": "456789",
-            "color": "Black"
-        },
-        {
-            "id": 7,
-            "description": "Running Shoes",
-            "brand": "Adidas",
-            "product_id": "456789",
-            "color": "White"
-        },
-        {
-            "id": 8,
-            "description": "Hoodie",
-            "brand": "Puma",
-            "product_id": "987654",
-            "color": "Gray"
-        },
-        {
-            "id": 9,
-            "description": "Sweater",
-            "brand": "Gap",
-            "product_id": "234567",
-            "color": "Green"
-        },
-        {
-            "id": 10,
-            "description": "Sweater",
-            "brand": "Gap",
-            "product_id": "234567",
-            "color": "Red"
-        },
-        {
-            "id": 11,
-            "description": "Sweater",
-            "brand": "Gap",
-            "product_id": "234567",
-            "color": "Blue"
-        },
-        {
-            "id": 12,
-            "description": "Jeans",
-            "brand": "Levi's",
-            "product_id": "345678",
-            "color": "Indigo"
-        },
-        {
-            "id": 13,
-            "description": "Jeans",
-            "brand": "Levi's",
-            "product_id": "345678",
-            "color": "Black"
-        },
-        {
-            "id": 14,
-            "description": "Jeans",
-            "brand": "Levi's",
-            "product_id": "345678",
-            "color": "Stone Wash"
-        }
+        {"productId": 1, "shopId": 1},
+        {"productId": 2, "shopId": 1},
+        {"productId": 3, "shopId": 2},
+        {"productId": 4, "shopId": 2},
+        {"productId": 5, "shopId": 3},
+        {"productId": 6, "shopId": 3},
+        {"productId": 7, "shopId": 4},
+        {"productId": 8, "shopId": 4},
+        {"productId": 9, "shopId": 5},
+        {"productId": 10, "shopId": 5}
     ])
 });

-pub(self) static DOCUMENT_PRIMARY_KEY: &str = "id";
-pub(self) static DOCUMENT_DISTINCT_KEY: &str = "product_id";
+pub(self) static DOCUMENT_PRIMARY_KEY: &str = "productId";
+pub(self) static DOCUMENT_DISTINCT_KEY: &str = "shopId";

 /// testing: https://github.com/meilisearch/meilisearch/issues/4078
 #[actix_rt::test]
@@ -121,121 +33,31 @@ async fn distinct_search_with_offset_no_ranking() {
     index.update_distinct_attribute(json!(DOCUMENT_DISTINCT_KEY)).await;
     index.wait_task(1).await;

-    fn get_hits(response: &Value) -> Vec<&str> {
+    fn get_hits(Value(response): Value) -> Vec<i64> {
         let hits_array = response["hits"].as_array().unwrap();
-        hits_array.iter().map(|h| h[DOCUMENT_DISTINCT_KEY].as_str().unwrap()).collect::<Vec<_>>()
+        hits_array.iter().map(|h| h[DOCUMENT_DISTINCT_KEY].as_i64().unwrap()).collect::<Vec<_>>()
     }

-    let (response, code) = index.search_post(json!({"offset": 0, "limit": 2})).await;
-    let hits = get_hits(&response);
+    let (response, code) = index.search_post(json!({"limit": 2, "offset": 0})).await;
+    let hits = get_hits(response);
     snapshot!(code, @"200 OK");
     snapshot!(hits.len(), @"2");
-    snapshot!(format!("{:?}", hits), @r#"["123456", "789012"]"#);
-    snapshot!(response["estimatedTotalHits"] , @"11");
+    snapshot!(format!("{:?}", hits), @"[1, 2]");

-    let (response, code) = index.search_post(json!({"offset": 2, "limit": 2})).await;
-    let hits = get_hits(&response);
+    let (response, code) = index.search_post(json!({"limit": 2, "offset": 2})).await;
+    let hits = get_hits(response);
     snapshot!(code, @"200 OK");
     snapshot!(hits.len(), @"2");
-    snapshot!(format!("{:?}", hits), @r#"["456789", "987654"]"#);
-    snapshot!(response["estimatedTotalHits"], @"10");
+    snapshot!(format!("{:?}", hits), @"[3, 4]");

-    let (response, code) = index.search_post(json!({"offset": 4, "limit": 2})).await;
-    let hits = get_hits(&response);
-    snapshot!(code, @"200 OK");
-    snapshot!(hits.len(), @"2");
-    snapshot!(format!("{:?}", hits), @r#"["234567", "345678"]"#);
-    snapshot!(response["estimatedTotalHits"], @"6");
-
-    let (response, code) = index.search_post(json!({"offset": 5, "limit": 2})).await;
-    let hits = get_hits(&response);
+    let (response, code) = index.search_post(json!({"limit": 10, "offset": 4})).await;
+    let hits = get_hits(response);
     snapshot!(code, @"200 OK");
     snapshot!(hits.len(), @"1");
-    snapshot!(format!("{:?}", hits), @r#"["345678"]"#);
-    snapshot!(response["estimatedTotalHits"], @"6");
+    snapshot!(format!("{:?}", hits), @"[5]");

-    let (response, code) = index.search_post(json!({"offset": 6, "limit": 2})).await;
-    let hits = get_hits(&response);
+    let (response, code) = index.search_post(json!({"limit": 10, "offset": 5})).await;
+    let hits = get_hits(response);
     snapshot!(code, @"200 OK");
     snapshot!(hits.len(), @"0");
     snapshot!(format!("{:?}", hits), @r#"[]"#);
-    snapshot!(response["estimatedTotalHits"], @"6");
-
-    let (response, code) = index.search_post(json!({"offset": 7, "limit": 2})).await;
-    let hits = get_hits(&response);
-    snapshot!(code, @"200 OK");
-    snapshot!(hits.len(), @"0");
-    snapshot!(format!("{:?}", hits), @r#"[]"#);
-    snapshot!(response["estimatedTotalHits"], @"6");
 }
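The expectations in the rewritten test follow directly from the document set: ten documents spread over five `shopId` values collapse to at most one hit per shop once `shopId` is the distinct attribute. A quick standalone check of that count:

    use std::collections::HashSet;

    // One hit survives per distinct shopId, so at most five results remain
    // regardless of limit and offset.
    let shop_ids = [1, 1, 2, 2, 3, 3, 4, 4, 5, 5];
    let distinct: HashSet<_> = shop_ids.iter().collect();
    assert_eq!(distinct.len(), 5);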
-
-/// testing: https://github.com/meilisearch/meilisearch/issues/4130
-#[actix_rt::test]
-async fn distinct_search_with_pagination_no_ranking() {
-    let server = Server::new().await;
-    let index = server.index("test");
-
-    let documents = DOCUMENTS.clone();
-    index.add_documents(documents, Some(DOCUMENT_PRIMARY_KEY)).await;
-    index.update_distinct_attribute(json!(DOCUMENT_DISTINCT_KEY)).await;
-    index.wait_task(1).await;
-
-    fn get_hits(response: &Value) -> Vec<&str> {
-        let hits_array = response["hits"].as_array().unwrap();
-        hits_array.iter().map(|h| h[DOCUMENT_DISTINCT_KEY].as_str().unwrap()).collect::<Vec<_>>()
-    }
-
-    let (response, code) = index.search_post(json!({"page": 0, "hitsPerPage": 2})).await;
-    let hits = get_hits(&response);
-    snapshot!(code, @"200 OK");
-    snapshot!(hits.len(), @"0");
-    snapshot!(format!("{:?}", hits), @r#"[]"#);
-    snapshot!(response["page"], @"0");
-    snapshot!(response["totalPages"], @"3");
-    snapshot!(response["totalHits"], @"6");
-
-    let (response, code) = index.search_post(json!({"page": 1, "hitsPerPage": 2})).await;
-    let hits = get_hits(&response);
-    snapshot!(code, @"200 OK");
-    snapshot!(hits.len(), @"2");
-    snapshot!(format!("{:?}", hits), @r#"["123456", "789012"]"#);
-    snapshot!(response["page"], @"1");
-    snapshot!(response["totalPages"], @"3");
-    snapshot!(response["totalHits"], @"6");
-
-    let (response, code) = index.search_post(json!({"page": 2, "hitsPerPage": 2})).await;
-    let hits = get_hits(&response);
-    snapshot!(code, @"200 OK");
-    snapshot!(hits.len(), @"2");
-    snapshot!(format!("{:?}", hits), @r#"["456789", "987654"]"#);
-    snapshot!(response["page"], @"2");
-    snapshot!(response["totalPages"], @"3");
-    snapshot!(response["totalHits"], @"6");
-
-    let (response, code) = index.search_post(json!({"page": 3, "hitsPerPage": 2})).await;
-    let hits = get_hits(&response);
-    snapshot!(code, @"200 OK");
-    snapshot!(hits.len(), @"2");
-    snapshot!(format!("{:?}", hits), @r#"["234567", "345678"]"#);
-    snapshot!(response["page"], @"3");
-    snapshot!(response["totalPages"], @"3");
-    snapshot!(response["totalHits"], @"6");
-
-    let (response, code) = index.search_post(json!({"page": 4, "hitsPerPage": 2})).await;
-    let hits = get_hits(&response);
-    snapshot!(code, @"200 OK");
-    snapshot!(hits.len(), @"0");
-    snapshot!(format!("{:?}", hits), @r#"[]"#);
-    snapshot!(response["page"], @"4");
-    snapshot!(response["totalPages"], @"3");
-    snapshot!(response["totalHits"], @"6");
-
-    let (response, code) = index.search_post(json!({"page": 2, "hitsPerPage": 3})).await;
-    let hits = get_hits(&response);
-    snapshot!(code, @"200 OK");
-    snapshot!(hits.len(), @"3");
-    snapshot!(format!("{:?}", hits), @r#"["987654", "234567", "345678"]"#);
-    snapshot!(response["page"], @"2");
-    snapshot!(response["totalPages"], @"2");
-    snapshot!(response["totalHits"], @"6");
-}
meilitool/Cargo.toml (deleted)

@@ -1,19 +0,0 @@
-[package]
-name = "meilitool"
-description = "A CLI to edit a Meilisearch database from the command line"
-version.workspace = true
-authors.workspace = true
-homepage.workspace = true
-readme.workspace = true
-edition.workspace = true
-license.workspace = true
-
-[dependencies]
-anyhow = "1.0.75"
-clap = { version = "4.2.1", features = ["derive"] }
-dump = { path = "../dump" }
-file-store = { path = "../file-store" }
-meilisearch-auth = { path = "../meilisearch-auth" }
-meilisearch-types = { path = "../meilisearch-types" }
-time = { version = "0.3.30", features = ["formatting"] }
-uuid = { version = "1.5.0", features = ["v4"], default-features = false }
meilitool/src/main.rs (deleted)

@@ -1,312 +0,0 @@
-use std::fs::{read_dir, read_to_string, remove_file, File};
-use std::io::BufWriter;
-use std::path::PathBuf;
-
-use anyhow::Context;
-use clap::{Parser, Subcommand};
-use dump::{DumpWriter, IndexMetadata};
-use file_store::FileStore;
-use meilisearch_auth::AuthController;
-use meilisearch_types::heed::types::{OwnedType, SerdeJson, Str};
-use meilisearch_types::heed::{Database, Env, EnvOpenOptions, PolyDatabase, RoTxn, RwTxn};
-use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader};
-use meilisearch_types::milli::{obkv_to_json, BEU32};
-use meilisearch_types::tasks::{Status, Task};
-use meilisearch_types::versioning::check_version_file;
-use meilisearch_types::Index;
-use time::macros::format_description;
-use time::OffsetDateTime;
-use uuid_codec::UuidCodec;
-
-mod uuid_codec;
-
-#[derive(Parser)]
-#[command(author, version, about, long_about = None)]
-struct Cli {
-    /// The database path where the Meilisearch is running.
-    #[arg(long, default_value = "data.ms/")]
-    db_path: PathBuf,
-
-    #[command(subcommand)]
-    command: Command,
-}
-
-#[derive(Subcommand)]
-enum Command {
-    /// Clears the task queue and make it empty.
-    ///
-    /// This command can be safely executed even if Meilisearch is running and processing tasks.
-    /// Once the task queue is empty you can restart Meilisearch and no more tasks must be visible,
-    /// even the ones that were processing. However, it's highly possible that you see the processing
-    /// tasks in the queue again with an associated internal error message.
-    ClearTaskQueue,
-
-    /// Exports a dump from the Meilisearch database.
-    ///
-    /// Make sure to run this command when Meilisearch is not running or running but not processing tasks.
-    /// If tasks are being processed while a dump is being exported there are chances for the dump to be
-    /// malformed with missing tasks.
-    ///
-    /// TODO Verify this claim or make sure it cannot happen and we can export dumps
-    /// without caring about killing Meilisearch first!
-    ExportADump {
-        /// The directory in which the dump will be created.
-        #[arg(long, default_value = "dumps/")]
-        dump_dir: PathBuf,
-
-        /// Skip dumping the enqueued or processing tasks.
-        ///
-        /// Can be useful when there are a lot of them and it is not particularly useful
-        /// to keep them. Note that only the enqueued tasks takes up space so skipping
-        /// the processed ones is not particularly interesting.
-        #[arg(long)]
-        skip_enqueued_tasks: bool,
-    },
-}
-
-fn main() -> anyhow::Result<()> {
-    let Cli { db_path, command } = Cli::parse();
-
-    check_version_file(&db_path).context("While checking the version file")?;
-
-    match command {
-        Command::ClearTaskQueue => clear_task_queue(db_path),
-        Command::ExportADump { dump_dir, skip_enqueued_tasks } => {
-            export_a_dump(db_path, dump_dir, skip_enqueued_tasks)
-        }
-    }
-}
-
-/// Clears the task queue located at `db_path`.
-fn clear_task_queue(db_path: PathBuf) -> anyhow::Result<()> {
-    let path = db_path.join("tasks");
-    let env = EnvOpenOptions::new()
-        .max_dbs(100)
-        .open(&path)
-        .with_context(|| format!("While trying to open {:?}", path.display()))?;
-
-    eprintln!("Deleting tasks from the database...");
-
-    let mut wtxn = env.write_txn()?;
-    let all_tasks = try_opening_poly_database(&env, &wtxn, "all-tasks")?;
-    let total = all_tasks.len(&wtxn)?;
-    let status = try_opening_poly_database(&env, &wtxn, "status")?;
-    let kind = try_opening_poly_database(&env, &wtxn, "kind")?;
-    let index_tasks = try_opening_poly_database(&env, &wtxn, "index-tasks")?;
-    let canceled_by = try_opening_poly_database(&env, &wtxn, "canceled_by")?;
-    let enqueued_at = try_opening_poly_database(&env, &wtxn, "enqueued-at")?;
-    let started_at = try_opening_poly_database(&env, &wtxn, "started-at")?;
-    let finished_at = try_opening_poly_database(&env, &wtxn, "finished-at")?;
-
-    try_clearing_poly_database(&mut wtxn, all_tasks, "all-tasks")?;
-    try_clearing_poly_database(&mut wtxn, status, "status")?;
-    try_clearing_poly_database(&mut wtxn, kind, "kind")?;
-    try_clearing_poly_database(&mut wtxn, index_tasks, "index-tasks")?;
-    try_clearing_poly_database(&mut wtxn, canceled_by, "canceled_by")?;
-    try_clearing_poly_database(&mut wtxn, enqueued_at, "enqueued-at")?;
-    try_clearing_poly_database(&mut wtxn, started_at, "started-at")?;
-    try_clearing_poly_database(&mut wtxn, finished_at, "finished-at")?;
-
-    wtxn.commit().context("While committing the transaction")?;
-
-    eprintln!("Successfully deleted {total} tasks from the tasks database!");
-    eprintln!("Deleting the content files from disk...");
-
-    let mut count = 0usize;
-    let update_files = db_path.join("update_files");
-    let entries = read_dir(&update_files).with_context(|| {
-        format!("While trying to read the content of {:?}", update_files.display())
-    })?;
-    for result in entries {
-        match result {
-            Ok(ent) => match remove_file(ent.path()) {
-                Ok(_) => count += 1,
-                Err(e) => eprintln!("Error while deleting {:?}: {}", ent.path().display(), e),
-            },
-            Err(e) => {
-                eprintln!("Error while reading a file in {:?}: {}", update_files.display(), e)
-            }
-        }
-    }
-
-    eprintln!("Successfully deleted {count} content files from disk!");
-
-    Ok(())
-}
-
-fn try_opening_database<KC: 'static, DC: 'static>(
-    env: &Env,
-    rtxn: &RoTxn,
-    db_name: &str,
-) -> anyhow::Result<Database<KC, DC>> {
-    env.open_database(rtxn, Some(db_name))
-        .with_context(|| format!("While opening the {db_name:?} database"))?
-        .with_context(|| format!("Missing the {db_name:?} database"))
-}
-
-fn try_opening_poly_database(
-    env: &Env,
-    rtxn: &RoTxn,
-    db_name: &str,
-) -> anyhow::Result<PolyDatabase> {
-    env.open_poly_database(rtxn, Some(db_name))
-        .with_context(|| format!("While opening the {db_name:?} poly database"))?
-        .with_context(|| format!("Missing the {db_name:?} poly database"))
-}
-
-fn try_clearing_poly_database(
-    wtxn: &mut RwTxn,
-    database: PolyDatabase,
-    db_name: &str,
-) -> anyhow::Result<()> {
-    database.clear(wtxn).with_context(|| format!("While clearing the {db_name:?} database"))
-}
-
-/// Exports a dump into the dump directory.
-fn export_a_dump(
-    db_path: PathBuf,
-    dump_dir: PathBuf,
-    skip_enqueued_tasks: bool,
-) -> Result<(), anyhow::Error> {
-    let started_at = OffsetDateTime::now_utc();
-
-    // 1. Extracts the instance UID from disk
-    let instance_uid_path = db_path.join("instance-uid");
-    let instance_uid = match read_to_string(&instance_uid_path) {
-        Ok(content) => match content.trim().parse() {
-            Ok(uuid) => Some(uuid),
-            Err(e) => {
-                eprintln!("Impossible to parse instance-uid: {e}");
-                None
-            }
-        },
-        Err(e) => {
-            eprintln!("Impossible to read {}: {}", instance_uid_path.display(), e);
-            None
-        }
-    };
-
-    let dump = DumpWriter::new(instance_uid).context("While creating a new dump")?;
-    let file_store =
-        FileStore::new(db_path.join("update_files")).context("While opening the FileStore")?;
-
-    let index_scheduler_path = db_path.join("tasks");
-    let env = EnvOpenOptions::new()
-        .max_dbs(100)
-        .open(&index_scheduler_path)
-        .with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?;
-
-    eprintln!("Dumping the keys...");
-
-    // 2. dump the keys
-    let auth_store = AuthController::new(&db_path, &None)
-        .with_context(|| format!("While opening the auth store at {}", db_path.display()))?;
-    let mut dump_keys = dump.create_keys()?;
-    let mut count = 0;
-    for key in auth_store.list_keys()? {
-        dump_keys.push_key(&key)?;
-        count += 1;
-    }
-    dump_keys.flush()?;
-
-    eprintln!("Successfully dumped {count} keys!");
-
-    let rtxn = env.read_txn()?;
-    let all_tasks: Database<OwnedType<BEU32>, SerdeJson<Task>> =
-        try_opening_database(&env, &rtxn, "all-tasks")?;
-    let index_mapping: Database<Str, UuidCodec> =
-        try_opening_database(&env, &rtxn, "index-mapping")?;
-
-    if skip_enqueued_tasks {
-        eprintln!("Skip dumping the enqueued tasks...");
-    } else {
-        eprintln!("Dumping the enqueued tasks...");
-
-        // 3. dump the tasks
-        let mut dump_tasks = dump.create_tasks_queue()?;
-        let mut count = 0;
-        for ret in all_tasks.iter(&rtxn)? {
-            let (_, t) = ret?;
-            let status = t.status;
-            let content_file = t.content_uuid();
-            let mut dump_content_file = dump_tasks.push_task(&t.into())?;
-
-            // 3.1. Dump the `content_file` associated with the task if there is one and the task is not finished yet.
-            if let Some(content_file_uuid) = content_file {
-                if status == Status::Enqueued {
-                    let content_file = file_store.get_update(content_file_uuid)?;
-
-                    let reader =
-                        DocumentsBatchReader::from_reader(content_file).with_context(|| {
-                            format!("While reading content file {:?}", content_file_uuid)
-                        })?;
-
-                    let (mut cursor, documents_batch_index) = reader.into_cursor_and_fields_index();
-                    while let Some(doc) = cursor.next_document().with_context(|| {
-                        format!("While iterating on content file {:?}", content_file_uuid)
-                    })? {
-                        dump_content_file
-                            .push_document(&obkv_to_object(&doc, &documents_batch_index)?)?;
-                    }
-                    dump_content_file.flush()?;
-                    count += 1;
-                }
-            }
-        }
-        dump_tasks.flush()?;
-
-        eprintln!("Successfully dumped {count} enqueued tasks!");
-    }
-
-    eprintln!("Dumping the indexes...");
-
-    // 4. Dump the indexes
-    let mut count = 0;
-    for result in index_mapping.iter(&rtxn)? {
-        let (uid, uuid) = result?;
-        let index_path = db_path.join("indexes").join(uuid.to_string());
-        let index = Index::new(EnvOpenOptions::new(), &index_path).with_context(|| {
-            format!("While trying to open the index at path {:?}", index_path.display())
-        })?;
-
-        let rtxn = index.read_txn()?;
-        let metadata = IndexMetadata {
-            uid: uid.to_owned(),
-            primary_key: index.primary_key(&rtxn)?.map(String::from),
-            created_at: index.created_at(&rtxn)?,
-            updated_at: index.updated_at(&rtxn)?,
-        };
-        let mut index_dumper = dump.create_index(uid, &metadata)?;
-
-        let fields_ids_map = index.fields_ids_map(&rtxn)?;
-        let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
-
-        // 4.1. Dump the documents
-        for ret in index.all_documents(&rtxn)? {
-            let (_id, doc) = ret?;
-            let document = obkv_to_json(&all_fields, &fields_ids_map, doc)?;
-            index_dumper.push_document(&document)?;
-        }
-
-        // 4.2. Dump the settings
-        let settings = meilisearch_types::settings::settings(&index, &rtxn)?;
-        index_dumper.settings(&settings)?;
-        count += 1;
-    }
-
-    eprintln!("Successfully dumped {count} indexes!");
-    // We will not dump experimental feature settings
-    eprintln!("The tool is not dumping experimental features, please set them by hand afterward");
-
-    let dump_uid = started_at.format(format_description!(
-        "[year repr:full][month repr:numerical][day padding:zero]-[hour padding:zero][minute padding:zero][second padding:zero][subsecond digits:3]"
-    )).unwrap();
-
-    let path = dump_dir.join(format!("{}.dump", dump_uid));
-    let file = File::create(&path)?;
-    dump.persist_to(BufWriter::new(file))?;
-
-    eprintln!("Dump exported at path {:?}", path.display());
-
-    Ok(())
-}
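For reference, the deleted tool was driven through clap. A hypothetical invocation exercising the `Cli` parser above, assuming clap's default kebab-case rendering of the `ExportADump` variant and its flags:

    use clap::Parser;

    // Equivalent to running:
    //   meilitool --db-path data.ms/ export-a-dump --dump-dir dumps/ --skip-enqueued-tasks
    let cli = Cli::parse_from([
        "meilitool",
        "--db-path", "data.ms/",
        "export-a-dump",
        "--dump-dir", "dumps/",
        "--skip-enqueued-tasks",
    ]);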
meilitool/src/uuid_codec.rs (deleted)

@@ -1,24 +0,0 @@
-use std::borrow::Cow;
-use std::convert::TryInto;
-
-use meilisearch_types::heed::{BytesDecode, BytesEncode};
-use uuid::Uuid;
-
-/// A heed codec for value of struct Uuid.
-pub struct UuidCodec;
-
-impl<'a> BytesDecode<'a> for UuidCodec {
-    type DItem = Uuid;
-
-    fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
-        bytes.try_into().ok().map(Uuid::from_bytes)
-    }
-}
-
-impl BytesEncode<'_> for UuidCodec {
-    type EItem = Uuid;
-
-    fn bytes_encode(item: &Self::EItem) -> Option<Cow<[u8]>> {
-        Some(Cow::Borrowed(item.as_bytes()))
-    }
-}
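A round-trip sketch for the codec above, with the `BytesEncode`/`BytesDecode` traits in scope as imported there (both directions return an `Option` in this heed API, and the `v4` feature enabled in the deleted Cargo.toml provides `Uuid::new_v4`):

    // A Uuid encodes to its 16 raw bytes and decodes back to the same value.
    let uuid = Uuid::new_v4();
    let bytes = UuidCodec::bytes_encode(&uuid).unwrap();
    let decoded = UuidCodec::bytes_decode(&bytes).unwrap();
    assert_eq!(uuid, decoded);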
@@ -17,7 +17,7 @@ bincode = "1.3.3"
 bstr = "1.4.0"
 bytemuck = { version = "1.13.1", features = ["extern_crate_alloc"] }
 byteorder = "1.4.3"
-charabia = { version = "0.8.5", default-features = false }
+charabia = { version = "0.8.3", default-features = false }
 concat-arrays = "0.1.2"
 crossbeam-channel = "0.5.8"
 deserr = { version = "0.6.0", features = ["actix-web"]}
@@ -82,7 +82,7 @@ md5 = "0.7.0"
 rand = { version = "0.8.5", features = ["small_rng"] }

 [features]
-all-tokenizations = ["charabia/chinese", "charabia/hebrew", "charabia/japanese", "charabia/thai", "charabia/korean", "charabia/greek", "charabia/khmer"]
+all-tokenizations = ["charabia/chinese", "charabia/hebrew", "charabia/japanese", "charabia/thai", "charabia/korean", "charabia/greek"]

 # Use POSIX semaphores instead of SysV semaphores in LMDB
 # For more information on this feature, see heed's Cargo.toml
@@ -106,6 +106,3 @@ thai = ["charabia/thai"]

 # allow greek specialized tokenization
 greek = ["charabia/greek"]
-
-# allow khmer specialized tokenization
-khmer = ["charabia/khmer"]
@@ -58,6 +58,7 @@ fn main() -> Result<(), Box<dyn Error>> {
        false,
        &None,
        &None,
        &None,
        GeoSortStrategy::default(),
        0,
        20,

@@ -3,7 +3,7 @@ use heed::EnvOpenOptions;
// use maplit::hashset;
use milli::{
    update::{IndexerConfig, Settings},
    Criterion, Index,
    Index, RankingRule,
};

fn main() {
@@ -19,13 +19,13 @@ fn main() {
    // builder.set_min_word_len_one_typo(5);
    // builder.set_min_word_len_two_typos(7);
    // builder.set_sortable_fields(hashset! { S("release_date") });
    builder.set_criteria(vec![
        Criterion::Words,
        Criterion::Typo,
        Criterion::Proximity,
        Criterion::Attribute,
        Criterion::Sort,
        Criterion::Exactness,
    builder.set_ranking_rules(vec![
        RankingRule::Words,
        RankingRule::Typo,
        RankingRule::Proximity,
        RankingRule::Attribute,
        RankingRule::Sort,
        RankingRule::Exactness,
    ]);

    builder.execute(|_| (), || false).unwrap();

@@ -8,7 +8,7 @@ use thiserror::Error;

use crate::error::is_reserved_keyword;
use crate::search::facet::BadGeoError;
use crate::{CriterionError, Error, UserError};
use crate::{Error, RankingRuleError, UserError};

/// This error type is never supposed to be shown to the end user.
/// You must always cast it to a sort error or a criterion error.
@@ -28,23 +28,23 @@ impl From<BadGeoError> for AscDescError {
    }
}

impl From<AscDescError> for CriterionError {
impl From<AscDescError> for RankingRuleError {
    fn from(error: AscDescError) -> Self {
        match error {
            AscDescError::GeoError(_) => {
                CriterionError::ReservedNameForSort { name: "_geoPoint".to_string() }
                RankingRuleError::ReservedNameForSort { name: "_geoPoint".to_string() }
            }
            AscDescError::InvalidSyntax { name } => CriterionError::InvalidName { name },
            AscDescError::InvalidSyntax { name } => RankingRuleError::InvalidName { name },
            AscDescError::ReservedKeyword { name } if name.starts_with("_geoPoint") => {
                CriterionError::ReservedNameForSort { name: "_geoPoint".to_string() }
                RankingRuleError::ReservedNameForSort { name: "_geoPoint".to_string() }
            }
            AscDescError::ReservedKeyword { name } if name.starts_with("_geoRadius") => {
                CriterionError::ReservedNameForFilter { name: "_geoRadius".to_string() }
                RankingRuleError::ReservedNameForFilter { name: "_geoRadius".to_string() }
            }
            AscDescError::ReservedKeyword { name } if name.starts_with("_geoBoundingBox") => {
                CriterionError::ReservedNameForFilter { name: "_geoBoundingBox".to_string() }
                RankingRuleError::ReservedNameForFilter { name: "_geoBoundingBox".to_string() }
            }
            AscDescError::ReservedKeyword { name } => CriterionError::ReservedName { name },
            AscDescError::ReservedKeyword { name } => RankingRuleError::ReservedName { name },
        }
    }
}

@@ -9,7 +9,7 @@ use serde_json::Value;
use thiserror::Error;

use crate::documents::{self, DocumentsBatchCursorError};
use crate::{CriterionError, DocumentId, FieldId, Object, SortError};
use crate::{DocumentId, FieldId, Object, RankingRuleError, SortError};

pub fn is_reserved_keyword(keyword: &str) -> bool {
    ["_geo", "_geoDistance", "_geoPoint", "_geoRadius", "_geoBoundingBox"].contains(&keyword)
@@ -94,7 +94,7 @@ pub enum UserError {
    #[error("A document cannot contain more than 65,535 fields.")]
    AttributeLimitReached,
    #[error(transparent)]
    CriterionError(#[from] CriterionError),
    CriterionError(#[from] RankingRuleError),
    #[error("Maximum number of documents reached.")]
    DocumentLimitReached,
    #[error(
@@ -116,6 +116,8 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco
    InvalidVectorsType { document_id: Value, value: Value },
    #[error("{0}")]
    InvalidFilter(String),
    #[error("{0}")]
    InvalidBoostingFilter(String),
    #[error("Invalid type for filter subexpression: expected: {}, found: {1}.", .0.join(", "))]
    InvalidFilterExpression(&'static [&'static str], Value),
    #[error("Attribute `{}` is not sortable. {}",
@@ -280,7 +282,7 @@ error_from_sub_error! {
    ThreadPoolBuildError => InternalError,
    SerializationError => InternalError,
    GeoError => UserError,
    CriterionError => UserError,
    RankingRuleError => UserError,
}

impl<E> From<grenad::Error<E>> for Error

@@ -25,10 +25,9 @@ use crate::heed_codec::{
};
use crate::readable_slices::ReadableSlices;
use crate::{
    default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds,
    FacetDistribution, FieldDistribution, FieldId, FieldIdWordCountCodec, GeoPoint, ObkvCodec,
    OrderBy, Result, RoaringBitmapCodec, RoaringBitmapLenCodec, Search, U8StrStrCodec, BEU16,
    BEU32,
    default_criteria, CboRoaringBitmapCodec, DocumentId, ExternalDocumentsIds, FacetDistribution,
    FieldDistribution, FieldId, FieldIdWordCountCodec, GeoPoint, ObkvCodec, OrderBy, RankingRule,
    Result, RoaringBitmapCodec, RoaringBitmapLenCodec, Search, U8StrStrCodec, BEU16, BEU32,
};

/// The HNSW data-structure that we serialize, fill and search in.
@@ -895,7 +894,7 @@ impl Index {
        let distinct_field = self.distinct_field(rtxn)?;
        let asc_desc_fields =
            self.criteria(rtxn)?.into_iter().filter_map(|criterion| match criterion {
                Criterion::Asc(field) | Criterion::Desc(field) => Some(field),
                RankingRule::Asc(field) | RankingRule::Desc(field) => Some(field),
                _otherwise => None,
            });

@@ -1023,17 +1022,17 @@ impl Index {
    pub(crate) fn put_criteria(
        &self,
        wtxn: &mut RwTxn,
        criteria: &[Criterion],
        criteria: &[RankingRule],
    ) -> heed::Result<()> {
        self.main.put::<_, Str, SerdeJson<&[Criterion]>>(wtxn, main_key::CRITERIA_KEY, &criteria)
        self.main.put::<_, Str, SerdeJson<&[RankingRule]>>(wtxn, main_key::CRITERIA_KEY, &criteria)
    }

    pub(crate) fn delete_criteria(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
        self.main.delete::<_, Str>(wtxn, main_key::CRITERIA_KEY)
    }

    pub fn criteria(&self, rtxn: &RoTxn) -> heed::Result<Vec<Criterion>> {
        match self.main.get::<_, Str, SerdeJson<Vec<Criterion>>>(rtxn, main_key::CRITERIA_KEY)? {
    pub fn criteria(&self, rtxn: &RoTxn) -> heed::Result<Vec<RankingRule>> {
        match self.main.get::<_, Str, SerdeJson<Vec<RankingRule>>>(rtxn, main_key::CRITERIA_KEY)? {
            Some(criteria) => Ok(criteria),
            None => Ok(default_criteria()),
        }

@@ -9,7 +9,6 @@ pub static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;
pub mod documents;

mod asc_desc;
mod criterion;
pub mod distance;
mod error;
mod external_documents_ids;
@@ -18,6 +17,7 @@ mod fields_ids_map;
pub mod heed_codec;
pub mod index;
pub mod proximity;
mod ranking_rule;
mod readable_slices;
pub mod score_details;
mod search;
@@ -44,7 +44,6 @@ use serde_json::Value;
pub use {charabia as tokenizer, heed};

pub use self::asc_desc::{AscDesc, AscDescError, Member, SortError};
pub use self::criterion::{default_criteria, Criterion, CriterionError};
pub use self::error::{
    Error, FieldIdMapMissingEntry, InternalError, SerializationError, UserError,
};
@@ -57,6 +56,7 @@ pub use self::heed_codec::{
    UncheckedU8StrStrCodec,
};
pub use self::index::Index;
pub use self::ranking_rule::{default_criteria, RankingRule, RankingRuleError};
pub use self::search::{
    FacetDistribution, FacetValueHit, Filter, FormatOptions, MatchBounds, MatcherBuilder,
    MatchingWords, OrderBy, Search, SearchForFacetValues, SearchResult, TermsMatchingStrategy,

@@ -7,7 +7,7 @@ use thiserror::Error;
use crate::{AscDesc, Member};

#[derive(Error, Debug)]
pub enum CriterionError {
pub enum RankingRuleError {
    #[error("`{name}` ranking rule is invalid. Valid ranking rules are words, typo, sort, proximity, attribute, exactness and custom ranking rules.")]
    InvalidName { name: String },
    #[error("`{name}` is a reserved keyword and thus can't be used as a ranking rule")]
@@ -25,7 +25,9 @@ pub enum CriterionError {
}

#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
pub enum Criterion {
pub enum RankingRule {
    /// Sorted by documents matching the given filter and then documents not matching it.
    FilterBoosting(String),
    /// Sorted by decreasing number of matched query terms.
    /// Query words at the front of an attribute are considered better than words at the back.
    Words,
@@ -47,62 +49,68 @@ pub enum Criterion {
    Desc(String),
}

impl Criterion {
impl RankingRule {
    /// Returns the field name parameter of this criterion.
    pub fn field_name(&self) -> Option<&str> {
        match self {
            Criterion::Asc(name) | Criterion::Desc(name) => Some(name),
            RankingRule::Asc(name) | RankingRule::Desc(name) => Some(name),
            _otherwise => None,
        }
    }
}

impl FromStr for Criterion {
    type Err = CriterionError;
impl FromStr for RankingRule {
    type Err = RankingRuleError;

    fn from_str(text: &str) -> Result<Criterion, Self::Err> {
    fn from_str(text: &str) -> Result<RankingRule, Self::Err> {
        match text {
            "words" => Ok(Criterion::Words),
            "typo" => Ok(Criterion::Typo),
            "proximity" => Ok(Criterion::Proximity),
            "attribute" => Ok(Criterion::Attribute),
            "sort" => Ok(Criterion::Sort),
            "exactness" => Ok(Criterion::Exactness),
            text => match AscDesc::from_str(text)? {
                AscDesc::Asc(Member::Field(field)) => Ok(Criterion::Asc(field)),
                AscDesc::Desc(Member::Field(field)) => Ok(Criterion::Desc(field)),
                AscDesc::Asc(Member::Geo(_)) | AscDesc::Desc(Member::Geo(_)) => {
                    Err(CriterionError::ReservedNameForSort { name: "_geoPoint".to_string() })?
                }
            "words" => Ok(RankingRule::Words),
            "typo" => Ok(RankingRule::Typo),
            "proximity" => Ok(RankingRule::Proximity),
            "attribute" => Ok(RankingRule::Attribute),
            "sort" => Ok(RankingRule::Sort),
            "exactness" => Ok(RankingRule::Exactness),
            text => match AscDesc::from_str(text) {
                Ok(asc_desc) => match asc_desc {
                    AscDesc::Asc(Member::Field(field)) => Ok(RankingRule::Asc(field)),
                    AscDesc::Desc(Member::Field(field)) => Ok(RankingRule::Desc(field)),
                    AscDesc::Asc(Member::Geo(_)) | AscDesc::Desc(Member::Geo(_)) => {
                        Err(RankingRuleError::ReservedNameForSort {
                            name: "_geoPoint".to_string(),
                        })?
                    }
                },
                Err(err) => Err(err.into()),
            },
        }
    }
}

pub fn default_criteria() -> Vec<Criterion> {
pub fn default_criteria() -> Vec<RankingRule> {
    vec![
        Criterion::Words,
        Criterion::Typo,
        Criterion::Proximity,
        Criterion::Attribute,
        Criterion::Sort,
        Criterion::Exactness,
        RankingRule::Words,
        RankingRule::Typo,
        RankingRule::Proximity,
        RankingRule::Attribute,
        RankingRule::Sort,
        RankingRule::Exactness,
    ]
}

impl fmt::Display for Criterion {
impl fmt::Display for RankingRule {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        use Criterion::*;
        use RankingRule::*;

        match self {
            Words => f.write_str("words"),
            FilterBoosting(_) => write!(f, "filterBoosting"),
            Typo => f.write_str("typo"),
            Proximity => f.write_str("proximity"),
            Attribute => f.write_str("attribute"),
            Sort => f.write_str("sort"),
            Exactness => f.write_str("exactness"),
            Asc(attr) => write!(f, "{}:asc", attr),
            Desc(attr) => write!(f, "{}:desc", attr),
            Asc(attr) => write!(f, "{attr}:asc"),
            Desc(attr) => write!(f, "{attr}:desc"),
        }
    }
}
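Editor's note: the rename keeps `FromStr` and `Display` symmetrical for the field-based rules, so a round-trip sketch like the would-be test below holds (written against the renamed types above):

#[test]
fn ranking_rule_round_trip() {
    use std::str::FromStr;

    let rule = RankingRule::from_str("price:asc").unwrap();
    assert_eq!(rule, RankingRule::Asc("price".to_string()));
    // Display writes the rule back in the exact shape FromStr accepts.
    assert_eq!(rule.to_string(), "price:asc");
    assert_eq!(RankingRule::Sort.to_string(), "sort");
}

Note that `FilterBoosting` has a `Display` arm ("filterBoosting") but no `FromStr` arm: it can be printed but never parsed from user-provided settings, which matches the engine-inserted-only design seen later in this diff.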
@@ -110,29 +118,29 @@ impl fmt::Display for Criterion {
#[cfg(test)]
mod tests {
    use big_s::S;
    use CriterionError::*;
    use RankingRuleError::*;

    use super::*;

    #[test]
    fn parse_criterion() {
        let valid_criteria = [
            ("words", Criterion::Words),
            ("typo", Criterion::Typo),
            ("proximity", Criterion::Proximity),
            ("attribute", Criterion::Attribute),
            ("sort", Criterion::Sort),
            ("exactness", Criterion::Exactness),
            ("price:asc", Criterion::Asc(S("price"))),
            ("price:desc", Criterion::Desc(S("price"))),
            ("price:asc:desc", Criterion::Desc(S("price:asc"))),
            ("truc:machin:desc", Criterion::Desc(S("truc:machin"))),
            ("hello-world!:desc", Criterion::Desc(S("hello-world!"))),
            ("it's spacy over there:asc", Criterion::Asc(S("it's spacy over there"))),
            ("words", RankingRule::Words),
            ("typo", RankingRule::Typo),
            ("proximity", RankingRule::Proximity),
            ("attribute", RankingRule::Attribute),
            ("sort", RankingRule::Sort),
            ("exactness", RankingRule::Exactness),
            ("price:asc", RankingRule::Asc(S("price"))),
            ("price:desc", RankingRule::Desc(S("price"))),
            ("price:asc:desc", RankingRule::Desc(S("price:asc"))),
            ("truc:machin:desc", RankingRule::Desc(S("truc:machin"))),
            ("hello-world!:desc", RankingRule::Desc(S("hello-world!"))),
            ("it's spacy over there:asc", RankingRule::Asc(S("it's spacy over there"))),
        ];

        for (input, expected) in valid_criteria {
            let res = input.parse::<Criterion>();
            let res = input.parse::<RankingRule>();
            assert!(
                res.is_ok(),
                "Failed to parse `{}`, was expecting `{:?}` but instead got `{:?}`",
@@ -167,7 +175,7 @@ mod tests {
        ];

        for (input, expected) in invalid_criteria {
            let res = input.parse::<Criterion>();
            let res = input.parse::<RankingRule>();
            assert!(
                res.is_err(),
                "Should not be able to parse `{}`, was expecting an error but instead got: `{:?}`",
@@ -5,6 +5,7 @@ use crate::distance_between_two_points;
#[derive(Debug, Clone, PartialEq)]
pub enum ScoreDetails {
    Words(Words),
    FilterBoosting(FilterBoosting),
    Typo(Typo),
    Proximity(Rank),
    Fid(Rank),
@@ -23,6 +24,7 @@ impl ScoreDetails {
    pub fn rank(&self) -> Option<Rank> {
        match self {
            ScoreDetails::Words(details) => Some(details.rank()),
            ScoreDetails::FilterBoosting(_) => None,
            ScoreDetails::Typo(details) => Some(details.rank()),
            ScoreDetails::Proximity(details) => Some(*details),
            ScoreDetails::Fid(details) => Some(*details),
@@ -60,6 +62,11 @@ impl ScoreDetails {
                details_map.insert("words".into(), words_details);
                order += 1;
            }
            ScoreDetails::FilterBoosting(FilterBoosting { matching }) => {
                let sort_details = serde_json::json!({ "matching": matching });
                details_map.insert("filterBoosting".into(), sort_details);
                order += 1;
            }
            ScoreDetails::Typo(typo) => {
                let typo_details = serde_json::json!({
                    "order": order,
@@ -221,6 +228,11 @@ impl Words {
    }
}

#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct FilterBoosting {
    pub matching: bool,
}

/// Structure that is very similar to [`Words`], but whose semantics are a bit different.
///
/// In exactness, the number of matching words can actually be 0 with a non-zero score,
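Editor's note: the new `FilterBoosting` arm above serializes a single boolean. A tiny sketch of the fragment it contributes to the score-details map (the surrounding map and `order` bookkeeping are elided; this mirrors the `serde_json::json!` call in the hunk):

use serde_json::json;

fn main() {
    let matching = true;
    let sort_details = json!({ "matching": matching });
    assert_eq!(sort_details.to_string(), r#"{"matching":true}"#);
    // i.e. the details map gains: "filterBoosting": {"matching":true}
}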
@@ -3,7 +3,7 @@ use std::fmt::{Debug, Display};
use std::ops::Bound::{self, Excluded, Included};

use either::Either;
pub use filter_parser::{Condition, Error as FPError, FilterCondition, Token};
pub use filter_parser::{Condition, Error as FPError, FilterCondition, Span, Token};
use roaring::RoaringBitmap;
use serde_json::Value;


@@ -11,7 +11,7 @@ use once_cell::sync::Lazy;
use roaring::bitmap::RoaringBitmap;

pub use self::facet::{FacetDistribution, Filter, OrderBy, DEFAULT_VALUES_PER_FACET};
pub use self::new::matches::{FormatOptions, MatchBounds, MatcherBuilder, MatchingWords};
pub use self::new::matches::{FormatOptions, MatchBounds, Matcher, MatcherBuilder, MatchingWords};
use self::new::PartialSearchResult;
use crate::error::UserError;
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupValue};
@@ -38,6 +38,7 @@ pub struct Search<'a> {
    vector: Option<Vec<f32>>,
    // this should be linked to the String in the query
    filter: Option<Filter<'a>>,
    boosting_filter: Option<Filter<'a>>,
    offset: usize,
    limit: usize,
    sort_criteria: Option<Vec<AscDesc>>,
@@ -57,6 +58,7 @@ impl<'a> Search<'a> {
            query: None,
            vector: None,
            filter: None,
            boosting_filter: None,
            offset: 0,
            limit: 20,
            sort_criteria: None,
@@ -121,6 +123,11 @@ impl<'a> Search<'a> {
        self
    }

    pub fn boosting_filter(&mut self, condition: Filter<'a>) -> &mut Search<'a> {
        self.boosting_filter = Some(condition);
        self
    }

    #[cfg(test)]
    pub fn geo_sort_strategy(&mut self, strategy: new::GeoSortStrategy) -> &mut Search<'a> {
        self.geo_strategy = strategy;
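Editor's note: with the new setter, wiring a boosting filter into a search could look like the fragment below. This is a hedged usage sketch, not code from the branch: the `sponsored` attribute is hypothetical, and transaction/index setup plus error handling are shortened.

let rtxn = index.read_txn()?;
let mut search = Search::new(&rtxn, &index);
search.query("running shoes");
if let Some(boost) = Filter::from_str("sponsored = true")? {
    // Matching documents are ranked in a bucket before non-matching ones;
    // unlike `filter`, nothing is excluded from the results.
    search.boosting_filter(boost);
}
let results = search.execute()?;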
@@ -150,6 +157,7 @@ impl<'a> Search<'a> {
            self.scoring_strategy,
            self.exhaustive_number_hits,
            &self.filter,
            &self.boosting_filter,
            &self.sort_criteria,
            self.geo_strategy,
            self.offset,
@@ -175,6 +183,7 @@ impl fmt::Debug for Search<'_> {
            query,
            vector: _,
            filter,
            boosting_filter,
            offset,
            limit,
            sort_criteria,
@@ -191,6 +200,7 @@ impl fmt::Debug for Search<'_> {
            .field("query", query)
            .field("vector", &"[...]")
            .field("filter", filter)
            .field("boosting_filter", boosting_filter)
            .field("offset", offset)
            .field("limit", limit)
            .field("sort_criteria", sort_criteria)

@@ -46,8 +46,9 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
    if let Some(distinct_fid) = distinct_fid {
        let mut excluded = RoaringBitmap::new();
        let mut results = vec![];
        let mut skip = 0;
        for docid in universe.iter() {
            if results.len() >= from + length {
            if results.len() >= length {
                break;
            }
            if excluded.contains(docid) {
@@ -55,19 +56,16 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
            }

            distinct_single_docid(ctx.index, ctx.txn, distinct_fid, docid, &mut excluded)?;
            skip += 1;
            if skip <= from {
                continue;
            }

            results.push(docid);
        }

        let mut all_candidates = universe - excluded;
        all_candidates.extend(results.iter().copied());
        // drain the results of the skipped elements
        // this **must** be done **after** the entire results have been written to `all_candidates`
        // to ensure e.g. estimatedTotalHits is correct.
        if results.len() >= from {
            results.drain(..from);
        } else {
            results.clear();
        }

        return Ok(BucketSortOutput {
            scores: vec![Default::default(); results.len()],

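Editor's note: the hunk above swaps between two ways of honoring `from`/`length` under the distinct rule — collecting `from + length` results and draining the prefix afterwards (so `all_candidates`, and thus estimatedTotalHits, sees every collected docid), versus counting skipped hits during iteration. A reduced sketch of the skip-counter variant, with plain integers standing in for docids and the `excluded` deduplication omitted:

fn paginate(universe: &[u32], from: usize, length: usize) -> Vec<u32> {
    let mut results = Vec::new();
    let mut skip = 0;
    for &docid in universe {
        if results.len() >= length {
            break;
        }
        skip += 1;
        if skip <= from {
            continue; // seen and counted, but before the requested page
        }
        results.push(docid);
    }
    results
}

fn main() {
    // Skip the first 2 hits, keep the next 3.
    assert_eq!(paginate(&[10, 11, 12, 13, 14, 15], 2, 3), vec![12, 13, 14]);
}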
79
milli/src/search/new/filter_boosting.rs
Normal file
@@ -0,0 +1,79 @@
use roaring::RoaringBitmap;

use super::logger::SearchLogger;
use super::{RankingRule, RankingRuleOutput, RankingRuleQueryTrait, SearchContext};
use crate::score_details::{self, ScoreDetails};
use crate::{Filter, Result};

pub struct FilterBoosting<'f, Query> {
    filter: Filter<'f>,
    original_query: Option<Query>,
    matching: Option<RankingRuleOutput<Query>>,
    non_matching: Option<RankingRuleOutput<Query>>,
}

impl<'f, Query> FilterBoosting<'f, Query> {
    pub fn new(filter: Filter<'f>) -> Result<Self> {
        Ok(Self { filter, original_query: None, matching: None, non_matching: None })
    }
}

impl<'ctx, 'f, Query: RankingRuleQueryTrait> RankingRule<'ctx, Query>
    for FilterBoosting<'f, Query>
{
    fn id(&self) -> String {
        // TODO improve this
        let Self { filter: original_expression, .. } = self;
        format!("boost:{original_expression:?}")
    }

    fn start_iteration(
        &mut self,
        ctx: &mut SearchContext<'ctx>,
        _logger: &mut dyn SearchLogger<Query>,
        parent_candidates: &RoaringBitmap,
        parent_query: &Query,
    ) -> Result<()> {
        let universe_matching = match self.filter.evaluate(ctx.txn, ctx.index) {
            Ok(documents) => documents,
            Err(e) => return Err(e), // TODO manage the invalid_search_boosting_filter
        };
        let matching = parent_candidates & universe_matching;
        let non_matching = parent_candidates - &matching;

        self.original_query = Some(parent_query.clone());

        self.matching = Some(RankingRuleOutput {
            query: parent_query.clone(),
            candidates: matching,
            score: ScoreDetails::FilterBoosting(score_details::FilterBoosting { matching: true }),
        });

        self.non_matching = Some(RankingRuleOutput {
            query: parent_query.clone(),
            candidates: non_matching,
            score: ScoreDetails::FilterBoosting(score_details::FilterBoosting { matching: false }),
        });

        Ok(())
    }

    fn next_bucket(
        &mut self,
        _ctx: &mut SearchContext<'ctx>,
        _logger: &mut dyn SearchLogger<Query>,
        _universe: &RoaringBitmap,
    ) -> Result<Option<RankingRuleOutput<Query>>> {
        Ok(self.matching.take().or_else(|| self.non_matching.take()))
    }

    fn end_iteration(
        &mut self,
        _ctx: &mut SearchContext<'ctx>,
        _logger: &mut dyn SearchLogger<Query>,
    ) {
        self.original_query = None;
        self.matching = None;
        self.non_matching = None;
    }
}
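Editor's note: `start_iteration` above splits the parent candidates into exactly two buckets that `next_bucket` then hands out in order (matching first, then non-matching). The set arithmetic in miniature, with `BTreeSet` standing in for `RoaringBitmap`:

use std::collections::BTreeSet;

fn main() {
    let parent: BTreeSet<u32> = (0..6).collect();
    // Documents the boosting filter selects, possibly outside the parent universe.
    let filter_universe: BTreeSet<u32> = BTreeSet::from([1, 3, 9]);

    // matching = parent & universe_matching
    let matching: BTreeSet<u32> = parent.intersection(&filter_universe).copied().collect();
    // non_matching = parent - matching
    let non_matching: BTreeSet<u32> = parent.difference(&matching).copied().collect();

    assert_eq!(matching, BTreeSet::from([1, 3]));
    assert_eq!(non_matching, BTreeSet::from([0, 2, 4, 5]));
}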
@@ -512,6 +512,7 @@ mod tests {
        false,
        &None,
        &None,
        &None,
        crate::search::new::GeoSortStrategy::default(),
        0,
        100,

@@ -15,6 +15,7 @@ mod resolve_query_graph;
mod small_bitmap;

mod exact_attribute;
mod filter_boosting;
mod sort;

#[cfg(test)]
@@ -26,6 +27,7 @@ use bucket_sort::{bucket_sort, BucketSortOutput};
use charabia::TokenizerBuilder;
use db_cache::DatabaseCache;
use exact_attribute::ExactAttribute;
use filter_boosting::FilterBoosting;
use graph_based_ranking_rule::{Exactness, Fid, Position, Proximity, Typo};
use heed::RoTxn;
use instant_distance::Search;
@@ -190,25 +192,30 @@ fn resolve_universe(
}

/// Return the list of initialised ranking rules to be used for a placeholder search.
fn get_ranking_rules_for_placeholder_search<'ctx>(
fn get_ranking_rules_for_placeholder_search<'ctx, 'f: 'ctx>(
    ctx: &SearchContext<'ctx>,
    sort_criteria: &Option<Vec<AscDesc>>,
    geo_strategy: geo_sort::Strategy,
    boosting_filter: &Option<Filter<'f>>,
) -> Result<Vec<BoxRankingRule<'ctx, PlaceholderQuery>>> {
    let mut sort = false;
    let mut sorted_fields = HashSet::new();
    let mut geo_sorted = false;
    let mut ranking_rules: Vec<BoxRankingRule<PlaceholderQuery>> = vec![];
    let mut ranking_rules: Vec<BoxRankingRule<_>> = match boosting_filter {
        Some(filter) => vec![Box::new(FilterBoosting::new(filter.clone())?)],
        None => Vec::new(),
    };
    let settings_ranking_rules = ctx.index.criteria(ctx.txn)?;
    for rr in settings_ranking_rules {
        match rr {
            // These rules need a query to have an effect; ignore them in placeholder search
            crate::Criterion::Words
            | crate::Criterion::Typo
            | crate::Criterion::Attribute
            | crate::Criterion::Proximity
            | crate::Criterion::Exactness => continue,
            crate::Criterion::Sort => {
            crate::RankingRule::FilterBoosting(_)
            | crate::RankingRule::Words
            | crate::RankingRule::Typo
            | crate::RankingRule::Attribute
            | crate::RankingRule::Proximity
            | crate::RankingRule::Exactness => continue,
            crate::RankingRule::Sort => {
                if sort {
                    continue;
                }
@@ -222,14 +229,14 @@ fn get_ranking_rules_for_placeholder_search<'ctx>(
                )?;
                sort = true;
            }
            crate::Criterion::Asc(field_name) => {
            crate::RankingRule::Asc(field_name) => {
                if sorted_fields.contains(&field_name) {
                    continue;
                }
                sorted_fields.insert(field_name.clone());
                ranking_rules.push(Box::new(Sort::new(ctx.index, ctx.txn, field_name, true)?));
            }
            crate::Criterion::Desc(field_name) => {
            crate::RankingRule::Desc(field_name) => {
                if sorted_fields.contains(&field_name) {
                    continue;
                }
@@ -242,11 +249,12 @@ fn get_ranking_rules_for_query_graph_search<'ctx>(
}

/// Return the list of initialised ranking rules to be used for a query graph search.
fn get_ranking_rules_for_query_graph_search<'ctx>(
fn get_ranking_rules_for_query_graph_search<'ctx, 'f: 'ctx>(
    ctx: &SearchContext<'ctx>,
    sort_criteria: &Option<Vec<AscDesc>>,
    geo_strategy: geo_sort::Strategy,
    terms_matching_strategy: TermsMatchingStrategy,
    boosting_filter: &Option<Filter<'f>>,
) -> Result<Vec<BoxRankingRule<'ctx, QueryGraph>>> {
    // query graph search
    let mut words = false;
@@ -263,15 +271,18 @@ fn get_ranking_rules_for_query_graph_search<'ctx>(
        words = true;
    }

    let mut ranking_rules: Vec<BoxRankingRule<QueryGraph>> = vec![];
    let mut ranking_rules: Vec<BoxRankingRule<QueryGraph>> = match boosting_filter {
        Some(filter) => vec![Box::new(FilterBoosting::new(filter.clone())?)],
        None => Vec::new(),
    };
    let settings_ranking_rules = ctx.index.criteria(ctx.txn)?;
    for rr in settings_ranking_rules {
        // Add Words before any of: typo, proximity, attribute
        match rr {
            crate::Criterion::Typo
            | crate::Criterion::Attribute
            | crate::Criterion::Proximity
            | crate::Criterion::Exactness => {
            crate::RankingRule::Typo
            | crate::RankingRule::Attribute
            | crate::RankingRule::Proximity
            | crate::RankingRule::Exactness => {
                if !words {
                    ranking_rules.push(Box::new(Words::new(terms_matching_strategy)));
                    words = true;
@@ -280,28 +291,33 @@ fn get_ranking_rules_for_query_graph_search<'ctx>(
            _ => {}
        }
        match rr {
            crate::Criterion::Words => {
            crate::RankingRule::Words => {
                if words {
                    continue;
                }
                ranking_rules.push(Box::new(Words::new(terms_matching_strategy)));
                words = true;
            }
            crate::Criterion::Typo => {
            crate::RankingRule::FilterBoosting(_) => {
                // it is not possible to define the filterBoosting ranking rule by hand
                // or through the settings; it is always inserted by the engine itself.
                continue;
            }
            crate::RankingRule::Typo => {
                if typo {
                    continue;
                }
                typo = true;
                ranking_rules.push(Box::new(Typo::new(None)));
            }
            crate::Criterion::Proximity => {
            crate::RankingRule::Proximity => {
                if proximity {
                    continue;
                }
                proximity = true;
                ranking_rules.push(Box::new(Proximity::new(None)));
            }
            crate::Criterion::Attribute => {
            crate::RankingRule::Attribute => {
                if attribute {
                    continue;
                }
@@ -309,7 +325,7 @@ fn get_ranking_rules_for_query_graph_search<'ctx>(
                ranking_rules.push(Box::new(Fid::new(None)));
                ranking_rules.push(Box::new(Position::new(None)));
            }
            crate::Criterion::Sort => {
            crate::RankingRule::Sort => {
                if sort {
                    continue;
                }
@@ -323,7 +339,7 @@ fn get_ranking_rules_for_query_graph_search<'ctx>(
                )?;
                sort = true;
            }
            crate::Criterion::Exactness => {
            crate::RankingRule::Exactness => {
                if exactness {
                    continue;
                }
@@ -331,14 +347,15 @@ fn get_ranking_rules_for_query_graph_search<'ctx>(
                ranking_rules.push(Box::new(Exactness::new()));
                exactness = true;
            }
            crate::Criterion::Asc(field_name) => {
            crate::RankingRule::Asc(field_name) => {
                // TODO Question: Why would it be invalid to sort price:asc, typo, price:desc?
                if sorted_fields.contains(&field_name) {
                    continue;
                }
                sorted_fields.insert(field_name.clone());
                ranking_rules.push(Box::new(Sort::new(ctx.index, ctx.txn, field_name, true)?));
            }
            crate::Criterion::Desc(field_name) => {
            crate::RankingRule::Desc(field_name) => {
                if sorted_fields.contains(&field_name) {
                    continue;
                }
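Editor's note: every arm above guards its rule with a boolean so that a rule listed twice in the settings is only instantiated once, and the `FilterBoosting` arm is deliberately unreachable from settings. The recurring guard, distilled into a standalone sketch (names are illustrative, not from the branch):

fn push_once(rules: &mut Vec<&'static str>, already_inserted: &mut bool, rule: &'static str) {
    // A rule that appears twice in the settings must only be applied once.
    if *already_inserted {
        return;
    }
    *already_inserted = true;
    rules.push(rule);
}

fn main() {
    let mut rules = Vec::new();
    let mut typo = false;
    push_once(&mut rules, &mut typo, "typo");
    push_once(&mut rules, &mut typo, "typo"); // duplicate, silently skipped
    assert_eq!(rules, vec!["typo"]);
}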
@@ -406,14 +423,15 @@ fn resolve_sort_criteria<'ctx, Query: RankingRuleQueryTrait>(
}

#[allow(clippy::too_many_arguments)]
pub fn execute_search(
    ctx: &mut SearchContext,
pub fn execute_search<'ctx, 'f: 'ctx>(
    ctx: &mut SearchContext<'ctx>,
    query: &Option<String>,
    vector: &Option<Vec<f32>>,
    terms_matching_strategy: TermsMatchingStrategy,
    scoring_strategy: ScoringStrategy,
    exhaustive_number_hits: bool,
    filters: &Option<Filter>,
    filter: &Option<Filter>,
    boosting_filter: &Option<Filter<'f>>,
    sort_criteria: &Option<Vec<AscDesc>>,
    geo_strategy: geo_sort::Strategy,
    from: usize,
@@ -422,8 +440,8 @@ pub fn execute_search(
    placeholder_search_logger: &mut dyn SearchLogger<PlaceholderQuery>,
    query_graph_logger: &mut dyn SearchLogger<QueryGraph>,
) -> Result<PartialSearchResult> {
    let mut universe = if let Some(filters) = filters {
        filters.evaluate(ctx.txn, ctx.index)?
    let mut universe = if let Some(filter) = filter {
        filter.evaluate(ctx.txn, ctx.index)?
    } else {
        ctx.index.documents_ids(ctx.txn)?
    };
@@ -434,18 +452,7 @@ pub fn execute_search(
        let mut search = Search::default();
        let docids = match ctx.index.vector_hnsw(ctx.txn)? {
            Some(hnsw) => {
                if let Some(expected_size) = hnsw.iter().map(|(_, point)| point.len()).next() {
                    if vector.len() != expected_size {
                        return Err(UserError::InvalidVectorDimensions {
                            expected: expected_size,
                            found: vector.len(),
                        }
                        .into());
                    }
                }

                let vector = NDotProductPoint::new(vector.clone());

                let neighbors = hnsw.search(&vector, &mut search);

                let mut docids = Vec::new();
@@ -527,6 +534,7 @@ pub fn execute_search(
            sort_criteria,
            geo_strategy,
            terms_matching_strategy,
            boosting_filter,
        )?;

        universe =
@@ -543,8 +551,13 @@ pub fn execute_search(
            query_graph_logger,
        )?
    } else {
        let ranking_rules =
            get_ranking_rules_for_placeholder_search(ctx, sort_criteria, geo_strategy)?;
        let ranking_rules = get_ranking_rules_for_placeholder_search(
            ctx,
            sort_criteria,
            geo_strategy,
            boosting_filter,
        )?;

        bucket_sort(
            ctx,
            ranking_rules,
@@ -591,7 +604,8 @@ fn check_sort_criteria(ctx: &SearchContext, sort_criteria: Option<&Vec<AscDesc>>

    // We check that the sort ranking rule exists and throw an
    // error if we try to use it when it doesn't.
    let sort_ranking_rule_missing = !ctx.index.criteria(ctx.txn)?.contains(&crate::Criterion::Sort);
    let sort_ranking_rule_missing =
        !ctx.index.criteria(ctx.txn)?.contains(&crate::RankingRule::Sort);
    if sort_ranking_rule_missing {
        return Err(UserError::SortRankingRuleMissing.into());
    }

@@ -29,7 +29,7 @@ use std::hash::Hash;
pub use cheapest_paths::PathVisitor;
pub use condition_docids_cache::ConditionDocIdsCache;
pub use dead_ends_cache::DeadEndsCache;
pub use exactness::ExactnessGraph;
pub use exactness::{ExactnessCondition, ExactnessGraph};
pub use fid::{FidCondition, FidGraph};
pub use position::{PositionCondition, PositionGraph};
pub use proximity::{ProximityCondition, ProximityGraph};

@@ -1,5 +1,5 @@
use crate::index::tests::TempIndex;
use crate::{Criterion, Search, SearchResult, TermsMatchingStrategy};
use crate::{RankingRule, Search, SearchResult, TermsMatchingStrategy};

fn create_index() -> TempIndex {
    let index = TempIndex::new();
@@ -12,7 +12,7 @@ fn create_index() -> TempIndex {
                "description".to_owned(),
                "plot".to_owned(),
            ]);
            s.set_criteria(vec![Criterion::Attribute]);
            s.set_ranking_rules(vec![RankingRule::Attribute]);
        })
        .unwrap();


@@ -1,5 +1,5 @@
use crate::index::tests::TempIndex;
use crate::{db_snap, Criterion, Search, SearchResult, TermsMatchingStrategy};
use crate::{db_snap, RankingRule, Search, SearchResult, TermsMatchingStrategy};

fn create_index() -> TempIndex {
    let index = TempIndex::new();
@@ -12,7 +12,7 @@ fn create_index() -> TempIndex {
                "text2".to_owned(),
                "other".to_owned(),
            ]);
            s.set_criteria(vec![Criterion::Attribute]);
            s.set_ranking_rules(vec![RankingRule::Attribute]);
        })
        .unwrap();

@@ -19,7 +19,7 @@ use maplit::hashset;

use super::collect_field_values;
use crate::index::tests::TempIndex;
use crate::{AscDesc, Criterion, Index, Member, Search, SearchResult, TermsMatchingStrategy};
use crate::{AscDesc, Index, Member, RankingRule, Search, SearchResult, TermsMatchingStrategy};

fn create_index() -> TempIndex {
    let index = TempIndex::new();
@@ -30,7 +30,7 @@ fn create_index() -> TempIndex {
            s.set_searchable_fields(vec!["text".to_owned()]);
            s.set_sortable_fields(hashset! { S("rank1"), S("letter") });
            s.set_distinct_field("letter".to_owned());
            s.set_criteria(vec![Criterion::Words]);
            s.set_ranking_rules(vec![RankingRule::Words]);
        })
        .unwrap();

@@ -252,7 +252,7 @@ fn test_distinct_placeholder_sort() {
    let index = create_index();
    index
        .update_settings(|s| {
            s.set_criteria(vec![Criterion::Sort]);
            s.set_ranking_rules(vec![RankingRule::Sort]);
        })
        .unwrap();

@@ -387,7 +387,7 @@ fn test_distinct_words() {
    let index = create_index();
    index
        .update_settings(|s| {
            s.set_criteria(vec![Criterion::Words]);
            s.set_ranking_rules(vec![RankingRule::Words]);
        })
        .unwrap();

@@ -440,7 +440,11 @@ fn test_distinct_sort_words() {
    let index = create_index();
    index
        .update_settings(|s| {
            s.set_criteria(vec![Criterion::Sort, Criterion::Words, Criterion::Desc(S("rank1"))]);
            s.set_ranking_rules(vec![
                RankingRule::Sort,
                RankingRule::Words,
                RankingRule::Desc(S("rank1")),
            ]);
        })
        .unwrap();

@@ -513,7 +517,7 @@ fn test_distinct_all_candidates() {
    let index = create_index();
    index
        .update_settings(|s| {
            s.set_criteria(vec![Criterion::Sort]);
            s.set_ranking_rules(vec![RankingRule::Sort]);
        })
        .unwrap();

@@ -536,7 +540,7 @@ fn test_distinct_typo() {
    let index = create_index();
    index
        .update_settings(|s| {
            s.set_criteria(vec![Criterion::Words, Criterion::Typo]);
            s.set_ranking_rules(vec![RankingRule::Words, RankingRule::Typo]);
        })
        .unwrap();

@@ -21,7 +21,7 @@ Then these rules will only work with

use crate::index::tests::TempIndex;
use crate::search::new::tests::collect_field_values;
use crate::{Criterion, Search, SearchResult, TermsMatchingStrategy};
use crate::{RankingRule, Search, SearchResult, TermsMatchingStrategy};

fn create_index_simple_ordered() -> TempIndex {
    let index = TempIndex::new();
@@ -30,7 +30,7 @@ fn create_index_simple_ordered() -> TempIndex {
        .update_settings(|s| {
            s.set_primary_key("id".to_owned());
            s.set_searchable_fields(vec!["text".to_owned()]);
            s.set_criteria(vec![Criterion::Exactness]);
            s.set_ranking_rules(vec![RankingRule::Exactness]);
        })
        .unwrap();

@@ -89,7 +89,7 @@ fn create_index_simple_reversed() -> TempIndex {
        .update_settings(|s| {
            s.set_primary_key("id".to_owned());
            s.set_searchable_fields(vec!["text".to_owned()]);
            s.set_criteria(vec![Criterion::Exactness]);
            s.set_ranking_rules(vec![RankingRule::Exactness]);
        })
        .unwrap();

@@ -147,7 +147,7 @@ fn create_index_simple_random() -> TempIndex {
        .update_settings(|s| {
            s.set_primary_key("id".to_owned());
            s.set_searchable_fields(vec!["text".to_owned()]);
            s.set_criteria(vec![Criterion::Exactness]);
            s.set_ranking_rules(vec![RankingRule::Exactness]);
        })
        .unwrap();

@@ -201,7 +201,7 @@ fn create_index_attribute_starts_with() -> TempIndex {
        .update_settings(|s| {
            s.set_primary_key("id".to_owned());
            s.set_searchable_fields(vec!["text".to_owned()]);
            s.set_criteria(vec![Criterion::Exactness]);
            s.set_ranking_rules(vec![RankingRule::Exactness]);
        })
        .unwrap();

@@ -251,7 +251,7 @@ fn create_index_simple_ordered_with_typos() -> TempIndex {
        .update_settings(|s| {
            s.set_primary_key("id".to_owned());
            s.set_searchable_fields(vec!["text".to_owned()]);
            s.set_criteria(vec![Criterion::Exactness]);
            s.set_ranking_rules(vec![RankingRule::Exactness]);
        })
        .unwrap();

@@ -350,7 +350,11 @@ fn create_index_with_varying_proximities() -> TempIndex {
        .update_settings(|s| {
            s.set_primary_key("id".to_owned());
            s.set_searchable_fields(vec!["text".to_owned()]);
            s.set_criteria(vec![Criterion::Exactness, Criterion::Words, Criterion::Proximity]);
            s.set_ranking_rules(vec![
                RankingRule::Exactness,
                RankingRule::Words,
                RankingRule::Proximity,
            ]);
        })
        .unwrap();

@@ -404,7 +408,7 @@ fn create_index_with_typo_and_prefix() -> TempIndex {
        .update_settings(|s| {
            s.set_primary_key("id".to_owned());
            s.set_searchable_fields(vec!["text".to_owned()]);
            s.set_criteria(vec![Criterion::Exactness]);
            s.set_ranking_rules(vec![RankingRule::Exactness]);
        })
        .unwrap();

@@ -442,7 +446,11 @@ fn create_index_all_equal_except_proximity_between_ignored_terms() -> TempIndex
        .update_settings(|s| {
            s.set_primary_key("id".to_owned());
            s.set_searchable_fields(vec!["text".to_owned()]);
            s.set_criteria(vec![Criterion::Exactness, Criterion::Words, Criterion::Proximity]);
            s.set_ranking_rules(vec![
                RankingRule::Exactness,
                RankingRule::Words,
                RankingRule::Proximity,
            ]);
        })
        .unwrap();

@@ -698,7 +706,7 @@ fn test_exactness_after_words() {

    index
        .update_settings(|s| {
            s.set_criteria(vec![Criterion::Words, Criterion::Exactness]);
            s.set_ranking_rules(vec![RankingRule::Words, RankingRule::Exactness]);
        })
        .unwrap();

@@ -747,7 +755,7 @@ fn test_words_after_exactness() {

    index
        .update_settings(|s| {
            s.set_criteria(vec![Criterion::Exactness, Criterion::Words]);
            s.set_ranking_rules(vec![RankingRule::Exactness, RankingRule::Words]);
        })
        .unwrap();

@@ -796,7 +804,11 @@ fn test_proximity_after_exactness() {

    index
        .update_settings(|s| {
            s.set_criteria(vec![Criterion::Exactness, Criterion::Words, Criterion::Proximity]);
            s.set_ranking_rules(vec![
                RankingRule::Exactness,
                RankingRule::Words,
                RankingRule::Proximity,
            ]);
        })
        .unwrap();

@@ -834,7 +846,11 @@ fn test_proximity_after_exactness() {

    index
        .update_settings(|s| {
            s.set_criteria(vec![Criterion::Exactness, Criterion::Words, Criterion::Proximity]);
            s.set_ranking_rules(vec![
                RankingRule::Exactness,
                RankingRule::Words,
                RankingRule::Proximity,
            ]);
        })
        .unwrap();

@@ -868,7 +884,11 @@ fn test_exactness_followed_by_typo_prefer_no_typo_prefix() {

    index
        .update_settings(|s| {
            s.set_criteria(vec![Criterion::Exactness, Criterion::Words, Criterion::Typo]);
            s.set_ranking_rules(vec![
                RankingRule::Exactness,
                RankingRule::Words,
                RankingRule::Typo,
            ]);
        })
        .unwrap();

@@ -904,7 +924,11 @@ fn test_typo_followed_by_exactness() {

    index
        .update_settings(|s| {
            s.set_criteria(vec![Criterion::Words, Criterion::Typo, Criterion::Exactness]);
            s.set_ranking_rules(vec![
                RankingRule::Words,
                RankingRule::Typo,
                RankingRule::Exactness,
            ]);
        })
        .unwrap();

@@ -9,7 +9,7 @@ use maplit::hashset;
use crate::index::tests::TempIndex;
use crate::score_details::ScoreDetails;
use crate::search::new::tests::collect_field_values;
use crate::{AscDesc, Criterion, GeoSortStrategy, Member, Search, SearchResult};
use crate::{AscDesc, GeoSortStrategy, Member, RankingRule, Search, SearchResult};

fn create_index() -> TempIndex {
    let index = TempIndex::new();
@@ -18,7 +18,7 @@ fn create_index() -> TempIndex {
        .update_settings(|s| {
            s.set_primary_key("id".to_owned());
            s.set_sortable_fields(hashset! { S("_geo") });
            s.set_criteria(vec![Criterion::Words, Criterion::Sort]);
            s.set_ranking_rules(vec![RankingRule::Words, RankingRule::Sort]);
        })
        .unwrap();
    index

@@ -6,10 +6,10 @@ use maplit::{btreemap, hashset};

use crate::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
use crate::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings};
use crate::{db_snap, Criterion, Index, Object};
use crate::{db_snap, Index, Object, RankingRule};
pub const CONTENT: &str = include_str!("../../../../tests/assets/test_set.ndjson");

pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
pub fn setup_search_index_with_criteria(criteria: &[RankingRule]) -> Index {
    let path = tempfile::tempdir().unwrap();
    let mut options = EnvOpenOptions::new();
    options.map_size(10 * 1024 * 1024); // 10 MB
@@ -20,7 +20,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {

    let mut builder = Settings::new(&mut wtxn, &index, &config);

    builder.set_criteria(criteria.to_vec());
    builder.set_ranking_rules(criteria.to_vec());
    builder.set_filterable_fields(hashset! {
        S("tag"),
        S("asc_desc_rank"),
@@ -70,6 +70,6 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {

#[test]
fn snapshot_integration_dataset() {
    let index = setup_search_index_with_criteria(&[Criterion::Attribute]);
    let index = setup_search_index_with_criteria(&[RankingRule::Attribute]);
    db_snap!(index, word_position_docids, @"3c9347a767bceef3beb31465f1e5f3ae");
}

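Editor's note: a hypothetical call into the updated helper, in the spirit of the snapshot test above; the assertion relies on the renamed `Index::criteria` accessor shown earlier in this diff:

let index = setup_search_index_with_criteria(&[RankingRule::Words, RankingRule::Typo]);
let rtxn = index.read_txn().unwrap();
// The settings written by the helper round-trip through the index.
assert_eq!(index.criteria(&rtxn).unwrap(), vec![RankingRule::Words, RankingRule::Typo]);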
@@ -19,7 +19,7 @@ This module tests the following properties:

use crate::index::tests::TempIndex;
use crate::search::new::tests::collect_field_values;
use crate::{Criterion, Search, SearchResult, TermsMatchingStrategy};
use crate::{RankingRule, Search, SearchResult, TermsMatchingStrategy};

fn create_index() -> TempIndex {
    let index = TempIndex::new();
@@ -28,7 +28,7 @@ fn create_index() -> TempIndex {
        .update_settings(|s| {
            s.set_primary_key("id".to_owned());
            s.set_searchable_fields(vec!["text".to_owned()]);
            s.set_criteria(vec![Criterion::Words]);
            s.set_ranking_rules(vec![RankingRule::Words]);
        })
        .unwrap();

@@ -19,7 +19,7 @@ use std::collections::BTreeMap;

use crate::index::tests::TempIndex;
use crate::search::new::tests::collect_field_values;
use crate::{Criterion, Search, SearchResult, TermsMatchingStrategy};
use crate::{RankingRule, Search, SearchResult, TermsMatchingStrategy};

fn create_simple_index() -> TempIndex {
    let index = TempIndex::new();
@@ -28,7 +28,7 @@ fn create_simple_index() -> TempIndex {
        .update_settings(|s| {
            s.set_primary_key("id".to_owned());
            s.set_searchable_fields(vec!["text".to_owned()]);
            s.set_criteria(vec![Criterion::Words, Criterion::Proximity]);
            s.set_ranking_rules(vec![RankingRule::Words, RankingRule::Proximity]);
        })
        .unwrap();

@@ -94,7 +94,7 @@ fn create_edge_cases_index() -> TempIndex {
        .update_settings(|s| {
            s.set_primary_key("id".to_owned());
            s.set_searchable_fields(vec!["text".to_owned()]);
            s.set_criteria(vec![Criterion::Words, Criterion::Proximity]);
            s.set_ranking_rules(vec![RankingRule::Words, RankingRule::Proximity]);
        })
        .unwrap();

@@ -8,7 +8,7 @@ implemented.

use crate::index::tests::TempIndex;
use crate::search::new::tests::collect_field_values;
use crate::{Criterion, Search, SearchResult, TermsMatchingStrategy};
use crate::{RankingRule, Search, SearchResult, TermsMatchingStrategy};

fn create_index() -> TempIndex {
    let index = TempIndex::new();
@@ -17,7 +17,11 @@ fn create_index() -> TempIndex {
        .update_settings(|s| {
            s.set_primary_key("id".to_owned());
            s.set_searchable_fields(vec!["text".to_owned()]);
            s.set_criteria(vec![Criterion::Words, Criterion::Proximity, Criterion::Typo]);
            s.set_ranking_rules(vec![
                RankingRule::Words,
                RankingRule::Proximity,
                RankingRule::Typo,
            ]);
        })
        .unwrap();

@@ -17,7 +17,7 @@ use maplit::hashset;
use crate::index::tests::TempIndex;
use crate::search::new::tests::collect_field_values;
use crate::{
    score_details, AscDesc, Criterion, Member, Search, SearchResult, TermsMatchingStrategy,
    score_details, AscDesc, Member, RankingRule, Search, SearchResult, TermsMatchingStrategy,
};

fn create_index() -> TempIndex {
@@ -28,7 +28,7 @@ fn create_index() -> TempIndex {
            s.set_primary_key("id".to_owned());
            s.set_searchable_fields(vec!["text".to_owned()]);
            s.set_sortable_fields(hashset! { S("rank"), S("vague"), S("letter") });
            s.set_criteria(vec![Criterion::Sort]);
            s.set_ranking_rules(vec![RankingRule::Sort]);
        })
        .unwrap();

@@ -331,7 +331,7 @@ fn test_redacted() {
        .update_settings(|s| {
            s.set_displayed_fields(vec!["text".to_owned(), "vague".to_owned()]);
            s.set_sortable_fields(hashset! { S("rank"), S("vague"), S("letter") });
            s.set_criteria(vec![Criterion::Sort]);
            s.set_ranking_rules(vec![RankingRule::Sort]);
        })
        .unwrap();

@@ -22,7 +22,7 @@ use std::collections::BTreeMap;

use crate::index::tests::TempIndex;
use crate::search::new::tests::collect_field_values;
use crate::{Criterion, Search, SearchResult, TermsMatchingStrategy};
use crate::{RankingRule, Search, SearchResult, TermsMatchingStrategy};

fn create_index() -> TempIndex {
    let index = TempIndex::new();
@@ -31,7 +31,7 @@ fn create_index() -> TempIndex {
        .update_settings(|s| {
            s.set_primary_key("id".to_owned());
            s.set_searchable_fields(vec!["text".to_owned()]);
            s.set_criteria(vec![Criterion::Words]);
            s.set_ranking_rules(vec![RankingRule::Words]);
        })
        .unwrap();

@@ -457,7 +457,7 @@ fn test_typo_ranking_rule_not_preceded_by_words_ranking_rule() {
    let index = create_index();
    index
        .update_settings(|s| {
            s.set_criteria(vec![Criterion::Typo]);
            s.set_ranking_rules(vec![RankingRule::Typo]);
        })
        .unwrap();

@@ -495,7 +495,7 @@ fn test_typo_ranking_rule_not_preceded_by_words_ranking_rule() {

    index
        .update_settings(|s| {
            s.set_criteria(vec![Criterion::Words, Criterion::Typo]);
            s.set_ranking_rules(vec![RankingRule::Words, RankingRule::Typo]);
        })
        .unwrap();

@@ -540,7 +540,7 @@ fn test_typo_bucketing() {
    drop(txn);
    index
        .update_settings(|s| {
            s.set_criteria(vec![Criterion::Typo]);
            s.set_ranking_rules(vec![RankingRule::Typo]);
        })
        .unwrap();
    let txn = index.read_txn().unwrap();
@@ -589,7 +589,7 @@ fn test_typo_synonyms() {
    let index = create_index();
    index
        .update_settings(|s| {
            s.set_criteria(vec![Criterion::Typo]);
            s.set_ranking_rules(vec![RankingRule::Typo]);

            let mut synonyms = BTreeMap::new();
            synonyms.insert("lackadaisical".to_owned(), vec!["lazy".to_owned()]);

@@ -17,7 +17,7 @@ because the typo ranking rule before it only used the derivation `beautiful`.

use crate::index::tests::TempIndex;
use crate::search::new::tests::collect_field_values;
use crate::{Criterion, Search, SearchResult, TermsMatchingStrategy};
use crate::{RankingRule, Search, SearchResult, TermsMatchingStrategy};

fn create_index() -> TempIndex {
    let index = TempIndex::new();
@@ -26,7 +26,11 @@ fn create_index() -> TempIndex {
        .update_settings(|s| {
            s.set_primary_key("id".to_owned());
            s.set_searchable_fields(vec!["text".to_owned()]);
            s.set_criteria(vec![Criterion::Words, Criterion::Typo, Criterion::Proximity]);
            s.set_ranking_rules(vec![
                RankingRule::Words,
                RankingRule::Typo,
                RankingRule::Proximity,
            ]);
        })
        .unwrap();

@@ -14,7 +14,7 @@ account by the proximity ranking rule.

use crate::index::tests::TempIndex;
use crate::search::new::tests::collect_field_values;
use crate::{Criterion, Search, SearchResult, TermsMatchingStrategy};
use crate::{RankingRule, Search, SearchResult, TermsMatchingStrategy};

fn create_index() -> TempIndex {
    let index = TempIndex::new();
@@ -23,7 +23,7 @@ fn create_index() -> TempIndex {
        .update_settings(|s| {
            s.set_primary_key("id".to_owned());
            s.set_searchable_fields(vec!["text".to_owned()]);
            s.set_criteria(vec![Criterion::Words]);
            s.set_ranking_rules(vec![RankingRule::Words]);
        })
        .unwrap();

@@ -265,7 +265,7 @@ fn test_words_proximity_tms_last_simple() {
    let index = create_index();
    index
        .update_settings(|s| {
            s.set_criteria(vec![Criterion::Words, Criterion::Proximity]);
            s.set_ranking_rules(vec![RankingRule::Words, RankingRule::Proximity]);
        })
        .unwrap();

@@ -346,7 +346,7 @@ fn test_words_proximity_tms_last_phrase() {
    let index = create_index();
    index
        .update_settings(|s| {
            s.set_criteria(vec![Criterion::Words, Criterion::Proximity]);
            s.set_ranking_rules(vec![RankingRule::Words, RankingRule::Proximity]);
        })
        .unwrap();

@@ -416,7 +416,7 @@ fn test_words_tms_all() {
    let index = create_index();
    index
        .update_settings(|s| {
            s.set_criteria(vec![Criterion::Words, Criterion::Proximity]);
            s.set_ranking_rules(vec![RankingRule::Words, RankingRule::Proximity]);
        })
        .unwrap();

@@ -14,7 +14,7 @@ pub use grenad_helpers::{
};
pub use merge_functions::{
    concat_u32s_array, keep_first, keep_latest_obkv, merge_btreeset_string,
    merge_cbo_roaring_bitmaps, merge_obkvs_and_operations, merge_roaring_bitmaps,
    merge_cbo_roaring_bitmaps, merge_obkvs_and_operations, merge_roaring_bitmaps, merge_two_obkvs,
    serialize_roaring_bitmap, MergeFn,
};


@@ -20,7 +20,10 @@ use slice_group_by::GroupBy;
use typed_chunk::{write_typed_chunk_into_index, TypedChunk};

use self::enrich::enrich_documents_batch;
pub use self::enrich::{extract_finite_float_from_value, DocumentId};
pub use self::enrich::{
    extract_finite_float_from_value, validate_document_id, validate_document_id_value,
    validate_geo_from_json, DocumentId,
};
pub use self::helpers::{
    as_cloneable_grenad, create_sorter, create_writer, fst_stream_into_hashset,
    fst_stream_into_vec, merge_btreeset_string, merge_cbo_roaring_bitmaps, merge_roaring_bitmaps,

@@ -9,9 +9,9 @@ use time::OffsetDateTime;
|
||||
|
||||
use super::index_documents::{IndexDocumentsConfig, Transform};
|
||||
use super::IndexerConfig;
|
||||
use crate::criterion::Criterion;
|
||||
use crate::error::UserError;
|
||||
use crate::index::{DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS};
|
||||
use crate::ranking_rule::RankingRule;
|
||||
use crate::update::index_documents::IndexDocumentsMethod;
|
||||
use crate::update::{IndexDocuments, UpdateIndexingStep};
|
||||
use crate::{FieldsIdsMap, Index, OrderBy, Result};
|
||||
@@ -110,7 +110,7 @@ pub struct Settings<'a, 't, 'u, 'i> {
|
||||
displayed_fields: Setting<Vec<String>>,
|
||||
filterable_fields: Setting<HashSet<String>>,
|
||||
sortable_fields: Setting<HashSet<String>>,
|
||||
criteria: Setting<Vec<Criterion>>,
|
||||
ranking_rules: Setting<Vec<RankingRule>>,
|
||||
stop_words: Setting<BTreeSet<String>>,
|
||||
non_separator_tokens: Setting<BTreeSet<String>>,
|
||||
separator_tokens: Setting<BTreeSet<String>>,
|
||||
@@ -142,7 +142,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
displayed_fields: Setting::NotSet,
filterable_fields: Setting::NotSet,
sortable_fields: Setting::NotSet,
criteria: Setting::NotSet,
ranking_rules: Setting::NotSet,
stop_words: Setting::NotSet,
non_separator_tokens: Setting::NotSet,
separator_tokens: Setting::NotSet,
@@ -194,12 +194,12 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
self.sortable_fields = Setting::Reset;
}

pub fn reset_criteria(&mut self) {
self.criteria = Setting::Reset;
pub fn reset_ranking_rules(&mut self) {
self.ranking_rules = Setting::Reset;
}

pub fn set_criteria(&mut self, criteria: Vec<Criterion>) {
self.criteria = Setting::Set(criteria);
pub fn set_ranking_rules(&mut self, ranking_rules: Vec<RankingRule>) {
self.ranking_rules = Setting::Set(ranking_rules);
}

pub fn reset_stop_words(&mut self) {
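Call sites migrate mechanically; a minimal before/after sketch, assuming the `RankingRule` variants mirror the old `Criterion` ones one-to-one:

    // Before this change:
    // settings.set_criteria(vec![Criterion::Words, Criterion::Typo]);

    // After: identical semantics under the new name.
    settings.set_ranking_rules(vec![RankingRule::Words, RankingRule::Typo]);
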
@@ -696,7 +696,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
}

fn update_criteria(&mut self) -> Result<()> {
match &self.criteria {
match &self.ranking_rules {
Setting::Set(criteria) => {
self.index.put_criteria(self.wtxn, criteria)?;
}
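Note that only the in-memory field is renamed here: the hunk still calls `self.index.put_criteria`, so the persisted key keeps its historical name. A sketch of the full method under that reading (the `Reset` and `NotSet` arms are assumed from the surrounding `Setting` pattern, not shown in the hunk):

    fn update_criteria(&mut self) -> Result<()> {
        match &self.ranking_rules {
            Setting::Set(criteria) => {
                // The on-disk representation is untouched by the rename.
                self.index.put_criteria(self.wtxn, criteria)?;
            }
            Setting::Reset => {
                self.index.delete_criteria(self.wtxn)?;
            }
            Setting::NotSet => (),
        }
        Ok(())
    }
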
@@ -924,7 +924,7 @@ mod tests {
use crate::error::Error;
use crate::index::tests::TempIndex;
use crate::update::{ClearDocuments, DeleteDocuments};
use crate::{Criterion, Filter, SearchResult};
use crate::{Filter, RankingRule, SearchResult};

#[test]
fn set_and_reset_searchable_fields() {
@@ -1167,7 +1167,7 @@ mod tests {
index
.update_settings(|settings| {
settings.set_displayed_fields(vec![S("name")]);
settings.set_criteria(vec![Criterion::Asc("age".to_owned())]);
settings.set_ranking_rules(vec![RankingRule::Asc("age".to_owned())]);
})
.unwrap();

@@ -1473,7 +1473,7 @@ mod tests {
.update_settings(|settings| {
settings.set_displayed_fields(vec!["hello".to_string()]);
settings.set_filterable_fields(hashset! { S("age"), S("toto") });
settings.set_criteria(vec![Criterion::Asc(S("toto"))]);
settings.set_ranking_rules(vec![RankingRule::Asc(S("toto"))]);
})
.unwrap();

@@ -1482,7 +1482,7 @@ mod tests {
assert_eq!(&["hello"][..], index.displayed_fields(&rtxn).unwrap().unwrap());
// since no documents have been pushed the primary key is still unset
assert!(index.primary_key(&rtxn).unwrap().is_none());
assert_eq!(vec![Criterion::Asc("toto".to_string())], index.criteria(&rtxn).unwrap());
assert_eq!(vec![RankingRule::Asc("toto".to_string())], index.criteria(&rtxn).unwrap());
drop(rtxn);

// We set toto and age as searchable to force reordering of the fields
@@ -1495,7 +1495,7 @@ mod tests {
let rtxn = index.read_txn().unwrap();
assert_eq!(&["hello"][..], index.displayed_fields(&rtxn).unwrap().unwrap());
assert!(index.primary_key(&rtxn).unwrap().is_none());
assert_eq!(vec![Criterion::Asc("toto".to_string())], index.criteria(&rtxn).unwrap());
assert_eq!(vec![RankingRule::Asc("toto".to_string())], index.criteria(&rtxn).unwrap());
}

#[test]
@@ -1507,7 +1507,7 @@ mod tests {
.update_settings(|settings| {
settings.set_displayed_fields(vec!["hello".to_string()]);
// It is only Asc(toto), there is a facet database but it is denied to filter with toto.
settings.set_criteria(vec![Criterion::Asc(S("toto"))]);
settings.set_ranking_rules(vec![RankingRule::Asc(S("toto"))]);
})
.unwrap();

@@ -1715,7 +1715,7 @@ mod tests {
displayed_fields,
filterable_fields,
sortable_fields,
criteria,
ranking_rules: criteria,
stop_words,
non_separator_tokens,
separator_tokens,

@@ -2,8 +2,8 @@ use std::collections::HashSet;

use big_s::S;
use milli::update::Settings;
use milli::{Criterion, Search, SearchResult, TermsMatchingStrategy};
use Criterion::*;
use milli::{RankingRule, Search, SearchResult, TermsMatchingStrategy};
use RankingRule::*;

use crate::search::{self, EXTERNAL_DOCUMENTS_IDS};

@@ -202,7 +202,7 @@ test_distinct!(
EXTERNAL_DOCUMENTS_IDS.len(),
1,
vec![],
3
2
);
test_distinct!(
// testing: https://github.com/meilisearch/meilisearch/issues/4078
@@ -212,7 +212,7 @@ test_distinct!(
1,
2,
vec![],
3
1
);
test_distinct!(
// testing: https://github.com/meilisearch/meilisearch/issues/4078
@@ -222,7 +222,7 @@ test_distinct!(
EXTERNAL_DOCUMENTS_IDS.len(),
2,
vec![],
7
5
);
test_distinct!(
// testing: https://github.com/meilisearch/meilisearch/issues/4078
@@ -232,5 +232,5 @@ test_distinct!(
2,
4,
vec![],
7
3
);

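The expected counts in the `test_distinct!` cases above change because distinct search keeps only the first hit per distinct value. A self-contained sketch of that deduplication, independent of the macro plumbing (all names here are illustrative, not the test harness itself):

    use std::collections::HashSet;

    // Keep the first document seen for each distinct value; later
    // duplicates are dropped, which shrinks the candidate counts.
    fn deduplicate_by_distinct<'a>(hits: &[(&'a str, &'a str)]) -> Vec<&'a str> {
        let mut seen = HashSet::new();
        hits.iter()
            .filter(|(_id, distinct_value)| seen.insert(*distinct_value))
            .map(|(id, _)| *id)
            .collect()
    }

    fn main() {
        let hits = [("doc-1", "red"), ("doc-2", "red"), ("doc-3", "blue")];
        assert_eq!(deduplicate_by_distinct(&hits), ["doc-1", "doc-3"]);
    }
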
@@ -1,6 +1,6 @@
use either::{Either, Left, Right};
use milli::{Criterion, Filter, Search, SearchResult, TermsMatchingStrategy};
use Criterion::*;
use milli::{Filter, RankingRule, Search, SearchResult, TermsMatchingStrategy};
use RankingRule::*;

use crate::search::{self, EXTERNAL_DOCUMENTS_IDS};

@@ -8,7 +8,7 @@ use heed::EnvOpenOptions;
use maplit::{btreemap, hashset};
use milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
use milli::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings};
use milli::{AscDesc, Criterion, DocumentId, Index, Member, Object, TermsMatchingStrategy};
use milli::{AscDesc, DocumentId, Index, Member, Object, RankingRule, TermsMatchingStrategy};
use serde::{Deserialize, Deserializer};
use slice_group_by::GroupBy;

@@ -27,7 +27,7 @@ pub const EXTERNAL_DOCUMENTS_IDS: &[&str; 17] =

pub const CONTENT: &str = include_str!("../assets/test_set.ndjson");

pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
pub fn setup_search_index_with_criteria(criteria: &[RankingRule]) -> Index {
let path = tempfile::tempdir().unwrap();
let mut options = EnvOpenOptions::new();
options.map_size(10 * 1024 * 1024); // 10 MB
@@ -38,7 +38,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {

let mut builder = Settings::new(&mut wtxn, &index, &config);

builder.set_criteria(criteria.to_vec());
builder.set_ranking_rules(criteria.to_vec());
builder.set_filterable_fields(hashset! {
S("tag"),
S("asc_desc_rank"),
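A typical call site after the rename, sketched with the re-exported variants in scope:

    // Build a throwaway test index ranked by words first, then proximity.
    let index = setup_search_index_with_criteria(&[RankingRule::Words, RankingRule::Proximity]);
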
@@ -95,7 +95,7 @@ pub fn internal_to_external_ids(index: &Index, internal_ids: &[DocumentId]) -> V
}

pub fn expected_order(
criteria: &[Criterion],
criteria: &[RankingRule],
optional_words: TermsMatchingStrategy,
sort_by: &[AscDesc],
) -> Vec<TestDocument> {
@@ -107,47 +107,56 @@ pub fn expected_order(
let mut new_groups = Vec::new();
for group in groups.iter_mut() {
match criterion {
Criterion::Attribute => {
RankingRule::Attribute => {
group.sort_by_key(|d| d.attribute_rank);
new_groups
.extend(group.linear_group_by_key(|d| d.attribute_rank).map(Vec::from));
}
Criterion::Exactness => {
RankingRule::Exactness => {
group.sort_by_key(|d| d.exact_rank);
new_groups.extend(group.linear_group_by_key(|d| d.exact_rank).map(Vec::from));
}
Criterion::Proximity => {
RankingRule::Proximity => {
group.sort_by_key(|d| d.proximity_rank);
new_groups
.extend(group.linear_group_by_key(|d| d.proximity_rank).map(Vec::from));
}
Criterion::Sort if sort_by == [AscDesc::Asc(Member::Field(S("tag")))] => {
RankingRule::Sort if sort_by == [AscDesc::Asc(Member::Field(S("tag")))] => {
group.sort_by_key(|d| d.sort_by_rank);
new_groups.extend(group.linear_group_by_key(|d| d.sort_by_rank).map(Vec::from));
}
Criterion::Sort if sort_by == [AscDesc::Desc(Member::Field(S("tag")))] => {
RankingRule::Sort if sort_by == [AscDesc::Desc(Member::Field(S("tag")))] => {
group.sort_by_key(|d| Reverse(d.sort_by_rank));
new_groups.extend(group.linear_group_by_key(|d| d.sort_by_rank).map(Vec::from));
}
Criterion::Typo => {
RankingRule::Typo => {
group.sort_by_key(|d| d.typo_rank);
new_groups.extend(group.linear_group_by_key(|d| d.typo_rank).map(Vec::from));
}
Criterion::Words => {
RankingRule::Words => {
group.sort_by_key(|d| d.word_rank);
new_groups.extend(group.linear_group_by_key(|d| d.word_rank).map(Vec::from));
}
Criterion::Asc(field_name) if field_name == "asc_desc_rank" => {
RankingRule::Asc(field_name) if field_name == "asc_desc_rank" => {
group.sort_by_key(|d| d.asc_desc_rank);
new_groups
.extend(group.linear_group_by_key(|d| d.asc_desc_rank).map(Vec::from));
}
Criterion::Desc(field_name) if field_name == "asc_desc_rank" => {
RankingRule::Desc(field_name) if field_name == "asc_desc_rank" => {
group.sort_by_key(|d| Reverse(d.asc_desc_rank));
new_groups
.extend(group.linear_group_by_key(|d| d.asc_desc_rank).map(Vec::from));
}
Criterion::Asc(_) | Criterion::Desc(_) | Criterion::Sort => {
RankingRule::FilterBoosting(filter) => {
// move the matching documents first, then the ones that don't match
group.sort_by_key(|d| if execute_filter(filter, d).is_some() { 0 } else { 1 });
new_groups.extend(
group
.linear_group_by_key(|d| execute_filter(filter, d).is_some())
.map(Vec::from),
);
}
RankingRule::Asc(_) | RankingRule::Desc(_) | RankingRule::Sort => {
new_groups.push(group.clone())
}
}

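The new `FilterBoosting` arm is a stable partition: documents matching the filter move ahead of the rest while ties keep their relative order, which is exactly what Rust's stable `sort_by_key` guarantees. A standalone sketch of the same idea (the document type and filter are illustrative):

    fn boost_matching<T>(docs: &mut [T], matches: impl Fn(&T) -> bool) {
        // `sort_by_key` is stable, so matching documents move to the front
        // without reshuffling the existing order inside each half.
        docs.sort_by_key(|d| if matches(d) { 0 } else { 1 });
    }

    fn main() {
        let mut docs = vec![("b", true), ("a", false), ("d", true), ("c", false)];
        boost_matching(&mut docs, |&(_, matched)| matched);
        assert_eq!(docs, [("b", true), ("d", true), ("a", false), ("c", false)]);
    }
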
@@ -1,7 +1,7 @@
use milli::update::{IndexerConfig, Settings};
use milli::{Criterion, Index, Search, TermsMatchingStrategy};
use milli::{Index, RankingRule, Search, TermsMatchingStrategy};

use crate::search::Criterion::{Attribute, Exactness, Proximity};
use crate::search::RankingRule::{Attribute, Exactness, Proximity};

fn set_stop_words(index: &Index, stop_words: &[&str]) {
let mut wtxn = index.write_txn().unwrap();
@@ -14,7 +14,7 @@ fn set_stop_words(index: &Index, stop_words: &[&str]) {
wtxn.commit().unwrap();
}

fn test_phrase_search_with_stop_words_given_criteria(criteria: &[Criterion]) {
fn test_phrase_search_with_stop_words_given_criteria(criteria: &[RankingRule]) {
let index = super::setup_search_index_with_criteria(criteria);

// Add stop_words

@@ -7,9 +7,9 @@ use itertools::Itertools;
use maplit::hashset;
use milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
use milli::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings};
use milli::{AscDesc, Criterion, Index, Member, Search, SearchResult, TermsMatchingStrategy};
use milli::{AscDesc, Index, Member, RankingRule, Search, SearchResult, TermsMatchingStrategy};
use rand::Rng;
use Criterion::*;
use RankingRule::*;

use crate::search::{self, EXTERNAL_DOCUMENTS_IDS};

@@ -88,7 +88,7 @@ test_criterion!(

#[test]
fn criteria_mixup() {
use Criterion::*;
use RankingRule::*;
let index = search::setup_search_index_with_criteria(&[
Words,
Attribute,
@@ -233,7 +233,7 @@ fn criteria_mixup() {
//update criteria
let mut wtxn = index.write_txn().unwrap();
let mut builder = Settings::new(&mut wtxn, &index, &config);
builder.set_criteria(criteria.clone());
builder.set_ranking_rules(criteria.clone());
builder.execute(|_| (), || false).unwrap();
wtxn.commit().unwrap();

@@ -324,7 +324,7 @@ fn criteria_ascdesc() {

let mut wtxn = index.write_txn().unwrap();
let mut builder = Settings::new(&mut wtxn, &index, &config);
builder.set_criteria(vec![criterion.clone()]);
builder.set_ranking_rules(vec![criterion.clone()]);
builder.execute(|_| (), || false).unwrap();
wtxn.commit().unwrap();

@@ -1,5 +1,5 @@
use big_s::S;
use milli::Criterion::{Attribute, Exactness, Proximity, Typo, Words};
use milli::RankingRule::{Attribute, Exactness, Proximity, Typo, Words};
use milli::{AscDesc, Error, Member, Search, TermsMatchingStrategy, UserError};

use crate::search::{self, EXTERNAL_DOCUMENTS_IDS};

@@ -2,10 +2,10 @@ use std::collections::BTreeSet;

use heed::EnvOpenOptions;
use milli::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings};
use milli::{Criterion, Index, Search, TermsMatchingStrategy};
use milli::{Index, RankingRule, Search, TermsMatchingStrategy};
use serde_json::json;
use tempfile::tempdir;
use Criterion::*;
use RankingRule::*;

#[test]
fn test_typo_tolerance_one_typo() {