Mirror of https://github.com/meilisearch/meilisearch.git, synced 2025-12-16 09:27:01 +00:00

Compare commits: release-v1...improve-in (3 commits)
| Author | SHA1 | Date |
|---|---|---|
| | 4696b8199f | |
| | 01b1effec0 | |
| | 51fb4d6976 | |
.github/ISSUE_TEMPLATE/sprint_issue.md (vendored, 8 lines changed)
@@ -7,17 +7,19 @@ assignees: ''
 ---
 
-Related product team resources: [PRD]() (_internal only_)
+Related product team resources: [roadmap card]() (_internal only_) and [PRD]() (_internal only_)
 Related product discussion:
 Related spec: WIP
 
 ## Motivation
 
-<!---Copy/paste the information in PRD or briefly detail the product motivation. Ask product team if any hesitation.-->
+<!---Copy/paste the information in the roadmap resources or briefly detail the product motivation. Ask product team if any hesitation.-->
 
 ## Usage
 
-<!---Link to the public part of the PRD, or to the related product discussion for experimental features-->
+<!---Write a quick description of the usage if the usage has already been defined-->
 
+Refer to the final spec to know the details and the final decisions about the usage.
 
 ## TODO
 
@@ -8,11 +8,11 @@ env:
 
 jobs:
   run-benchmarks-on-comment:
-    if: startsWith(github.event.comment.body, '/benchmark')
     name: Run and upload benchmarks
     runs-on: benchmarks
     timeout-minutes: 4320 # 72h
     steps:
+      - uses: actions/checkout@v3
       - uses: actions-rs/toolchain@v1
         with:
          profile: minimal
@@ -25,27 +25,15 @@ jobs:
        with:
          command: benchmark
          reaction-type: "eyes"
-          repo-token: ${{ env.GH_TOKEN }}
-
-      - uses: xt0rted/pull-request-comment-branch@v2
-        id: comment-branch
-        with:
-          repo_token: ${{ env.GH_TOKEN }}
-
-      - uses: actions/checkout@v3
-        if: success()
-        with:
-          fetch-depth: 0 # fetch full history to be able to get main commit sha
-          ref: ${{ steps.comment-branch.outputs.head_ref }}
 
      # Set variables
      - name: Set current branch name
        shell: bash
-        run: echo "name=$(git rev-parse --abbrev-ref HEAD)" >> $GITHUB_OUTPUT
+        run: echo "name=$(echo ${GITHUB_REF#refs/heads/})" >> $GITHUB_OUTPUT
        id: current_branch
      - name: Set normalized current branch name # Replace `/` by `_` in branch name to avoid issues when pushing to S3
        shell: bash
-        run: echo "name=$(git rev-parse --abbrev-ref HEAD | tr '/' '_')" >> $GITHUB_OUTPUT
+        run: echo "name=$(echo ${GITHUB_REF#refs/heads/} | tr '/' '_')" >> $GITHUB_OUTPUT
        id: normalized_current_branch
      - name: Set shorter commit SHA
        shell: bash
@@ -84,14 +72,10 @@ jobs:
 
      # Compute the diff of the benchmarks and send a message on the GitHub PR
      - name: Compute and send a message in the PR
-        env:
-          GITHUB_TOKEN: ${{ secrets.MEILI_BOT_GH_PAT }}
        run: |
-          set -x
-          export base_ref=$(git merge-base origin/main ${{ steps.comment-branch.outputs.head_ref }} | head -c8)
-          export base_filename=$(echo ${{ steps.command.outputs.command-arguments }}_main_${base_ref}.json)
+          export base=git rev-parse $(git cherry main | head -n 1 | cut -c 3-)~ | cut -c -8
          echo 'Here are your benchmarks diff 👊' >> body.txt
          echo '```' >> body.txt
-          ./benchmarks/scripts/compare.sh $base_filename ${{ steps.file.outputs.basename }}.json >> body.txt
+          ./benchmaks/scipts/compare.sh $base ${{ steps.file.outputs.basename }}.json >> body.txt
          echo '```' >> body.txt
-          gh pr comment ${{ steps.current_branch.outputs.name }} --body-file body.txt
+          gh pr comment ${GITHUB_REF#refs/heads/} --body-file body.txt
Cargo.lock (generated, 1633 lines changed)
File diff suppressed because it is too large.
@@ -2,7 +2,6 @@
 resolver = "2"
 members = [
     "meilisearch",
-    "meilitool",
     "meilisearch-types",
     "meilisearch-auth",
     "meili-snap",
@@ -19,7 +18,7 @@ members = [
 ]
 
 [workspace.package]
-version = "1.5.1"
+version = "1.4.0"
 authors = ["Quentin de Quelen <quentin@dequelen.me>", "Clément Renault <clement@meilisearch.com>"]
 description = "Meilisearch HTTP server"
 homepage = "https://meilisearch.com"
Dockerfile (11 lines changed)
@@ -3,7 +3,7 @@ FROM rust:alpine3.16 AS compiler
 
 RUN apk add -q --update-cache --no-cache build-base openssl-dev
 
-WORKDIR /
+WORKDIR /meilisearch
 
 ARG COMMIT_SHA
 ARG COMMIT_DATE
@@ -17,7 +17,7 @@ RUN set -eux; \
     if [ "$apkArch" = "aarch64" ]; then \
         export JEMALLOC_SYS_WITH_LG_PAGE=16; \
     fi && \
-    cargo build --release -p meilisearch -p meilitool
+    cargo build --release
 
 # Run
 FROM alpine:3.16
@@ -28,10 +28,9 @@ ENV MEILI_SERVER_PROVIDER docker
 RUN apk update --quiet \
     && apk add -q --no-cache libgcc tini curl
 
-# add meilisearch and meilitool to the `/bin` so you can run it from anywhere
-# and it's easy to find.
-COPY --from=compiler /target/release/meilisearch /bin/meilisearch
-COPY --from=compiler /target/release/meilitool /bin/meilitool
+# add meilisearch to the `/bin` so you can run it from anywhere and it's easy
+# to find.
+COPY --from=compiler /meilisearch/target/release/meilisearch /bin/meilisearch
 # To stay compatible with the older version of the container (pre v0.27.0) we're
 # going to symlink the meilisearch binary in the path to `/meilisearch`
 RUN ln -s /bin/meilisearch /meilisearch
@@ -1,14 +1,14 @@
 # Profiling Meilisearch
 
-Search engine technologies are complex pieces of software that require thorough profiling tools. We chose to use [Puffin](https://github.com/EmbarkStudios/puffin), which the Rust gaming industry uses extensively. You can export and import the profiling reports using the top bar's _File_ menu options [in Puffin Viewer](https://github.com/embarkstudios/puffin#ui).
+Search engine technologies are complex pieces of software that require thorough profiling tools. We chose to use [Puffin](https://github.com/EmbarkStudios/puffin), which the Rust gaming industry uses extensively. You can export and import the profiling reports using the top bar's _File_ menu options.
 
 
 
 ## Profiling the Indexing Process
 
-When you enable [the `exportPuffinReports` experimental feature](https://www.meilisearch.com/docs/learn/experimental/overview) of Meilisearch, Puffin reports with the `.puffin` extension will be automatically exported to disk. When this option is enabled, the engine will automatically create a "frame" whenever it executes the `IndexScheduler::tick` method.
+When you enable the `profile-with-puffin` feature of Meilisearch, a Puffin HTTP server will run on Meilisearch and listen on the default _0.0.0.0:8585_ address. This server will record a "frame" whenever it executes the `IndexScheduler::tick` method.
 
-[Puffin Viewer](https://github.com/EmbarkStudios/puffin/tree/main/puffin_viewer) is used to analyze the reports. Those reports show areas where Meilisearch spent time during indexing.
+Once your Meilisearch is running and awaits new indexation operations, you must [install and run the `puffin_viewer` tool](https://github.com/EmbarkStudios/puffin/tree/main/puffin_viewer) to see the profiling results. I advise you to run the viewer with the `RUST_LOG=puffin_http::client=debug` environment variable to see the client trying to connect to your server.
 
 Another piece of advice on the Puffin viewer UI interface is to consider the _Merge children with same ID_ option. It can hide the exact actual timings at which events were sent. Please turn it off when you see strange gaps on the Flamegraph. It can help.
 
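Both sides of this hunk rely on the same underlying Puffin mechanics that show up in the code changes further down this page: scopes are only recorded after `puffin::set_scopes_on(true)`, and recorded data only becomes visible once a frame is finished. A minimal, hedged sketch of that flow, using only the `puffin` calls that appear in the diffs below (the `index_documents` function name is purely illustrative):

```rust
fn index_documents() {
    // Records a scope named after the function, but only when scopes are enabled.
    puffin::profile_function!();
    // ... indexing work ...
}

fn main() {
    puffin::set_scopes_on(true); // without this, profile_function! is a no-op
    index_documents();
    // Finishing a frame is what makes the recorded scopes available,
    // whether they are served over HTTP or written to a .puffin file.
    puffin::GlobalProfiler::lock().new_frame();
}
```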
@@ -25,12 +25,6 @@
 
 <p align="center">⚡ A lightning-fast search engine that fits effortlessly into your apps, websites, and workflow 🔍</p>
 
----
-
-### 🔥 On November 2nd, we are hosting our first-ever live demo and product updates for [Meilisearch Cloud](https://www.meilisearch.com/cloud?utm_campaign=oss&utm_source=github&utm_medium=meilisearch). Make sure to [register here](https://us06web.zoom.us/meeting/register/tZMlc-mqrjIsH912-HTRe-AaT-pp41bDe81a#/registration) and bring your questions for live Q&A!
-
----
-
 Meilisearch helps you shape a delightful search experience in a snap, offering features that work out-of-the-box to speed up your workflow.
 
 <p align="center" name="demo">
@@ -21,7 +21,7 @@ serde_json = { version = "1.0.95", features = ["preserve_order"] }
 criterion = { version = "0.5.1", features = ["html_reports"] }
 rand = "0.8.5"
 rand_chacha = "0.3.1"
-roaring = "0.10.1"
+roaring = { path = "../../roaring-rs" }
 
 [build-dependencies]
 anyhow = "1.0.70"
@@ -19,7 +19,7 @@ meilisearch-auth = { path = "../meilisearch-auth" }
 meilisearch-types = { path = "../meilisearch-types" }
 once_cell = "1.17.1"
 regex = "1.7.3"
-roaring = { version = "0.10.1", features = ["serde"] }
+roaring = { path = "../../roaring-rs", features = ["serde"] }
 serde = { version = "1.0.160", features = ["derive"] }
 serde_json = { version = "1.0.95", features = ["preserve_order"] }
 tar = "0.4.38"
@@ -23,7 +23,7 @@ meilisearch-auth = { path = "../meilisearch-auth" }
 meilisearch-types = { path = "../meilisearch-types" }
 page_size = "0.5.0"
 puffin = "0.16.0"
-roaring = { version = "0.10.1", features = ["serde"] }
+roaring = { path = "../../roaring-rs", features = ["serde"] }
 serde = { version = "1.0.160", features = ["derive"] }
 serde_json = { version = "1.0.95", features = ["preserve_order"] }
 synchronoise = "1.0.1"
@@ -19,7 +19,6 @@ one indexing operation.
 
 use std::collections::{BTreeSet, HashSet};
 use std::ffi::OsStr;
-use std::fmt;
 use std::fs::{self, File};
 use std::io::BufWriter;
 
@@ -200,29 +199,6 @@ impl Batch {
     }
 }
 
-impl fmt::Display for Batch {
-    /// A text used when we debug the profiling reports.
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        let index_uid = self.index_uid();
-        let tasks = self.ids();
-        match self {
-            Batch::TaskCancelation { .. } => f.write_str("TaskCancelation")?,
-            Batch::TaskDeletion(_) => f.write_str("TaskDeletion")?,
-            Batch::SnapshotCreation(_) => f.write_str("SnapshotCreation")?,
-            Batch::Dump(_) => f.write_str("Dump")?,
-            Batch::IndexOperation { op, .. } => write!(f, "{op}")?,
-            Batch::IndexCreation { .. } => f.write_str("IndexCreation")?,
-            Batch::IndexUpdate { .. } => f.write_str("IndexUpdate")?,
-            Batch::IndexDeletion { .. } => f.write_str("IndexDeletion")?,
-            Batch::IndexSwap { .. } => f.write_str("IndexSwap")?,
-        };
-        match index_uid {
-            Some(name) => f.write_fmt(format_args!(" on {name:?} from tasks: {tasks:?}")),
-            None => f.write_fmt(format_args!(" from tasks: {tasks:?}")),
-        }
-    }
-}
-
 impl IndexOperation {
     pub fn index_uid(&self) -> &str {
         match self {
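The `Display` implementation removed here is what lets one side of this compare call `batch.to_string()` when labelling Puffin scopes, while the other side falls back to `format!("{:?}", batch)` (see the `process_batch` hunk further down). A minimal sketch of the same pattern on a made-up enum, to show why `to_string()` becomes available once `Display` exists:

```rust
use std::fmt;

// Illustrative enum only; the real `Batch` type carries much more data.
enum Batch {
    Dump,
    IndexCreation { index_uid: String },
}

impl fmt::Display for Batch {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Batch::Dump => f.write_str("Dump"),
            Batch::IndexCreation { index_uid } => write!(f, "IndexCreation on {index_uid:?}"),
        }
    }
}

fn main() {
    let batch = Batch::IndexCreation { index_uid: "movies".into() };
    // Display gives a stable, human-readable label; to_string() comes for free.
    assert_eq!(batch.to_string(), "IndexCreation on \"movies\"");
    println!("{batch}");
}
```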
@@ -237,30 +213,6 @@ impl IndexOperation {
     }
 }
 
-impl fmt::Display for IndexOperation {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        match self {
-            IndexOperation::DocumentOperation { .. } => {
-                f.write_str("IndexOperation::DocumentOperation")
-            }
-            IndexOperation::DocumentDeletion { .. } => {
-                f.write_str("IndexOperation::DocumentDeletion")
-            }
-            IndexOperation::IndexDocumentDeletionByFilter { .. } => {
-                f.write_str("IndexOperation::IndexDocumentDeletionByFilter")
-            }
-            IndexOperation::DocumentClear { .. } => f.write_str("IndexOperation::DocumentClear"),
-            IndexOperation::Settings { .. } => f.write_str("IndexOperation::Settings"),
-            IndexOperation::DocumentClearAndSetting { .. } => {
-                f.write_str("IndexOperation::DocumentClearAndSetting")
-            }
-            IndexOperation::SettingsAndDocumentOperation { .. } => {
-                f.write_str("IndexOperation::SettingsAndDocumentOperation")
-            }
-        }
-    }
-}
-
 impl IndexScheduler {
     /// Convert an [`BatchKind`](crate::autobatcher::BatchKind) into a [`Batch`].
     ///
@@ -629,7 +581,7 @@ impl IndexScheduler {
             self.breakpoint(crate::Breakpoint::InsideProcessBatch);
         }
 
-        puffin::profile_function!(batch.to_string());
+        puffin::profile_function!(format!("{:?}", batch));
 
         match batch {
             Batch::TaskCancelation { mut task, previous_started_at, previous_processing_tasks } => {
@@ -896,7 +848,7 @@ impl IndexScheduler {
                 })?;
 
                 // 4. Dump experimental feature settings
-                let features = self.features().runtime_features();
+                let features = self.features()?.runtime_features();
                 dump.create_experimental_features(features)?;
 
                 let dump_uid = started_at.format(format_description!(
@@ -923,10 +875,6 @@ impl IndexScheduler {
                     self.index_mapper.index(&rtxn, &index_uid)?
                 };
 
-                // the index operation can take a long time, so save this handle to make it available to the search for the duration of the tick
-                *self.currently_updating_index.write().unwrap() =
-                    Some((index_uid.clone(), index.clone()));
-
                 let mut index_wtxn = index.write_txn()?;
                 let tasks = self.apply_index_operation(&mut index_wtxn, &index, op)?;
                 index_wtxn.commit()?;
@@ -1,8 +1,6 @@
-use std::sync::{Arc, RwLock};
-
 use meilisearch_types::features::{InstanceTogglableFeatures, RuntimeTogglableFeatures};
 use meilisearch_types::heed::types::{SerdeJson, Str};
-use meilisearch_types::heed::{Database, Env, RwTxn};
+use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn};
 
 use crate::error::FeatureNotEnabledError;
 use crate::Result;
@@ -11,19 +9,20 @@ const EXPERIMENTAL_FEATURES: &str = "experimental-features";
 
 #[derive(Clone)]
 pub(crate) struct FeatureData {
-    persisted: Database<Str, SerdeJson<RuntimeTogglableFeatures>>,
-    runtime: Arc<RwLock<RuntimeTogglableFeatures>>,
+    runtime: Database<Str, SerdeJson<RuntimeTogglableFeatures>>,
+    instance: InstanceTogglableFeatures,
 }
 
 #[derive(Debug, Clone, Copy)]
 pub struct RoFeatures {
     runtime: RuntimeTogglableFeatures,
+    instance: InstanceTogglableFeatures,
 }
 
 impl RoFeatures {
-    fn new(data: &FeatureData) -> Self {
-        let runtime = data.runtime_features();
-        Self { runtime }
+    fn new(txn: RoTxn<'_>, data: &FeatureData) -> Result<Self> {
+        let runtime = data.runtime_features(txn)?;
+        Ok(Self { runtime, instance: data.instance })
     }
 
     pub fn runtime_features(&self) -> RuntimeTogglableFeatures {
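The two sides of this hunk store feature flags differently: one keeps the persisted flags in an LMDB database plus an in-process `Arc<RwLock<…>>` cache, the other reads the database on every call under a read transaction. A minimal sketch of the cached variant using only standard-library types (the names and the `Flags` struct are illustrative, not the real Meilisearch types):

```rust
use std::sync::{Arc, RwLock};

#[derive(Debug, Default, Clone, Copy)]
struct Flags {
    metrics: bool,
}

#[derive(Clone)]
struct FeatureData {
    // The persisted copy would live in a database; the cache makes reads lock-cheap.
    runtime: Arc<RwLock<Flags>>,
}

impl FeatureData {
    fn put(&self, flags: Flags) {
        // A real implementation writes to the database first, then refreshes the cache.
        *self.runtime.write().unwrap() = flags;
    }

    fn features(&self) -> Flags {
        // Copy the current flags out; no transaction is needed on the read path.
        *self.runtime.read().unwrap()
    }
}

fn main() {
    let data = FeatureData { runtime: Arc::new(RwLock::new(Flags::default())) };
    data.put(Flags { metrics: true });
    assert!(data.features().metrics);
}
```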
@@ -44,13 +43,13 @@ impl RoFeatures {
     }
 
     pub fn check_metrics(&self) -> Result<()> {
-        if self.runtime.metrics {
+        if self.instance.metrics {
             Ok(())
         } else {
             Err(FeatureNotEnabledError {
                 disabled_action: "Getting metrics",
                 feature: "metrics",
-                issue_link: "https://github.com/meilisearch/product/discussions/625",
+                issue_link: "https://github.com/meilisearch/meilisearch/discussions/3518",
             }
             .into())
         }
@@ -68,36 +67,15 @@ impl RoFeatures {
             .into())
         }
     }
-
-    pub fn check_puffin(&self) -> Result<()> {
-        if self.runtime.export_puffin_reports {
-            Ok(())
-        } else {
-            Err(FeatureNotEnabledError {
-                disabled_action: "Outputting Puffin reports to disk",
-                feature: "export puffin reports",
-                issue_link: "https://github.com/meilisearch/product/discussions/693",
-            }
-            .into())
-        }
-    }
 }
 
 impl FeatureData {
     pub fn new(env: &Env, instance_features: InstanceTogglableFeatures) -> Result<Self> {
         let mut wtxn = env.write_txn()?;
-        let runtime_features_db = env.create_database(&mut wtxn, Some(EXPERIMENTAL_FEATURES))?;
+        let runtime_features = env.create_database(&mut wtxn, Some(EXPERIMENTAL_FEATURES))?;
         wtxn.commit()?;
 
-        let txn = env.read_txn()?;
-        let persisted_features: RuntimeTogglableFeatures =
-            runtime_features_db.get(&txn, EXPERIMENTAL_FEATURES)?.unwrap_or_default();
-        let runtime = Arc::new(RwLock::new(RuntimeTogglableFeatures {
-            metrics: instance_features.metrics || persisted_features.metrics,
-            ..persisted_features
-        }));
-
-        Ok(Self { persisted: runtime_features_db, runtime })
+        Ok(Self { runtime: runtime_features, instance: instance_features })
     }
 
     pub fn put_runtime_features(
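`check_puffin`, removed on one side of this hunk, follows the same gate pattern as `check_metrics` above: return `Ok(())` when the flag is on, otherwise surface a structured "feature not enabled" error. A self-contained sketch of that pattern under simplified types (the structs here are stand-ins, not the real `FeatureNotEnabledError`):

```rust
#[derive(Debug)]
struct FeatureNotEnabled {
    disabled_action: &'static str,
    feature: &'static str,
}

#[derive(Default, Clone, Copy)]
struct RuntimeFlags {
    export_puffin_reports: bool,
}

struct RoFeatures {
    runtime: RuntimeFlags,
}

impl RoFeatures {
    fn check_puffin(&self) -> Result<(), FeatureNotEnabled> {
        if self.runtime.export_puffin_reports {
            Ok(())
        } else {
            Err(FeatureNotEnabled {
                disabled_action: "Outputting Puffin reports to disk",
                feature: "export puffin reports",
            })
        }
    }
}

fn main() {
    let features = RoFeatures { runtime: RuntimeFlags::default() };
    // Callers typically do `features.check_puffin().is_ok()` to decide whether to profile.
    assert!(features.check_puffin().is_err());
}
```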
@@ -105,25 +83,16 @@ impl FeatureData {
         mut wtxn: RwTxn,
         features: RuntimeTogglableFeatures,
     ) -> Result<()> {
-        self.persisted.put(&mut wtxn, EXPERIMENTAL_FEATURES, &features)?;
+        self.runtime.put(&mut wtxn, EXPERIMENTAL_FEATURES, &features)?;
         wtxn.commit()?;
-
-        // safe to unwrap, the lock will only fail if:
-        // 1. requested by the same thread concurrently -> it is called and released in methods that don't call each other
-        // 2. there's a panic while the thread is held -> it is only used for an assignment here.
-        let mut toggled_features = self.runtime.write().unwrap();
-        *toggled_features = features;
         Ok(())
     }
 
-    fn runtime_features(&self) -> RuntimeTogglableFeatures {
-        // sound to unwrap, the lock will only fail if:
-        // 1. requested by the same thread concurrently -> it is called and released in methods that don't call each other
-        // 2. there's a panic while the thread is held -> it is only used for copying the data here
-        *self.runtime.read().unwrap()
+    fn runtime_features(&self, txn: RoTxn) -> Result<RuntimeTogglableFeatures> {
+        Ok(self.runtime.get(&txn, EXPERIMENTAL_FEATURES)?.unwrap_or_default())
     }
 
-    pub fn features(&self) -> RoFeatures {
-        RoFeatures::new(self)
+    pub fn features(&self, txn: RoTxn) -> Result<RoFeatures> {
+        RoFeatures::new(txn, self)
     }
 }
@@ -30,7 +30,6 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
         index_mapper,
         features: _,
         max_number_of_tasks: _,
-        puffin_frame: _,
         wake_up: _,
         dumps_path: _,
         snapshots_path: _,
@@ -39,7 +38,6 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
         test_breakpoint_sdr: _,
         planned_failures: _,
         run_loop_iteration: _,
-        currently_updating_index: _,
     } = scheduler;
 
     let rtxn = env.read_txn().unwrap();
@@ -27,13 +27,12 @@ mod index_mapper;
 mod insta_snapshot;
 mod lru;
 mod utils;
-pub mod uuid_codec;
+mod uuid_codec;
 
 pub type Result<T> = std::result::Result<T, Error>;
 pub type TaskId = u32;
 
 use std::collections::{BTreeMap, HashMap};
-use std::fs::File;
 use std::ops::{Bound, RangeBounds};
 use std::path::{Path, PathBuf};
 use std::sync::atomic::AtomicBool;
@@ -53,7 +52,6 @@ use meilisearch_types::milli::documents::DocumentsBatchBuilder;
 use meilisearch_types::milli::update::IndexerConfig;
 use meilisearch_types::milli::{self, CboRoaringBitmapCodec, Index, RoaringBitmapCodec, BEU32};
 use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task};
-use puffin::FrameView;
 use roaring::RoaringBitmap;
 use synchronoise::SignalEvent;
 use time::format_description::well_known::Rfc3339;
@@ -316,9 +314,6 @@ pub struct IndexScheduler {
     /// the finished tasks automatically.
     pub(crate) max_number_of_tasks: usize,
 
-    /// A frame to output the indexation profiling files to disk.
-    pub(crate) puffin_frame: Arc<puffin::GlobalFrameView>,
-
     /// The path used to create the dumps.
     pub(crate) dumps_path: PathBuf,
 
@@ -331,10 +326,6 @@ pub struct IndexScheduler {
     /// The path to the version file of Meilisearch.
     pub(crate) version_file_path: PathBuf,
 
-    /// A few types of long running batches of tasks that act on a single index set this field
-    /// so that a handle to the index is available from other threads (search) in an optimized manner.
-    currently_updating_index: Arc<RwLock<Option<(String, Index)>>>,
-
     // ================= test
     // The next entry is dedicated to the tests.
     /// Provide a way to set a breakpoint in multiple part of the scheduler.
@@ -373,12 +364,10 @@ impl IndexScheduler {
             wake_up: self.wake_up.clone(),
             autobatching_enabled: self.autobatching_enabled,
             max_number_of_tasks: self.max_number_of_tasks,
-            puffin_frame: self.puffin_frame.clone(),
             snapshots_path: self.snapshots_path.clone(),
             dumps_path: self.dumps_path.clone(),
             auth_path: self.auth_path.clone(),
             version_file_path: self.version_file_path.clone(),
-            currently_updating_index: self.currently_updating_index.clone(),
             #[cfg(test)]
             test_breakpoint_sdr: self.test_breakpoint_sdr.clone(),
             #[cfg(test)]
@@ -468,14 +457,12 @@ impl IndexScheduler {
             env,
             // we want to start the loop right away in case meilisearch was ctrl+Ced while processing things
             wake_up: Arc::new(SignalEvent::auto(true)),
-            puffin_frame: Arc::new(puffin::GlobalFrameView::default()),
            autobatching_enabled: options.autobatching_enabled,
            max_number_of_tasks: options.max_number_of_tasks,
            dumps_path: options.dumps_path,
            snapshots_path: options.snapshots_path,
            auth_path: options.auth_path,
            version_file_path: options.version_file_path,
-            currently_updating_index: Arc::new(RwLock::new(None)),
 
            #[cfg(test)]
            test_breakpoint_sdr,
@@ -585,46 +572,17 @@ impl IndexScheduler {
                 run.wake_up.wait();
 
                 loop {
-                    let puffin_enabled = run.features().check_puffin().is_ok();
-                    puffin::set_scopes_on(puffin_enabled);
-                    puffin::GlobalProfiler::lock().new_frame();
-
                     match run.tick() {
                         Ok(TickOutcome::TickAgain(_)) => (),
                         Ok(TickOutcome::WaitForSignal) => run.wake_up.wait(),
                         Err(e) => {
-                            log::error!("{e}");
+                            log::error!("{}", e);
                             // Wait one second when an irrecoverable error occurs.
                             if !e.is_recoverable() {
                                 std::thread::sleep(Duration::from_secs(1));
                             }
                         }
                     }
-
-                    // Let's write the previous frame to disk but only if
-                    // the user wanted to profile with puffin.
-                    if puffin_enabled {
-                        let mut frame_view = run.puffin_frame.lock();
-                        if !frame_view.is_empty() {
-                            let now = OffsetDateTime::now_utc();
-                            let mut file = match File::create(format!("{}.puffin", now)) {
-                                Ok(file) => file,
-                                Err(e) => {
-                                    log::error!("{e}");
-                                    continue;
-                                }
-                            };
-                            if let Err(e) = frame_view.save_to_writer(&mut file) {
-                                log::error!("{e}");
-                            }
-                            if let Err(e) = file.sync_all() {
-                                log::error!("{e}");
-                            }
-                            // We erase this frame view as it is no more useful. We want to
-                            // measure the new frames now that we exported the previous ones.
-                            *frame_view = FrameView::default();
-                        }
-                    }
                 }
             })
             .unwrap();
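The block removed above is the "export to disk" path: it drains the `GlobalFrameView` held by the scheduler and writes one `.puffin` file per loop iteration (this relies on puffin's `serialization` feature, which the corresponding Cargo.toml hunk enables on that side). A condensed, hedged sketch of the same flow outside the scheduler, reusing only the `puffin` calls visible in this hunk, with error handling simplified:

```rust
use std::fs::File;

// Sketch only: `holder` would be the scheduler's Arc<puffin::GlobalFrameView>.
fn export_frames(holder: &puffin::GlobalFrameView) -> std::io::Result<()> {
    // GlobalFrameView accumulates the frames recorded since it was created or last reset.
    let mut frame_view = holder.lock();
    if !frame_view.is_empty() {
        let mut file = File::create("indexing.puffin")?;
        // Serialize every recorded frame so Puffin Viewer can open the file later.
        if let Err(e) = frame_view.save_to_writer(&mut file) {
            eprintln!("saving the Puffin report failed: {e}");
        }
        file.sync_all()?;
        // Start from an empty view so the next export only contains new frames.
        *frame_view = puffin::FrameView::default();
    }
    Ok(())
}
```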
@@ -658,13 +616,6 @@ impl IndexScheduler {
     /// If you need to fetch information from or perform an action on all indexes,
     /// see the `try_for_each_index` function.
     pub fn index(&self, name: &str) -> Result<Index> {
-        if let Some((current_name, current_index)) =
-            self.currently_updating_index.read().unwrap().as_ref()
-        {
-            if current_name == name {
-                return Ok(current_index.clone());
-            }
-        }
         let rtxn = self.env.read_txn()?;
         self.index_mapper.index(&rtxn, name)
     }
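The seven removed lines are a read-through shortcut: while a long indexing batch holds an index open, `currently_updating_index` lets concurrent searches reuse that handle instead of opening a read transaction. A small self-contained sketch of the same idea with plain standard-library types (a `String` stands in for the real `Index` handle):

```rust
use std::sync::{Arc, RwLock};

struct Scheduler {
    // Set by the batch processor for the duration of an indexing operation.
    currently_updating_index: Arc<RwLock<Option<(String, String)>>>,
}

impl Scheduler {
    fn index(&self, name: &str) -> String {
        // Fast path: reuse the handle of the index currently being updated.
        if let Some((current_name, current_index)) =
            self.currently_updating_index.read().unwrap().as_ref()
        {
            if current_name == name {
                return current_index.clone();
            }
        }
        // Slow path: the real code opens a read transaction and asks the index mapper.
        format!("handle-for-{name}")
    }
}

fn main() {
    let scheduler = Scheduler { currently_updating_index: Arc::new(RwLock::new(None)) };
    *scheduler.currently_updating_index.write().unwrap() =
        Some(("movies".to_string(), "cached-handle".to_string()));
    assert_eq!(scheduler.index("movies"), "cached-handle");
    assert_eq!(scheduler.index("books"), "handle-for-books");
}
```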
@@ -1111,6 +1062,8 @@ impl IndexScheduler {
             self.breakpoint(Breakpoint::Start);
         }
 
+        puffin::GlobalProfiler::lock().new_frame();
+
         self.cleanup_task_queue()?;
 
         let rtxn = self.env.read_txn().map_err(Error::HeedTransaction)?;
@@ -1146,9 +1099,6 @@ impl IndexScheduler {
             handle.join().unwrap_or(Err(Error::ProcessBatchPanicked))
         };
 
-        // Reset the currently updating index to relinquish the index handle
-        *self.currently_updating_index.write().unwrap() = None;
-
         #[cfg(test)]
         self.maybe_fail(tests::FailureLocation::AcquiringWtxn)?;
 
@@ -1309,8 +1259,9 @@ impl IndexScheduler {
         Ok(IndexStats { is_indexing, inner_stats: index_stats })
     }
 
-    pub fn features(&self) -> RoFeatures {
-        self.features.features()
+    pub fn features(&self) -> Result<RoFeatures> {
+        let rtxn = self.read_txn()?;
+        self.features.features(rtxn)
     }
 
     pub fn put_runtime_features(&self, features: RuntimeTogglableFeatures) -> Result<()> {
@@ -17,7 +17,7 @@ hmac = "0.12.1"
 maplit = "1.0.2"
 meilisearch-types = { path = "../meilisearch-types" }
 rand = "0.8.5"
-roaring = { version = "0.10.1", features = ["serde"] }
+roaring = { path = "../../roaring-rs", features = ["serde"] }
 serde = { version = "1.0.160", features = ["derive"] }
 serde_json = { version = "1.0.95", features = ["preserve_order"] }
 sha2 = "0.10.6"
@@ -23,7 +23,7 @@ flate2 = "1.0.25"
 fst = "0.4.7"
 memmap2 = "0.7.1"
 milli = { path = "../milli" }
-roaring = { version = "0.10.1", features = ["serde"] }
+roaring = { path = "../../roaring-rs", features = ["serde"] }
 serde = { version = "1.0.160", features = ["derive"] }
 serde-cs = "0.2.4"
 serde_json = "1.0.95"
@@ -50,7 +50,6 @@ hebrew = ["milli/hebrew"]
 japanese = ["milli/japanese"]
 # thai specialized tokenization
 thai = ["milli/thai"]
 
 # allow greek specialized tokenization
 greek = ["milli/greek"]
-# allow khmer specialized tokenization
-khmer = ["milli/khmer"]
@@ -5,8 +5,6 @@ use serde::{Deserialize, Serialize};
 pub struct RuntimeTogglableFeatures {
     pub score_details: bool,
     pub vector_store: bool,
-    pub metrics: bool,
-    pub export_puffin_reports: bool,
 }
 
 #[derive(Default, Debug, Clone, Copy)]
@@ -69,7 +69,8 @@ permissive-json-pointer = { path = "../permissive-json-pointer" }
 pin-project-lite = "0.2.9"
 platform-dirs = "0.3.0"
 prometheus = { version = "0.13.3", features = ["process"] }
-puffin = { version = "0.16.0", features = ["serialization"] }
+puffin = "0.16.0"
+puffin_http = { version = "0.13.0", optional = true }
 rand = "0.8.5"
 rayon = "1.7.0"
 regex = "1.7.3"
@@ -134,6 +135,7 @@ zip = { version = "0.6.4", optional = true }
 [features]
 default = ["analytics", "meilisearch-types/all-tokenizations", "mini-dashboard"]
 analytics = ["segment"]
+profile-with-puffin = ["dep:puffin_http"]
 mini-dashboard = [
     "actix-web-static-files",
     "static-files",
@@ -150,7 +152,6 @@ hebrew = ["meilisearch-types/hebrew"]
 japanese = ["meilisearch-types/japanese"]
 thai = ["meilisearch-types/thai"]
 greek = ["meilisearch-types/greek"]
-khmer = ["meilisearch-types/khmer"]
 
 [package.metadata.mini-dashboard]
 assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.11/build.zip"
@@ -114,7 +114,10 @@ pub fn create_app(
         .configure(routes::configure)
         .configure(|s| dashboard(s, enable_dashboard));
 
-    let app = app.wrap(middleware::RouteMetrics);
+    let app = app.wrap(actix_web::middleware::Condition::new(
+        opt.experimental_enable_metrics,
+        middleware::RouteMetrics,
+    ));
     app.wrap(
         Cors::default()
             .send_wildcard()
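On one side of this hunk the metrics middleware is only mounted when `experimental_enable_metrics` is set, using actix-web's `Condition` wrapper; on the other it is always mounted and decides per request. A minimal, self-contained sketch of the `Condition` approach, assuming a made-up flag and route rather than Meilisearch's actual options:

```rust
use actix_web::{middleware, web, App, HttpServer};

#[actix_web::main]
async fn main() -> std::io::Result<()> {
    // Hypothetical flag standing in for opt.experimental_enable_metrics.
    let enable_request_logging = std::env::var("ENABLE_LOGGING").is_ok();

    HttpServer::new(move || {
        App::new()
            // Condition::new only applies the wrapped middleware when the boolean is true,
            // so the wrapper costs nothing when the feature is disabled.
            .wrap(middleware::Condition::new(
                enable_request_logging,
                middleware::Logger::default(),
            ))
            .route("/health", web::get().to(|| async { "ok" }))
    })
    .bind(("127.0.0.1", 7700))?
    .run()
    .await
}
```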
@@ -30,6 +30,10 @@ fn setup(opt: &Opt) -> anyhow::Result<()> {
 async fn main() -> anyhow::Result<()> {
     let (opt, config_read_from) = Opt::try_build()?;
 
+    #[cfg(feature = "profile-with-puffin")]
+    let _server = puffin_http::Server::new(&format!("0.0.0.0:{}", puffin_http::DEFAULT_PORT))?;
+    puffin::set_scopes_on(cfg!(feature = "profile-with-puffin"));
+
     anyhow::ensure!(
         !(cfg!(windows) && opt.experimental_reduce_indexing_memory_usage),
         "The `experimental-reduce-indexing-memory-usage` flag is not supported on Windows"
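The three added lines are the whole `profile-with-puffin` wiring: keep the HTTP server alive in a binding for the lifetime of the process, and only record scopes when the Cargo feature is compiled in. A stripped-down sketch of the same startup sequence outside Meilisearch, where the loop body and scope name are illustrative:

```rust
fn main() -> anyhow::Result<()> {
    // Serves recorded profile data to any Puffin Viewer that connects to 0.0.0.0:8585.
    let _server = puffin_http::Server::new(&format!("0.0.0.0:{}", puffin_http::DEFAULT_PORT))?;
    // Scopes are ignored entirely until this is turned on.
    puffin::set_scopes_on(true);

    loop {
        {
            puffin::profile_scope!("tick"); // one named scope per loop iteration
            std::thread::sleep(std::time::Duration::from_millis(100));
        }
        // Finishing a frame is what pushes the recorded data to connected viewers.
        puffin::GlobalProfiler::lock().new_frame();
    }
}
```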
@@ -3,10 +3,8 @@
 use std::future::{ready, Ready};
 
 use actix_web::dev::{self, Service, ServiceRequest, ServiceResponse, Transform};
-use actix_web::web::Data;
 use actix_web::Error;
 use futures_util::future::LocalBoxFuture;
-use index_scheduler::IndexScheduler;
 use prometheus::HistogramTimer;
 
 pub struct RouteMetrics;
@@ -49,27 +47,19 @@ where
 
     fn call(&self, req: ServiceRequest) -> Self::Future {
         let mut histogram_timer: Option<HistogramTimer> = None;
-        // calling unwrap here is safe because index scheduler is added to app data while creating actix app.
-        // also, the tests will fail if this is not present.
-        let index_scheduler = req.app_data::<Data<IndexScheduler>>().unwrap();
-        let features = index_scheduler.features();
-        if features.check_metrics().is_ok() {
-            let request_path = req.path();
-            let is_registered_resource = req.resource_map().has_resource(request_path);
-            if is_registered_resource {
-                let request_method = req.method().to_string();
-                histogram_timer = Some(
-                    crate::metrics::MEILISEARCH_HTTP_RESPONSE_TIME_SECONDS
-                        .with_label_values(&[&request_method, request_path])
-                        .start_timer(),
-                );
-                crate::metrics::MEILISEARCH_HTTP_REQUESTS_TOTAL
-                    .with_label_values(&[&request_method, request_path])
-                    .inc();
-            }
-        };
+        let request_path = req.path();
+        let is_registered_resource = req.resource_map().has_resource(request_path);
+        if is_registered_resource {
+            let request_method = req.method().to_string();
+            histogram_timer = Some(
+                crate::metrics::MEILISEARCH_HTTP_RESPONSE_TIME_SECONDS
+                    .with_label_values(&[&request_method, request_path])
+                    .start_timer(),
+            );
+            crate::metrics::MEILISEARCH_HTTP_REQUESTS_TOTAL
+                .with_label_values(&[&request_method, request_path])
+                .inc();
+        }
 
         let fut = self.service.call(req);
 
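Both versions of this middleware use the same prometheus primitives shown in the hunk: a `HistogramVec` timed through `start_timer()` and a counter that is `inc()`-ed per request; they differ only in whether the runtime `metrics` feature is consulted first. A standalone sketch of those primitives, where the metric names and the `once_cell` setup are assumptions rather than Meilisearch's actual `crate::metrics` module:

```rust
use once_cell::sync::Lazy;
use prometheus::{register_histogram_vec, register_int_counter_vec, HistogramVec, IntCounterVec};

static HTTP_RESPONSE_TIME_SECONDS: Lazy<HistogramVec> = Lazy::new(|| {
    register_histogram_vec!(
        "http_response_time_seconds",
        "HTTP response times",
        &["method", "path"]
    )
    .unwrap()
});

static HTTP_REQUESTS_TOTAL: Lazy<IntCounterVec> = Lazy::new(|| {
    register_int_counter_vec!("http_requests_total", "HTTP requests total", &["method", "path"])
        .unwrap()
});

fn handle_request(method: &str, path: &str) {
    // The timer observes the elapsed duration into the histogram when it is dropped.
    let _timer = HTTP_RESPONSE_TIME_SECONDS.with_label_values(&[method, path]).start_timer();
    HTTP_REQUESTS_TOTAL.with_label_values(&[method, path]).inc();
    // ... actual request handling ...
}

fn main() {
    handle_request("GET", "/health");
}
```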
@@ -29,12 +29,12 @@ async fn get_features(
     >,
     req: HttpRequest,
     analytics: Data<dyn Analytics>,
-) -> HttpResponse {
-    let features = index_scheduler.features();
+) -> Result<HttpResponse, ResponseError> {
+    let features = index_scheduler.features()?;
 
     analytics.publish("Experimental features Seen".to_string(), json!(null), Some(&req));
     debug!("returns: {:?}", features.runtime_features());
-    HttpResponse::Ok().json(features.runtime_features())
+    Ok(HttpResponse::Ok().json(features.runtime_features()))
 }
 
 #[derive(Debug, Deserr)]
@@ -44,10 +44,6 @@ pub struct RuntimeTogglableFeatures {
     pub score_details: Option<bool>,
     #[deserr(default)]
     pub vector_store: Option<bool>,
-    #[deserr(default)]
-    pub metrics: Option<bool>,
-    #[deserr(default)]
-    pub export_puffin_reports: Option<bool>,
 }
 
 async fn patch_features(
@@ -59,36 +55,26 @@ async fn patch_features(
     req: HttpRequest,
     analytics: Data<dyn Analytics>,
 ) -> Result<HttpResponse, ResponseError> {
-    let features = index_scheduler.features();
+    let features = index_scheduler.features()?;
 
     let old_features = features.runtime_features();
 
     let new_features = meilisearch_types::features::RuntimeTogglableFeatures {
         score_details: new_features.0.score_details.unwrap_or(old_features.score_details),
         vector_store: new_features.0.vector_store.unwrap_or(old_features.vector_store),
-        metrics: new_features.0.metrics.unwrap_or(old_features.metrics),
-        export_puffin_reports: new_features
-            .0
-            .export_puffin_reports
-            .unwrap_or(old_features.export_puffin_reports),
     };
 
     // explicitly destructure for analytics rather than using the `Serialize` implementation, because
     // the it renames to camelCase, which we don't want for analytics.
     // **Do not** ignore fields with `..` or `_` here, because we want to add them in the future.
-    let meilisearch_types::features::RuntimeTogglableFeatures {
-        score_details,
-        vector_store,
-        metrics,
-        export_puffin_reports,
-    } = new_features;
+    let meilisearch_types::features::RuntimeTogglableFeatures { score_details, vector_store } =
+        new_features;
 
     analytics.publish(
         "Experimental features Updated".to_string(),
         json!({
             "score_details": score_details,
             "vector_store": vector_store,
-            "metrics": metrics,
-            "export_puffin_reports": export_puffin_reports,
         }),
         Some(&req),
     );
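The merge logic in `patch_features` is the same on both sides for the fields they share: each `Option<bool>` in the PATCH payload overrides the stored value only when it is `Some`. A tiny self-contained sketch of that `unwrap_or` merge, with illustrative struct names:

```rust
#[derive(Debug, Default, Clone, Copy, PartialEq)]
struct Stored {
    score_details: bool,
    vector_store: bool,
}

#[derive(Debug, Default)]
struct Patch {
    score_details: Option<bool>,
    vector_store: Option<bool>,
}

fn merge(old: Stored, patch: Patch) -> Stored {
    Stored {
        // A missing field in the payload keeps the previously stored value.
        score_details: patch.score_details.unwrap_or(old.score_details),
        vector_store: patch.vector_store.unwrap_or(old.vector_store),
    }
}

fn main() {
    let old = Stored { score_details: true, vector_store: false };
    let patch = Patch { vector_store: Some(true), ..Patch::default() };
    assert_eq!(merge(old, patch), Stored { score_details: true, vector_store: true });
}
```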
@@ -68,7 +68,7 @@ pub async fn search(
     }
 
     let index = index_scheduler.index(&index_uid)?;
-    let features = index_scheduler.features();
+    let features = index_scheduler.features()?;
     let search_result = tokio::task::spawn_blocking(move || {
         perform_facet_search(&index, search_query, facet_query, facet_name, features)
     })
@@ -157,7 +157,7 @@ pub async fn search_with_url_query(
     let mut aggregate = SearchAggregator::from_query(&query, &req);
 
     let index = index_scheduler.index(&index_uid)?;
-    let features = index_scheduler.features();
+    let features = index_scheduler.features()?;
     let search_result =
         tokio::task::spawn_blocking(move || perform_search(&index, query, features)).await?;
     if let Ok(ref search_result) = search_result {
@@ -192,7 +192,7 @@ pub async fn search_with_post(
 
     let index = index_scheduler.index(&index_uid)?;
 
-    let features = index_scheduler.features();
+    let features = index_scheduler.features()?;
     let search_result =
         tokio::task::spawn_blocking(move || perform_search(&index, query, features)).await?;
     if let Ok(ref search_result) = search_result {
@@ -19,7 +19,7 @@ pub async fn get_metrics(
     index_scheduler: GuardedData<ActionPolicy<{ actions::METRICS_GET }>, Data<IndexScheduler>>,
     auth_controller: Data<AuthController>,
 ) -> Result<HttpResponse, ResponseError> {
-    index_scheduler.features().check_metrics()?;
+    index_scheduler.features()?.check_metrics()?;
     let auth_filters = index_scheduler.filters();
     if !auth_filters.all_indexes_authorized() {
         let mut error = ResponseError::from(AuthenticationError::InvalidToken);
@@ -41,7 +41,7 @@ pub async fn multi_search_with_post(
     let queries = params.into_inner().queries;
 
     let mut multi_aggregate = MultiSearchAggregator::from_queries(&queries, &req);
-    let features = index_scheduler.features();
+    let features = index_scheduler.features()?;
 
     // Explicitly expect a `(ResponseError, usize)` for the error type rather than `ResponseError` only,
     // so that `?` doesn't work if it doesn't use `with_index`, ensuring that it is not forgotten in case of code
@@ -2,12 +2,10 @@ use std::collections::{HashMap, HashSet};
 
 use ::time::format_description::well_known::Rfc3339;
 use maplit::{hashmap, hashset};
-use meilisearch::Opt;
 use once_cell::sync::Lazy;
-use tempfile::TempDir;
 use time::{Duration, OffsetDateTime};
 
-use crate::common::{default_settings, Server, Value};
+use crate::common::{Server, Value};
 use crate::json;
 
 pub static AUTHORIZATIONS: Lazy<HashMap<(&'static str, &'static str), HashSet<&'static str>>> =
@@ -197,9 +195,7 @@ async fn access_authorized_master_key() {
 
 #[actix_rt::test]
 async fn access_authorized_restricted_index() {
-    let dir = TempDir::new().unwrap();
-    let enable_metrics = Opt { experimental_enable_metrics: true, ..default_settings(dir.path()) };
-    let mut server = Server::new_auth_with_options(enable_metrics, dir).await;
+    let mut server = Server::new_auth().await;
     for ((method, route), actions) in AUTHORIZATIONS.iter() {
         for action in actions {
             // create a new API key letting only the needed action.
@@ -5,11 +5,9 @@ pub mod service;
 
 use std::fmt::{self, Display};
 
-#[allow(unused)]
 pub use index::{GetAllDocumentsOptions, GetDocumentOptions};
 use meili_snap::json_string;
 use serde::{Deserialize, Serialize};
-#[allow(unused)]
 pub use server::{default_settings, Server};
 
 #[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)]
@@ -202,10 +202,6 @@ impl Server {
     pub async fn set_features(&self, value: Value) -> (Value, StatusCode) {
         self.service.patch("/experimental-features", value).await
     }
-
-    pub async fn get_metrics(&self) -> (Value, StatusCode) {
-        self.service.get("/metrics").await
-    }
 }
 
 pub fn default_settings(dir: impl AsRef<Path>) -> Opt {
@@ -225,7 +221,7 @@ pub fn default_settings(dir: impl AsRef<Path>) -> Opt {
             skip_index_budget: true,
             ..Parser::parse_from(None as Option<&str>)
         },
-        experimental_enable_metrics: false,
+        experimental_enable_metrics: true,
         ..Parser::parse_from(None as Option<&str>)
     }
 }
@@ -1,7 +1,4 @@
-use meilisearch::Opt;
-use tempfile::TempDir;
-
-use crate::common::{default_settings, Server};
+use crate::common::Server;
 use crate::json;
 
 /// Feature name to test against.
@@ -19,9 +16,7 @@ async fn experimental_features() {
     meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
     {
       "scoreDetails": false,
-      "vectorStore": false,
-      "metrics": false,
-      "exportPuffinReports": false
+      "vectorStore": false
     }
     "###);
 
@@ -31,9 +26,7 @@ async fn experimental_features() {
     meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
     {
       "scoreDetails": false,
-      "vectorStore": true,
-      "metrics": false,
-      "exportPuffinReports": false
+      "vectorStore": true
     }
     "###);
 
@@ -43,9 +36,7 @@ async fn experimental_features() {
     meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
     {
       "scoreDetails": false,
-      "vectorStore": true,
-      "metrics": false,
-      "exportPuffinReports": false
+      "vectorStore": true
     }
     "###);
 
@@ -56,9 +47,7 @@ async fn experimental_features() {
     meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
     {
       "scoreDetails": false,
-      "vectorStore": true,
-      "metrics": false,
-      "exportPuffinReports": false
+      "vectorStore": true
     }
     "###);
 
@@ -69,73 +58,11 @@ async fn experimental_features() {
     meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
     {
       "scoreDetails": false,
-      "vectorStore": true,
-      "metrics": false,
-      "exportPuffinReports": false
+      "vectorStore": true
     }
     "###);
 }
 
-#[actix_rt::test]
-async fn experimental_feature_metrics() {
-    // instance flag for metrics enables metrics at startup
-    let dir = TempDir::new().unwrap();
-    let enable_metrics = Opt { experimental_enable_metrics: true, ..default_settings(dir.path()) };
-    let server = Server::new_with_options(enable_metrics).await.unwrap();
-
-    let (response, code) = server.get_features().await;
-
-    meili_snap::snapshot!(code, @"200 OK");
-    meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
-    {
-      "scoreDetails": false,
-      "vectorStore": false,
-      "metrics": true,
-      "exportPuffinReports": false
-    }
-    "###);
-
-    let (response, code) = server.get_metrics().await;
-    meili_snap::snapshot!(code, @"200 OK");
-
-    // metrics are not returned in json format
-    // so the test server will return null
-    meili_snap::snapshot!(response, @"null");
-
-    // disabling metrics results in invalid request
-    let (response, code) = server.set_features(json!({"metrics": false})).await;
-    meili_snap::snapshot!(code, @"200 OK");
-    meili_snap::snapshot!(response["metrics"], @"false");
-
-    let (response, code) = server.get_metrics().await;
-    meili_snap::snapshot!(code, @"400 Bad Request");
-    meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
-    {
-      "message": "Getting metrics requires enabling the `metrics` experimental feature. See https://github.com/meilisearch/product/discussions/625",
-      "code": "feature_not_enabled",
-      "type": "invalid_request",
-      "link": "https://docs.meilisearch.com/errors#feature_not_enabled"
-    }
-    "###);
-
-    // enabling metrics via HTTP results in valid request
-    let (response, code) = server.set_features(json!({"metrics": true})).await;
-    meili_snap::snapshot!(code, @"200 OK");
-    meili_snap::snapshot!(response["metrics"], @"true");
-
-    let (response, code) = server.get_metrics().await;
-    meili_snap::snapshot!(code, @"200 OK");
-    meili_snap::snapshot!(response, @"null");
-
-    // startup without flag respects persisted metrics value
-    let disable_metrics =
-        Opt { experimental_enable_metrics: false, ..default_settings(dir.path()) };
-    let server_no_flag = Server::new_with_options(disable_metrics).await.unwrap();
-    let (response, code) = server_no_flag.get_metrics().await;
-    meili_snap::snapshot!(code, @"200 OK");
-    meili_snap::snapshot!(response, @"null");
-}
-
 #[actix_rt::test]
 async fn errors() {
     let server = Server::new().await;
@@ -146,7 +73,7 @@ async fn errors() {
|
|||||||
meili_snap::snapshot!(code, @"400 Bad Request");
|
meili_snap::snapshot!(code, @"400 Bad Request");
|
||||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||||
{
|
{
|
||||||
"message": "Unknown field `NotAFeature`: expected one of `scoreDetails`, `vectorStore`, `metrics`, `exportPuffinReports`",
|
"message": "Unknown field `NotAFeature`: expected one of `scoreDetails`, `vectorStore`",
|
||||||
"code": "bad_request",
|
"code": "bad_request",
|
||||||
"type": "invalid_request",
|
"type": "invalid_request",
|
||||||
"link": "https://docs.meilisearch.com/errors#bad_request"
|
"link": "https://docs.meilisearch.com/errors#bad_request"
|
||||||
|
|||||||
@@ -1,241 +0,0 @@
-use meili_snap::snapshot;
-use once_cell::sync::Lazy;
-
-use crate::common::{Server, Value};
-use crate::json;
-
-pub(self) static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
-json!([
-{
-"id": 1,
-"description": "Leather Jacket",
-"brand": "Lee Jeans",
-"product_id": "123456",
-"color": "Brown"
-},
-{
-"id": 2,
-"description": "Leather Jacket",
-"brand": "Lee Jeans",
-"product_id": "123456",
-"color": "Black"
-},
-{
-"id": 3,
-"description": "Leather Jacket",
-"brand": "Lee Jeans",
-"product_id": "123456",
-"color": "Blue"
-},
-{
-"id": 4,
-"description": "T-Shirt",
-"brand": "Nike",
-"product_id": "789012",
-"color": "Red"
-},
-{
-"id": 5,
-"description": "T-Shirt",
-"brand": "Nike",
-"product_id": "789012",
-"color": "Blue"
-},
-{
-"id": 6,
-"description": "Running Shoes",
-"brand": "Adidas",
-"product_id": "456789",
-"color": "Black"
-},
-{
-"id": 7,
-"description": "Running Shoes",
-"brand": "Adidas",
-"product_id": "456789",
-"color": "White"
-},
-{
-"id": 8,
-"description": "Hoodie",
-"brand": "Puma",
-"product_id": "987654",
-"color": "Gray"
-},
-{
-"id": 9,
-"description": "Sweater",
-"brand": "Gap",
-"product_id": "234567",
-"color": "Green"
-},
-{
-"id": 10,
-"description": "Sweater",
-"brand": "Gap",
-"product_id": "234567",
-"color": "Red"
-},
-{
-"id": 11,
-"description": "Sweater",
-"brand": "Gap",
-"product_id": "234567",
-"color": "Blue"
-},
-{
-"id": 12,
-"description": "Jeans",
-"brand": "Levi's",
-"product_id": "345678",
-"color": "Indigo"
-},
-{
-"id": 13,
-"description": "Jeans",
-"brand": "Levi's",
-"product_id": "345678",
-"color": "Black"
-},
-{
-"id": 14,
-"description": "Jeans",
-"brand": "Levi's",
-"product_id": "345678",
-"color": "Stone Wash"
-}
-])
-});
-
-pub(self) static DOCUMENT_PRIMARY_KEY: &str = "id";
-pub(self) static DOCUMENT_DISTINCT_KEY: &str = "product_id";
-
-/// testing: https://github.com/meilisearch/meilisearch/issues/4078
-#[actix_rt::test]
-async fn distinct_search_with_offset_no_ranking() {
-let server = Server::new().await;
-let index = server.index("test");
-
-let documents = DOCUMENTS.clone();
-index.add_documents(documents, Some(DOCUMENT_PRIMARY_KEY)).await;
-index.update_distinct_attribute(json!(DOCUMENT_DISTINCT_KEY)).await;
-index.wait_task(1).await;
-
-fn get_hits(response: &Value) -> Vec<&str> {
-let hits_array = response["hits"].as_array().unwrap();
-hits_array.iter().map(|h| h[DOCUMENT_DISTINCT_KEY].as_str().unwrap()).collect::<Vec<_>>()
-}
-
-let (response, code) = index.search_post(json!({"offset": 0, "limit": 2})).await;
-let hits = get_hits(&response);
-snapshot!(code, @"200 OK");
-snapshot!(hits.len(), @"2");
-snapshot!(format!("{:?}", hits), @r#"["123456", "789012"]"#);
-snapshot!(response["estimatedTotalHits"] , @"11");
-
-let (response, code) = index.search_post(json!({"offset": 2, "limit": 2})).await;
-let hits = get_hits(&response);
-snapshot!(code, @"200 OK");
-snapshot!(hits.len(), @"2");
-snapshot!(format!("{:?}", hits), @r#"["456789", "987654"]"#);
-snapshot!(response["estimatedTotalHits"], @"10");
-
-let (response, code) = index.search_post(json!({"offset": 4, "limit": 2})).await;
-let hits = get_hits(&response);
-snapshot!(code, @"200 OK");
-snapshot!(hits.len(), @"2");
-snapshot!(format!("{:?}", hits), @r#"["234567", "345678"]"#);
-snapshot!(response["estimatedTotalHits"], @"6");
-
-let (response, code) = index.search_post(json!({"offset": 5, "limit": 2})).await;
-let hits = get_hits(&response);
-snapshot!(code, @"200 OK");
-snapshot!(hits.len(), @"1");
-snapshot!(format!("{:?}", hits), @r#"["345678"]"#);
-snapshot!(response["estimatedTotalHits"], @"6");
-
-let (response, code) = index.search_post(json!({"offset": 6, "limit": 2})).await;
-let hits = get_hits(&response);
-snapshot!(code, @"200 OK");
-snapshot!(hits.len(), @"0");
-snapshot!(format!("{:?}", hits), @r#"[]"#);
-snapshot!(response["estimatedTotalHits"], @"6");
-
-let (response, code) = index.search_post(json!({"offset": 7, "limit": 2})).await;
-let hits = get_hits(&response);
-snapshot!(code, @"200 OK");
-snapshot!(hits.len(), @"0");
-snapshot!(format!("{:?}", hits), @r#"[]"#);
-snapshot!(response["estimatedTotalHits"], @"6");
-}
-
-/// testing: https://github.com/meilisearch/meilisearch/issues/4130
-#[actix_rt::test]
-async fn distinct_search_with_pagination_no_ranking() {
-let server = Server::new().await;
-let index = server.index("test");
-
-let documents = DOCUMENTS.clone();
-index.add_documents(documents, Some(DOCUMENT_PRIMARY_KEY)).await;
-index.update_distinct_attribute(json!(DOCUMENT_DISTINCT_KEY)).await;
-index.wait_task(1).await;
-
-fn get_hits(response: &Value) -> Vec<&str> {
-let hits_array = response["hits"].as_array().unwrap();
-hits_array.iter().map(|h| h[DOCUMENT_DISTINCT_KEY].as_str().unwrap()).collect::<Vec<_>>()
-}
-
-let (response, code) = index.search_post(json!({"page": 0, "hitsPerPage": 2})).await;
-let hits = get_hits(&response);
-snapshot!(code, @"200 OK");
-snapshot!(hits.len(), @"0");
-snapshot!(format!("{:?}", hits), @r#"[]"#);
-snapshot!(response["page"], @"0");
-snapshot!(response["totalPages"], @"3");
-snapshot!(response["totalHits"], @"6");
-
-let (response, code) = index.search_post(json!({"page": 1, "hitsPerPage": 2})).await;
-let hits = get_hits(&response);
-snapshot!(code, @"200 OK");
-snapshot!(hits.len(), @"2");
-snapshot!(format!("{:?}", hits), @r#"["123456", "789012"]"#);
-snapshot!(response["page"], @"1");
-snapshot!(response["totalPages"], @"3");
-snapshot!(response["totalHits"], @"6");
-
-let (response, code) = index.search_post(json!({"page": 2, "hitsPerPage": 2})).await;
-let hits = get_hits(&response);
-snapshot!(code, @"200 OK");
-snapshot!(hits.len(), @"2");
-snapshot!(format!("{:?}", hits), @r#"["456789", "987654"]"#);
-snapshot!(response["page"], @"2");
-snapshot!(response["totalPages"], @"3");
-snapshot!(response["totalHits"], @"6");
-
-let (response, code) = index.search_post(json!({"page": 3, "hitsPerPage": 2})).await;
-let hits = get_hits(&response);
-snapshot!(code, @"200 OK");
-snapshot!(hits.len(), @"2");
-snapshot!(format!("{:?}", hits), @r#"["234567", "345678"]"#);
-snapshot!(response["page"], @"3");
-snapshot!(response["totalPages"], @"3");
-snapshot!(response["totalHits"], @"6");
-
-let (response, code) = index.search_post(json!({"page": 4, "hitsPerPage": 2})).await;
-let hits = get_hits(&response);
-snapshot!(code, @"200 OK");
-snapshot!(hits.len(), @"0");
-snapshot!(format!("{:?}", hits), @r#"[]"#);
-snapshot!(response["page"], @"4");
-snapshot!(response["totalPages"], @"3");
-snapshot!(response["totalHits"], @"6");
-
-let (response, code) = index.search_post(json!({"page": 2, "hitsPerPage": 3})).await;
-let hits = get_hits(&response);
-snapshot!(code, @"200 OK");
-snapshot!(hits.len(), @"3");
-snapshot!(format!("{:?}", hits), @r#"["987654", "234567", "345678"]"#);
-snapshot!(response["page"], @"2");
-snapshot!(response["totalPages"], @"2");
-snapshot!(response["totalHits"], @"6");
-}
@@ -1,7 +1,6 @@
 // This modules contains all the test concerning search. Each particular feature of the search
 // should be tested in its own module to isolate tests and keep the tests readable.

-mod distinct;
 mod errors;
 mod facet_search;
 mod formatted;
@@ -817,7 +816,7 @@ async fn experimental_feature_score_details() {
 },
 "proximity": {
 "order": 2,
-"score": 0.75
+"score": 0.875
 },
 "attribute": {
 "order": 3,
@@ -335,35 +335,3 @@ async fn exactness_ranking_rule_order() {
 })
 .await;
 }
-
-#[actix_rt::test]
-async fn search_on_exact_field() {
-let server = Server::new().await;
-let index = index_with_documents(
-&server,
-&json!([
-{
-"title": "Captain Marvel",
-"exact": "Captain Marivel",
-"id": "1",
-},
-{
-"title": "Captain Marivel",
-"exact": "Captain the Marvel",
-"id": "2",
-}]),
-)
-.await;
-
-let (response, code) =
-index.update_settings_typo_tolerance(json!({ "disableOnAttributes": ["exact"] })).await;
-assert_eq!(202, code, "{:?}", response);
-index.wait_task(1).await;
-// Searching on an exact attribute should only return the document matching without typo.
-index
-.search(json!({"q": "Marvel", "attributesToSearchOn": ["exact"]}), |response, code| {
-snapshot!(code, @"200 OK");
-snapshot!(response["hits"].as_array().unwrap().len(), @"1");
-})
-.await;
-}
@@ -1,19 +0,0 @@
-[package]
-name = "meilitool"
-description = "A CLI to edit a Meilisearch database from the command line"
-version.workspace = true
-authors.workspace = true
-homepage.workspace = true
-readme.workspace = true
-edition.workspace = true
-license.workspace = true
-
-[dependencies]
-anyhow = "1.0.75"
-clap = { version = "4.2.1", features = ["derive"] }
-dump = { path = "../dump" }
-file-store = { path = "../file-store" }
-meilisearch-auth = { path = "../meilisearch-auth" }
-meilisearch-types = { path = "../meilisearch-types" }
-time = { version = "0.3.30", features = ["formatting"] }
-uuid = { version = "1.5.0", features = ["v4"], default-features = false }
@@ -1,312 +0,0 @@
-use std::fs::{read_dir, read_to_string, remove_file, File};
-use std::io::BufWriter;
-use std::path::PathBuf;
-
-use anyhow::Context;
-use clap::{Parser, Subcommand};
-use dump::{DumpWriter, IndexMetadata};
-use file_store::FileStore;
-use meilisearch_auth::AuthController;
-use meilisearch_types::heed::types::{OwnedType, SerdeJson, Str};
-use meilisearch_types::heed::{Database, Env, EnvOpenOptions, PolyDatabase, RoTxn, RwTxn};
-use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader};
-use meilisearch_types::milli::{obkv_to_json, BEU32};
-use meilisearch_types::tasks::{Status, Task};
-use meilisearch_types::versioning::check_version_file;
-use meilisearch_types::Index;
-use time::macros::format_description;
-use time::OffsetDateTime;
-use uuid_codec::UuidCodec;
-
-mod uuid_codec;
-
-#[derive(Parser)]
-#[command(author, version, about, long_about = None)]
-struct Cli {
-/// The database path where the Meilisearch is running.
-#[arg(long, default_value = "data.ms/")]
-db_path: PathBuf,
-
-#[command(subcommand)]
-command: Command,
-}
-
-#[derive(Subcommand)]
-enum Command {
-/// Clears the task queue and make it empty.
-///
-/// This command can be safely executed even if Meilisearch is running and processing tasks.
-/// Once the task queue is empty you can restart Meilisearch and no more tasks must be visible,
-/// even the ones that were processing. However, it's highly possible that you see the processing
-/// tasks in the queue again with an associated internal error message.
-ClearTaskQueue,
-
-/// Exports a dump from the Meilisearch database.
-///
-/// Make sure to run this command when Meilisearch is not running or running but not processing tasks.
-/// If tasks are being processed while a dump is being exported there are chances for the dump to be
-/// malformed with missing tasks.
-///
-/// TODO Verify this claim or make sure it cannot happen and we can export dumps
-/// without caring about killing Meilisearch first!
-ExportADump {
-/// The directory in which the dump will be created.
-#[arg(long, default_value = "dumps/")]
-dump_dir: PathBuf,
-
-/// Skip dumping the enqueued or processing tasks.
-///
-/// Can be useful when there are a lot of them and it is not particularly useful
-/// to keep them. Note that only the enqueued tasks takes up space so skipping
-/// the processed ones is not particularly interesting.
-#[arg(long)]
-skip_enqueued_tasks: bool,
-},
-}
-
-fn main() -> anyhow::Result<()> {
-let Cli { db_path, command } = Cli::parse();
-
-check_version_file(&db_path).context("While checking the version file")?;
-
-match command {
-Command::ClearTaskQueue => clear_task_queue(db_path),
-Command::ExportADump { dump_dir, skip_enqueued_tasks } => {
-export_a_dump(db_path, dump_dir, skip_enqueued_tasks)
-}
-}
-}
-
-/// Clears the task queue located at `db_path`.
-fn clear_task_queue(db_path: PathBuf) -> anyhow::Result<()> {
-let path = db_path.join("tasks");
-let env = EnvOpenOptions::new()
-.max_dbs(100)
-.open(&path)
-.with_context(|| format!("While trying to open {:?}", path.display()))?;
-
-eprintln!("Deleting tasks from the database...");
-
-let mut wtxn = env.write_txn()?;
-let all_tasks = try_opening_poly_database(&env, &wtxn, "all-tasks")?;
-let total = all_tasks.len(&wtxn)?;
-let status = try_opening_poly_database(&env, &wtxn, "status")?;
-let kind = try_opening_poly_database(&env, &wtxn, "kind")?;
-let index_tasks = try_opening_poly_database(&env, &wtxn, "index-tasks")?;
-let canceled_by = try_opening_poly_database(&env, &wtxn, "canceled_by")?;
-let enqueued_at = try_opening_poly_database(&env, &wtxn, "enqueued-at")?;
-let started_at = try_opening_poly_database(&env, &wtxn, "started-at")?;
-let finished_at = try_opening_poly_database(&env, &wtxn, "finished-at")?;
-
-try_clearing_poly_database(&mut wtxn, all_tasks, "all-tasks")?;
-try_clearing_poly_database(&mut wtxn, status, "status")?;
-try_clearing_poly_database(&mut wtxn, kind, "kind")?;
-try_clearing_poly_database(&mut wtxn, index_tasks, "index-tasks")?;
-try_clearing_poly_database(&mut wtxn, canceled_by, "canceled_by")?;
-try_clearing_poly_database(&mut wtxn, enqueued_at, "enqueued-at")?;
-try_clearing_poly_database(&mut wtxn, started_at, "started-at")?;
-try_clearing_poly_database(&mut wtxn, finished_at, "finished-at")?;
-
-wtxn.commit().context("While committing the transaction")?;
-
-eprintln!("Successfully deleted {total} tasks from the tasks database!");
-eprintln!("Deleting the content files from disk...");
-
-let mut count = 0usize;
-let update_files = db_path.join("update_files");
-let entries = read_dir(&update_files).with_context(|| {
-format!("While trying to read the content of {:?}", update_files.display())
-})?;
-for result in entries {
-match result {
-Ok(ent) => match remove_file(ent.path()) {
-Ok(_) => count += 1,
-Err(e) => eprintln!("Error while deleting {:?}: {}", ent.path().display(), e),
-},
-Err(e) => {
-eprintln!("Error while reading a file in {:?}: {}", update_files.display(), e)
-}
-}
-}
-
-eprintln!("Sucessfully deleted {count} content files from disk!");
-
-Ok(())
-}
-
-fn try_opening_database<KC: 'static, DC: 'static>(
-env: &Env,
-rtxn: &RoTxn,
-db_name: &str,
-) -> anyhow::Result<Database<KC, DC>> {
-env.open_database(rtxn, Some(db_name))
-.with_context(|| format!("While opening the {db_name:?} database"))?
-.with_context(|| format!("Missing the {db_name:?} database"))
-}
-
-fn try_opening_poly_database(
-env: &Env,
-rtxn: &RoTxn,
-db_name: &str,
-) -> anyhow::Result<PolyDatabase> {
-env.open_poly_database(rtxn, Some(db_name))
-.with_context(|| format!("While opening the {db_name:?} poly database"))?
-.with_context(|| format!("Missing the {db_name:?} poly database"))
-}
-
-fn try_clearing_poly_database(
-wtxn: &mut RwTxn,
-database: PolyDatabase,
-db_name: &str,
-) -> anyhow::Result<()> {
-database.clear(wtxn).with_context(|| format!("While clearing the {db_name:?} database"))
-}
-
-/// Exports a dump into the dump directory.
-fn export_a_dump(
-db_path: PathBuf,
-dump_dir: PathBuf,
-skip_enqueued_tasks: bool,
-) -> Result<(), anyhow::Error> {
-let started_at = OffsetDateTime::now_utc();
-
-// 1. Extracts the instance UID from disk
-let instance_uid_path = db_path.join("instance-uid");
-let instance_uid = match read_to_string(&instance_uid_path) {
-Ok(content) => match content.trim().parse() {
-Ok(uuid) => Some(uuid),
-Err(e) => {
-eprintln!("Impossible to parse instance-uid: {e}");
-None
-}
-},
-Err(e) => {
-eprintln!("Impossible to read {}: {}", instance_uid_path.display(), e);
-None
-}
-};
-
-let dump = DumpWriter::new(instance_uid).context("While creating a new dump")?;
-let file_store =
-FileStore::new(db_path.join("update_files")).context("While opening the FileStore")?;
-
-let index_scheduler_path = db_path.join("tasks");
-let env = EnvOpenOptions::new()
-.max_dbs(100)
-.open(&index_scheduler_path)
-.with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?;
-
-eprintln!("Dumping the keys...");
-
-// 2. dump the keys
-let auth_store = AuthController::new(&db_path, &None)
-.with_context(|| format!("While opening the auth store at {}", db_path.display()))?;
-let mut dump_keys = dump.create_keys()?;
-let mut count = 0;
-for key in auth_store.list_keys()? {
-dump_keys.push_key(&key)?;
-count += 1;
-}
-dump_keys.flush()?;
-
-eprintln!("Successfully dumped {count} keys!");
-
-let rtxn = env.read_txn()?;
-let all_tasks: Database<OwnedType<BEU32>, SerdeJson<Task>> =
-try_opening_database(&env, &rtxn, "all-tasks")?;
-let index_mapping: Database<Str, UuidCodec> =
-try_opening_database(&env, &rtxn, "index-mapping")?;
-
-if skip_enqueued_tasks {
-eprintln!("Skip dumping the enqueued tasks...");
-} else {
-eprintln!("Dumping the enqueued tasks...");
-
-// 3. dump the tasks
-let mut dump_tasks = dump.create_tasks_queue()?;
-let mut count = 0;
-for ret in all_tasks.iter(&rtxn)? {
-let (_, t) = ret?;
-let status = t.status;
-let content_file = t.content_uuid();
-let mut dump_content_file = dump_tasks.push_task(&t.into())?;
-
-// 3.1. Dump the `content_file` associated with the task if there is one and the task is not finished yet.
-if let Some(content_file_uuid) = content_file {
-if status == Status::Enqueued {
-let content_file = file_store.get_update(content_file_uuid)?;
-
-let reader =
-DocumentsBatchReader::from_reader(content_file).with_context(|| {
-format!("While reading content file {:?}", content_file_uuid)
-})?;
-
-let (mut cursor, documents_batch_index) = reader.into_cursor_and_fields_index();
-while let Some(doc) = cursor.next_document().with_context(|| {
-format!("While iterating on content file {:?}", content_file_uuid)
-})? {
-dump_content_file
-.push_document(&obkv_to_object(&doc, &documents_batch_index)?)?;
-}
-dump_content_file.flush()?;
-count += 1;
-}
-}
-}
-dump_tasks.flush()?;
-
-eprintln!("Successfully dumped {count} enqueued tasks!");
-}
-
-eprintln!("Dumping the indexes...");
-
-// 4. Dump the indexes
-let mut count = 0;
-for result in index_mapping.iter(&rtxn)? {
-let (uid, uuid) = result?;
-let index_path = db_path.join("indexes").join(uuid.to_string());
-let index = Index::new(EnvOpenOptions::new(), &index_path).with_context(|| {
-format!("While trying to open the index at path {:?}", index_path.display())
-})?;
-
-let rtxn = index.read_txn()?;
-let metadata = IndexMetadata {
-uid: uid.to_owned(),
-primary_key: index.primary_key(&rtxn)?.map(String::from),
-created_at: index.created_at(&rtxn)?,
-updated_at: index.updated_at(&rtxn)?,
-};
-let mut index_dumper = dump.create_index(uid, &metadata)?;
-
-let fields_ids_map = index.fields_ids_map(&rtxn)?;
-let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
-
-// 4.1. Dump the documents
-for ret in index.all_documents(&rtxn)? {
-let (_id, doc) = ret?;
-let document = obkv_to_json(&all_fields, &fields_ids_map, doc)?;
-index_dumper.push_document(&document)?;
-}
-
-// 4.2. Dump the settings
-let settings = meilisearch_types::settings::settings(&index, &rtxn)?;
-index_dumper.settings(&settings)?;
-count += 1;
-}
-
-eprintln!("Successfully dumped {count} indexes!");
-// We will not dump experimental feature settings
-eprintln!("The tool is not dumping experimental features, please set them by hand afterward");
-
-let dump_uid = started_at.format(format_description!(
-"[year repr:full][month repr:numerical][day padding:zero]-[hour padding:zero][minute padding:zero][second padding:zero][subsecond digits:3]"
-)).unwrap();
-
-let path = dump_dir.join(format!("{}.dump", dump_uid));
-let file = File::create(&path)?;
-dump.persist_to(BufWriter::new(file))?;
-
-eprintln!("Dump exported at path {:?}", path.display());
-
-Ok(())
-}
@@ -1,24 +0,0 @@
-use std::borrow::Cow;
-use std::convert::TryInto;
-
-use meilisearch_types::heed::{BytesDecode, BytesEncode};
-use uuid::Uuid;
-
-/// A heed codec for value of struct Uuid.
-pub struct UuidCodec;
-
-impl<'a> BytesDecode<'a> for UuidCodec {
-type DItem = Uuid;
-
-fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
-bytes.try_into().ok().map(Uuid::from_bytes)
-}
-}
-
-impl BytesEncode<'_> for UuidCodec {
-type EItem = Uuid;
-
-fn bytes_encode(item: &Self::EItem) -> Option<Cow<[u8]>> {
-Some(Cow::Borrowed(item.as_bytes()))
-}
-}
@@ -17,7 +17,7 @@ bincode = "1.3.3"
 bstr = "1.4.0"
 bytemuck = { version = "1.13.1", features = ["extern_crate_alloc"] }
 byteorder = "1.4.3"
-charabia = { version = "0.8.5", default-features = false }
+charabia = { version = "0.8.3", default-features = false }
 concat-arrays = "0.1.2"
 crossbeam-channel = "0.5.8"
 deserr = { version = "0.6.0", features = ["actix-web"]}
@@ -42,7 +42,7 @@ once_cell = "1.17.1"
 ordered-float = "3.6.0"
 rand_pcg = { version = "0.3.1", features = ["serde1"] }
 rayon = "1.7.0"
-roaring = "0.10.1"
+roaring = { path = "../../roaring-rs" }
 rstar = { version = "0.11.0", features = ["serde"] }
 serde = { version = "1.0.160", features = ["derive"] }
 serde_json = { version = "1.0.95", features = ["preserve_order"] }
@@ -82,7 +82,7 @@ md5 = "0.7.0"
 rand = { version = "0.8.5", features = ["small_rng"] }

 [features]
-all-tokenizations = ["charabia/chinese", "charabia/hebrew", "charabia/japanese", "charabia/thai", "charabia/korean", "charabia/greek", "charabia/khmer"]
+all-tokenizations = ["charabia/chinese", "charabia/hebrew", "charabia/japanese", "charabia/thai", "charabia/korean", "charabia/greek"]

 # Use POSIX semaphores instead of SysV semaphores in LMDB
 # For more information on this feature, see heed's Cargo.toml
@@ -106,6 +106,3 @@ thai = ["charabia/thai"]

 # allow greek specialized tokenization
 greek = ["charabia/greek"]
-
-# allow khmer specialized tokenization
-khmer = ["charabia/khmer"]
@@ -1,5 +1,4 @@
 use std::fs::File;
-use std::io::BufReader;
 use std::{io, str};

 use obkv::KvReader;
@@ -20,14 +19,14 @@ use crate::FieldId;
 pub struct EnrichedDocumentsBatchReader<R> {
 documents: DocumentsBatchReader<R>,
 primary_key: String,
-external_ids: grenad::ReaderCursor<BufReader<File>>,
+external_ids: grenad::ReaderCursor<File>,
 }

 impl<R: io::Read + io::Seek> EnrichedDocumentsBatchReader<R> {
 pub fn new(
 documents: DocumentsBatchReader<R>,
 primary_key: String,
-external_ids: grenad::Reader<BufReader<File>>,
+external_ids: grenad::Reader<File>,
 ) -> Result<Self, Error> {
 if documents.documents_count() as u64 == external_ids.len() {
 Ok(EnrichedDocumentsBatchReader {
@@ -76,7 +75,7 @@ pub struct EnrichedDocument<'a> {
 pub struct EnrichedDocumentsBatchCursor<R> {
 documents: DocumentsBatchCursor<R>,
 primary_key: String,
-external_ids: grenad::ReaderCursor<BufReader<File>>,
+external_ids: grenad::ReaderCursor<File>,
 }

 impl<R> EnrichedDocumentsBatchCursor<R> {
@@ -1,4 +1,5 @@
 use std::borrow::Cow;
+use std::convert::TryInto;
 use std::io;
 use std::mem::size_of;

@@ -56,22 +57,30 @@ impl CboRoaringBitmapCodec {
 }

 /// Merge serialized CboRoaringBitmaps in a buffer.
+/// The buffer must be empty before calling the function.
 ///
 /// if the merged values length is under the threshold, values are directly
 /// serialized in the buffer else a RoaringBitmap is created from the
 /// values and is serialized in the buffer.
 pub fn merge_into(slices: &[Cow<[u8]>], buffer: &mut Vec<u8>) -> io::Result<()> {
+debug_assert!(buffer.is_empty());

 let mut roaring = RoaringBitmap::new();
 let mut vec = Vec::new();

 for bytes in slices {
 if bytes.len() <= THRESHOLD * size_of::<u32>() {
-let mut reader = bytes.as_ref();
-while let Ok(integer) = reader.read_u32::<NativeEndian>() {
-vec.push(integer);
+debug_assert!(bytes.len() % size_of::<u32>() == 0);
+vec.reserve(bytes.len() / size_of::<u32>());
+
+for bytes in bytes.chunks_exact(size_of::<u32>()) {
+// unwrap can't happens since we ensured that everything
+// was a multiple of size_of<u32>.
+let v = u32::from_ne_bytes(bytes.try_into().unwrap());
+vec.push(v);
 }
 } else {
-roaring |= RoaringBitmap::deserialize_unchecked_from(bytes.as_ref())?;
+roaring.union_with_serialized_unchecked(bytes.as_ref())?;
 }
 }

@@ -85,7 +94,7 @@ impl CboRoaringBitmapCodec {
 }
 } else {
 // We can unwrap safely because the vector is sorted upper.
-let roaring = RoaringBitmap::from_sorted_iter(vec.into_iter()).unwrap();
+let roaring = RoaringBitmap::from_sorted_iter(vec).unwrap();
 roaring.serialize_into(buffer)?;
 }
 } else {
@@ -186,8 +195,11 @@ mod tests {

 let medium_data: Vec<_> =
 medium_data.iter().map(|b| CboRoaringBitmapCodec::bytes_encode(b).unwrap()).collect();
-buffer.clear();
-CboRoaringBitmapCodec::merge_into(medium_data.as_slice(), &mut buffer).unwrap();
+// TODO: used for profiling purpose, get rids of it once the function is optimized
+for _ in 0..100000 {
+buffer.clear();
+CboRoaringBitmapCodec::merge_into(medium_data.as_slice(), &mut buffer).unwrap();
+}

 let bitmap = CboRoaringBitmapCodec::deserialize_from(&buffer).unwrap();
 let expected = RoaringBitmap::from_sorted_iter(0..23).unwrap();
@@ -1,5 +1,6 @@
 #![cfg_attr(all(test, fuzzing), feature(no_coverage))]
 #![allow(clippy::type_complexity)]
+#![feature(test)]

 #[cfg(test)]
 #[global_allocator]
@@ -2,7 +2,7 @@ use std::cmp;

 use crate::{relative_from_absolute_position, Position};

-pub const MAX_DISTANCE: u32 = 4;
+pub const MAX_DISTANCE: u32 = 8;

 pub fn index_proximity(lhs: u32, rhs: u32) -> u32 {
 if lhs <= rhs {
@@ -3,7 +3,7 @@ use std::fmt::{Debug, Display};
 use std::ops::Bound::{self, Excluded, Included};

 use either::Either;
-pub use filter_parser::{Condition, Error as FPError, FilterCondition, Token};
+pub use filter_parser::{Condition, Error as FPError, FilterCondition, Span, Token};
 use roaring::RoaringBitmap;
 use serde_json::Value;

@@ -11,7 +11,7 @@ use once_cell::sync::Lazy;
 use roaring::bitmap::RoaringBitmap;

 pub use self::facet::{FacetDistribution, Filter, OrderBy, DEFAULT_VALUES_PER_FACET};
-pub use self::new::matches::{FormatOptions, MatchBounds, MatcherBuilder, MatchingWords};
+pub use self::new::matches::{FormatOptions, MatchBounds, Matcher, MatcherBuilder, MatchingWords};
 use self::new::PartialSearchResult;
 use crate::error::UserError;
 use crate::heed_codec::facet::{FacetGroupKey, FacetGroupValue};
@@ -53,22 +53,11 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
 if excluded.contains(docid) {
 continue;
 }

 distinct_single_docid(ctx.index, ctx.txn, distinct_fid, docid, &mut excluded)?;
 results.push(docid);
 }

 let mut all_candidates = universe - excluded;
 all_candidates.extend(results.iter().copied());
-// drain the results of the skipped elements
-// this **must** be done **after** writing the entire results in `all_candidates` to ensure
-// e.g. estimatedTotalHits is correct.
-if results.len() >= from {
-results.drain(..from);
-} else {
-results.clear();
-}
-
 return Ok(BucketSortOutput {
 scores: vec![Default::default(); results.len()],
 docids: results,
@@ -157,8 +157,7 @@ impl<'ctx> SearchContext<'ctx> {
 match &self.restricted_fids {
 Some(restricted_fids) => {
 let interned = self.word_interner.get(word).as_str();
-let keys: Vec<_> =
-restricted_fids.tolerant.iter().map(|fid| (interned, *fid)).collect();
+let keys: Vec<_> = restricted_fids.iter().map(|fid| (interned, *fid)).collect();

 DatabaseCache::get_value_from_keys::<_, _, CboRoaringBitmapCodec>(
 self.txn,
@@ -183,29 +182,13 @@ impl<'ctx> SearchContext<'ctx> {
 &mut self,
 word: Interned<String>,
 ) -> Result<Option<RoaringBitmap>> {
-match &self.restricted_fids {
-Some(restricted_fids) => {
-let interned = self.word_interner.get(word).as_str();
-let keys: Vec<_> =
-restricted_fids.exact.iter().map(|fid| (interned, *fid)).collect();
-
-DatabaseCache::get_value_from_keys::<_, _, CboRoaringBitmapCodec>(
-self.txn,
-word,
-&keys[..],
-&mut self.db_cache.exact_word_docids,
-self.index.word_fid_docids.remap_data_type::<ByteSlice>(),
-merge_cbo_roaring_bitmaps,
-)
-}
-None => DatabaseCache::get_value::<_, _, RoaringBitmapCodec>(
-self.txn,
-word,
-self.word_interner.get(word).as_str(),
-&mut self.db_cache.exact_word_docids,
-self.index.exact_word_docids.remap_data_type::<ByteSlice>(),
-),
-}
+DatabaseCache::get_value::<_, _, RoaringBitmapCodec>(
+self.txn,
+word,
+self.word_interner.get(word).as_str(),
+&mut self.db_cache.exact_word_docids,
+self.index.exact_word_docids.remap_data_type::<ByteSlice>(),
+)
 }

 pub fn word_prefix_docids(&mut self, prefix: Word) -> Result<Option<RoaringBitmap>> {
@@ -236,8 +219,7 @@ impl<'ctx> SearchContext<'ctx> {
 match &self.restricted_fids {
 Some(restricted_fids) => {
 let interned = self.word_interner.get(prefix).as_str();
-let keys: Vec<_> =
-restricted_fids.tolerant.iter().map(|fid| (interned, *fid)).collect();
+let keys: Vec<_> = restricted_fids.iter().map(|fid| (interned, *fid)).collect();

 DatabaseCache::get_value_from_keys::<_, _, CboRoaringBitmapCodec>(
 self.txn,
@@ -262,29 +244,13 @@ impl<'ctx> SearchContext<'ctx> {
 &mut self,
 prefix: Interned<String>,
 ) -> Result<Option<RoaringBitmap>> {
-match &self.restricted_fids {
-Some(restricted_fids) => {
-let interned = self.word_interner.get(prefix).as_str();
-let keys: Vec<_> =
-restricted_fids.exact.iter().map(|fid| (interned, *fid)).collect();
-
-DatabaseCache::get_value_from_keys::<_, _, CboRoaringBitmapCodec>(
-self.txn,
-prefix,
-&keys[..],
-&mut self.db_cache.exact_word_prefix_docids,
-self.index.word_prefix_fid_docids.remap_data_type::<ByteSlice>(),
-merge_cbo_roaring_bitmaps,
-)
-}
-None => DatabaseCache::get_value::<_, _, RoaringBitmapCodec>(
-self.txn,
-prefix,
-self.word_interner.get(prefix).as_str(),
-&mut self.db_cache.exact_word_prefix_docids,
-self.index.exact_word_prefix_docids.remap_data_type::<ByteSlice>(),
-),
-}
+DatabaseCache::get_value::<_, _, RoaringBitmapCodec>(
+self.txn,
+prefix,
+self.word_interner.get(prefix).as_str(),
+&mut self.db_cache.exact_word_prefix_docids,
+self.index.exact_word_prefix_docids.remap_data_type::<ByteSlice>(),
+)
 }

 pub fn get_db_word_pair_proximity_docids(
@@ -51,8 +51,7 @@ use crate::error::FieldIdMapMissingEntry;
 use crate::score_details::{ScoreDetails, ScoringStrategy};
 use crate::search::new::distinct::apply_distinct_rule;
 use crate::{
-AscDesc, DocumentId, FieldId, Filter, Index, Member, Result, TermsMatchingStrategy, UserError,
-BEU32,
+AscDesc, DocumentId, Filter, Index, Member, Result, TermsMatchingStrategy, UserError, BEU32,
 };

 /// A structure used throughout the execution of a search query.
@@ -64,7 +63,7 @@ pub struct SearchContext<'ctx> {
 pub phrase_interner: DedupInterner<Phrase>,
 pub term_interner: Interner<QueryTerm>,
 pub phrase_docids: PhraseDocIdsCache,
-pub restricted_fids: Option<RestrictedFids>,
+pub restricted_fids: Option<Vec<u16>>,
 }

 impl<'ctx> SearchContext<'ctx> {
@@ -84,9 +83,8 @@ impl<'ctx> SearchContext<'ctx> {
 pub fn searchable_attributes(&mut self, searchable_attributes: &'ctx [String]) -> Result<()> {
 let fids_map = self.index.fields_ids_map(self.txn)?;
 let searchable_names = self.index.searchable_fields(self.txn)?;
-let exact_attributes_ids = self.index.exact_attributes_ids(self.txn)?;

-let mut restricted_fids = RestrictedFids::default();
+let mut restricted_fids = Vec::new();
 let mut contains_wildcard = false;
 for field_name in searchable_attributes {
 if field_name == "*" {
@@ -125,11 +123,7 @@ impl<'ctx> SearchContext<'ctx> {
 }
 };

-if exact_attributes_ids.contains(&fid) {
-restricted_fids.exact.push(fid);
-} else {
-restricted_fids.tolerant.push(fid);
-};
+restricted_fids.push(fid);
 }

 self.restricted_fids = (!contains_wildcard).then_some(restricted_fids);
@@ -153,18 +147,6 @@ impl Word {
 }
 }

-#[derive(Debug, Clone, Default)]
-pub struct RestrictedFids {
-pub tolerant: Vec<FieldId>,
-pub exact: Vec<FieldId>,
-}
-
-impl RestrictedFids {
-pub fn contains(&self, fid: &FieldId) -> bool {
-self.tolerant.contains(fid) || self.exact.contains(fid)
-}
-}
-
 /// Apply the [`TermsMatchingStrategy`] to the query graph and resolve it.
 fn resolve_maximally_reduced_query_graph(
 ctx: &mut SearchContext,
@@ -452,18 +434,7 @@ pub fn execute_search(
 let mut search = Search::default();
 let docids = match ctx.index.vector_hnsw(ctx.txn)? {
 Some(hnsw) => {
-if let Some(expected_size) = hnsw.iter().map(|(_, point)| point.len()).next() {
-if vector.len() != expected_size {
-return Err(UserError::InvalidVectorDimensions {
-expected: expected_size,
-found: vector.len(),
-}
-.into());
-}
-}
-
 let vector = NDotProductPoint::new(vector.clone());

 let neighbors = hnsw.search(&vector, &mut search);

 let mut docids = Vec::new();
@@ -29,7 +29,7 @@ use std::hash::Hash;
 pub use cheapest_paths::PathVisitor;
 pub use condition_docids_cache::ConditionDocIdsCache;
 pub use dead_ends_cache::DeadEndsCache;
-pub use exactness::ExactnessGraph;
+pub use exactness::{ExactnessCondition, ExactnessGraph};
 pub use fid::{FidCondition, FidGraph};
 pub use position::{PositionCondition, PositionGraph};
 pub use proximity::{ProximityCondition, ProximityGraph};
@@ -1,7 +1,6 @@
 #![allow(clippy::too_many_arguments)]

 use super::ProximityCondition;
-use crate::proximity::MAX_DISTANCE;
 use crate::search::new::interner::{DedupInterner, Interned};
 use crate::search::new::query_term::LocatedQueryTermSubset;
 use crate::search::new::SearchContext;
@@ -36,7 +35,7 @@ pub fn build_edges(
 }

 let mut conditions = vec![];
-for cost in right_ngram_max..(((MAX_DISTANCE as usize) - 1) + right_ngram_max) {
+for cost in right_ngram_max..(7 + right_ngram_max) {
 conditions.push((
 cost as u32,
 conditions_interner.insert(ProximityCondition::Uninit {
@@ -48,7 +47,7 @@ pub fn build_edges(
 }

 conditions.push((
-((MAX_DISTANCE - 1) + (right_ngram_max as u32)),
+(7 + right_ngram_max) as u32,
 conditions_interner.insert(ProximityCondition::Term { term: right_term.clone() }),
 ));

@@ -273,7 +273,7 @@ fn test_proximity_simple() {
|
|||||||
s.terms_matching_strategy(TermsMatchingStrategy::All);
|
s.terms_matching_strategy(TermsMatchingStrategy::All);
|
||||||
s.query("the quick brown fox jumps over the lazy dog");
|
s.query("the quick brown fox jumps over the lazy dog");
|
||||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9, 10, 4, 7, 6, 2, 3, 5, 1, 0]");
|
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9, 10, 4, 7, 6, 5, 2, 3, 0, 1]");
|
||||||
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
||||||
insta::assert_debug_snapshot!(texts, @r###"
|
insta::assert_debug_snapshot!(texts, @r###"
|
||||||
[
|
[
|
||||||
@@ -282,11 +282,11 @@ fn test_proximity_simple() {
|
|||||||
"\"the quickbrown fox jumps over the lazy dog\"",
|
"\"the quickbrown fox jumps over the lazy dog\"",
|
||||||
"\"the really quick brown fox jumps over the lazy dog\"",
|
"\"the really quick brown fox jumps over the lazy dog\"",
|
||||||
"\"the really quick brown fox jumps over the very lazy dog\"",
|
"\"the really quick brown fox jumps over the very lazy dog\"",
|
||||||
|
"\"brown quick fox jumps over the lazy dog\"",
|
||||||
"\"the quick brown fox jumps over the lazy. dog\"",
|
"\"the quick brown fox jumps over the lazy. dog\"",
|
||||||
"\"dog the quick brown fox jumps over the lazy\"",
|
"\"dog the quick brown fox jumps over the lazy\"",
|
||||||
"\"brown quick fox jumps over the lazy dog\"",
|
|
||||||
"\"the. quick brown fox jumps over the lazy. dog\"",
|
|
||||||
"\"the very quick dark brown and smart fox did jump over the terribly lazy and small dog\"",
|
"\"the very quick dark brown and smart fox did jump over the terribly lazy and small dog\"",
|
||||||
|
"\"the. quick brown fox jumps over the lazy. dog\"",
|
||||||
]
|
]
|
||||||
"###);
|
"###);
|
||||||
}
|
}
|
||||||
@@ -371,7 +371,7 @@ fn test_proximity_prefix_db() {
     s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
     s.query("best s");
     let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
-    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 13, 9, 12, 6, 7, 8, 11, 15]");
+    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 13, 9, 12, 8, 6, 7, 11, 15]");
     insta::assert_snapshot!(format!("{document_scores:#?}"));
     let texts = collect_field_values(&index, &txn, "text", &documents_ids);
 
@@ -382,9 +382,9 @@ fn test_proximity_prefix_db() {
         "\"summer best\"",
         "\"this is the best meal of summer\"",
         "\"summer x best\"",
+        "\"this is the best meal of the summer\"",
         "\"this is the best meal I have ever had in such a beautiful summer day\"",
         "\"this is the best cooked meal of the summer\"",
-        "\"this is the best meal of the summer\"",
         "\"summer x y best\"",
         "\"this is the best meal I have ever had in such a beautiful winter day\"",
     ]
@@ -396,7 +396,7 @@ fn test_proximity_prefix_db() {
     s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
     s.query("best su");
     let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
-    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 13, 9, 12, 6, 7, 8, 11, 15]");
+    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 13, 9, 12, 8, 11, 7, 6, 15]");
     insta::assert_snapshot!(format!("{document_scores:#?}"));
     let texts = collect_field_values(&index, &txn, "text", &documents_ids);
 
@@ -406,10 +406,10 @@ fn test_proximity_prefix_db() {
         "\"summer best\"",
         "\"this is the best meal of summer\"",
         "\"summer x best\"",
-        "\"this is the best meal I have ever had in such a beautiful summer day\"",
-        "\"this is the best cooked meal of the summer\"",
         "\"this is the best meal of the summer\"",
         "\"summer x y best\"",
+        "\"this is the best cooked meal of the summer\"",
+        "\"this is the best meal I have ever had in such a beautiful summer day\"",
         "\"this is the best meal I have ever had in such a beautiful winter day\"",
     ]
     "###);
@@ -447,7 +447,7 @@ fn test_proximity_prefix_db() {
     s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
     s.query("best wint");
     let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
-    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[19, 22, 18, 21, 15, 16, 17, 20]");
+    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[19, 22, 18, 21, 17, 20, 16, 15]");
     insta::assert_snapshot!(format!("{document_scores:#?}"));
     let texts = collect_field_values(&index, &txn, "text", &documents_ids);
 
@@ -457,10 +457,10 @@ fn test_proximity_prefix_db() {
         "\"winter best\"",
         "\"this is the best meal of winter\"",
         "\"winter x best\"",
-        "\"this is the best meal I have ever had in such a beautiful winter day\"",
-        "\"this is the best cooked meal of the winter\"",
         "\"this is the best meal of the winter\"",
         "\"winter x y best\"",
+        "\"this is the best cooked meal of the winter\"",
+        "\"this is the best meal I have ever had in such a beautiful winter day\"",
     ]
     "###);
 
@@ -471,7 +471,7 @@ fn test_proximity_prefix_db() {
     s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
     s.query("best wi");
     let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
-    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[19, 22, 18, 21, 15, 16, 17, 20]");
+    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[19, 22, 18, 21, 17, 15, 16, 20]");
     insta::assert_snapshot!(format!("{document_scores:#?}"));
     let texts = collect_field_values(&index, &txn, "text", &documents_ids);
 
@@ -481,9 +481,9 @@ fn test_proximity_prefix_db() {
         "\"winter best\"",
         "\"this is the best meal of winter\"",
         "\"winter x best\"",
+        "\"this is the best meal of the winter\"",
         "\"this is the best meal I have ever had in such a beautiful winter day\"",
         "\"this is the best cooked meal of the winter\"",
-        "\"this is the best meal of the winter\"",
         "\"winter x y best\"",
     ]
     "###);
@@ -68,8 +68,8 @@ fn test_trap_basic() {
     [
         Proximity(
             Rank {
-                rank: 4,
-                max_rank: 4,
+                rank: 8,
+                max_rank: 8,
             },
         ),
         Typo(
@@ -82,8 +82,8 @@ fn test_trap_basic() {
     [
         Proximity(
             Rank {
-                rank: 4,
-                max_rank: 4,
+                rank: 8,
+                max_rank: 8,
             },
         ),
         Typo(
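The remaining hunks in this compare are almost entirely regenerated insta snapshots (the inline @"..." literals above and the .snap score files that follow), with the same documents but Proximity rank/max_rank values rescaled for the larger distance cap. As a reminder of how these assertions work, here is a minimal standalone example of the inline-snapshot form used throughout; the values are hypothetical, and in practice the literals are regenerated with the cargo-insta tooling (`cargo insta review`) rather than edited by hand:

// Minimal example of insta's inline snapshot assertion, as used in the
// hunks above. Requires the `insta` dev-dependency.
fn ranked_ids() -> Vec<u32> {
    vec![9, 10, 4, 7]
}

#[test]
fn snapshot_example() {
    let ids = ranked_ids();
    // The @"..." literal is the stored snapshot; a mismatch fails the test.
    insta::assert_snapshot!(format!("{ids:?}"), @"[9, 10, 4, 7]");
}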
expression: "format!(\"{document_ids_scores:#?}\")"
@@ -23,8 +23,8 @@    Proximity Rank { rank: 9 -> 35, max_rank: 25 -> 57 }
@@ -49,8 +49,8 @@    Proximity Rank { rank: 9 -> 35, max_rank: 25 -> 57 }
@@ -75,8 +75,8 @@    Proximity Rank { rank: 9 -> 35, max_rank: 25 -> 57 }

expression: "format!(\"{document_ids_scores:#?}\")"
@@ -23,8 +23,8 @@    Proximity Rank { rank: 25 -> 57, max_rank: 25 -> 57 }
@@ -49,8 +49,8 @@    Proximity Rank { rank: 24 -> 56, max_rank: 25 -> 57 }
@@ -75,8 +75,8 @@    Proximity Rank { rank: 9 -> 35, max_rank: 25 -> 57 }
@@ -101,8 +101,8 @@  Proximity Rank { rank: 10 -> 22, max_rank: 10 -> 22 }
@@ -127,8 +127,8 @@  Proximity Rank { rank: 10 -> 22, max_rank: 10 -> 22 }
@@ -153,8 +153,8 @@  Proximity Rank { rank: 10 -> 22, max_rank: 10 -> 22 }
@@ -179,8 +179,8 @@  Proximity Rank { rank: 9 -> 21, max_rank: 10 -> 22 }
@@ -205,8 +205,8 @@  Proximity Rank { rank: 5 -> 17, max_rank: 10 -> 22 }
@@ -231,8 +231,8 @@  Proximity Rank { rank: 5 -> 17, max_rank: 10 -> 22 }
source: milli/src/search/new/tests/proximity.rs
expression: "format!(\"{document_scores:#?}\")"
@@ -3,35 +3,59 @@   leading Proximity Rank entries (rank/max_rank): 4/4, 3/4, 2/4, 2/4 -> 8/8, 7/8, 6/8, 6/8, 5/8, 5/8, 4/8
@@ -39,31 +63,7 @@   trailing Proximity Rank entries (rank/max_rank): 1/4 x4 -> 1/8

expression: "format!(\"{document_scores:#?}\")"
@@ -6,32 +6,40 @@   leading Proximity Rank entries (rank/max_rank): 4/4, 3/4, 2/4, 2/4 -> 8/8, 7/8, 6/8, 6/8, 5/8
@@ -39,7 +47,7 @@    Proximity Rank { rank: 1, max_rank: 4 -> 8 }
@@ -47,7 +55,7 @@    Proximity Rank { rank: 1, max_rank: 4 -> 8 }
@@ -55,15 +63,7 @@   trailing Proximity Rank entries (rank/max_rank): 1/4 x2 -> 1/8

expression: "format!(\"{document_scores:#?}\")"
@@ -6,32 +6,40 @@   leading Proximity Rank entries (rank/max_rank): 4/4, 3/4, 2/4, 2/4 -> 8/8, 7/8, 6/8, 6/8, 5/8
@@ -39,7 +47,7 @@    Proximity Rank { rank: 1, max_rank: 4 -> 8 }
@@ -47,7 +55,7 @@    Proximity Rank { rank: 1, max_rank: 4 -> 8 }
@@ -55,7 +63,7 @@    Proximity Rank { rank: 1, max_rank: 4 -> 8 }
@@ -63,15 +71,7 @@   trailing Proximity Rank entries (rank/max_rank): 1/4 x2 -> 1/8
source: milli/src/search/new/tests/proximity.rs
expression: "format!(\"{document_scores:#?}\")"
@@ -3,35 +3,59 @@   leading Proximity Rank entries (rank/max_rank): 4/4, 3/4, 2/4, 2/4 -> 8/8, 7/8, 6/8, 6/8, 5/8, 5/8, 4/8
@@ -39,7 +63,7 @@    Proximity Rank { rank: 1, max_rank: 4 -> 8 }
@@ -47,31 +71,7 @@   trailing Proximity Rank entries (rank/max_rank): 1/4 x4 -> 1/8

expression: "format!(\"{document_scores:#?}\")"
@@ -7,7 +7,7 @@      Proximity Rank { rank: 1, max_rank: 4 -> 8 }
@@ -15,7 +15,7 @@    Proximity Rank { rank: 1, max_rank: 4 -> 8 }
@@ -23,7 +23,7 @@    Proximity Rank { rank: 1, max_rank: 4 -> 8 }
@@ -31,7 +31,7 @@    Proximity Rank { rank: 1, max_rank: 4 -> 8 }
@@ -39,7 +39,7 @@    Proximity Rank { rank: 1, max_rank: 4 -> 8 }
@@ -47,7 +47,7 @@    Proximity Rank { rank: 1, max_rank: 4 -> 8 }
@@ -55,7 +55,7 @@    Proximity Rank { rank: 1, max_rank: 4 -> 8 }
@@ -63,7 +63,7 @@    Proximity Rank { rank: 1, max_rank: 4 -> 8 }
expression: "format!(\"{document_scores:#?}\")"
@@ -6,24 +6,24 @@    Proximity Rank { rank: 4 -> 8, max_rank: 4 -> 8 } (three entries)
@@ -31,7 +31,7 @@    Proximity Rank { rank: 1, max_rank: 4 -> 8 }
@@ -39,7 +39,7 @@    Proximity Rank { rank: 1, max_rank: 4 -> 8 }

expression: "format!(\"{document_scores:#?}\")"
@@ -6,16 +6,16 @@    Proximity Rank { rank: 4 -> 8, max_rank: 4 -> 8 } (two entries)
@@ -23,7 +23,7 @@    Proximity Rank { rank: 1, max_rank: 4 -> 8 }

expression: "format!(\"{document_scores:#?}\")"
@@ -6,16 +6,16 @@    Proximity Rank { rank: 4 -> 8, max_rank: 4 -> 8 } (two entries)
@@ -23,7 +23,7 @@    Proximity Rank { rank: 1, max_rank: 4 -> 8 }
expression: "format!(\"{document_scores:#?}\")"
@@ -12,8 +12,8 @@    Proximity Rank { rank: 4 -> 8, max_rank: 4 -> 8 }
@@ -26,8 +26,8 @@    Proximity Rank { rank: 1 -> 5, max_rank: 4 -> 8 }
@@ -40,8 +40,8 @@    Proximity Rank { rank: 4 -> 8, max_rank: 4 -> 8 }
@@ -54,8 +54,8 @@    Proximity Rank { rank: 3 -> 7, max_rank: 4 -> 8 }

expression: "format!(\"{document_scores:#?}\")"
@@ -12,8 +12,8 @@    Proximity Rank { rank: 7 -> 15, max_rank: 7 -> 15 }
@@ -26,8 +26,8 @@    Proximity Rank { rank: 4 -> 8, max_rank: 7 -> 15 }
expression: "format!(\"{document_scores:#?}\")"
@@ -12,8 +12,8 @@    Proximity Rank { rank: 22 -> 50, max_rank: 22 -> 50 }
@@ -26,8 +26,8 @@    Proximity Rank { rank: 22 -> 50, max_rank: 22 -> 50 }
@@ -40,8 +40,8 @@    Proximity Rank { rank: 21 -> 49, max_rank: 22 -> 50 }
@@ -54,8 +54,8 @@    Proximity Rank { rank: 21 -> 49, max_rank: 22 -> 50 }
@@ -68,8 +68,8 @@    Proximity Rank { rank: 20 -> 48, max_rank: 22 -> 50 }
@@ -82,8 +82,8 @@    Proximity Rank { rank: 17 -> 41, max_rank: 22 -> 50 }
@@ -96,8 +96,8 @@    Proximity Rank { rank: 16 -> 40, max_rank: 22 -> 50 }
@@ -110,8 +110,8 @@  Proximity Rank { rank: 19 -> 43, max_rank: 19 -> 43 }
@@ -124,8 +124,8 @@  Proximity Rank { rank: 16 -> 36, max_rank: 16 -> 36 }
@@ -138,8 +138,8 @@  Proximity Rank { rank: 13 -> 31, max_rank: 16 -> 36 }
@@ -152,8 +152,8 @@  Proximity Rank { rank: 10 -> 22, max_rank: 10 -> 22 }
@@ -166,8 +166,8 @@  Proximity Rank { rank: 7 -> 15, max_rank: 7 -> 15 }
@@ -180,8 +180,8 @@  Proximity Rank { rank: 7 -> 15, max_rank: 7 -> 15 }
@@ -194,8 +194,8 @@  Proximity Rank { rank: 7 -> 15, max_rank: 7 -> 15 }
@@ -208,8 +208,8 @@  Proximity Rank { rank: 4 -> 8, max_rank: 4 -> 8 }

expression: "format!(\"{document_scores:#?}\")"
@@ -12,8 +12,8 @@    Proximity Rank { rank: 19 -> 43, max_rank: 19 -> 43 }
@@ -26,8 +26,8 @@    Proximity Rank { rank: 19 -> 43, max_rank: 19 -> 43 }
@@ -40,8 +40,8 @@    Proximity Rank { rank: 18 -> 42, max_rank: 19 -> 43 }
@@ -54,8 +54,8 @@    Proximity Rank { rank: 18 -> 42, max_rank: 19 -> 43 }
@@ -68,8 +68,8 @@    Proximity Rank { rank: 17 -> 41, max_rank: 19 -> 43 }
@@ -82,8 +82,8 @@    Proximity Rank { rank: 14 -> 34, max_rank: 19 -> 43 }
@@ -96,8 +96,8 @@    Proximity Rank { rank: 13 -> 33, max_rank: 19 -> 43 }
@@ -110,8 +110,8 @@  Proximity Rank { rank: 16 -> 36, max_rank: 16 -> 36 }
@@ -124,8 +124,8 @@  Proximity Rank { rank: 13 -> 29, max_rank: 13 -> 29 }
@@ -138,8 +138,8 @@  Proximity Rank { rank: 10 -> 24, max_rank: 13 -> 29 }
@@ -152,8 +152,8 @@  Proximity Rank { rank: 7 -> 15, max_rank: 7 -> 15 }
expression: "format!(\"{document_scores:#?}\")"
@@ -12,8 +12,8 @@    Proximity Rank { rank: 25 -> 57, max_rank: 25 -> 57 }
@@ -26,8 +26,8 @@    Proximity Rank { rank: 25 -> 57, max_rank: 25 -> 57 }
@@ -40,8 +40,8 @@    Proximity Rank { rank: 24 -> 56, max_rank: 25 -> 57 }
@@ -54,8 +54,8 @@    Proximity Rank { rank: 24 -> 56, max_rank: 25 -> 57 }
@@ -68,8 +68,8 @@    Proximity Rank { rank: 23 -> 55, max_rank: 25 -> 57 }
@@ -82,8 +82,8 @@    Proximity Rank { rank: 22 -> 54, max_rank: 25 -> 57 }
@@ -96,8 +96,8 @@    Proximity Rank { rank: 21 -> 53, max_rank: 25 -> 57 }
@@ -110,8 +110,8 @@  Proximity Rank { rank: 20 -> 52, max_rank: 25 -> 57 }
@@ -124,8 +124,8 @@  Proximity Rank { rank: 20 -> 51, max_rank: 25 -> 57 }
@@ -138,8 +138,8 @@  Proximity Rank { rank: 19 -> 48, max_rank: 25 -> 57 }
@@ -152,8 +152,8 @@  Proximity Rank { rank: 19 -> 47, max_rank: 25 -> 57 }
@@ -167,7 +167,7 @@  Proximity Rank { rank: 1, max_rank: 25 -> 57 }
@@ -178,6 +178,62 @@   adds entries: Proximity 50/50; Words 7/9 + Proximity 43/43; Words 7/9 + Proximity 38/43; Words 5/9 + Proximity 29/29; Words 4/9
@@ -188,42 +244,14 @@  Words matching_words: 7 -> 4; Proximity Rank { rank: 19 -> 22, max_rank: 19 -> 22 }; drops Words 7/9 + Proximity 16/19 and Words 5/9 + Proximity 13/13
@@ -236,36 +264,8 @@   Proximity Rank { rank: 10 -> 22, max_rank: 10 -> 22 }; drops two Words 4/9 + Proximity 10/10 entries
@@ -278,8 +278,8 @@    Proximity Rank { rank: 7 -> 15, max_rank: 7 -> 15 }

expression: "format!(\"{document_scores:#?}\")"
@@ -12,8 +12,8 @@    Proximity Rank { rank: 25 -> 57, max_rank: 25 -> 57 }
@@ -26,8 +26,8 @@    Proximity Rank { rank: 24 -> 56, max_rank: 25 -> 57 }
@@ -40,8 +40,8 @@    Proximity Rank { rank: 23 -> 55, max_rank: 25 -> 57 }
@@ -54,8 +54,8 @@    Proximity Rank { rank: 22 -> 54, max_rank: 25 -> 57 }
@@ -68,8 +68,8 @@    Proximity Rank { rank: 22 -> 54, max_rank: 25 -> 57 }
@@ -82,8 +82,8 @@    Proximity Rank { rank: 22 -> 54, max_rank: 25 -> 57 }
@@ -96,8 +96,8 @@    Proximity Rank { rank: 21 -> 53, max_rank: 25 -> 57 }
@@ -110,8 +110,8 @@  Proximity Rank { rank: 21 -> 53, max_rank: 25 -> 57 }
@@ -124,8 +124,8 @@  Proximity Rank { rank: 20 -> 52, max_rank: 25 -> 57 }
@@ -138,8 +138,8 @@  Proximity Rank { rank: 18 -> 47, max_rank: 25 -> 57 }
@@ -152,8 +152,8 @@  Proximity Rank { rank: 18 -> 45, max_rank: 25 -> 57 }
@@ -167,7 +167,7 @@  Proximity Rank { rank: 1, max_rank: 25 -> 57 }
@@ -178,6 +178,62 @@   adds entries: Proximity 47/50; Words 7/9 + Proximity 40/43; Words 7/9 + Proximity 35/43; Words 5/9 + Proximity 26/29; Words 4/9
@@ -188,42 +244,14 @@  Words matching_words: 7 -> 4; Proximity Rank { rank: 16 -> 19, max_rank: 19 -> 22 }; drops Words 7/9 + Proximity 13/19 and Words 5/9 + Proximity 10/13
@@ -236,36 +264,8 @@   Proximity Rank { rank: 7 -> 19, max_rank: 10 -> 22 }; drops two Words 4/9 + Proximity 7/10 entries
@@ -278,8 +278,8 @@    Proximity Rank { rank: 5 -> 13, max_rank: 7 -> 15 }
expression: "format!(\"{document_scores:#?}\")"
@@ -6,88 +6,88 @@   Proximity Rank entries (rank/max_rank): 25/25, 25/25, 24/25, 24/25, 23/25, 22/25, 21/25, 20/25, 20/25, 19/25, 19/25 -> 57/57, 57/57, 56/57, 56/57, 55/57, 54/57, 53/57, 52/57, 51/57, 48/57, 47/57
@@ -95,7 +95,7 @@    Proximity Rank { rank: 1, max_rank: 25 -> 57 }
@@ -351,5 +351,5 @@ fn test_redacted() {
         .map(|scores| score_details::ScoreDetails::to_json_map(scores.iter()))
         .collect();
     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 2, 4, 5, 22, 23, 13, 1, 3, 12, 21, 11, 20, 6, 7, 8, 9, 10, 14, 15]");
-    insta::assert_json_snapshot!(document_scores_json);
+    // insta::assert_json_snapshot!(document_scores_json);
 }
@@ -259,8 +259,8 @@ fn test_ignore_stop_words() {
         ),
         Proximity(
             Rank {
-                rank: 3,
-                max_rank: 4,
+                rank: 7,
+                max_rank: 8,
             },
         ),
         Fid(
@@ -411,8 +411,8 @@ fn test_stop_words_in_phrase() {
         ),
         Proximity(
             Rank {
-                rank: 2,
-                max_rank: 4,
+                rank: 6,
+                max_rank: 8,
             },
         ),
         Fid(
@@ -277,7 +277,7 @@ fn test_words_proximity_tms_last_simple() {
     let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
 
     // 7 is better than 6 because of the proximity between "the" and its surrounding terms
-    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9, 21, 14, 17, 13, 10, 18, 16, 19, 15, 20, 22, 8, 7, 6, 5, 4, 11, 12, 3]");
+    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9, 21, 14, 17, 13, 10, 18, 19, 20, 16, 15, 22, 8, 7, 6, 5, 4, 11, 12, 3]");
     insta::assert_snapshot!(format!("{document_scores:#?}"));
     let texts = collect_field_values(&index, &txn, "text", &documents_ids);
     insta::assert_debug_snapshot!(texts, @r###"
@@ -289,10 +289,10 @@ fn test_words_proximity_tms_last_simple() {
         "\"the mighty and quick brown fox jumps over the lazy dog\"",
         "\"the brown quick fox jumps over the lazy dog\"",
         "\"the brown quick fox jumps over the really lazy dog\"",
-        "\"this quick brown and scary fox jumps over the lazy dog\"",
         "\"the brown quick fox immediately jumps over the really lazy dog\"",
-        "\"this quick brown and very scary fox jumps over the lazy dog\"",
         "\"the brown quick fox immediately jumps over the really lazy blue dog\"",
+        "\"this quick brown and scary fox jumps over the lazy dog\"",
+        "\"this quick brown and very scary fox jumps over the lazy dog\"",
         "\"the, quick, brown, fox, jumps, over, the, lazy, dog\"",
         "\"the quick brown fox jumps over the lazy\"",
         "\"the quick brown fox jumps over the\"",
@@ -312,7 +312,7 @@ fn test_words_proximity_tms_last_simple() {
     let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
 
     // 10 is better than 9 because of the proximity between "quick" and "brown"
-    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 18, 19, 9, 20, 21, 14, 17, 13, 15, 16, 22, 8, 7, 6, 5, 4, 11, 12, 3]");
+    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 18, 19, 9, 20, 21, 14, 17, 13, 16, 15, 22, 8, 7, 6, 5, 4, 11, 12, 3]");
     insta::assert_snapshot!(format!("{document_scores:#?}"));
     let texts = collect_field_values(&index, &txn, "text", &documents_ids);
     insta::assert_debug_snapshot!(texts, @r###"
@@ -326,8 +326,8 @@ fn test_words_proximity_tms_last_simple() {
         "\"the great quick brown fox jumps over the lazy dog\"",
         "\"the quick brown fox jumps over the really lazy dog\"",
         "\"the mighty and quick brown fox jumps over the lazy dog\"",
-        "\"this quick brown and very scary fox jumps over the lazy dog\"",
         "\"this quick brown and scary fox jumps over the lazy dog\"",
+        "\"this quick brown and very scary fox jumps over the lazy dog\"",
         "\"the, quick, brown, fox, jumps, over, the, lazy, dog\"",
         "\"the quick brown fox jumps over the lazy\"",
         "\"the quick brown fox jumps over the\"",
@@ -427,7 +427,7 @@ fn test_words_tms_all() {
     s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
     let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
 
-    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9, 21, 14, 17, 13, 10, 18, 16, 19, 15, 20, 22]");
+    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9, 21, 14, 17, 13, 10, 18, 19, 20, 16, 15, 22]");
     insta::assert_snapshot!(format!("{document_scores:#?}"));
     let texts = collect_field_values(&index, &txn, "text", &documents_ids);
     insta::assert_debug_snapshot!(texts, @r###"
@@ -439,10 +439,10 @@ fn test_words_tms_all() {
         "\"the mighty and quick brown fox jumps over the lazy dog\"",
         "\"the brown quick fox jumps over the lazy dog\"",
         "\"the brown quick fox jumps over the really lazy dog\"",
-        "\"this quick brown and scary fox jumps over the lazy dog\"",
         "\"the brown quick fox immediately jumps over the really lazy dog\"",
-        "\"this quick brown and very scary fox jumps over the lazy dog\"",
         "\"the brown quick fox immediately jumps over the really lazy blue dog\"",
+        "\"this quick brown and scary fox jumps over the lazy dog\"",
+        "\"this quick brown and very scary fox jumps over the lazy dog\"",
         "\"the, quick, brown, fox, jumps, over, the, lazy, dog\"",
     ]
     "###);
@@ -108,17 +108,15 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
         self.delete_document(docid);
         Some(docid)
     }

     pub fn execute(self) -> Result<DocumentDeletionResult> {
+        puffin::profile_function!();

         let DetailedDocumentDeletionResult { deleted_documents, remaining_documents } =
             self.execute_inner()?;

         Ok(DocumentDeletionResult { deleted_documents, remaining_documents })
     }

     pub(crate) fn execute_inner(mut self) -> Result<DetailedDocumentDeletionResult> {
-        puffin::profile_function!();
-
         self.index.set_updated_at(self.wtxn, &OffsetDateTime::now_utc())?;

         // We retrieve the current documents ids that are in the database.

@@ -478,8 +476,6 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
     C: for<'a> BytesDecode<'a, DItem = RoaringBitmap>
         + for<'a> BytesEncode<'a, EItem = RoaringBitmap>,
 {
-    puffin::profile_function!();
-
     while let Some(result) = iter.next() {
         let (bytes, mut docids) = result?;
         let previous_len = docids.len();

@@ -502,8 +498,6 @@ fn remove_from_word_prefix_docids(
     db: &Database<Str, RoaringBitmapCodec>,
     to_remove: &RoaringBitmap,
 ) -> Result<fst::Set<Vec<u8>>> {
-    puffin::profile_function!();
-
     let mut prefixes_to_delete = fst::SetBuilder::memory();

     // We iterate over the word prefix docids database and remove the deleted documents ids

@@ -534,8 +528,6 @@ fn remove_from_word_docids(
     words_to_keep: &mut BTreeSet<String>,
     words_to_remove: &mut BTreeSet<String>,
 ) -> Result<()> {
-    puffin::profile_function!();
-
     // We create an iterator to be able to get the content and delete the word docids.
     // It's faster to acquire a cursor to get and delete or put, as we avoid traversing
     // the LMDB B-Tree two times but only once.

@@ -567,8 +559,6 @@ fn remove_docids_from_field_id_docid_facet_value(
     field_id: FieldId,
     to_remove: &RoaringBitmap,
 ) -> heed::Result<HashSet<Vec<u8>>> {
-    puffin::profile_function!();
-
     let db = match facet_type {
         FacetType::String => {
             index.field_id_docid_facet_strings.remap_types::<ByteSlice, DecodeIgnore>()

@@ -604,8 +594,6 @@ fn remove_docids_from_facet_id_docids<'a, C>(
 where
     C: heed::BytesDecode<'a> + heed::BytesEncode<'a>,
 {
-    puffin::profile_function!();
-
     let mut iter = db.remap_key_type::<ByteSlice>().iter_mut(wtxn)?;
     while let Some(result) = iter.next() {
         let (bytes, mut docids) = result?;
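Aside: the `puffin::profile_function!();` statements appearing and disappearing in the hunks above come from the puffin profiling crate. The following is a minimal sketch, assuming the puffin crate is available as a dependency, of how such scopes are usually enabled and recorded; `do_work` and the frame loop are illustrative only and are not code from this repository.

use std::thread;
use std::time::Duration;

// Hypothetical function used only for this sketch.
fn do_work() {
    // Records a profiling scope named after the enclosing function,
    // exactly like the calls shown in the diff above.
    puffin::profile_function!();
    thread::sleep(Duration::from_millis(5));
}

fn main() {
    // Scopes are no-ops until profiling is switched on.
    puffin::set_scopes_on(true);
    for _ in 0..3 {
        do_work();
        // A frame groups all scopes recorded since the previous call;
        // a profiler UI such as puffin_viewer can then display them.
        puffin::GlobalProfiler::lock().new_frame();
    }
}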
@@ -1,6 +1,5 @@
 use std::borrow::Cow;
 use std::fs::File;
-use std::io::BufReader;

 use grenad::CompressionType;
 use heed::types::ByteSlice;

@@ -31,7 +30,7 @@ pub struct FacetsUpdateBulk<'i> {
     facet_type: FacetType,
     field_ids: Vec<FieldId>,
     // None if level 0 does not need to be updated
-    new_data: Option<grenad::Reader<BufReader<File>>>,
+    new_data: Option<grenad::Reader<File>>,
 }

 impl<'i> FacetsUpdateBulk<'i> {

@@ -39,7 +38,7 @@ impl<'i> FacetsUpdateBulk<'i> {
         index: &'i Index,
         field_ids: Vec<FieldId>,
         facet_type: FacetType,
-        new_data: grenad::Reader<BufReader<File>>,
+        new_data: grenad::Reader<File>,
         group_size: u8,
         min_level_size: u8,
     ) -> FacetsUpdateBulk<'i> {

@@ -188,7 +187,7 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
         &self,
         field_id: FieldId,
         txn: &RoTxn,
-    ) -> Result<(Vec<grenad::Reader<BufReader<File>>>, RoaringBitmap)> {
+    ) -> Result<(Vec<grenad::Reader<File>>, RoaringBitmap)> {
         let mut all_docids = RoaringBitmap::new();
         let subwriters = self.compute_higher_levels(txn, field_id, 32, &mut |bitmaps, _| {
             for bitmap in bitmaps {

@@ -260,7 +259,7 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
         field_id: u16,
         level: u8,
         handle_group: &mut dyn FnMut(&[RoaringBitmap], &'t [u8]) -> Result<()>,
-    ) -> Result<Vec<grenad::Reader<BufReader<File>>>> {
+    ) -> Result<Vec<grenad::Reader<File>>> {
         if level == 0 {
             self.read_level_0(rtxn, field_id, handle_group)?;
             // Level 0 is already in the database
@@ -1,6 +1,5 @@
 use std::collections::HashMap;
 use std::fs::File;
-use std::io::BufReader;

 use heed::types::{ByteSlice, DecodeIgnore};
 use heed::{BytesDecode, Error, RoTxn, RwTxn};

@@ -35,14 +34,14 @@ pub struct FacetsUpdateIncremental<'i> {
     index: &'i Index,
     inner: FacetsUpdateIncrementalInner,
     facet_type: FacetType,
-    new_data: grenad::Reader<BufReader<File>>,
+    new_data: grenad::Reader<File>,
 }

 impl<'i> FacetsUpdateIncremental<'i> {
     pub fn new(
         index: &'i Index,
         facet_type: FacetType,
-        new_data: grenad::Reader<BufReader<File>>,
+        new_data: grenad::Reader<File>,
         group_size: u8,
         min_level_size: u8,
         max_group_size: u8,
@@ -78,7 +78,6 @@ pub const FACET_MIN_LEVEL_SIZE: u8 = 5;

 use std::collections::BTreeSet;
 use std::fs::File;
-use std::io::BufReader;
 use std::iter::FromIterator;

 use charabia::normalizer::{Normalize, NormalizerOption};

@@ -109,17 +108,13 @@ pub struct FacetsUpdate<'i> {
     index: &'i Index,
     database: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
     facet_type: FacetType,
-    new_data: grenad::Reader<BufReader<File>>,
+    new_data: grenad::Reader<File>,
     group_size: u8,
     max_group_size: u8,
     min_level_size: u8,
 }
 impl<'i> FacetsUpdate<'i> {
-    pub fn new(
-        index: &'i Index,
-        facet_type: FacetType,
-        new_data: grenad::Reader<BufReader<File>>,
-    ) -> Self {
+    pub fn new(index: &'i Index, facet_type: FacetType, new_data: grenad::Reader<File>) -> Self {
         let database = match facet_type {
             FacetType::String => index
                 .facet_id_string_docids
@@ -1,4 +1,4 @@
-use std::io::{BufWriter, Read, Seek};
+use std::io::{Read, Seek};
 use std::result::Result as StdResult;
 use std::{fmt, iter};

@@ -35,7 +35,7 @@ pub fn enrich_documents_batch<R: Read + Seek>(

     let (mut cursor, mut documents_batch_index) = reader.into_cursor_and_fields_index();

-    let mut external_ids = tempfile::tempfile().map(BufWriter::new).map(grenad::Writer::new)?;
+    let mut external_ids = tempfile::tempfile().map(grenad::Writer::new)?;
     let mut uuid_buffer = [0; uuid::fmt::Hyphenated::LENGTH];

     // The primary key *field id* that has already been set for this index or the one
@@ -1,7 +1,6 @@
 use std::collections::{HashMap, HashSet};
 use std::convert::TryInto;
 use std::fs::File;
-use std::io::BufReader;
 use std::{io, mem, str};

 use charabia::{Language, Script, SeparatorKind, Token, TokenKind, Tokenizer, TokenizerBuilder};

@@ -32,7 +31,7 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
     allowed_separators: Option<&[&str]>,
     dictionary: Option<&[&str]>,
     max_positions_per_attributes: Option<u32>,
-) -> Result<(RoaringBitmap, grenad::Reader<BufReader<File>>, ScriptLanguageDocidsMap)> {
+) -> Result<(RoaringBitmap, grenad::Reader<File>, ScriptLanguageDocidsMap)> {
     puffin::profile_function!();

     let max_positions_per_attributes = max_positions_per_attributes
@@ -1,5 +1,5 @@
 use std::fs::File;
-use std::io::{self, BufReader};
+use std::io;

 use heed::{BytesDecode, BytesEncode};

@@ -19,7 +19,7 @@ use crate::Result;
 pub fn extract_facet_number_docids<R: io::Read + io::Seek>(
     docid_fid_facet_number: grenad::Reader<R>,
     indexer: GrenadParameters,
-) -> Result<grenad::Reader<BufReader<File>>> {
+) -> Result<grenad::Reader<File>> {
     puffin::profile_function!();

     let max_memory = indexer.max_memory_by_thread();
@@ -1,5 +1,5 @@
 use std::fs::File;
-use std::io::{self, BufReader};
+use std::io;

 use heed::BytesEncode;

@@ -17,7 +17,7 @@ use crate::{FieldId, Result, MAX_FACET_VALUE_LENGTH};
 pub fn extract_facet_string_docids<R: io::Read + io::Seek>(
     docid_fid_facet_string: grenad::Reader<R>,
     indexer: GrenadParameters,
-) -> Result<grenad::Reader<BufReader<File>>> {
+) -> Result<grenad::Reader<File>> {
     puffin::profile_function!();

     let max_memory = indexer.max_memory_by_thread();
@@ -1,7 +1,7 @@
 use std::collections::{BTreeMap, HashSet};
 use std::convert::TryInto;
 use std::fs::File;
-use std::io::{self, BufReader};
+use std::io;
 use std::mem::size_of;

 use heed::zerocopy::AsBytes;

@@ -17,11 +17,11 @@ use crate::{CboRoaringBitmapCodec, DocumentId, FieldId, Result, BEU32, MAX_FACET

 /// The extracted facet values stored in grenad files by type.
 pub struct ExtractedFacetValues {
-    pub docid_fid_facet_numbers_chunk: grenad::Reader<BufReader<File>>,
-    pub docid_fid_facet_strings_chunk: grenad::Reader<BufReader<File>>,
-    pub fid_facet_is_null_docids_chunk: grenad::Reader<BufReader<File>>,
-    pub fid_facet_is_empty_docids_chunk: grenad::Reader<BufReader<File>>,
-    pub fid_facet_exists_docids_chunk: grenad::Reader<BufReader<File>>,
+    pub docid_fid_facet_numbers_chunk: grenad::Reader<File>,
+    pub docid_fid_facet_strings_chunk: grenad::Reader<File>,
+    pub fid_facet_is_null_docids_chunk: grenad::Reader<File>,
+    pub fid_facet_is_empty_docids_chunk: grenad::Reader<File>,
+    pub fid_facet_exists_docids_chunk: grenad::Reader<File>,
 }

 /// Extracts the facet values of each faceted field of each document.
@@ -1,6 +1,6 @@
 use std::collections::HashMap;
 use std::fs::File;
-use std::io::{self, BufReader};
+use std::io;

 use grenad::Sorter;

@@ -21,7 +21,7 @@ use crate::{relative_from_absolute_position, DocumentId, FieldId, Result};
 pub fn extract_fid_word_count_docids<R: io::Read + io::Seek>(
     docid_word_positions: grenad::Reader<R>,
     indexer: GrenadParameters,
-) -> Result<grenad::Reader<BufReader<File>>> {
+) -> Result<grenad::Reader<File>> {
     puffin::profile_function!();

     let max_memory = indexer.max_memory_by_thread();
@@ -1,5 +1,5 @@
 use std::fs::File;
-use std::io::{self, BufReader};
+use std::io;

 use concat_arrays::concat_arrays;
 use serde_json::Value;

@@ -18,7 +18,7 @@ pub fn extract_geo_points<R: io::Read + io::Seek>(
     indexer: GrenadParameters,
     primary_key_id: FieldId,
     (lat_fid, lng_fid): (FieldId, FieldId),
-) -> Result<grenad::Reader<BufReader<File>>> {
+) -> Result<grenad::Reader<File>> {
     puffin::profile_function!();

     let mut writer = create_writer(
@@ -1,6 +1,6 @@
 use std::convert::TryFrom;
 use std::fs::File;
-use std::io::{self, BufReader};
+use std::io;

 use bytemuck::cast_slice;
 use serde_json::{from_slice, Value};

@@ -18,7 +18,7 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
     indexer: GrenadParameters,
     primary_key_id: FieldId,
     vectors_fid: FieldId,
-) -> Result<grenad::Reader<BufReader<File>>> {
+) -> Result<grenad::Reader<File>> {
     puffin::profile_function!();

     let mut writer = create_writer(
|||||||
@@ -1,6 +1,6 @@
|
|||||||
use std::collections::HashSet;
|
use std::collections::HashSet;
|
||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
use std::io::{self, BufReader};
|
use std::io;
|
||||||
use std::iter::FromIterator;
|
use std::iter::FromIterator;
|
||||||
|
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
@@ -26,7 +26,7 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(
|
|||||||
docid_word_positions: grenad::Reader<R>,
|
docid_word_positions: grenad::Reader<R>,
|
||||||
indexer: GrenadParameters,
|
indexer: GrenadParameters,
|
||||||
exact_attributes: &HashSet<FieldId>,
|
exact_attributes: &HashSet<FieldId>,
|
||||||
) -> Result<(grenad::Reader<BufReader<File>>, grenad::Reader<BufReader<File>>)> {
|
) -> Result<(grenad::Reader<File>, grenad::Reader<File>)> {
|
||||||
puffin::profile_function!();
|
puffin::profile_function!();
|
||||||
|
|
||||||
let max_memory = indexer.max_memory_by_thread();
|
let max_memory = indexer.max_memory_by_thread();
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
 use std::fs::File;
-use std::io::{self, BufReader};
+use std::io;

 use super::helpers::{
     create_sorter, merge_cbo_roaring_bitmaps, read_u32_ne_bytes, sorter_into_reader,

@@ -14,7 +14,7 @@ use crate::{relative_from_absolute_position, DocumentId, Result};
 pub fn extract_word_fid_docids<R: io::Read + io::Seek>(
     docid_word_positions: grenad::Reader<R>,
     indexer: GrenadParameters,
-) -> Result<grenad::Reader<BufReader<File>>> {
+) -> Result<grenad::Reader<File>> {
     puffin::profile_function!();

     let max_memory = indexer.max_memory_by_thread();
@@ -1,7 +1,6 @@
 use std::cmp::Ordering;
 use std::collections::{BinaryHeap, HashMap};
 use std::fs::File;
-use std::io::BufReader;
 use std::{cmp, io, mem, str, vec};

 use super::helpers::{

@@ -21,7 +20,7 @@ use crate::{DocumentId, Result};
 pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
     docid_word_positions: grenad::Reader<R>,
     indexer: GrenadParameters,
-) -> Result<grenad::Reader<BufReader<File>>> {
+) -> Result<grenad::Reader<File>> {
     puffin::profile_function!();

     let max_memory = indexer.max_memory_by_thread();
@@ -1,5 +1,5 @@
 use std::fs::File;
-use std::io::{self, BufReader};
+use std::io;

 use super::helpers::{
     create_sorter, merge_cbo_roaring_bitmaps, read_u32_ne_bytes, sorter_into_reader,

@@ -17,7 +17,7 @@ use crate::{bucketed_position, relative_from_absolute_position, DocumentId, Resu
 pub fn extract_word_position_docids<R: io::Read + io::Seek>(
     docid_word_positions: grenad::Reader<R>,
     indexer: GrenadParameters,
-) -> Result<grenad::Reader<BufReader<File>>> {
+) -> Result<grenad::Reader<File>> {
     puffin::profile_function!();

     let max_memory = indexer.max_memory_by_thread();
@@ -12,7 +12,6 @@ mod extract_word_position_docids;

 use std::collections::HashSet;
 use std::fs::File;
-use std::io::BufReader;

 use crossbeam_channel::Sender;
 use log::debug;

@@ -40,8 +39,8 @@ use crate::{FieldId, Result};
 /// Send data in grenad file over provided Sender.
 #[allow(clippy::too_many_arguments)]
 pub(crate) fn data_from_obkv_documents(
-    original_obkv_chunks: impl Iterator<Item = Result<grenad::Reader<BufReader<File>>>> + Send,
-    flattened_obkv_chunks: impl Iterator<Item = Result<grenad::Reader<BufReader<File>>>> + Send,
+    original_obkv_chunks: impl Iterator<Item = Result<grenad::Reader<File>>> + Send,
+    flattened_obkv_chunks: impl Iterator<Item = Result<grenad::Reader<File>>> + Send,
     indexer: GrenadParameters,
     lmdb_writer_sx: Sender<Result<TypedChunk>>,
     searchable_fields: Option<HashSet<FieldId>>,

@@ -153,7 +152,7 @@ pub(crate) fn data_from_obkv_documents(
         });
     }

-    spawn_extraction_task::<_, _, Vec<grenad::Reader<BufReader<File>>>>(
+    spawn_extraction_task::<_, _, Vec<grenad::Reader<File>>>(
         docid_word_positions_chunks.clone(),
         indexer,
         lmdb_writer_sx.clone(),

@@ -163,7 +162,7 @@ pub(crate) fn data_from_obkv_documents(
         "word-pair-proximity-docids",
     );

-    spawn_extraction_task::<_, _, Vec<grenad::Reader<BufReader<File>>>>(
+    spawn_extraction_task::<_, _, Vec<grenad::Reader<File>>>(
         docid_word_positions_chunks.clone(),
         indexer,
         lmdb_writer_sx.clone(),

@@ -173,11 +172,7 @@ pub(crate) fn data_from_obkv_documents(
         "field-id-wordcount-docids",
     );

-    spawn_extraction_task::<
-        _,
-        _,
-        Vec<(grenad::Reader<BufReader<File>>, grenad::Reader<BufReader<File>>)>,
-    >(
+    spawn_extraction_task::<_, _, Vec<(grenad::Reader<File>, grenad::Reader<File>)>>(
         docid_word_positions_chunks.clone(),
         indexer,
         lmdb_writer_sx.clone(),

@@ -190,7 +185,7 @@ pub(crate) fn data_from_obkv_documents(
         "word-docids",
     );

-    spawn_extraction_task::<_, _, Vec<grenad::Reader<BufReader<File>>>>(
+    spawn_extraction_task::<_, _, Vec<grenad::Reader<File>>>(
         docid_word_positions_chunks.clone(),
         indexer,
         lmdb_writer_sx.clone(),

@@ -199,7 +194,7 @@ pub(crate) fn data_from_obkv_documents(
         TypedChunk::WordPositionDocids,
         "word-position-docids",
     );
-    spawn_extraction_task::<_, _, Vec<grenad::Reader<BufReader<File>>>>(
+    spawn_extraction_task::<_, _, Vec<grenad::Reader<File>>>(
         docid_word_positions_chunks,
         indexer,
         lmdb_writer_sx.clone(),

@@ -209,7 +204,7 @@ pub(crate) fn data_from_obkv_documents(
         "word-fid-docids",
     );

-    spawn_extraction_task::<_, _, Vec<grenad::Reader<BufReader<File>>>>(
+    spawn_extraction_task::<_, _, Vec<grenad::Reader<File>>>(
         docid_fid_facet_strings_chunks,
         indexer,
         lmdb_writer_sx.clone(),

@@ -219,7 +214,7 @@ pub(crate) fn data_from_obkv_documents(
         "field-id-facet-string-docids",
     );

-    spawn_extraction_task::<_, _, Vec<grenad::Reader<BufReader<File>>>>(
+    spawn_extraction_task::<_, _, Vec<grenad::Reader<File>>>(
         docid_fid_facet_numbers_chunks,
         indexer,
         lmdb_writer_sx,

@@ -274,7 +269,7 @@ fn spawn_extraction_task<FE, FS, M>(
 /// Extract chunked data and send it into lmdb_writer_sx sender:
 /// - documents
 fn send_original_documents_data(
-    original_documents_chunk: Result<grenad::Reader<BufReader<File>>>,
+    original_documents_chunk: Result<grenad::Reader<File>>,
     indexer: GrenadParameters,
     lmdb_writer_sx: Sender<Result<TypedChunk>>,
     vectors_field_id: Option<FieldId>,

@@ -316,7 +311,7 @@ fn send_original_documents_data(
 #[allow(clippy::too_many_arguments)]
 #[allow(clippy::type_complexity)]
 fn send_and_extract_flattened_documents_data(
-    flattened_documents_chunk: Result<grenad::Reader<BufReader<File>>>,
+    flattened_documents_chunk: Result<grenad::Reader<File>>,
     indexer: GrenadParameters,
     lmdb_writer_sx: Sender<Result<TypedChunk>>,
     searchable_fields: &Option<HashSet<FieldId>>,

@@ -333,10 +328,7 @@ fn send_and_extract_flattened_documents_data(
         grenad::Reader<CursorClonableMmap>,
         (
             grenad::Reader<CursorClonableMmap>,
-            (
-                grenad::Reader<BufReader<File>>,
-                (grenad::Reader<BufReader<File>>, grenad::Reader<BufReader<File>>),
-            ),
+            (grenad::Reader<File>, (grenad::Reader<File>, grenad::Reader<File>)),
         ),
     ),
 )> {
@@ -1,6 +1,6 @@
 use std::borrow::Cow;
 use std::fs::File;
-use std::io::{self, BufReader, BufWriter, Seek};
+use std::io::{self, Seek};
 use std::time::Instant;

 use grenad::{CompressionType, Sorter};

@@ -17,13 +17,13 @@ pub fn create_writer<R: io::Write>(
     typ: grenad::CompressionType,
     level: Option<u32>,
     file: R,
-) -> grenad::Writer<BufWriter<R>> {
+) -> grenad::Writer<R> {
     let mut builder = grenad::Writer::builder();
     builder.compression_type(typ);
     if let Some(level) = level {
         builder.compression_level(level);
     }
-    builder.build(BufWriter::new(file))
+    builder.build(file)
 }

 pub fn create_sorter(

@@ -53,7 +53,7 @@ pub fn create_sorter(
 pub fn sorter_into_reader(
     sorter: grenad::Sorter<MergeFn>,
     indexer: GrenadParameters,
-) -> Result<grenad::Reader<BufReader<File>>> {
+) -> Result<grenad::Reader<File>> {
     let mut writer = create_writer(
         indexer.chunk_compression_type,
         indexer.chunk_compression_level,

@@ -64,18 +64,16 @@ pub fn sorter_into_reader(
     writer_into_reader(writer)
 }

-pub fn writer_into_reader(
-    writer: grenad::Writer<BufWriter<File>>,
-) -> Result<grenad::Reader<BufReader<File>>> {
-    let mut file = writer.into_inner()?.into_inner().map_err(|err| err.into_error())?;
+pub fn writer_into_reader(writer: grenad::Writer<File>) -> Result<grenad::Reader<File>> {
+    let mut file = writer.into_inner()?;
     file.rewind()?;
-    grenad::Reader::new(BufReader::new(file)).map_err(Into::into)
+    grenad::Reader::new(file).map_err(Into::into)
 }

 pub unsafe fn as_cloneable_grenad(
-    reader: &grenad::Reader<BufReader<File>>,
+    reader: &grenad::Reader<File>,
 ) -> Result<grenad::Reader<CursorClonableMmap>> {
-    let file = reader.get_ref().get_ref();
+    let file = reader.get_ref();
     let mmap = memmap2::Mmap::map(file)?;
     let cursor = io::Cursor::new(ClonableMmap::from(mmap));
     let reader = grenad::Reader::new(cursor)?;

@@ -91,8 +89,8 @@ where
     fn merge(self, merge_fn: MergeFn, indexer: &GrenadParameters) -> Result<Self::Output>;
 }

-impl MergeableReader for Vec<grenad::Reader<BufReader<File>>> {
-    type Output = grenad::Reader<BufReader<File>>;
+impl MergeableReader for Vec<grenad::Reader<File>> {
+    type Output = grenad::Reader<File>;

     fn merge(self, merge_fn: MergeFn, params: &GrenadParameters) -> Result<Self::Output> {
         let mut merger = MergerBuilder::new(merge_fn);

@@ -101,8 +99,8 @@ impl MergeableReader for Vec<grenad::Reader<BufReader<File>>> {
     }
 }

-impl MergeableReader for Vec<(grenad::Reader<BufReader<File>>, grenad::Reader<BufReader<File>>)> {
-    type Output = (grenad::Reader<BufReader<File>>, grenad::Reader<BufReader<File>>);
+impl MergeableReader for Vec<(grenad::Reader<File>, grenad::Reader<File>)> {
+    type Output = (grenad::Reader<File>, grenad::Reader<File>);

     fn merge(self, merge_fn: MergeFn, params: &GrenadParameters) -> Result<Self::Output> {
         let mut m1 = MergerBuilder::new(merge_fn);

@@ -127,7 +125,7 @@ impl<R: io::Read + io::Seek> MergerBuilder<R> {
         Ok(())
     }

-    fn finish(self, params: &GrenadParameters) -> Result<grenad::Reader<BufReader<File>>> {
+    fn finish(self, params: &GrenadParameters) -> Result<grenad::Reader<File>> {
         let merger = self.0.build();
         let mut writer = create_writer(
             params.chunk_compression_type,

@@ -178,7 +176,7 @@ pub fn grenad_obkv_into_chunks<R: io::Read + io::Seek>(
     reader: grenad::Reader<R>,
     indexer: GrenadParameters,
     documents_chunk_size: usize,
-) -> Result<impl Iterator<Item = Result<grenad::Reader<BufReader<File>>>>> {
+) -> Result<impl Iterator<Item = Result<grenad::Reader<File>>>> {
     let mut continue_reading = true;
     let mut cursor = reader.into_cursor()?;

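As a reading aid for the `create_writer`/`writer_into_reader` helpers shown in their post-change form above, here is a small, self-contained usage sketch. It restates `create_writer` from the hunk and otherwise relies only on calls that appear in this diff, plus `grenad::Writer::insert`, cursor iteration via `move_on_next`, and the `tempfile` crate; treat those extras as assumptions rather than code from this repository.

use std::fs::File;
use std::io::{Seek, Write};

use grenad::CompressionType;

// Restated from the hunk above (new version): wrap any io::Write in a grenad
// writer, optionally compressing the produced blocks.
fn create_writer<W: Write>(typ: CompressionType, level: Option<u32>, file: W) -> grenad::Writer<W> {
    let mut builder = grenad::Writer::builder();
    builder.compression_type(typ);
    if let Some(level) = level {
        builder.compression_level(level);
    }
    builder.build(file)
}

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // grenad expects keys to be inserted in ascending lexicographic order.
    let file: File = tempfile::tempfile()?;
    let mut writer = create_writer(CompressionType::None, None, file);
    writer.insert(b"alpha", b"1")?;
    writer.insert(b"beta", b"2")?;

    // Same steps as writer_into_reader in the hunk: recover the file,
    // rewind it, and reopen it as a grenad reader.
    let mut file = writer.into_inner()?;
    file.rewind()?;
    let reader = grenad::Reader::new(file)?;

    let mut cursor = reader.into_cursor()?;
    while let Some((key, value)) = cursor.move_on_next()? {
        println!("{} => {}", String::from_utf8_lossy(key), String::from_utf8_lossy(value));
    }
    Ok(())
}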
@@ -14,7 +14,7 @@ pub use grenad_helpers::{
 };
 pub use merge_functions::{
     concat_u32s_array, keep_first, keep_latest_obkv, merge_btreeset_string,
-    merge_cbo_roaring_bitmaps, merge_obkvs_and_operations, merge_roaring_bitmaps,
+    merge_cbo_roaring_bitmaps, merge_obkvs_and_operations, merge_roaring_bitmaps, merge_two_obkvs,
     serialize_roaring_bitmap, MergeFn,
 };

@@ -20,7 +20,10 @@ use slice_group_by::GroupBy;
 use typed_chunk::{write_typed_chunk_into_index, TypedChunk};

 use self::enrich::enrich_documents_batch;
-pub use self::enrich::{extract_finite_float_from_value, DocumentId};
+pub use self::enrich::{
+    extract_finite_float_from_value, validate_document_id, validate_document_id_value,
+    validate_geo_from_json, DocumentId,
+};
 pub use self::helpers::{
     as_cloneable_grenad, create_sorter, create_writer, fst_stream_into_hashset,
     fst_stream_into_vec, merge_btreeset_string, merge_cbo_roaring_bitmaps, merge_roaring_bitmaps,
@@ -659,10 +659,8 @@ impl<'a, 'i> Transform<'a, 'i> {
             new_documents_ids: self.new_documents_ids,
             replaced_documents_ids: self.replaced_documents_ids,
             documents_count: self.documents_count,
-            original_documents: original_documents.into_inner().map_err(|err| err.into_error())?,
-            flattened_documents: flattened_documents
-                .into_inner()
-                .map_err(|err| err.into_error())?,
+            original_documents,
+            flattened_documents,
         })
     }

@@ -781,10 +779,8 @@ impl<'a, 'i> Transform<'a, 'i> {
             new_documents_ids: documents_ids,
             replaced_documents_ids: RoaringBitmap::default(),
             documents_count,
-            original_documents: original_documents.into_inner().map_err(|err| err.into_error())?,
-            flattened_documents: flattened_documents
-                .into_inner()
-                .map_err(|err| err.into_error())?,
+            original_documents,
+            flattened_documents,
         };

         let new_facets = output.compute_real_facets(wtxn, self.index)?;
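The `.into_inner().map_err(|err| err.into_error())?` chain that disappears in the two hunks above is the standard `std::io::BufWriter` idiom for flushing the buffer and recovering the wrapped writer. A minimal illustration follows; it uses a `Vec<u8>` sink instead of the grenad-backed files in `Transform`, so it is a sketch of the std API only, not of this codebase.

use std::io::{BufWriter, Write};

fn main() -> std::io::Result<()> {
    // Wrap any writer; a Vec<u8> stands in here for the temporary files
    // used during indexing.
    let mut buffered = BufWriter::new(Vec::new());
    buffered.write_all(b"some documents")?;

    // into_inner flushes the buffer and hands back the inner writer.
    // On failure it returns an IntoInnerError; into_error() extracts the
    // underlying io::Error, which is what the removed map_err closure did.
    let inner: Vec<u8> = buffered.into_inner().map_err(|err| err.into_error())?;
    assert_eq!(inner, b"some documents".to_vec());
    Ok(())
}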
@@ -2,7 +2,7 @@ use std::borrow::Cow;
 use std::collections::HashMap;
 use std::convert::TryInto;
 use std::fs::File;
-use std::io::{self, BufReader};
+use std::io;

 use bytemuck::allocation::pod_collect_to_vec;
 use charabia::{Language, Script};

@@ -27,22 +27,22 @@ pub(crate) enum TypedChunk {
     FieldIdDocidFacetStrings(grenad::Reader<CursorClonableMmap>),
     FieldIdDocidFacetNumbers(grenad::Reader<CursorClonableMmap>),
     Documents(grenad::Reader<CursorClonableMmap>),
-    FieldIdWordcountDocids(grenad::Reader<BufReader<File>>),
+    FieldIdWordcountDocids(grenad::Reader<File>),
     NewDocumentsIds(RoaringBitmap),
     WordDocids {
-        word_docids_reader: grenad::Reader<BufReader<File>>,
-        exact_word_docids_reader: grenad::Reader<BufReader<File>>,
+        word_docids_reader: grenad::Reader<File>,
+        exact_word_docids_reader: grenad::Reader<File>,
     },
-    WordPositionDocids(grenad::Reader<BufReader<File>>),
-    WordFidDocids(grenad::Reader<BufReader<File>>),
-    WordPairProximityDocids(grenad::Reader<BufReader<File>>),
-    FieldIdFacetStringDocids(grenad::Reader<BufReader<File>>),
-    FieldIdFacetNumberDocids(grenad::Reader<BufReader<File>>),
-    FieldIdFacetExistsDocids(grenad::Reader<BufReader<File>>),
-    FieldIdFacetIsNullDocids(grenad::Reader<BufReader<File>>),
-    FieldIdFacetIsEmptyDocids(grenad::Reader<BufReader<File>>),
-    GeoPoints(grenad::Reader<BufReader<File>>),
-    VectorPoints(grenad::Reader<BufReader<File>>),
+    WordPositionDocids(grenad::Reader<File>),
+    WordFidDocids(grenad::Reader<File>),
+    WordPairProximityDocids(grenad::Reader<File>),
+    FieldIdFacetStringDocids(grenad::Reader<File>),
+    FieldIdFacetNumberDocids(grenad::Reader<File>),
+    FieldIdFacetExistsDocids(grenad::Reader<File>),
+    FieldIdFacetIsNullDocids(grenad::Reader<File>),
+    FieldIdFacetIsEmptyDocids(grenad::Reader<File>),
+    GeoPoints(grenad::Reader<File>),
+    VectorPoints(grenad::Reader<File>),
     ScriptLanguageDocids(HashMap<(Script, Language), RoaringBitmap>),
 }

@@ -1,6 +1,6 @@
 use std::borrow::Cow;
 use std::collections::HashSet;
-use std::io::{BufReader, BufWriter};
+use std::io::BufReader;

 use grenad::CompressionType;
 use heed::types::ByteSlice;

@@ -119,9 +119,9 @@ pub fn insert_into_database(
 pub fn write_into_lmdb_database_without_merging(
     wtxn: &mut heed::RwTxn,
     database: heed::PolyDatabase,
-    writer: grenad::Writer<BufWriter<std::fs::File>>,
+    writer: grenad::Writer<std::fs::File>,
 ) -> Result<()> {
-    let file = writer.into_inner()?.into_inner().map_err(|err| err.into_error())?;
+    let file = writer.into_inner()?;
     let reader = grenad::Reader::new(BufReader::new(file))?;
     if database.is_empty(wtxn)? {
         let mut out_iter = database.iter_mut::<_, ByteSlice, ByteSlice>(wtxn)?;
@@ -20,4 +20,7 @@ source: milli/src/update/prefix_word_pairs/mod.rs
 3 at a [100, ]
 3 rings a [101, ]
 3 the a [101, ]
+4 at b [100, ]
+4 at be [100, ]
+4 bell a [101, ]

@@ -30,4 +30,10 @@ source: milli/src/update/prefix_word_pairs/mod.rs
 3 bell 5 [101, ]
 3 rings am [101, ]
 3 the at [101, ]
+4 an house [100, ]
+4 at beautiful [100, ]
+4 bell am [101, ]
+4 the 5 [101, ]
+5 at house [100, ]
+5 the am [101, ]

@@ -28,4 +28,8 @@ source: milli/src/update/prefix_word_pairs/mod.rs
 3 rings a [101, ]
 3 rings am [101, ]
 3 the a [101, ]
+4 at b [100, ]
+4 at be [100, ]
+4 bell a [101, ]
+4 bell am [101, ]

@@ -7,4 +7,5 @@ source: milli/src/update/prefix_word_pairs/mod.rs
 2 bell a [51, ]
 3 rings a [51, ]
 3 the a [51, ]
+4 bell a [51, ]

@@ -12,4 +12,5 @@ source: milli/src/update/prefix_word_pairs/mod.rs
 3 at a [50, ]
 3 rings a [51, ]
 3 the a [51, ]
+4 bell a [51, ]

@@ -7,4 +7,5 @@ source: milli/src/update/prefix_word_pairs/mod.rs
 2 bell a [51, ]
 3 rings a [51, ]
 3 the a [51, ]
+4 bell a [51, ]

@@ -12,4 +12,5 @@ source: milli/src/update/prefix_word_pairs/mod.rs
 3 at a [50, ]
 3 rings a [51, ]
 3 the a [51, ]
+4 bell a [51, ]
Some files were not shown because too many files have changed in this diff.