mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-07-22 22:30:58 +00:00
Compare commits
73 Commits
prototype-
...
prototype-
Author | SHA1 | Date | |
---|---|---|---|
c9860c7913 | |||
03ffabe889 | |||
1f4fc9c229 | |||
8cc3c54117 | |||
467b49153d | |||
0c3fa8cbc4 | |||
bddc168d83 | |||
84a36002d7 | |||
170e063b80 | |||
6376c342c1 | |||
5b563f872b | |||
ec9b52d608 | |||
34c67ac389 | |||
d050c9b4ae | |||
7dd1226faf | |||
1575456594 | |||
add2ceef67 | |||
548c8247c2 | |||
181ca48482 | |||
5751f5c640 | |||
d32eb11329 | |||
3d23b388bc | |||
85626cff8e | |||
58dac8af42 | |||
0dbf1a16ff | |||
462b4c0080 | |||
0d4482625a | |||
56a0d91ecd | |||
b366acdae6 | |||
7cb7e37ba8 | |||
33b7c574ea | |||
d3575fb028 | |||
39cbb499c2 | |||
ebef6bc24d | |||
d59b7db8d0 | |||
263e825619 | |||
69354a6144 | |||
2b5d9042d1 | |||
5b57fbab08 | |||
b11f85a635 | |||
a2d6dc8571 | |||
ee1701157f | |||
8c649d8061 | |||
492fc086f0 | |||
a2d0c73b41 | |||
48865470d7 | |||
c810df4d9f | |||
5e3df76699 | |||
02765fb267 | |||
841165d529 | |||
ea4a266f08 | |||
49f069ed97 | |||
be16b99d40 | |||
ec0c09d17c | |||
a9230f6e6c | |||
54f0ee1ed2 | |||
ce5647e730 | |||
b57b818b67 | |||
f7ea94e5f4 | |||
53382bb1b8 | |||
5b004a2583 | |||
13416ccbf7 | |||
2614e7d9ca | |||
e7244aa485 | |||
9cacc82307 | |||
4c6fddb1cb | |||
ca52021079 | |||
ee6f79d60b | |||
e4c24ca6a3 | |||
2bae9550c8 | |||
32c78ac8b1 | |||
5fe7c4545a | |||
2042229927 |
3
.github/workflows/benchmarks-pr.yml
vendored
3
.github/workflows/benchmarks-pr.yml
vendored
@ -90,7 +90,8 @@ jobs:
|
||||
set -x
|
||||
export base_ref=$(git merge-base origin/main ${{ steps.comment-branch.outputs.head_ref }} | head -c8)
|
||||
export base_filename=$(echo ${{ steps.command.outputs.command-arguments }}_main_${base_ref}.json)
|
||||
echo 'Here are your benchmarks diff 👊' >> body.txt
|
||||
export bench_name=$(echo ${{ steps.command.outputs.command-arguments }})
|
||||
echo "Here are your $bench_name benchmarks diff 👊" >> body.txt
|
||||
echo '```' >> body.txt
|
||||
./benchmarks/scripts/compare.sh $base_filename ${{ steps.file.outputs.basename }}.json >> body.txt
|
||||
echo '```' >> body.txt
|
||||
|
2
.github/workflows/publish-apt-brew-pkg.yml
vendored
2
.github/workflows/publish-apt-brew-pkg.yml
vendored
@ -50,7 +50,7 @@ jobs:
|
||||
needs: check-version
|
||||
steps:
|
||||
- name: Create PR to Homebrew
|
||||
uses: mislav/bump-homebrew-formula-action@v2
|
||||
uses: mislav/bump-homebrew-formula-action@v3
|
||||
with:
|
||||
formula-name: meilisearch
|
||||
formula-path: Formula/m/meilisearch.rb
|
||||
|
2
.github/workflows/publish-docker-images.yml
vendored
2
.github/workflows/publish-docker-images.yml
vendored
@ -63,7 +63,7 @@ jobs:
|
||||
uses: docker/setup-buildx-action@v3
|
||||
|
||||
- name: Login to Docker Hub
|
||||
uses: docker/login-action@v2
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
|
4
.github/workflows/sdks-tests.yml
vendored
4
.github/workflows/sdks-tests.yml
vendored
@ -160,7 +160,7 @@ jobs:
|
||||
with:
|
||||
repository: meilisearch/meilisearch-js
|
||||
- name: Setup node
|
||||
uses: actions/setup-node@v3
|
||||
uses: actions/setup-node@v4
|
||||
with:
|
||||
cache: 'yarn'
|
||||
- name: Install dependencies
|
||||
@ -318,7 +318,7 @@ jobs:
|
||||
with:
|
||||
repository: meilisearch/meilisearch-js-plugins
|
||||
- name: Setup node
|
||||
uses: actions/setup-node@v3
|
||||
uses: actions/setup-node@v4
|
||||
with:
|
||||
cache: yarn
|
||||
- name: Install dependencies
|
||||
|
10
.github/workflows/test-suite.yml
vendored
10
.github/workflows/test-suite.yml
vendored
@ -43,7 +43,7 @@ jobs:
|
||||
toolchain: nightly
|
||||
override: true
|
||||
- name: Cache dependencies
|
||||
uses: Swatinem/rust-cache@v2.6.2
|
||||
uses: Swatinem/rust-cache@v2.7.1
|
||||
- name: Run cargo check without any default features
|
||||
uses: actions-rs/cargo@v1
|
||||
with:
|
||||
@ -65,7 +65,7 @@ jobs:
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- name: Cache dependencies
|
||||
uses: Swatinem/rust-cache@v2.6.2
|
||||
uses: Swatinem/rust-cache@v2.7.1
|
||||
- name: Run cargo check without any default features
|
||||
uses: actions-rs/cargo@v1
|
||||
with:
|
||||
@ -149,7 +149,7 @@ jobs:
|
||||
toolchain: stable
|
||||
override: true
|
||||
- name: Cache dependencies
|
||||
uses: Swatinem/rust-cache@v2.6.2
|
||||
uses: Swatinem/rust-cache@v2.7.1
|
||||
- name: Run tests in debug
|
||||
uses: actions-rs/cargo@v1
|
||||
with:
|
||||
@ -168,7 +168,7 @@ jobs:
|
||||
override: true
|
||||
components: clippy
|
||||
- name: Cache dependencies
|
||||
uses: Swatinem/rust-cache@v2.6.2
|
||||
uses: Swatinem/rust-cache@v2.7.1
|
||||
- name: Run cargo clippy
|
||||
uses: actions-rs/cargo@v1
|
||||
with:
|
||||
@ -187,7 +187,7 @@ jobs:
|
||||
override: true
|
||||
components: rustfmt
|
||||
- name: Cache dependencies
|
||||
uses: Swatinem/rust-cache@v2.6.2
|
||||
uses: Swatinem/rust-cache@v2.7.1
|
||||
- name: Run cargo fmt
|
||||
# Since we never ran the `build.rs` script in the benchmark directory we are missing one auto-generated import file.
|
||||
# Since we want to trigger (and fail) this action as fast as possible, instead of building the benchmark crate
|
||||
|
935
Cargo.lock
generated
935
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
@ -2,6 +2,7 @@
|
||||
resolver = "2"
|
||||
members = [
|
||||
"meilisearch",
|
||||
"meilitool",
|
||||
"meilisearch-types",
|
||||
"meilisearch-auth",
|
||||
"meili-snap",
|
||||
@ -18,7 +19,7 @@ members = [
|
||||
]
|
||||
|
||||
[workspace.package]
|
||||
version = "1.4.1"
|
||||
version = "1.5.0"
|
||||
authors = ["Quentin de Quelen <quentin@dequelen.me>", "Clément Renault <clement@meilisearch.com>"]
|
||||
description = "Meilisearch HTTP server"
|
||||
homepage = "https://meilisearch.com"
|
||||
|
11
Dockerfile
11
Dockerfile
@ -3,7 +3,7 @@ FROM rust:alpine3.16 AS compiler
|
||||
|
||||
RUN apk add -q --update-cache --no-cache build-base openssl-dev
|
||||
|
||||
WORKDIR /meilisearch
|
||||
WORKDIR /
|
||||
|
||||
ARG COMMIT_SHA
|
||||
ARG COMMIT_DATE
|
||||
@ -17,7 +17,7 @@ RUN set -eux; \
|
||||
if [ "$apkArch" = "aarch64" ]; then \
|
||||
export JEMALLOC_SYS_WITH_LG_PAGE=16; \
|
||||
fi && \
|
||||
cargo build --release
|
||||
cargo build --release -p meilisearch -p meilitool
|
||||
|
||||
# Run
|
||||
FROM alpine:3.16
|
||||
@ -28,9 +28,10 @@ ENV MEILI_SERVER_PROVIDER docker
|
||||
RUN apk update --quiet \
|
||||
&& apk add -q --no-cache libgcc tini curl
|
||||
|
||||
# add meilisearch to the `/bin` so you can run it from anywhere and it's easy
|
||||
# to find.
|
||||
COPY --from=compiler /meilisearch/target/release/meilisearch /bin/meilisearch
|
||||
# add meilisearch and meilitool to the `/bin` so you can run it from anywhere
|
||||
# and it's easy to find.
|
||||
COPY --from=compiler /target/release/meilisearch /bin/meilisearch
|
||||
COPY --from=compiler /target/release/meilitool /bin/meilitool
|
||||
# To stay compatible with the older version of the container (pre v0.27.0) we're
|
||||
# going to symlink the meilisearch binary in the path to `/meilisearch`
|
||||
RUN ln -s /bin/meilisearch /meilisearch
|
||||
|
@ -25,12 +25,6 @@
|
||||
|
||||
<p align="center">⚡ A lightning-fast search engine that fits effortlessly into your apps, websites, and workflow 🔍</p>
|
||||
|
||||
---
|
||||
|
||||
### 🔥 On November 2nd, we are hosting our first-ever live demo and product updates for [Meilisearch Cloud](https://www.meilisearch.com/cloud?utm_campaign=oss&utm_source=github&utm_medium=meilisearch). Make sure to [register here](https://us06web.zoom.us/meeting/register/tZMlc-mqrjIsH912-HTRe-AaT-pp41bDe81a#/registration) and bring your questions for live Q&A!
|
||||
|
||||
---
|
||||
|
||||
Meilisearch helps you shape a delightful search experience in a snap, offering features that work out-of-the-box to speed up your workflow.
|
||||
|
||||
<p align="center" name="demo">
|
||||
|
@ -36,7 +36,7 @@ fn setup_index() -> Index {
|
||||
}
|
||||
|
||||
fn setup_settings<'t>(
|
||||
wtxn: &mut RwTxn<'t, '_>,
|
||||
wtxn: &mut RwTxn<'t>,
|
||||
index: &'t Index,
|
||||
primary_key: &str,
|
||||
searchable_fields: &[&str],
|
||||
|
@ -267,6 +267,7 @@ pub(crate) mod test {
|
||||
dictionary: Setting::NotSet,
|
||||
synonyms: Setting::NotSet,
|
||||
distinct_attribute: Setting::NotSet,
|
||||
proximity_precision: Setting::NotSet,
|
||||
typo_tolerance: Setting::NotSet,
|
||||
faceting: Setting::Set(FacetingSettings {
|
||||
max_values_per_facet: Setting::Set(111),
|
||||
|
@ -345,6 +345,7 @@ impl<T> From<v5::Settings<T>> for v6::Settings<v6::Unchecked> {
|
||||
dictionary: v6::Setting::NotSet,
|
||||
synonyms: settings.synonyms.into(),
|
||||
distinct_attribute: settings.distinct_attribute.into(),
|
||||
proximity_precision: v6::Setting::NotSet,
|
||||
typo_tolerance: match settings.typo_tolerance {
|
||||
v5::Setting::Set(typo) => v6::Setting::Set(v6::TypoTolerance {
|
||||
enabled: typo.enabled.into(),
|
||||
|
@ -13,12 +13,12 @@ use crate::{Result, Version};
|
||||
|
||||
mod compat;
|
||||
|
||||
pub(self) mod v1;
|
||||
pub(self) mod v2;
|
||||
pub(self) mod v3;
|
||||
pub(self) mod v4;
|
||||
pub(self) mod v5;
|
||||
pub(self) mod v6;
|
||||
mod v1;
|
||||
mod v2;
|
||||
mod v3;
|
||||
mod v4;
|
||||
mod v5;
|
||||
mod v6;
|
||||
|
||||
pub type Document = serde_json::Map<String, serde_json::Value>;
|
||||
pub type UpdateFile = dyn Iterator<Item = Result<Document>>;
|
||||
|
@ -56,8 +56,7 @@ pub enum RankingRule {
|
||||
Desc(String),
|
||||
}
|
||||
|
||||
static ASC_DESC_REGEX: Lazy<Regex> =
|
||||
Lazy::new(|| Regex::new(r#"(asc|desc)\(([\w_-]+)\)"#).unwrap());
|
||||
static ASC_DESC_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"(asc|desc)\(([\w_-]+)\)").unwrap());
|
||||
|
||||
impl FromStr for RankingRule {
|
||||
type Err = ();
|
||||
|
@ -564,10 +564,10 @@ pub mod tests {
|
||||
|
||||
#[test]
|
||||
fn parse_escaped() {
|
||||
insta::assert_display_snapshot!(p(r#"title = 'foo\\'"#), @r#"{title} = {foo\}"#);
|
||||
insta::assert_display_snapshot!(p(r#"title = 'foo\\\\'"#), @r#"{title} = {foo\\}"#);
|
||||
insta::assert_display_snapshot!(p(r#"title = 'foo\\\\\\'"#), @r#"{title} = {foo\\\}"#);
|
||||
insta::assert_display_snapshot!(p(r#"title = 'foo\\\\\\\\'"#), @r#"{title} = {foo\\\\}"#);
|
||||
insta::assert_display_snapshot!(p(r"title = 'foo\\'"), @r#"{title} = {foo\}"#);
|
||||
insta::assert_display_snapshot!(p(r"title = 'foo\\\\'"), @r#"{title} = {foo\\}"#);
|
||||
insta::assert_display_snapshot!(p(r"title = 'foo\\\\\\'"), @r#"{title} = {foo\\\}"#);
|
||||
insta::assert_display_snapshot!(p(r"title = 'foo\\\\\\\\'"), @r#"{title} = {foo\\\\}"#);
|
||||
// but it also works with other sequencies
|
||||
insta::assert_display_snapshot!(p(r#"title = 'foo\x20\n\t\"\'"'"#), @"{title} = {foo \n\t\"\'\"}");
|
||||
}
|
||||
|
@ -270,8 +270,8 @@ pub mod test {
|
||||
("aaaa", "", rtok("", "aaaa"), "aaaa"),
|
||||
(r#"aa"aa"#, r#""aa"#, rtok("", "aa"), "aa"),
|
||||
(r#"aa\"aa"#, r#""#, rtok("", r#"aa\"aa"#), r#"aa"aa"#),
|
||||
(r#"aa\\\aa"#, r#""#, rtok("", r#"aa\\\aa"#), r#"aa\\\aa"#),
|
||||
(r#"aa\\"\aa"#, r#""\aa"#, rtok("", r#"aa\\"#), r#"aa\\"#),
|
||||
(r"aa\\\aa", r#""#, rtok("", r"aa\\\aa"), r"aa\\\aa"),
|
||||
(r#"aa\\"\aa"#, r#""\aa"#, rtok("", r"aa\\"), r"aa\\"),
|
||||
(r#"aa\\\"\aa"#, r#""#, rtok("", r#"aa\\\"\aa"#), r#"aa\\"\aa"#),
|
||||
(r#"\"\""#, r#""#, rtok("", r#"\"\""#), r#""""#),
|
||||
];
|
||||
@ -301,12 +301,12 @@ pub mod test {
|
||||
);
|
||||
// simple quote
|
||||
assert_eq!(
|
||||
unescape(Span::new_extra(r#"Hello \'World\'"#, ""), '\''),
|
||||
unescape(Span::new_extra(r"Hello \'World\'", ""), '\''),
|
||||
r#"Hello 'World'"#.to_string()
|
||||
);
|
||||
assert_eq!(
|
||||
unescape(Span::new_extra(r#"Hello \\\'World\\\'"#, ""), '\''),
|
||||
r#"Hello \\'World\\'"#.to_string()
|
||||
unescape(Span::new_extra(r"Hello \\\'World\\\'", ""), '\''),
|
||||
r"Hello \\'World\\'".to_string()
|
||||
);
|
||||
}
|
||||
|
||||
@ -335,19 +335,19 @@ pub mod test {
|
||||
("\"cha'nnel\"", "cha'nnel", false),
|
||||
("I'm tamo", "I", false),
|
||||
// escaped thing but not quote
|
||||
(r#""\\""#, r#"\"#, true),
|
||||
(r#""\\\\\\""#, r#"\\\"#, true),
|
||||
(r#""aa\\aa""#, r#"aa\aa"#, true),
|
||||
(r#""\\""#, r"\", true),
|
||||
(r#""\\\\\\""#, r"\\\", true),
|
||||
(r#""aa\\aa""#, r"aa\aa", true),
|
||||
// with double quote
|
||||
(r#""Hello \"world\"""#, r#"Hello "world""#, true),
|
||||
(r#""Hello \\\"world\\\"""#, r#"Hello \"world\""#, true),
|
||||
(r#""I'm \"super\" tamo""#, r#"I'm "super" tamo"#, true),
|
||||
(r#""\"\"""#, r#""""#, true),
|
||||
// with simple quote
|
||||
(r#"'Hello \'world\''"#, r#"Hello 'world'"#, true),
|
||||
(r#"'Hello \\\'world\\\''"#, r#"Hello \'world\'"#, true),
|
||||
(r"'Hello \'world\''", r#"Hello 'world'"#, true),
|
||||
(r"'Hello \\\'world\\\''", r"Hello \'world\'", true),
|
||||
(r#"'I\'m "super" tamo'"#, r#"I'm "super" tamo"#, true),
|
||||
(r#"'\'\''"#, r#"''"#, true),
|
||||
(r"'\'\''", r#"''"#, true),
|
||||
];
|
||||
|
||||
for (input, expected, escaped) in test_case {
|
||||
|
@ -113,7 +113,7 @@ fn main() {
|
||||
index.documents(&wtxn, res.documents_ids).unwrap();
|
||||
progression.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
wtxn.abort().unwrap();
|
||||
wtxn.abort();
|
||||
});
|
||||
if let err @ Err(_) = handle.join() {
|
||||
stop.store(true, Ordering::Relaxed);
|
||||
|
@ -22,7 +22,7 @@ log = "0.4.17"
|
||||
meilisearch-auth = { path = "../meilisearch-auth" }
|
||||
meilisearch-types = { path = "../meilisearch-types" }
|
||||
page_size = "0.5.0"
|
||||
puffin = "0.16.0"
|
||||
puffin = { version = "0.16.0", features = ["serialization"] }
|
||||
roaring = { version = "0.10.1", features = ["serde"] }
|
||||
serde = { version = "1.0.160", features = ["derive"] }
|
||||
serde_json = { version = "1.0.95", features = ["preserve_order"] }
|
||||
|
@ -32,7 +32,7 @@ use meilisearch_types::milli::heed::CompactionOption;
|
||||
use meilisearch_types::milli::update::{
|
||||
IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig, Settings as MilliSettings,
|
||||
};
|
||||
use meilisearch_types::milli::{self, Filter, BEU32};
|
||||
use meilisearch_types::milli::{self, Filter};
|
||||
use meilisearch_types::settings::{apply_settings_to_builder, Settings, Unchecked};
|
||||
use meilisearch_types::tasks::{Details, IndexSwap, Kind, KindWithContent, Status, Task};
|
||||
use meilisearch_types::{compression, Index, VERSION_FILE_NAME};
|
||||
@ -715,7 +715,7 @@ impl IndexScheduler {
|
||||
|
||||
// 2. Snapshot the index-scheduler LMDB env
|
||||
//
|
||||
// When we call copy_to_path, LMDB opens a read transaction by itself,
|
||||
// When we call copy_to_file, LMDB opens a read transaction by itself,
|
||||
// we can't provide our own. It is an issue as we would like to know
|
||||
// the update files to copy but new ones can be enqueued between the copy
|
||||
// of the env and the new transaction we open to retrieve the enqueued tasks.
|
||||
@ -728,7 +728,7 @@ impl IndexScheduler {
|
||||
// 2.1 First copy the LMDB env of the index-scheduler
|
||||
let dst = temp_snapshot_dir.path().join("tasks");
|
||||
fs::create_dir_all(&dst)?;
|
||||
self.env.copy_to_path(dst.join("data.mdb"), CompactionOption::Enabled)?;
|
||||
self.env.copy_to_file(dst.join("data.mdb"), CompactionOption::Enabled)?;
|
||||
|
||||
// 2.2 Create a read transaction on the index-scheduler
|
||||
let rtxn = self.env.read_txn()?;
|
||||
@ -753,7 +753,7 @@ impl IndexScheduler {
|
||||
let index = self.index_mapper.index(&rtxn, name)?;
|
||||
let dst = temp_snapshot_dir.path().join("indexes").join(uuid.to_string());
|
||||
fs::create_dir_all(&dst)?;
|
||||
index.copy_to_path(dst.join("data.mdb"), CompactionOption::Enabled)?;
|
||||
index.copy_to_file(dst.join("data.mdb"), CompactionOption::Enabled)?;
|
||||
}
|
||||
|
||||
drop(rtxn);
|
||||
@ -766,7 +766,7 @@ impl IndexScheduler {
|
||||
.map_size(1024 * 1024 * 1024) // 1 GiB
|
||||
.max_dbs(2)
|
||||
.open(&self.auth_path)?;
|
||||
auth.copy_to_path(dst.join("data.mdb"), CompactionOption::Enabled)?;
|
||||
auth.copy_to_file(dst.join("data.mdb"), CompactionOption::Enabled)?;
|
||||
|
||||
// 5. Copy and tarball the flat snapshot
|
||||
// 5.1 Find the original name of the database
|
||||
@ -822,6 +822,10 @@ impl IndexScheduler {
|
||||
// 2. dump the tasks
|
||||
let mut dump_tasks = dump.create_tasks_queue()?;
|
||||
for ret in self.all_tasks.iter(&rtxn)? {
|
||||
if self.must_stop_processing.get() {
|
||||
return Err(Error::AbortedTask);
|
||||
}
|
||||
|
||||
let (_, mut t) = ret?;
|
||||
let status = t.status;
|
||||
let content_file = t.content_uuid();
|
||||
@ -842,6 +846,9 @@ impl IndexScheduler {
|
||||
|
||||
// 2.1. Dump the `content_file` associated with the task if there is one and the task is not finished yet.
|
||||
if let Some(content_file) = content_file {
|
||||
if self.must_stop_processing.get() {
|
||||
return Err(Error::AbortedTask);
|
||||
}
|
||||
if status == Status::Enqueued {
|
||||
let content_file = self.file_store.get_update(content_file)?;
|
||||
|
||||
@ -881,6 +888,9 @@ impl IndexScheduler {
|
||||
|
||||
// 3.1. Dump the documents
|
||||
for ret in index.all_documents(&rtxn)? {
|
||||
if self.must_stop_processing.get() {
|
||||
return Err(Error::AbortedTask);
|
||||
}
|
||||
let (_id, doc) = ret?;
|
||||
let document = milli::obkv_to_json(&all_fields, &fields_ids_map, doc)?;
|
||||
index_dumper.push_document(&document)?;
|
||||
@ -900,6 +910,9 @@ impl IndexScheduler {
|
||||
"[year repr:full][month repr:numerical][day padding:zero]-[hour padding:zero][minute padding:zero][second padding:zero][subsecond digits:3]"
|
||||
)).unwrap();
|
||||
|
||||
if self.must_stop_processing.get() {
|
||||
return Err(Error::AbortedTask);
|
||||
}
|
||||
let path = self.dumps_path.join(format!("{}.dump", dump_uid));
|
||||
let file = File::create(path)?;
|
||||
dump.persist_to(BufWriter::new(file))?;
|
||||
@ -920,6 +933,10 @@ impl IndexScheduler {
|
||||
self.index_mapper.index(&rtxn, &index_uid)?
|
||||
};
|
||||
|
||||
// the index operation can take a long time, so save this handle to make it available to the search for the duration of the tick
|
||||
*self.currently_updating_index.write().unwrap() =
|
||||
Some((index_uid.clone(), index.clone()));
|
||||
|
||||
let mut index_wtxn = index.write_txn()?;
|
||||
let tasks = self.apply_index_operation(&mut index_wtxn, &index, op)?;
|
||||
index_wtxn.commit()?;
|
||||
@ -1089,7 +1106,7 @@ impl IndexScheduler {
|
||||
for task_id in &index_lhs_task_ids | &index_rhs_task_ids {
|
||||
let mut task = self.get_task(wtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?;
|
||||
swap_index_uid_in_task(&mut task, (lhs, rhs));
|
||||
self.all_tasks.put(wtxn, &BEU32::new(task_id), &task)?;
|
||||
self.all_tasks.put(wtxn, &task_id, &task)?;
|
||||
}
|
||||
|
||||
// 4. remove the task from indexuid = before_name
|
||||
@ -1115,7 +1132,7 @@ impl IndexScheduler {
|
||||
/// The list of processed tasks.
|
||||
fn apply_index_operation<'i>(
|
||||
&self,
|
||||
index_wtxn: &mut RwTxn<'i, '_>,
|
||||
index_wtxn: &mut RwTxn<'i>,
|
||||
index: &'i Index,
|
||||
operation: IndexOperation,
|
||||
) -> Result<Vec<Task>> {
|
||||
@ -1326,6 +1343,9 @@ impl IndexScheduler {
|
||||
|
||||
for (task, (_, settings)) in tasks.iter_mut().zip(settings) {
|
||||
let checked_settings = settings.clone().check();
|
||||
if checked_settings.proximity_precision.set().is_some() {
|
||||
self.features.features().check_proximity_precision()?;
|
||||
}
|
||||
task.details = Some(Details::SettingsUpdate { settings: Box::new(settings) });
|
||||
apply_settings_to_builder(&checked_settings, &mut builder);
|
||||
|
||||
@ -1462,10 +1482,9 @@ impl IndexScheduler {
|
||||
}
|
||||
|
||||
for task in to_delete_tasks.iter() {
|
||||
self.all_tasks.delete(wtxn, &BEU32::new(task))?;
|
||||
self.all_tasks.delete(wtxn, &task)?;
|
||||
}
|
||||
for canceled_by in affected_canceled_by {
|
||||
let canceled_by = BEU32::new(canceled_by);
|
||||
if let Some(mut tasks) = self.canceled_by.get(wtxn, &canceled_by)? {
|
||||
tasks -= &to_delete_tasks;
|
||||
if tasks.is_empty() {
|
||||
@ -1513,14 +1532,14 @@ impl IndexScheduler {
|
||||
task.details = task.details.map(|d| d.to_failed());
|
||||
self.update_task(wtxn, &task)?;
|
||||
}
|
||||
self.canceled_by.put(wtxn, &BEU32::new(cancel_task_id), &tasks_to_cancel)?;
|
||||
self.canceled_by.put(wtxn, &cancel_task_id, &tasks_to_cancel)?;
|
||||
|
||||
Ok(content_files_to_delete)
|
||||
}
|
||||
}
|
||||
|
||||
fn delete_document_by_filter<'a>(
|
||||
wtxn: &mut RwTxn<'a, '_>,
|
||||
wtxn: &mut RwTxn<'a>,
|
||||
filter: &serde_json::Value,
|
||||
indexer_config: &IndexerConfig,
|
||||
must_stop_processing: MustStopProcessing,
|
||||
|
@ -108,6 +108,8 @@ pub enum Error {
|
||||
TaskDeletionWithEmptyQuery,
|
||||
#[error("Query parameters to filter the tasks to cancel are missing. Available query parameters are: `uids`, `indexUids`, `statuses`, `types`, `canceledBy`, `beforeEnqueuedAt`, `afterEnqueuedAt`, `beforeStartedAt`, `afterStartedAt`, `beforeFinishedAt`, `afterFinishedAt`.")]
|
||||
TaskCancelationWithEmptyQuery,
|
||||
#[error("Aborted task")]
|
||||
AbortedTask,
|
||||
|
||||
#[error(transparent)]
|
||||
Dump(#[from] dump::Error),
|
||||
@ -175,6 +177,7 @@ impl Error {
|
||||
| Error::TaskNotFound(_)
|
||||
| Error::TaskDeletionWithEmptyQuery
|
||||
| Error::TaskCancelationWithEmptyQuery
|
||||
| Error::AbortedTask
|
||||
| Error::Dump(_)
|
||||
| Error::Heed(_)
|
||||
| Error::Milli(_)
|
||||
@ -236,6 +239,9 @@ impl ErrorCode for Error {
|
||||
Error::TaskDatabaseUpdate(_) => Code::Internal,
|
||||
Error::CreateBatch(_) => Code::Internal,
|
||||
|
||||
// This one should never be seen by the end user
|
||||
Error::AbortedTask => Code::Internal,
|
||||
|
||||
#[cfg(test)]
|
||||
Error::PlannedFailure => Code::Internal,
|
||||
}
|
||||
|
@ -81,6 +81,19 @@ impl RoFeatures {
|
||||
.into())
|
||||
}
|
||||
}
|
||||
|
||||
pub fn check_proximity_precision(&self) -> Result<()> {
|
||||
if self.runtime.proximity_precision {
|
||||
Ok(())
|
||||
} else {
|
||||
Err(FeatureNotEnabledError {
|
||||
disabled_action: "Using `proximityPrecision` index setting",
|
||||
feature: "proximity precision",
|
||||
issue_link: "https://github.com/orgs/meilisearch/discussions/710",
|
||||
}
|
||||
.into())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl FeatureData {
|
||||
|
@ -1,12 +1,8 @@
|
||||
/// the map size to use when we don't succeed in reading it in indexes.
|
||||
const DEFAULT_MAP_SIZE: usize = 10 * 1024 * 1024 * 1024; // 10 GiB
|
||||
|
||||
use std::collections::BTreeMap;
|
||||
use std::path::Path;
|
||||
use std::time::Duration;
|
||||
|
||||
use meilisearch_types::heed::flags::Flags;
|
||||
use meilisearch_types::heed::{EnvClosingEvent, EnvOpenOptions};
|
||||
use meilisearch_types::heed::{EnvClosingEvent, EnvFlags, EnvOpenOptions};
|
||||
use meilisearch_types::milli::Index;
|
||||
use time::OffsetDateTime;
|
||||
use uuid::Uuid;
|
||||
@ -236,7 +232,7 @@ impl IndexMap {
|
||||
enable_mdb_writemap: bool,
|
||||
map_size_growth: usize,
|
||||
) {
|
||||
let map_size = index.map_size().unwrap_or(DEFAULT_MAP_SIZE) + map_size_growth;
|
||||
let map_size = index.map_size() + map_size_growth;
|
||||
let closing_event = index.prepare_for_closing();
|
||||
let generation = self.next_generation();
|
||||
self.unavailable.insert(
|
||||
@ -309,7 +305,7 @@ fn create_or_open_index(
|
||||
options.map_size(clamp_to_page_size(map_size));
|
||||
options.max_readers(1024);
|
||||
if enable_mdb_writemap {
|
||||
unsafe { options.flag(Flags::MdbWriteMap) };
|
||||
unsafe { options.flags(EnvFlags::WRITE_MAP) };
|
||||
}
|
||||
|
||||
if let Some((created, updated)) = date {
|
||||
@ -388,7 +384,7 @@ mod tests {
|
||||
|
||||
fn assert_index_size(index: Index, expected: usize) {
|
||||
let expected = clamp_to_page_size(expected);
|
||||
let index_map_size = index.map_size().unwrap();
|
||||
let index_map_size = index.map_size();
|
||||
assert_eq!(index_map_size, expected);
|
||||
}
|
||||
}
|
||||
|
@ -1,7 +1,7 @@
|
||||
use std::collections::BTreeSet;
|
||||
use std::fmt::Write;
|
||||
|
||||
use meilisearch_types::heed::types::{OwnedType, SerdeBincode, SerdeJson, Str};
|
||||
use meilisearch_types::heed::types::{SerdeBincode, SerdeJson, Str};
|
||||
use meilisearch_types::heed::{Database, RoTxn};
|
||||
use meilisearch_types::milli::{CboRoaringBitmapCodec, RoaringBitmapCodec, BEU32};
|
||||
use meilisearch_types::tasks::{Details, Task};
|
||||
@ -39,6 +39,7 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
|
||||
test_breakpoint_sdr: _,
|
||||
planned_failures: _,
|
||||
run_loop_iteration: _,
|
||||
currently_updating_index: _,
|
||||
} = scheduler;
|
||||
|
||||
let rtxn = env.read_txn().unwrap();
|
||||
@ -114,7 +115,7 @@ pub fn snapshot_bitmap(r: &RoaringBitmap) -> String {
|
||||
snap
|
||||
}
|
||||
|
||||
pub fn snapshot_all_tasks(rtxn: &RoTxn, db: Database<OwnedType<BEU32>, SerdeJson<Task>>) -> String {
|
||||
pub fn snapshot_all_tasks(rtxn: &RoTxn, db: Database<BEU32, SerdeJson<Task>>) -> String {
|
||||
let mut snap = String::new();
|
||||
let iter = db.iter(rtxn).unwrap();
|
||||
for next in iter {
|
||||
@ -124,10 +125,7 @@ pub fn snapshot_all_tasks(rtxn: &RoTxn, db: Database<OwnedType<BEU32>, SerdeJson
|
||||
snap
|
||||
}
|
||||
|
||||
pub fn snapshot_date_db(
|
||||
rtxn: &RoTxn,
|
||||
db: Database<OwnedType<BEI128>, CboRoaringBitmapCodec>,
|
||||
) -> String {
|
||||
pub fn snapshot_date_db(rtxn: &RoTxn, db: Database<BEI128, CboRoaringBitmapCodec>) -> String {
|
||||
let mut snap = String::new();
|
||||
let iter = db.iter(rtxn).unwrap();
|
||||
for next in iter {
|
||||
@ -247,10 +245,7 @@ pub fn snapshot_index_tasks(rtxn: &RoTxn, db: Database<Str, RoaringBitmapCodec>)
|
||||
}
|
||||
snap
|
||||
}
|
||||
pub fn snapshot_canceled_by(
|
||||
rtxn: &RoTxn,
|
||||
db: Database<OwnedType<BEU32>, RoaringBitmapCodec>,
|
||||
) -> String {
|
||||
pub fn snapshot_canceled_by(rtxn: &RoTxn, db: Database<BEU32, RoaringBitmapCodec>) -> String {
|
||||
let mut snap = String::new();
|
||||
let iter = db.iter(rtxn).unwrap();
|
||||
for next in iter {
|
||||
|
@ -27,7 +27,7 @@ mod index_mapper;
|
||||
mod insta_snapshot;
|
||||
mod lru;
|
||||
mod utils;
|
||||
mod uuid_codec;
|
||||
pub mod uuid_codec;
|
||||
|
||||
pub type Result<T> = std::result::Result<T, Error>;
|
||||
pub type TaskId = u32;
|
||||
@ -47,8 +47,9 @@ pub use features::RoFeatures;
|
||||
use file_store::FileStore;
|
||||
use meilisearch_types::error::ResponseError;
|
||||
use meilisearch_types::features::{InstanceTogglableFeatures, RuntimeTogglableFeatures};
|
||||
use meilisearch_types::heed::types::{OwnedType, SerdeBincode, SerdeJson, Str};
|
||||
use meilisearch_types::heed::{self, Database, Env, RoTxn, RwTxn};
|
||||
use meilisearch_types::heed::byteorder::BE;
|
||||
use meilisearch_types::heed::types::{SerdeBincode, SerdeJson, Str, I128};
|
||||
use meilisearch_types::heed::{self, Database, Env, PutFlags, RoTxn, RwTxn};
|
||||
use meilisearch_types::milli::documents::DocumentsBatchBuilder;
|
||||
use meilisearch_types::milli::update::IndexerConfig;
|
||||
use meilisearch_types::milli::{self, CboRoaringBitmapCodec, Index, RoaringBitmapCodec, BEU32};
|
||||
@ -64,8 +65,7 @@ use uuid::Uuid;
|
||||
use crate::index_mapper::IndexMapper;
|
||||
use crate::utils::{check_index_swap_validity, clamp_to_page_size};
|
||||
|
||||
pub(crate) type BEI128 =
|
||||
meilisearch_types::heed::zerocopy::I128<meilisearch_types::heed::byteorder::BE>;
|
||||
pub(crate) type BEI128 = I128<BE>;
|
||||
|
||||
/// Defines a subset of tasks to be retrieved from the [`IndexScheduler`].
|
||||
///
|
||||
@ -278,7 +278,7 @@ pub struct IndexScheduler {
|
||||
pub(crate) file_store: FileStore,
|
||||
|
||||
// The main database, it contains all the tasks accessible by their Id.
|
||||
pub(crate) all_tasks: Database<OwnedType<BEU32>, SerdeJson<Task>>,
|
||||
pub(crate) all_tasks: Database<BEU32, SerdeJson<Task>>,
|
||||
|
||||
/// All the tasks ids grouped by their status.
|
||||
// TODO we should not be able to serialize a `Status::Processing` in this database.
|
||||
@ -289,16 +289,16 @@ pub struct IndexScheduler {
|
||||
pub(crate) index_tasks: Database<Str, RoaringBitmapCodec>,
|
||||
|
||||
/// Store the tasks that were canceled by a task uid
|
||||
pub(crate) canceled_by: Database<OwnedType<BEU32>, RoaringBitmapCodec>,
|
||||
pub(crate) canceled_by: Database<BEU32, RoaringBitmapCodec>,
|
||||
|
||||
/// Store the task ids of tasks which were enqueued at a specific date
|
||||
pub(crate) enqueued_at: Database<OwnedType<BEI128>, CboRoaringBitmapCodec>,
|
||||
pub(crate) enqueued_at: Database<BEI128, CboRoaringBitmapCodec>,
|
||||
|
||||
/// Store the task ids of finished tasks which started being processed at a specific date
|
||||
pub(crate) started_at: Database<OwnedType<BEI128>, CboRoaringBitmapCodec>,
|
||||
pub(crate) started_at: Database<BEI128, CboRoaringBitmapCodec>,
|
||||
|
||||
/// Store the task ids of tasks which finished at a specific date
|
||||
pub(crate) finished_at: Database<OwnedType<BEI128>, CboRoaringBitmapCodec>,
|
||||
pub(crate) finished_at: Database<BEI128, CboRoaringBitmapCodec>,
|
||||
|
||||
/// In charge of creating, opening, storing and returning indexes.
|
||||
pub(crate) index_mapper: IndexMapper,
|
||||
@ -331,6 +331,10 @@ pub struct IndexScheduler {
|
||||
/// The path to the version file of Meilisearch.
|
||||
pub(crate) version_file_path: PathBuf,
|
||||
|
||||
/// A few types of long running batches of tasks that act on a single index set this field
|
||||
/// so that a handle to the index is available from other threads (search) in an optimized manner.
|
||||
currently_updating_index: Arc<RwLock<Option<(String, Index)>>>,
|
||||
|
||||
// ================= test
|
||||
// The next entry is dedicated to the tests.
|
||||
/// Provide a way to set a breakpoint in multiple part of the scheduler.
|
||||
@ -374,6 +378,7 @@ impl IndexScheduler {
|
||||
dumps_path: self.dumps_path.clone(),
|
||||
auth_path: self.auth_path.clone(),
|
||||
version_file_path: self.version_file_path.clone(),
|
||||
currently_updating_index: self.currently_updating_index.clone(),
|
||||
#[cfg(test)]
|
||||
test_breakpoint_sdr: self.test_breakpoint_sdr.clone(),
|
||||
#[cfg(test)]
|
||||
@ -470,6 +475,7 @@ impl IndexScheduler {
|
||||
snapshots_path: options.snapshots_path,
|
||||
auth_path: options.auth_path,
|
||||
version_file_path: options.version_file_path,
|
||||
currently_updating_index: Arc::new(RwLock::new(None)),
|
||||
|
||||
#[cfg(test)]
|
||||
test_breakpoint_sdr,
|
||||
@ -652,6 +658,13 @@ impl IndexScheduler {
|
||||
/// If you need to fetch information from or perform an action on all indexes,
|
||||
/// see the `try_for_each_index` function.
|
||||
pub fn index(&self, name: &str) -> Result<Index> {
|
||||
if let Some((current_name, current_index)) =
|
||||
self.currently_updating_index.read().unwrap().as_ref()
|
||||
{
|
||||
if current_name == name {
|
||||
return Ok(current_index.clone());
|
||||
}
|
||||
}
|
||||
let rtxn = self.env.read_txn()?;
|
||||
self.index_mapper.index(&rtxn, name)
|
||||
}
|
||||
@ -717,9 +730,7 @@ impl IndexScheduler {
|
||||
if let Some(canceled_by) = &query.canceled_by {
|
||||
let mut all_canceled_tasks = RoaringBitmap::new();
|
||||
for cancel_task_uid in canceled_by {
|
||||
if let Some(canceled_by_uid) =
|
||||
self.canceled_by.get(rtxn, &BEU32::new(*cancel_task_uid))?
|
||||
{
|
||||
if let Some(canceled_by_uid) = self.canceled_by.get(rtxn, cancel_task_uid)? {
|
||||
all_canceled_tasks |= canceled_by_uid;
|
||||
}
|
||||
}
|
||||
@ -970,7 +981,7 @@ impl IndexScheduler {
|
||||
|
||||
// if the task doesn't delete anything and 50% of the task queue is full, we must refuse to enqueue the incomming task
|
||||
if !matches!(&kind, KindWithContent::TaskDeletion { tasks, .. } if !tasks.is_empty())
|
||||
&& (self.env.non_free_pages_size()? * 100) / self.env.map_size()? as u64 > 50
|
||||
&& (self.env.non_free_pages_size()? * 100) / self.env.info().map_size as u64 > 50
|
||||
{
|
||||
return Err(Error::NoSpaceLeftInTaskQueue);
|
||||
}
|
||||
@ -996,7 +1007,7 @@ impl IndexScheduler {
|
||||
// Get rid of the mutability.
|
||||
let task = task;
|
||||
|
||||
self.all_tasks.append(&mut wtxn, &BEU32::new(task.uid), &task)?;
|
||||
self.all_tasks.put_with_flags(&mut wtxn, PutFlags::APPEND, &task.uid, &task)?;
|
||||
|
||||
for index in task.indexes() {
|
||||
self.update_index(&mut wtxn, index, |bitmap| {
|
||||
@ -1133,6 +1144,9 @@ impl IndexScheduler {
|
||||
handle.join().unwrap_or(Err(Error::ProcessBatchPanicked))
|
||||
};
|
||||
|
||||
// Reset the currently updating index to relinquish the index handle
|
||||
*self.currently_updating_index.write().unwrap() = None;
|
||||
|
||||
#[cfg(test)]
|
||||
self.maybe_fail(tests::FailureLocation::AcquiringWtxn)?;
|
||||
|
||||
@ -1167,10 +1181,11 @@ impl IndexScheduler {
|
||||
// If we have an abortion error we must stop the tick here and re-schedule tasks.
|
||||
Err(Error::Milli(milli::Error::InternalError(
|
||||
milli::InternalError::AbortedIndexation,
|
||||
))) => {
|
||||
)))
|
||||
| Err(Error::AbortedTask) => {
|
||||
#[cfg(test)]
|
||||
self.breakpoint(Breakpoint::AbortedIndexation);
|
||||
wtxn.abort().map_err(Error::HeedTransaction)?;
|
||||
wtxn.abort();
|
||||
|
||||
// We make sure that we don't call `stop_processing` on the `processing_tasks`,
|
||||
// this is because we want to let the next tick call `create_next_batch` and keep
|
||||
@ -1191,7 +1206,7 @@ impl IndexScheduler {
|
||||
let index_uid = index_uid.unwrap();
|
||||
// fixme: handle error more gracefully? not sure when this could happen
|
||||
self.index_mapper.resize_index(&wtxn, &index_uid)?;
|
||||
wtxn.abort().map_err(Error::HeedTransaction)?;
|
||||
wtxn.abort();
|
||||
|
||||
return Ok(TickOutcome::TickAgain(0));
|
||||
}
|
||||
@ -1337,7 +1352,7 @@ impl IndexScheduler {
|
||||
|
||||
pub struct Dump<'a> {
|
||||
index_scheduler: &'a IndexScheduler,
|
||||
wtxn: RwTxn<'a, 'a>,
|
||||
wtxn: RwTxn<'a>,
|
||||
|
||||
indexes: HashMap<String, RoaringBitmap>,
|
||||
statuses: HashMap<Status, RoaringBitmap>,
|
||||
@ -1452,7 +1467,7 @@ impl<'a> Dump<'a> {
|
||||
},
|
||||
};
|
||||
|
||||
self.index_scheduler.all_tasks.put(&mut self.wtxn, &BEU32::new(task.uid), &task)?;
|
||||
self.index_scheduler.all_tasks.put(&mut self.wtxn, &task.uid, &task)?;
|
||||
|
||||
for index in task.indexes() {
|
||||
match self.indexes.get_mut(index) {
|
||||
@ -1494,8 +1509,8 @@ impl<'a> Dump<'a> {
|
||||
}
|
||||
}
|
||||
|
||||
self.statuses.entry(task.status).or_insert(RoaringBitmap::new()).insert(task.uid);
|
||||
self.kinds.entry(task.kind.as_kind()).or_insert(RoaringBitmap::new()).insert(task.uid);
|
||||
self.statuses.entry(task.status).or_default().insert(task.uid);
|
||||
self.kinds.entry(task.kind.as_kind()).or_default().insert(task.uid);
|
||||
|
||||
Ok(task)
|
||||
}
|
||||
@ -4323,4 +4338,26 @@ mod tests {
|
||||
}
|
||||
"###);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn cancel_processing_dump() {
|
||||
let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]);
|
||||
|
||||
let dump_creation = KindWithContent::DumpCreation { keys: Vec::new(), instance_uid: None };
|
||||
let dump_cancellation = KindWithContent::TaskCancelation {
|
||||
query: "cancel dump".to_owned(),
|
||||
tasks: RoaringBitmap::from_iter([0]),
|
||||
};
|
||||
let _ = index_scheduler.register(dump_creation).unwrap();
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_dump_register");
|
||||
handle.advance_till([Start, BatchCreated, InsideProcessBatch]);
|
||||
|
||||
let _ = index_scheduler.register(dump_cancellation).unwrap();
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "cancel_registered");
|
||||
|
||||
snapshot!(format!("{:?}", handle.advance()), @"AbortedIndexation");
|
||||
|
||||
handle.advance_one_successful_batch();
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "cancel_processed");
|
||||
}
|
||||
}
|
||||
|
@ -0,0 +1,35 @@
|
||||
---
|
||||
source: index-scheduler/src/lib.rs
|
||||
---
|
||||
### Autobatching Enabled = true
|
||||
### Processing Tasks:
|
||||
[]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, status: enqueued, details: { dump_uid: None }, kind: DumpCreation { keys: [], instance_uid: None }}
|
||||
----------------------------------------------------------------------
|
||||
### Status:
|
||||
enqueued [0,]
|
||||
----------------------------------------------------------------------
|
||||
### Kind:
|
||||
"dumpCreation" [0,]
|
||||
----------------------------------------------------------------------
|
||||
### Index Tasks:
|
||||
----------------------------------------------------------------------
|
||||
### Index Mapper:
|
||||
|
||||
----------------------------------------------------------------------
|
||||
### Canceled By:
|
||||
|
||||
----------------------------------------------------------------------
|
||||
### Enqueued At:
|
||||
[timestamp] [0,]
|
||||
----------------------------------------------------------------------
|
||||
### Started At:
|
||||
----------------------------------------------------------------------
|
||||
### Finished At:
|
||||
----------------------------------------------------------------------
|
||||
### File Store:
|
||||
|
||||
----------------------------------------------------------------------
|
||||
|
@ -0,0 +1,45 @@
|
||||
---
|
||||
source: index-scheduler/src/lib.rs
|
||||
---
|
||||
### Autobatching Enabled = true
|
||||
### Processing Tasks:
|
||||
[]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, status: canceled, canceled_by: 1, details: { dump_uid: None }, kind: DumpCreation { keys: [], instance_uid: None }}
|
||||
1 {uid: 1, status: succeeded, details: { matched_tasks: 1, canceled_tasks: Some(0), original_filter: "cancel dump" }, kind: TaskCancelation { query: "cancel dump", tasks: RoaringBitmap<[0]> }}
|
||||
----------------------------------------------------------------------
|
||||
### Status:
|
||||
enqueued []
|
||||
succeeded [1,]
|
||||
canceled [0,]
|
||||
----------------------------------------------------------------------
|
||||
### Kind:
|
||||
"taskCancelation" [1,]
|
||||
"dumpCreation" [0,]
|
||||
----------------------------------------------------------------------
|
||||
### Index Tasks:
|
||||
----------------------------------------------------------------------
|
||||
### Index Mapper:
|
||||
|
||||
----------------------------------------------------------------------
|
||||
### Canceled By:
|
||||
1 [0,]
|
||||
|
||||
----------------------------------------------------------------------
|
||||
### Enqueued At:
|
||||
[timestamp] [0,]
|
||||
[timestamp] [1,]
|
||||
----------------------------------------------------------------------
|
||||
### Started At:
|
||||
[timestamp] [0,]
|
||||
[timestamp] [1,]
|
||||
----------------------------------------------------------------------
|
||||
### Finished At:
|
||||
[timestamp] [0,]
|
||||
[timestamp] [1,]
|
||||
----------------------------------------------------------------------
|
||||
### File Store:
|
||||
|
||||
----------------------------------------------------------------------
|
||||
|
@ -0,0 +1,38 @@
|
||||
---
|
||||
source: index-scheduler/src/lib.rs
|
||||
---
|
||||
### Autobatching Enabled = true
|
||||
### Processing Tasks:
|
||||
[0,]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, status: enqueued, details: { dump_uid: None }, kind: DumpCreation { keys: [], instance_uid: None }}
|
||||
1 {uid: 1, status: enqueued, details: { matched_tasks: 1, canceled_tasks: None, original_filter: "cancel dump" }, kind: TaskCancelation { query: "cancel dump", tasks: RoaringBitmap<[0]> }}
|
||||
----------------------------------------------------------------------
|
||||
### Status:
|
||||
enqueued [0,1,]
|
||||
----------------------------------------------------------------------
|
||||
### Kind:
|
||||
"taskCancelation" [1,]
|
||||
"dumpCreation" [0,]
|
||||
----------------------------------------------------------------------
|
||||
### Index Tasks:
|
||||
----------------------------------------------------------------------
|
||||
### Index Mapper:
|
||||
|
||||
----------------------------------------------------------------------
|
||||
### Canceled By:
|
||||
|
||||
----------------------------------------------------------------------
|
||||
### Enqueued At:
|
||||
[timestamp] [0,]
|
||||
[timestamp] [1,]
|
||||
----------------------------------------------------------------------
|
||||
### Started At:
|
||||
----------------------------------------------------------------------
|
||||
### Finished At:
|
||||
----------------------------------------------------------------------
|
||||
### File Store:
|
||||
|
||||
----------------------------------------------------------------------
|
||||
|
@ -3,9 +3,9 @@
|
||||
use std::collections::{BTreeSet, HashSet};
|
||||
use std::ops::Bound;
|
||||
|
||||
use meilisearch_types::heed::types::{DecodeIgnore, OwnedType};
|
||||
use meilisearch_types::heed::types::DecodeIgnore;
|
||||
use meilisearch_types::heed::{Database, RoTxn, RwTxn};
|
||||
use meilisearch_types::milli::{CboRoaringBitmapCodec, BEU32};
|
||||
use meilisearch_types::milli::CboRoaringBitmapCodec;
|
||||
use meilisearch_types::tasks::{Details, IndexSwap, Kind, KindWithContent, Status};
|
||||
use roaring::{MultiOps, RoaringBitmap};
|
||||
use time::OffsetDateTime;
|
||||
@ -18,7 +18,7 @@ impl IndexScheduler {
|
||||
}
|
||||
|
||||
pub(crate) fn last_task_id(&self, rtxn: &RoTxn) -> Result<Option<TaskId>> {
|
||||
Ok(self.all_tasks.remap_data_type::<DecodeIgnore>().last(rtxn)?.map(|(k, _)| k.get() + 1))
|
||||
Ok(self.all_tasks.remap_data_type::<DecodeIgnore>().last(rtxn)?.map(|(k, _)| k + 1))
|
||||
}
|
||||
|
||||
pub(crate) fn next_task_id(&self, rtxn: &RoTxn) -> Result<TaskId> {
|
||||
@ -26,7 +26,7 @@ impl IndexScheduler {
|
||||
}
|
||||
|
||||
pub(crate) fn get_task(&self, rtxn: &RoTxn, task_id: TaskId) -> Result<Option<Task>> {
|
||||
Ok(self.all_tasks.get(rtxn, &BEU32::new(task_id))?)
|
||||
Ok(self.all_tasks.get(rtxn, &task_id)?)
|
||||
}
|
||||
|
||||
/// Convert an iterator to a `Vec` of tasks. The tasks MUST exist or a
|
||||
@ -88,7 +88,7 @@ impl IndexScheduler {
|
||||
}
|
||||
}
|
||||
|
||||
self.all_tasks.put(wtxn, &BEU32::new(task.uid), task)?;
|
||||
self.all_tasks.put(wtxn, &task.uid, task)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@ -169,11 +169,11 @@ impl IndexScheduler {
|
||||
|
||||
pub(crate) fn insert_task_datetime(
|
||||
wtxn: &mut RwTxn,
|
||||
database: Database<OwnedType<BEI128>, CboRoaringBitmapCodec>,
|
||||
database: Database<BEI128, CboRoaringBitmapCodec>,
|
||||
time: OffsetDateTime,
|
||||
task_id: TaskId,
|
||||
) -> Result<()> {
|
||||
let timestamp = BEI128::new(time.unix_timestamp_nanos());
|
||||
let timestamp = time.unix_timestamp_nanos();
|
||||
let mut task_ids = database.get(wtxn, ×tamp)?.unwrap_or_default();
|
||||
task_ids.insert(task_id);
|
||||
database.put(wtxn, ×tamp, &RoaringBitmap::from_iter(task_ids))?;
|
||||
@ -182,11 +182,11 @@ pub(crate) fn insert_task_datetime(
|
||||
|
||||
pub(crate) fn remove_task_datetime(
|
||||
wtxn: &mut RwTxn,
|
||||
database: Database<OwnedType<BEI128>, CboRoaringBitmapCodec>,
|
||||
database: Database<BEI128, CboRoaringBitmapCodec>,
|
||||
time: OffsetDateTime,
|
||||
task_id: TaskId,
|
||||
) -> Result<()> {
|
||||
let timestamp = BEI128::new(time.unix_timestamp_nanos());
|
||||
let timestamp = time.unix_timestamp_nanos();
|
||||
if let Some(mut existing) = database.get(wtxn, ×tamp)? {
|
||||
existing.remove(task_id);
|
||||
if existing.is_empty() {
|
||||
@ -202,7 +202,7 @@ pub(crate) fn remove_task_datetime(
|
||||
pub(crate) fn keep_tasks_within_datetimes(
|
||||
rtxn: &RoTxn,
|
||||
tasks: &mut RoaringBitmap,
|
||||
database: Database<OwnedType<BEI128>, CboRoaringBitmapCodec>,
|
||||
database: Database<BEI128, CboRoaringBitmapCodec>,
|
||||
after: Option<OffsetDateTime>,
|
||||
before: Option<OffsetDateTime>,
|
||||
) -> Result<()> {
|
||||
@ -213,8 +213,8 @@ pub(crate) fn keep_tasks_within_datetimes(
|
||||
(Some(after), Some(before)) => (Bound::Excluded(*after), Bound::Excluded(*before)),
|
||||
};
|
||||
let mut collected_task_ids = RoaringBitmap::new();
|
||||
let start = map_bound(start, |b| BEI128::new(b.unix_timestamp_nanos()));
|
||||
let end = map_bound(end, |b| BEI128::new(b.unix_timestamp_nanos()));
|
||||
let start = map_bound(start, |b| b.unix_timestamp_nanos());
|
||||
let end = map_bound(end, |b| b.unix_timestamp_nanos());
|
||||
let iter = database.range(rtxn, &(start, end))?;
|
||||
for r in iter {
|
||||
let (_timestamp, task_ids) = r?;
|
||||
@ -337,8 +337,6 @@ impl IndexScheduler {
|
||||
let rtxn = self.env.read_txn().unwrap();
|
||||
for task in self.all_tasks.iter(&rtxn).unwrap() {
|
||||
let (task_id, task) = task.unwrap();
|
||||
let task_id = task_id.get();
|
||||
|
||||
let task_index_uid = task.index_uid().map(ToOwned::to_owned);
|
||||
|
||||
let Task {
|
||||
@ -361,16 +359,13 @@ impl IndexScheduler {
|
||||
.unwrap()
|
||||
.contains(task.uid));
|
||||
}
|
||||
let db_enqueued_at = self
|
||||
.enqueued_at
|
||||
.get(&rtxn, &BEI128::new(enqueued_at.unix_timestamp_nanos()))
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
let db_enqueued_at =
|
||||
self.enqueued_at.get(&rtxn, &enqueued_at.unix_timestamp_nanos()).unwrap().unwrap();
|
||||
assert!(db_enqueued_at.contains(task_id));
|
||||
if let Some(started_at) = started_at {
|
||||
let db_started_at = self
|
||||
.started_at
|
||||
.get(&rtxn, &BEI128::new(started_at.unix_timestamp_nanos()))
|
||||
.get(&rtxn, &started_at.unix_timestamp_nanos())
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
assert!(db_started_at.contains(task_id));
|
||||
@ -378,7 +373,7 @@ impl IndexScheduler {
|
||||
if let Some(finished_at) = finished_at {
|
||||
let db_finished_at = self
|
||||
.finished_at
|
||||
.get(&rtxn, &BEI128::new(finished_at.unix_timestamp_nanos()))
|
||||
.get(&rtxn, &finished_at.unix_timestamp_nanos())
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
assert!(db_finished_at.contains(task_id));
|
||||
|
@ -1,7 +1,7 @@
|
||||
use std::borrow::Cow;
|
||||
use std::convert::TryInto;
|
||||
|
||||
use meilisearch_types::heed::{BytesDecode, BytesEncode};
|
||||
use meilisearch_types::heed::{BoxedError, BytesDecode, BytesEncode};
|
||||
use uuid::Uuid;
|
||||
|
||||
/// A heed codec for value of struct Uuid.
|
||||
@ -10,15 +10,15 @@ pub struct UuidCodec;
|
||||
impl<'a> BytesDecode<'a> for UuidCodec {
|
||||
type DItem = Uuid;
|
||||
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||
bytes.try_into().ok().map(Uuid::from_bytes)
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
|
||||
bytes.try_into().map(Uuid::from_bytes).map_err(Into::into)
|
||||
}
|
||||
}
|
||||
|
||||
impl BytesEncode<'_> for UuidCodec {
|
||||
type EItem = Uuid;
|
||||
|
||||
fn bytes_encode(item: &Self::EItem) -> Option<Cow<[u8]>> {
|
||||
Some(Cow::Borrowed(item.as_bytes()))
|
||||
fn bytes_encode(item: &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
|
||||
Ok(Cow::Borrowed(item.as_bytes()))
|
||||
}
|
||||
}
|
||||
|
@ -4,17 +4,20 @@ use std::collections::HashSet;
|
||||
use std::convert::{TryFrom, TryInto};
|
||||
use std::fs::create_dir_all;
|
||||
use std::path::Path;
|
||||
use std::result::Result as StdResult;
|
||||
use std::str;
|
||||
use std::str::FromStr;
|
||||
use std::sync::Arc;
|
||||
|
||||
use hmac::{Hmac, Mac};
|
||||
use meilisearch_types::heed::BoxedError;
|
||||
use meilisearch_types::index_uid_pattern::IndexUidPattern;
|
||||
use meilisearch_types::keys::KeyId;
|
||||
use meilisearch_types::milli;
|
||||
use meilisearch_types::milli::heed::types::{ByteSlice, DecodeIgnore, SerdeJson};
|
||||
use meilisearch_types::milli::heed::types::{Bytes, DecodeIgnore, SerdeJson};
|
||||
use meilisearch_types::milli::heed::{Database, Env, EnvOpenOptions, RwTxn};
|
||||
use sha2::Sha256;
|
||||
use thiserror::Error;
|
||||
use time::OffsetDateTime;
|
||||
use uuid::fmt::Hyphenated;
|
||||
use uuid::Uuid;
|
||||
@ -30,7 +33,7 @@ const KEY_ID_ACTION_INDEX_EXPIRATION_DB_NAME: &str = "keyid-action-index-expirat
|
||||
#[derive(Clone)]
|
||||
pub struct HeedAuthStore {
|
||||
env: Arc<Env>,
|
||||
keys: Database<ByteSlice, SerdeJson<Key>>,
|
||||
keys: Database<Bytes, SerdeJson<Key>>,
|
||||
action_keyid_index_expiration: Database<KeyIdActionCodec, SerdeJson<Option<OffsetDateTime>>>,
|
||||
should_close_on_drop: bool,
|
||||
}
|
||||
@ -276,7 +279,7 @@ impl HeedAuthStore {
|
||||
fn delete_key_from_inverted_db(&self, wtxn: &mut RwTxn, key: &KeyId) -> Result<()> {
|
||||
let mut iter = self
|
||||
.action_keyid_index_expiration
|
||||
.remap_types::<ByteSlice, DecodeIgnore>()
|
||||
.remap_types::<Bytes, DecodeIgnore>()
|
||||
.prefix_iter_mut(wtxn, key.as_bytes())?;
|
||||
while iter.next().transpose()?.is_some() {
|
||||
// safety: we don't keep references from inside the LMDB database.
|
||||
@ -294,23 +297,24 @@ pub struct KeyIdActionCodec;
|
||||
impl<'a> milli::heed::BytesDecode<'a> for KeyIdActionCodec {
|
||||
type DItem = (KeyId, Action, Option<&'a [u8]>);
|
||||
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||
let (key_id_bytes, action_bytes) = try_split_array_at(bytes)?;
|
||||
let (action_bytes, index) = match try_split_array_at(action_bytes)? {
|
||||
(action, []) => (action, None),
|
||||
(action, index) => (action, Some(index)),
|
||||
};
|
||||
fn bytes_decode(bytes: &'a [u8]) -> StdResult<Self::DItem, BoxedError> {
|
||||
let (key_id_bytes, action_bytes) = try_split_array_at(bytes).ok_or(SliceTooShortError)?;
|
||||
let (&action_byte, index) =
|
||||
match try_split_array_at(action_bytes).ok_or(SliceTooShortError)? {
|
||||
([action], []) => (action, None),
|
||||
([action], index) => (action, Some(index)),
|
||||
};
|
||||
let key_id = Uuid::from_bytes(*key_id_bytes);
|
||||
let action = Action::from_repr(u8::from_be_bytes(*action_bytes))?;
|
||||
let action = Action::from_repr(action_byte).ok_or(InvalidActionError { action_byte })?;
|
||||
|
||||
Some((key_id, action, index))
|
||||
Ok((key_id, action, index))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> milli::heed::BytesEncode<'a> for KeyIdActionCodec {
|
||||
type EItem = (&'a KeyId, &'a Action, Option<&'a [u8]>);
|
||||
|
||||
fn bytes_encode((key_id, action, index): &Self::EItem) -> Option<Cow<[u8]>> {
|
||||
fn bytes_encode((key_id, action, index): &Self::EItem) -> StdResult<Cow<[u8]>, BoxedError> {
|
||||
let mut bytes = Vec::new();
|
||||
|
||||
bytes.extend_from_slice(key_id.as_bytes());
|
||||
@ -320,10 +324,20 @@ impl<'a> milli::heed::BytesEncode<'a> for KeyIdActionCodec {
|
||||
bytes.extend_from_slice(index);
|
||||
}
|
||||
|
||||
Some(Cow::Owned(bytes))
|
||||
Ok(Cow::Owned(bytes))
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Error, Debug)]
|
||||
#[error("the slice is too short")]
|
||||
pub struct SliceTooShortError;
|
||||
|
||||
#[derive(Error, Debug)]
|
||||
#[error("cannot construct a valid Action from {action_byte}")]
|
||||
pub struct InvalidActionError {
|
||||
pub action_byte: u8,
|
||||
}
|
||||
|
||||
pub fn generate_key_as_hexa(uid: Uuid, master_key: &[u8]) -> String {
|
||||
// format uid as hyphenated allowing user to generate their own keys.
|
||||
let mut uid_buffer = [0; Hyphenated::LENGTH];
|
||||
|
@ -15,7 +15,7 @@ actix-web = { version = "4.3.1", default-features = false }
|
||||
anyhow = "1.0.70"
|
||||
convert_case = "0.6.0"
|
||||
csv = "1.2.1"
|
||||
deserr = { version = "0.6.0", features = ["actix-web"]}
|
||||
deserr = { version = "0.6.0", features = ["actix-web"] }
|
||||
either = { version = "1.8.1", features = ["serde"] }
|
||||
enum-iterator = "1.4.0"
|
||||
file-store = { path = "../file-store" }
|
||||
@ -50,6 +50,7 @@ hebrew = ["milli/hebrew"]
|
||||
japanese = ["milli/japanese"]
|
||||
# thai specialized tokenization
|
||||
thai = ["milli/thai"]
|
||||
|
||||
# allow greek specialized tokenization
|
||||
greek = ["milli/greek"]
|
||||
# allow khmer specialized tokenization
|
||||
khmer = ["milli/khmer"]
|
||||
|
@ -252,6 +252,7 @@ InvalidSearchShowRankingScoreDetails , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSearchSort , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSettingsDisplayedAttributes , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSettingsDistinctAttribute , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSettingsProximityPrecision , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSettingsFaceting , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSettingsFilterableAttributes , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSettingsPagination , InvalidRequest , BAD_REQUEST ;
|
||||
@ -386,11 +387,11 @@ impl ErrorCode for HeedError {
|
||||
HeedError::Mdb(MdbError::Invalid) => Code::InvalidStoreFile,
|
||||
HeedError::Io(e) => e.error_code(),
|
||||
HeedError::Mdb(_)
|
||||
| HeedError::Encoding
|
||||
| HeedError::Decoding
|
||||
| HeedError::Encoding(_)
|
||||
| HeedError::Decoding(_)
|
||||
| HeedError::InvalidDatabaseTyping
|
||||
| HeedError::DatabaseClosing
|
||||
| HeedError::BadOpenOptions => Code::Internal,
|
||||
| HeedError::BadOpenOptions { .. } => Code::Internal,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -7,6 +7,7 @@ pub struct RuntimeTogglableFeatures {
|
||||
pub vector_store: bool,
|
||||
pub metrics: bool,
|
||||
pub export_puffin_reports: bool,
|
||||
pub proximity_precision: bool,
|
||||
}
|
||||
|
||||
#[derive(Default, Debug, Clone, Copy)]
|
||||
|
@ -8,6 +8,7 @@ use std::str::FromStr;
|
||||
|
||||
use deserr::{DeserializeError, Deserr, ErrorKind, MergeWithError, ValuePointerRef};
|
||||
use fst::IntoStreamer;
|
||||
use milli::proximity::ProximityPrecision;
|
||||
use milli::update::Setting;
|
||||
use milli::{Criterion, CriterionError, Index, DEFAULT_VALUES_PER_FACET};
|
||||
use serde::{Deserialize, Serialize, Serializer};
|
||||
@ -186,6 +187,9 @@ pub struct Settings<T> {
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSettingsDistinctAttribute>)]
|
||||
pub distinct_attribute: Setting<String>,
|
||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSettingsProximityPrecision>)]
|
||||
pub proximity_precision: Setting<ProximityPrecisionView>,
|
||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSettingsTypoTolerance>)]
|
||||
pub typo_tolerance: Setting<TypoSettings>,
|
||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||
@ -214,6 +218,7 @@ impl Settings<Checked> {
|
||||
separator_tokens: Setting::Reset,
|
||||
dictionary: Setting::Reset,
|
||||
distinct_attribute: Setting::Reset,
|
||||
proximity_precision: Setting::Reset,
|
||||
typo_tolerance: Setting::Reset,
|
||||
faceting: Setting::Reset,
|
||||
pagination: Setting::Reset,
|
||||
@ -234,6 +239,7 @@ impl Settings<Checked> {
|
||||
dictionary,
|
||||
synonyms,
|
||||
distinct_attribute,
|
||||
proximity_precision,
|
||||
typo_tolerance,
|
||||
faceting,
|
||||
pagination,
|
||||
@ -252,6 +258,7 @@ impl Settings<Checked> {
|
||||
dictionary,
|
||||
synonyms,
|
||||
distinct_attribute,
|
||||
proximity_precision,
|
||||
typo_tolerance,
|
||||
faceting,
|
||||
pagination,
|
||||
@ -296,6 +303,7 @@ impl Settings<Unchecked> {
|
||||
separator_tokens: self.separator_tokens,
|
||||
dictionary: self.dictionary,
|
||||
distinct_attribute: self.distinct_attribute,
|
||||
proximity_precision: self.proximity_precision,
|
||||
typo_tolerance: self.typo_tolerance,
|
||||
faceting: self.faceting,
|
||||
pagination: self.pagination,
|
||||
@ -390,6 +398,12 @@ pub fn apply_settings_to_builder(
|
||||
Setting::NotSet => (),
|
||||
}
|
||||
|
||||
match settings.proximity_precision {
|
||||
Setting::Set(ref precision) => builder.set_proximity_precision((*precision).into()),
|
||||
Setting::Reset => builder.reset_proximity_precision(),
|
||||
Setting::NotSet => (),
|
||||
}
|
||||
|
||||
match settings.typo_tolerance {
|
||||
Setting::Set(ref value) => {
|
||||
match value.enabled {
|
||||
@ -509,6 +523,8 @@ pub fn settings(
|
||||
|
||||
let distinct_field = index.distinct_field(rtxn)?.map(String::from);
|
||||
|
||||
let proximity_precision = index.proximity_precision(rtxn)?.map(ProximityPrecisionView::from);
|
||||
|
||||
let synonyms = index.user_defined_synonyms(rtxn)?;
|
||||
|
||||
let min_typo_word_len = MinWordSizeTyposSetting {
|
||||
@ -532,7 +548,10 @@ pub fn settings(
|
||||
|
||||
let faceting = FacetingSettings {
|
||||
max_values_per_facet: Setting::Set(
|
||||
index.max_values_per_facet(rtxn)?.unwrap_or(DEFAULT_VALUES_PER_FACET),
|
||||
index
|
||||
.max_values_per_facet(rtxn)?
|
||||
.map(|x| x as usize)
|
||||
.unwrap_or(DEFAULT_VALUES_PER_FACET),
|
||||
),
|
||||
sort_facet_values_by: Setting::Set(
|
||||
index
|
||||
@ -545,7 +564,10 @@ pub fn settings(
|
||||
|
||||
let pagination = PaginationSettings {
|
||||
max_total_hits: Setting::Set(
|
||||
index.pagination_max_total_hits(rtxn)?.unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS),
|
||||
index
|
||||
.pagination_max_total_hits(rtxn)?
|
||||
.map(|x| x as usize)
|
||||
.unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS),
|
||||
),
|
||||
};
|
||||
|
||||
@ -569,6 +591,10 @@ pub fn settings(
|
||||
Some(field) => Setting::Set(field),
|
||||
None => Setting::Reset,
|
||||
},
|
||||
proximity_precision: match proximity_precision {
|
||||
Some(precision) => Setting::Set(precision),
|
||||
None => Setting::Reset,
|
||||
},
|
||||
synonyms: Setting::Set(synonyms),
|
||||
typo_tolerance: Setting::Set(typo_tolerance),
|
||||
faceting: Setting::Set(faceting),
|
||||
@ -673,6 +699,31 @@ impl From<RankingRuleView> for Criterion {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserr, Serialize, Deserialize)]
|
||||
#[serde(deny_unknown_fields, rename_all = "camelCase")]
|
||||
#[deserr(error = DeserrJsonError<InvalidSettingsProximityPrecision>, rename_all = camelCase, deny_unknown_fields)]
|
||||
pub enum ProximityPrecisionView {
|
||||
WordScale,
|
||||
AttributeScale,
|
||||
}
|
||||
|
||||
impl From<ProximityPrecision> for ProximityPrecisionView {
|
||||
fn from(value: ProximityPrecision) -> Self {
|
||||
match value {
|
||||
ProximityPrecision::WordScale => ProximityPrecisionView::WordScale,
|
||||
ProximityPrecision::AttributeScale => ProximityPrecisionView::AttributeScale,
|
||||
}
|
||||
}
|
||||
}
|
||||
impl From<ProximityPrecisionView> for ProximityPrecision {
|
||||
fn from(value: ProximityPrecisionView) -> Self {
|
||||
match value {
|
||||
ProximityPrecisionView::WordScale => ProximityPrecision::WordScale,
|
||||
ProximityPrecisionView::AttributeScale => ProximityPrecision::AttributeScale,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub(crate) mod test {
|
||||
use super::*;
|
||||
@ -692,6 +743,7 @@ pub(crate) mod test {
|
||||
dictionary: Setting::NotSet,
|
||||
synonyms: Setting::NotSet,
|
||||
distinct_attribute: Setting::NotSet,
|
||||
proximity_precision: Setting::NotSet,
|
||||
typo_tolerance: Setting::NotSet,
|
||||
faceting: Setting::NotSet,
|
||||
pagination: Setting::NotSet,
|
||||
@ -716,6 +768,7 @@ pub(crate) mod test {
|
||||
dictionary: Setting::NotSet,
|
||||
synonyms: Setting::NotSet,
|
||||
distinct_attribute: Setting::NotSet,
|
||||
proximity_precision: Setting::NotSet,
|
||||
typo_tolerance: Setting::NotSet,
|
||||
faceting: Setting::NotSet,
|
||||
pagination: Setting::NotSet,
|
||||
|
@ -39,7 +39,7 @@ byte-unit = { version = "4.0.19", default-features = false, features = [
|
||||
bytes = "1.4.0"
|
||||
clap = { version = "4.2.1", features = ["derive", "env"] }
|
||||
crossbeam-channel = "0.5.8"
|
||||
deserr = { version = "0.6.0", features = ["actix-web"]}
|
||||
deserr = { version = "0.6.0", features = ["actix-web"] }
|
||||
dump = { path = "../dump" }
|
||||
either = "1.8.1"
|
||||
env_logger = "0.10.0"
|
||||
@ -150,6 +150,7 @@ hebrew = ["meilisearch-types/hebrew"]
|
||||
japanese = ["meilisearch-types/japanese"]
|
||||
thai = ["meilisearch-types/thai"]
|
||||
greek = ["meilisearch-types/greek"]
|
||||
khmer = ["meilisearch-types/khmer"]
|
||||
|
||||
[package.metadata.mini-dashboard]
|
||||
assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.11/build.zip"
|
||||
|
@ -397,6 +397,7 @@ pub fn configure_data(
|
||||
.app_data(web::Data::from(analytics))
|
||||
.app_data(
|
||||
web::JsonConfig::default()
|
||||
.limit(http_payload_size_limit)
|
||||
.content_type(|mime| mime == mime::APPLICATION_JSON)
|
||||
.error_handler(|err, req: &HttpRequest| match err {
|
||||
JsonPayloadError::ContentType => match req.headers().get(CONTENT_TYPE) {
|
||||
|
@ -48,6 +48,8 @@ pub struct RuntimeTogglableFeatures {
|
||||
pub metrics: Option<bool>,
|
||||
#[deserr(default)]
|
||||
pub export_puffin_reports: Option<bool>,
|
||||
#[deserr(default)]
|
||||
pub proximity_precision: Option<bool>,
|
||||
}
|
||||
|
||||
async fn patch_features(
|
||||
@ -70,6 +72,10 @@ async fn patch_features(
|
||||
.0
|
||||
.export_puffin_reports
|
||||
.unwrap_or(old_features.export_puffin_reports),
|
||||
proximity_precision: new_features
|
||||
.0
|
||||
.proximity_precision
|
||||
.unwrap_or(old_features.proximity_precision),
|
||||
};
|
||||
|
||||
// explicitly destructure for analytics rather than using the `Serialize` implementation, because
|
||||
@ -80,6 +86,7 @@ async fn patch_features(
|
||||
vector_store,
|
||||
metrics,
|
||||
export_puffin_reports,
|
||||
proximity_precision,
|
||||
} = new_features;
|
||||
|
||||
analytics.publish(
|
||||
@ -89,6 +96,7 @@ async fn patch_features(
|
||||
"vector_store": vector_store,
|
||||
"metrics": metrics,
|
||||
"export_puffin_reports": export_puffin_reports,
|
||||
"proximity_precision": proximity_precision,
|
||||
}),
|
||||
Some(&req),
|
||||
);
|
||||
|
@ -3,7 +3,7 @@ use std::io::ErrorKind;
|
||||
use actix_web::http::header::CONTENT_TYPE;
|
||||
use actix_web::web::Data;
|
||||
use actix_web::{web, HttpMessage, HttpRequest, HttpResponse};
|
||||
use bstr::ByteSlice;
|
||||
use bstr::ByteSlice as _;
|
||||
use deserr::actix_web::{AwebJson, AwebQueryParameter};
|
||||
use deserr::Deserr;
|
||||
use futures::StreamExt;
|
||||
|
@ -78,6 +78,7 @@ macro_rules! make_setting_route {
|
||||
|
||||
let body = body.into_inner();
|
||||
|
||||
#[allow(clippy::redundant_closure_call)]
|
||||
$analytics(&body, &req);
|
||||
|
||||
let new_settings = Settings {
|
||||
@ -434,6 +435,30 @@ make_setting_route!(
|
||||
}
|
||||
);
|
||||
|
||||
make_setting_route!(
|
||||
"/proximity-precision",
|
||||
put,
|
||||
meilisearch_types::settings::ProximityPrecisionView,
|
||||
meilisearch_types::deserr::DeserrJsonError<
|
||||
meilisearch_types::error::deserr_codes::InvalidSettingsProximityPrecision,
|
||||
>,
|
||||
proximity_precision,
|
||||
"proximityPrecision",
|
||||
analytics,
|
||||
|precision: &Option<meilisearch_types::settings::ProximityPrecisionView>, req: &HttpRequest| {
|
||||
use serde_json::json;
|
||||
analytics.publish(
|
||||
"ProximityPrecision Updated".to_string(),
|
||||
json!({
|
||||
"proximity_precision": {
|
||||
"set": precision.is_some(),
|
||||
}
|
||||
}),
|
||||
Some(req),
|
||||
);
|
||||
}
|
||||
);
|
||||
|
||||
make_setting_route!(
|
||||
"/ranking-rules",
|
||||
put,
|
||||
@ -540,6 +565,7 @@ generate_configure!(
|
||||
displayed_attributes,
|
||||
searchable_attributes,
|
||||
distinct_attribute,
|
||||
proximity_precision,
|
||||
stop_words,
|
||||
separator_tokens,
|
||||
non_separator_tokens,
|
||||
@ -593,6 +619,9 @@ pub async fn update_all(
|
||||
"distinct_attribute": {
|
||||
"set": new_settings.distinct_attribute.as_ref().set().is_some()
|
||||
},
|
||||
"proximity_precision": {
|
||||
"set": new_settings.proximity_precision.as_ref().set().is_some()
|
||||
},
|
||||
"typo_tolerance": {
|
||||
"enabled": new_settings.typo_tolerance
|
||||
.as_ref()
|
||||
|
@ -46,49 +46,46 @@ pub async fn multi_search_with_post(
|
||||
// Explicitly expect a `(ResponseError, usize)` for the error type rather than `ResponseError` only,
|
||||
// so that `?` doesn't work if it doesn't use `with_index`, ensuring that it is not forgotten in case of code
|
||||
// changes.
|
||||
let search_results: Result<_, (ResponseError, usize)> = (|| {
|
||||
async {
|
||||
let mut search_results = Vec::with_capacity(queries.len());
|
||||
for (query_index, (index_uid, mut query)) in
|
||||
queries.into_iter().map(SearchQueryWithIndex::into_index_query).enumerate()
|
||||
let search_results: Result<_, (ResponseError, usize)> = async {
|
||||
let mut search_results = Vec::with_capacity(queries.len());
|
||||
for (query_index, (index_uid, mut query)) in
|
||||
queries.into_iter().map(SearchQueryWithIndex::into_index_query).enumerate()
|
||||
{
|
||||
debug!("multi-search #{query_index}: called with params: {:?}", query);
|
||||
|
||||
// Check index from API key
|
||||
if !index_scheduler.filters().is_index_authorized(&index_uid) {
|
||||
return Err(AuthenticationError::InvalidToken).with_index(query_index);
|
||||
}
|
||||
// Apply search rules from tenant token
|
||||
if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid)
|
||||
{
|
||||
debug!("multi-search #{query_index}: called with params: {:?}", query);
|
||||
add_search_rules(&mut query, search_rules);
|
||||
}
|
||||
|
||||
// Check index from API key
|
||||
if !index_scheduler.filters().is_index_authorized(&index_uid) {
|
||||
return Err(AuthenticationError::InvalidToken).with_index(query_index);
|
||||
}
|
||||
// Apply search rules from tenant token
|
||||
if let Some(search_rules) =
|
||||
index_scheduler.filters().get_index_search_rules(&index_uid)
|
||||
{
|
||||
add_search_rules(&mut query, search_rules);
|
||||
}
|
||||
let index = index_scheduler
|
||||
.index(&index_uid)
|
||||
.map_err(|err| {
|
||||
let mut err = ResponseError::from(err);
|
||||
// Patch the HTTP status code to 400 as it defaults to 404 for `index_not_found`, but
|
||||
// here the resource not found is not part of the URL.
|
||||
err.code = StatusCode::BAD_REQUEST;
|
||||
err
|
||||
})
|
||||
.with_index(query_index)?;
|
||||
|
||||
let index = index_scheduler
|
||||
.index(&index_uid)
|
||||
.map_err(|err| {
|
||||
let mut err = ResponseError::from(err);
|
||||
// Patch the HTTP status code to 400 as it defaults to 404 for `index_not_found`, but
|
||||
// here the resource not found is not part of the URL.
|
||||
err.code = StatusCode::BAD_REQUEST;
|
||||
err
|
||||
})
|
||||
let search_result =
|
||||
tokio::task::spawn_blocking(move || perform_search(&index, query, features))
|
||||
.await
|
||||
.with_index(query_index)?;
|
||||
|
||||
let search_result =
|
||||
tokio::task::spawn_blocking(move || perform_search(&index, query, features))
|
||||
.await
|
||||
.with_index(query_index)?;
|
||||
|
||||
search_results.push(SearchResultWithIndex {
|
||||
index_uid: index_uid.into_inner(),
|
||||
result: search_result.with_index(query_index)?,
|
||||
});
|
||||
}
|
||||
Ok(search_results)
|
||||
search_results.push(SearchResultWithIndex {
|
||||
index_uid: index_uid.into_inner(),
|
||||
result: search_result.with_index(query_index)?,
|
||||
});
|
||||
}
|
||||
})()
|
||||
Ok(search_results)
|
||||
}
|
||||
.await;
|
||||
|
||||
if search_results.is_ok() {
|
||||
|
@ -360,6 +360,7 @@ fn prepare_search<'t>(
|
||||
let max_total_hits = index
|
||||
.pagination_max_total_hits(rtxn)
|
||||
.map_err(milli::Error::from)?
|
||||
.map(|x| x as usize)
|
||||
.unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS);
|
||||
|
||||
search.exhaustive_number_hits(is_finite_pagination);
|
||||
@ -586,6 +587,7 @@ pub fn perform_search(
|
||||
let max_values_by_facet = index
|
||||
.max_values_per_facet(&rtxn)
|
||||
.map_err(milli::Error::from)?
|
||||
.map(|x| x as usize)
|
||||
.unwrap_or(DEFAULT_VALUES_PER_FACET);
|
||||
facet_distribution.max_values_per_facet(max_values_by_facet);
|
||||
|
||||
|
Binary file not shown.
@ -5,9 +5,11 @@ pub mod service;
|
||||
|
||||
use std::fmt::{self, Display};
|
||||
|
||||
#[allow(unused)]
|
||||
pub use index::{GetAllDocumentsOptions, GetDocumentOptions};
|
||||
use meili_snap::json_string;
|
||||
use serde::{Deserialize, Serialize};
|
||||
#[allow(unused)]
|
||||
pub use server::{default_settings, Server};
|
||||
|
||||
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)]
|
||||
|
@ -20,6 +20,8 @@ pub enum GetDump {
|
||||
RubyGemsWithSettingsV4,
|
||||
|
||||
TestV5,
|
||||
|
||||
TestV6WithExperimental,
|
||||
}
|
||||
|
||||
impl GetDump {
|
||||
@ -68,6 +70,10 @@ impl GetDump {
|
||||
GetDump::TestV5 => {
|
||||
exist_relative_path!("tests/assets/v5_v0.28.0_test_dump.dump").into()
|
||||
}
|
||||
GetDump::TestV6WithExperimental => exist_relative_path!(
|
||||
"tests/assets/v6_v1.6.0_use_deactivated_experimental_setting.dump"
|
||||
)
|
||||
.into(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -59,6 +59,7 @@ async fn import_dump_v1_movie_raw() {
|
||||
"dictionary": [],
|
||||
"synonyms": {},
|
||||
"distinctAttribute": null,
|
||||
"proximityPrecision": null,
|
||||
"typoTolerance": {
|
||||
"enabled": true,
|
||||
"minWordSizeForTypos": {
|
||||
@ -219,6 +220,7 @@ async fn import_dump_v1_movie_with_settings() {
|
||||
"dictionary": [],
|
||||
"synonyms": {},
|
||||
"distinctAttribute": null,
|
||||
"proximityPrecision": null,
|
||||
"typoTolerance": {
|
||||
"enabled": true,
|
||||
"minWordSizeForTypos": {
|
||||
@ -365,6 +367,7 @@ async fn import_dump_v1_rubygems_with_settings() {
|
||||
"dictionary": [],
|
||||
"synonyms": {},
|
||||
"distinctAttribute": null,
|
||||
"proximityPrecision": null,
|
||||
"typoTolerance": {
|
||||
"enabled": true,
|
||||
"minWordSizeForTypos": {
|
||||
@ -497,6 +500,7 @@ async fn import_dump_v2_movie_raw() {
|
||||
"dictionary": [],
|
||||
"synonyms": {},
|
||||
"distinctAttribute": null,
|
||||
"proximityPrecision": null,
|
||||
"typoTolerance": {
|
||||
"enabled": true,
|
||||
"minWordSizeForTypos": {
|
||||
@ -641,6 +645,7 @@ async fn import_dump_v2_movie_with_settings() {
|
||||
"dictionary": [],
|
||||
"synonyms": {},
|
||||
"distinctAttribute": null,
|
||||
"proximityPrecision": null,
|
||||
"typoTolerance": {
|
||||
"enabled": true,
|
||||
"minWordSizeForTypos": {
|
||||
@ -784,6 +789,7 @@ async fn import_dump_v2_rubygems_with_settings() {
|
||||
"dictionary": [],
|
||||
"synonyms": {},
|
||||
"distinctAttribute": null,
|
||||
"proximityPrecision": null,
|
||||
"typoTolerance": {
|
||||
"enabled": true,
|
||||
"minWordSizeForTypos": {
|
||||
@ -916,6 +922,7 @@ async fn import_dump_v3_movie_raw() {
|
||||
"dictionary": [],
|
||||
"synonyms": {},
|
||||
"distinctAttribute": null,
|
||||
"proximityPrecision": null,
|
||||
"typoTolerance": {
|
||||
"enabled": true,
|
||||
"minWordSizeForTypos": {
|
||||
@ -1060,6 +1067,7 @@ async fn import_dump_v3_movie_with_settings() {
|
||||
"dictionary": [],
|
||||
"synonyms": {},
|
||||
"distinctAttribute": null,
|
||||
"proximityPrecision": null,
|
||||
"typoTolerance": {
|
||||
"enabled": true,
|
||||
"minWordSizeForTypos": {
|
||||
@ -1203,6 +1211,7 @@ async fn import_dump_v3_rubygems_with_settings() {
|
||||
"dictionary": [],
|
||||
"synonyms": {},
|
||||
"distinctAttribute": null,
|
||||
"proximityPrecision": null,
|
||||
"typoTolerance": {
|
||||
"enabled": true,
|
||||
"minWordSizeForTypos": {
|
||||
@ -1335,6 +1344,7 @@ async fn import_dump_v4_movie_raw() {
|
||||
"dictionary": [],
|
||||
"synonyms": {},
|
||||
"distinctAttribute": null,
|
||||
"proximityPrecision": null,
|
||||
"typoTolerance": {
|
||||
"enabled": true,
|
||||
"minWordSizeForTypos": {
|
||||
@ -1479,6 +1489,7 @@ async fn import_dump_v4_movie_with_settings() {
|
||||
"dictionary": [],
|
||||
"synonyms": {},
|
||||
"distinctAttribute": null,
|
||||
"proximityPrecision": null,
|
||||
"typoTolerance": {
|
||||
"enabled": true,
|
||||
"minWordSizeForTypos": {
|
||||
@ -1622,6 +1633,7 @@ async fn import_dump_v4_rubygems_with_settings() {
|
||||
"dictionary": [],
|
||||
"synonyms": {},
|
||||
"distinctAttribute": null,
|
||||
"proximityPrecision": null,
|
||||
"typoTolerance": {
|
||||
"enabled": true,
|
||||
"minWordSizeForTypos": {
|
||||
@ -1810,3 +1822,108 @@ async fn import_dump_v5() {
|
||||
json_string!(tasks, { ".results[].details.dumpUid" => "[uid]", ".results[].duration" => "[duration]" , ".results[].startedAt" => "[date]" , ".results[].finishedAt" => "[date]" })
|
||||
);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn import_dump_v6_containing_experimental_features() {
|
||||
let temp = tempfile::tempdir().unwrap();
|
||||
|
||||
let options = Opt {
|
||||
import_dump: Some(GetDump::TestV6WithExperimental.path()),
|
||||
..default_settings(temp.path())
|
||||
};
|
||||
let mut server = Server::new_auth_with_options(options, temp).await;
|
||||
server.use_api_key("MASTER_KEY");
|
||||
|
||||
let (indexes, code) = server.list_indexes(None, None).await;
|
||||
assert_eq!(code, 200, "{indexes}");
|
||||
|
||||
assert_eq!(indexes["results"].as_array().unwrap().len(), 1);
|
||||
assert_eq!(indexes["results"][0]["uid"], json!("movies"));
|
||||
assert_eq!(indexes["results"][0]["primaryKey"], json!("id"));
|
||||
|
||||
let (response, code) = server.get_features().await;
|
||||
meili_snap::snapshot!(code, @"200 OK");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"scoreDetails": false,
|
||||
"vectorStore": false,
|
||||
"metrics": false,
|
||||
"exportPuffinReports": false,
|
||||
"proximityPrecision": false
|
||||
}
|
||||
"###);
|
||||
|
||||
let index = server.index("movies");
|
||||
|
||||
let (response, code) = index.settings().await;
|
||||
meili_snap::snapshot!(code, @"200 OK");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"displayedAttributes": [
|
||||
"*"
|
||||
],
|
||||
"searchableAttributes": [
|
||||
"*"
|
||||
],
|
||||
"filterableAttributes": [],
|
||||
"sortableAttributes": [],
|
||||
"rankingRules": [
|
||||
"words",
|
||||
"typo",
|
||||
"proximity"
|
||||
],
|
||||
"stopWords": [],
|
||||
"nonSeparatorTokens": [],
|
||||
"separatorTokens": [],
|
||||
"dictionary": [],
|
||||
"synonyms": {},
|
||||
"distinctAttribute": null,
|
||||
"proximityPrecision": "attributeScale",
|
||||
"typoTolerance": {
|
||||
"enabled": true,
|
||||
"minWordSizeForTypos": {
|
||||
"oneTypo": 5,
|
||||
"twoTypos": 9
|
||||
},
|
||||
"disableOnWords": [],
|
||||
"disableOnAttributes": []
|
||||
},
|
||||
"faceting": {
|
||||
"maxValuesPerFacet": 100,
|
||||
"sortFacetValuesBy": {
|
||||
"*": "alpha"
|
||||
}
|
||||
},
|
||||
"pagination": {
|
||||
"maxTotalHits": 1000
|
||||
}
|
||||
}
|
||||
"###);
|
||||
|
||||
// the expected order is [1, 3, 2] instead of [3, 1, 2]
|
||||
// because the attribute scale doesn't make the difference between 1 and 3.
|
||||
index
|
||||
.search(json!({"q": "the soup of day"}), |response, code| {
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response["hits"]), @r###"
|
||||
[
|
||||
{
|
||||
"id": 1,
|
||||
"a": "Soup of the day",
|
||||
"b": "many the fish"
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"a": "the Soup of day",
|
||||
"b": "many the fish"
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"a": "Soup of day",
|
||||
"b": "many the lazy fish"
|
||||
}
|
||||
]
|
||||
"###);
|
||||
})
|
||||
.await;
|
||||
}
|
||||
|
@ -21,7 +21,8 @@ async fn experimental_features() {
|
||||
"scoreDetails": false,
|
||||
"vectorStore": false,
|
||||
"metrics": false,
|
||||
"exportPuffinReports": false
|
||||
"exportPuffinReports": false,
|
||||
"proximityPrecision": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@ -33,7 +34,8 @@ async fn experimental_features() {
|
||||
"scoreDetails": false,
|
||||
"vectorStore": true,
|
||||
"metrics": false,
|
||||
"exportPuffinReports": false
|
||||
"exportPuffinReports": false,
|
||||
"proximityPrecision": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@ -45,7 +47,8 @@ async fn experimental_features() {
|
||||
"scoreDetails": false,
|
||||
"vectorStore": true,
|
||||
"metrics": false,
|
||||
"exportPuffinReports": false
|
||||
"exportPuffinReports": false,
|
||||
"proximityPrecision": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@ -58,7 +61,8 @@ async fn experimental_features() {
|
||||
"scoreDetails": false,
|
||||
"vectorStore": true,
|
||||
"metrics": false,
|
||||
"exportPuffinReports": false
|
||||
"exportPuffinReports": false,
|
||||
"proximityPrecision": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@ -71,7 +75,8 @@ async fn experimental_features() {
|
||||
"scoreDetails": false,
|
||||
"vectorStore": true,
|
||||
"metrics": false,
|
||||
"exportPuffinReports": false
|
||||
"exportPuffinReports": false,
|
||||
"proximityPrecision": false
|
||||
}
|
||||
"###);
|
||||
}
|
||||
@ -91,7 +96,8 @@ async fn experimental_feature_metrics() {
|
||||
"scoreDetails": false,
|
||||
"vectorStore": false,
|
||||
"metrics": true,
|
||||
"exportPuffinReports": false
|
||||
"exportPuffinReports": false,
|
||||
"proximityPrecision": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@ -146,7 +152,7 @@ async fn errors() {
|
||||
meili_snap::snapshot!(code, @"400 Bad Request");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"message": "Unknown field `NotAFeature`: expected one of `scoreDetails`, `vectorStore`, `metrics`, `exportPuffinReports`",
|
||||
"message": "Unknown field `NotAFeature`: expected one of `scoreDetails`, `vectorStore`, `metrics`, `exportPuffinReports`, `proximityPrecision`",
|
||||
"code": "bad_request",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#bad_request"
|
||||
|
@ -4,23 +4,111 @@ use once_cell::sync::Lazy;
|
||||
use crate::common::{Server, Value};
|
||||
use crate::json;
|
||||
|
||||
pub(self) static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
|
||||
static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
|
||||
json!([
|
||||
{"productId": 1, "shopId": 1},
|
||||
{"productId": 2, "shopId": 1},
|
||||
{"productId": 3, "shopId": 2},
|
||||
{"productId": 4, "shopId": 2},
|
||||
{"productId": 5, "shopId": 3},
|
||||
{"productId": 6, "shopId": 3},
|
||||
{"productId": 7, "shopId": 4},
|
||||
{"productId": 8, "shopId": 4},
|
||||
{"productId": 9, "shopId": 5},
|
||||
{"productId": 10, "shopId": 5}
|
||||
{
|
||||
"id": 1,
|
||||
"description": "Leather Jacket",
|
||||
"brand": "Lee Jeans",
|
||||
"product_id": "123456",
|
||||
"color": "Brown"
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"description": "Leather Jacket",
|
||||
"brand": "Lee Jeans",
|
||||
"product_id": "123456",
|
||||
"color": "Black"
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"description": "Leather Jacket",
|
||||
"brand": "Lee Jeans",
|
||||
"product_id": "123456",
|
||||
"color": "Blue"
|
||||
},
|
||||
{
|
||||
"id": 4,
|
||||
"description": "T-Shirt",
|
||||
"brand": "Nike",
|
||||
"product_id": "789012",
|
||||
"color": "Red"
|
||||
},
|
||||
{
|
||||
"id": 5,
|
||||
"description": "T-Shirt",
|
||||
"brand": "Nike",
|
||||
"product_id": "789012",
|
||||
"color": "Blue"
|
||||
},
|
||||
{
|
||||
"id": 6,
|
||||
"description": "Running Shoes",
|
||||
"brand": "Adidas",
|
||||
"product_id": "456789",
|
||||
"color": "Black"
|
||||
},
|
||||
{
|
||||
"id": 7,
|
||||
"description": "Running Shoes",
|
||||
"brand": "Adidas",
|
||||
"product_id": "456789",
|
||||
"color": "White"
|
||||
},
|
||||
{
|
||||
"id": 8,
|
||||
"description": "Hoodie",
|
||||
"brand": "Puma",
|
||||
"product_id": "987654",
|
||||
"color": "Gray"
|
||||
},
|
||||
{
|
||||
"id": 9,
|
||||
"description": "Sweater",
|
||||
"brand": "Gap",
|
||||
"product_id": "234567",
|
||||
"color": "Green"
|
||||
},
|
||||
{
|
||||
"id": 10,
|
||||
"description": "Sweater",
|
||||
"brand": "Gap",
|
||||
"product_id": "234567",
|
||||
"color": "Red"
|
||||
},
|
||||
{
|
||||
"id": 11,
|
||||
"description": "Sweater",
|
||||
"brand": "Gap",
|
||||
"product_id": "234567",
|
||||
"color": "Blue"
|
||||
},
|
||||
{
|
||||
"id": 12,
|
||||
"description": "Jeans",
|
||||
"brand": "Levi's",
|
||||
"product_id": "345678",
|
||||
"color": "Indigo"
|
||||
},
|
||||
{
|
||||
"id": 13,
|
||||
"description": "Jeans",
|
||||
"brand": "Levi's",
|
||||
"product_id": "345678",
|
||||
"color": "Black"
|
||||
},
|
||||
{
|
||||
"id": 14,
|
||||
"description": "Jeans",
|
||||
"brand": "Levi's",
|
||||
"product_id": "345678",
|
||||
"color": "Stone Wash"
|
||||
}
|
||||
])
|
||||
});
|
||||
|
||||
pub(self) static DOCUMENT_PRIMARY_KEY: &str = "productId";
|
||||
pub(self) static DOCUMENT_DISTINCT_KEY: &str = "shopId";
|
||||
static DOCUMENT_PRIMARY_KEY: &str = "id";
|
||||
static DOCUMENT_DISTINCT_KEY: &str = "product_id";
|
||||
|
||||
/// testing: https://github.com/meilisearch/meilisearch/issues/4078
|
||||
#[actix_rt::test]
|
||||
@ -33,31 +121,121 @@ async fn distinct_search_with_offset_no_ranking() {
|
||||
index.update_distinct_attribute(json!(DOCUMENT_DISTINCT_KEY)).await;
|
||||
index.wait_task(1).await;
|
||||
|
||||
fn get_hits(Value(response): Value) -> Vec<i64> {
|
||||
fn get_hits(response: &Value) -> Vec<&str> {
|
||||
let hits_array = response["hits"].as_array().unwrap();
|
||||
hits_array.iter().map(|h| h[DOCUMENT_DISTINCT_KEY].as_i64().unwrap()).collect::<Vec<_>>()
|
||||
hits_array.iter().map(|h| h[DOCUMENT_DISTINCT_KEY].as_str().unwrap()).collect::<Vec<_>>()
|
||||
}
|
||||
|
||||
let (response, code) = index.search_post(json!({"limit": 2, "offset": 0})).await;
|
||||
let hits = get_hits(response);
|
||||
let (response, code) = index.search_post(json!({"offset": 0, "limit": 2})).await;
|
||||
let hits = get_hits(&response);
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(hits.len(), @"2");
|
||||
snapshot!(format!("{:?}", hits), @"[1, 2]");
|
||||
snapshot!(format!("{:?}", hits), @r#"["123456", "789012"]"#);
|
||||
snapshot!(response["estimatedTotalHits"] , @"11");
|
||||
|
||||
let (response, code) = index.search_post(json!({"limit": 2, "offset": 2})).await;
|
||||
let hits = get_hits(response);
|
||||
let (response, code) = index.search_post(json!({"offset": 2, "limit": 2})).await;
|
||||
let hits = get_hits(&response);
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(hits.len(), @"2");
|
||||
snapshot!(format!("{:?}", hits), @"[3, 4]");
|
||||
snapshot!(format!("{:?}", hits), @r#"["456789", "987654"]"#);
|
||||
snapshot!(response["estimatedTotalHits"], @"10");
|
||||
|
||||
let (response, code) = index.search_post(json!({"limit": 10, "offset": 4})).await;
|
||||
let hits = get_hits(response);
|
||||
let (response, code) = index.search_post(json!({"offset": 4, "limit": 2})).await;
|
||||
let hits = get_hits(&response);
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(hits.len(), @"2");
|
||||
snapshot!(format!("{:?}", hits), @r#"["234567", "345678"]"#);
|
||||
snapshot!(response["estimatedTotalHits"], @"6");
|
||||
|
||||
let (response, code) = index.search_post(json!({"offset": 5, "limit": 2})).await;
|
||||
let hits = get_hits(&response);
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(hits.len(), @"1");
|
||||
snapshot!(format!("{:?}", hits), @"[5]");
|
||||
snapshot!(format!("{:?}", hits), @r#"["345678"]"#);
|
||||
snapshot!(response["estimatedTotalHits"], @"6");
|
||||
|
||||
let (response, code) = index.search_post(json!({"limit": 10, "offset": 5})).await;
|
||||
let hits = get_hits(response);
|
||||
let (response, code) = index.search_post(json!({"offset": 6, "limit": 2})).await;
|
||||
let hits = get_hits(&response);
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(hits.len(), @"0");
|
||||
snapshot!(format!("{:?}", hits), @r#"[]"#);
|
||||
snapshot!(response["estimatedTotalHits"], @"6");
|
||||
|
||||
let (response, code) = index.search_post(json!({"offset": 7, "limit": 2})).await;
|
||||
let hits = get_hits(&response);
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(hits.len(), @"0");
|
||||
snapshot!(format!("{:?}", hits), @r#"[]"#);
|
||||
snapshot!(response["estimatedTotalHits"], @"6");
|
||||
}
|
||||
|
||||
/// testing: https://github.com/meilisearch/meilisearch/issues/4130
|
||||
#[actix_rt::test]
|
||||
async fn distinct_search_with_pagination_no_ranking() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("test");
|
||||
|
||||
let documents = DOCUMENTS.clone();
|
||||
index.add_documents(documents, Some(DOCUMENT_PRIMARY_KEY)).await;
|
||||
index.update_distinct_attribute(json!(DOCUMENT_DISTINCT_KEY)).await;
|
||||
index.wait_task(1).await;
|
||||
|
||||
fn get_hits(response: &Value) -> Vec<&str> {
|
||||
let hits_array = response["hits"].as_array().unwrap();
|
||||
hits_array.iter().map(|h| h[DOCUMENT_DISTINCT_KEY].as_str().unwrap()).collect::<Vec<_>>()
|
||||
}
|
||||
|
||||
let (response, code) = index.search_post(json!({"page": 0, "hitsPerPage": 2})).await;
|
||||
let hits = get_hits(&response);
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(hits.len(), @"0");
|
||||
snapshot!(format!("{:?}", hits), @r#"[]"#);
|
||||
snapshot!(response["page"], @"0");
|
||||
snapshot!(response["totalPages"], @"3");
|
||||
snapshot!(response["totalHits"], @"6");
|
||||
|
||||
let (response, code) = index.search_post(json!({"page": 1, "hitsPerPage": 2})).await;
|
||||
let hits = get_hits(&response);
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(hits.len(), @"2");
|
||||
snapshot!(format!("{:?}", hits), @r#"["123456", "789012"]"#);
|
||||
snapshot!(response["page"], @"1");
|
||||
snapshot!(response["totalPages"], @"3");
|
||||
snapshot!(response["totalHits"], @"6");
|
||||
|
||||
let (response, code) = index.search_post(json!({"page": 2, "hitsPerPage": 2})).await;
|
||||
let hits = get_hits(&response);
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(hits.len(), @"2");
|
||||
snapshot!(format!("{:?}", hits), @r#"["456789", "987654"]"#);
|
||||
snapshot!(response["page"], @"2");
|
||||
snapshot!(response["totalPages"], @"3");
|
||||
snapshot!(response["totalHits"], @"6");
|
||||
|
||||
let (response, code) = index.search_post(json!({"page": 3, "hitsPerPage": 2})).await;
|
||||
let hits = get_hits(&response);
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(hits.len(), @"2");
|
||||
snapshot!(format!("{:?}", hits), @r#"["234567", "345678"]"#);
|
||||
snapshot!(response["page"], @"3");
|
||||
snapshot!(response["totalPages"], @"3");
|
||||
snapshot!(response["totalHits"], @"6");
|
||||
|
||||
let (response, code) = index.search_post(json!({"page": 4, "hitsPerPage": 2})).await;
|
||||
let hits = get_hits(&response);
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(hits.len(), @"0");
|
||||
snapshot!(format!("{:?}", hits), @r#"[]"#);
|
||||
snapshot!(response["page"], @"4");
|
||||
snapshot!(response["totalPages"], @"3");
|
||||
snapshot!(response["totalHits"], @"6");
|
||||
|
||||
let (response, code) = index.search_post(json!({"page": 2, "hitsPerPage": 3})).await;
|
||||
let hits = get_hits(&response);
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(hits.len(), @"3");
|
||||
snapshot!(format!("{:?}", hits), @r#"["987654", "234567", "345678"]"#);
|
||||
snapshot!(response["page"], @"2");
|
||||
snapshot!(response["totalPages"], @"2");
|
||||
snapshot!(response["totalHits"], @"6");
|
||||
}
|
||||
|
@ -4,7 +4,7 @@ use once_cell::sync::Lazy;
|
||||
use crate::common::{Server, Value};
|
||||
use crate::json;
|
||||
|
||||
pub(self) static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
|
||||
static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
|
||||
json!([
|
||||
{
|
||||
"title": "Shazam!",
|
||||
|
@ -4,7 +4,7 @@ use once_cell::sync::Lazy;
|
||||
use crate::common::{Server, Value};
|
||||
use crate::json;
|
||||
|
||||
pub(self) static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
|
||||
static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
|
||||
json!([
|
||||
{
|
||||
"id": 1,
|
||||
|
@ -15,7 +15,7 @@ use once_cell::sync::Lazy;
|
||||
use crate::common::{Server, Value};
|
||||
use crate::json;
|
||||
|
||||
pub(self) static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
|
||||
static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
|
||||
json!([
|
||||
{
|
||||
"title": "Shazam!",
|
||||
@ -40,7 +40,7 @@ pub(self) static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
|
||||
])
|
||||
});
|
||||
|
||||
pub(self) static NESTED_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
|
||||
static NESTED_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
|
||||
json!([
|
||||
{
|
||||
"id": 852,
|
||||
|
@ -54,7 +54,7 @@ async fn get_settings() {
|
||||
let (response, code) = index.settings().await;
|
||||
assert_eq!(code, 200);
|
||||
let settings = response.as_object().unwrap();
|
||||
assert_eq!(settings.keys().len(), 14);
|
||||
assert_eq!(settings.keys().len(), 15);
|
||||
assert_eq!(settings["displayedAttributes"], json!(["*"]));
|
||||
assert_eq!(settings["searchableAttributes"], json!(["*"]));
|
||||
assert_eq!(settings["filterableAttributes"], json!([]));
|
||||
|
@ -1,4 +1,5 @@
|
||||
mod distinct;
|
||||
mod errors;
|
||||
mod get_settings;
|
||||
mod proximity_settings;
|
||||
mod tokenizer_customization;
|
||||
|
396
meilisearch/tests/settings/proximity_settings.rs
Normal file
396
meilisearch/tests/settings/proximity_settings.rs
Normal file
@ -0,0 +1,396 @@
|
||||
use meili_snap::{json_string, snapshot};
|
||||
use once_cell::sync::Lazy;
|
||||
|
||||
use crate::common::Server;
|
||||
use crate::json;
|
||||
|
||||
static DOCUMENTS: Lazy<crate::common::Value> = Lazy::new(|| {
|
||||
json!([
|
||||
{
|
||||
"id": 1,
|
||||
"a": "Soup of the day",
|
||||
"b": "many the fish",
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"a": "Soup of day",
|
||||
"b": "many the lazy fish",
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"a": "the Soup of day",
|
||||
"b": "many the fish",
|
||||
},
|
||||
])
|
||||
});
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn attribute_scale_search() {
|
||||
let server = Server::new().await;
|
||||
let (response, code) = server.set_features(json!({"proximityPrecision": true})).await;
|
||||
meili_snap::snapshot!(code, @"200 OK");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"scoreDetails": false,
|
||||
"vectorStore": false,
|
||||
"metrics": false,
|
||||
"exportPuffinReports": false,
|
||||
"proximityPrecision": true
|
||||
}
|
||||
"###);
|
||||
let index = server.index("test");
|
||||
|
||||
index.add_documents(DOCUMENTS.clone(), None).await;
|
||||
index.wait_task(0).await;
|
||||
|
||||
let (response, code) = index
|
||||
.update_settings(json!({
|
||||
"proximityPrecision": "attributeScale",
|
||||
"rankingRules": ["words", "typo", "proximity"],
|
||||
}))
|
||||
.await;
|
||||
assert_eq!("202", code.as_str(), "{:?}", response);
|
||||
index.wait_task(1).await;
|
||||
|
||||
// the expected order is [1, 3, 2] instead of [3, 1, 2]
|
||||
// because the attribute scale doesn't make the difference between 1 and 3.
|
||||
index
|
||||
.search(json!({"q": "the soup of day"}), |response, code| {
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response["hits"]), @r###"
|
||||
[
|
||||
{
|
||||
"id": 1,
|
||||
"a": "Soup of the day",
|
||||
"b": "many the fish"
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"a": "the Soup of day",
|
||||
"b": "many the fish"
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"a": "Soup of day",
|
||||
"b": "many the lazy fish"
|
||||
}
|
||||
]
|
||||
"###);
|
||||
})
|
||||
.await;
|
||||
|
||||
// the expected order is [1, 2, 3] instead of [1, 3, 2]
|
||||
// because the attribute scale sees all the word in the same attribute
|
||||
// and so doesn't make the difference between the documents.
|
||||
index
|
||||
.search(json!({"q": "many the fish"}), |response, code| {
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response["hits"]), @r###"
|
||||
[
|
||||
{
|
||||
"id": 1,
|
||||
"a": "Soup of the day",
|
||||
"b": "many the fish"
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"a": "Soup of day",
|
||||
"b": "many the lazy fish"
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"a": "the Soup of day",
|
||||
"b": "many the fish"
|
||||
}
|
||||
]
|
||||
"###);
|
||||
})
|
||||
.await;
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn attribute_scale_phrase_search() {
|
||||
let server = Server::new().await;
|
||||
let (response, code) = server.set_features(json!({"proximityPrecision": true})).await;
|
||||
meili_snap::snapshot!(code, @"200 OK");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"scoreDetails": false,
|
||||
"vectorStore": false,
|
||||
"metrics": false,
|
||||
"exportPuffinReports": false,
|
||||
"proximityPrecision": true
|
||||
}
|
||||
"###);
|
||||
let index = server.index("test");
|
||||
|
||||
index.add_documents(DOCUMENTS.clone(), None).await;
|
||||
index.wait_task(0).await;
|
||||
|
||||
let (_response, _code) = index
|
||||
.update_settings(json!({
|
||||
"proximityPrecision": "attributeScale",
|
||||
"rankingRules": ["words", "typo", "proximity"],
|
||||
}))
|
||||
.await;
|
||||
index.wait_task(1).await;
|
||||
|
||||
// the expected order is [1, 3] instead of [3, 1]
|
||||
// because the attribute scale doesn't make the difference between 1 and 3.
|
||||
// But 2 shouldn't be returned because "the" is not in the same attribute.
|
||||
index
|
||||
.search(json!({"q": "\"the soup of day\""}), |response, code| {
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response["hits"]), @r###"
|
||||
[
|
||||
{
|
||||
"id": 1,
|
||||
"a": "Soup of the day",
|
||||
"b": "many the fish"
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"a": "the Soup of day",
|
||||
"b": "many the fish"
|
||||
}
|
||||
]
|
||||
"###);
|
||||
})
|
||||
.await;
|
||||
|
||||
// the expected order is [1, 2, 3] instead of [1, 3]
|
||||
// because the attribute scale sees all the word in the same attribute
|
||||
// and so doesn't make the difference between the documents.
|
||||
index
|
||||
.search(json!({"q": "\"many the fish\""}), |response, code| {
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response["hits"]), @r###"
|
||||
[
|
||||
{
|
||||
"id": 1,
|
||||
"a": "Soup of the day",
|
||||
"b": "many the fish"
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"a": "Soup of day",
|
||||
"b": "many the lazy fish"
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"a": "the Soup of day",
|
||||
"b": "many the fish"
|
||||
}
|
||||
]
|
||||
"###);
|
||||
})
|
||||
.await;
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn word_scale_set_and_reset() {
|
||||
let server = Server::new().await;
|
||||
let (response, code) = server.set_features(json!({"proximityPrecision": true})).await;
|
||||
meili_snap::snapshot!(code, @"200 OK");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"scoreDetails": false,
|
||||
"vectorStore": false,
|
||||
"metrics": false,
|
||||
"exportPuffinReports": false,
|
||||
"proximityPrecision": true
|
||||
}
|
||||
"###);
|
||||
let index = server.index("test");
|
||||
|
||||
index.add_documents(DOCUMENTS.clone(), None).await;
|
||||
index.wait_task(0).await;
|
||||
|
||||
// Set and reset the setting ensuring the swap between the 2 settings is applied.
|
||||
let (_response, _code) = index
|
||||
.update_settings(json!({
|
||||
"proximityPrecision": "attributeScale",
|
||||
"rankingRules": ["words", "typo", "proximity"],
|
||||
}))
|
||||
.await;
|
||||
index.wait_task(1).await;
|
||||
|
||||
let (_response, _code) = index
|
||||
.update_settings(json!({
|
||||
"proximityPrecision": "wordScale",
|
||||
"rankingRules": ["words", "typo", "proximity"],
|
||||
}))
|
||||
.await;
|
||||
index.wait_task(2).await;
|
||||
|
||||
// [3, 1, 2]
|
||||
index
|
||||
.search(json!({"q": "the soup of day"}), |response, code| {
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response["hits"]), @r###"
|
||||
[
|
||||
{
|
||||
"id": 3,
|
||||
"a": "the Soup of day",
|
||||
"b": "many the fish"
|
||||
},
|
||||
{
|
||||
"id": 1,
|
||||
"a": "Soup of the day",
|
||||
"b": "many the fish"
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"a": "Soup of day",
|
||||
"b": "many the lazy fish"
|
||||
}
|
||||
]
|
||||
"###);
|
||||
})
|
||||
.await;
|
||||
|
||||
// [1, 3, 2]
|
||||
index
|
||||
.search(json!({"q": "many the fish"}), |response, code| {
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response["hits"]), @r###"
|
||||
[
|
||||
{
|
||||
"id": 1,
|
||||
"a": "Soup of the day",
|
||||
"b": "many the fish"
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"a": "the Soup of day",
|
||||
"b": "many the fish"
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"a": "Soup of day",
|
||||
"b": "many the lazy fish"
|
||||
}
|
||||
]
|
||||
"###);
|
||||
})
|
||||
.await;
|
||||
|
||||
// [3]
|
||||
index
|
||||
.search(json!({"q": "\"the soup of day\""}), |response, code| {
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response["hits"]), @r###"
|
||||
[
|
||||
{
|
||||
"id": 3,
|
||||
"a": "the Soup of day",
|
||||
"b": "many the fish"
|
||||
}
|
||||
]
|
||||
"###);
|
||||
})
|
||||
.await;
|
||||
|
||||
// [1, 3]
|
||||
index
|
||||
.search(json!({"q": "\"many the fish\""}), |response, code| {
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response["hits"]), @r###"
|
||||
[
|
||||
{
|
||||
"id": 1,
|
||||
"a": "Soup of the day",
|
||||
"b": "many the fish"
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"a": "the Soup of day",
|
||||
"b": "many the fish"
|
||||
}
|
||||
]
|
||||
"###);
|
||||
})
|
||||
.await;
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn attribute_scale_default_ranking_rules() {
|
||||
let server = Server::new().await;
|
||||
let (response, code) = server.set_features(json!({"proximityPrecision": true})).await;
|
||||
meili_snap::snapshot!(code, @"200 OK");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"scoreDetails": false,
|
||||
"vectorStore": false,
|
||||
"metrics": false,
|
||||
"exportPuffinReports": false,
|
||||
"proximityPrecision": true
|
||||
}
|
||||
"###);
|
||||
let index = server.index("test");
|
||||
|
||||
index.add_documents(DOCUMENTS.clone(), None).await;
|
||||
index.wait_task(0).await;
|
||||
|
||||
let (response, code) = index
|
||||
.update_settings(json!({
|
||||
"proximityPrecision": "attributeScale"
|
||||
}))
|
||||
.await;
|
||||
assert_eq!("202", code.as_str(), "{:?}", response);
|
||||
index.wait_task(1).await;
|
||||
|
||||
// the expected order is [3, 1, 2]
|
||||
index
|
||||
.search(json!({"q": "the soup of day"}), |response, code| {
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response["hits"]), @r###"
|
||||
[
|
||||
{
|
||||
"id": 3,
|
||||
"a": "the Soup of day",
|
||||
"b": "many the fish"
|
||||
},
|
||||
{
|
||||
"id": 1,
|
||||
"a": "Soup of the day",
|
||||
"b": "many the fish"
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"a": "Soup of day",
|
||||
"b": "many the lazy fish"
|
||||
}
|
||||
]
|
||||
"###);
|
||||
})
|
||||
.await;
|
||||
|
||||
// the expected order is [1, 3, 2] instead of [1, 3]
|
||||
// because the attribute scale sees all the word in the same attribute
|
||||
// and so doesn't remove the document 2.
|
||||
index
|
||||
.search(json!({"q": "\"many the fish\""}), |response, code| {
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response["hits"]), @r###"
|
||||
[
|
||||
{
|
||||
"id": 1,
|
||||
"a": "Soup of the day",
|
||||
"b": "many the fish"
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"a": "the Soup of day",
|
||||
"b": "many the fish"
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"a": "Soup of day",
|
||||
"b": "many the lazy fish"
|
||||
}
|
||||
]
|
||||
"###);
|
||||
})
|
||||
.await;
|
||||
}
|
19
meilitool/Cargo.toml
Normal file
19
meilitool/Cargo.toml
Normal file
@ -0,0 +1,19 @@
|
||||
[package]
|
||||
name = "meilitool"
|
||||
description = "A CLI to edit a Meilisearch database from the command line"
|
||||
version.workspace = true
|
||||
authors.workspace = true
|
||||
homepage.workspace = true
|
||||
readme.workspace = true
|
||||
edition.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1.0.75"
|
||||
clap = { version = "4.2.1", features = ["derive"] }
|
||||
dump = { path = "../dump" }
|
||||
file-store = { path = "../file-store" }
|
||||
meilisearch-auth = { path = "../meilisearch-auth" }
|
||||
meilisearch-types = { path = "../meilisearch-types" }
|
||||
time = { version = "0.3.30", features = ["formatting"] }
|
||||
uuid = { version = "1.5.0", features = ["v4"], default-features = false }
|
314
meilitool/src/main.rs
Normal file
314
meilitool/src/main.rs
Normal file
@ -0,0 +1,314 @@
|
||||
use std::fs::{read_dir, read_to_string, remove_file, File};
|
||||
use std::io::BufWriter;
|
||||
use std::path::PathBuf;
|
||||
|
||||
use anyhow::Context;
|
||||
use clap::{Parser, Subcommand};
|
||||
use dump::{DumpWriter, IndexMetadata};
|
||||
use file_store::FileStore;
|
||||
use meilisearch_auth::AuthController;
|
||||
use meilisearch_types::heed::types::{SerdeJson, Str};
|
||||
use meilisearch_types::heed::{Database, Env, EnvOpenOptions, RoTxn, RwTxn, Unspecified};
|
||||
use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader};
|
||||
use meilisearch_types::milli::{obkv_to_json, BEU32};
|
||||
use meilisearch_types::tasks::{Status, Task};
|
||||
use meilisearch_types::versioning::check_version_file;
|
||||
use meilisearch_types::Index;
|
||||
use time::macros::format_description;
|
||||
use time::OffsetDateTime;
|
||||
use uuid_codec::UuidCodec;
|
||||
|
||||
mod uuid_codec;
|
||||
|
||||
#[derive(Parser)]
|
||||
#[command(author, version, about, long_about = None)]
|
||||
struct Cli {
|
||||
/// The database path where the Meilisearch is running.
|
||||
#[arg(long, default_value = "data.ms/")]
|
||||
db_path: PathBuf,
|
||||
|
||||
#[command(subcommand)]
|
||||
command: Command,
|
||||
}
|
||||
|
||||
#[derive(Subcommand)]
|
||||
enum Command {
|
||||
/// Clears the task queue and make it empty.
|
||||
///
|
||||
/// This command can be safely executed even if Meilisearch is running and processing tasks.
|
||||
/// Once the task queue is empty you can restart Meilisearch and no more tasks must be visible,
|
||||
/// even the ones that were processing. However, it's highly possible that you see the processing
|
||||
/// tasks in the queue again with an associated internal error message.
|
||||
ClearTaskQueue,
|
||||
|
||||
/// Exports a dump from the Meilisearch database.
|
||||
///
|
||||
/// Make sure to run this command when Meilisearch is not running or running but not processing tasks.
|
||||
/// If tasks are being processed while a dump is being exported there are chances for the dump to be
|
||||
/// malformed with missing tasks.
|
||||
///
|
||||
/// TODO Verify this claim or make sure it cannot happen and we can export dumps
|
||||
/// without caring about killing Meilisearch first!
|
||||
ExportADump {
|
||||
/// The directory in which the dump will be created.
|
||||
#[arg(long, default_value = "dumps/")]
|
||||
dump_dir: PathBuf,
|
||||
|
||||
/// Skip dumping the enqueued or processing tasks.
|
||||
///
|
||||
/// Can be useful when there are a lot of them and it is not particularly useful
|
||||
/// to keep them. Note that only the enqueued tasks takes up space so skipping
|
||||
/// the processed ones is not particularly interesting.
|
||||
#[arg(long)]
|
||||
skip_enqueued_tasks: bool,
|
||||
},
|
||||
}
|
||||
|
||||
fn main() -> anyhow::Result<()> {
|
||||
let Cli { db_path, command } = Cli::parse();
|
||||
|
||||
check_version_file(&db_path).context("While checking the version file")?;
|
||||
|
||||
match command {
|
||||
Command::ClearTaskQueue => clear_task_queue(db_path),
|
||||
Command::ExportADump { dump_dir, skip_enqueued_tasks } => {
|
||||
export_a_dump(db_path, dump_dir, skip_enqueued_tasks)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Clears the task queue located at `db_path`.
|
||||
fn clear_task_queue(db_path: PathBuf) -> anyhow::Result<()> {
|
||||
let path = db_path.join("tasks");
|
||||
let env = EnvOpenOptions::new()
|
||||
.max_dbs(100)
|
||||
.open(&path)
|
||||
.with_context(|| format!("While trying to open {:?}", path.display()))?;
|
||||
|
||||
eprintln!("Deleting tasks from the database...");
|
||||
|
||||
let mut wtxn = env.write_txn()?;
|
||||
let all_tasks = try_opening_poly_database(&env, &wtxn, "all-tasks")?;
|
||||
let total = all_tasks.len(&wtxn)?;
|
||||
let status = try_opening_poly_database(&env, &wtxn, "status")?;
|
||||
let kind = try_opening_poly_database(&env, &wtxn, "kind")?;
|
||||
let index_tasks = try_opening_poly_database(&env, &wtxn, "index-tasks")?;
|
||||
let canceled_by = try_opening_poly_database(&env, &wtxn, "canceled_by")?;
|
||||
let enqueued_at = try_opening_poly_database(&env, &wtxn, "enqueued-at")?;
|
||||
let started_at = try_opening_poly_database(&env, &wtxn, "started-at")?;
|
||||
let finished_at = try_opening_poly_database(&env, &wtxn, "finished-at")?;
|
||||
|
||||
try_clearing_poly_database(&mut wtxn, all_tasks, "all-tasks")?;
|
||||
try_clearing_poly_database(&mut wtxn, status, "status")?;
|
||||
try_clearing_poly_database(&mut wtxn, kind, "kind")?;
|
||||
try_clearing_poly_database(&mut wtxn, index_tasks, "index-tasks")?;
|
||||
try_clearing_poly_database(&mut wtxn, canceled_by, "canceled_by")?;
|
||||
try_clearing_poly_database(&mut wtxn, enqueued_at, "enqueued-at")?;
|
||||
try_clearing_poly_database(&mut wtxn, started_at, "started-at")?;
|
||||
try_clearing_poly_database(&mut wtxn, finished_at, "finished-at")?;
|
||||
|
||||
wtxn.commit().context("While committing the transaction")?;
|
||||
|
||||
eprintln!("Successfully deleted {total} tasks from the tasks database!");
|
||||
eprintln!("Deleting the content files from disk...");
|
||||
|
||||
let mut count = 0usize;
|
||||
let update_files = db_path.join("update_files");
|
||||
let entries = read_dir(&update_files).with_context(|| {
|
||||
format!("While trying to read the content of {:?}", update_files.display())
|
||||
})?;
|
||||
for result in entries {
|
||||
match result {
|
||||
Ok(ent) => match remove_file(ent.path()) {
|
||||
Ok(_) => count += 1,
|
||||
Err(e) => eprintln!("Error while deleting {:?}: {}", ent.path().display(), e),
|
||||
},
|
||||
Err(e) => {
|
||||
eprintln!("Error while reading a file in {:?}: {}", update_files.display(), e)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
eprintln!("Sucessfully deleted {count} content files from disk!");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn try_opening_database<KC: 'static, DC: 'static>(
|
||||
env: &Env,
|
||||
rtxn: &RoTxn,
|
||||
db_name: &str,
|
||||
) -> anyhow::Result<Database<KC, DC>> {
|
||||
env.open_database(rtxn, Some(db_name))
|
||||
.with_context(|| format!("While opening the {db_name:?} database"))?
|
||||
.with_context(|| format!("Missing the {db_name:?} database"))
|
||||
}
|
||||
|
||||
fn try_opening_poly_database(
|
||||
env: &Env,
|
||||
rtxn: &RoTxn,
|
||||
db_name: &str,
|
||||
) -> anyhow::Result<Database<Unspecified, Unspecified>> {
|
||||
env.database_options()
|
||||
.name(db_name)
|
||||
.open(rtxn)
|
||||
.with_context(|| format!("While opening the {db_name:?} poly database"))?
|
||||
.with_context(|| format!("Missing the {db_name:?} poly database"))
|
||||
}
|
||||
|
||||
fn try_clearing_poly_database(
|
||||
wtxn: &mut RwTxn,
|
||||
database: Database<Unspecified, Unspecified>,
|
||||
db_name: &str,
|
||||
) -> anyhow::Result<()> {
|
||||
database.clear(wtxn).with_context(|| format!("While clearing the {db_name:?} database"))
|
||||
}
|
||||
|
||||
/// Exports a dump into the dump directory.
|
||||
fn export_a_dump(
|
||||
db_path: PathBuf,
|
||||
dump_dir: PathBuf,
|
||||
skip_enqueued_tasks: bool,
|
||||
) -> Result<(), anyhow::Error> {
|
||||
let started_at = OffsetDateTime::now_utc();
|
||||
|
||||
// 1. Extracts the instance UID from disk
|
||||
let instance_uid_path = db_path.join("instance-uid");
|
||||
let instance_uid = match read_to_string(&instance_uid_path) {
|
||||
Ok(content) => match content.trim().parse() {
|
||||
Ok(uuid) => Some(uuid),
|
||||
Err(e) => {
|
||||
eprintln!("Impossible to parse instance-uid: {e}");
|
||||
None
|
||||
}
|
||||
},
|
||||
Err(e) => {
|
||||
eprintln!("Impossible to read {}: {}", instance_uid_path.display(), e);
|
||||
None
|
||||
}
|
||||
};
|
||||
|
||||
let dump = DumpWriter::new(instance_uid).context("While creating a new dump")?;
|
||||
let file_store =
|
||||
FileStore::new(db_path.join("update_files")).context("While opening the FileStore")?;
|
||||
|
||||
let index_scheduler_path = db_path.join("tasks");
|
||||
let env = EnvOpenOptions::new()
|
||||
.max_dbs(100)
|
||||
.open(&index_scheduler_path)
|
||||
.with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?;
|
||||
|
||||
eprintln!("Dumping the keys...");
|
||||
|
||||
// 2. dump the keys
|
||||
let auth_store = AuthController::new(&db_path, &None)
|
||||
.with_context(|| format!("While opening the auth store at {}", db_path.display()))?;
|
||||
let mut dump_keys = dump.create_keys()?;
|
||||
let mut count = 0;
|
||||
for key in auth_store.list_keys()? {
|
||||
dump_keys.push_key(&key)?;
|
||||
count += 1;
|
||||
}
|
||||
dump_keys.flush()?;
|
||||
|
||||
eprintln!("Successfully dumped {count} keys!");
|
||||
|
||||
let rtxn = env.read_txn()?;
|
||||
let all_tasks: Database<BEU32, SerdeJson<Task>> =
|
||||
try_opening_database(&env, &rtxn, "all-tasks")?;
|
||||
let index_mapping: Database<Str, UuidCodec> =
|
||||
try_opening_database(&env, &rtxn, "index-mapping")?;
|
||||
|
||||
if skip_enqueued_tasks {
|
||||
eprintln!("Skip dumping the enqueued tasks...");
|
||||
} else {
|
||||
eprintln!("Dumping the enqueued tasks...");
|
||||
|
||||
// 3. dump the tasks
|
||||
let mut dump_tasks = dump.create_tasks_queue()?;
|
||||
let mut count = 0;
|
||||
for ret in all_tasks.iter(&rtxn)? {
|
||||
let (_, t) = ret?;
|
||||
let status = t.status;
|
||||
let content_file = t.content_uuid();
|
||||
let mut dump_content_file = dump_tasks.push_task(&t.into())?;
|
||||
|
||||
// 3.1. Dump the `content_file` associated with the task if there is one and the task is not finished yet.
|
||||
if let Some(content_file_uuid) = content_file {
|
||||
if status == Status::Enqueued {
|
||||
let content_file = file_store.get_update(content_file_uuid)?;
|
||||
|
||||
let reader =
|
||||
DocumentsBatchReader::from_reader(content_file).with_context(|| {
|
||||
format!("While reading content file {:?}", content_file_uuid)
|
||||
})?;
|
||||
|
||||
let (mut cursor, documents_batch_index) = reader.into_cursor_and_fields_index();
|
||||
while let Some(doc) = cursor.next_document().with_context(|| {
|
||||
format!("While iterating on content file {:?}", content_file_uuid)
|
||||
})? {
|
||||
dump_content_file
|
||||
.push_document(&obkv_to_object(&doc, &documents_batch_index)?)?;
|
||||
}
|
||||
dump_content_file.flush()?;
|
||||
count += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
dump_tasks.flush()?;
|
||||
|
||||
eprintln!("Successfully dumped {count} enqueued tasks!");
|
||||
}
|
||||
|
||||
eprintln!("Dumping the indexes...");
|
||||
|
||||
// 4. Dump the indexes
|
||||
let mut count = 0;
|
||||
for result in index_mapping.iter(&rtxn)? {
|
||||
let (uid, uuid) = result?;
|
||||
let index_path = db_path.join("indexes").join(uuid.to_string());
|
||||
let index = Index::new(EnvOpenOptions::new(), &index_path).with_context(|| {
|
||||
format!("While trying to open the index at path {:?}", index_path.display())
|
||||
})?;
|
||||
|
||||
let rtxn = index.read_txn()?;
|
||||
let metadata = IndexMetadata {
|
||||
uid: uid.to_owned(),
|
||||
primary_key: index.primary_key(&rtxn)?.map(String::from),
|
||||
created_at: index.created_at(&rtxn)?,
|
||||
updated_at: index.updated_at(&rtxn)?,
|
||||
};
|
||||
let mut index_dumper = dump.create_index(uid, &metadata)?;
|
||||
|
||||
let fields_ids_map = index.fields_ids_map(&rtxn)?;
|
||||
let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
|
||||
|
||||
// 4.1. Dump the documents
|
||||
for ret in index.all_documents(&rtxn)? {
|
||||
let (_id, doc) = ret?;
|
||||
let document = obkv_to_json(&all_fields, &fields_ids_map, doc)?;
|
||||
index_dumper.push_document(&document)?;
|
||||
}
|
||||
|
||||
// 4.2. Dump the settings
|
||||
let settings = meilisearch_types::settings::settings(&index, &rtxn)?;
|
||||
index_dumper.settings(&settings)?;
|
||||
count += 1;
|
||||
}
|
||||
|
||||
eprintln!("Successfully dumped {count} indexes!");
|
||||
// We will not dump experimental feature settings
|
||||
eprintln!("The tool is not dumping experimental features, please set them by hand afterward");
|
||||
|
||||
let dump_uid = started_at.format(format_description!(
|
||||
"[year repr:full][month repr:numerical][day padding:zero]-[hour padding:zero][minute padding:zero][second padding:zero][subsecond digits:3]"
|
||||
)).unwrap();
|
||||
|
||||
let path = dump_dir.join(format!("{}.dump", dump_uid));
|
||||
let file = File::create(&path)?;
|
||||
dump.persist_to(BufWriter::new(file))?;
|
||||
|
||||
eprintln!("Dump exported at path {:?}", path.display());
|
||||
|
||||
Ok(())
|
||||
}
|
24
meilitool/src/uuid_codec.rs
Normal file
24
meilitool/src/uuid_codec.rs
Normal file
@ -0,0 +1,24 @@
|
||||
use std::borrow::Cow;
|
||||
use std::convert::TryInto;
|
||||
|
||||
use meilisearch_types::heed::{BoxedError, BytesDecode, BytesEncode};
|
||||
use uuid::Uuid;
|
||||
|
||||
/// A heed codec for value of struct Uuid.
|
||||
pub struct UuidCodec;
|
||||
|
||||
impl<'a> BytesDecode<'a> for UuidCodec {
|
||||
type DItem = Uuid;
|
||||
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
|
||||
bytes.try_into().map(Uuid::from_bytes).map_err(Into::into)
|
||||
}
|
||||
}
|
||||
|
||||
impl BytesEncode<'_> for UuidCodec {
|
||||
type EItem = Uuid;
|
||||
|
||||
fn bytes_encode(item: &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
|
||||
Ok(Cow::Borrowed(item.as_bytes()))
|
||||
}
|
||||
}
|
@ -17,10 +17,10 @@ bincode = "1.3.3"
|
||||
bstr = "1.4.0"
|
||||
bytemuck = { version = "1.13.1", features = ["extern_crate_alloc"] }
|
||||
byteorder = "1.4.3"
|
||||
charabia = { version = "0.8.3", default-features = false }
|
||||
charabia = { version = "0.8.5", default-features = false }
|
||||
concat-arrays = "0.1.2"
|
||||
crossbeam-channel = "0.5.8"
|
||||
deserr = { version = "0.6.0", features = ["actix-web"]}
|
||||
deserr = "0.6.0"
|
||||
either = { version = "1.8.1", features = ["serde"] }
|
||||
flatten-serde-json = { path = "../flatten-serde-json" }
|
||||
fst = "0.4.7"
|
||||
@ -29,8 +29,8 @@ geoutils = "0.5.1"
|
||||
grenad = { version = "0.4.5", default-features = false, features = [
|
||||
"rayon", "tempfile"
|
||||
] }
|
||||
heed = { git = "https://github.com/meilisearch/heed", tag = "v0.12.7", default-features = false, features = [
|
||||
"lmdb", "read-txn-no-tls"
|
||||
heed = { version = "0.20.0-alpha.9", default-features = false, features = [
|
||||
"serde-json", "serde-bincode", "read-txn-no-tls"
|
||||
] }
|
||||
indexmap = { version = "2.0.0", features = ["serde"] }
|
||||
instant-distance = { version = "0.6.1", features = ["with-serde"] }
|
||||
@ -83,7 +83,7 @@ meili-snap = { path = "../meili-snap" }
|
||||
rand = { version = "0.8.5", features = ["small_rng"] }
|
||||
|
||||
[features]
|
||||
all-tokenizations = ["charabia/chinese", "charabia/hebrew", "charabia/japanese", "charabia/thai", "charabia/korean", "charabia/greek"]
|
||||
all-tokenizations = ["charabia/chinese", "charabia/hebrew", "charabia/japanese", "charabia/thai", "charabia/korean", "charabia/greek", "charabia/khmer"]
|
||||
|
||||
# Use POSIX semaphores instead of SysV semaphores in LMDB
|
||||
# For more information on this feature, see heed's Cargo.toml
|
||||
@ -107,3 +107,6 @@ thai = ["charabia/thai"]
|
||||
|
||||
# allow greek specialized tokenization
|
||||
greek = ["charabia/greek"]
|
||||
|
||||
# allow khmer specialized tokenization
|
||||
khmer = ["charabia/khmer"]
|
||||
|
@ -152,7 +152,7 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco
|
||||
valid_fields: BTreeSet<String>,
|
||||
hidden_fields: bool,
|
||||
},
|
||||
#[error("{}", HeedError::BadOpenOptions)]
|
||||
#[error("an environment is already opened with different options")]
|
||||
InvalidLmdbOpenOptions,
|
||||
#[error("You must specify where `sort` is listed in the rankingRules setting to use the sort parameter at search time.")]
|
||||
SortRankingRuleMissing,
|
||||
@ -326,11 +326,12 @@ impl From<HeedError> for Error {
|
||||
HeedError::Mdb(MdbError::MapFull) => UserError(MaxDatabaseSizeReached),
|
||||
HeedError::Mdb(MdbError::Invalid) => UserError(InvalidStoreFile),
|
||||
HeedError::Mdb(error) => InternalError(Store(error)),
|
||||
HeedError::Encoding => InternalError(Serialization(Encoding { db_name: None })),
|
||||
HeedError::Decoding => InternalError(Serialization(Decoding { db_name: None })),
|
||||
// TODO use the encoding
|
||||
HeedError::Encoding(_) => InternalError(Serialization(Encoding { db_name: None })),
|
||||
HeedError::Decoding(_) => InternalError(Serialization(Decoding { db_name: None })),
|
||||
HeedError::InvalidDatabaseTyping => InternalError(InvalidDatabaseTyping),
|
||||
HeedError::DatabaseClosing => InternalError(DatabaseClosing),
|
||||
HeedError::BadOpenOptions => UserError(InvalidLmdbOpenOptions),
|
||||
HeedError::BadOpenOptions { .. } => UserError(InvalidLmdbOpenOptions),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1,8 +1,7 @@
|
||||
use std::collections::HashMap;
|
||||
|
||||
use heed::types::{OwnedType, Str};
|
||||
use heed::types::Str;
|
||||
use heed::{Database, RoIter, RoTxn, RwTxn};
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use crate::{DocumentId, BEU32};
|
||||
|
||||
@ -17,10 +16,10 @@ pub struct DocumentOperation {
|
||||
pub kind: DocumentOperationKind,
|
||||
}
|
||||
|
||||
pub struct ExternalDocumentsIds(Database<Str, OwnedType<BEU32>>);
|
||||
pub struct ExternalDocumentsIds(Database<Str, BEU32>);
|
||||
|
||||
impl ExternalDocumentsIds {
|
||||
pub fn new(db: Database<Str, OwnedType<BEU32>>) -> ExternalDocumentsIds {
|
||||
pub fn new(db: Database<Str, BEU32>) -> ExternalDocumentsIds {
|
||||
ExternalDocumentsIds(db)
|
||||
}
|
||||
|
||||
@ -30,7 +29,7 @@ impl ExternalDocumentsIds {
|
||||
}
|
||||
|
||||
pub fn get<A: AsRef<str>>(&self, rtxn: &RoTxn, external_id: A) -> heed::Result<Option<u32>> {
|
||||
Ok(self.0.get(rtxn, external_id.as_ref())?.map(|x| x.get()))
|
||||
self.0.get(rtxn, external_id.as_ref())
|
||||
}
|
||||
|
||||
/// An helper function to debug this type, returns an `HashMap` of both,
|
||||
@ -39,28 +38,11 @@ impl ExternalDocumentsIds {
|
||||
let mut map = HashMap::default();
|
||||
for result in self.0.iter(rtxn)? {
|
||||
let (external, internal) = result?;
|
||||
map.insert(external.to_owned(), internal.get());
|
||||
map.insert(external.to_owned(), internal);
|
||||
}
|
||||
Ok(map)
|
||||
}
|
||||
|
||||
/// Looks for the internal ids in the passed bitmap, and returns an iterator over the mapping between
|
||||
/// these internal ids and their external id.
|
||||
///
|
||||
/// The returned iterator has `Result<(String, DocumentId), RoaringBitmap>` as `Item`,
|
||||
/// where the returned values can be:
|
||||
/// - `Ok((external_id, internal_id))`: if a mapping was found
|
||||
/// - `Err(remaining_ids)`: if the external ids for some of the requested internal ids weren't found.
|
||||
/// In that case the returned bitmap contains the internal ids whose external ids were not found after traversing
|
||||
/// the entire fst.
|
||||
pub fn find_external_id_of<'t>(
|
||||
&self,
|
||||
rtxn: &'t RoTxn,
|
||||
internal_ids: RoaringBitmap,
|
||||
) -> heed::Result<ExternalToInternalOwnedIterator<'t>> {
|
||||
self.0.iter(rtxn).map(|iter| ExternalToInternalOwnedIterator { iter, internal_ids })
|
||||
}
|
||||
|
||||
/// Applies the list of operations passed as argument, modifying the current external to internal id mapping.
|
||||
///
|
||||
/// If the list contains multiple operations on the same external id, then the result is unspecified.
|
||||
@ -73,7 +55,7 @@ impl ExternalDocumentsIds {
|
||||
for DocumentOperation { external_id, internal_id, kind } in operations {
|
||||
match kind {
|
||||
DocumentOperationKind::Create => {
|
||||
self.0.put(wtxn, &external_id, &BEU32::new(internal_id))?;
|
||||
self.0.put(wtxn, &external_id, &internal_id)?;
|
||||
}
|
||||
DocumentOperationKind::Delete => {
|
||||
if !self.0.delete(wtxn, &external_id)? {
|
||||
@ -87,60 +69,7 @@ impl ExternalDocumentsIds {
|
||||
}
|
||||
|
||||
/// Returns an iterator over all the external ids.
|
||||
pub fn iter<'t>(&self, rtxn: &'t RoTxn) -> heed::Result<RoIter<'t, Str, OwnedType<BEU32>>> {
|
||||
pub fn iter<'t>(&self, rtxn: &'t RoTxn) -> heed::Result<RoIter<'t, Str, BEU32>> {
|
||||
self.0.iter(rtxn)
|
||||
}
|
||||
}
|
||||
|
||||
/// An iterator over mappings between requested internal ids and external ids.
|
||||
///
|
||||
/// See [`ExternalDocumentsIds::find_external_id_of`] for details.
|
||||
pub struct ExternalToInternalOwnedIterator<'t> {
|
||||
iter: RoIter<'t, Str, OwnedType<BEU32>>,
|
||||
internal_ids: RoaringBitmap,
|
||||
}
|
||||
|
||||
impl<'t> Iterator for ExternalToInternalOwnedIterator<'t> {
|
||||
/// A result indicating if a mapping was found, or if the stream was exhausted without finding all internal ids.
|
||||
type Item = Result<(&'t str, DocumentId), RoaringBitmap>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
// if all requested ids were found, we won't find any other, so short-circuit
|
||||
if self.internal_ids.is_empty() {
|
||||
return None;
|
||||
}
|
||||
loop {
|
||||
let (external, internal) = match self.iter.next() {
|
||||
Some(Ok((external, internal))) => (external, internal),
|
||||
// TODO manage this better, remove panic
|
||||
Some(Err(e)) => panic!("{}", e),
|
||||
_ => {
|
||||
// we exhausted the stream but we still have some internal ids to find
|
||||
let remaining_ids = std::mem::take(&mut self.internal_ids);
|
||||
return Some(Err(remaining_ids));
|
||||
// note: next calls to `next` will return `None` since we replaced the internal_ids
|
||||
// with the default empty bitmap
|
||||
}
|
||||
};
|
||||
let internal = internal.get();
|
||||
let was_contained = self.internal_ids.remove(internal);
|
||||
if was_contained {
|
||||
return Some(Ok((external, internal)));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'t> ExternalToInternalOwnedIterator<'t> {
|
||||
/// Returns the bitmap of internal ids whose external id are yet to be found
|
||||
pub fn remaining_internal_ids(&self) -> &RoaringBitmap {
|
||||
&self.internal_ids
|
||||
}
|
||||
|
||||
/// Consumes this iterator and returns an iterator over only the external ids, ignoring the internal ids.
|
||||
///
|
||||
/// Use this when you don't need the mapping between the external and the internal ids.
|
||||
pub fn only_external_ids(self) -> impl Iterator<Item = Result<String, RoaringBitmap>> + 't {
|
||||
self.map(|res| res.map(|(external, _internal)| external.to_owned()))
|
||||
}
|
||||
}
|
||||
|
@ -2,26 +2,28 @@ use std::borrow::Cow;
|
||||
use std::convert::TryInto;
|
||||
use std::str;
|
||||
|
||||
use heed::BoxedError;
|
||||
|
||||
pub struct BEU16StrCodec;
|
||||
|
||||
impl<'a> heed::BytesDecode<'a> for BEU16StrCodec {
|
||||
type DItem = (u16, &'a str);
|
||||
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
|
||||
let (n_bytes, str_bytes) = bytes.split_at(2);
|
||||
let n = n_bytes.try_into().map(u16::from_be_bytes).ok()?;
|
||||
let s = str::from_utf8(str_bytes).ok()?;
|
||||
Some((n, s))
|
||||
let n = n_bytes.try_into().map(u16::from_be_bytes)?;
|
||||
let s = str::from_utf8(str_bytes)?;
|
||||
Ok((n, s))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> heed::BytesEncode<'a> for BEU16StrCodec {
|
||||
type EItem = (u16, &'a str);
|
||||
|
||||
fn bytes_encode((n, s): &Self::EItem) -> Option<Cow<[u8]>> {
|
||||
fn bytes_encode((n, s): &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
|
||||
let mut bytes = Vec::with_capacity(s.len() + 2);
|
||||
bytes.extend_from_slice(&n.to_be_bytes());
|
||||
bytes.extend_from_slice(s.as_bytes());
|
||||
Some(Cow::Owned(bytes))
|
||||
Ok(Cow::Owned(bytes))
|
||||
}
|
||||
}
|
||||
|
@ -2,26 +2,28 @@ use std::borrow::Cow;
|
||||
use std::convert::TryInto;
|
||||
use std::str;
|
||||
|
||||
use heed::BoxedError;
|
||||
|
||||
pub struct BEU32StrCodec;
|
||||
|
||||
impl<'a> heed::BytesDecode<'a> for BEU32StrCodec {
|
||||
type DItem = (u32, &'a str);
|
||||
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
|
||||
let (n_bytes, str_bytes) = bytes.split_at(4);
|
||||
let n = n_bytes.try_into().map(u32::from_be_bytes).ok()?;
|
||||
let s = str::from_utf8(str_bytes).ok()?;
|
||||
Some((n, s))
|
||||
let n = n_bytes.try_into().map(u32::from_be_bytes)?;
|
||||
let s = str::from_utf8(str_bytes)?;
|
||||
Ok((n, s))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> heed::BytesEncode<'a> for BEU32StrCodec {
|
||||
type EItem = (u32, &'a str);
|
||||
|
||||
fn bytes_encode((n, s): &Self::EItem) -> Option<Cow<[u8]>> {
|
||||
fn bytes_encode((n, s): &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
|
||||
let mut bytes = Vec::with_capacity(s.len() + 4);
|
||||
bytes.extend_from_slice(&n.to_be_bytes());
|
||||
bytes.extend_from_slice(s.as_bytes());
|
||||
Some(Cow::Owned(bytes))
|
||||
Ok(Cow::Owned(bytes))
|
||||
}
|
||||
}
|
||||
|
@ -1,23 +1,23 @@
|
||||
use std::borrow::Cow;
|
||||
|
||||
use heed::{BytesDecode, BytesEncode};
|
||||
use heed::{BoxedError, BytesDecode, BytesEncode};
|
||||
|
||||
/// A codec for values of type `&[u8]`. Unlike `ByteSlice`, its `EItem` and `DItem` associated
|
||||
/// A codec for values of type `&[u8]`. Unlike `Bytes`, its `EItem` and `DItem` associated
|
||||
/// types are equivalent (= `&'a [u8]`) and these values can reside within another structure.
|
||||
pub struct ByteSliceRefCodec;
|
||||
pub struct BytesRefCodec;
|
||||
|
||||
impl<'a> BytesEncode<'a> for ByteSliceRefCodec {
|
||||
impl<'a> BytesEncode<'a> for BytesRefCodec {
|
||||
type EItem = &'a [u8];
|
||||
|
||||
fn bytes_encode(item: &'a Self::EItem) -> Option<Cow<'a, [u8]>> {
|
||||
Some(Cow::Borrowed(item))
|
||||
fn bytes_encode(item: &'a Self::EItem) -> Result<Cow<'a, [u8]>, BoxedError> {
|
||||
Ok(Cow::Borrowed(item))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> BytesDecode<'a> for ByteSliceRefCodec {
|
||||
impl<'a> BytesDecode<'a> for BytesRefCodec {
|
||||
type DItem = &'a [u8];
|
||||
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||
Some(bytes)
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
|
||||
Ok(bytes)
|
||||
}
|
||||
}
|
||||
|
@ -1,8 +1,9 @@
|
||||
use std::borrow::Cow;
|
||||
use std::marker::PhantomData;
|
||||
|
||||
use heed::{BytesDecode, BytesEncode};
|
||||
use heed::{BoxedError, BytesDecode, BytesEncode};
|
||||
|
||||
use crate::heed_codec::SliceTooShortError;
|
||||
use crate::{try_split_array_at, DocumentId, FieldId};
|
||||
|
||||
pub struct FieldDocIdFacetCodec<C>(PhantomData<C>);
|
||||
@ -13,16 +14,16 @@ where
|
||||
{
|
||||
type DItem = (FieldId, DocumentId, C::DItem);
|
||||
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||
let (field_id_bytes, bytes) = try_split_array_at(bytes)?;
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
|
||||
let (field_id_bytes, bytes) = try_split_array_at(bytes).ok_or(SliceTooShortError)?;
|
||||
let field_id = u16::from_be_bytes(field_id_bytes);
|
||||
|
||||
let (document_id_bytes, bytes) = try_split_array_at(bytes)?;
|
||||
let (document_id_bytes, bytes) = try_split_array_at(bytes).ok_or(SliceTooShortError)?;
|
||||
let document_id = u32::from_be_bytes(document_id_bytes);
|
||||
|
||||
let value = C::bytes_decode(bytes)?;
|
||||
|
||||
Some((field_id, document_id, value))
|
||||
Ok((field_id, document_id, value))
|
||||
}
|
||||
}
|
||||
|
||||
@ -32,13 +33,15 @@ where
|
||||
{
|
||||
type EItem = (FieldId, DocumentId, C::EItem);
|
||||
|
||||
fn bytes_encode((field_id, document_id, value): &'a Self::EItem) -> Option<Cow<[u8]>> {
|
||||
fn bytes_encode(
|
||||
(field_id, document_id, value): &'a Self::EItem,
|
||||
) -> Result<Cow<[u8]>, BoxedError> {
|
||||
let mut bytes = Vec::with_capacity(32);
|
||||
bytes.extend_from_slice(&field_id.to_be_bytes()); // 2 bytes
|
||||
bytes.extend_from_slice(&document_id.to_be_bytes()); // 4 bytes
|
||||
let value_bytes = C::bytes_encode(value)?;
|
||||
// variable length, if f64 -> 16 bytes, if string -> large, potentially
|
||||
bytes.extend_from_slice(&value_bytes);
|
||||
Some(Cow::Owned(bytes))
|
||||
Ok(Cow::Owned(bytes))
|
||||
}
|
||||
}
|
||||
|
@ -5,8 +5,8 @@ use std::borrow::Cow;
|
||||
use std::convert::TryFrom;
|
||||
use std::marker::PhantomData;
|
||||
|
||||
use heed::types::{DecodeIgnore, OwnedType};
|
||||
use heed::{BytesDecode, BytesEncode};
|
||||
use heed::types::DecodeIgnore;
|
||||
use heed::{BoxedError, BytesDecode, BytesEncode};
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
pub use self::field_doc_id_facet_codec::FieldDocIdFacetCodec;
|
||||
@ -18,7 +18,7 @@ pub type FieldDocIdFacetF64Codec = FieldDocIdFacetCodec<OrderedF64Codec>;
|
||||
pub type FieldDocIdFacetStringCodec = FieldDocIdFacetCodec<StrRefCodec>;
|
||||
pub type FieldDocIdFacetIgnoreCodec = FieldDocIdFacetCodec<DecodeIgnore>;
|
||||
|
||||
pub type FieldIdCodec = OwnedType<BEU16>;
|
||||
pub type FieldIdCodec = BEU16;
|
||||
|
||||
/// Tries to split a slice in half at the given middle point,
|
||||
/// `None` if the slice is too short.
|
||||
@ -58,7 +58,7 @@ where
|
||||
{
|
||||
type EItem = FacetGroupKey<T::EItem>;
|
||||
|
||||
fn bytes_encode(value: &'a Self::EItem) -> Option<Cow<'a, [u8]>> {
|
||||
fn bytes_encode(value: &'a Self::EItem) -> Result<Cow<'a, [u8]>, BoxedError> {
|
||||
let mut v = vec![];
|
||||
v.extend_from_slice(&value.field_id.to_be_bytes());
|
||||
v.extend_from_slice(&[value.level]);
|
||||
@ -66,7 +66,7 @@ where
|
||||
let bound = T::bytes_encode(&value.left_bound)?;
|
||||
v.extend_from_slice(&bound);
|
||||
|
||||
Some(Cow::Owned(v))
|
||||
Ok(Cow::Owned(v))
|
||||
}
|
||||
}
|
||||
impl<'a, T> heed::BytesDecode<'a> for FacetGroupKeyCodec<T>
|
||||
@ -75,11 +75,11 @@ where
|
||||
{
|
||||
type DItem = FacetGroupKey<T::DItem>;
|
||||
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||
let fid = u16::from_be_bytes(<[u8; 2]>::try_from(&bytes[0..=1]).ok()?);
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
|
||||
let fid = u16::from_be_bytes(<[u8; 2]>::try_from(&bytes[0..=1])?);
|
||||
let level = bytes[2];
|
||||
let bound = T::bytes_decode(&bytes[3..])?;
|
||||
Some(FacetGroupKey { field_id: fid, level, left_bound: bound })
|
||||
Ok(FacetGroupKey { field_id: fid, level, left_bound: bound })
|
||||
}
|
||||
}
|
||||
|
||||
@ -87,17 +87,17 @@ pub struct FacetGroupValueCodec;
|
||||
impl<'a> heed::BytesEncode<'a> for FacetGroupValueCodec {
|
||||
type EItem = FacetGroupValue;
|
||||
|
||||
fn bytes_encode(value: &'a Self::EItem) -> Option<Cow<'a, [u8]>> {
|
||||
fn bytes_encode(value: &'a Self::EItem) -> Result<Cow<'a, [u8]>, BoxedError> {
|
||||
let mut v = vec![value.size];
|
||||
CboRoaringBitmapCodec::serialize_into(&value.bitmap, &mut v);
|
||||
Some(Cow::Owned(v))
|
||||
Ok(Cow::Owned(v))
|
||||
}
|
||||
}
|
||||
impl<'a> heed::BytesDecode<'a> for FacetGroupValueCodec {
|
||||
type DItem = FacetGroupValue;
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
|
||||
let size = bytes[0];
|
||||
let bitmap = CboRoaringBitmapCodec::deserialize_from(&bytes[1..]).ok()?;
|
||||
Some(FacetGroupValue { size, bitmap })
|
||||
let bitmap = CboRoaringBitmapCodec::deserialize_from(&bytes[1..])?;
|
||||
Ok(FacetGroupValue { size, bitmap })
|
||||
}
|
||||
}
|
||||
|
@ -1,37 +1,45 @@
|
||||
use std::borrow::Cow;
|
||||
use std::convert::TryInto;
|
||||
|
||||
use heed::BytesDecode;
|
||||
use heed::{BoxedError, BytesDecode};
|
||||
use thiserror::Error;
|
||||
|
||||
use crate::facet::value_encoding::f64_into_bytes;
|
||||
use crate::heed_codec::SliceTooShortError;
|
||||
|
||||
pub struct OrderedF64Codec;
|
||||
|
||||
impl<'a> BytesDecode<'a> for OrderedF64Codec {
|
||||
type DItem = f64;
|
||||
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
|
||||
if bytes.len() < 16 {
|
||||
return None;
|
||||
Err(SliceTooShortError.into())
|
||||
} else {
|
||||
bytes[8..].try_into().map(f64::from_be_bytes).map_err(Into::into)
|
||||
}
|
||||
let f = bytes[8..].try_into().ok().map(f64::from_be_bytes)?;
|
||||
Some(f)
|
||||
}
|
||||
}
|
||||
|
||||
impl heed::BytesEncode<'_> for OrderedF64Codec {
|
||||
type EItem = f64;
|
||||
|
||||
fn bytes_encode(f: &Self::EItem) -> Option<Cow<[u8]>> {
|
||||
fn bytes_encode(f: &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
|
||||
let mut buffer = [0u8; 16];
|
||||
|
||||
// write the globally ordered float
|
||||
let bytes = f64_into_bytes(*f)?;
|
||||
let bytes = f64_into_bytes(*f).ok_or(InvalidGloballyOrderedFloatError { float: *f })?;
|
||||
buffer[..8].copy_from_slice(&bytes[..]);
|
||||
// Then the f64 value just to be able to read it back
|
||||
let bytes = f.to_be_bytes();
|
||||
buffer[8..16].copy_from_slice(&bytes[..]);
|
||||
|
||||
Some(Cow::Owned(buffer.to_vec()))
|
||||
Ok(Cow::Owned(buffer.to_vec()))
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Error, Debug)]
|
||||
#[error("the float {float} cannot be converted to a globally ordered representation")]
|
||||
pub struct InvalidGloballyOrderedFloatError {
|
||||
float: f64,
|
||||
}
|
||||
|
@ -1,5 +1,8 @@
|
||||
use std::borrow::Cow;
|
||||
|
||||
use heed::BoxedError;
|
||||
|
||||
use super::SliceTooShortError;
|
||||
use crate::{try_split_array_at, FieldId};
|
||||
|
||||
pub struct FieldIdWordCountCodec;
|
||||
@ -7,21 +10,21 @@ pub struct FieldIdWordCountCodec;
|
||||
impl<'a> heed::BytesDecode<'a> for FieldIdWordCountCodec {
|
||||
type DItem = (FieldId, u8);
|
||||
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||
let (field_id_bytes, bytes) = try_split_array_at(bytes)?;
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
|
||||
let (field_id_bytes, bytes) = try_split_array_at(bytes).ok_or(SliceTooShortError)?;
|
||||
let field_id = u16::from_be_bytes(field_id_bytes);
|
||||
let ([word_count], _nothing) = try_split_array_at(bytes)?;
|
||||
Some((field_id, word_count))
|
||||
let ([word_count], _nothing) = try_split_array_at(bytes).ok_or(SliceTooShortError)?;
|
||||
Ok((field_id, word_count))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> heed::BytesEncode<'a> for FieldIdWordCountCodec {
|
||||
type EItem = (FieldId, u8);
|
||||
|
||||
fn bytes_encode((field_id, word_count): &Self::EItem) -> Option<Cow<[u8]>> {
|
||||
fn bytes_encode((field_id, word_count): &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
|
||||
let mut bytes = Vec::with_capacity(2 + 1);
|
||||
bytes.extend_from_slice(&field_id.to_be_bytes());
|
||||
bytes.push(*word_count);
|
||||
Some(Cow::Owned(bytes))
|
||||
Ok(Cow::Owned(bytes))
|
||||
}
|
||||
}
|
||||
|
@ -1,7 +1,7 @@
|
||||
use std::borrow::Cow;
|
||||
|
||||
use fst::Set;
|
||||
use heed::{BytesDecode, BytesEncode};
|
||||
use heed::{BoxedError, BytesDecode, BytesEncode};
|
||||
|
||||
/// A codec for values of type `Set<&[u8]>`.
|
||||
pub struct FstSetCodec;
|
||||
@ -9,15 +9,15 @@ pub struct FstSetCodec;
|
||||
impl<'a> BytesEncode<'a> for FstSetCodec {
|
||||
type EItem = Set<Vec<u8>>;
|
||||
|
||||
fn bytes_encode(item: &'a Self::EItem) -> Option<Cow<'a, [u8]>> {
|
||||
Some(Cow::Borrowed(item.as_fst().as_bytes()))
|
||||
fn bytes_encode(item: &'a Self::EItem) -> Result<Cow<'a, [u8]>, BoxedError> {
|
||||
Ok(Cow::Borrowed(item.as_fst().as_bytes()))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> BytesDecode<'a> for FstSetCodec {
|
||||
type DItem = Set<&'a [u8]>;
|
||||
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||
Set::new(bytes).ok()
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
|
||||
Set::new(bytes).map_err(Into::into)
|
||||
}
|
||||
}
|
||||
|
@ -12,8 +12,10 @@ mod str_beu32_codec;
|
||||
mod str_ref;
|
||||
mod str_str_u8_codec;
|
||||
|
||||
pub use byte_slice_ref::ByteSliceRefCodec;
|
||||
pub use byte_slice_ref::BytesRefCodec;
|
||||
use heed::BoxedError;
|
||||
pub use str_ref::StrRefCodec;
|
||||
use thiserror::Error;
|
||||
|
||||
pub use self::beu16_str_codec::BEU16StrCodec;
|
||||
pub use self::beu32_str_codec::BEU32StrCodec;
|
||||
@ -31,5 +33,9 @@ pub use self::str_str_u8_codec::{U8StrStrCodec, UncheckedU8StrStrCodec};
|
||||
pub trait BytesDecodeOwned {
|
||||
type DItem;
|
||||
|
||||
fn bytes_decode_owned(bytes: &[u8]) -> Option<Self::DItem>;
|
||||
fn bytes_decode_owned(bytes: &[u8]) -> Result<Self::DItem, BoxedError>;
|
||||
}
|
||||
|
||||
#[derive(Error, Debug)]
|
||||
#[error("the slice is too short")]
|
||||
pub struct SliceTooShortError;
|
||||
|
@ -1,5 +1,6 @@
|
||||
use std::borrow::Cow;
|
||||
|
||||
use heed::BoxedError;
|
||||
use obkv::{KvReaderU16, KvWriterU16};
|
||||
|
||||
pub struct ObkvCodec;
|
||||
@ -7,15 +8,15 @@ pub struct ObkvCodec;
|
||||
impl<'a> heed::BytesDecode<'a> for ObkvCodec {
|
||||
type DItem = KvReaderU16<'a>;
|
||||
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||
Some(KvReaderU16::new(bytes))
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
|
||||
Ok(KvReaderU16::new(bytes))
|
||||
}
|
||||
}
|
||||
|
||||
impl heed::BytesEncode<'_> for ObkvCodec {
|
||||
type EItem = KvWriterU16<Vec<u8>>;
|
||||
|
||||
fn bytes_encode(item: &Self::EItem) -> Option<Cow<[u8]>> {
|
||||
item.clone().into_inner().map(Cow::Owned).ok()
|
||||
fn bytes_encode(item: &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
|
||||
item.clone().into_inner().map(Cow::Owned).map_err(Into::into)
|
||||
}
|
||||
}
|
||||
|
@ -2,7 +2,7 @@ use std::borrow::Cow;
|
||||
use std::convert::TryInto;
|
||||
use std::mem::size_of;
|
||||
|
||||
use heed::BytesDecode;
|
||||
use heed::{BoxedError, BytesDecode};
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use crate::heed_codec::BytesDecodeOwned;
|
||||
@ -19,22 +19,22 @@ impl BoRoaringBitmapCodec {
|
||||
impl BytesDecode<'_> for BoRoaringBitmapCodec {
|
||||
type DItem = RoaringBitmap;
|
||||
|
||||
fn bytes_decode(bytes: &[u8]) -> Option<Self::DItem> {
|
||||
fn bytes_decode(bytes: &[u8]) -> Result<Self::DItem, BoxedError> {
|
||||
let mut bitmap = RoaringBitmap::new();
|
||||
|
||||
for chunk in bytes.chunks(size_of::<u32>()) {
|
||||
let bytes = chunk.try_into().ok()?;
|
||||
let bytes = chunk.try_into()?;
|
||||
bitmap.push(u32::from_ne_bytes(bytes));
|
||||
}
|
||||
|
||||
Some(bitmap)
|
||||
Ok(bitmap)
|
||||
}
|
||||
}
|
||||
|
||||
impl BytesDecodeOwned for BoRoaringBitmapCodec {
|
||||
type DItem = RoaringBitmap;
|
||||
|
||||
fn bytes_decode_owned(bytes: &[u8]) -> Option<Self::DItem> {
|
||||
fn bytes_decode_owned(bytes: &[u8]) -> Result<Self::DItem, BoxedError> {
|
||||
Self::bytes_decode(bytes)
|
||||
}
|
||||
}
|
||||
@ -42,9 +42,9 @@ impl BytesDecodeOwned for BoRoaringBitmapCodec {
|
||||
impl heed::BytesEncode<'_> for BoRoaringBitmapCodec {
|
||||
type EItem = RoaringBitmap;
|
||||
|
||||
fn bytes_encode(item: &Self::EItem) -> Option<Cow<[u8]>> {
|
||||
fn bytes_encode(item: &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
|
||||
let mut out = Vec::new();
|
||||
BoRoaringBitmapCodec::serialize_into(item, &mut out);
|
||||
Some(Cow::Owned(out))
|
||||
Ok(Cow::Owned(out))
|
||||
}
|
||||
}
|
||||
|
@ -3,6 +3,7 @@ use std::io;
|
||||
use std::mem::size_of;
|
||||
|
||||
use byteorder::{NativeEndian, ReadBytesExt, WriteBytesExt};
|
||||
use heed::BoxedError;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use crate::heed_codec::BytesDecodeOwned;
|
||||
@ -132,26 +133,26 @@ impl CboRoaringBitmapCodec {
|
||||
impl heed::BytesDecode<'_> for CboRoaringBitmapCodec {
|
||||
type DItem = RoaringBitmap;
|
||||
|
||||
fn bytes_decode(bytes: &[u8]) -> Option<Self::DItem> {
|
||||
Self::deserialize_from(bytes).ok()
|
||||
fn bytes_decode(bytes: &[u8]) -> Result<Self::DItem, BoxedError> {
|
||||
Self::deserialize_from(bytes).map_err(Into::into)
|
||||
}
|
||||
}
|
||||
|
||||
impl BytesDecodeOwned for CboRoaringBitmapCodec {
|
||||
type DItem = RoaringBitmap;
|
||||
|
||||
fn bytes_decode_owned(bytes: &[u8]) -> Option<Self::DItem> {
|
||||
Self::deserialize_from(bytes).ok()
|
||||
fn bytes_decode_owned(bytes: &[u8]) -> Result<Self::DItem, BoxedError> {
|
||||
Self::deserialize_from(bytes).map_err(Into::into)
|
||||
}
|
||||
}
|
||||
|
||||
impl heed::BytesEncode<'_> for CboRoaringBitmapCodec {
|
||||
type EItem = RoaringBitmap;
|
||||
|
||||
fn bytes_encode(item: &Self::EItem) -> Option<Cow<[u8]>> {
|
||||
fn bytes_encode(item: &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
|
||||
let mut vec = Vec::with_capacity(Self::serialized_size(item));
|
||||
Self::serialize_into(item, &mut vec);
|
||||
Some(Cow::Owned(vec))
|
||||
Ok(Cow::Owned(vec))
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1,5 +1,6 @@
|
||||
use std::borrow::Cow;
|
||||
|
||||
use heed::BoxedError;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use crate::heed_codec::BytesDecodeOwned;
|
||||
@ -9,25 +10,25 @@ pub struct RoaringBitmapCodec;
|
||||
impl heed::BytesDecode<'_> for RoaringBitmapCodec {
|
||||
type DItem = RoaringBitmap;
|
||||
|
||||
fn bytes_decode(bytes: &[u8]) -> Option<Self::DItem> {
|
||||
RoaringBitmap::deserialize_unchecked_from(bytes).ok()
|
||||
fn bytes_decode(bytes: &[u8]) -> Result<Self::DItem, BoxedError> {
|
||||
RoaringBitmap::deserialize_unchecked_from(bytes).map_err(Into::into)
|
||||
}
|
||||
}
|
||||
|
||||
impl BytesDecodeOwned for RoaringBitmapCodec {
|
||||
type DItem = RoaringBitmap;
|
||||
|
||||
fn bytes_decode_owned(bytes: &[u8]) -> Option<Self::DItem> {
|
||||
RoaringBitmap::deserialize_from(bytes).ok()
|
||||
fn bytes_decode_owned(bytes: &[u8]) -> Result<Self::DItem, BoxedError> {
|
||||
RoaringBitmap::deserialize_from(bytes).map_err(Into::into)
|
||||
}
|
||||
}
|
||||
|
||||
impl heed::BytesEncode<'_> for RoaringBitmapCodec {
|
||||
type EItem = RoaringBitmap;
|
||||
|
||||
fn bytes_encode(item: &Self::EItem) -> Option<Cow<[u8]>> {
|
||||
fn bytes_encode(item: &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
|
||||
let mut bytes = Vec::with_capacity(item.serialized_size());
|
||||
item.serialize_into(&mut bytes).ok()?;
|
||||
Some(Cow::Owned(bytes))
|
||||
item.serialize_into(&mut bytes)?;
|
||||
Ok(Cow::Owned(bytes))
|
||||
}
|
||||
}
|
||||
|
@ -1,6 +1,6 @@
|
||||
use std::mem;
|
||||
|
||||
use heed::BytesDecode;
|
||||
use heed::{BoxedError, BytesDecode};
|
||||
|
||||
use crate::heed_codec::BytesDecodeOwned;
|
||||
|
||||
@ -9,15 +9,15 @@ pub struct BoRoaringBitmapLenCodec;
|
||||
impl BytesDecode<'_> for BoRoaringBitmapLenCodec {
|
||||
type DItem = u64;
|
||||
|
||||
fn bytes_decode(bytes: &[u8]) -> Option<Self::DItem> {
|
||||
Some((bytes.len() / mem::size_of::<u32>()) as u64)
|
||||
fn bytes_decode(bytes: &[u8]) -> Result<Self::DItem, BoxedError> {
|
||||
Ok((bytes.len() / mem::size_of::<u32>()) as u64)
|
||||
}
|
||||
}
|
||||
|
||||
impl BytesDecodeOwned for BoRoaringBitmapLenCodec {
|
||||
type DItem = u64;
|
||||
|
||||
fn bytes_decode_owned(bytes: &[u8]) -> Option<Self::DItem> {
|
||||
fn bytes_decode_owned(bytes: &[u8]) -> Result<Self::DItem, BoxedError> {
|
||||
Self::bytes_decode(bytes)
|
||||
}
|
||||
}
|
||||
|
@ -1,6 +1,6 @@
|
||||
use std::mem;
|
||||
|
||||
use heed::BytesDecode;
|
||||
use heed::{BoxedError, BytesDecode};
|
||||
|
||||
use super::{BoRoaringBitmapLenCodec, RoaringBitmapLenCodec};
|
||||
use crate::heed_codec::roaring_bitmap::cbo_roaring_bitmap_codec::THRESHOLD;
|
||||
@ -11,7 +11,7 @@ pub struct CboRoaringBitmapLenCodec;
|
||||
impl BytesDecode<'_> for CboRoaringBitmapLenCodec {
|
||||
type DItem = u64;
|
||||
|
||||
fn bytes_decode(bytes: &[u8]) -> Option<Self::DItem> {
|
||||
fn bytes_decode(bytes: &[u8]) -> Result<Self::DItem, BoxedError> {
|
||||
if bytes.len() <= THRESHOLD * mem::size_of::<u32>() {
|
||||
// If there is threshold or less than threshold integers that can fit into this array
|
||||
// of bytes it means that we used the ByteOrder codec serializer.
|
||||
@ -27,7 +27,7 @@ impl BytesDecode<'_> for CboRoaringBitmapLenCodec {
|
||||
impl BytesDecodeOwned for CboRoaringBitmapLenCodec {
|
||||
type DItem = u64;
|
||||
|
||||
fn bytes_decode_owned(bytes: &[u8]) -> Option<Self::DItem> {
|
||||
fn bytes_decode_owned(bytes: &[u8]) -> Result<Self::DItem, BoxedError> {
|
||||
Self::bytes_decode(bytes)
|
||||
}
|
||||
}
|
||||
|
@ -2,6 +2,7 @@ use std::io::{self, BufRead, Read};
|
||||
use std::mem;
|
||||
|
||||
use byteorder::{LittleEndian, ReadBytesExt};
|
||||
use heed::BoxedError;
|
||||
|
||||
use crate::heed_codec::BytesDecodeOwned;
|
||||
|
||||
@ -56,16 +57,16 @@ impl RoaringBitmapLenCodec {
|
||||
impl heed::BytesDecode<'_> for RoaringBitmapLenCodec {
|
||||
type DItem = u64;
|
||||
|
||||
fn bytes_decode(bytes: &[u8]) -> Option<Self::DItem> {
|
||||
RoaringBitmapLenCodec::deserialize_from_slice(bytes).ok()
|
||||
fn bytes_decode(bytes: &[u8]) -> Result<Self::DItem, BoxedError> {
|
||||
RoaringBitmapLenCodec::deserialize_from_slice(bytes).map_err(Into::into)
|
||||
}
|
||||
}
|
||||
|
||||
impl BytesDecodeOwned for RoaringBitmapLenCodec {
|
||||
type DItem = u64;
|
||||
|
||||
fn bytes_decode_owned(bytes: &[u8]) -> Option<Self::DItem> {
|
||||
RoaringBitmapLenCodec::deserialize_from_slice(bytes).ok()
|
||||
fn bytes_decode_owned(bytes: &[u8]) -> Result<Self::DItem, BoxedError> {
|
||||
RoaringBitmapLenCodec::deserialize_from_slice(bytes).map_err(Into::into)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1,30 +1,31 @@
|
||||
use std::borrow::Cow;
|
||||
use std::ffi::CStr;
|
||||
use std::str;
|
||||
|
||||
use charabia::{Language, Script};
|
||||
use heed::BoxedError;
|
||||
|
||||
pub struct ScriptLanguageCodec;
|
||||
|
||||
impl<'a> heed::BytesDecode<'a> for ScriptLanguageCodec {
|
||||
type DItem = (Script, Language);
|
||||
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||
let sep = bytes.iter().position(|b| *b == 0)?;
|
||||
let (s_bytes, l_bytes) = bytes.split_at(sep);
|
||||
let script = str::from_utf8(s_bytes).ok()?;
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
|
||||
let cstr = CStr::from_bytes_until_nul(bytes)?;
|
||||
let script = cstr.to_str()?;
|
||||
let script_name = Script::from_name(script);
|
||||
let lan = str::from_utf8(l_bytes).ok()?;
|
||||
// skip '\0' byte between the two strings.
|
||||
let lan_name = Language::from_name(&lan[1..]);
|
||||
let lan = str::from_utf8(&bytes[script.len() + 1..])?;
|
||||
let lan_name = Language::from_name(lan);
|
||||
|
||||
Some((script_name, lan_name))
|
||||
Ok((script_name, lan_name))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> heed::BytesEncode<'a> for ScriptLanguageCodec {
|
||||
type EItem = (Script, Language);
|
||||
|
||||
fn bytes_encode((script, lan): &Self::EItem) -> Option<Cow<[u8]>> {
|
||||
fn bytes_encode((script, lan): &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
|
||||
let script_name = script.name().as_bytes();
|
||||
let lan_name = lan.name().as_bytes();
|
||||
|
||||
@ -33,6 +34,6 @@ impl<'a> heed::BytesEncode<'a> for ScriptLanguageCodec {
|
||||
bytes.push(0);
|
||||
bytes.extend_from_slice(lan_name);
|
||||
|
||||
Some(Cow::Owned(bytes))
|
||||
Ok(Cow::Owned(bytes))
|
||||
}
|
||||
}
|
||||
|
@ -3,37 +3,41 @@ use std::convert::TryInto;
|
||||
use std::mem::size_of;
|
||||
use std::str;
|
||||
|
||||
use heed::BoxedError;
|
||||
|
||||
use super::SliceTooShortError;
|
||||
|
||||
pub struct StrBEU32Codec;
|
||||
|
||||
impl<'a> heed::BytesDecode<'a> for StrBEU32Codec {
|
||||
type DItem = (&'a str, u32);
|
||||
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
|
||||
let footer_len = size_of::<u32>();
|
||||
|
||||
if bytes.len() < footer_len {
|
||||
return None;
|
||||
return Err(SliceTooShortError.into());
|
||||
}
|
||||
|
||||
let (word, bytes) = bytes.split_at(bytes.len() - footer_len);
|
||||
let word = str::from_utf8(word).ok()?;
|
||||
let pos = bytes.try_into().map(u32::from_be_bytes).ok()?;
|
||||
let word = str::from_utf8(word)?;
|
||||
let pos = bytes.try_into().map(u32::from_be_bytes)?;
|
||||
|
||||
Some((word, pos))
|
||||
Ok((word, pos))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> heed::BytesEncode<'a> for StrBEU32Codec {
|
||||
type EItem = (&'a str, u32);
|
||||
|
||||
fn bytes_encode((word, pos): &Self::EItem) -> Option<Cow<[u8]>> {
|
||||
fn bytes_encode((word, pos): &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
|
||||
let pos = pos.to_be_bytes();
|
||||
|
||||
let mut bytes = Vec::with_capacity(word.len() + pos.len());
|
||||
bytes.extend_from_slice(word.as_bytes());
|
||||
bytes.extend_from_slice(&pos[..]);
|
||||
|
||||
Some(Cow::Owned(bytes))
|
||||
Ok(Cow::Owned(bytes))
|
||||
}
|
||||
}
|
||||
|
||||
@ -42,26 +46,27 @@ pub struct StrBEU16Codec;
|
||||
impl<'a> heed::BytesDecode<'a> for StrBEU16Codec {
|
||||
type DItem = (&'a str, u16);
|
||||
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
|
||||
let footer_len = size_of::<u16>();
|
||||
|
||||
if bytes.len() < footer_len + 1 {
|
||||
return None;
|
||||
return Err(SliceTooShortError.into());
|
||||
}
|
||||
|
||||
let (word_plus_nul_byte, bytes) = bytes.split_at(bytes.len() - footer_len);
|
||||
let (_, word) = word_plus_nul_byte.split_last()?;
|
||||
let word = str::from_utf8(word).ok()?;
|
||||
let pos = bytes.try_into().map(u16::from_be_bytes).ok()?;
|
||||
// unwrap: we just checked the footer + 1 above.
|
||||
let (_, word) = word_plus_nul_byte.split_last().unwrap();
|
||||
let word = str::from_utf8(word)?;
|
||||
let pos = bytes.try_into().map(u16::from_be_bytes)?;
|
||||
|
||||
Some((word, pos))
|
||||
Ok((word, pos))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> heed::BytesEncode<'a> for StrBEU16Codec {
|
||||
type EItem = (&'a str, u16);
|
||||
|
||||
fn bytes_encode((word, pos): &Self::EItem) -> Option<Cow<[u8]>> {
|
||||
fn bytes_encode((word, pos): &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
|
||||
let pos = pos.to_be_bytes();
|
||||
|
||||
let mut bytes = Vec::with_capacity(word.len() + 1 + pos.len());
|
||||
@ -69,6 +74,6 @@ impl<'a> heed::BytesEncode<'a> for StrBEU16Codec {
|
||||
bytes.push(0);
|
||||
bytes.extend_from_slice(&pos[..]);
|
||||
|
||||
Some(Cow::Owned(bytes))
|
||||
Ok(Cow::Owned(bytes))
|
||||
}
|
||||
}
|
||||
|
@ -1,6 +1,6 @@
|
||||
use std::borrow::Cow;
|
||||
|
||||
use heed::{BytesDecode, BytesEncode};
|
||||
use heed::{BoxedError, BytesDecode, BytesEncode};
|
||||
|
||||
/// A codec for values of type `&str`. Unlike `Str`, its `EItem` and `DItem` associated
|
||||
/// types are equivalent (= `&'a str`) and these values can reside within another structure.
|
||||
@ -8,15 +8,14 @@ pub struct StrRefCodec;
|
||||
impl<'a> BytesEncode<'a> for StrRefCodec {
|
||||
type EItem = &'a str;
|
||||
|
||||
fn bytes_encode(item: &'a &'a str) -> Option<Cow<'a, [u8]>> {
|
||||
Some(Cow::Borrowed(item.as_bytes()))
|
||||
fn bytes_encode(item: &'a &'a str) -> Result<Cow<'a, [u8]>, BoxedError> {
|
||||
Ok(Cow::Borrowed(item.as_bytes()))
|
||||
}
|
||||
}
|
||||
impl<'a> BytesDecode<'a> for StrRefCodec {
|
||||
type DItem = &'a str;
|
||||
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||
let s = std::str::from_utf8(bytes).ok()?;
|
||||
Some(s)
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
|
||||
std::str::from_utf8(bytes).map_err(Into::into)
|
||||
}
|
||||
}
|
||||
|
@ -1,32 +1,36 @@
|
||||
use std::borrow::Cow;
|
||||
use std::ffi::CStr;
|
||||
use std::str;
|
||||
|
||||
use heed::BoxedError;
|
||||
|
||||
use super::SliceTooShortError;
|
||||
|
||||
pub struct U8StrStrCodec;
|
||||
|
||||
impl<'a> heed::BytesDecode<'a> for U8StrStrCodec {
|
||||
type DItem = (u8, &'a str, &'a str);
|
||||
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||
let (n, bytes) = bytes.split_first()?;
|
||||
let s1_end = bytes.iter().position(|b| *b == 0)?;
|
||||
let (s1_bytes, rest) = bytes.split_at(s1_end);
|
||||
let s2_bytes = &rest[1..];
|
||||
let s1 = str::from_utf8(s1_bytes).ok()?;
|
||||
let s2 = str::from_utf8(s2_bytes).ok()?;
|
||||
Some((*n, s1, s2))
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
|
||||
let (n, bytes) = bytes.split_first().ok_or(SliceTooShortError)?;
|
||||
let cstr = CStr::from_bytes_until_nul(bytes)?;
|
||||
let s1 = cstr.to_str()?;
|
||||
// skip '\0' byte between the two strings.
|
||||
let s2 = str::from_utf8(&bytes[s1.len() + 1..])?;
|
||||
Ok((*n, s1, s2))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> heed::BytesEncode<'a> for U8StrStrCodec {
|
||||
type EItem = (u8, &'a str, &'a str);
|
||||
|
||||
fn bytes_encode((n, s1, s2): &Self::EItem) -> Option<Cow<[u8]>> {
|
||||
fn bytes_encode((n, s1, s2): &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
|
||||
let mut bytes = Vec::with_capacity(s1.len() + s2.len() + 1);
|
||||
bytes.push(*n);
|
||||
bytes.extend_from_slice(s1.as_bytes());
|
||||
bytes.push(0);
|
||||
bytes.extend_from_slice(s2.as_bytes());
|
||||
Some(Cow::Owned(bytes))
|
||||
Ok(Cow::Owned(bytes))
|
||||
}
|
||||
}
|
||||
pub struct UncheckedU8StrStrCodec;
|
||||
@ -34,24 +38,25 @@ pub struct UncheckedU8StrStrCodec;
|
||||
impl<'a> heed::BytesDecode<'a> for UncheckedU8StrStrCodec {
|
||||
type DItem = (u8, &'a [u8], &'a [u8]);
|
||||
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||
let (n, bytes) = bytes.split_first()?;
|
||||
let s1_end = bytes.iter().position(|b| *b == 0)?;
|
||||
let (s1_bytes, rest) = bytes.split_at(s1_end);
|
||||
let s2_bytes = &rest[1..];
|
||||
Some((*n, s1_bytes, s2_bytes))
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
|
||||
let (n, bytes) = bytes.split_first().ok_or(SliceTooShortError)?;
|
||||
let cstr = CStr::from_bytes_until_nul(bytes)?;
|
||||
let s1_bytes = cstr.to_bytes();
|
||||
// skip '\0' byte between the two strings.
|
||||
let s2_bytes = &bytes[s1_bytes.len() + 1..];
|
||||
Ok((*n, s1_bytes, s2_bytes))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> heed::BytesEncode<'a> for UncheckedU8StrStrCodec {
|
||||
type EItem = (u8, &'a [u8], &'a [u8]);
|
||||
|
||||
fn bytes_encode((n, s1, s2): &Self::EItem) -> Option<Cow<[u8]>> {
|
||||
fn bytes_encode((n, s1, s2): &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
|
||||
let mut bytes = Vec::with_capacity(s1.len() + s2.len() + 1);
|
||||
bytes.push(*n);
|
||||
bytes.extend_from_slice(s1);
|
||||
bytes.push(0);
|
||||
bytes.extend_from_slice(s2);
|
||||
Some(Cow::Owned(bytes))
|
||||
Ok(Cow::Owned(bytes))
|
||||
}
|
||||
}
|
||||
|
@ -4,9 +4,8 @@ use std::fs::File;
|
||||
use std::path::Path;
|
||||
|
||||
use charabia::{Language, Script};
|
||||
use heed::flags::Flags;
|
||||
use heed::types::*;
|
||||
use heed::{CompactionOption, Database, PolyDatabase, RoTxn, RwTxn};
|
||||
use heed::{CompactionOption, Database, RoTxn, RwTxn, Unspecified};
|
||||
use roaring::RoaringBitmap;
|
||||
use rstar::RTree;
|
||||
use time::OffsetDateTime;
|
||||
@ -22,12 +21,13 @@ use crate::heed_codec::facet::{
|
||||
use crate::heed_codec::{
|
||||
BEU16StrCodec, FstSetCodec, ScriptLanguageCodec, StrBEU16Codec, StrRefCodec,
|
||||
};
|
||||
use crate::proximity::ProximityPrecision;
|
||||
use crate::readable_slices::ReadableSlices;
|
||||
use crate::{
|
||||
default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds,
|
||||
FacetDistribution, FieldDistribution, FieldId, FieldIdWordCountCodec, GeoPoint, ObkvCodec,
|
||||
OrderBy, Result, RoaringBitmapCodec, RoaringBitmapLenCodec, Search, U8StrStrCodec, BEU16,
|
||||
BEU32,
|
||||
BEU32, BEU64,
|
||||
};
|
||||
|
||||
/// The HNSW data-structure that we serialize, fill and search in.
|
||||
@ -73,6 +73,7 @@ pub mod main_key {
|
||||
pub const MAX_VALUES_PER_FACET: &str = "max-values-per-facet";
|
||||
pub const SORT_FACET_VALUES_BY: &str = "sort-facet-values-by";
|
||||
pub const PAGINATION_MAX_TOTAL_HITS: &str = "pagination-max-total-hits";
|
||||
pub const PROXIMITY_PRECISION: &str = "proximity-precision";
|
||||
}
|
||||
|
||||
pub mod db_name {
|
||||
@ -109,10 +110,10 @@ pub struct Index {
|
||||
pub(crate) env: heed::Env,
|
||||
|
||||
/// Contains many different types (e.g. the fields ids map).
|
||||
pub(crate) main: PolyDatabase,
|
||||
pub(crate) main: Database<Unspecified, Unspecified>,
|
||||
|
||||
/// Maps the external documents ids with the internal document id.
|
||||
pub external_documents_ids: Database<Str, OwnedType<BEU32>>,
|
||||
pub external_documents_ids: Database<Str, BEU32>,
|
||||
|
||||
/// A word and all the documents ids containing the word.
|
||||
pub word_docids: Database<Str, CboRoaringBitmapCodec>,
|
||||
@ -158,7 +159,7 @@ pub struct Index {
|
||||
/// Maps the facet field id of the normalized-for-search string facets with their original versions.
|
||||
pub facet_id_normalized_string_strings: Database<BEU16StrCodec, SerdeJson<BTreeSet<String>>>,
|
||||
/// Maps the facet field id of the string facets with an FST containing all the facets values.
|
||||
pub facet_id_string_fst: Database<OwnedType<BEU16>, FstSetCodec>,
|
||||
pub facet_id_string_fst: Database<BEU16, FstSetCodec>,
|
||||
|
||||
/// Maps the document id, the facet field id and the numbers.
|
||||
pub field_id_docid_facet_f64s: Database<FieldDocIdFacetF64Codec, Unit>,
|
||||
@ -166,10 +167,10 @@ pub struct Index {
|
||||
pub field_id_docid_facet_strings: Database<FieldDocIdFacetStringCodec, Str>,
|
||||
|
||||
/// Maps a vector id to the document id that have it.
|
||||
pub vector_id_docid: Database<OwnedType<BEU32>, OwnedType<BEU32>>,
|
||||
pub vector_id_docid: Database<BEU32, BEU32>,
|
||||
|
||||
/// Maps the document id to the document as an obkv store.
|
||||
pub(crate) documents: Database<OwnedType<BEU32>, ObkvCodec>,
|
||||
pub(crate) documents: Database<BEU32, ObkvCodec>,
|
||||
}
|
||||
|
||||
impl Index {
|
||||
@ -182,11 +183,10 @@ impl Index {
|
||||
use db_name::*;
|
||||
|
||||
options.max_dbs(24);
|
||||
unsafe { options.flag(Flags::MdbAlwaysFreePages) };
|
||||
|
||||
let env = options.open(path)?;
|
||||
let mut wtxn = env.write_txn()?;
|
||||
let main = env.create_poly_database(&mut wtxn, Some(MAIN))?;
|
||||
let main = env.database_options().name(MAIN).create(&mut wtxn)?;
|
||||
let word_docids = env.create_database(&mut wtxn, Some(WORD_DOCIDS))?;
|
||||
let external_documents_ids =
|
||||
env.create_database(&mut wtxn, Some(EXTERNAL_DOCUMENTS_IDS))?;
|
||||
@ -264,24 +264,16 @@ impl Index {
|
||||
|
||||
fn set_creation_dates(
|
||||
env: &heed::Env,
|
||||
main: PolyDatabase,
|
||||
main: Database<Unspecified, Unspecified>,
|
||||
created_at: OffsetDateTime,
|
||||
updated_at: OffsetDateTime,
|
||||
) -> heed::Result<()> {
|
||||
let mut txn = env.write_txn()?;
|
||||
// The db was just created, we update its metadata with the relevant information.
|
||||
if main.get::<_, Str, SerdeJson<OffsetDateTime>>(&txn, main_key::CREATED_AT_KEY)?.is_none()
|
||||
{
|
||||
main.put::<_, Str, SerdeJson<OffsetDateTime>>(
|
||||
&mut txn,
|
||||
main_key::UPDATED_AT_KEY,
|
||||
&updated_at,
|
||||
)?;
|
||||
main.put::<_, Str, SerdeJson<OffsetDateTime>>(
|
||||
&mut txn,
|
||||
main_key::CREATED_AT_KEY,
|
||||
&created_at,
|
||||
)?;
|
||||
let main = main.remap_types::<Str, SerdeJson<OffsetDateTime>>();
|
||||
if main.get(&txn, main_key::CREATED_AT_KEY)?.is_none() {
|
||||
main.put(&mut txn, main_key::UPDATED_AT_KEY, &updated_at)?;
|
||||
main.put(&mut txn, main_key::CREATED_AT_KEY, &created_at)?;
|
||||
txn.commit()?;
|
||||
}
|
||||
Ok(())
|
||||
@ -318,12 +310,12 @@ impl Index {
|
||||
///
|
||||
/// This value is the maximum between the map size passed during the opening of the index
|
||||
/// and the on-disk size of the index at the time of opening.
|
||||
pub fn map_size(&self) -> Result<usize> {
|
||||
Ok(self.env.map_size()?)
|
||||
pub fn map_size(&self) -> usize {
|
||||
self.env.info().map_size
|
||||
}
|
||||
|
||||
pub fn copy_to_path<P: AsRef<Path>>(&self, path: P, option: CompactionOption) -> Result<File> {
|
||||
self.env.copy_to_path(path, option).map_err(Into::into)
|
||||
pub fn copy_to_file<P: AsRef<Path>>(&self, path: P, option: CompactionOption) -> Result<File> {
|
||||
self.env.copy_to_file(path, option).map_err(Into::into)
|
||||
}
|
||||
|
||||
/// Returns an `EnvClosingEvent` that can be used to wait for the closing event,
|
||||
@ -343,21 +335,28 @@ impl Index {
|
||||
wtxn: &mut RwTxn,
|
||||
docids: &RoaringBitmap,
|
||||
) -> heed::Result<()> {
|
||||
self.main.put::<_, Str, RoaringBitmapCodec>(wtxn, main_key::DOCUMENTS_IDS_KEY, docids)
|
||||
self.main.remap_types::<Str, RoaringBitmapCodec>().put(
|
||||
wtxn,
|
||||
main_key::DOCUMENTS_IDS_KEY,
|
||||
docids,
|
||||
)
|
||||
}
|
||||
|
||||
/// Returns the internal documents ids.
|
||||
pub fn documents_ids(&self, rtxn: &RoTxn) -> heed::Result<RoaringBitmap> {
|
||||
Ok(self
|
||||
.main
|
||||
.get::<_, Str, RoaringBitmapCodec>(rtxn, main_key::DOCUMENTS_IDS_KEY)?
|
||||
.remap_types::<Str, RoaringBitmapCodec>()
|
||||
.get(rtxn, main_key::DOCUMENTS_IDS_KEY)?
|
||||
.unwrap_or_default())
|
||||
}
|
||||
|
||||
/// Returns the number of documents indexed in the database.
|
||||
pub fn number_of_documents(&self, rtxn: &RoTxn) -> Result<u64> {
|
||||
let count =
|
||||
self.main.get::<_, Str, RoaringBitmapLenCodec>(rtxn, main_key::DOCUMENTS_IDS_KEY)?;
|
||||
let count = self
|
||||
.main
|
||||
.remap_types::<Str, RoaringBitmapLenCodec>()
|
||||
.get(rtxn, main_key::DOCUMENTS_IDS_KEY)?;
|
||||
Ok(count.unwrap_or_default())
|
||||
}
|
||||
|
||||
@ -366,17 +365,17 @@ impl Index {
|
||||
/// Writes the documents primary key, this is the field name that is used to store the id.
|
||||
pub(crate) fn put_primary_key(&self, wtxn: &mut RwTxn, primary_key: &str) -> heed::Result<()> {
|
||||
self.set_updated_at(wtxn, &OffsetDateTime::now_utc())?;
|
||||
self.main.put::<_, Str, Str>(wtxn, main_key::PRIMARY_KEY_KEY, primary_key)
|
||||
self.main.remap_types::<Str, Str>().put(wtxn, main_key::PRIMARY_KEY_KEY, primary_key)
|
||||
}
|
||||
|
||||
/// Deletes the primary key of the documents, this can be done to reset indexes settings.
|
||||
pub(crate) fn delete_primary_key(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
|
||||
self.main.delete::<_, Str>(wtxn, main_key::PRIMARY_KEY_KEY)
|
||||
self.main.remap_key_type::<Str>().delete(wtxn, main_key::PRIMARY_KEY_KEY)
|
||||
}
|
||||
|
||||
/// Returns the documents primary key, `None` if it hasn't been defined.
|
||||
pub fn primary_key<'t>(&self, rtxn: &'t RoTxn) -> heed::Result<Option<&'t str>> {
|
||||
self.main.get::<_, Str, Str>(rtxn, main_key::PRIMARY_KEY_KEY)
|
||||
self.main.remap_types::<Str, Str>().get(rtxn, main_key::PRIMARY_KEY_KEY)
|
||||
}
|
||||
|
||||
/* external documents ids */
|
||||
@ -396,7 +395,11 @@ impl Index {
|
||||
wtxn: &mut RwTxn,
|
||||
map: &FieldsIdsMap,
|
||||
) -> heed::Result<()> {
|
||||
self.main.put::<_, Str, SerdeJson<FieldsIdsMap>>(wtxn, main_key::FIELDS_IDS_MAP_KEY, map)
|
||||
self.main.remap_types::<Str, SerdeJson<FieldsIdsMap>>().put(
|
||||
wtxn,
|
||||
main_key::FIELDS_IDS_MAP_KEY,
|
||||
map,
|
||||
)
|
||||
}
|
||||
|
||||
/// Returns the fields ids map which associate the documents keys with an internal field id
|
||||
@ -404,7 +407,8 @@ impl Index {
|
||||
pub fn fields_ids_map(&self, rtxn: &RoTxn) -> heed::Result<FieldsIdsMap> {
|
||||
Ok(self
|
||||
.main
|
||||
.get::<_, Str, SerdeJson<FieldsIdsMap>>(rtxn, main_key::FIELDS_IDS_MAP_KEY)?
|
||||
.remap_types::<Str, SerdeJson<FieldsIdsMap>>()
|
||||
.get(rtxn, main_key::FIELDS_IDS_MAP_KEY)?
|
||||
.unwrap_or_default())
|
||||
}
|
||||
|
||||
@ -416,19 +420,24 @@ impl Index {
|
||||
wtxn: &mut RwTxn,
|
||||
rtree: &RTree<GeoPoint>,
|
||||
) -> heed::Result<()> {
|
||||
self.main.put::<_, Str, SerdeBincode<RTree<GeoPoint>>>(wtxn, main_key::GEO_RTREE_KEY, rtree)
|
||||
self.main.remap_types::<Str, SerdeBincode<RTree<GeoPoint>>>().put(
|
||||
wtxn,
|
||||
main_key::GEO_RTREE_KEY,
|
||||
rtree,
|
||||
)
|
||||
}
|
||||
|
||||
/// Delete the `rtree` which associates coordinates to documents ids.
|
||||
pub(crate) fn delete_geo_rtree(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
|
||||
self.main.delete::<_, Str>(wtxn, main_key::GEO_RTREE_KEY)
|
||||
self.main.remap_key_type::<Str>().delete(wtxn, main_key::GEO_RTREE_KEY)
|
||||
}
|
||||
|
||||
/// Returns the `rtree` which associates coordinates to documents ids.
|
||||
pub fn geo_rtree(&self, rtxn: &RoTxn) -> Result<Option<RTree<GeoPoint>>> {
|
||||
match self
|
||||
.main
|
||||
.get::<_, Str, SerdeBincode<RTree<GeoPoint>>>(rtxn, main_key::GEO_RTREE_KEY)?
|
||||
.remap_types::<Str, SerdeBincode<RTree<GeoPoint>>>()
|
||||
.get(rtxn, main_key::GEO_RTREE_KEY)?
|
||||
{
|
||||
Some(rtree) => Ok(Some(rtree)),
|
||||
None => Ok(None),
|
||||
@ -443,7 +452,7 @@ impl Index {
|
||||
wtxn: &mut RwTxn,
|
||||
docids: &RoaringBitmap,
|
||||
) -> heed::Result<()> {
|
||||
self.main.put::<_, Str, RoaringBitmapCodec>(
|
||||
self.main.remap_types::<Str, RoaringBitmapCodec>().put(
|
||||
wtxn,
|
||||
main_key::GEO_FACETED_DOCUMENTS_IDS_KEY,
|
||||
docids,
|
||||
@ -452,14 +461,15 @@ impl Index {
|
||||
|
||||
/// Delete the documents ids that are faceted with a _geo field.
|
||||
pub(crate) fn delete_geo_faceted_documents_ids(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
|
||||
self.main.delete::<_, Str>(wtxn, main_key::GEO_FACETED_DOCUMENTS_IDS_KEY)
|
||||
self.main.remap_key_type::<Str>().delete(wtxn, main_key::GEO_FACETED_DOCUMENTS_IDS_KEY)
|
||||
}
|
||||
|
||||
/// Retrieve all the documents ids that are faceted with a _geo field.
|
||||
pub fn geo_faceted_documents_ids(&self, rtxn: &RoTxn) -> heed::Result<RoaringBitmap> {
|
||||
match self
|
||||
.main
|
||||
.get::<_, Str, RoaringBitmapCodec>(rtxn, main_key::GEO_FACETED_DOCUMENTS_IDS_KEY)?
|
||||
.remap_types::<Str, RoaringBitmapCodec>()
|
||||
.get(rtxn, main_key::GEO_FACETED_DOCUMENTS_IDS_KEY)?
|
||||
{
|
||||
Some(docids) => Ok(docids),
|
||||
None => Ok(RoaringBitmap::new()),
|
||||
@ -474,22 +484,22 @@ impl Index {
|
||||
self.delete_vector_hnsw(wtxn)?;
|
||||
|
||||
let chunk_size = 1024 * 1024 * (1024 + 512); // 1.5 GiB
|
||||
let bytes = bincode::serialize(hnsw).map_err(|_| heed::Error::Encoding)?;
|
||||
let bytes = bincode::serialize(hnsw).map_err(Into::into).map_err(heed::Error::Encoding)?;
|
||||
for (i, chunk) in bytes.chunks(chunk_size).enumerate() {
|
||||
let i = i as u32;
|
||||
let mut key = main_key::VECTOR_HNSW_KEY_PREFIX.as_bytes().to_vec();
|
||||
key.extend_from_slice(&i.to_be_bytes());
|
||||
self.main.put::<_, ByteSlice, ByteSlice>(wtxn, &key, chunk)?;
|
||||
self.main.remap_types::<Bytes, Bytes>().put(wtxn, &key, chunk)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Delete the `hnsw`.
|
||||
pub(crate) fn delete_vector_hnsw(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
|
||||
let mut iter = self.main.prefix_iter_mut::<_, ByteSlice, DecodeIgnore>(
|
||||
wtxn,
|
||||
main_key::VECTOR_HNSW_KEY_PREFIX.as_bytes(),
|
||||
)?;
|
||||
let mut iter = self
|
||||
.main
|
||||
.remap_types::<Bytes, DecodeIgnore>()
|
||||
.prefix_iter_mut(wtxn, main_key::VECTOR_HNSW_KEY_PREFIX.as_bytes())?;
|
||||
let mut deleted = false;
|
||||
while iter.next().transpose()?.is_some() {
|
||||
// We do not keep a reference to the key or the value.
|
||||
@ -501,8 +511,10 @@ impl Index {
|
||||
/// Returns the `hnsw`.
|
||||
pub fn vector_hnsw(&self, rtxn: &RoTxn) -> Result<Option<Hnsw>> {
|
||||
let mut slices = Vec::new();
|
||||
for result in
|
||||
self.main.prefix_iter::<_, Str, ByteSlice>(rtxn, main_key::VECTOR_HNSW_KEY_PREFIX)?
|
||||
for result in self
|
||||
.main
|
||||
.remap_types::<Str, Bytes>()
|
||||
.prefix_iter(rtxn, main_key::VECTOR_HNSW_KEY_PREFIX)?
|
||||
{
|
||||
let (_, slice) = result?;
|
||||
slices.push(slice);
|
||||
@ -512,7 +524,11 @@ impl Index {
|
||||
Ok(None)
|
||||
} else {
|
||||
let readable_slices: ReadableSlices<_> = slices.into_iter().collect();
|
||||
Ok(Some(bincode::deserialize_from(readable_slices).map_err(|_| heed::Error::Decoding)?))
|
||||
Ok(Some(
|
||||
bincode::deserialize_from(readable_slices)
|
||||
.map_err(Into::into)
|
||||
.map_err(heed::Error::Decoding)?,
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
@ -525,7 +541,7 @@ impl Index {
|
||||
wtxn: &mut RwTxn,
|
||||
distribution: &FieldDistribution,
|
||||
) -> heed::Result<()> {
|
||||
self.main.put::<_, Str, SerdeJson<FieldDistribution>>(
|
||||
self.main.remap_types::<Str, SerdeJson<FieldDistribution>>().put(
|
||||
wtxn,
|
||||
main_key::FIELD_DISTRIBUTION_KEY,
|
||||
distribution,
|
||||
@ -537,7 +553,8 @@ impl Index {
|
||||
pub fn field_distribution(&self, rtxn: &RoTxn) -> heed::Result<FieldDistribution> {
|
||||
Ok(self
|
||||
.main
|
||||
.get::<_, Str, SerdeJson<FieldDistribution>>(rtxn, main_key::FIELD_DISTRIBUTION_KEY)?
|
||||
.remap_types::<Str, SerdeJson<FieldDistribution>>()
|
||||
.get(rtxn, main_key::FIELD_DISTRIBUTION_KEY)?
|
||||
.unwrap_or_default())
|
||||
}
|
||||
|
||||
@ -550,7 +567,7 @@ impl Index {
|
||||
wtxn: &mut RwTxn,
|
||||
fields: &[&str],
|
||||
) -> heed::Result<()> {
|
||||
self.main.put::<_, Str, SerdeBincode<&[&str]>>(
|
||||
self.main.remap_types::<Str, SerdeBincode<&[&str]>>().put(
|
||||
wtxn,
|
||||
main_key::DISPLAYED_FIELDS_KEY,
|
||||
&fields,
|
||||
@ -560,13 +577,15 @@ impl Index {
|
||||
/// Deletes the displayed fields ids, this will make the engine to display
|
||||
/// all the documents attributes in the order of the `FieldsIdsMap`.
|
||||
pub(crate) fn delete_displayed_fields(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
|
||||
self.main.delete::<_, Str>(wtxn, main_key::DISPLAYED_FIELDS_KEY)
|
||||
self.main.remap_key_type::<Str>().delete(wtxn, main_key::DISPLAYED_FIELDS_KEY)
|
||||
}
|
||||
|
||||
/// Returns the displayed fields in the order they were set by the user. If it returns
|
||||
/// `None` it means that all the attributes are set as displayed in the order of the `FieldsIdsMap`.
|
||||
pub fn displayed_fields<'t>(&self, rtxn: &'t RoTxn) -> heed::Result<Option<Vec<&'t str>>> {
|
||||
self.main.get::<_, Str, SerdeBincode<Vec<&'t str>>>(rtxn, main_key::DISPLAYED_FIELDS_KEY)
|
||||
self.main
|
||||
.remap_types::<Str, SerdeBincode<Vec<&'t str>>>()
|
||||
.get(rtxn, main_key::DISPLAYED_FIELDS_KEY)
|
||||
}
|
||||
|
||||
/// Identical to `displayed_fields`, but returns the ids instead.
|
||||
@ -646,7 +665,7 @@ impl Index {
|
||||
|
||||
/// Writes the searchable fields, when this list is specified, only these are indexed.
|
||||
fn put_searchable_fields(&self, wtxn: &mut RwTxn, fields: &[&str]) -> heed::Result<()> {
|
||||
self.main.put::<_, Str, SerdeBincode<&[&str]>>(
|
||||
self.main.remap_types::<Str, SerdeBincode<&[&str]>>().put(
|
||||
wtxn,
|
||||
main_key::SEARCHABLE_FIELDS_KEY,
|
||||
&fields,
|
||||
@ -655,13 +674,15 @@ impl Index {
|
||||
|
||||
/// Deletes the searchable fields, when no fields are specified, all fields are indexed.
|
||||
fn delete_searchable_fields(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
|
||||
self.main.delete::<_, Str>(wtxn, main_key::SEARCHABLE_FIELDS_KEY)
|
||||
self.main.remap_key_type::<Str>().delete(wtxn, main_key::SEARCHABLE_FIELDS_KEY)
|
||||
}
|
||||
|
||||
/// Returns the searchable fields, those are the fields that are indexed,
|
||||
/// if the searchable fields aren't there it means that **all** the fields are indexed.
|
||||
pub fn searchable_fields<'t>(&self, rtxn: &'t RoTxn) -> heed::Result<Option<Vec<&'t str>>> {
|
||||
self.main.get::<_, Str, SerdeBincode<Vec<&'t str>>>(rtxn, main_key::SEARCHABLE_FIELDS_KEY)
|
||||
self.main
|
||||
.remap_types::<Str, SerdeBincode<Vec<&'t str>>>()
|
||||
.get(rtxn, main_key::SEARCHABLE_FIELDS_KEY)
|
||||
}
|
||||
|
||||
/// Identical to `searchable_fields`, but returns the ids instead.
|
||||
@ -687,7 +708,7 @@ impl Index {
|
||||
wtxn: &mut RwTxn,
|
||||
fields: &[&str],
|
||||
) -> heed::Result<()> {
|
||||
self.main.put::<_, Str, SerdeBincode<_>>(
|
||||
self.main.remap_types::<Str, SerdeBincode<_>>().put(
|
||||
wtxn,
|
||||
main_key::USER_DEFINED_SEARCHABLE_FIELDS_KEY,
|
||||
&fields,
|
||||
@ -699,7 +720,7 @@ impl Index {
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
) -> heed::Result<bool> {
|
||||
self.main.delete::<_, Str>(wtxn, main_key::USER_DEFINED_SEARCHABLE_FIELDS_KEY)
|
||||
self.main.remap_key_type::<Str>().delete(wtxn, main_key::USER_DEFINED_SEARCHABLE_FIELDS_KEY)
|
||||
}
|
||||
|
||||
/// Returns the user defined searchable fields.
|
||||
@ -708,7 +729,8 @@ impl Index {
|
||||
rtxn: &'t RoTxn,
|
||||
) -> heed::Result<Option<Vec<&'t str>>> {
|
||||
self.main
|
||||
.get::<_, Str, SerdeBincode<Vec<_>>>(rtxn, main_key::USER_DEFINED_SEARCHABLE_FIELDS_KEY)
|
||||
.remap_types::<Str, SerdeBincode<Vec<_>>>()
|
||||
.get(rtxn, main_key::USER_DEFINED_SEARCHABLE_FIELDS_KEY)
|
||||
}
|
||||
|
||||
/* filterable fields */
|
||||
@ -719,19 +741,24 @@ impl Index {
|
||||
wtxn: &mut RwTxn,
|
||||
fields: &HashSet<String>,
|
||||
) -> heed::Result<()> {
|
||||
self.main.put::<_, Str, SerdeJson<_>>(wtxn, main_key::FILTERABLE_FIELDS_KEY, fields)
|
||||
self.main.remap_types::<Str, SerdeJson<_>>().put(
|
||||
wtxn,
|
||||
main_key::FILTERABLE_FIELDS_KEY,
|
||||
fields,
|
||||
)
|
||||
}
|
||||
|
||||
/// Deletes the filterable fields ids in the database.
|
||||
pub(crate) fn delete_filterable_fields(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
|
||||
self.main.delete::<_, Str>(wtxn, main_key::FILTERABLE_FIELDS_KEY)
|
||||
self.main.remap_key_type::<Str>().delete(wtxn, main_key::FILTERABLE_FIELDS_KEY)
|
||||
}
|
||||
|
||||
/// Returns the filterable fields names.
|
||||
pub fn filterable_fields(&self, rtxn: &RoTxn) -> heed::Result<HashSet<String>> {
|
||||
Ok(self
|
||||
.main
|
||||
.get::<_, Str, SerdeJson<_>>(rtxn, main_key::FILTERABLE_FIELDS_KEY)?
|
||||
.remap_types::<Str, SerdeJson<_>>()
|
||||
.get(rtxn, main_key::FILTERABLE_FIELDS_KEY)?
|
||||
.unwrap_or_default())
|
||||
}
|
||||
|
||||
@ -758,19 +785,24 @@ impl Index {
|
||||
wtxn: &mut RwTxn,
|
||||
fields: &HashSet<String>,
|
||||
) -> heed::Result<()> {
|
||||
self.main.put::<_, Str, SerdeJson<_>>(wtxn, main_key::SORTABLE_FIELDS_KEY, fields)
|
||||
self.main.remap_types::<Str, SerdeJson<_>>().put(
|
||||
wtxn,
|
||||
main_key::SORTABLE_FIELDS_KEY,
|
||||
fields,
|
||||
)
|
||||
}
|
||||
|
||||
/// Deletes the sortable fields ids in the database.
|
||||
pub(crate) fn delete_sortable_fields(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
|
||||
self.main.delete::<_, Str>(wtxn, main_key::SORTABLE_FIELDS_KEY)
|
||||
self.main.remap_key_type::<Str>().delete(wtxn, main_key::SORTABLE_FIELDS_KEY)
|
||||
}
|
||||
|
||||
/// Returns the sortable fields names.
|
||||
pub fn sortable_fields(&self, rtxn: &RoTxn) -> heed::Result<HashSet<String>> {
|
||||
Ok(self
|
||||
.main
|
||||
.get::<_, Str, SerdeJson<_>>(rtxn, main_key::SORTABLE_FIELDS_KEY)?
|
||||
.remap_types::<Str, SerdeJson<_>>()
|
||||
.get(rtxn, main_key::SORTABLE_FIELDS_KEY)?
|
||||
.unwrap_or_default())
|
||||
}
|
||||
|
||||
@ -789,14 +821,19 @@ impl Index {
|
||||
wtxn: &mut RwTxn,
|
||||
fields: &HashSet<String>,
|
||||
) -> heed::Result<()> {
|
||||
self.main.put::<_, Str, SerdeJson<_>>(wtxn, main_key::HIDDEN_FACETED_FIELDS_KEY, fields)
|
||||
self.main.remap_types::<Str, SerdeJson<_>>().put(
|
||||
wtxn,
|
||||
main_key::HIDDEN_FACETED_FIELDS_KEY,
|
||||
fields,
|
||||
)
|
||||
}
|
||||
|
||||
/// Returns the faceted fields names.
|
||||
pub fn faceted_fields(&self, rtxn: &RoTxn) -> heed::Result<HashSet<String>> {
|
||||
Ok(self
|
||||
.main
|
||||
.get::<_, Str, SerdeJson<_>>(rtxn, main_key::HIDDEN_FACETED_FIELDS_KEY)?
|
||||
.remap_types::<Str, SerdeJson<_>>()
|
||||
.get(rtxn, main_key::HIDDEN_FACETED_FIELDS_KEY)?
|
||||
.unwrap_or_default())
|
||||
}
|
||||
|
||||
@ -863,7 +900,7 @@ impl Index {
|
||||
rtxn: &RoTxn,
|
||||
field_id: FieldId,
|
||||
) -> heed::Result<RoaringBitmap> {
|
||||
match self.facet_id_is_null_docids.get(rtxn, &BEU16::new(field_id))? {
|
||||
match self.facet_id_is_null_docids.get(rtxn, &field_id)? {
|
||||
Some(docids) => Ok(docids),
|
||||
None => Ok(RoaringBitmap::new()),
|
||||
}
|
||||
@ -875,7 +912,7 @@ impl Index {
|
||||
rtxn: &RoTxn,
|
||||
field_id: FieldId,
|
||||
) -> heed::Result<RoaringBitmap> {
|
||||
match self.facet_id_is_empty_docids.get(rtxn, &BEU16::new(field_id))? {
|
||||
match self.facet_id_is_empty_docids.get(rtxn, &field_id)? {
|
||||
Some(docids) => Ok(docids),
|
||||
None => Ok(RoaringBitmap::new()),
|
||||
}
|
||||
@ -887,7 +924,7 @@ impl Index {
|
||||
rtxn: &RoTxn,
|
||||
field_id: FieldId,
|
||||
) -> heed::Result<RoaringBitmap> {
|
||||
match self.facet_id_exists_docids.get(rtxn, &BEU16::new(field_id))? {
|
||||
match self.facet_id_exists_docids.get(rtxn, &field_id)? {
|
||||
Some(docids) => Ok(docids),
|
||||
None => Ok(RoaringBitmap::new()),
|
||||
}
|
||||
@ -900,15 +937,15 @@ impl Index {
|
||||
wtxn: &mut RwTxn,
|
||||
distinct_field: &str,
|
||||
) -> heed::Result<()> {
|
||||
self.main.put::<_, Str, Str>(wtxn, main_key::DISTINCT_FIELD_KEY, distinct_field)
|
||||
self.main.remap_types::<Str, Str>().put(wtxn, main_key::DISTINCT_FIELD_KEY, distinct_field)
|
||||
}
|
||||
|
||||
pub fn distinct_field<'a>(&self, rtxn: &'a RoTxn) -> heed::Result<Option<&'a str>> {
|
||||
self.main.get::<_, Str, Str>(rtxn, main_key::DISTINCT_FIELD_KEY)
|
||||
self.main.remap_types::<Str, Str>().get(rtxn, main_key::DISTINCT_FIELD_KEY)
|
||||
}
|
||||
|
||||
pub(crate) fn delete_distinct_field(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
|
||||
self.main.delete::<_, Str>(wtxn, main_key::DISTINCT_FIELD_KEY)
|
||||
self.main.remap_key_type::<Str>().delete(wtxn, main_key::DISTINCT_FIELD_KEY)
|
||||
}
|
||||
|
||||
/* criteria */
|
||||
@ -918,15 +955,23 @@ impl Index {
|
||||
wtxn: &mut RwTxn,
|
||||
criteria: &[Criterion],
|
||||
) -> heed::Result<()> {
|
||||
self.main.put::<_, Str, SerdeJson<&[Criterion]>>(wtxn, main_key::CRITERIA_KEY, &criteria)
|
||||
self.main.remap_types::<Str, SerdeJson<&[Criterion]>>().put(
|
||||
wtxn,
|
||||
main_key::CRITERIA_KEY,
|
||||
&criteria,
|
||||
)
|
||||
}
|
||||
|
||||
pub(crate) fn delete_criteria(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
|
||||
self.main.delete::<_, Str>(wtxn, main_key::CRITERIA_KEY)
|
||||
self.main.remap_key_type::<Str>().delete(wtxn, main_key::CRITERIA_KEY)
|
||||
}
|
||||
|
||||
pub fn criteria(&self, rtxn: &RoTxn) -> heed::Result<Vec<Criterion>> {
|
||||
match self.main.get::<_, Str, SerdeJson<Vec<Criterion>>>(rtxn, main_key::CRITERIA_KEY)? {
|
||||
match self
|
||||
.main
|
||||
.remap_types::<Str, SerdeJson<Vec<Criterion>>>()
|
||||
.get(rtxn, main_key::CRITERIA_KEY)?
|
||||
{
|
||||
Some(criteria) => Ok(criteria),
|
||||
None => Ok(default_criteria()),
|
||||
}
|
||||
@ -940,12 +985,16 @@ impl Index {
|
||||
wtxn: &mut RwTxn,
|
||||
fst: &fst::Set<A>,
|
||||
) -> heed::Result<()> {
|
||||
self.main.put::<_, Str, ByteSlice>(wtxn, main_key::WORDS_FST_KEY, fst.as_fst().as_bytes())
|
||||
self.main.remap_types::<Str, Bytes>().put(
|
||||
wtxn,
|
||||
main_key::WORDS_FST_KEY,
|
||||
fst.as_fst().as_bytes(),
|
||||
)
|
||||
}
|
||||
|
||||
/// Returns the FST which is the words dictionary of the engine.
|
||||
pub fn words_fst<'t>(&self, rtxn: &'t RoTxn) -> Result<fst::Set<Cow<'t, [u8]>>> {
|
||||
match self.main.get::<_, Str, ByteSlice>(rtxn, main_key::WORDS_FST_KEY)? {
|
||||
match self.main.remap_types::<Str, Bytes>().get(rtxn, main_key::WORDS_FST_KEY)? {
|
||||
Some(bytes) => Ok(fst::Set::new(bytes)?.map_data(Cow::Borrowed)?),
|
||||
None => Ok(fst::Set::default().map_data(Cow::Owned)?),
|
||||
}
|
||||
@ -958,15 +1007,19 @@ impl Index {
|
||||
wtxn: &mut RwTxn,
|
||||
fst: &fst::Set<A>,
|
||||
) -> heed::Result<()> {
|
||||
self.main.put::<_, Str, ByteSlice>(wtxn, main_key::STOP_WORDS_KEY, fst.as_fst().as_bytes())
|
||||
self.main.remap_types::<Str, Bytes>().put(
|
||||
wtxn,
|
||||
main_key::STOP_WORDS_KEY,
|
||||
fst.as_fst().as_bytes(),
|
||||
)
|
||||
}
|
||||
|
||||
pub(crate) fn delete_stop_words(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
|
||||
self.main.delete::<_, Str>(wtxn, main_key::STOP_WORDS_KEY)
|
||||
self.main.remap_key_type::<Str>().delete(wtxn, main_key::STOP_WORDS_KEY)
|
||||
}
|
||||
|
||||
pub fn stop_words<'t>(&self, rtxn: &'t RoTxn) -> Result<Option<fst::Set<&'t [u8]>>> {
|
||||
match self.main.get::<_, Str, ByteSlice>(rtxn, main_key::STOP_WORDS_KEY)? {
|
||||
match self.main.remap_types::<Str, Bytes>().get(rtxn, main_key::STOP_WORDS_KEY)? {
|
||||
Some(bytes) => Ok(Some(fst::Set::new(bytes)?)),
|
||||
None => Ok(None),
|
||||
}
|
||||
@ -979,18 +1032,22 @@ impl Index {
|
||||
wtxn: &mut RwTxn,
|
||||
set: &BTreeSet<String>,
|
||||
) -> heed::Result<()> {
|
||||
self.main.put::<_, Str, SerdeBincode<_>>(wtxn, main_key::NON_SEPARATOR_TOKENS_KEY, set)
|
||||
self.main.remap_types::<Str, SerdeBincode<_>>().put(
|
||||
wtxn,
|
||||
main_key::NON_SEPARATOR_TOKENS_KEY,
|
||||
set,
|
||||
)
|
||||
}
|
||||
|
||||
pub(crate) fn delete_non_separator_tokens(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
|
||||
self.main.delete::<_, Str>(wtxn, main_key::NON_SEPARATOR_TOKENS_KEY)
|
||||
self.main.remap_key_type::<Str>().delete(wtxn, main_key::NON_SEPARATOR_TOKENS_KEY)
|
||||
}
|
||||
|
||||
pub fn non_separator_tokens(&self, rtxn: &RoTxn) -> Result<Option<BTreeSet<String>>> {
|
||||
Ok(self.main.get::<_, Str, SerdeBincode<BTreeSet<String>>>(
|
||||
rtxn,
|
||||
main_key::NON_SEPARATOR_TOKENS_KEY,
|
||||
)?)
|
||||
Ok(self
|
||||
.main
|
||||
.remap_types::<Str, SerdeBincode<BTreeSet<String>>>()
|
||||
.get(rtxn, main_key::NON_SEPARATOR_TOKENS_KEY)?)
|
||||
}
|
||||
|
||||
/* separator tokens */
|
||||
@ -1000,17 +1057,22 @@ impl Index {
|
||||
wtxn: &mut RwTxn,
|
||||
set: &BTreeSet<String>,
|
||||
) -> heed::Result<()> {
|
||||
self.main.put::<_, Str, SerdeBincode<_>>(wtxn, main_key::SEPARATOR_TOKENS_KEY, set)
|
||||
self.main.remap_types::<Str, SerdeBincode<_>>().put(
|
||||
wtxn,
|
||||
main_key::SEPARATOR_TOKENS_KEY,
|
||||
set,
|
||||
)
|
||||
}
|
||||
|
||||
pub(crate) fn delete_separator_tokens(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
|
||||
self.main.delete::<_, Str>(wtxn, main_key::SEPARATOR_TOKENS_KEY)
|
||||
self.main.remap_key_type::<Str>().delete(wtxn, main_key::SEPARATOR_TOKENS_KEY)
|
||||
}
|
||||
|
||||
pub fn separator_tokens(&self, rtxn: &RoTxn) -> Result<Option<BTreeSet<String>>> {
|
||||
Ok(self
|
||||
.main
|
||||
.get::<_, Str, SerdeBincode<BTreeSet<String>>>(rtxn, main_key::SEPARATOR_TOKENS_KEY)?)
|
||||
.remap_types::<Str, SerdeBincode<BTreeSet<String>>>()
|
||||
.get(rtxn, main_key::SEPARATOR_TOKENS_KEY)?)
|
||||
}
|
||||
|
||||
/* separators easing method */
|
||||
@ -1040,17 +1102,18 @@ impl Index {
|
||||
wtxn: &mut RwTxn,
|
||||
set: &BTreeSet<String>,
|
||||
) -> heed::Result<()> {
|
||||
self.main.put::<_, Str, SerdeBincode<_>>(wtxn, main_key::DICTIONARY_KEY, set)
|
||||
self.main.remap_types::<Str, SerdeBincode<_>>().put(wtxn, main_key::DICTIONARY_KEY, set)
|
||||
}
|
||||
|
||||
pub(crate) fn delete_dictionary(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
|
||||
self.main.delete::<_, Str>(wtxn, main_key::DICTIONARY_KEY)
|
||||
self.main.remap_key_type::<Str>().delete(wtxn, main_key::DICTIONARY_KEY)
|
||||
}
|
||||
|
||||
pub fn dictionary(&self, rtxn: &RoTxn) -> Result<Option<BTreeSet<String>>> {
|
||||
Ok(self
|
||||
.main
|
||||
.get::<_, Str, SerdeBincode<BTreeSet<String>>>(rtxn, main_key::DICTIONARY_KEY)?)
|
||||
.remap_types::<Str, SerdeBincode<BTreeSet<String>>>()
|
||||
.get(rtxn, main_key::DICTIONARY_KEY)?)
|
||||
}
|
||||
|
||||
/* synonyms */
|
||||
@ -1061,8 +1124,12 @@ impl Index {
|
||||
synonyms: &HashMap<Vec<String>, Vec<Vec<String>>>,
|
||||
user_defined_synonyms: &BTreeMap<String, Vec<String>>,
|
||||
) -> heed::Result<()> {
|
||||
self.main.put::<_, Str, SerdeBincode<_>>(wtxn, main_key::SYNONYMS_KEY, synonyms)?;
|
||||
self.main.put::<_, Str, SerdeBincode<_>>(
|
||||
self.main.remap_types::<Str, SerdeBincode<_>>().put(
|
||||
wtxn,
|
||||
main_key::SYNONYMS_KEY,
|
||||
synonyms,
|
||||
)?;
|
||||
self.main.remap_types::<Str, SerdeBincode<_>>().put(
|
||||
wtxn,
|
||||
main_key::USER_DEFINED_SYNONYMS_KEY,
|
||||
user_defined_synonyms,
|
||||
@ -1070,8 +1137,8 @@ impl Index {
|
||||
}
|
||||
|
||||
pub(crate) fn delete_synonyms(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
|
||||
self.main.delete::<_, Str>(wtxn, main_key::SYNONYMS_KEY)?;
|
||||
self.main.delete::<_, Str>(wtxn, main_key::USER_DEFINED_SYNONYMS_KEY)
|
||||
self.main.remap_key_type::<Str>().delete(wtxn, main_key::SYNONYMS_KEY)?;
|
||||
self.main.remap_key_type::<Str>().delete(wtxn, main_key::USER_DEFINED_SYNONYMS_KEY)
|
||||
}
|
||||
|
||||
pub fn user_defined_synonyms(
|
||||
@ -1080,14 +1147,16 @@ impl Index {
|
||||
) -> heed::Result<BTreeMap<String, Vec<String>>> {
|
||||
Ok(self
|
||||
.main
|
||||
.get::<_, Str, SerdeBincode<_>>(rtxn, main_key::USER_DEFINED_SYNONYMS_KEY)?
|
||||
.remap_types::<Str, SerdeBincode<_>>()
|
||||
.get(rtxn, main_key::USER_DEFINED_SYNONYMS_KEY)?
|
||||
.unwrap_or_default())
|
||||
}
|
||||
|
||||
pub fn synonyms(&self, rtxn: &RoTxn) -> heed::Result<HashMap<Vec<String>, Vec<Vec<String>>>> {
|
||||
Ok(self
|
||||
.main
|
||||
.get::<_, Str, SerdeBincode<_>>(rtxn, main_key::SYNONYMS_KEY)?
|
||||
.remap_types::<Str, SerdeBincode<_>>()
|
||||
.get(rtxn, main_key::SYNONYMS_KEY)?
|
||||
.unwrap_or_default())
|
||||
}
|
||||
|
||||
@ -1108,7 +1177,7 @@ impl Index {
|
||||
wtxn: &mut RwTxn,
|
||||
fst: &fst::Set<A>,
|
||||
) -> heed::Result<()> {
|
||||
self.main.put::<_, Str, ByteSlice>(
|
||||
self.main.remap_types::<Str, Bytes>().put(
|
||||
wtxn,
|
||||
main_key::WORDS_PREFIXES_FST_KEY,
|
||||
fst.as_fst().as_bytes(),
|
||||
@ -1117,7 +1186,7 @@ impl Index {
|
||||
|
||||
/// Returns the FST which is the words prefixes dictionnary of the engine.
|
||||
pub fn words_prefixes_fst<'t>(&self, rtxn: &'t RoTxn) -> Result<fst::Set<Cow<'t, [u8]>>> {
|
||||
match self.main.get::<_, Str, ByteSlice>(rtxn, main_key::WORDS_PREFIXES_FST_KEY)? {
|
||||
match self.main.remap_types::<Str, Bytes>().get(rtxn, main_key::WORDS_PREFIXES_FST_KEY)? {
|
||||
Some(bytes) => Ok(fst::Set::new(bytes)?.map_data(Cow::Borrowed)?),
|
||||
None => Ok(fst::Set::default().map_data(Cow::Owned)?),
|
||||
}
|
||||
@ -1142,7 +1211,7 @@ impl Index {
|
||||
Ok(ids.into_iter().map(move |id| {
|
||||
let kv = self
|
||||
.documents
|
||||
.get(rtxn, &BEU32::new(id))?
|
||||
.get(rtxn, &id)?
|
||||
.ok_or(UserError::UnknownInternalDocumentId { document_id: id })?;
|
||||
Ok((id, kv))
|
||||
}))
|
||||
@ -1207,7 +1276,8 @@ impl Index {
|
||||
pub fn created_at(&self, rtxn: &RoTxn) -> Result<OffsetDateTime> {
|
||||
Ok(self
|
||||
.main
|
||||
.get::<_, Str, SerdeJson<OffsetDateTime>>(rtxn, main_key::CREATED_AT_KEY)?
|
||||
.remap_types::<Str, SerdeJson<OffsetDateTime>>()
|
||||
.get(rtxn, main_key::CREATED_AT_KEY)?
|
||||
.ok_or(InternalError::DatabaseMissingEntry {
|
||||
db_name: db_name::MAIN,
|
||||
key: Some(main_key::CREATED_AT_KEY),
|
||||
@ -1218,7 +1288,8 @@ impl Index {
|
||||
pub fn updated_at(&self, rtxn: &RoTxn) -> Result<OffsetDateTime> {
|
||||
Ok(self
|
||||
.main
|
||||
.get::<_, Str, SerdeJson<OffsetDateTime>>(rtxn, main_key::UPDATED_AT_KEY)?
|
||||
.remap_types::<Str, SerdeJson<OffsetDateTime>>()
|
||||
.get(rtxn, main_key::UPDATED_AT_KEY)?
|
||||
.ok_or(InternalError::DatabaseMissingEntry {
|
||||
db_name: db_name::MAIN,
|
||||
key: Some(main_key::UPDATED_AT_KEY),
|
||||
@ -1230,14 +1301,18 @@ impl Index {
|
||||
wtxn: &mut RwTxn,
|
||||
time: &OffsetDateTime,
|
||||
) -> heed::Result<()> {
|
||||
self.main.put::<_, Str, SerdeJson<OffsetDateTime>>(wtxn, main_key::UPDATED_AT_KEY, time)
|
||||
self.main.remap_types::<Str, SerdeJson<OffsetDateTime>>().put(
|
||||
wtxn,
|
||||
main_key::UPDATED_AT_KEY,
|
||||
time,
|
||||
)
|
||||
}
|
||||
|
||||
pub fn authorize_typos(&self, txn: &RoTxn) -> heed::Result<bool> {
|
||||
// It is not possible to put a bool in heed with OwnedType, so we put a u8 instead. We
|
||||
// identify 0 as being false, and anything else as true. The absence of a value is true,
|
||||
// because by default, we authorize typos.
|
||||
match self.main.get::<_, Str, OwnedType<u8>>(txn, main_key::AUTHORIZE_TYPOS)? {
|
||||
match self.main.remap_types::<Str, U8>().get(txn, main_key::AUTHORIZE_TYPOS)? {
|
||||
Some(0) => Ok(false),
|
||||
_ => Ok(true),
|
||||
}
|
||||
@ -1247,7 +1322,7 @@ impl Index {
|
||||
// It is not possible to put a bool in heed with OwnedType, so we put a u8 instead. We
|
||||
// identify 0 as being false, and anything else as true. The absence of a value is true,
|
||||
// because by default, we authorize typos.
|
||||
self.main.put::<_, Str, OwnedType<u8>>(txn, main_key::AUTHORIZE_TYPOS, &(flag as u8))?;
|
||||
self.main.remap_types::<Str, U8>().put(txn, main_key::AUTHORIZE_TYPOS, &(flag as u8))?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@ -1258,7 +1333,8 @@ impl Index {
|
||||
// because by default, we authorize typos.
|
||||
Ok(self
|
||||
.main
|
||||
.get::<_, Str, OwnedType<u8>>(txn, main_key::ONE_TYPO_WORD_LEN)?
|
||||
.remap_types::<Str, U8>()
|
||||
.get(txn, main_key::ONE_TYPO_WORD_LEN)?
|
||||
.unwrap_or(DEFAULT_MIN_WORD_LEN_ONE_TYPO))
|
||||
}
|
||||
|
||||
@ -1266,7 +1342,7 @@ impl Index {
|
||||
// It is not possible to put a bool in heed with OwnedType, so we put a u8 instead. We
|
||||
// identify 0 as being false, and anything else as true. The absence of a value is true,
|
||||
// because by default, we authorize typos.
|
||||
self.main.put::<_, Str, OwnedType<u8>>(txn, main_key::ONE_TYPO_WORD_LEN, &val)?;
|
||||
self.main.remap_types::<Str, U8>().put(txn, main_key::ONE_TYPO_WORD_LEN, &val)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@ -1276,7 +1352,8 @@ impl Index {
|
||||
// because by default, we authorize typos.
|
||||
Ok(self
|
||||
.main
|
||||
.get::<_, Str, OwnedType<u8>>(txn, main_key::TWO_TYPOS_WORD_LEN)?
|
||||
.remap_types::<Str, U8>()
|
||||
.get(txn, main_key::TWO_TYPOS_WORD_LEN)?
|
||||
.unwrap_or(DEFAULT_MIN_WORD_LEN_TWO_TYPOS))
|
||||
}
|
||||
|
||||
@ -1284,13 +1361,13 @@ impl Index {
|
||||
// It is not possible to put a bool in heed with OwnedType, so we put a u8 instead. We
|
||||
// identify 0 as being false, and anything else as true. The absence of a value is true,
|
||||
// because by default, we authorize typos.
|
||||
self.main.put::<_, Str, OwnedType<u8>>(txn, main_key::TWO_TYPOS_WORD_LEN, &val)?;
|
||||
self.main.remap_types::<Str, U8>().put(txn, main_key::TWO_TYPOS_WORD_LEN, &val)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// List the words on which typo are not allowed
|
||||
pub fn exact_words<'t>(&self, txn: &'t RoTxn) -> Result<Option<fst::Set<Cow<'t, [u8]>>>> {
|
||||
match self.main.get::<_, Str, ByteSlice>(txn, main_key::EXACT_WORDS)? {
|
||||
match self.main.remap_types::<Str, Bytes>().get(txn, main_key::EXACT_WORDS)? {
|
||||
Some(bytes) => Ok(Some(fst::Set::new(bytes)?.map_data(Cow::Borrowed)?)),
|
||||
None => Ok(None),
|
||||
}
|
||||
@ -1301,7 +1378,7 @@ impl Index {
|
||||
txn: &mut RwTxn,
|
||||
words: &fst::Set<A>,
|
||||
) -> Result<()> {
|
||||
self.main.put::<_, Str, ByteSlice>(
|
||||
self.main.remap_types::<Str, Bytes>().put(
|
||||
txn,
|
||||
main_key::EXACT_WORDS,
|
||||
words.as_fst().as_bytes(),
|
||||
@ -1313,7 +1390,8 @@ impl Index {
|
||||
pub fn exact_attributes<'t>(&self, txn: &'t RoTxn) -> Result<Vec<&'t str>> {
|
||||
Ok(self
|
||||
.main
|
||||
.get::<_, Str, SerdeBincode<Vec<&str>>>(txn, main_key::EXACT_ATTRIBUTES)?
|
||||
.remap_types::<Str, SerdeBincode<Vec<&str>>>()
|
||||
.get(txn, main_key::EXACT_ATTRIBUTES)?
|
||||
.unwrap_or_default())
|
||||
}
|
||||
|
||||
@ -1326,34 +1404,36 @@ impl Index {
|
||||
|
||||
/// Writes the exact attributes to the database.
|
||||
pub(crate) fn put_exact_attributes(&self, txn: &mut RwTxn, attrs: &[&str]) -> Result<()> {
|
||||
self.main.put::<_, Str, SerdeBincode<&[&str]>>(txn, main_key::EXACT_ATTRIBUTES, &attrs)?;
|
||||
self.main.remap_types::<Str, SerdeBincode<&[&str]>>().put(
|
||||
txn,
|
||||
main_key::EXACT_ATTRIBUTES,
|
||||
&attrs,
|
||||
)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Clears the exact attributes from the store.
|
||||
pub(crate) fn delete_exact_attributes(&self, txn: &mut RwTxn) -> heed::Result<bool> {
|
||||
self.main.delete::<_, Str>(txn, main_key::EXACT_ATTRIBUTES)
|
||||
self.main.remap_key_type::<Str>().delete(txn, main_key::EXACT_ATTRIBUTES)
|
||||
}
|
||||
|
||||
pub fn max_values_per_facet(&self, txn: &RoTxn) -> heed::Result<Option<usize>> {
|
||||
self.main.get::<_, Str, OwnedType<usize>>(txn, main_key::MAX_VALUES_PER_FACET)
|
||||
pub fn max_values_per_facet(&self, txn: &RoTxn) -> heed::Result<Option<u64>> {
|
||||
self.main.remap_types::<Str, BEU64>().get(txn, main_key::MAX_VALUES_PER_FACET)
|
||||
}
|
||||
|
||||
pub(crate) fn put_max_values_per_facet(&self, txn: &mut RwTxn, val: usize) -> heed::Result<()> {
|
||||
self.main.put::<_, Str, OwnedType<usize>>(txn, main_key::MAX_VALUES_PER_FACET, &val)
|
||||
pub(crate) fn put_max_values_per_facet(&self, txn: &mut RwTxn, val: u64) -> heed::Result<()> {
|
||||
self.main.remap_types::<Str, BEU64>().put(txn, main_key::MAX_VALUES_PER_FACET, &val)
|
||||
}
|
||||
|
||||
pub(crate) fn delete_max_values_per_facet(&self, txn: &mut RwTxn) -> heed::Result<bool> {
|
||||
self.main.delete::<_, Str>(txn, main_key::MAX_VALUES_PER_FACET)
|
||||
self.main.remap_key_type::<Str>().delete(txn, main_key::MAX_VALUES_PER_FACET)
|
||||
}
|
||||
|
||||
pub fn sort_facet_values_by(&self, txn: &RoTxn) -> heed::Result<HashMap<String, OrderBy>> {
|
||||
let mut orders = self
|
||||
.main
|
||||
.get::<_, Str, SerdeJson<HashMap<String, OrderBy>>>(
|
||||
txn,
|
||||
main_key::SORT_FACET_VALUES_BY,
|
||||
)?
|
||||
.remap_types::<Str, SerdeJson<HashMap<String, OrderBy>>>()
|
||||
.get(txn, main_key::SORT_FACET_VALUES_BY)?
|
||||
.unwrap_or_default();
|
||||
// Insert the default ordering if it is not already overwritten by the user.
|
||||
orders.entry("*".to_string()).or_insert(OrderBy::Lexicographic);
|
||||
@ -1365,27 +1445,49 @@ impl Index {
|
||||
txn: &mut RwTxn,
|
||||
val: &HashMap<String, OrderBy>,
|
||||
) -> heed::Result<()> {
|
||||
self.main.put::<_, Str, SerdeJson<_>>(txn, main_key::SORT_FACET_VALUES_BY, &val)
|
||||
self.main.remap_types::<Str, SerdeJson<_>>().put(txn, main_key::SORT_FACET_VALUES_BY, &val)
|
||||
}
|
||||
|
||||
pub(crate) fn delete_sort_facet_values_by(&self, txn: &mut RwTxn) -> heed::Result<bool> {
|
||||
self.main.delete::<_, Str>(txn, main_key::SORT_FACET_VALUES_BY)
|
||||
self.main.remap_key_type::<Str>().delete(txn, main_key::SORT_FACET_VALUES_BY)
|
||||
}
|
||||
|
||||
pub fn pagination_max_total_hits(&self, txn: &RoTxn) -> heed::Result<Option<usize>> {
|
||||
self.main.get::<_, Str, OwnedType<usize>>(txn, main_key::PAGINATION_MAX_TOTAL_HITS)
|
||||
pub fn pagination_max_total_hits(&self, txn: &RoTxn) -> heed::Result<Option<u64>> {
|
||||
self.main.remap_types::<Str, BEU64>().get(txn, main_key::PAGINATION_MAX_TOTAL_HITS)
|
||||
}
|
||||
|
||||
pub(crate) fn put_pagination_max_total_hits(
|
||||
&self,
|
||||
txn: &mut RwTxn,
|
||||
val: usize,
|
||||
val: u64,
|
||||
) -> heed::Result<()> {
|
||||
self.main.put::<_, Str, OwnedType<usize>>(txn, main_key::PAGINATION_MAX_TOTAL_HITS, &val)
|
||||
self.main.remap_types::<Str, BEU64>().put(txn, main_key::PAGINATION_MAX_TOTAL_HITS, &val)
|
||||
}
|
||||
|
||||
pub(crate) fn delete_pagination_max_total_hits(&self, txn: &mut RwTxn) -> heed::Result<bool> {
|
||||
self.main.delete::<_, Str>(txn, main_key::PAGINATION_MAX_TOTAL_HITS)
|
||||
self.main.remap_key_type::<Str>().delete(txn, main_key::PAGINATION_MAX_TOTAL_HITS)
|
||||
}
|
||||
|
||||
pub fn proximity_precision(&self, txn: &RoTxn) -> heed::Result<Option<ProximityPrecision>> {
|
||||
self.main
|
||||
.remap_types::<Str, SerdeBincode<ProximityPrecision>>()
|
||||
.get(txn, main_key::PROXIMITY_PRECISION)
|
||||
}
|
||||
|
||||
pub(crate) fn put_proximity_precision(
|
||||
&self,
|
||||
txn: &mut RwTxn,
|
||||
val: ProximityPrecision,
|
||||
) -> heed::Result<()> {
|
||||
self.main.remap_types::<Str, SerdeBincode<ProximityPrecision>>().put(
|
||||
txn,
|
||||
main_key::PROXIMITY_PRECISION,
|
||||
&val,
|
||||
)
|
||||
}
|
||||
|
||||
pub(crate) fn delete_proximity_precision(&self, txn: &mut RwTxn) -> heed::Result<bool> {
|
||||
self.main.remap_key_type::<Str>().delete(txn, main_key::PROXIMITY_PRECISION)
|
||||
}
|
||||
|
||||
/* script language docids */
|
||||
@ -1479,7 +1581,7 @@ pub(crate) mod tests {
|
||||
}
|
||||
pub fn add_documents_using_wtxn<'t, R>(
|
||||
&'t self,
|
||||
wtxn: &mut RwTxn<'t, '_>,
|
||||
wtxn: &mut RwTxn<'t>,
|
||||
documents: DocumentsBatchReader<R>,
|
||||
) -> Result<(), crate::error::Error>
|
||||
where
|
||||
@ -1523,7 +1625,7 @@ pub(crate) mod tests {
|
||||
}
|
||||
pub fn update_settings_using_wtxn<'t>(
|
||||
&'t self,
|
||||
wtxn: &mut RwTxn<'t, '_>,
|
||||
wtxn: &mut RwTxn<'t>,
|
||||
update: impl Fn(&mut Settings),
|
||||
) -> Result<(), crate::error::Error> {
|
||||
let mut builder = update::Settings::new(wtxn, &self.inner, &self.indexer_config);
|
||||
@ -1534,7 +1636,7 @@ pub(crate) mod tests {
|
||||
|
||||
pub fn delete_documents_using_wtxn<'t>(
|
||||
&'t self,
|
||||
wtxn: &mut RwTxn<'t, '_>,
|
||||
wtxn: &mut RwTxn<'t>,
|
||||
external_document_ids: Vec<String>,
|
||||
) {
|
||||
let builder = IndexDocuments::new(
|
||||
|
@ -66,9 +66,9 @@ pub use self::search::{
|
||||
pub type Result<T> = std::result::Result<T, error::Error>;
|
||||
|
||||
pub type Attribute = u32;
|
||||
pub type BEU16 = heed::zerocopy::U16<heed::byteorder::BE>;
|
||||
pub type BEU32 = heed::zerocopy::U32<heed::byteorder::BE>;
|
||||
pub type BEU64 = heed::zerocopy::U64<heed::byteorder::BE>;
|
||||
pub type BEU16 = heed::types::U16<heed::byteorder::BE>;
|
||||
pub type BEU32 = heed::types::U32<heed::byteorder::BE>;
|
||||
pub type BEU64 = heed::types::U64<heed::byteorder::BE>;
|
||||
pub type DocumentId = u32;
|
||||
pub type FastMap4<K, V> = HashMap<K, V, BuildHasherDefault<FxHasher32>>;
|
||||
pub type FastMap8<K, V> = HashMap<K, V, BuildHasherDefault<FxHasher64>>;
|
||||
|
@ -1,5 +1,7 @@
|
||||
use std::cmp;
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::{relative_from_absolute_position, Position};
|
||||
|
||||
pub const MAX_DISTANCE: u32 = 4;
|
||||
@ -25,3 +27,11 @@ pub fn positions_proximity(lhs: Position, rhs: Position) -> u32 {
|
||||
pub fn path_proximity(path: &[Position]) -> u32 {
|
||||
path.windows(2).map(|w| positions_proximity(w[0], w[1])).sum::<u32>()
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub enum ProximityPrecision {
|
||||
#[default]
|
||||
WordScale,
|
||||
AttributeScale,
|
||||
}
|
||||
|
@ -2,7 +2,7 @@ use std::collections::{BTreeMap, HashMap, HashSet};
|
||||
use std::ops::ControlFlow;
|
||||
use std::{fmt, mem};
|
||||
|
||||
use heed::types::ByteSlice;
|
||||
use heed::types::Bytes;
|
||||
use heed::BytesDecode;
|
||||
use indexmap::IndexMap;
|
||||
use roaring::RoaringBitmap;
|
||||
@ -13,7 +13,7 @@ use crate::facet::FacetType;
|
||||
use crate::heed_codec::facet::{
|
||||
FacetGroupKeyCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec, OrderedF64Codec,
|
||||
};
|
||||
use crate::heed_codec::{ByteSliceRefCodec, StrRefCodec};
|
||||
use crate::heed_codec::{BytesRefCodec, StrRefCodec};
|
||||
use crate::search::facet::facet_distribution_iter::{
|
||||
count_iterate_over_facet_distribution, lexicographically_iterate_over_facet_distribution,
|
||||
};
|
||||
@ -105,7 +105,7 @@ impl<'a> FacetDistribution<'a> {
|
||||
key_buffer.truncate(mem::size_of::<FieldId>());
|
||||
key_buffer.extend_from_slice(&docid.to_be_bytes());
|
||||
let iter = db
|
||||
.remap_key_type::<ByteSlice>()
|
||||
.remap_key_type::<Bytes>()
|
||||
.prefix_iter(self.rtxn, &key_buffer)?
|
||||
.remap_key_type::<FieldDocIdFacetF64Codec>();
|
||||
|
||||
@ -129,7 +129,7 @@ impl<'a> FacetDistribution<'a> {
|
||||
key_buffer.truncate(mem::size_of::<FieldId>());
|
||||
key_buffer.extend_from_slice(&docid.to_be_bytes());
|
||||
let iter = db
|
||||
.remap_key_type::<ByteSlice>()
|
||||
.remap_key_type::<Bytes>()
|
||||
.prefix_iter(self.rtxn, &key_buffer)?
|
||||
.remap_key_type::<FieldDocIdFacetStringCodec>();
|
||||
|
||||
@ -172,9 +172,7 @@ impl<'a> FacetDistribution<'a> {
|
||||
|
||||
search_function(
|
||||
self.rtxn,
|
||||
self.index
|
||||
.facet_id_f64_docids
|
||||
.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(),
|
||||
self.index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>(),
|
||||
field_id,
|
||||
candidates,
|
||||
|facet_key, nbr_docids, _| {
|
||||
@ -203,9 +201,7 @@ impl<'a> FacetDistribution<'a> {
|
||||
|
||||
search_function(
|
||||
self.rtxn,
|
||||
self.index
|
||||
.facet_id_string_docids
|
||||
.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(),
|
||||
self.index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>(),
|
||||
field_id,
|
||||
candidates,
|
||||
|facet_key, nbr_docids, any_docid| {
|
||||
|
@ -7,7 +7,7 @@ use roaring::RoaringBitmap;
|
||||
|
||||
use super::{get_first_facet_value, get_highest_level};
|
||||
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec};
|
||||
use crate::heed_codec::ByteSliceRefCodec;
|
||||
use crate::heed_codec::BytesRefCodec;
|
||||
use crate::DocumentId;
|
||||
|
||||
/// Call the given closure on the facet distribution of the candidate documents.
|
||||
@ -23,7 +23,7 @@ use crate::DocumentId;
|
||||
/// keep iterating over the different facet values or stop.
|
||||
pub fn lexicographically_iterate_over_facet_distribution<'t, CB>(
|
||||
rtxn: &'t heed::RoTxn<'t>,
|
||||
db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
|
||||
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
|
||||
field_id: u16,
|
||||
candidates: &RoaringBitmap,
|
||||
callback: CB,
|
||||
@ -34,11 +34,11 @@ where
|
||||
let mut fd = LexicographicFacetDistribution { rtxn, db, field_id, callback };
|
||||
let highest_level = get_highest_level(
|
||||
rtxn,
|
||||
db.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(),
|
||||
db.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>(),
|
||||
field_id,
|
||||
)?;
|
||||
|
||||
if let Some(first_bound) = get_first_facet_value::<ByteSliceRefCodec>(rtxn, db, field_id)? {
|
||||
if let Some(first_bound) = get_first_facet_value::<BytesRefCodec>(rtxn, db, field_id)? {
|
||||
fd.iterate(candidates, highest_level, first_bound, usize::MAX)?;
|
||||
Ok(())
|
||||
} else {
|
||||
@ -48,7 +48,7 @@ where
|
||||
|
||||
pub fn count_iterate_over_facet_distribution<'t, CB>(
|
||||
rtxn: &'t heed::RoTxn<'t>,
|
||||
db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
|
||||
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
|
||||
field_id: u16,
|
||||
candidates: &RoaringBitmap,
|
||||
mut callback: CB,
|
||||
@ -77,11 +77,11 @@ where
|
||||
let mut heap = BinaryHeap::new();
|
||||
let highest_level = get_highest_level(
|
||||
rtxn,
|
||||
db.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(),
|
||||
db.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>(),
|
||||
field_id,
|
||||
)?;
|
||||
|
||||
if let Some(first_bound) = get_first_facet_value::<ByteSliceRefCodec>(rtxn, db, field_id)? {
|
||||
if let Some(first_bound) = get_first_facet_value::<BytesRefCodec>(rtxn, db, field_id)? {
|
||||
// We first fill the heap with values from the highest level
|
||||
let starting_key =
|
||||
FacetGroupKey { field_id, level: highest_level, left_bound: first_bound };
|
||||
@ -146,7 +146,7 @@ where
|
||||
CB: FnMut(&'t [u8], u64, DocumentId) -> Result<ControlFlow<()>>,
|
||||
{
|
||||
rtxn: &'t heed::RoTxn<'t>,
|
||||
db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
|
||||
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
|
||||
field_id: u16,
|
||||
callback: CB,
|
||||
}
|
||||
|
@ -5,7 +5,7 @@ use roaring::RoaringBitmap;
|
||||
|
||||
use super::{get_first_facet_value, get_highest_level, get_last_facet_value};
|
||||
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec};
|
||||
use crate::heed_codec::ByteSliceRefCodec;
|
||||
use crate::heed_codec::BytesRefCodec;
|
||||
use crate::Result;
|
||||
|
||||
/// Find all the document ids for which the given field contains a value contained within
|
||||
@ -25,11 +25,11 @@ where
|
||||
let inner;
|
||||
let left = match left {
|
||||
Bound::Included(left) => {
|
||||
inner = BoundCodec::bytes_encode(left).ok_or(heed::Error::Encoding)?;
|
||||
inner = BoundCodec::bytes_encode(left).map_err(heed::Error::Encoding)?;
|
||||
Bound::Included(inner.as_ref())
|
||||
}
|
||||
Bound::Excluded(left) => {
|
||||
inner = BoundCodec::bytes_encode(left).ok_or(heed::Error::Encoding)?;
|
||||
inner = BoundCodec::bytes_encode(left).map_err(heed::Error::Encoding)?;
|
||||
Bound::Excluded(inner.as_ref())
|
||||
}
|
||||
Bound::Unbounded => Bound::Unbounded,
|
||||
@ -37,25 +37,22 @@ where
|
||||
let inner;
|
||||
let right = match right {
|
||||
Bound::Included(right) => {
|
||||
inner = BoundCodec::bytes_encode(right).ok_or(heed::Error::Encoding)?;
|
||||
inner = BoundCodec::bytes_encode(right).map_err(heed::Error::Encoding)?;
|
||||
Bound::Included(inner.as_ref())
|
||||
}
|
||||
Bound::Excluded(right) => {
|
||||
inner = BoundCodec::bytes_encode(right).ok_or(heed::Error::Encoding)?;
|
||||
inner = BoundCodec::bytes_encode(right).map_err(heed::Error::Encoding)?;
|
||||
Bound::Excluded(inner.as_ref())
|
||||
}
|
||||
Bound::Unbounded => Bound::Unbounded,
|
||||
};
|
||||
let db = db.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>();
|
||||
let db = db.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>();
|
||||
let mut f = FacetRangeSearch { rtxn, db, field_id, left, right, docids };
|
||||
let highest_level = get_highest_level(rtxn, db, field_id)?;
|
||||
|
||||
if let Some(starting_left_bound) =
|
||||
get_first_facet_value::<ByteSliceRefCodec>(rtxn, db, field_id)?
|
||||
{
|
||||
let rightmost_bound = Bound::Included(
|
||||
get_last_facet_value::<ByteSliceRefCodec>(rtxn, db, field_id)?.unwrap(),
|
||||
); // will not fail because get_first_facet_value succeeded
|
||||
if let Some(starting_left_bound) = get_first_facet_value::<BytesRefCodec>(rtxn, db, field_id)? {
|
||||
let rightmost_bound =
|
||||
Bound::Included(get_last_facet_value::<BytesRefCodec>(rtxn, db, field_id)?.unwrap()); // will not fail because get_first_facet_value succeeded
|
||||
let group_size = usize::MAX;
|
||||
f.run(highest_level, starting_left_bound, rightmost_bound, group_size)?;
|
||||
Ok(())
|
||||
@ -67,7 +64,7 @@ where
|
||||
/// Fetch the document ids that have a facet with a value between the two given bounds
|
||||
struct FacetRangeSearch<'t, 'b, 'bitmap> {
|
||||
rtxn: &'t heed::RoTxn<'t>,
|
||||
db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
|
||||
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
|
||||
field_id: u16,
|
||||
left: Bound<&'b [u8]>,
|
||||
right: Bound<&'b [u8]>,
|
||||
|
@ -5,7 +5,7 @@ use super::{get_first_facet_value, get_highest_level};
|
||||
use crate::heed_codec::facet::{
|
||||
FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec,
|
||||
};
|
||||
use crate::heed_codec::ByteSliceRefCodec;
|
||||
use crate::heed_codec::BytesRefCodec;
|
||||
|
||||
/// Return an iterator which iterates over the given candidate documents in
|
||||
/// ascending order of their facet value for the given field id.
|
||||
@ -31,12 +31,12 @@ use crate::heed_codec::ByteSliceRefCodec;
|
||||
/// Note that once a document id is returned by the iterator, it is never returned again.
|
||||
pub fn ascending_facet_sort<'t>(
|
||||
rtxn: &'t heed::RoTxn<'t>,
|
||||
db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
|
||||
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
|
||||
field_id: u16,
|
||||
candidates: RoaringBitmap,
|
||||
) -> Result<impl Iterator<Item = Result<(RoaringBitmap, &'t [u8])>> + 't> {
|
||||
let highest_level = get_highest_level(rtxn, db, field_id)?;
|
||||
if let Some(first_bound) = get_first_facet_value::<ByteSliceRefCodec>(rtxn, db, field_id)? {
|
||||
if let Some(first_bound) = get_first_facet_value::<BytesRefCodec>(rtxn, db, field_id)? {
|
||||
let first_key = FacetGroupKey { field_id, level: highest_level, left_bound: first_bound };
|
||||
let iter = db.range(rtxn, &(first_key..)).unwrap().take(usize::MAX);
|
||||
|
||||
@ -53,14 +53,12 @@ pub fn ascending_facet_sort<'t>(
|
||||
|
||||
struct AscendingFacetSort<'t, 'e> {
|
||||
rtxn: &'t heed::RoTxn<'e>,
|
||||
db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
|
||||
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
|
||||
field_id: u16,
|
||||
#[allow(clippy::type_complexity)]
|
||||
stack: Vec<(
|
||||
RoaringBitmap,
|
||||
std::iter::Take<
|
||||
heed::RoRange<'t, FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
|
||||
>,
|
||||
std::iter::Take<heed::RoRange<'t, FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>>,
|
||||
)>,
|
||||
}
|
||||
|
||||
|
@ -7,21 +7,21 @@ use super::{get_first_facet_value, get_highest_level, get_last_facet_value};
|
||||
use crate::heed_codec::facet::{
|
||||
FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec,
|
||||
};
|
||||
use crate::heed_codec::ByteSliceRefCodec;
|
||||
use crate::heed_codec::BytesRefCodec;
|
||||
|
||||
/// See documentationg for [`ascending_facet_sort`](super::ascending_facet_sort).
|
||||
///
|
||||
/// This function does the same thing, but in the opposite order.
|
||||
pub fn descending_facet_sort<'t>(
|
||||
rtxn: &'t heed::RoTxn<'t>,
|
||||
db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
|
||||
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
|
||||
field_id: u16,
|
||||
candidates: RoaringBitmap,
|
||||
) -> Result<impl Iterator<Item = Result<(RoaringBitmap, &'t [u8])>> + 't> {
|
||||
let highest_level = get_highest_level(rtxn, db, field_id)?;
|
||||
if let Some(first_bound) = get_first_facet_value::<ByteSliceRefCodec>(rtxn, db, field_id)? {
|
||||
if let Some(first_bound) = get_first_facet_value::<BytesRefCodec>(rtxn, db, field_id)? {
|
||||
let first_key = FacetGroupKey { field_id, level: highest_level, left_bound: first_bound };
|
||||
let last_bound = get_last_facet_value::<ByteSliceRefCodec>(rtxn, db, field_id)?.unwrap();
|
||||
let last_bound = get_last_facet_value::<BytesRefCodec>(rtxn, db, field_id)?.unwrap();
|
||||
let last_key = FacetGroupKey { field_id, level: highest_level, left_bound: last_bound };
|
||||
let iter = db.rev_range(rtxn, &(first_key..=last_key))?.take(usize::MAX);
|
||||
Ok(itertools::Either::Left(DescendingFacetSort {
|
||||
@ -37,13 +37,13 @@ pub fn descending_facet_sort<'t>(
|
||||
|
||||
struct DescendingFacetSort<'t> {
|
||||
rtxn: &'t heed::RoTxn<'t>,
|
||||
db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
|
||||
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
|
||||
field_id: u16,
|
||||
#[allow(clippy::type_complexity)]
|
||||
stack: Vec<(
|
||||
RoaringBitmap,
|
||||
std::iter::Take<
|
||||
heed::RoRevRange<'t, FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
|
||||
heed::RoRevRange<'t, FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
|
||||
>,
|
||||
Bound<&'t [u8]>,
|
||||
)>,
|
||||
@ -100,7 +100,7 @@ impl<'t> Iterator for DescendingFacetSort<'t> {
|
||||
*right_bound = Bound::Excluded(left_bound);
|
||||
let iter = match self
|
||||
.db
|
||||
.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>()
|
||||
.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>()
|
||||
.rev_range(self.rtxn, &(Bound::Included(starting_key_below), end_key_kelow))
|
||||
{
|
||||
Ok(iter) => iter,
|
||||
@ -123,7 +123,7 @@ mod tests {
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use crate::heed_codec::facet::FacetGroupKeyCodec;
|
||||
use crate::heed_codec::ByteSliceRefCodec;
|
||||
use crate::heed_codec::BytesRefCodec;
|
||||
use crate::milli_snap;
|
||||
use crate::search::facet::facet_sort_descending::descending_facet_sort;
|
||||
use crate::search::facet::tests::{
|
||||
@ -144,7 +144,7 @@ mod tests {
|
||||
let txn = index.env.read_txn().unwrap();
|
||||
let candidates = (200..=300).collect::<RoaringBitmap>();
|
||||
let mut results = String::new();
|
||||
let db = index.content.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>();
|
||||
let db = index.content.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>();
|
||||
let iter = descending_facet_sort(&txn, db, 0, candidates).unwrap();
|
||||
for el in iter {
|
||||
let (docids, _) = el.unwrap();
|
||||
@ -167,7 +167,7 @@ mod tests {
|
||||
let txn = index.env.read_txn().unwrap();
|
||||
let candidates = (200..=300).collect::<RoaringBitmap>();
|
||||
let mut results = String::new();
|
||||
let db = index.content.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>();
|
||||
let db = index.content.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>();
|
||||
let iter = descending_facet_sort(&txn, db, 0, candidates.clone()).unwrap();
|
||||
for el in iter {
|
||||
let (docids, _) = el.unwrap();
|
||||
|
@ -3,7 +3,7 @@ use std::fmt::{Debug, Display};
|
||||
use std::ops::Bound::{self, Excluded, Included};
|
||||
|
||||
use either::Either;
|
||||
pub use filter_parser::{Condition, Error as FPError, FilterCondition, Span, Token};
|
||||
pub use filter_parser::{Condition, Error as FPError, FilterCondition, Token};
|
||||
use roaring::RoaringBitmap;
|
||||
use serde_json::Value;
|
||||
|
||||
|
@ -1,13 +1,13 @@
|
||||
pub use facet_sort_ascending::ascending_facet_sort;
|
||||
pub use facet_sort_descending::descending_facet_sort;
|
||||
use heed::types::{ByteSlice, DecodeIgnore};
|
||||
use heed::types::{Bytes, DecodeIgnore};
|
||||
use heed::{BytesDecode, RoTxn};
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
pub use self::facet_distribution::{FacetDistribution, OrderBy, DEFAULT_VALUES_PER_FACET};
|
||||
pub use self::filter::{BadGeoError, Filter};
|
||||
use crate::heed_codec::facet::{FacetGroupKeyCodec, FacetGroupValueCodec, OrderedF64Codec};
|
||||
use crate::heed_codec::ByteSliceRefCodec;
|
||||
use crate::heed_codec::BytesRefCodec;
|
||||
use crate::{Index, Result};
|
||||
mod facet_distribution;
|
||||
mod facet_distribution_iter;
|
||||
@ -22,8 +22,10 @@ fn facet_extreme_value<'t>(
|
||||
let extreme_value =
|
||||
if let Some(extreme_value) = extreme_it.next() { extreme_value } else { return Ok(None) };
|
||||
let (_, extreme_value) = extreme_value?;
|
||||
|
||||
Ok(OrderedF64Codec::bytes_decode(extreme_value))
|
||||
OrderedF64Codec::bytes_decode(extreme_value)
|
||||
.map(Some)
|
||||
.map_err(heed::Error::Decoding)
|
||||
.map_err(Into::into)
|
||||
}
|
||||
|
||||
pub fn facet_min_value<'t>(
|
||||
@ -32,7 +34,7 @@ pub fn facet_min_value<'t>(
|
||||
field_id: u16,
|
||||
candidates: RoaringBitmap,
|
||||
) -> Result<Option<f64>> {
|
||||
let db = index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>();
|
||||
let db = index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>();
|
||||
let it = ascending_facet_sort(rtxn, db, field_id, candidates)?;
|
||||
facet_extreme_value(it)
|
||||
}
|
||||
@ -43,7 +45,7 @@ pub fn facet_max_value<'t>(
|
||||
field_id: u16,
|
||||
candidates: RoaringBitmap,
|
||||
) -> Result<Option<f64>> {
|
||||
let db = index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>();
|
||||
let db = index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>();
|
||||
let it = descending_facet_sort(rtxn, db, field_id, candidates)?;
|
||||
facet_extreme_value(it)
|
||||
}
|
||||
@ -51,7 +53,7 @@ pub fn facet_max_value<'t>(
|
||||
/// Get the first facet value in the facet database
|
||||
pub(crate) fn get_first_facet_value<'t, BoundCodec>(
|
||||
txn: &'t RoTxn,
|
||||
db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
|
||||
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
|
||||
field_id: u16,
|
||||
) -> heed::Result<Option<BoundCodec::DItem>>
|
||||
where
|
||||
@ -60,13 +62,12 @@ where
|
||||
let mut level0prefix = vec![];
|
||||
level0prefix.extend_from_slice(&field_id.to_be_bytes());
|
||||
level0prefix.push(0);
|
||||
let mut level0_iter_forward = db
|
||||
.as_polymorph()
|
||||
.prefix_iter::<_, ByteSlice, DecodeIgnore>(txn, level0prefix.as_slice())?;
|
||||
let mut level0_iter_forward =
|
||||
db.remap_types::<Bytes, DecodeIgnore>().prefix_iter(txn, level0prefix.as_slice())?;
|
||||
if let Some(first) = level0_iter_forward.next() {
|
||||
let (first_key, _) = first?;
|
||||
let first_key = FacetGroupKeyCodec::<BoundCodec>::bytes_decode(first_key)
|
||||
.ok_or(heed::Error::Encoding)?;
|
||||
.map_err(heed::Error::Decoding)?;
|
||||
Ok(Some(first_key.left_bound))
|
||||
} else {
|
||||
Ok(None)
|
||||
@ -76,7 +77,7 @@ where
|
||||
/// Get the last facet value in the facet database
|
||||
pub(crate) fn get_last_facet_value<'t, BoundCodec>(
|
||||
txn: &'t RoTxn,
|
||||
db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
|
||||
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
|
||||
field_id: u16,
|
||||
) -> heed::Result<Option<BoundCodec::DItem>>
|
||||
where
|
||||
@ -85,13 +86,12 @@ where
|
||||
let mut level0prefix = vec![];
|
||||
level0prefix.extend_from_slice(&field_id.to_be_bytes());
|
||||
level0prefix.push(0);
|
||||
let mut level0_iter_backward = db
|
||||
.as_polymorph()
|
||||
.rev_prefix_iter::<_, ByteSlice, DecodeIgnore>(txn, level0prefix.as_slice())?;
|
||||
let mut level0_iter_backward =
|
||||
db.remap_types::<Bytes, DecodeIgnore>().rev_prefix_iter(txn, level0prefix.as_slice())?;
|
||||
if let Some(last) = level0_iter_backward.next() {
|
||||
let (last_key, _) = last?;
|
||||
let last_key = FacetGroupKeyCodec::<BoundCodec>::bytes_decode(last_key)
|
||||
.ok_or(heed::Error::Encoding)?;
|
||||
.map_err(heed::Error::Decoding)?;
|
||||
Ok(Some(last_key.left_bound))
|
||||
} else {
|
||||
Ok(None)
|
||||
@ -101,17 +101,17 @@ where
|
||||
/// Get the height of the highest level in the facet database
|
||||
pub(crate) fn get_highest_level<'t>(
|
||||
txn: &'t RoTxn<'t>,
|
||||
db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
|
||||
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
|
||||
field_id: u16,
|
||||
) -> heed::Result<u8> {
|
||||
let field_id_prefix = &field_id.to_be_bytes();
|
||||
Ok(db
|
||||
.as_polymorph()
|
||||
.rev_prefix_iter::<_, ByteSlice, DecodeIgnore>(txn, field_id_prefix)?
|
||||
.remap_types::<Bytes, DecodeIgnore>()
|
||||
.rev_prefix_iter(txn, field_id_prefix)?
|
||||
.next()
|
||||
.map(|el| {
|
||||
let (key, _) = el.unwrap();
|
||||
let key = FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_decode(key).unwrap();
|
||||
let key = FacetGroupKeyCodec::<BytesRefCodec>::bytes_decode(key).unwrap();
|
||||
key.level
|
||||
})
|
||||
.unwrap_or(0))
|
||||
|
@ -11,14 +11,13 @@ use once_cell::sync::Lazy;
|
||||
use roaring::bitmap::RoaringBitmap;
|
||||
|
||||
pub use self::facet::{FacetDistribution, Filter, OrderBy, DEFAULT_VALUES_PER_FACET};
|
||||
pub use self::new::matches::{FormatOptions, MatchBounds, Matcher, MatcherBuilder, MatchingWords};
|
||||
pub use self::new::matches::{FormatOptions, MatchBounds, MatcherBuilder, MatchingWords};
|
||||
use self::new::PartialSearchResult;
|
||||
use crate::error::UserError;
|
||||
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupValue};
|
||||
use crate::score_details::{ScoreDetails, ScoringStrategy};
|
||||
use crate::{
|
||||
execute_search, AscDesc, DefaultSearchLogger, DocumentId, FieldId, Index, Result,
|
||||
SearchContext, BEU16,
|
||||
execute_search, AscDesc, DefaultSearchLogger, DocumentId, FieldId, Index, Result, SearchContext,
|
||||
};
|
||||
|
||||
// Building these factories is not free.
|
||||
@ -299,7 +298,7 @@ impl<'a> SearchForFacetValues<'a> {
|
||||
None => return Ok(Vec::new()),
|
||||
};
|
||||
|
||||
let fst = match self.search_query.index.facet_id_string_fst.get(rtxn, &BEU16::new(fid))? {
|
||||
let fst = match self.search_query.index.facet_id_string_fst.get(rtxn, &fid)? {
|
||||
Some(fst) => fst,
|
||||
None => return Ok(vec![]),
|
||||
};
|
||||
|
@ -46,9 +46,8 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
|
||||
if let Some(distinct_fid) = distinct_fid {
|
||||
let mut excluded = RoaringBitmap::new();
|
||||
let mut results = vec![];
|
||||
let mut skip = 0;
|
||||
for docid in universe.iter() {
|
||||
if results.len() >= length {
|
||||
if results.len() >= from + length {
|
||||
break;
|
||||
}
|
||||
if excluded.contains(docid) {
|
||||
@ -56,16 +55,19 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
|
||||
}
|
||||
|
||||
distinct_single_docid(ctx.index, ctx.txn, distinct_fid, docid, &mut excluded)?;
|
||||
skip += 1;
|
||||
if skip <= from {
|
||||
continue;
|
||||
}
|
||||
|
||||
results.push(docid);
|
||||
}
|
||||
|
||||
let mut all_candidates = universe - excluded;
|
||||
all_candidates.extend(results.iter().copied());
|
||||
// drain the results of the skipped elements
|
||||
// this **must** be done **after** writing the entire results in `all_candidates` to ensure
|
||||
// e.g. estimatedTotalHits is correct.
|
||||
if results.len() >= from {
|
||||
results.drain(..from);
|
||||
} else {
|
||||
results.clear();
|
||||
}
|
||||
|
||||
return Ok(BucketSortOutput {
|
||||
scores: vec![Default::default(); results.len()],
|
||||
|
@ -3,13 +3,14 @@ use std::collections::hash_map::Entry;
|
||||
use std::hash::Hash;
|
||||
|
||||
use fxhash::FxHashMap;
|
||||
use heed::types::ByteSlice;
|
||||
use heed::types::Bytes;
|
||||
use heed::{BytesEncode, Database, RoTxn};
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use super::interner::Interned;
|
||||
use super::Word;
|
||||
use crate::heed_codec::{BytesDecodeOwned, StrBEU16Codec};
|
||||
use crate::proximity::ProximityPrecision;
|
||||
use crate::update::{merge_cbo_roaring_bitmaps, MergeFn};
|
||||
use crate::{
|
||||
CboRoaringBitmapCodec, CboRoaringBitmapLenCodec, Result, SearchContext, U8StrStrCodec,
|
||||
@ -50,7 +51,7 @@ impl<'ctx> DatabaseCache<'ctx> {
|
||||
cache_key: K1,
|
||||
db_key: &'v KC::EItem,
|
||||
cache: &mut FxHashMap<K1, Option<Cow<'ctx, [u8]>>>,
|
||||
db: Database<KC, ByteSlice>,
|
||||
db: Database<KC, Bytes>,
|
||||
) -> Result<Option<DC::DItem>>
|
||||
where
|
||||
K1: Copy + Eq + Hash,
|
||||
@ -63,12 +64,14 @@ impl<'ctx> DatabaseCache<'ctx> {
|
||||
}
|
||||
|
||||
match cache.get(&cache_key).unwrap() {
|
||||
Some(Cow::Borrowed(bytes)) => {
|
||||
DC::bytes_decode_owned(bytes).ok_or(heed::Error::Decoding.into()).map(Some)
|
||||
}
|
||||
Some(Cow::Owned(bytes)) => {
|
||||
DC::bytes_decode_owned(bytes).ok_or(heed::Error::Decoding.into()).map(Some)
|
||||
}
|
||||
Some(Cow::Borrowed(bytes)) => DC::bytes_decode_owned(bytes)
|
||||
.map(Some)
|
||||
.map_err(heed::Error::Decoding)
|
||||
.map_err(Into::into),
|
||||
Some(Cow::Owned(bytes)) => DC::bytes_decode_owned(bytes)
|
||||
.map(Some)
|
||||
.map_err(heed::Error::Decoding)
|
||||
.map_err(Into::into),
|
||||
None => Ok(None),
|
||||
}
|
||||
}
|
||||
@ -78,7 +81,7 @@ impl<'ctx> DatabaseCache<'ctx> {
|
||||
cache_key: K1,
|
||||
db_keys: &'v [KC::EItem],
|
||||
cache: &mut FxHashMap<K1, Option<Cow<'ctx, [u8]>>>,
|
||||
db: Database<KC, ByteSlice>,
|
||||
db: Database<KC, Bytes>,
|
||||
merger: MergeFn,
|
||||
) -> Result<Option<DC::DItem>>
|
||||
where
|
||||
@ -110,12 +113,14 @@ impl<'ctx> DatabaseCache<'ctx> {
|
||||
}
|
||||
|
||||
match cache.get(&cache_key).unwrap() {
|
||||
Some(Cow::Borrowed(bytes)) => {
|
||||
DC::bytes_decode_owned(bytes).ok_or(heed::Error::Decoding.into()).map(Some)
|
||||
}
|
||||
Some(Cow::Owned(bytes)) => {
|
||||
DC::bytes_decode_owned(bytes).ok_or(heed::Error::Decoding.into()).map(Some)
|
||||
}
|
||||
Some(Cow::Borrowed(bytes)) => DC::bytes_decode_owned(bytes)
|
||||
.map(Some)
|
||||
.map_err(heed::Error::Decoding)
|
||||
.map_err(Into::into),
|
||||
Some(Cow::Owned(bytes)) => DC::bytes_decode_owned(bytes)
|
||||
.map(Some)
|
||||
.map_err(heed::Error::Decoding)
|
||||
.map_err(Into::into),
|
||||
None => Ok(None),
|
||||
}
|
||||
}
|
||||
@ -164,7 +169,7 @@ impl<'ctx> SearchContext<'ctx> {
|
||||
word,
|
||||
&keys[..],
|
||||
&mut self.db_cache.word_docids,
|
||||
self.index.word_fid_docids.remap_data_type::<ByteSlice>(),
|
||||
self.index.word_fid_docids.remap_data_type::<Bytes>(),
|
||||
merge_cbo_roaring_bitmaps,
|
||||
)
|
||||
}
|
||||
@ -173,7 +178,7 @@ impl<'ctx> SearchContext<'ctx> {
|
||||
word,
|
||||
self.word_interner.get(word).as_str(),
|
||||
&mut self.db_cache.word_docids,
|
||||
self.index.word_docids.remap_data_type::<ByteSlice>(),
|
||||
self.index.word_docids.remap_data_type::<Bytes>(),
|
||||
),
|
||||
}
|
||||
}
|
||||
@ -187,7 +192,7 @@ impl<'ctx> SearchContext<'ctx> {
|
||||
word,
|
||||
self.word_interner.get(word).as_str(),
|
||||
&mut self.db_cache.exact_word_docids,
|
||||
self.index.exact_word_docids.remap_data_type::<ByteSlice>(),
|
||||
self.index.exact_word_docids.remap_data_type::<Bytes>(),
|
||||
)
|
||||
}
|
||||
|
||||
@ -226,7 +231,7 @@ impl<'ctx> SearchContext<'ctx> {
|
||||
prefix,
|
||||
&keys[..],
|
||||
&mut self.db_cache.word_prefix_docids,
|
||||
self.index.word_prefix_fid_docids.remap_data_type::<ByteSlice>(),
|
||||
self.index.word_prefix_fid_docids.remap_data_type::<Bytes>(),
|
||||
merge_cbo_roaring_bitmaps,
|
||||
)
|
||||
}
|
||||
@ -235,7 +240,7 @@ impl<'ctx> SearchContext<'ctx> {
|
||||
prefix,
|
||||
self.word_interner.get(prefix).as_str(),
|
||||
&mut self.db_cache.word_prefix_docids,
|
||||
self.index.word_prefix_docids.remap_data_type::<ByteSlice>(),
|
||||
self.index.word_prefix_docids.remap_data_type::<Bytes>(),
|
||||
),
|
||||
}
|
||||
}
|
||||
@ -249,7 +254,7 @@ impl<'ctx> SearchContext<'ctx> {
|
||||
prefix,
|
||||
self.word_interner.get(prefix).as_str(),
|
||||
&mut self.db_cache.exact_word_prefix_docids,
|
||||
self.index.exact_word_prefix_docids.remap_data_type::<ByteSlice>(),
|
||||
self.index.exact_word_prefix_docids.remap_data_type::<Bytes>(),
|
||||
)
|
||||
}
|
||||
|
||||
@ -259,17 +264,67 @@ impl<'ctx> SearchContext<'ctx> {
|
||||
word2: Interned<String>,
|
||||
proximity: u8,
|
||||
) -> Result<Option<RoaringBitmap>> {
|
||||
DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
|
||||
self.txn,
|
||||
(proximity, word1, word2),
|
||||
&(
|
||||
proximity,
|
||||
self.word_interner.get(word1).as_str(),
|
||||
self.word_interner.get(word2).as_str(),
|
||||
),
|
||||
&mut self.db_cache.word_pair_proximity_docids,
|
||||
self.index.word_pair_proximity_docids.remap_data_type::<ByteSlice>(),
|
||||
)
|
||||
match self.index.proximity_precision(self.txn)?.unwrap_or_default() {
|
||||
ProximityPrecision::AttributeScale => {
|
||||
// Force proximity to 0 because:
|
||||
// in AttributeScale, there are only 2 possible distances:
|
||||
// 1. words in same attribute: in that the DB contains (0, word1, word2)
|
||||
// 2. words in different attributes: no DB entry for these two words.
|
||||
let proximity = 0;
|
||||
let docids = if let Some(docids) =
|
||||
self.db_cache.word_pair_proximity_docids.get(&(proximity, word1, word2))
|
||||
{
|
||||
docids
|
||||
.as_ref()
|
||||
.map(|d| CboRoaringBitmapCodec::bytes_decode_owned(d))
|
||||
.transpose()
|
||||
.map_err(heed::Error::Decoding)?
|
||||
} else {
|
||||
// Compute the distance at the attribute level and store it in the cache.
|
||||
let fids = if let Some(fids) = self.index.searchable_fields_ids(self.txn)? {
|
||||
fids
|
||||
} else {
|
||||
self.index.fields_ids_map(self.txn)?.ids().collect()
|
||||
};
|
||||
let mut docids = RoaringBitmap::new();
|
||||
for fid in fids {
|
||||
// for each field, intersect left word bitmap and right word bitmap,
|
||||
// then merge the result in a global bitmap before storing it in the cache.
|
||||
let word1_docids = self.get_db_word_fid_docids(word1, fid)?;
|
||||
let word2_docids = self.get_db_word_fid_docids(word2, fid)?;
|
||||
if let (Some(word1_docids), Some(word2_docids)) =
|
||||
(word1_docids, word2_docids)
|
||||
{
|
||||
docids |= word1_docids & word2_docids;
|
||||
}
|
||||
}
|
||||
let encoded = CboRoaringBitmapCodec::bytes_encode(&docids)
|
||||
.map(Cow::into_owned)
|
||||
.map(Cow::Owned)
|
||||
.map(Some)
|
||||
.map_err(heed::Error::Decoding)?;
|
||||
self.db_cache
|
||||
.word_pair_proximity_docids
|
||||
.insert((proximity, word1, word2), encoded);
|
||||
Some(docids)
|
||||
};
|
||||
|
||||
Ok(docids)
|
||||
}
|
||||
ProximityPrecision::WordScale => {
|
||||
DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
|
||||
self.txn,
|
||||
(proximity, word1, word2),
|
||||
&(
|
||||
proximity,
|
||||
self.word_interner.get(word1).as_str(),
|
||||
self.word_interner.get(word2).as_str(),
|
||||
),
|
||||
&mut self.db_cache.word_pair_proximity_docids,
|
||||
self.index.word_pair_proximity_docids.remap_data_type::<Bytes>(),
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get_db_word_pair_proximity_docids_len(
|
||||
@ -278,54 +333,95 @@ impl<'ctx> SearchContext<'ctx> {
|
||||
word2: Interned<String>,
|
||||
proximity: u8,
|
||||
) -> Result<Option<u64>> {
|
||||
DatabaseCache::get_value::<_, _, CboRoaringBitmapLenCodec>(
|
||||
self.txn,
|
||||
(proximity, word1, word2),
|
||||
&(
|
||||
proximity,
|
||||
self.word_interner.get(word1).as_str(),
|
||||
self.word_interner.get(word2).as_str(),
|
||||
),
|
||||
&mut self.db_cache.word_pair_proximity_docids,
|
||||
self.index.word_pair_proximity_docids.remap_data_type::<ByteSlice>(),
|
||||
)
|
||||
match self.index.proximity_precision(self.txn)?.unwrap_or_default() {
|
||||
ProximityPrecision::AttributeScale => Ok(self
|
||||
.get_db_word_pair_proximity_docids(word1, word2, proximity)?
|
||||
.map(|d| d.len())),
|
||||
ProximityPrecision::WordScale => {
|
||||
DatabaseCache::get_value::<_, _, CboRoaringBitmapLenCodec>(
|
||||
self.txn,
|
||||
(proximity, word1, word2),
|
||||
&(
|
||||
proximity,
|
||||
self.word_interner.get(word1).as_str(),
|
||||
self.word_interner.get(word2).as_str(),
|
||||
),
|
||||
&mut self.db_cache.word_pair_proximity_docids,
|
||||
self.index.word_pair_proximity_docids.remap_data_type::<Bytes>(),
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get_db_word_prefix_pair_proximity_docids(
|
||||
&mut self,
|
||||
word1: Interned<String>,
|
||||
prefix2: Interned<String>,
|
||||
proximity: u8,
|
||||
mut proximity: u8,
|
||||
) -> Result<Option<RoaringBitmap>> {
|
||||
let docids = match self
|
||||
.db_cache
|
||||
.word_prefix_pair_proximity_docids
|
||||
.entry((proximity, word1, prefix2))
|
||||
{
|
||||
Entry::Occupied(docids) => docids.get().clone(),
|
||||
Entry::Vacant(entry) => {
|
||||
// compute docids using prefix iter and store the result in the cache.
|
||||
let key = U8StrStrCodec::bytes_encode(&(
|
||||
proximity,
|
||||
self.word_interner.get(word1).as_str(),
|
||||
self.word_interner.get(prefix2).as_str(),
|
||||
))
|
||||
.unwrap()
|
||||
.into_owned();
|
||||
let mut prefix_docids = RoaringBitmap::new();
|
||||
let remap_key_type = self
|
||||
.index
|
||||
.word_pair_proximity_docids
|
||||
.remap_key_type::<ByteSlice>()
|
||||
.prefix_iter(self.txn, &key)?;
|
||||
for result in remap_key_type {
|
||||
let (_, docids) = result?;
|
||||
let proximity_precision = self.index.proximity_precision(self.txn)?.unwrap_or_default();
|
||||
if proximity_precision == ProximityPrecision::AttributeScale {
|
||||
// Force proximity to 0 because:
|
||||
// in AttributeScale, there are only 2 possible distances:
|
||||
// 1. words in same attribute: in that the DB contains (0, word1, word2)
|
||||
// 2. words in different attributes: no DB entry for these two words.
|
||||
proximity = 0;
|
||||
}
|
||||
|
||||
prefix_docids |= docids;
|
||||
let docids = if let Some(docids) =
|
||||
self.db_cache.word_prefix_pair_proximity_docids.get(&(proximity, word1, prefix2))
|
||||
{
|
||||
docids.clone()
|
||||
} else {
|
||||
let prefix_docids = match proximity_precision {
|
||||
ProximityPrecision::AttributeScale => {
|
||||
// Compute the distance at the attribute level and store it in the cache.
|
||||
let fids = if let Some(fids) = self.index.searchable_fields_ids(self.txn)? {
|
||||
fids
|
||||
} else {
|
||||
self.index.fields_ids_map(self.txn)?.ids().collect()
|
||||
};
|
||||
let mut prefix_docids = RoaringBitmap::new();
|
||||
// for each field, intersect left word bitmap and right word bitmap,
|
||||
// then merge the result in a global bitmap before storing it in the cache.
|
||||
for fid in fids {
|
||||
let word1_docids = self.get_db_word_fid_docids(word1, fid)?;
|
||||
let prefix2_docids = self.get_db_word_prefix_fid_docids(prefix2, fid)?;
|
||||
if let (Some(word1_docids), Some(prefix2_docids)) =
|
||||
(word1_docids, prefix2_docids)
|
||||
{
|
||||
prefix_docids |= word1_docids & prefix2_docids;
|
||||
}
|
||||
}
|
||||
prefix_docids
|
||||
}
|
||||
entry.insert(Some(prefix_docids.clone()));
|
||||
Some(prefix_docids)
|
||||
}
|
||||
ProximityPrecision::WordScale => {
|
||||
// compute docids using prefix iter and store the result in the cache.
|
||||
let key = U8StrStrCodec::bytes_encode(&(
|
||||
proximity,
|
||||
self.word_interner.get(word1).as_str(),
|
||||
self.word_interner.get(prefix2).as_str(),
|
||||
))
|
||||
.unwrap()
|
||||
.into_owned();
|
||||
let mut prefix_docids = RoaringBitmap::new();
|
||||
let remap_key_type = self
|
||||
.index
|
||||
.word_pair_proximity_docids
|
||||
.remap_key_type::<Bytes>()
|
||||
.prefix_iter(self.txn, &key)?;
|
||||
for result in remap_key_type {
|
||||
let (_, docids) = result?;
|
||||
|
||||
prefix_docids |= docids;
|
||||
}
|
||||
prefix_docids
|
||||
}
|
||||
};
|
||||
self.db_cache
|
||||
.word_prefix_pair_proximity_docids
|
||||
.insert((proximity, word1, prefix2), Some(prefix_docids.clone()));
|
||||
Some(prefix_docids)
|
||||
};
|
||||
Ok(docids)
|
||||
}
|
||||
@ -355,7 +451,7 @@ impl<'ctx> SearchContext<'ctx> {
|
||||
(word, fid),
|
||||
&(self.word_interner.get(word).as_str(), fid),
|
||||
&mut self.db_cache.word_fid_docids,
|
||||
self.index.word_fid_docids.remap_data_type::<ByteSlice>(),
|
||||
self.index.word_fid_docids.remap_data_type::<Bytes>(),
|
||||
)
|
||||
}
|
||||
|
||||
@ -374,7 +470,7 @@ impl<'ctx> SearchContext<'ctx> {
|
||||
(word_prefix, fid),
|
||||
&(self.word_interner.get(word_prefix).as_str(), fid),
|
||||
&mut self.db_cache.word_prefix_fid_docids,
|
||||
self.index.word_prefix_fid_docids.remap_data_type::<ByteSlice>(),
|
||||
self.index.word_prefix_fid_docids.remap_data_type::<Bytes>(),
|
||||
)
|
||||
}
|
||||
|
||||
@ -388,7 +484,7 @@ impl<'ctx> SearchContext<'ctx> {
|
||||
let remap_key_type = self
|
||||
.index
|
||||
.word_fid_docids
|
||||
.remap_types::<ByteSlice, ByteSlice>()
|
||||
.remap_types::<Bytes, Bytes>()
|
||||
.prefix_iter(self.txn, &key)?
|
||||
.remap_key_type::<StrBEU16Codec>();
|
||||
for result in remap_key_type {
|
||||
@ -414,7 +510,7 @@ impl<'ctx> SearchContext<'ctx> {
|
||||
let remap_key_type = self
|
||||
.index
|
||||
.word_prefix_fid_docids
|
||||
.remap_types::<ByteSlice, ByteSlice>()
|
||||
.remap_types::<Bytes, Bytes>()
|
||||
.prefix_iter(self.txn, &key)?
|
||||
.remap_key_type::<StrBEU16Codec>();
|
||||
for result in remap_key_type {
|
||||
@ -442,7 +538,7 @@ impl<'ctx> SearchContext<'ctx> {
|
||||
(word, position),
|
||||
&(self.word_interner.get(word).as_str(), position),
|
||||
&mut self.db_cache.word_position_docids,
|
||||
self.index.word_position_docids.remap_data_type::<ByteSlice>(),
|
||||
self.index.word_position_docids.remap_data_type::<Bytes>(),
|
||||
)
|
||||
}
|
||||
|
||||
@ -456,7 +552,7 @@ impl<'ctx> SearchContext<'ctx> {
|
||||
(word_prefix, position),
|
||||
&(self.word_interner.get(word_prefix).as_str(), position),
|
||||
&mut self.db_cache.word_prefix_position_docids,
|
||||
self.index.word_prefix_position_docids.remap_data_type::<ByteSlice>(),
|
||||
self.index.word_prefix_position_docids.remap_data_type::<Bytes>(),
|
||||
)
|
||||
}
|
||||
|
||||
@ -470,7 +566,7 @@ impl<'ctx> SearchContext<'ctx> {
|
||||
let remap_key_type = self
|
||||
.index
|
||||
.word_position_docids
|
||||
.remap_types::<ByteSlice, ByteSlice>()
|
||||
.remap_types::<Bytes, Bytes>()
|
||||
.prefix_iter(self.txn, &key)?
|
||||
.remap_key_type::<StrBEU16Codec>();
|
||||
for result in remap_key_type {
|
||||
@ -501,7 +597,7 @@ impl<'ctx> SearchContext<'ctx> {
|
||||
let remap_key_type = self
|
||||
.index
|
||||
.word_prefix_position_docids
|
||||
.remap_types::<ByteSlice, ByteSlice>()
|
||||
.remap_types::<Bytes, Bytes>()
|
||||
.prefix_iter(self.txn, &key)?
|
||||
.remap_key_type::<StrBEU16Codec>();
|
||||
for result in remap_key_type {
|
||||
|
@ -1,4 +1,4 @@
|
||||
use heed::types::{ByteSlice, Str, Unit};
|
||||
use heed::types::{Bytes, Str, Unit};
|
||||
use heed::{Database, RoPrefix, RoTxn};
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
@ -8,7 +8,7 @@ const DOCID_SIZE: usize = 4;
|
||||
use crate::heed_codec::facet::{
|
||||
FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec, FieldDocIdFacetCodec,
|
||||
};
|
||||
use crate::heed_codec::ByteSliceRefCodec;
|
||||
use crate::heed_codec::BytesRefCodec;
|
||||
use crate::{Index, Result, SearchContext};
|
||||
|
||||
pub struct DistinctOutput {
|
||||
@ -71,7 +71,7 @@ pub fn distinct_single_docid(
|
||||
|
||||
/// Return all the docids containing the given value in the given field
|
||||
fn facet_value_docids(
|
||||
database: Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
|
||||
database: Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
|
||||
txn: &RoTxn,
|
||||
field_id: u16,
|
||||
facet_value: &[u8],
|
||||
@ -87,12 +87,12 @@ fn facet_number_values<'a>(
|
||||
field_id: u16,
|
||||
index: &Index,
|
||||
txn: &'a RoTxn,
|
||||
) -> Result<RoPrefix<'a, FieldDocIdFacetCodec<ByteSliceRefCodec>, Unit>> {
|
||||
) -> Result<RoPrefix<'a, FieldDocIdFacetCodec<BytesRefCodec>, Unit>> {
|
||||
let key = facet_values_prefix_key(field_id, docid);
|
||||
|
||||
let iter = index
|
||||
.field_id_docid_facet_f64s
|
||||
.remap_key_type::<ByteSlice>()
|
||||
.remap_key_type::<Bytes>()
|
||||
.prefix_iter(txn, &key)?
|
||||
.remap_key_type();
|
||||
|
||||
@ -105,12 +105,12 @@ pub fn facet_string_values<'a>(
|
||||
field_id: u16,
|
||||
index: &Index,
|
||||
txn: &'a RoTxn,
|
||||
) -> Result<RoPrefix<'a, FieldDocIdFacetCodec<ByteSliceRefCodec>, Str>> {
|
||||
) -> Result<RoPrefix<'a, FieldDocIdFacetCodec<BytesRefCodec>, Str>> {
|
||||
let key = facet_values_prefix_key(field_id, docid);
|
||||
|
||||
let iter = index
|
||||
.field_id_docid_facet_strings
|
||||
.remap_key_type::<ByteSlice>()
|
||||
.remap_key_type::<Bytes>()
|
||||
.prefix_iter(txn, &key)?
|
||||
.remap_types();
|
||||
|
||||
|
@ -1,7 +1,7 @@
|
||||
use std::collections::VecDeque;
|
||||
use std::iter::FromIterator;
|
||||
|
||||
use heed::types::{ByteSlice, Unit};
|
||||
use heed::types::{Bytes, Unit};
|
||||
use heed::{RoPrefix, RoTxn};
|
||||
use roaring::RoaringBitmap;
|
||||
use rstar::RTree;
|
||||
@ -34,7 +34,7 @@ fn facet_number_values<'a>(
|
||||
|
||||
let iter = index
|
||||
.field_id_docid_facet_f64s
|
||||
.remap_key_type::<ByteSlice>()
|
||||
.remap_key_type::<Bytes>()
|
||||
.prefix_iter(txn, &key)?
|
||||
.remap_key_type();
|
||||
|
||||
@ -163,7 +163,7 @@ impl<Q: RankingRuleQueryTrait> GeoSort<Q> {
|
||||
// computing the distance between two points is expensive thus we cache the result
|
||||
documents
|
||||
.sort_by_cached_key(|(_, p)| distance_between_two_points(&self.point, p) as usize);
|
||||
self.cached_sorted_docids.extend(documents.into_iter());
|
||||
self.cached_sorted_docids.extend(documents);
|
||||
};
|
||||
|
||||
Ok(())
|
||||
|
@ -228,7 +228,7 @@ impl<T> Ord for Interned<T> {
|
||||
|
||||
impl<T> PartialOrd for Interned<T> {
|
||||
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
|
||||
self.idx.partial_cmp(&other.idx)
|
||||
Some(self.cmp(other))
|
||||
}
|
||||
}
|
||||
|
||||
@ -241,7 +241,7 @@ impl<T> PartialEq for Interned<T> {
|
||||
}
|
||||
impl<T> Clone for Interned<T> {
|
||||
fn clone(&self) -> Self {
|
||||
Self { idx: self.idx, _phantom: PhantomData }
|
||||
*self
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -50,9 +50,7 @@ use crate::distance::NDotProductPoint;
|
||||
use crate::error::FieldIdMapMissingEntry;
|
||||
use crate::score_details::{ScoreDetails, ScoringStrategy};
|
||||
use crate::search::new::distinct::apply_distinct_rule;
|
||||
use crate::{
|
||||
AscDesc, DocumentId, Filter, Index, Member, Result, TermsMatchingStrategy, UserError, BEU32,
|
||||
};
|
||||
use crate::{AscDesc, DocumentId, Filter, Index, Member, Result, TermsMatchingStrategy, UserError};
|
||||
|
||||
/// A structure used throughout the execution of a search query.
|
||||
pub struct SearchContext<'ctx> {
|
||||
@ -434,14 +432,25 @@ pub fn execute_search(
|
||||
let mut search = Search::default();
|
||||
let docids = match ctx.index.vector_hnsw(ctx.txn)? {
|
||||
Some(hnsw) => {
|
||||
if let Some(expected_size) = hnsw.iter().map(|(_, point)| point.len()).next() {
|
||||
if vector.len() != expected_size {
|
||||
return Err(UserError::InvalidVectorDimensions {
|
||||
expected: expected_size,
|
||||
found: vector.len(),
|
||||
}
|
||||
.into());
|
||||
}
|
||||
}
|
||||
|
||||
let vector = NDotProductPoint::new(vector.clone());
|
||||
|
||||
let neighbors = hnsw.search(&vector, &mut search);
|
||||
|
||||
let mut docids = Vec::new();
|
||||
let mut uniq_docids = RoaringBitmap::new();
|
||||
for instant_distance::Item { distance: _, pid, point: _ } in neighbors {
|
||||
let index = BEU32::new(pid.into_inner());
|
||||
let docid = ctx.index.vector_id_docid.get(ctx.txn, &index)?.unwrap().get();
|
||||
let index = pid.into_inner();
|
||||
let docid = ctx.index.vector_id_docid.get(ctx.txn, &index)?.unwrap();
|
||||
if universe.contains(docid) && uniq_docids.insert(docid) {
|
||||
docids.push(docid);
|
||||
if docids.len() == (from + length) {
|
||||
@ -598,7 +607,8 @@ fn check_sort_criteria(ctx: &SearchContext, sort_criteria: Option<&Vec<AscDesc>>
|
||||
field: field.to_string(),
|
||||
valid_fields,
|
||||
hidden_fields,
|
||||
})?;
|
||||
}
|
||||
.into());
|
||||
}
|
||||
Member::Geo(_) if !sortable_fields.contains("_geo") => {
|
||||
let (valid_fields, hidden_fields) =
|
||||
@ -608,7 +618,8 @@ fn check_sort_criteria(ctx: &SearchContext, sort_criteria: Option<&Vec<AscDesc>>
|
||||
field: "_geo".to_string(),
|
||||
valid_fields,
|
||||
hidden_fields,
|
||||
})?;
|
||||
}
|
||||
.into());
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
|
@ -175,7 +175,7 @@ impl QueryTermSubset {
|
||||
|
||||
pub fn use_prefix_db(&self, ctx: &SearchContext) -> Option<Word> {
|
||||
let original = ctx.term_interner.get(self.original);
|
||||
let Some(use_prefix_db) = original.zero_typo.use_prefix_db else { return None };
|
||||
let use_prefix_db = original.zero_typo.use_prefix_db?;
|
||||
let word = match &self.zero_typo_subset {
|
||||
NTypoTermSubset::All => Some(use_prefix_db),
|
||||
NTypoTermSubset::Subset { words, phrases: _ } => {
|
||||
|
@ -29,7 +29,7 @@ use std::hash::Hash;
|
||||
pub use cheapest_paths::PathVisitor;
|
||||
pub use condition_docids_cache::ConditionDocIdsCache;
|
||||
pub use dead_ends_cache::DeadEndsCache;
|
||||
pub use exactness::{ExactnessCondition, ExactnessGraph};
|
||||
pub use exactness::ExactnessGraph;
|
||||
pub use fid::{FidCondition, FidGraph};
|
||||
pub use position::{PositionCondition, PositionGraph};
|
||||
pub use proximity::{ProximityCondition, ProximityGraph};
|
||||
|
@ -4,7 +4,7 @@ use roaring::RoaringBitmap;
|
||||
use super::logger::SearchLogger;
|
||||
use super::{RankingRule, RankingRuleOutput, RankingRuleQueryTrait, SearchContext};
|
||||
use crate::heed_codec::facet::{FacetGroupKeyCodec, OrderedF64Codec};
|
||||
use crate::heed_codec::{ByteSliceRefCodec, StrRefCodec};
|
||||
use crate::heed_codec::{BytesRefCodec, StrRefCodec};
|
||||
use crate::score_details::{self, ScoreDetails};
|
||||
use crate::search::facet::{ascending_facet_sort, descending_facet_sort};
|
||||
use crate::{FieldId, Index, Result};
|
||||
@ -100,11 +100,11 @@ impl<'ctx, Query: RankingRuleQueryTrait> RankingRule<'ctx, Query> for Sort<'ctx,
|
||||
let number_db = ctx
|
||||
.index
|
||||
.facet_id_f64_docids
|
||||
.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>();
|
||||
.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>();
|
||||
let string_db = ctx
|
||||
.index
|
||||
.facet_id_string_docids
|
||||
.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>();
|
||||
.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>();
|
||||
|
||||
let (number_iter, string_iter) = if self.is_ascending {
|
||||
let number_iter = ascending_facet_sort(
|
||||
|
@ -124,8 +124,7 @@ fn test_attribute_fid_simple() {
|
||||
s.query("the quick brown fox jumps over the lazy dog");
|
||||
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
|
||||
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
|
||||
let document_ids_scores: Vec<_> =
|
||||
documents_ids.iter().zip(document_scores.into_iter()).collect();
|
||||
let document_ids_scores: Vec<_> = documents_ids.iter().zip(document_scores).collect();
|
||||
insta::assert_snapshot!(format!("{document_ids_scores:#?}"));
|
||||
}
|
||||
|
||||
@ -142,7 +141,6 @@ fn test_attribute_fid_ngrams() {
|
||||
|
||||
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
|
||||
|
||||
let document_ids_scores: Vec<_> =
|
||||
documents_ids.iter().zip(document_scores.into_iter()).collect();
|
||||
let document_ids_scores: Vec<_> = documents_ids.iter().zip(document_scores).collect();
|
||||
insta::assert_snapshot!(format!("{document_ids_scores:#?}"));
|
||||
}
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user