Compare commits

...

25 Commits

SHA1 Message Date
c9860c7913 Small test fixes 2023-12-06 15:49:05 +01:00
03ffabe889 Add a new dump test 2023-12-06 15:49:05 +01:00
1f4fc9c229 Make the feature experimental 2023-12-06 15:49:05 +01:00
8cc3c54117 Add proximityPrecision setting in settings route 2023-12-06 15:49:05 +01:00
467b49153d Implement proximityPrecision setting on milli side 2023-12-06 15:49:02 +01:00
0c3fa8cbc4 Add tests on proximityPrecision setting 2023-12-06 14:59:23 +01:00
bddc168d83 List TODOs 2023-12-06 14:59:23 +01:00
84a36002d7 Merge #4239
4239: Remove the actix-web dependency from milli r=dureuill a=Kerollmops

Just remove actix-web from milli.

Co-authored-by: Clément Renault <clement@meilisearch.com>
2023-11-29 10:19:40 +00:00
170e063b80 Remove the actix-web dependency from milli 2023-11-28 17:19:57 +01:00
6376c342c1 Merge #4223
4223: Update to heed 0.20 r=dureuill a=Kerollmops

This PR brings the v0.20-alpha.9 version of heed into Meilisearch 🎉 The main goal is to test it in a real environment and make any necessary changes. We also want to merge it as soon as possible during the pre-release phase, to catch bugs before the release.

Most of the calls to heed are the same as before, with the following exceptions (a rough sketch of the new call sites follows this list):
 - The `PolyDatabase` has been replaced with a `Database<Unspecified, Unspecified>`. We replaced the `get<T, U>()` calls with `remap<T, U>().get()` calls.
 - The `Database` `append(...)` method has been replaced with a `put_with_flags(PutFlags::APPEND, ...)`.
 - The `RwTxn<'e, 'p>` has been simplified into a `RwTxn<'e>`.
 - The `BytesEncode/Decode` traits return a `Result<_, BoxedError>` instead of an `Option<_>`.
 - We no longer need to wrap and unwrap the `BEU32` integer when storing/getting them from heed.
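
As a rough sketch of what these call sites look like after the migration (this is not code from this PR; exact signatures moved around a little between the 0.20 alphas, and the `BEU32` alias here stands in for the one milli defines):

```rust
use std::error::Error;
use std::fs;

use heed::byteorder::BigEndian;
use heed::types::{Bytes, Str, U32};
use heed::{Database, EnvOpenOptions, PutFlags};

// No more wrapped `BEU32` integers: a plain `u32` goes through this codec.
type BEU32 = U32<BigEndian>;

fn main() -> Result<(), Box<dyn Error>> {
    let path = "heed-0.20-sketch.mdb";
    fs::create_dir_all(path)?;
    // Opening an environment is unsafe in recent heed versions because the
    // same environment must not be opened twice within one process.
    let env = unsafe { EnvOpenOptions::new().map_size(10 * 1024 * 1024).open(path)? };

    // `RwTxn<'e, 'p>` is now simply `RwTxn<'e>`.
    let mut wtxn = env.write_txn()?;
    let db: Database<BEU32, Str> = env.create_database(&mut wtxn, None)?;

    // `append(...)` became `put_with_flags(PutFlags::APPEND, ...)`, and the
    // keys are passed as plain `u32`s.
    db.put_with_flags(&mut wtxn, PutFlags::APPEND, &1, "hello")?;
    db.put_with_flags(&mut wtxn, PutFlags::APPEND, &2, "world")?;

    // Where a `PolyDatabase` used `get::<T, U>()`, a database is re-typed at
    // the call site with `remap_types::<T, U>().get()`.
    let raw = db.remap_types::<BEU32, Bytes>().get(&wtxn, &1)?;
    assert_eq!(raw, Some(&b"hello"[..]));

    wtxn.commit()?;
    Ok(())
}
```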

### TODO
 - [x] Create actual, simple error types instead of using strings in the codecs.

### Follow-up work
 - Move the codecs into another member crate (we depend on the uuid one in the meilitool crate).
 - Display the internal decoding error in the `SerializationError` internal error variant.

Co-authored-by: Clément Renault <clement@meilisearch.com>
2023-11-28 13:39:44 +00:00
5b563f872b Move the clippy attribute on the problematic part of the code 2023-11-28 14:37:58 +01:00
ec9b52d608 Rename copy_to_path to copy_to_file 2023-11-28 14:32:30 +01:00
34c67ac389 Remove the possibility to fail fetching the env info 2023-11-28 14:31:23 +01:00
d050c9b4ae Only remap the main database once 2023-11-28 14:27:30 +01:00
7dd1226faf Clarify an unreachable unwrap 2023-11-28 14:26:31 +01:00
1575456594 Further reduce an async block 2023-11-28 14:23:32 +01:00
add2ceef67 Introduce error types to avoid panics 2023-11-28 14:21:49 +01:00
548c8247c2 Create and use real error types in the codecs 2023-11-28 10:11:17 +01:00
181ca48482 Merge #4234
4234: Fix puffin in the index scheduler r=dureuill a=irevoire

Currently, we can't compile the index scheduler without this feature.

It could be cool to specify the dependencies in the main workspace Cargo.toml, like quickwit does, to avoid this kind of error in the future (see the sketch below): https://github.com/quickwit-oss/quickwit/blob/main/quickwit/Cargo.toml#L41
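
For reference, a minimal sketch of that layout (the crate name and version here are illustrative, not taken from either repository):

```toml
# Workspace-root Cargo.toml: declare each shared dependency once.
[workspace.dependencies]
puffin = "0.16"

# Each member crate's Cargo.toml then inherits the exact same version:
# [dependencies]
# puffin = { workspace = true }
```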

Co-authored-by: Tamo <tamo@meilisearch.com>
2023-11-28 08:23:48 +00:00
d32eb11329 Move to the v0.20.0-alpha.9 of heed 2023-11-27 11:52:22 +01:00
58dac8af42 Remove the panics and unwraps 2023-11-23 15:00:48 +01:00
0dbf1a16ff Make clippy happy 2023-11-23 14:11:38 +01:00
462b4c0080 Fix the tests 2023-11-23 12:07:35 +01:00
0d4482625a Make the changes to use heed v0.20-alpha.6 2023-11-23 11:43:58 +01:00
56a0d91ecd Update the heed dependency and lock file 2023-11-22 15:11:09 +01:00
99 changed files with 1823 additions and 971 deletions

Cargo.lock (generated)

@ -520,6 +520,9 @@ name = "bitflags"
version = "2.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "630be753d4e58660abd17930c71b647fe46c27ea6b63cc59e1e3851406972e42"
dependencies = [
"serde",
]
[[package]]
name = "block-buffer"
@ -1255,6 +1258,15 @@ dependencies = [
"syn 2.0.28",
]
[[package]]
name = "doxygen-rs"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bff670ea0c9bbb8414e7efa6e23ebde2b8f520a7eef78273a3918cf1903e7505"
dependencies = [
"phf",
]
[[package]]
name = "dump"
version = "1.5.0"
@ -1811,36 +1823,40 @@ checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8"
[[package]]
name = "heed"
version = "0.12.7"
source = "git+https://github.com/meilisearch/heed?tag=v0.12.7#061a5276b1f336f5f3302bee291e336041d88632"
version = "0.20.0-alpha.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9648a50991c86df7d00c56c268c27754fcf4c80be2ba57fc4a00dc928c6fe934"
dependencies = [
"bitflags 2.3.3",
"bytemuck",
"byteorder",
"heed-traits",
"heed-types",
"libc",
"lmdb-rkv-sys",
"lmdb-master-sys",
"once_cell",
"page_size 0.4.2",
"page_size 0.6.0",
"synchronoise",
"url",
"zerocopy",
]
[[package]]
name = "heed-traits"
version = "0.7.0"
source = "git+https://github.com/meilisearch/heed?tag=v0.12.7#061a5276b1f336f5f3302bee291e336041d88632"
version = "0.20.0-alpha.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5ab0b7d9cde969ad36dde692e487dc89d97f7168bf6a7bd3b894ad4bf7278298"
[[package]]
name = "heed-types"
version = "0.7.2"
source = "git+https://github.com/meilisearch/heed?tag=v0.12.7#061a5276b1f336f5f3302bee291e336041d88632"
version = "0.20.0-alpha.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f0cb3567a7363f28b597bf6e9897b9466397951dd0e52df2c8196dd8a71af44a"
dependencies = [
"bincode",
"byteorder",
"heed-traits",
"serde",
"serde_json",
"zerocopy",
]
[[package]]
@ -2968,11 +2984,13 @@ dependencies = [
]
[[package]]
name = "lmdb-rkv-sys"
version = "0.15.1"
source = "git+https://github.com/meilisearch/lmdb-rs#501aa34a1ab7f092e3ff54a6c22ff6c55931a2d8"
name = "lmdb-master-sys"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "629c123f5321b48fa4f8f4d3b868165b748d9ba79c7103fb58e3a94f736bcedd"
dependencies = [
"cc",
"doxygen-rs",
"libc",
"pkg-config",
]
@ -3472,9 +3490,9 @@ dependencies = [
[[package]]
name = "page_size"
version = "0.4.2"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eebde548fbbf1ea81a99b128872779c437752fb99f217c45245e1a61dcd9edcd"
checksum = "1b7663cbd190cfd818d08efa8497f6cd383076688c49a391ef7c0d03cd12b561"
dependencies = [
"libc",
"winapi",
@ -3482,9 +3500,9 @@ dependencies = [
[[package]]
name = "page_size"
version = "0.5.0"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1b7663cbd190cfd818d08efa8497f6cd383076688c49a391ef7c0d03cd12b561"
checksum = "30d5b2194ed13191c1999ae0704b7839fb18384fa22e49b57eeaa97d79ce40da"
dependencies = [
"libc",
"winapi",
@ -3630,6 +3648,7 @@ version = "0.11.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ade2d8b8f33c7333b51bcf0428d37e217e9f32192ae4772156f65063b8ce03dc"
dependencies = [
"phf_macros",
"phf_shared",
]
@ -3653,6 +3672,19 @@ dependencies = [
"rand",
]
[[package]]
name = "phf_macros"
version = "0.11.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3444646e286606587e49f3bcf1679b8cef1dc2c5ecc29ddacaffc305180d464b"
dependencies = [
"phf_generator",
"phf_shared",
"proc-macro2",
"quote",
"syn 2.0.28",
]
[[package]]
name = "phf_shared"
version = "0.11.2"
@ -4479,18 +4511,6 @@ dependencies = [
"crossbeam-queue",
]
[[package]]
name = "synstructure"
version = "0.12.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f36bdaa60a83aca3921b5259d5400cbf5e90fc51931376a9bd4a0eb79aa7210f"
dependencies = [
"proc-macro2",
"quote",
"syn 1.0.109",
"unicode-xid",
]
[[package]]
name = "synstructure"
version = "0.13.0"
@ -5359,28 +5379,7 @@ dependencies = [
"proc-macro2",
"quote",
"syn 2.0.28",
"synstructure 0.13.0",
]
[[package]]
name = "zerocopy"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6580539ad917b7c026220c4b3f2c08d52ce54d6ce0dc491e66002e35388fab46"
dependencies = [
"byteorder",
"zerocopy-derive",
]
[[package]]
name = "zerocopy-derive"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d498dbd1fd7beb83c86709ae1c33ca50942889473473d287d56ce4770a18edfb"
dependencies = [
"proc-macro2",
"syn 1.0.109",
"synstructure 0.12.6",
"synstructure",
]
[[package]]
@ -5401,7 +5400,7 @@ dependencies = [
"proc-macro2",
"quote",
"syn 2.0.28",
"synstructure 0.13.0",
"synstructure",
]
[[package]]


@ -36,7 +36,7 @@ fn setup_index() -> Index {
}
fn setup_settings<'t>(
wtxn: &mut RwTxn<'t, '_>,
wtxn: &mut RwTxn<'t>,
index: &'t Index,
primary_key: &str,
searchable_fields: &[&str],


@ -267,6 +267,7 @@ pub(crate) mod test {
dictionary: Setting::NotSet,
synonyms: Setting::NotSet,
distinct_attribute: Setting::NotSet,
proximity_precision: Setting::NotSet,
typo_tolerance: Setting::NotSet,
faceting: Setting::Set(FacetingSettings {
max_values_per_facet: Setting::Set(111),


@ -345,6 +345,7 @@ impl<T> From<v5::Settings<T>> for v6::Settings<v6::Unchecked> {
dictionary: v6::Setting::NotSet,
synonyms: settings.synonyms.into(),
distinct_attribute: settings.distinct_attribute.into(),
proximity_precision: v6::Setting::NotSet,
typo_tolerance: match settings.typo_tolerance {
v5::Setting::Set(typo) => v6::Setting::Set(v6::TypoTolerance {
enabled: typo.enabled.into(),


@ -13,12 +13,12 @@ use crate::{Result, Version};
mod compat;
pub(self) mod v1;
pub(self) mod v2;
pub(self) mod v3;
pub(self) mod v4;
pub(self) mod v5;
pub(self) mod v6;
mod v1;
mod v2;
mod v3;
mod v4;
mod v5;
mod v6;
pub type Document = serde_json::Map<String, serde_json::Value>;
pub type UpdateFile = dyn Iterator<Item = Result<Document>>;


@ -56,8 +56,7 @@ pub enum RankingRule {
Desc(String),
}
static ASC_DESC_REGEX: Lazy<Regex> =
Lazy::new(|| Regex::new(r#"(asc|desc)\(([\w_-]+)\)"#).unwrap());
static ASC_DESC_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"(asc|desc)\(([\w_-]+)\)").unwrap());
impl FromStr for RankingRule {
type Err = ();


@ -564,10 +564,10 @@ pub mod tests {
#[test]
fn parse_escaped() {
insta::assert_display_snapshot!(p(r#"title = 'foo\\'"#), @r#"{title} = {foo\}"#);
insta::assert_display_snapshot!(p(r#"title = 'foo\\\\'"#), @r#"{title} = {foo\\}"#);
insta::assert_display_snapshot!(p(r#"title = 'foo\\\\\\'"#), @r#"{title} = {foo\\\}"#);
insta::assert_display_snapshot!(p(r#"title = 'foo\\\\\\\\'"#), @r#"{title} = {foo\\\\}"#);
insta::assert_display_snapshot!(p(r"title = 'foo\\'"), @r#"{title} = {foo\}"#);
insta::assert_display_snapshot!(p(r"title = 'foo\\\\'"), @r#"{title} = {foo\\}"#);
insta::assert_display_snapshot!(p(r"title = 'foo\\\\\\'"), @r#"{title} = {foo\\\}"#);
insta::assert_display_snapshot!(p(r"title = 'foo\\\\\\\\'"), @r#"{title} = {foo\\\\}"#);
// but it also works with other sequences
insta::assert_display_snapshot!(p(r#"title = 'foo\x20\n\t\"\'"'"#), @"{title} = {foo \n\t\"\'\"}");
}


@ -270,8 +270,8 @@ pub mod test {
("aaaa", "", rtok("", "aaaa"), "aaaa"),
(r#"aa"aa"#, r#""aa"#, rtok("", "aa"), "aa"),
(r#"aa\"aa"#, r#""#, rtok("", r#"aa\"aa"#), r#"aa"aa"#),
(r#"aa\\\aa"#, r#""#, rtok("", r#"aa\\\aa"#), r#"aa\\\aa"#),
(r#"aa\\"\aa"#, r#""\aa"#, rtok("", r#"aa\\"#), r#"aa\\"#),
(r"aa\\\aa", r#""#, rtok("", r"aa\\\aa"), r"aa\\\aa"),
(r#"aa\\"\aa"#, r#""\aa"#, rtok("", r"aa\\"), r"aa\\"),
(r#"aa\\\"\aa"#, r#""#, rtok("", r#"aa\\\"\aa"#), r#"aa\\"\aa"#),
(r#"\"\""#, r#""#, rtok("", r#"\"\""#), r#""""#),
];
@ -301,12 +301,12 @@ pub mod test {
);
// simple quote
assert_eq!(
unescape(Span::new_extra(r#"Hello \'World\'"#, ""), '\''),
unescape(Span::new_extra(r"Hello \'World\'", ""), '\''),
r#"Hello 'World'"#.to_string()
);
assert_eq!(
unescape(Span::new_extra(r#"Hello \\\'World\\\'"#, ""), '\''),
r#"Hello \\'World\\'"#.to_string()
unescape(Span::new_extra(r"Hello \\\'World\\\'", ""), '\''),
r"Hello \\'World\\'".to_string()
);
}
@ -335,19 +335,19 @@ pub mod test {
("\"cha'nnel\"", "cha'nnel", false),
("I'm tamo", "I", false),
// escaped thing but not quote
(r#""\\""#, r#"\"#, true),
(r#""\\\\\\""#, r#"\\\"#, true),
(r#""aa\\aa""#, r#"aa\aa"#, true),
(r#""\\""#, r"\", true),
(r#""\\\\\\""#, r"\\\", true),
(r#""aa\\aa""#, r"aa\aa", true),
// with double quote
(r#""Hello \"world\"""#, r#"Hello "world""#, true),
(r#""Hello \\\"world\\\"""#, r#"Hello \"world\""#, true),
(r#""I'm \"super\" tamo""#, r#"I'm "super" tamo"#, true),
(r#""\"\"""#, r#""""#, true),
// with simple quote
(r#"'Hello \'world\''"#, r#"Hello 'world'"#, true),
(r#"'Hello \\\'world\\\''"#, r#"Hello \'world\'"#, true),
(r"'Hello \'world\''", r#"Hello 'world'"#, true),
(r"'Hello \\\'world\\\''", r"Hello \'world\'", true),
(r#"'I\'m "super" tamo'"#, r#"I'm "super" tamo"#, true),
(r#"'\'\''"#, r#"''"#, true),
(r"'\'\''", r#"''"#, true),
];
for (input, expected, escaped) in test_case {


@ -113,7 +113,7 @@ fn main() {
index.documents(&wtxn, res.documents_ids).unwrap();
progression.fetch_add(1, Ordering::Relaxed);
}
wtxn.abort().unwrap();
wtxn.abort();
});
if let err @ Err(_) = handle.join() {
stop.store(true, Ordering::Relaxed);


@ -32,7 +32,7 @@ use meilisearch_types::milli::heed::CompactionOption;
use meilisearch_types::milli::update::{
IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig, Settings as MilliSettings,
};
use meilisearch_types::milli::{self, Filter, BEU32};
use meilisearch_types::milli::{self, Filter};
use meilisearch_types::settings::{apply_settings_to_builder, Settings, Unchecked};
use meilisearch_types::tasks::{Details, IndexSwap, Kind, KindWithContent, Status, Task};
use meilisearch_types::{compression, Index, VERSION_FILE_NAME};
@ -715,7 +715,7 @@ impl IndexScheduler {
// 2. Snapshot the index-scheduler LMDB env
//
// When we call copy_to_path, LMDB opens a read transaction by itself,
// When we call copy_to_file, LMDB opens a read transaction by itself,
// we can't provide our own. It is an issue as we would like to know
// the update files to copy but new ones can be enqueued between the copy
// of the env and the new transaction we open to retrieve the enqueued tasks.
@ -728,7 +728,7 @@ impl IndexScheduler {
// 2.1 First copy the LMDB env of the index-scheduler
let dst = temp_snapshot_dir.path().join("tasks");
fs::create_dir_all(&dst)?;
self.env.copy_to_path(dst.join("data.mdb"), CompactionOption::Enabled)?;
self.env.copy_to_file(dst.join("data.mdb"), CompactionOption::Enabled)?;
// 2.2 Create a read transaction on the index-scheduler
let rtxn = self.env.read_txn()?;
@ -753,7 +753,7 @@ impl IndexScheduler {
let index = self.index_mapper.index(&rtxn, name)?;
let dst = temp_snapshot_dir.path().join("indexes").join(uuid.to_string());
fs::create_dir_all(&dst)?;
index.copy_to_path(dst.join("data.mdb"), CompactionOption::Enabled)?;
index.copy_to_file(dst.join("data.mdb"), CompactionOption::Enabled)?;
}
drop(rtxn);
@ -766,7 +766,7 @@ impl IndexScheduler {
.map_size(1024 * 1024 * 1024) // 1 GiB
.max_dbs(2)
.open(&self.auth_path)?;
auth.copy_to_path(dst.join("data.mdb"), CompactionOption::Enabled)?;
auth.copy_to_file(dst.join("data.mdb"), CompactionOption::Enabled)?;
// 5. Copy and tarball the flat snapshot
// 5.1 Find the original name of the database
@ -1106,7 +1106,7 @@ impl IndexScheduler {
for task_id in &index_lhs_task_ids | &index_rhs_task_ids {
let mut task = self.get_task(wtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?;
swap_index_uid_in_task(&mut task, (lhs, rhs));
self.all_tasks.put(wtxn, &BEU32::new(task_id), &task)?;
self.all_tasks.put(wtxn, &task_id, &task)?;
}
// 4. remove the task from indexuid = before_name
@ -1132,7 +1132,7 @@ impl IndexScheduler {
/// The list of processed tasks.
fn apply_index_operation<'i>(
&self,
index_wtxn: &mut RwTxn<'i, '_>,
index_wtxn: &mut RwTxn<'i>,
index: &'i Index,
operation: IndexOperation,
) -> Result<Vec<Task>> {
@ -1343,6 +1343,9 @@ impl IndexScheduler {
for (task, (_, settings)) in tasks.iter_mut().zip(settings) {
let checked_settings = settings.clone().check();
if checked_settings.proximity_precision.set().is_some() {
self.features.features().check_proximity_precision()?;
}
task.details = Some(Details::SettingsUpdate { settings: Box::new(settings) });
apply_settings_to_builder(&checked_settings, &mut builder);
@ -1479,10 +1482,9 @@ impl IndexScheduler {
}
for task in to_delete_tasks.iter() {
self.all_tasks.delete(wtxn, &BEU32::new(task))?;
self.all_tasks.delete(wtxn, &task)?;
}
for canceled_by in affected_canceled_by {
let canceled_by = BEU32::new(canceled_by);
if let Some(mut tasks) = self.canceled_by.get(wtxn, &canceled_by)? {
tasks -= &to_delete_tasks;
if tasks.is_empty() {
@ -1530,14 +1532,14 @@ impl IndexScheduler {
task.details = task.details.map(|d| d.to_failed());
self.update_task(wtxn, &task)?;
}
self.canceled_by.put(wtxn, &BEU32::new(cancel_task_id), &tasks_to_cancel)?;
self.canceled_by.put(wtxn, &cancel_task_id, &tasks_to_cancel)?;
Ok(content_files_to_delete)
}
}
fn delete_document_by_filter<'a>(
wtxn: &mut RwTxn<'a, '_>,
wtxn: &mut RwTxn<'a>,
filter: &serde_json::Value,
indexer_config: &IndexerConfig,
must_stop_processing: MustStopProcessing,


@ -81,6 +81,19 @@ impl RoFeatures {
.into())
}
}
pub fn check_proximity_precision(&self) -> Result<()> {
if self.runtime.proximity_precision {
Ok(())
} else {
Err(FeatureNotEnabledError {
disabled_action: "Using `proximityPrecision` index setting",
feature: "proximity precision",
issue_link: "https://github.com/orgs/meilisearch/discussions/710",
}
.into())
}
}
}
impl FeatureData {


@ -1,12 +1,8 @@
/// the map size to use when we don't succeed in reading it in indexes.
const DEFAULT_MAP_SIZE: usize = 10 * 1024 * 1024 * 1024; // 10 GiB
use std::collections::BTreeMap;
use std::path::Path;
use std::time::Duration;
use meilisearch_types::heed::flags::Flags;
use meilisearch_types::heed::{EnvClosingEvent, EnvOpenOptions};
use meilisearch_types::heed::{EnvClosingEvent, EnvFlags, EnvOpenOptions};
use meilisearch_types::milli::Index;
use time::OffsetDateTime;
use uuid::Uuid;
@ -236,7 +232,7 @@ impl IndexMap {
enable_mdb_writemap: bool,
map_size_growth: usize,
) {
let map_size = index.map_size().unwrap_or(DEFAULT_MAP_SIZE) + map_size_growth;
let map_size = index.map_size() + map_size_growth;
let closing_event = index.prepare_for_closing();
let generation = self.next_generation();
self.unavailable.insert(
@ -309,7 +305,7 @@ fn create_or_open_index(
options.map_size(clamp_to_page_size(map_size));
options.max_readers(1024);
if enable_mdb_writemap {
unsafe { options.flag(Flags::MdbWriteMap) };
unsafe { options.flags(EnvFlags::WRITE_MAP) };
}
if let Some((created, updated)) = date {
@ -388,7 +384,7 @@ mod tests {
fn assert_index_size(index: Index, expected: usize) {
let expected = clamp_to_page_size(expected);
let index_map_size = index.map_size().unwrap();
let index_map_size = index.map_size();
assert_eq!(index_map_size, expected);
}
}


@ -1,7 +1,7 @@
use std::collections::BTreeSet;
use std::fmt::Write;
use meilisearch_types::heed::types::{OwnedType, SerdeBincode, SerdeJson, Str};
use meilisearch_types::heed::types::{SerdeBincode, SerdeJson, Str};
use meilisearch_types::heed::{Database, RoTxn};
use meilisearch_types::milli::{CboRoaringBitmapCodec, RoaringBitmapCodec, BEU32};
use meilisearch_types::tasks::{Details, Task};
@ -115,7 +115,7 @@ pub fn snapshot_bitmap(r: &RoaringBitmap) -> String {
snap
}
pub fn snapshot_all_tasks(rtxn: &RoTxn, db: Database<OwnedType<BEU32>, SerdeJson<Task>>) -> String {
pub fn snapshot_all_tasks(rtxn: &RoTxn, db: Database<BEU32, SerdeJson<Task>>) -> String {
let mut snap = String::new();
let iter = db.iter(rtxn).unwrap();
for next in iter {
@ -125,10 +125,7 @@ pub fn snapshot_all_tasks(rtxn: &RoTxn, db: Database<OwnedType<BEU32>, SerdeJson
snap
}
pub fn snapshot_date_db(
rtxn: &RoTxn,
db: Database<OwnedType<BEI128>, CboRoaringBitmapCodec>,
) -> String {
pub fn snapshot_date_db(rtxn: &RoTxn, db: Database<BEI128, CboRoaringBitmapCodec>) -> String {
let mut snap = String::new();
let iter = db.iter(rtxn).unwrap();
for next in iter {
@ -248,10 +245,7 @@ pub fn snapshot_index_tasks(rtxn: &RoTxn, db: Database<Str, RoaringBitmapCodec>)
}
snap
}
pub fn snapshot_canceled_by(
rtxn: &RoTxn,
db: Database<OwnedType<BEU32>, RoaringBitmapCodec>,
) -> String {
pub fn snapshot_canceled_by(rtxn: &RoTxn, db: Database<BEU32, RoaringBitmapCodec>) -> String {
let mut snap = String::new();
let iter = db.iter(rtxn).unwrap();
for next in iter {


@ -47,8 +47,9 @@ pub use features::RoFeatures;
use file_store::FileStore;
use meilisearch_types::error::ResponseError;
use meilisearch_types::features::{InstanceTogglableFeatures, RuntimeTogglableFeatures};
use meilisearch_types::heed::types::{OwnedType, SerdeBincode, SerdeJson, Str};
use meilisearch_types::heed::{self, Database, Env, RoTxn, RwTxn};
use meilisearch_types::heed::byteorder::BE;
use meilisearch_types::heed::types::{SerdeBincode, SerdeJson, Str, I128};
use meilisearch_types::heed::{self, Database, Env, PutFlags, RoTxn, RwTxn};
use meilisearch_types::milli::documents::DocumentsBatchBuilder;
use meilisearch_types::milli::update::IndexerConfig;
use meilisearch_types::milli::{self, CboRoaringBitmapCodec, Index, RoaringBitmapCodec, BEU32};
@ -64,8 +65,7 @@ use uuid::Uuid;
use crate::index_mapper::IndexMapper;
use crate::utils::{check_index_swap_validity, clamp_to_page_size};
pub(crate) type BEI128 =
meilisearch_types::heed::zerocopy::I128<meilisearch_types::heed::byteorder::BE>;
pub(crate) type BEI128 = I128<BE>;
/// Defines a subset of tasks to be retrieved from the [`IndexScheduler`].
///
@ -278,7 +278,7 @@ pub struct IndexScheduler {
pub(crate) file_store: FileStore,
// The main database, it contains all the tasks accessible by their Id.
pub(crate) all_tasks: Database<OwnedType<BEU32>, SerdeJson<Task>>,
pub(crate) all_tasks: Database<BEU32, SerdeJson<Task>>,
/// All the tasks ids grouped by their status.
// TODO we should not be able to serialize a `Status::Processing` in this database.
@ -289,16 +289,16 @@ pub struct IndexScheduler {
pub(crate) index_tasks: Database<Str, RoaringBitmapCodec>,
/// Store the tasks that were canceled by a task uid
pub(crate) canceled_by: Database<OwnedType<BEU32>, RoaringBitmapCodec>,
pub(crate) canceled_by: Database<BEU32, RoaringBitmapCodec>,
/// Store the task ids of tasks which were enqueued at a specific date
pub(crate) enqueued_at: Database<OwnedType<BEI128>, CboRoaringBitmapCodec>,
pub(crate) enqueued_at: Database<BEI128, CboRoaringBitmapCodec>,
/// Store the task ids of finished tasks which started being processed at a specific date
pub(crate) started_at: Database<OwnedType<BEI128>, CboRoaringBitmapCodec>,
pub(crate) started_at: Database<BEI128, CboRoaringBitmapCodec>,
/// Store the task ids of tasks which finished at a specific date
pub(crate) finished_at: Database<OwnedType<BEI128>, CboRoaringBitmapCodec>,
pub(crate) finished_at: Database<BEI128, CboRoaringBitmapCodec>,
/// In charge of creating, opening, storing and returning indexes.
pub(crate) index_mapper: IndexMapper,
@ -730,9 +730,7 @@ impl IndexScheduler {
if let Some(canceled_by) = &query.canceled_by {
let mut all_canceled_tasks = RoaringBitmap::new();
for cancel_task_uid in canceled_by {
if let Some(canceled_by_uid) =
self.canceled_by.get(rtxn, &BEU32::new(*cancel_task_uid))?
{
if let Some(canceled_by_uid) = self.canceled_by.get(rtxn, cancel_task_uid)? {
all_canceled_tasks |= canceled_by_uid;
}
}
@ -983,7 +981,7 @@ impl IndexScheduler {
// if the task doesn't delete anything and 50% of the task queue is full, we must refuse to enqueue the incoming task
if !matches!(&kind, KindWithContent::TaskDeletion { tasks, .. } if !tasks.is_empty())
&& (self.env.non_free_pages_size()? * 100) / self.env.map_size()? as u64 > 50
&& (self.env.non_free_pages_size()? * 100) / self.env.info().map_size as u64 > 50
{
return Err(Error::NoSpaceLeftInTaskQueue);
}
@ -1009,7 +1007,7 @@ impl IndexScheduler {
// Get rid of the mutability.
let task = task;
self.all_tasks.append(&mut wtxn, &BEU32::new(task.uid), &task)?;
self.all_tasks.put_with_flags(&mut wtxn, PutFlags::APPEND, &task.uid, &task)?;
for index in task.indexes() {
self.update_index(&mut wtxn, index, |bitmap| {
@ -1187,7 +1185,7 @@ impl IndexScheduler {
| Err(Error::AbortedTask) => {
#[cfg(test)]
self.breakpoint(Breakpoint::AbortedIndexation);
wtxn.abort().map_err(Error::HeedTransaction)?;
wtxn.abort();
// We make sure that we don't call `stop_processing` on the `processing_tasks`,
// this is because we want to let the next tick call `create_next_batch` and keep
@ -1208,7 +1206,7 @@ impl IndexScheduler {
let index_uid = index_uid.unwrap();
// fixme: handle error more gracefully? not sure when this could happen
self.index_mapper.resize_index(&wtxn, &index_uid)?;
wtxn.abort().map_err(Error::HeedTransaction)?;
wtxn.abort();
return Ok(TickOutcome::TickAgain(0));
}
@ -1354,7 +1352,7 @@ impl IndexScheduler {
pub struct Dump<'a> {
index_scheduler: &'a IndexScheduler,
wtxn: RwTxn<'a, 'a>,
wtxn: RwTxn<'a>,
indexes: HashMap<String, RoaringBitmap>,
statuses: HashMap<Status, RoaringBitmap>,
@ -1469,7 +1467,7 @@ impl<'a> Dump<'a> {
},
};
self.index_scheduler.all_tasks.put(&mut self.wtxn, &BEU32::new(task.uid), &task)?;
self.index_scheduler.all_tasks.put(&mut self.wtxn, &task.uid, &task)?;
for index in task.indexes() {
match self.indexes.get_mut(index) {
@ -1511,8 +1509,8 @@ impl<'a> Dump<'a> {
}
}
self.statuses.entry(task.status).or_insert(RoaringBitmap::new()).insert(task.uid);
self.kinds.entry(task.kind.as_kind()).or_insert(RoaringBitmap::new()).insert(task.uid);
self.statuses.entry(task.status).or_default().insert(task.uid);
self.kinds.entry(task.kind.as_kind()).or_default().insert(task.uid);
Ok(task)
}


@ -3,9 +3,9 @@
use std::collections::{BTreeSet, HashSet};
use std::ops::Bound;
use meilisearch_types::heed::types::{DecodeIgnore, OwnedType};
use meilisearch_types::heed::types::DecodeIgnore;
use meilisearch_types::heed::{Database, RoTxn, RwTxn};
use meilisearch_types::milli::{CboRoaringBitmapCodec, BEU32};
use meilisearch_types::milli::CboRoaringBitmapCodec;
use meilisearch_types::tasks::{Details, IndexSwap, Kind, KindWithContent, Status};
use roaring::{MultiOps, RoaringBitmap};
use time::OffsetDateTime;
@ -18,7 +18,7 @@ impl IndexScheduler {
}
pub(crate) fn last_task_id(&self, rtxn: &RoTxn) -> Result<Option<TaskId>> {
Ok(self.all_tasks.remap_data_type::<DecodeIgnore>().last(rtxn)?.map(|(k, _)| k.get() + 1))
Ok(self.all_tasks.remap_data_type::<DecodeIgnore>().last(rtxn)?.map(|(k, _)| k + 1))
}
pub(crate) fn next_task_id(&self, rtxn: &RoTxn) -> Result<TaskId> {
@ -26,7 +26,7 @@ impl IndexScheduler {
}
pub(crate) fn get_task(&self, rtxn: &RoTxn, task_id: TaskId) -> Result<Option<Task>> {
Ok(self.all_tasks.get(rtxn, &BEU32::new(task_id))?)
Ok(self.all_tasks.get(rtxn, &task_id)?)
}
/// Convert an iterator to a `Vec` of tasks. The tasks MUST exist or a
@ -88,7 +88,7 @@ impl IndexScheduler {
}
}
self.all_tasks.put(wtxn, &BEU32::new(task.uid), task)?;
self.all_tasks.put(wtxn, &task.uid, task)?;
Ok(())
}
@ -169,11 +169,11 @@ impl IndexScheduler {
pub(crate) fn insert_task_datetime(
wtxn: &mut RwTxn,
database: Database<OwnedType<BEI128>, CboRoaringBitmapCodec>,
database: Database<BEI128, CboRoaringBitmapCodec>,
time: OffsetDateTime,
task_id: TaskId,
) -> Result<()> {
let timestamp = BEI128::new(time.unix_timestamp_nanos());
let timestamp = time.unix_timestamp_nanos();
let mut task_ids = database.get(wtxn, &timestamp)?.unwrap_or_default();
task_ids.insert(task_id);
database.put(wtxn, &timestamp, &RoaringBitmap::from_iter(task_ids))?;
@ -182,11 +182,11 @@ pub(crate) fn insert_task_datetime(
pub(crate) fn remove_task_datetime(
wtxn: &mut RwTxn,
database: Database<OwnedType<BEI128>, CboRoaringBitmapCodec>,
database: Database<BEI128, CboRoaringBitmapCodec>,
time: OffsetDateTime,
task_id: TaskId,
) -> Result<()> {
let timestamp = BEI128::new(time.unix_timestamp_nanos());
let timestamp = time.unix_timestamp_nanos();
if let Some(mut existing) = database.get(wtxn, &timestamp)? {
existing.remove(task_id);
if existing.is_empty() {
@ -202,7 +202,7 @@ pub(crate) fn remove_task_datetime(
pub(crate) fn keep_tasks_within_datetimes(
rtxn: &RoTxn,
tasks: &mut RoaringBitmap,
database: Database<OwnedType<BEI128>, CboRoaringBitmapCodec>,
database: Database<BEI128, CboRoaringBitmapCodec>,
after: Option<OffsetDateTime>,
before: Option<OffsetDateTime>,
) -> Result<()> {
@ -213,8 +213,8 @@ pub(crate) fn keep_tasks_within_datetimes(
(Some(after), Some(before)) => (Bound::Excluded(*after), Bound::Excluded(*before)),
};
let mut collected_task_ids = RoaringBitmap::new();
let start = map_bound(start, |b| BEI128::new(b.unix_timestamp_nanos()));
let end = map_bound(end, |b| BEI128::new(b.unix_timestamp_nanos()));
let start = map_bound(start, |b| b.unix_timestamp_nanos());
let end = map_bound(end, |b| b.unix_timestamp_nanos());
let iter = database.range(rtxn, &(start, end))?;
for r in iter {
let (_timestamp, task_ids) = r?;
@ -337,8 +337,6 @@ impl IndexScheduler {
let rtxn = self.env.read_txn().unwrap();
for task in self.all_tasks.iter(&rtxn).unwrap() {
let (task_id, task) = task.unwrap();
let task_id = task_id.get();
let task_index_uid = task.index_uid().map(ToOwned::to_owned);
let Task {
@ -361,16 +359,13 @@ impl IndexScheduler {
.unwrap()
.contains(task.uid));
}
let db_enqueued_at = self
.enqueued_at
.get(&rtxn, &BEI128::new(enqueued_at.unix_timestamp_nanos()))
.unwrap()
.unwrap();
let db_enqueued_at =
self.enqueued_at.get(&rtxn, &enqueued_at.unix_timestamp_nanos()).unwrap().unwrap();
assert!(db_enqueued_at.contains(task_id));
if let Some(started_at) = started_at {
let db_started_at = self
.started_at
.get(&rtxn, &BEI128::new(started_at.unix_timestamp_nanos()))
.get(&rtxn, &started_at.unix_timestamp_nanos())
.unwrap()
.unwrap();
assert!(db_started_at.contains(task_id));
@ -378,7 +373,7 @@ impl IndexScheduler {
if let Some(finished_at) = finished_at {
let db_finished_at = self
.finished_at
.get(&rtxn, &BEI128::new(finished_at.unix_timestamp_nanos()))
.get(&rtxn, &finished_at.unix_timestamp_nanos())
.unwrap()
.unwrap();
assert!(db_finished_at.contains(task_id));


@ -1,7 +1,7 @@
use std::borrow::Cow;
use std::convert::TryInto;
use meilisearch_types::heed::{BytesDecode, BytesEncode};
use meilisearch_types::heed::{BoxedError, BytesDecode, BytesEncode};
use uuid::Uuid;
/// A heed codec for value of struct Uuid.
@ -10,15 +10,15 @@ pub struct UuidCodec;
impl<'a> BytesDecode<'a> for UuidCodec {
type DItem = Uuid;
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
bytes.try_into().ok().map(Uuid::from_bytes)
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
bytes.try_into().map(Uuid::from_bytes).map_err(Into::into)
}
}
impl BytesEncode<'_> for UuidCodec {
type EItem = Uuid;
fn bytes_encode(item: &Self::EItem) -> Option<Cow<[u8]>> {
Some(Cow::Borrowed(item.as_bytes()))
fn bytes_encode(item: &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
Ok(Cow::Borrowed(item.as_bytes()))
}
}


@ -4,17 +4,20 @@ use std::collections::HashSet;
use std::convert::{TryFrom, TryInto};
use std::fs::create_dir_all;
use std::path::Path;
use std::result::Result as StdResult;
use std::str;
use std::str::FromStr;
use std::sync::Arc;
use hmac::{Hmac, Mac};
use meilisearch_types::heed::BoxedError;
use meilisearch_types::index_uid_pattern::IndexUidPattern;
use meilisearch_types::keys::KeyId;
use meilisearch_types::milli;
use meilisearch_types::milli::heed::types::{ByteSlice, DecodeIgnore, SerdeJson};
use meilisearch_types::milli::heed::types::{Bytes, DecodeIgnore, SerdeJson};
use meilisearch_types::milli::heed::{Database, Env, EnvOpenOptions, RwTxn};
use sha2::Sha256;
use thiserror::Error;
use time::OffsetDateTime;
use uuid::fmt::Hyphenated;
use uuid::Uuid;
@ -30,7 +33,7 @@ const KEY_ID_ACTION_INDEX_EXPIRATION_DB_NAME: &str = "keyid-action-index-expirat
#[derive(Clone)]
pub struct HeedAuthStore {
env: Arc<Env>,
keys: Database<ByteSlice, SerdeJson<Key>>,
keys: Database<Bytes, SerdeJson<Key>>,
action_keyid_index_expiration: Database<KeyIdActionCodec, SerdeJson<Option<OffsetDateTime>>>,
should_close_on_drop: bool,
}
@ -276,7 +279,7 @@ impl HeedAuthStore {
fn delete_key_from_inverted_db(&self, wtxn: &mut RwTxn, key: &KeyId) -> Result<()> {
let mut iter = self
.action_keyid_index_expiration
.remap_types::<ByteSlice, DecodeIgnore>()
.remap_types::<Bytes, DecodeIgnore>()
.prefix_iter_mut(wtxn, key.as_bytes())?;
while iter.next().transpose()?.is_some() {
// safety: we don't keep references from inside the LMDB database.
@ -294,23 +297,24 @@ pub struct KeyIdActionCodec;
impl<'a> milli::heed::BytesDecode<'a> for KeyIdActionCodec {
type DItem = (KeyId, Action, Option<&'a [u8]>);
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
let (key_id_bytes, action_bytes) = try_split_array_at(bytes)?;
let (action_bytes, index) = match try_split_array_at(action_bytes)? {
(action, []) => (action, None),
(action, index) => (action, Some(index)),
};
fn bytes_decode(bytes: &'a [u8]) -> StdResult<Self::DItem, BoxedError> {
let (key_id_bytes, action_bytes) = try_split_array_at(bytes).ok_or(SliceTooShortError)?;
let (&action_byte, index) =
match try_split_array_at(action_bytes).ok_or(SliceTooShortError)? {
([action], []) => (action, None),
([action], index) => (action, Some(index)),
};
let key_id = Uuid::from_bytes(*key_id_bytes);
let action = Action::from_repr(u8::from_be_bytes(*action_bytes))?;
let action = Action::from_repr(action_byte).ok_or(InvalidActionError { action_byte })?;
Some((key_id, action, index))
Ok((key_id, action, index))
}
}
impl<'a> milli::heed::BytesEncode<'a> for KeyIdActionCodec {
type EItem = (&'a KeyId, &'a Action, Option<&'a [u8]>);
fn bytes_encode((key_id, action, index): &Self::EItem) -> Option<Cow<[u8]>> {
fn bytes_encode((key_id, action, index): &Self::EItem) -> StdResult<Cow<[u8]>, BoxedError> {
let mut bytes = Vec::new();
bytes.extend_from_slice(key_id.as_bytes());
@ -320,10 +324,20 @@ impl<'a> milli::heed::BytesEncode<'a> for KeyIdActionCodec {
bytes.extend_from_slice(index);
}
Some(Cow::Owned(bytes))
Ok(Cow::Owned(bytes))
}
}
#[derive(Error, Debug)]
#[error("the slice is too short")]
pub struct SliceTooShortError;
#[derive(Error, Debug)]
#[error("cannot construct a valid Action from {action_byte}")]
pub struct InvalidActionError {
pub action_byte: u8,
}
pub fn generate_key_as_hexa(uid: Uuid, master_key: &[u8]) -> String {
// format uid as hyphenated allowing user to generate their own keys.
let mut uid_buffer = [0; Hyphenated::LENGTH];


@ -15,7 +15,7 @@ actix-web = { version = "4.3.1", default-features = false }
anyhow = "1.0.70"
convert_case = "0.6.0"
csv = "1.2.1"
deserr = { version = "0.6.0", features = ["actix-web"]}
deserr = { version = "0.6.0", features = ["actix-web"] }
either = { version = "1.8.1", features = ["serde"] }
enum-iterator = "1.4.0"
file-store = { path = "../file-store" }


@ -252,6 +252,7 @@ InvalidSearchShowRankingScoreDetails , InvalidRequest , BAD_REQUEST ;
InvalidSearchSort , InvalidRequest , BAD_REQUEST ;
InvalidSettingsDisplayedAttributes , InvalidRequest , BAD_REQUEST ;
InvalidSettingsDistinctAttribute , InvalidRequest , BAD_REQUEST ;
InvalidSettingsProximityPrecision , InvalidRequest , BAD_REQUEST ;
InvalidSettingsFaceting , InvalidRequest , BAD_REQUEST ;
InvalidSettingsFilterableAttributes , InvalidRequest , BAD_REQUEST ;
InvalidSettingsPagination , InvalidRequest , BAD_REQUEST ;
@ -386,11 +387,11 @@ impl ErrorCode for HeedError {
HeedError::Mdb(MdbError::Invalid) => Code::InvalidStoreFile,
HeedError::Io(e) => e.error_code(),
HeedError::Mdb(_)
| HeedError::Encoding
| HeedError::Decoding
| HeedError::Encoding(_)
| HeedError::Decoding(_)
| HeedError::InvalidDatabaseTyping
| HeedError::DatabaseClosing
| HeedError::BadOpenOptions => Code::Internal,
| HeedError::BadOpenOptions { .. } => Code::Internal,
}
}
}


@ -7,6 +7,7 @@ pub struct RuntimeTogglableFeatures {
pub vector_store: bool,
pub metrics: bool,
pub export_puffin_reports: bool,
pub proximity_precision: bool,
}
#[derive(Default, Debug, Clone, Copy)]


@ -8,6 +8,7 @@ use std::str::FromStr;
use deserr::{DeserializeError, Deserr, ErrorKind, MergeWithError, ValuePointerRef};
use fst::IntoStreamer;
use milli::proximity::ProximityPrecision;
use milli::update::Setting;
use milli::{Criterion, CriterionError, Index, DEFAULT_VALUES_PER_FACET};
use serde::{Deserialize, Serialize, Serializer};
@ -186,6 +187,9 @@ pub struct Settings<T> {
#[deserr(default, error = DeserrJsonError<InvalidSettingsDistinctAttribute>)]
pub distinct_attribute: Setting<String>,
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
#[deserr(default, error = DeserrJsonError<InvalidSettingsProximityPrecision>)]
pub proximity_precision: Setting<ProximityPrecisionView>,
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
#[deserr(default, error = DeserrJsonError<InvalidSettingsTypoTolerance>)]
pub typo_tolerance: Setting<TypoSettings>,
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
@ -214,6 +218,7 @@ impl Settings<Checked> {
separator_tokens: Setting::Reset,
dictionary: Setting::Reset,
distinct_attribute: Setting::Reset,
proximity_precision: Setting::Reset,
typo_tolerance: Setting::Reset,
faceting: Setting::Reset,
pagination: Setting::Reset,
@ -234,6 +239,7 @@ impl Settings<Checked> {
dictionary,
synonyms,
distinct_attribute,
proximity_precision,
typo_tolerance,
faceting,
pagination,
@ -252,6 +258,7 @@ impl Settings<Checked> {
dictionary,
synonyms,
distinct_attribute,
proximity_precision,
typo_tolerance,
faceting,
pagination,
@ -296,6 +303,7 @@ impl Settings<Unchecked> {
separator_tokens: self.separator_tokens,
dictionary: self.dictionary,
distinct_attribute: self.distinct_attribute,
proximity_precision: self.proximity_precision,
typo_tolerance: self.typo_tolerance,
faceting: self.faceting,
pagination: self.pagination,
@ -390,6 +398,12 @@ pub fn apply_settings_to_builder(
Setting::NotSet => (),
}
match settings.proximity_precision {
Setting::Set(ref precision) => builder.set_proximity_precision((*precision).into()),
Setting::Reset => builder.reset_proximity_precision(),
Setting::NotSet => (),
}
match settings.typo_tolerance {
Setting::Set(ref value) => {
match value.enabled {
@ -509,6 +523,8 @@ pub fn settings(
let distinct_field = index.distinct_field(rtxn)?.map(String::from);
let proximity_precision = index.proximity_precision(rtxn)?.map(ProximityPrecisionView::from);
let synonyms = index.user_defined_synonyms(rtxn)?;
let min_typo_word_len = MinWordSizeTyposSetting {
@ -532,7 +548,10 @@ pub fn settings(
let faceting = FacetingSettings {
max_values_per_facet: Setting::Set(
index.max_values_per_facet(rtxn)?.unwrap_or(DEFAULT_VALUES_PER_FACET),
index
.max_values_per_facet(rtxn)?
.map(|x| x as usize)
.unwrap_or(DEFAULT_VALUES_PER_FACET),
),
sort_facet_values_by: Setting::Set(
index
@ -545,7 +564,10 @@ pub fn settings(
let pagination = PaginationSettings {
max_total_hits: Setting::Set(
index.pagination_max_total_hits(rtxn)?.unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS),
index
.pagination_max_total_hits(rtxn)?
.map(|x| x as usize)
.unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS),
),
};
@ -569,6 +591,10 @@ pub fn settings(
Some(field) => Setting::Set(field),
None => Setting::Reset,
},
proximity_precision: match proximity_precision {
Some(precision) => Setting::Set(precision),
None => Setting::Reset,
},
synonyms: Setting::Set(synonyms),
typo_tolerance: Setting::Set(typo_tolerance),
faceting: Setting::Set(faceting),
@ -673,6 +699,31 @@ impl From<RankingRuleView> for Criterion {
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserr, Serialize, Deserialize)]
#[serde(deny_unknown_fields, rename_all = "camelCase")]
#[deserr(error = DeserrJsonError<InvalidSettingsProximityPrecision>, rename_all = camelCase, deny_unknown_fields)]
pub enum ProximityPrecisionView {
WordScale,
AttributeScale,
}
impl From<ProximityPrecision> for ProximityPrecisionView {
fn from(value: ProximityPrecision) -> Self {
match value {
ProximityPrecision::WordScale => ProximityPrecisionView::WordScale,
ProximityPrecision::AttributeScale => ProximityPrecisionView::AttributeScale,
}
}
}
impl From<ProximityPrecisionView> for ProximityPrecision {
fn from(value: ProximityPrecisionView) -> Self {
match value {
ProximityPrecisionView::WordScale => ProximityPrecision::WordScale,
ProximityPrecisionView::AttributeScale => ProximityPrecision::AttributeScale,
}
}
}
#[cfg(test)]
pub(crate) mod test {
use super::*;
@ -692,6 +743,7 @@ pub(crate) mod test {
dictionary: Setting::NotSet,
synonyms: Setting::NotSet,
distinct_attribute: Setting::NotSet,
proximity_precision: Setting::NotSet,
typo_tolerance: Setting::NotSet,
faceting: Setting::NotSet,
pagination: Setting::NotSet,
@ -716,6 +768,7 @@ pub(crate) mod test {
dictionary: Setting::NotSet,
synonyms: Setting::NotSet,
distinct_attribute: Setting::NotSet,
proximity_precision: Setting::NotSet,
typo_tolerance: Setting::NotSet,
faceting: Setting::NotSet,
pagination: Setting::NotSet,


@ -39,7 +39,7 @@ byte-unit = { version = "4.0.19", default-features = false, features = [
bytes = "1.4.0"
clap = { version = "4.2.1", features = ["derive", "env"] }
crossbeam-channel = "0.5.8"
deserr = { version = "0.6.0", features = ["actix-web"]}
deserr = { version = "0.6.0", features = ["actix-web"] }
dump = { path = "../dump" }
either = "1.8.1"
env_logger = "0.10.0"


@ -48,6 +48,8 @@ pub struct RuntimeTogglableFeatures {
pub metrics: Option<bool>,
#[deserr(default)]
pub export_puffin_reports: Option<bool>,
#[deserr(default)]
pub proximity_precision: Option<bool>,
}
async fn patch_features(
@ -70,6 +72,10 @@ async fn patch_features(
.0
.export_puffin_reports
.unwrap_or(old_features.export_puffin_reports),
proximity_precision: new_features
.0
.proximity_precision
.unwrap_or(old_features.proximity_precision),
};
// explicitly destructure for analytics rather than using the `Serialize` implementation, because
@ -80,6 +86,7 @@ async fn patch_features(
vector_store,
metrics,
export_puffin_reports,
proximity_precision,
} = new_features;
analytics.publish(
@ -89,6 +96,7 @@ async fn patch_features(
"vector_store": vector_store,
"metrics": metrics,
"export_puffin_reports": export_puffin_reports,
"proximity_precision": proximity_precision,
}),
Some(&req),
);


@ -3,7 +3,7 @@ use std::io::ErrorKind;
use actix_web::http::header::CONTENT_TYPE;
use actix_web::web::Data;
use actix_web::{web, HttpMessage, HttpRequest, HttpResponse};
use bstr::ByteSlice;
use bstr::ByteSlice as _;
use deserr::actix_web::{AwebJson, AwebQueryParameter};
use deserr::Deserr;
use futures::StreamExt;


@ -78,6 +78,7 @@ macro_rules! make_setting_route {
let body = body.into_inner();
#[allow(clippy::redundant_closure_call)]
$analytics(&body, &req);
let new_settings = Settings {
@ -434,6 +435,30 @@ make_setting_route!(
}
);
make_setting_route!(
"/proximity-precision",
put,
meilisearch_types::settings::ProximityPrecisionView,
meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsProximityPrecision,
>,
proximity_precision,
"proximityPrecision",
analytics,
|precision: &Option<meilisearch_types::settings::ProximityPrecisionView>, req: &HttpRequest| {
use serde_json::json;
analytics.publish(
"ProximityPrecision Updated".to_string(),
json!({
"proximity_precision": {
"set": precision.is_some(),
}
}),
Some(req),
);
}
);
make_setting_route!(
"/ranking-rules",
put,
@ -540,6 +565,7 @@ generate_configure!(
displayed_attributes,
searchable_attributes,
distinct_attribute,
proximity_precision,
stop_words,
separator_tokens,
non_separator_tokens,
@ -593,6 +619,9 @@ pub async fn update_all(
"distinct_attribute": {
"set": new_settings.distinct_attribute.as_ref().set().is_some()
},
"proximity_precision": {
"set": new_settings.proximity_precision.as_ref().set().is_some()
},
"typo_tolerance": {
"enabled": new_settings.typo_tolerance
.as_ref()


@ -46,49 +46,46 @@ pub async fn multi_search_with_post(
// Explicitly expect a `(ResponseError, usize)` for the error type rather than `ResponseError` only,
// so that `?` doesn't work if it doesn't use `with_index`, ensuring that it is not forgotten in case of code
// changes.
let search_results: Result<_, (ResponseError, usize)> = (|| {
async {
let mut search_results = Vec::with_capacity(queries.len());
for (query_index, (index_uid, mut query)) in
queries.into_iter().map(SearchQueryWithIndex::into_index_query).enumerate()
let search_results: Result<_, (ResponseError, usize)> = async {
let mut search_results = Vec::with_capacity(queries.len());
for (query_index, (index_uid, mut query)) in
queries.into_iter().map(SearchQueryWithIndex::into_index_query).enumerate()
{
debug!("multi-search #{query_index}: called with params: {:?}", query);
// Check index from API key
if !index_scheduler.filters().is_index_authorized(&index_uid) {
return Err(AuthenticationError::InvalidToken).with_index(query_index);
}
// Apply search rules from tenant token
if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid)
{
debug!("multi-search #{query_index}: called with params: {:?}", query);
add_search_rules(&mut query, search_rules);
}
// Check index from API key
if !index_scheduler.filters().is_index_authorized(&index_uid) {
return Err(AuthenticationError::InvalidToken).with_index(query_index);
}
// Apply search rules from tenant token
if let Some(search_rules) =
index_scheduler.filters().get_index_search_rules(&index_uid)
{
add_search_rules(&mut query, search_rules);
}
let index = index_scheduler
.index(&index_uid)
.map_err(|err| {
let mut err = ResponseError::from(err);
// Patch the HTTP status code to 400 as it defaults to 404 for `index_not_found`, but
// here the resource not found is not part of the URL.
err.code = StatusCode::BAD_REQUEST;
err
})
.with_index(query_index)?;
let index = index_scheduler
.index(&index_uid)
.map_err(|err| {
let mut err = ResponseError::from(err);
// Patch the HTTP status code to 400 as it defaults to 404 for `index_not_found`, but
// here the resource not found is not part of the URL.
err.code = StatusCode::BAD_REQUEST;
err
})
let search_result =
tokio::task::spawn_blocking(move || perform_search(&index, query, features))
.await
.with_index(query_index)?;
let search_result =
tokio::task::spawn_blocking(move || perform_search(&index, query, features))
.await
.with_index(query_index)?;
search_results.push(SearchResultWithIndex {
index_uid: index_uid.into_inner(),
result: search_result.with_index(query_index)?,
});
}
Ok(search_results)
search_results.push(SearchResultWithIndex {
index_uid: index_uid.into_inner(),
result: search_result.with_index(query_index)?,
});
}
})()
Ok(search_results)
}
.await;
if search_results.is_ok() {


@ -360,6 +360,7 @@ fn prepare_search<'t>(
let max_total_hits = index
.pagination_max_total_hits(rtxn)
.map_err(milli::Error::from)?
.map(|x| x as usize)
.unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS);
search.exhaustive_number_hits(is_finite_pagination);
@ -586,6 +587,7 @@ pub fn perform_search(
let max_values_by_facet = index
.max_values_per_facet(&rtxn)
.map_err(milli::Error::from)?
.map(|x| x as usize)
.unwrap_or(DEFAULT_VALUES_PER_FACET);
facet_distribution.max_values_per_facet(max_values_by_facet);


@ -20,6 +20,8 @@ pub enum GetDump {
RubyGemsWithSettingsV4,
TestV5,
TestV6WithExperimental,
}
impl GetDump {
@ -68,6 +70,10 @@ impl GetDump {
GetDump::TestV5 => {
exist_relative_path!("tests/assets/v5_v0.28.0_test_dump.dump").into()
}
GetDump::TestV6WithExperimental => exist_relative_path!(
"tests/assets/v6_v1.6.0_use_deactivated_experimental_setting.dump"
)
.into(),
}
}
}


@ -59,6 +59,7 @@ async fn import_dump_v1_movie_raw() {
"dictionary": [],
"synonyms": {},
"distinctAttribute": null,
"proximityPrecision": null,
"typoTolerance": {
"enabled": true,
"minWordSizeForTypos": {
@ -219,6 +220,7 @@ async fn import_dump_v1_movie_with_settings() {
"dictionary": [],
"synonyms": {},
"distinctAttribute": null,
"proximityPrecision": null,
"typoTolerance": {
"enabled": true,
"minWordSizeForTypos": {
@ -365,6 +367,7 @@ async fn import_dump_v1_rubygems_with_settings() {
"dictionary": [],
"synonyms": {},
"distinctAttribute": null,
"proximityPrecision": null,
"typoTolerance": {
"enabled": true,
"minWordSizeForTypos": {
@ -497,6 +500,7 @@ async fn import_dump_v2_movie_raw() {
"dictionary": [],
"synonyms": {},
"distinctAttribute": null,
"proximityPrecision": null,
"typoTolerance": {
"enabled": true,
"minWordSizeForTypos": {
@ -641,6 +645,7 @@ async fn import_dump_v2_movie_with_settings() {
"dictionary": [],
"synonyms": {},
"distinctAttribute": null,
"proximityPrecision": null,
"typoTolerance": {
"enabled": true,
"minWordSizeForTypos": {
@ -784,6 +789,7 @@ async fn import_dump_v2_rubygems_with_settings() {
"dictionary": [],
"synonyms": {},
"distinctAttribute": null,
"proximityPrecision": null,
"typoTolerance": {
"enabled": true,
"minWordSizeForTypos": {
@ -916,6 +922,7 @@ async fn import_dump_v3_movie_raw() {
"dictionary": [],
"synonyms": {},
"distinctAttribute": null,
"proximityPrecision": null,
"typoTolerance": {
"enabled": true,
"minWordSizeForTypos": {
@ -1060,6 +1067,7 @@ async fn import_dump_v3_movie_with_settings() {
"dictionary": [],
"synonyms": {},
"distinctAttribute": null,
"proximityPrecision": null,
"typoTolerance": {
"enabled": true,
"minWordSizeForTypos": {
@ -1203,6 +1211,7 @@ async fn import_dump_v3_rubygems_with_settings() {
"dictionary": [],
"synonyms": {},
"distinctAttribute": null,
"proximityPrecision": null,
"typoTolerance": {
"enabled": true,
"minWordSizeForTypos": {
@ -1335,6 +1344,7 @@ async fn import_dump_v4_movie_raw() {
"dictionary": [],
"synonyms": {},
"distinctAttribute": null,
"proximityPrecision": null,
"typoTolerance": {
"enabled": true,
"minWordSizeForTypos": {
@ -1479,6 +1489,7 @@ async fn import_dump_v4_movie_with_settings() {
"dictionary": [],
"synonyms": {},
"distinctAttribute": null,
"proximityPrecision": null,
"typoTolerance": {
"enabled": true,
"minWordSizeForTypos": {
@ -1622,6 +1633,7 @@ async fn import_dump_v4_rubygems_with_settings() {
"dictionary": [],
"synonyms": {},
"distinctAttribute": null,
"proximityPrecision": null,
"typoTolerance": {
"enabled": true,
"minWordSizeForTypos": {
@ -1810,3 +1822,108 @@ async fn import_dump_v5() {
json_string!(tasks, { ".results[].details.dumpUid" => "[uid]", ".results[].duration" => "[duration]" , ".results[].startedAt" => "[date]" , ".results[].finishedAt" => "[date]" })
);
}
#[actix_rt::test]
async fn import_dump_v6_containing_experimental_features() {
let temp = tempfile::tempdir().unwrap();
let options = Opt {
import_dump: Some(GetDump::TestV6WithExperimental.path()),
..default_settings(temp.path())
};
let mut server = Server::new_auth_with_options(options, temp).await;
server.use_api_key("MASTER_KEY");
let (indexes, code) = server.list_indexes(None, None).await;
assert_eq!(code, 200, "{indexes}");
assert_eq!(indexes["results"].as_array().unwrap().len(), 1);
assert_eq!(indexes["results"][0]["uid"], json!("movies"));
assert_eq!(indexes["results"][0]["primaryKey"], json!("id"));
let (response, code) = server.get_features().await;
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"scoreDetails": false,
"vectorStore": false,
"metrics": false,
"exportPuffinReports": false,
"proximityPrecision": false
}
"###);
let index = server.index("movies");
let (response, code) = index.settings().await;
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"displayedAttributes": [
"*"
],
"searchableAttributes": [
"*"
],
"filterableAttributes": [],
"sortableAttributes": [],
"rankingRules": [
"words",
"typo",
"proximity"
],
"stopWords": [],
"nonSeparatorTokens": [],
"separatorTokens": [],
"dictionary": [],
"synonyms": {},
"distinctAttribute": null,
"proximityPrecision": "attributeScale",
"typoTolerance": {
"enabled": true,
"minWordSizeForTypos": {
"oneTypo": 5,
"twoTypos": 9
},
"disableOnWords": [],
"disableOnAttributes": []
},
"faceting": {
"maxValuesPerFacet": 100,
"sortFacetValuesBy": {
"*": "alpha"
}
},
"pagination": {
"maxTotalHits": 1000
}
}
"###);
// the expected order is [1, 3, 2] instead of [3, 1, 2]
// because the attribute scale doesn't make the difference between 1 and 3.
index
.search(json!({"q": "the soup of day"}), |response, code| {
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @r###"
[
{
"id": 1,
"a": "Soup of the day",
"b": "many the fish"
},
{
"id": 3,
"a": "the Soup of day",
"b": "many the fish"
},
{
"id": 2,
"a": "Soup of day",
"b": "many the lazy fish"
}
]
"###);
})
.await;
}


@ -21,7 +21,8 @@ async fn experimental_features() {
"scoreDetails": false,
"vectorStore": false,
"metrics": false,
"exportPuffinReports": false
"exportPuffinReports": false,
"proximityPrecision": false
}
"###);
@ -33,7 +34,8 @@ async fn experimental_features() {
"scoreDetails": false,
"vectorStore": true,
"metrics": false,
"exportPuffinReports": false
"exportPuffinReports": false,
"proximityPrecision": false
}
"###);
@ -45,7 +47,8 @@ async fn experimental_features() {
"scoreDetails": false,
"vectorStore": true,
"metrics": false,
"exportPuffinReports": false
"exportPuffinReports": false,
"proximityPrecision": false
}
"###);
@ -58,7 +61,8 @@ async fn experimental_features() {
"scoreDetails": false,
"vectorStore": true,
"metrics": false,
"exportPuffinReports": false
"exportPuffinReports": false,
"proximityPrecision": false
}
"###);
@ -71,7 +75,8 @@ async fn experimental_features() {
"scoreDetails": false,
"vectorStore": true,
"metrics": false,
"exportPuffinReports": false
"exportPuffinReports": false,
"proximityPrecision": false
}
"###);
}
@ -91,7 +96,8 @@ async fn experimental_feature_metrics() {
"scoreDetails": false,
"vectorStore": false,
"metrics": true,
"exportPuffinReports": false
"exportPuffinReports": false,
"proximityPrecision": false
}
"###);
@ -146,7 +152,7 @@ async fn errors() {
meili_snap::snapshot!(code, @"400 Bad Request");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"message": "Unknown field `NotAFeature`: expected one of `scoreDetails`, `vectorStore`, `metrics`, `exportPuffinReports`",
"message": "Unknown field `NotAFeature`: expected one of `scoreDetails`, `vectorStore`, `metrics`, `exportPuffinReports`, `proximityPrecision`",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"


@ -4,7 +4,7 @@ use once_cell::sync::Lazy;
use crate::common::{Server, Value};
use crate::json;
pub(self) static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
json!([
{
"id": 1,
@ -107,8 +107,8 @@ pub(self) static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
])
});
pub(self) static DOCUMENT_PRIMARY_KEY: &str = "id";
pub(self) static DOCUMENT_DISTINCT_KEY: &str = "product_id";
static DOCUMENT_PRIMARY_KEY: &str = "id";
static DOCUMENT_DISTINCT_KEY: &str = "product_id";
/// testing: https://github.com/meilisearch/meilisearch/issues/4078
#[actix_rt::test]


@ -4,7 +4,7 @@ use once_cell::sync::Lazy;
use crate::common::{Server, Value};
use crate::json;
pub(self) static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
json!([
{
"title": "Shazam!",


@ -4,7 +4,7 @@ use once_cell::sync::Lazy;
use crate::common::{Server, Value};
use crate::json;
pub(self) static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
json!([
{
"id": 1,


@ -15,7 +15,7 @@ use once_cell::sync::Lazy;
use crate::common::{Server, Value};
use crate::json;
pub(self) static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
json!([
{
"title": "Shazam!",
@ -40,7 +40,7 @@ pub(self) static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
])
});
pub(self) static NESTED_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
static NESTED_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
json!([
{
"id": 852,


@ -54,7 +54,7 @@ async fn get_settings() {
let (response, code) = index.settings().await;
assert_eq!(code, 200);
let settings = response.as_object().unwrap();
assert_eq!(settings.keys().len(), 14);
assert_eq!(settings.keys().len(), 15);
assert_eq!(settings["displayedAttributes"], json!(["*"]));
assert_eq!(settings["searchableAttributes"], json!(["*"]));
assert_eq!(settings["filterableAttributes"], json!([]));


@ -1,4 +1,5 @@
mod distinct;
mod errors;
mod get_settings;
mod proximity_settings;
mod tokenizer_customization;


@ -0,0 +1,396 @@
use meili_snap::{json_string, snapshot};
use once_cell::sync::Lazy;
use crate::common::Server;
use crate::json;
static DOCUMENTS: Lazy<crate::common::Value> = Lazy::new(|| {
json!([
{
"id": 1,
"a": "Soup of the day",
"b": "many the fish",
},
{
"id": 2,
"a": "Soup of day",
"b": "many the lazy fish",
},
{
"id": 3,
"a": "the Soup of day",
"b": "many the fish",
},
])
});
#[actix_rt::test]
async fn attribute_scale_search() {
let server = Server::new().await;
let (response, code) = server.set_features(json!({"proximityPrecision": true})).await;
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"scoreDetails": false,
"vectorStore": false,
"metrics": false,
"exportPuffinReports": false,
"proximityPrecision": true
}
"###);
let index = server.index("test");
index.add_documents(DOCUMENTS.clone(), None).await;
index.wait_task(0).await;
let (response, code) = index
.update_settings(json!({
"proximityPrecision": "attributeScale",
"rankingRules": ["words", "typo", "proximity"],
}))
.await;
assert_eq!("202", code.as_str(), "{:?}", response);
index.wait_task(1).await;
        // the expected order is [1, 3, 2] instead of [3, 1, 2]
        // because the attribute scale doesn't differentiate between documents 1 and 3.
index
.search(json!({"q": "the soup of day"}), |response, code| {
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @r###"
[
{
"id": 1,
"a": "Soup of the day",
"b": "many the fish"
},
{
"id": 3,
"a": "the Soup of day",
"b": "many the fish"
},
{
"id": 2,
"a": "Soup of day",
"b": "many the lazy fish"
}
]
"###);
})
.await;
        // the expected order is [1, 2, 3] instead of [1, 3, 2]
        // because the attribute scale sees all the words in the same attribute
        // and so doesn't differentiate between the documents.
index
.search(json!({"q": "many the fish"}), |response, code| {
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @r###"
[
{
"id": 1,
"a": "Soup of the day",
"b": "many the fish"
},
{
"id": 2,
"a": "Soup of day",
"b": "many the lazy fish"
},
{
"id": 3,
"a": "the Soup of day",
"b": "many the fish"
}
]
"###);
})
.await;
}
#[actix_rt::test]
async fn attribute_scale_phrase_search() {
let server = Server::new().await;
let (response, code) = server.set_features(json!({"proximityPrecision": true})).await;
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"scoreDetails": false,
"vectorStore": false,
"metrics": false,
"exportPuffinReports": false,
"proximityPrecision": true
}
"###);
let index = server.index("test");
index.add_documents(DOCUMENTS.clone(), None).await;
index.wait_task(0).await;
let (_response, _code) = index
.update_settings(json!({
"proximityPrecision": "attributeScale",
"rankingRules": ["words", "typo", "proximity"],
}))
.await;
index.wait_task(1).await;
        // the expected order is [1, 3] instead of [3, 1]
        // because the attribute scale doesn't differentiate between documents 1 and 3.
        // But document 2 shouldn't be returned because "the" is not in the same attribute.
index
.search(json!({"q": "\"the soup of day\""}), |response, code| {
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @r###"
[
{
"id": 1,
"a": "Soup of the day",
"b": "many the fish"
},
{
"id": 3,
"a": "the Soup of day",
"b": "many the fish"
}
]
"###);
})
.await;
        // the expected order is [1, 2, 3] instead of [1, 3]
        // because the attribute scale sees all the words in the same attribute
        // and so doesn't differentiate between the documents.
index
.search(json!({"q": "\"many the fish\""}), |response, code| {
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @r###"
[
{
"id": 1,
"a": "Soup of the day",
"b": "many the fish"
},
{
"id": 2,
"a": "Soup of day",
"b": "many the lazy fish"
},
{
"id": 3,
"a": "the Soup of day",
"b": "many the fish"
}
]
"###);
})
.await;
}
#[actix_rt::test]
async fn word_scale_set_and_reset() {
let server = Server::new().await;
let (response, code) = server.set_features(json!({"proximityPrecision": true})).await;
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"scoreDetails": false,
"vectorStore": false,
"metrics": false,
"exportPuffinReports": false,
"proximityPrecision": true
}
"###);
let index = server.index("test");
index.add_documents(DOCUMENTS.clone(), None).await;
index.wait_task(0).await;
    // Set and reset the setting, ensuring the swap between the two settings is applied.
let (_response, _code) = index
.update_settings(json!({
"proximityPrecision": "attributeScale",
"rankingRules": ["words", "typo", "proximity"],
}))
.await;
index.wait_task(1).await;
let (_response, _code) = index
.update_settings(json!({
"proximityPrecision": "wordScale",
"rankingRules": ["words", "typo", "proximity"],
}))
.await;
index.wait_task(2).await;
// [3, 1, 2]
index
.search(json!({"q": "the soup of day"}), |response, code| {
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @r###"
[
{
"id": 3,
"a": "the Soup of day",
"b": "many the fish"
},
{
"id": 1,
"a": "Soup of the day",
"b": "many the fish"
},
{
"id": 2,
"a": "Soup of day",
"b": "many the lazy fish"
}
]
"###);
})
.await;
// [1, 3, 2]
index
.search(json!({"q": "many the fish"}), |response, code| {
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @r###"
[
{
"id": 1,
"a": "Soup of the day",
"b": "many the fish"
},
{
"id": 3,
"a": "the Soup of day",
"b": "many the fish"
},
{
"id": 2,
"a": "Soup of day",
"b": "many the lazy fish"
}
]
"###);
})
.await;
// [3]
index
.search(json!({"q": "\"the soup of day\""}), |response, code| {
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @r###"
[
{
"id": 3,
"a": "the Soup of day",
"b": "many the fish"
}
]
"###);
})
.await;
// [1, 3]
index
.search(json!({"q": "\"many the fish\""}), |response, code| {
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @r###"
[
{
"id": 1,
"a": "Soup of the day",
"b": "many the fish"
},
{
"id": 3,
"a": "the Soup of day",
"b": "many the fish"
}
]
"###);
})
.await;
}
#[actix_rt::test]
async fn attribute_scale_default_ranking_rules() {
let server = Server::new().await;
let (response, code) = server.set_features(json!({"proximityPrecision": true})).await;
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"scoreDetails": false,
"vectorStore": false,
"metrics": false,
"exportPuffinReports": false,
"proximityPrecision": true
}
"###);
let index = server.index("test");
index.add_documents(DOCUMENTS.clone(), None).await;
index.wait_task(0).await;
let (response, code) = index
.update_settings(json!({
"proximityPrecision": "attributeScale"
}))
.await;
assert_eq!("202", code.as_str(), "{:?}", response);
index.wait_task(1).await;
// the expected order is [3, 1, 2]
index
.search(json!({"q": "the soup of day"}), |response, code| {
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @r###"
[
{
"id": 3,
"a": "the Soup of day",
"b": "many the fish"
},
{
"id": 1,
"a": "Soup of the day",
"b": "many the fish"
},
{
"id": 2,
"a": "Soup of day",
"b": "many the lazy fish"
}
]
"###);
})
.await;
        // the expected order is [1, 3, 2] instead of [1, 3]
        // because the attribute scale sees all the words in the same attribute
        // and so doesn't filter out document 2.
index
.search(json!({"q": "\"many the fish\""}), |response, code| {
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @r###"
[
{
"id": 1,
"a": "Soup of the day",
"b": "many the fish"
},
{
"id": 3,
"a": "the Soup of day",
"b": "many the fish"
},
{
"id": 2,
"a": "Soup of day",
"b": "many the lazy fish"
}
]
"###);
})
.await;
}
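The rule the tests above exercise, in one place: with `wordScale`, the proximity rank between two query terms comes from their word positions; with `attributeScale`, any two terms co-occurring in the same attribute already get the best cost, so finer word distances are ignored. A hedged sketch of that rule with illustrative names (the real computation lives in milli and is more involved):

```rust
/// Illustrative only: models why documents 1 and 3 tie under
/// `attributeScale` while `wordScale` can still order them.
fn proximity_cost(word_scale: bool, pos_a: u32, pos_b: u32, same_attribute: bool) -> u32 {
    if !same_attribute {
        u32::MAX // terms in different attributes are never considered close
    } else if word_scale {
        pos_a.abs_diff(pos_b) // wordScale: the exact word distance matters
    } else {
        1 // attributeScale: co-occurrence in the attribute is enough
    }
}
```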

View File

@ -7,8 +7,8 @@ use clap::{Parser, Subcommand};
use dump::{DumpWriter, IndexMetadata};
use file_store::FileStore;
use meilisearch_auth::AuthController;
use meilisearch_types::heed::types::{OwnedType, SerdeJson, Str};
use meilisearch_types::heed::{Database, Env, EnvOpenOptions, PolyDatabase, RoTxn, RwTxn};
use meilisearch_types::heed::types::{SerdeJson, Str};
use meilisearch_types::heed::{Database, Env, EnvOpenOptions, RoTxn, RwTxn, Unspecified};
use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader};
use meilisearch_types::milli::{obkv_to_json, BEU32};
use meilisearch_types::tasks::{Status, Task};
@ -148,15 +148,17 @@ fn try_opening_poly_database(
env: &Env,
rtxn: &RoTxn,
db_name: &str,
) -> anyhow::Result<PolyDatabase> {
env.open_poly_database(rtxn, Some(db_name))
) -> anyhow::Result<Database<Unspecified, Unspecified>> {
env.database_options()
.name(db_name)
.open(rtxn)
.with_context(|| format!("While opening the {db_name:?} poly database"))?
.with_context(|| format!("Missing the {db_name:?} poly database"))
}
fn try_clearing_poly_database(
wtxn: &mut RwTxn,
database: PolyDatabase,
database: Database<Unspecified, Unspecified>,
db_name: &str,
) -> anyhow::Result<()> {
database.clear(wtxn).with_context(|| format!("While clearing the {db_name:?} database"))
@ -212,7 +214,7 @@ fn export_a_dump(
eprintln!("Successfully dumped {count} keys!");
let rtxn = env.read_txn()?;
let all_tasks: Database<OwnedType<BEU32>, SerdeJson<Task>> =
let all_tasks: Database<BEU32, SerdeJson<Task>> =
try_opening_database(&env, &rtxn, "all-tasks")?;
let index_mapping: Database<Str, UuidCodec> =
try_opening_database(&env, &rtxn, "index-mapping")?;
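The `PolyDatabase` replacement above is the pattern used throughout this PR: open a single untyped `Database<Unspecified, Unspecified>`, then remap it to concrete codecs at each call site. A minimal sketch of a read through such a database, assuming the heed 0.20-alpha API used in this file (the helper name and key are hypothetical):

```rust
use meilisearch_types::heed::types::{SerdeJson, Str};
use meilisearch_types::heed::{Database, RoTxn, Unspecified};
use meilisearch_types::tasks::Task;

// Hypothetical helper: remap the untyped database to the codecs the
// value was written with, then read it back.
fn read_task(
    db: Database<Unspecified, Unspecified>,
    rtxn: &RoTxn,
    key: &str,
) -> anyhow::Result<Option<Task>> {
    Ok(db.remap_types::<Str, SerdeJson<Task>>().get(rtxn, key)?)
}
```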

View File

@ -1,7 +1,7 @@
use std::borrow::Cow;
use std::convert::TryInto;
use meilisearch_types::heed::{BytesDecode, BytesEncode};
use meilisearch_types::heed::{BoxedError, BytesDecode, BytesEncode};
use uuid::Uuid;
/// A heed codec for values of the `Uuid` struct.
@ -10,15 +10,15 @@ pub struct UuidCodec;
impl<'a> BytesDecode<'a> for UuidCodec {
type DItem = Uuid;
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
bytes.try_into().ok().map(Uuid::from_bytes)
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
bytes.try_into().map(Uuid::from_bytes).map_err(Into::into)
}
}
impl BytesEncode<'_> for UuidCodec {
type EItem = Uuid;
fn bytes_encode(item: &Self::EItem) -> Option<Cow<[u8]>> {
Some(Cow::Borrowed(item.as_bytes()))
fn bytes_encode(item: &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
Ok(Cow::Borrowed(item.as_bytes()))
}
}
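This codec is the template for every migration below: `bytes_encode`/`bytes_decode` now return `Result<_, BoxedError>` instead of `Option<_>`, so `map_err(Into::into)` and `?` replace the old `.ok()?` chains. `BoxedError` is assumed to be heed's boxed `std::error::Error` alias, which is what makes those blanket conversions compile:

```rust
// Assumed shape of heed's alias: any error implementing std::error::Error
// (+ Send + Sync) converts into it via `From`, so `?` works directly.
type BoxedError = Box<dyn std::error::Error + Send + Sync + 'static>;

fn decode_be_u32(bytes: &[u8]) -> Result<u32, BoxedError> {
    // TryFromSliceError -> BoxedError through the blanket From impl.
    let array: [u8; 4] = bytes.try_into()?;
    Ok(u32::from_be_bytes(array))
}
```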

View File

@ -20,7 +20,7 @@ byteorder = "1.4.3"
charabia = { version = "0.8.5", default-features = false }
concat-arrays = "0.1.2"
crossbeam-channel = "0.5.8"
deserr = { version = "0.6.0", features = ["actix-web"]}
deserr = "0.6.0"
either = { version = "1.8.1", features = ["serde"] }
flatten-serde-json = { path = "../flatten-serde-json" }
fst = "0.4.7"
@ -29,8 +29,8 @@ geoutils = "0.5.1"
grenad = { version = "0.4.5", default-features = false, features = [
"rayon", "tempfile"
] }
heed = { git = "https://github.com/meilisearch/heed", tag = "v0.12.7", default-features = false, features = [
"lmdb", "read-txn-no-tls"
heed = { version = "0.20.0-alpha.9", default-features = false, features = [
"serde-json", "serde-bincode", "read-txn-no-tls"
] }
indexmap = { version = "2.0.0", features = ["serde"] }
instant-distance = { version = "0.6.1", features = ["with-serde"] }

View File

@ -152,7 +152,7 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco
valid_fields: BTreeSet<String>,
hidden_fields: bool,
},
#[error("{}", HeedError::BadOpenOptions)]
#[error("an environment is already opened with different options")]
InvalidLmdbOpenOptions,
#[error("You must specify where `sort` is listed in the rankingRules setting to use the sort parameter at search time.")]
SortRankingRuleMissing,
@ -326,11 +326,12 @@ impl From<HeedError> for Error {
HeedError::Mdb(MdbError::MapFull) => UserError(MaxDatabaseSizeReached),
HeedError::Mdb(MdbError::Invalid) => UserError(InvalidStoreFile),
HeedError::Mdb(error) => InternalError(Store(error)),
HeedError::Encoding => InternalError(Serialization(Encoding { db_name: None })),
HeedError::Decoding => InternalError(Serialization(Decoding { db_name: None })),
// TODO use the encoding
HeedError::Encoding(_) => InternalError(Serialization(Encoding { db_name: None })),
HeedError::Decoding(_) => InternalError(Serialization(Decoding { db_name: None })),
HeedError::InvalidDatabaseTyping => InternalError(InvalidDatabaseTyping),
HeedError::DatabaseClosing => InternalError(DatabaseClosing),
HeedError::BadOpenOptions => UserError(InvalidLmdbOpenOptions),
HeedError::BadOpenOptions { .. } => UserError(InvalidLmdbOpenOptions),
}
}
}

View File

@ -1,6 +1,6 @@
use std::collections::HashMap;
use heed::types::{OwnedType, Str};
use heed::types::Str;
use heed::{Database, RoIter, RoTxn, RwTxn};
use crate::{DocumentId, BEU32};
@ -16,10 +16,10 @@ pub struct DocumentOperation {
pub kind: DocumentOperationKind,
}
pub struct ExternalDocumentsIds(Database<Str, OwnedType<BEU32>>);
pub struct ExternalDocumentsIds(Database<Str, BEU32>);
impl ExternalDocumentsIds {
pub fn new(db: Database<Str, OwnedType<BEU32>>) -> ExternalDocumentsIds {
pub fn new(db: Database<Str, BEU32>) -> ExternalDocumentsIds {
ExternalDocumentsIds(db)
}
@ -29,7 +29,7 @@ impl ExternalDocumentsIds {
}
pub fn get<A: AsRef<str>>(&self, rtxn: &RoTxn, external_id: A) -> heed::Result<Option<u32>> {
Ok(self.0.get(rtxn, external_id.as_ref())?.map(|x| x.get()))
self.0.get(rtxn, external_id.as_ref())
}
    /// A helper function to debug this type, returns a `HashMap` of both,
@ -38,7 +38,7 @@ impl ExternalDocumentsIds {
let mut map = HashMap::default();
for result in self.0.iter(rtxn)? {
let (external, internal) = result?;
map.insert(external.to_owned(), internal.get());
map.insert(external.to_owned(), internal);
}
Ok(map)
}
@ -55,7 +55,7 @@ impl ExternalDocumentsIds {
for DocumentOperation { external_id, internal_id, kind } in operations {
match kind {
DocumentOperationKind::Create => {
self.0.put(wtxn, &external_id, &BEU32::new(internal_id))?;
self.0.put(wtxn, &external_id, &internal_id)?;
}
DocumentOperationKind::Delete => {
if !self.0.delete(wtxn, &external_id)? {
@ -69,7 +69,7 @@ impl ExternalDocumentsIds {
}
/// Returns an iterator over all the external ids.
pub fn iter<'t>(&self, rtxn: &'t RoTxn) -> heed::Result<RoIter<'t, Str, OwnedType<BEU32>>> {
pub fn iter<'t>(&self, rtxn: &'t RoTxn) -> heed::Result<RoIter<'t, Str, BEU32>> {
self.0.iter(rtxn)
}
}
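The `BEU32::new(...)`/`.get()` wrapping disappears because the heed 0.20 integer codecs encode and decode plain Rust integers. `BEU32` is assumed to now be an alias for the big-endian `u32` codec, along these lines:

```rust
// Assumed aliases (milli defines them next to DocumentId and friends):
pub type BEU16 = heed::types::U16<heed::byteorder::BE>;
pub type BEU32 = heed::types::U32<heed::byteorder::BE>;
```

With these, `put` takes a `&u32` directly and `get` hands back a `u32`, which is why the old `.map(|x| x.get())` is gone from `get` above.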

View File

@ -2,26 +2,28 @@ use std::borrow::Cow;
use std::convert::TryInto;
use std::str;
use heed::BoxedError;
pub struct BEU16StrCodec;
impl<'a> heed::BytesDecode<'a> for BEU16StrCodec {
type DItem = (u16, &'a str);
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
let (n_bytes, str_bytes) = bytes.split_at(2);
let n = n_bytes.try_into().map(u16::from_be_bytes).ok()?;
let s = str::from_utf8(str_bytes).ok()?;
Some((n, s))
let n = n_bytes.try_into().map(u16::from_be_bytes)?;
let s = str::from_utf8(str_bytes)?;
Ok((n, s))
}
}
impl<'a> heed::BytesEncode<'a> for BEU16StrCodec {
type EItem = (u16, &'a str);
fn bytes_encode((n, s): &Self::EItem) -> Option<Cow<[u8]>> {
fn bytes_encode((n, s): &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
let mut bytes = Vec::with_capacity(s.len() + 2);
bytes.extend_from_slice(&n.to_be_bytes());
bytes.extend_from_slice(s.as_bytes());
Some(Cow::Owned(bytes))
Ok(Cow::Owned(bytes))
}
}

View File

@ -2,26 +2,28 @@ use std::borrow::Cow;
use std::convert::TryInto;
use std::str;
use heed::BoxedError;
pub struct BEU32StrCodec;
impl<'a> heed::BytesDecode<'a> for BEU32StrCodec {
type DItem = (u32, &'a str);
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
let (n_bytes, str_bytes) = bytes.split_at(4);
let n = n_bytes.try_into().map(u32::from_be_bytes).ok()?;
let s = str::from_utf8(str_bytes).ok()?;
Some((n, s))
let n = n_bytes.try_into().map(u32::from_be_bytes)?;
let s = str::from_utf8(str_bytes)?;
Ok((n, s))
}
}
impl<'a> heed::BytesEncode<'a> for BEU32StrCodec {
type EItem = (u32, &'a str);
fn bytes_encode((n, s): &Self::EItem) -> Option<Cow<[u8]>> {
fn bytes_encode((n, s): &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
let mut bytes = Vec::with_capacity(s.len() + 4);
bytes.extend_from_slice(&n.to_be_bytes());
bytes.extend_from_slice(s.as_bytes());
Some(Cow::Owned(bytes))
Ok(Cow::Owned(bytes))
}
}

View File

@ -1,23 +1,23 @@
use std::borrow::Cow;
use heed::{BytesDecode, BytesEncode};
use heed::{BoxedError, BytesDecode, BytesEncode};
/// A codec for values of type `&[u8]`. Unlike `ByteSlice`, its `EItem` and `DItem` associated
/// A codec for values of type `&[u8]`. Unlike `Bytes`, its `EItem` and `DItem` associated
/// types are equivalent (= `&'a [u8]`) and these values can reside within another structure.
pub struct ByteSliceRefCodec;
pub struct BytesRefCodec;
impl<'a> BytesEncode<'a> for ByteSliceRefCodec {
impl<'a> BytesEncode<'a> for BytesRefCodec {
type EItem = &'a [u8];
fn bytes_encode(item: &'a Self::EItem) -> Option<Cow<'a, [u8]>> {
Some(Cow::Borrowed(item))
fn bytes_encode(item: &'a Self::EItem) -> Result<Cow<'a, [u8]>, BoxedError> {
Ok(Cow::Borrowed(item))
}
}
impl<'a> BytesDecode<'a> for ByteSliceRefCodec {
impl<'a> BytesDecode<'a> for BytesRefCodec {
type DItem = &'a [u8];
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
Some(bytes)
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
Ok(bytes)
}
}

View File

@ -1,8 +1,9 @@
use std::borrow::Cow;
use std::marker::PhantomData;
use heed::{BytesDecode, BytesEncode};
use heed::{BoxedError, BytesDecode, BytesEncode};
use crate::heed_codec::SliceTooShortError;
use crate::{try_split_array_at, DocumentId, FieldId};
pub struct FieldDocIdFacetCodec<C>(PhantomData<C>);
@ -13,16 +14,16 @@ where
{
type DItem = (FieldId, DocumentId, C::DItem);
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
let (field_id_bytes, bytes) = try_split_array_at(bytes)?;
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
let (field_id_bytes, bytes) = try_split_array_at(bytes).ok_or(SliceTooShortError)?;
let field_id = u16::from_be_bytes(field_id_bytes);
let (document_id_bytes, bytes) = try_split_array_at(bytes)?;
let (document_id_bytes, bytes) = try_split_array_at(bytes).ok_or(SliceTooShortError)?;
let document_id = u32::from_be_bytes(document_id_bytes);
let value = C::bytes_decode(bytes)?;
Some((field_id, document_id, value))
Ok((field_id, document_id, value))
}
}
@ -32,13 +33,15 @@ where
{
type EItem = (FieldId, DocumentId, C::EItem);
fn bytes_encode((field_id, document_id, value): &'a Self::EItem) -> Option<Cow<[u8]>> {
fn bytes_encode(
(field_id, document_id, value): &'a Self::EItem,
) -> Result<Cow<[u8]>, BoxedError> {
let mut bytes = Vec::with_capacity(32);
bytes.extend_from_slice(&field_id.to_be_bytes()); // 2 bytes
bytes.extend_from_slice(&document_id.to_be_bytes()); // 4 bytes
let value_bytes = C::bytes_encode(value)?;
        // variable length: 16 bytes for an f64, potentially large for a string
bytes.extend_from_slice(&value_bytes);
Some(Cow::Owned(bytes))
Ok(Cow::Owned(bytes))
}
}
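`try_split_array_at` is the small milli helper these codecs lean on, and its `Option` return is what gets converted into `SliceTooShortError` above. A hedged sketch of what it is assumed to do, specialized to bytes:

```rust
/// Split off a fixed-size prefix as an array, or `None` when the slice
/// is too short. `N` is inferred from the destructuring site, e.g. 2
/// bytes for a `FieldId` and 4 for a `DocumentId`.
fn try_split_array_at<const N: usize>(slice: &[u8]) -> Option<([u8; N], &[u8])> {
    if slice.len() < N {
        return None;
    }
    let (head, tail) = slice.split_at(N);
    // The length check above guarantees this conversion succeeds.
    Some((head.try_into().unwrap(), tail))
}
```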

View File

@ -5,8 +5,8 @@ use std::borrow::Cow;
use std::convert::TryFrom;
use std::marker::PhantomData;
use heed::types::{DecodeIgnore, OwnedType};
use heed::{BytesDecode, BytesEncode};
use heed::types::DecodeIgnore;
use heed::{BoxedError, BytesDecode, BytesEncode};
use roaring::RoaringBitmap;
pub use self::field_doc_id_facet_codec::FieldDocIdFacetCodec;
@ -18,7 +18,7 @@ pub type FieldDocIdFacetF64Codec = FieldDocIdFacetCodec<OrderedF64Codec>;
pub type FieldDocIdFacetStringCodec = FieldDocIdFacetCodec<StrRefCodec>;
pub type FieldDocIdFacetIgnoreCodec = FieldDocIdFacetCodec<DecodeIgnore>;
pub type FieldIdCodec = OwnedType<BEU16>;
pub type FieldIdCodec = BEU16;
/// Tries to split a slice in half at the given middle point, returning
/// `None` if the slice is too short.
@ -58,7 +58,7 @@ where
{
type EItem = FacetGroupKey<T::EItem>;
fn bytes_encode(value: &'a Self::EItem) -> Option<Cow<'a, [u8]>> {
fn bytes_encode(value: &'a Self::EItem) -> Result<Cow<'a, [u8]>, BoxedError> {
let mut v = vec![];
v.extend_from_slice(&value.field_id.to_be_bytes());
v.extend_from_slice(&[value.level]);
@ -66,7 +66,7 @@ where
let bound = T::bytes_encode(&value.left_bound)?;
v.extend_from_slice(&bound);
Some(Cow::Owned(v))
Ok(Cow::Owned(v))
}
}
impl<'a, T> heed::BytesDecode<'a> for FacetGroupKeyCodec<T>
@ -75,11 +75,11 @@ where
{
type DItem = FacetGroupKey<T::DItem>;
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
let fid = u16::from_be_bytes(<[u8; 2]>::try_from(&bytes[0..=1]).ok()?);
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
let fid = u16::from_be_bytes(<[u8; 2]>::try_from(&bytes[0..=1])?);
let level = bytes[2];
let bound = T::bytes_decode(&bytes[3..])?;
Some(FacetGroupKey { field_id: fid, level, left_bound: bound })
Ok(FacetGroupKey { field_id: fid, level, left_bound: bound })
}
}
@ -87,17 +87,17 @@ pub struct FacetGroupValueCodec;
impl<'a> heed::BytesEncode<'a> for FacetGroupValueCodec {
type EItem = FacetGroupValue;
fn bytes_encode(value: &'a Self::EItem) -> Option<Cow<'a, [u8]>> {
fn bytes_encode(value: &'a Self::EItem) -> Result<Cow<'a, [u8]>, BoxedError> {
let mut v = vec![value.size];
CboRoaringBitmapCodec::serialize_into(&value.bitmap, &mut v);
Some(Cow::Owned(v))
Ok(Cow::Owned(v))
}
}
impl<'a> heed::BytesDecode<'a> for FacetGroupValueCodec {
type DItem = FacetGroupValue;
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
let size = bytes[0];
let bitmap = CboRoaringBitmapCodec::deserialize_from(&bytes[1..]).ok()?;
Some(FacetGroupValue { size, bitmap })
let bitmap = CboRoaringBitmapCodec::deserialize_from(&bytes[1..])?;
Ok(FacetGroupValue { size, bitmap })
}
}

View File

@ -1,37 +1,45 @@
use std::borrow::Cow;
use std::convert::TryInto;
use heed::BytesDecode;
use heed::{BoxedError, BytesDecode};
use thiserror::Error;
use crate::facet::value_encoding::f64_into_bytes;
use crate::heed_codec::SliceTooShortError;
pub struct OrderedF64Codec;
impl<'a> BytesDecode<'a> for OrderedF64Codec {
type DItem = f64;
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
if bytes.len() < 16 {
return None;
Err(SliceTooShortError.into())
} else {
bytes[8..].try_into().map(f64::from_be_bytes).map_err(Into::into)
}
let f = bytes[8..].try_into().ok().map(f64::from_be_bytes)?;
Some(f)
}
}
impl heed::BytesEncode<'_> for OrderedF64Codec {
type EItem = f64;
fn bytes_encode(f: &Self::EItem) -> Option<Cow<[u8]>> {
fn bytes_encode(f: &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
let mut buffer = [0u8; 16];
// write the globally ordered float
let bytes = f64_into_bytes(*f)?;
let bytes = f64_into_bytes(*f).ok_or(InvalidGloballyOrderedFloatError { float: *f })?;
buffer[..8].copy_from_slice(&bytes[..]);
// Then the f64 value just to be able to read it back
let bytes = f.to_be_bytes();
buffer[8..16].copy_from_slice(&bytes[..]);
Some(Cow::Owned(buffer.to_vec()))
Ok(Cow::Owned(buffer.to_vec()))
}
}
#[derive(Error, Debug)]
#[error("the float {float} cannot be converted to a globally ordered representation")]
pub struct InvalidGloballyOrderedFloatError {
float: f64,
}
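The 16-byte layout is what the `< 16` check guards: the first 8 bytes hold a globally ordered transform of the float, so LMDB's byte-wise key comparison matches numeric order, and the last 8 bytes keep the raw `f64` purely to read the value back. A hedged sketch of the classic transform `f64_into_bytes` is assumed to implement (the real milli function also rejects values such as NaN, hence its `Option` return and the new error above):

```rust
fn f64_into_ordered_bytes(f: f64) -> [u8; 8] {
    let bits = f.to_bits();
    // Negative floats: flip all bits so more-negative values sort lower.
    // Positive floats: flip only the sign bit so they sort above negatives.
    let ordered = if bits >> 63 == 1 { !bits } else { bits ^ (1u64 << 63) };
    ordered.to_be_bytes()
}
```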

View File

@ -1,5 +1,8 @@
use std::borrow::Cow;
use heed::BoxedError;
use super::SliceTooShortError;
use crate::{try_split_array_at, FieldId};
pub struct FieldIdWordCountCodec;
@ -7,21 +10,21 @@ pub struct FieldIdWordCountCodec;
impl<'a> heed::BytesDecode<'a> for FieldIdWordCountCodec {
type DItem = (FieldId, u8);
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
let (field_id_bytes, bytes) = try_split_array_at(bytes)?;
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
let (field_id_bytes, bytes) = try_split_array_at(bytes).ok_or(SliceTooShortError)?;
let field_id = u16::from_be_bytes(field_id_bytes);
let ([word_count], _nothing) = try_split_array_at(bytes)?;
Some((field_id, word_count))
let ([word_count], _nothing) = try_split_array_at(bytes).ok_or(SliceTooShortError)?;
Ok((field_id, word_count))
}
}
impl<'a> heed::BytesEncode<'a> for FieldIdWordCountCodec {
type EItem = (FieldId, u8);
fn bytes_encode((field_id, word_count): &Self::EItem) -> Option<Cow<[u8]>> {
fn bytes_encode((field_id, word_count): &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
let mut bytes = Vec::with_capacity(2 + 1);
bytes.extend_from_slice(&field_id.to_be_bytes());
bytes.push(*word_count);
Some(Cow::Owned(bytes))
Ok(Cow::Owned(bytes))
}
}

View File

@ -1,7 +1,7 @@
use std::borrow::Cow;
use fst::Set;
use heed::{BytesDecode, BytesEncode};
use heed::{BoxedError, BytesDecode, BytesEncode};
/// A codec for values of type `Set<&[u8]>`.
pub struct FstSetCodec;
@ -9,15 +9,15 @@ pub struct FstSetCodec;
impl<'a> BytesEncode<'a> for FstSetCodec {
type EItem = Set<Vec<u8>>;
fn bytes_encode(item: &'a Self::EItem) -> Option<Cow<'a, [u8]>> {
Some(Cow::Borrowed(item.as_fst().as_bytes()))
fn bytes_encode(item: &'a Self::EItem) -> Result<Cow<'a, [u8]>, BoxedError> {
Ok(Cow::Borrowed(item.as_fst().as_bytes()))
}
}
impl<'a> BytesDecode<'a> for FstSetCodec {
type DItem = Set<&'a [u8]>;
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
Set::new(bytes).ok()
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
Set::new(bytes).map_err(Into::into)
}
}

View File

@ -12,8 +12,10 @@ mod str_beu32_codec;
mod str_ref;
mod str_str_u8_codec;
pub use byte_slice_ref::ByteSliceRefCodec;
pub use byte_slice_ref::BytesRefCodec;
use heed::BoxedError;
pub use str_ref::StrRefCodec;
use thiserror::Error;
pub use self::beu16_str_codec::BEU16StrCodec;
pub use self::beu32_str_codec::BEU32StrCodec;
@ -31,5 +33,9 @@ pub use self::str_str_u8_codec::{U8StrStrCodec, UncheckedU8StrStrCodec};
pub trait BytesDecodeOwned {
type DItem;
fn bytes_decode_owned(bytes: &[u8]) -> Option<Self::DItem>;
fn bytes_decode_owned(bytes: &[u8]) -> Result<Self::DItem, BoxedError>;
}
#[derive(Error, Debug)]
#[error("the slice is too short")]
pub struct SliceTooShortError;

View File

@ -1,5 +1,6 @@
use std::borrow::Cow;
use heed::BoxedError;
use obkv::{KvReaderU16, KvWriterU16};
pub struct ObkvCodec;
@ -7,15 +8,15 @@ pub struct ObkvCodec;
impl<'a> heed::BytesDecode<'a> for ObkvCodec {
type DItem = KvReaderU16<'a>;
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
Some(KvReaderU16::new(bytes))
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
Ok(KvReaderU16::new(bytes))
}
}
impl heed::BytesEncode<'_> for ObkvCodec {
type EItem = KvWriterU16<Vec<u8>>;
fn bytes_encode(item: &Self::EItem) -> Option<Cow<[u8]>> {
item.clone().into_inner().map(Cow::Owned).ok()
fn bytes_encode(item: &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
item.clone().into_inner().map(Cow::Owned).map_err(Into::into)
}
}

View File

@ -2,7 +2,7 @@ use std::borrow::Cow;
use std::convert::TryInto;
use std::mem::size_of;
use heed::BytesDecode;
use heed::{BoxedError, BytesDecode};
use roaring::RoaringBitmap;
use crate::heed_codec::BytesDecodeOwned;
@ -19,22 +19,22 @@ impl BoRoaringBitmapCodec {
impl BytesDecode<'_> for BoRoaringBitmapCodec {
type DItem = RoaringBitmap;
fn bytes_decode(bytes: &[u8]) -> Option<Self::DItem> {
fn bytes_decode(bytes: &[u8]) -> Result<Self::DItem, BoxedError> {
let mut bitmap = RoaringBitmap::new();
for chunk in bytes.chunks(size_of::<u32>()) {
let bytes = chunk.try_into().ok()?;
let bytes = chunk.try_into()?;
bitmap.push(u32::from_ne_bytes(bytes));
}
Some(bitmap)
Ok(bitmap)
}
}
impl BytesDecodeOwned for BoRoaringBitmapCodec {
type DItem = RoaringBitmap;
fn bytes_decode_owned(bytes: &[u8]) -> Option<Self::DItem> {
fn bytes_decode_owned(bytes: &[u8]) -> Result<Self::DItem, BoxedError> {
Self::bytes_decode(bytes)
}
}
@ -42,9 +42,9 @@ impl BytesDecodeOwned for BoRoaringBitmapCodec {
impl heed::BytesEncode<'_> for BoRoaringBitmapCodec {
type EItem = RoaringBitmap;
fn bytes_encode(item: &Self::EItem) -> Option<Cow<[u8]>> {
fn bytes_encode(item: &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
let mut out = Vec::new();
BoRoaringBitmapCodec::serialize_into(item, &mut out);
Some(Cow::Owned(out))
Ok(Cow::Owned(out))
}
}

View File

@ -3,6 +3,7 @@ use std::io;
use std::mem::size_of;
use byteorder::{NativeEndian, ReadBytesExt, WriteBytesExt};
use heed::BoxedError;
use roaring::RoaringBitmap;
use crate::heed_codec::BytesDecodeOwned;
@ -132,26 +133,26 @@ impl CboRoaringBitmapCodec {
impl heed::BytesDecode<'_> for CboRoaringBitmapCodec {
type DItem = RoaringBitmap;
fn bytes_decode(bytes: &[u8]) -> Option<Self::DItem> {
Self::deserialize_from(bytes).ok()
fn bytes_decode(bytes: &[u8]) -> Result<Self::DItem, BoxedError> {
Self::deserialize_from(bytes).map_err(Into::into)
}
}
impl BytesDecodeOwned for CboRoaringBitmapCodec {
type DItem = RoaringBitmap;
fn bytes_decode_owned(bytes: &[u8]) -> Option<Self::DItem> {
Self::deserialize_from(bytes).ok()
fn bytes_decode_owned(bytes: &[u8]) -> Result<Self::DItem, BoxedError> {
Self::deserialize_from(bytes).map_err(Into::into)
}
}
impl heed::BytesEncode<'_> for CboRoaringBitmapCodec {
type EItem = RoaringBitmap;
fn bytes_encode(item: &Self::EItem) -> Option<Cow<[u8]>> {
fn bytes_encode(item: &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
let mut vec = Vec::with_capacity(Self::serialized_size(item));
Self::serialize_into(item, &mut vec);
Some(Cow::Owned(vec))
Ok(Cow::Owned(vec))
}
}

View File

@ -1,5 +1,6 @@
use std::borrow::Cow;
use heed::BoxedError;
use roaring::RoaringBitmap;
use crate::heed_codec::BytesDecodeOwned;
@ -9,25 +10,25 @@ pub struct RoaringBitmapCodec;
impl heed::BytesDecode<'_> for RoaringBitmapCodec {
type DItem = RoaringBitmap;
fn bytes_decode(bytes: &[u8]) -> Option<Self::DItem> {
RoaringBitmap::deserialize_unchecked_from(bytes).ok()
fn bytes_decode(bytes: &[u8]) -> Result<Self::DItem, BoxedError> {
RoaringBitmap::deserialize_unchecked_from(bytes).map_err(Into::into)
}
}
impl BytesDecodeOwned for RoaringBitmapCodec {
type DItem = RoaringBitmap;
fn bytes_decode_owned(bytes: &[u8]) -> Option<Self::DItem> {
RoaringBitmap::deserialize_from(bytes).ok()
fn bytes_decode_owned(bytes: &[u8]) -> Result<Self::DItem, BoxedError> {
RoaringBitmap::deserialize_from(bytes).map_err(Into::into)
}
}
impl heed::BytesEncode<'_> for RoaringBitmapCodec {
type EItem = RoaringBitmap;
fn bytes_encode(item: &Self::EItem) -> Option<Cow<[u8]>> {
fn bytes_encode(item: &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
let mut bytes = Vec::with_capacity(item.serialized_size());
item.serialize_into(&mut bytes).ok()?;
Some(Cow::Owned(bytes))
item.serialize_into(&mut bytes)?;
Ok(Cow::Owned(bytes))
}
}
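One detail worth noting here: the borrowed `bytes_decode` path uses `deserialize_unchecked_from`, which skips roaring's container validation on the assumption that the database only contains bitmaps this codec wrote, while `bytes_decode_owned` keeps the checked `deserialize_from`. A small usage sketch of the two roaring calls:

```rust
use roaring::RoaringBitmap;

fn roundtrip(bitmap: &RoaringBitmap) -> std::io::Result<RoaringBitmap> {
    let mut bytes = Vec::with_capacity(bitmap.serialized_size());
    bitmap.serialize_into(&mut bytes)?;
    // Checked: validates container descriptions while reading.
    let checked = RoaringBitmap::deserialize_from(&bytes[..])?;
    // Unchecked: trusts the input; fine for bytes we serialized ourselves.
    let unchecked = RoaringBitmap::deserialize_unchecked_from(&bytes[..])?;
    assert_eq!(checked, unchecked);
    Ok(checked)
}
```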

View File

@ -1,6 +1,6 @@
use std::mem;
use heed::BytesDecode;
use heed::{BoxedError, BytesDecode};
use crate::heed_codec::BytesDecodeOwned;
@ -9,15 +9,15 @@ pub struct BoRoaringBitmapLenCodec;
impl BytesDecode<'_> for BoRoaringBitmapLenCodec {
type DItem = u64;
fn bytes_decode(bytes: &[u8]) -> Option<Self::DItem> {
Some((bytes.len() / mem::size_of::<u32>()) as u64)
fn bytes_decode(bytes: &[u8]) -> Result<Self::DItem, BoxedError> {
Ok((bytes.len() / mem::size_of::<u32>()) as u64)
}
}
impl BytesDecodeOwned for BoRoaringBitmapLenCodec {
type DItem = u64;
fn bytes_decode_owned(bytes: &[u8]) -> Option<Self::DItem> {
fn bytes_decode_owned(bytes: &[u8]) -> Result<Self::DItem, BoxedError> {
Self::bytes_decode(bytes)
}
}

View File

@ -1,6 +1,6 @@
use std::mem;
use heed::BytesDecode;
use heed::{BoxedError, BytesDecode};
use super::{BoRoaringBitmapLenCodec, RoaringBitmapLenCodec};
use crate::heed_codec::roaring_bitmap::cbo_roaring_bitmap_codec::THRESHOLD;
@ -11,7 +11,7 @@ pub struct CboRoaringBitmapLenCodec;
impl BytesDecode<'_> for CboRoaringBitmapLenCodec {
type DItem = u64;
fn bytes_decode(bytes: &[u8]) -> Option<Self::DItem> {
fn bytes_decode(bytes: &[u8]) -> Result<Self::DItem, BoxedError> {
if bytes.len() <= THRESHOLD * mem::size_of::<u32>() {
            // If at most THRESHOLD integers fit into this array of bytes,
            // it means that the ByteOrder codec serializer was used.
@ -27,7 +27,7 @@ impl BytesDecode<'_> for CboRoaringBitmapLenCodec {
impl BytesDecodeOwned for CboRoaringBitmapLenCodec {
type DItem = u64;
fn bytes_decode_owned(bytes: &[u8]) -> Option<Self::DItem> {
fn bytes_decode_owned(bytes: &[u8]) -> Result<Self::DItem, BoxedError> {
Self::bytes_decode(bytes)
}
}
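Both `Cbo*` codecs share the size-based dispatch this length codec makes explicit: a payload of at most `THRESHOLD` integers is stored as raw native-endian `u32`s (the ByteOrder path), while anything longer is a serialized `RoaringBitmap`. A hedged sketch of the check (`THRESHOLD`'s actual value lives in milli's codec module):

```rust
use std::mem::size_of;

const THRESHOLD: usize = 7; // assumption: mirrors the constant in milli

/// True when the bytes are the raw-u32 encoding rather than a
/// serialized RoaringBitmap.
fn is_byteorder_encoded(bytes: &[u8]) -> bool {
    bytes.len() <= THRESHOLD * size_of::<u32>()
}
```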

View File

@ -2,6 +2,7 @@ use std::io::{self, BufRead, Read};
use std::mem;
use byteorder::{LittleEndian, ReadBytesExt};
use heed::BoxedError;
use crate::heed_codec::BytesDecodeOwned;
@ -56,16 +57,16 @@ impl RoaringBitmapLenCodec {
impl heed::BytesDecode<'_> for RoaringBitmapLenCodec {
type DItem = u64;
fn bytes_decode(bytes: &[u8]) -> Option<Self::DItem> {
RoaringBitmapLenCodec::deserialize_from_slice(bytes).ok()
fn bytes_decode(bytes: &[u8]) -> Result<Self::DItem, BoxedError> {
RoaringBitmapLenCodec::deserialize_from_slice(bytes).map_err(Into::into)
}
}
impl BytesDecodeOwned for RoaringBitmapLenCodec {
type DItem = u64;
fn bytes_decode_owned(bytes: &[u8]) -> Option<Self::DItem> {
RoaringBitmapLenCodec::deserialize_from_slice(bytes).ok()
fn bytes_decode_owned(bytes: &[u8]) -> Result<Self::DItem, BoxedError> {
RoaringBitmapLenCodec::deserialize_from_slice(bytes).map_err(Into::into)
}
}

View File

@ -1,30 +1,31 @@
use std::borrow::Cow;
use std::ffi::CStr;
use std::str;
use charabia::{Language, Script};
use heed::BoxedError;
pub struct ScriptLanguageCodec;
impl<'a> heed::BytesDecode<'a> for ScriptLanguageCodec {
type DItem = (Script, Language);
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
let sep = bytes.iter().position(|b| *b == 0)?;
let (s_bytes, l_bytes) = bytes.split_at(sep);
let script = str::from_utf8(s_bytes).ok()?;
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
let cstr = CStr::from_bytes_until_nul(bytes)?;
let script = cstr.to_str()?;
let script_name = Script::from_name(script);
let lan = str::from_utf8(l_bytes).ok()?;
// skip '\0' byte between the two strings.
let lan_name = Language::from_name(&lan[1..]);
let lan = str::from_utf8(&bytes[script.len() + 1..])?;
let lan_name = Language::from_name(lan);
Some((script_name, lan_name))
Ok((script_name, lan_name))
}
}
impl<'a> heed::BytesEncode<'a> for ScriptLanguageCodec {
type EItem = (Script, Language);
fn bytes_encode((script, lan): &Self::EItem) -> Option<Cow<[u8]>> {
fn bytes_encode((script, lan): &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
let script_name = script.name().as_bytes();
let lan_name = lan.name().as_bytes();
@ -33,6 +34,6 @@ impl<'a> heed::BytesEncode<'a> for ScriptLanguageCodec {
bytes.push(0);
bytes.extend_from_slice(lan_name);
Some(Cow::Owned(bytes))
Ok(Cow::Owned(bytes))
}
}
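This rewrite (and the `U8StrStrCodec` one further down) swaps the manual `position(|b| *b == 0)` scan for `CStr::from_bytes_until_nul`, stable since Rust 1.69. A tiny standalone sketch of the split on a nul-separated pair:

```rust
use std::ffi::CStr;

fn split_nul_pair(bytes: &[u8]) -> Result<(&str, &str), Box<dyn std::error::Error>> {
    let first = CStr::from_bytes_until_nul(bytes)?.to_str()?;
    // Skip the '\0' separator sitting between the two strings.
    let second = std::str::from_utf8(&bytes[first.len() + 1..])?;
    Ok((first, second))
}

// split_nul_pair(b"Latin\0fra") == Ok(("Latin", "fra"))
```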

View File

@ -3,37 +3,41 @@ use std::convert::TryInto;
use std::mem::size_of;
use std::str;
use heed::BoxedError;
use super::SliceTooShortError;
pub struct StrBEU32Codec;
impl<'a> heed::BytesDecode<'a> for StrBEU32Codec {
type DItem = (&'a str, u32);
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
let footer_len = size_of::<u32>();
if bytes.len() < footer_len {
return None;
return Err(SliceTooShortError.into());
}
let (word, bytes) = bytes.split_at(bytes.len() - footer_len);
let word = str::from_utf8(word).ok()?;
let pos = bytes.try_into().map(u32::from_be_bytes).ok()?;
let word = str::from_utf8(word)?;
let pos = bytes.try_into().map(u32::from_be_bytes)?;
Some((word, pos))
Ok((word, pos))
}
}
impl<'a> heed::BytesEncode<'a> for StrBEU32Codec {
type EItem = (&'a str, u32);
fn bytes_encode((word, pos): &Self::EItem) -> Option<Cow<[u8]>> {
fn bytes_encode((word, pos): &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
let pos = pos.to_be_bytes();
let mut bytes = Vec::with_capacity(word.len() + pos.len());
bytes.extend_from_slice(word.as_bytes());
bytes.extend_from_slice(&pos[..]);
Some(Cow::Owned(bytes))
Ok(Cow::Owned(bytes))
}
}
@ -42,26 +46,27 @@ pub struct StrBEU16Codec;
impl<'a> heed::BytesDecode<'a> for StrBEU16Codec {
type DItem = (&'a str, u16);
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
let footer_len = size_of::<u16>();
if bytes.len() < footer_len + 1 {
return None;
return Err(SliceTooShortError.into());
}
let (word_plus_nul_byte, bytes) = bytes.split_at(bytes.len() - footer_len);
let (_, word) = word_plus_nul_byte.split_last()?;
let word = str::from_utf8(word).ok()?;
let pos = bytes.try_into().map(u16::from_be_bytes).ok()?;
        // unwrap: the `footer_len + 1` length check above guarantees a last byte exists.
let (_, word) = word_plus_nul_byte.split_last().unwrap();
let word = str::from_utf8(word)?;
let pos = bytes.try_into().map(u16::from_be_bytes)?;
Some((word, pos))
Ok((word, pos))
}
}
impl<'a> heed::BytesEncode<'a> for StrBEU16Codec {
type EItem = (&'a str, u16);
fn bytes_encode((word, pos): &Self::EItem) -> Option<Cow<[u8]>> {
fn bytes_encode((word, pos): &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
let pos = pos.to_be_bytes();
let mut bytes = Vec::with_capacity(word.len() + 1 + pos.len());
@ -69,6 +74,6 @@ impl<'a> heed::BytesEncode<'a> for StrBEU16Codec {
bytes.push(0);
bytes.extend_from_slice(&pos[..]);
Some(Cow::Owned(bytes))
Ok(Cow::Owned(bytes))
}
}

View File

@ -1,6 +1,6 @@
use std::borrow::Cow;
use heed::{BytesDecode, BytesEncode};
use heed::{BoxedError, BytesDecode, BytesEncode};
/// A codec for values of type `&str`. Unlike `Str`, its `EItem` and `DItem` associated
/// types are equivalent (= `&'a str`) and these values can reside within another structure.
@ -8,15 +8,14 @@ pub struct StrRefCodec;
impl<'a> BytesEncode<'a> for StrRefCodec {
type EItem = &'a str;
fn bytes_encode(item: &'a &'a str) -> Option<Cow<'a, [u8]>> {
Some(Cow::Borrowed(item.as_bytes()))
fn bytes_encode(item: &'a &'a str) -> Result<Cow<'a, [u8]>, BoxedError> {
Ok(Cow::Borrowed(item.as_bytes()))
}
}
impl<'a> BytesDecode<'a> for StrRefCodec {
type DItem = &'a str;
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
let s = std::str::from_utf8(bytes).ok()?;
Some(s)
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
std::str::from_utf8(bytes).map_err(Into::into)
}
}

View File

@ -1,32 +1,36 @@
use std::borrow::Cow;
use std::ffi::CStr;
use std::str;
use heed::BoxedError;
use super::SliceTooShortError;
pub struct U8StrStrCodec;
impl<'a> heed::BytesDecode<'a> for U8StrStrCodec {
type DItem = (u8, &'a str, &'a str);
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
let (n, bytes) = bytes.split_first()?;
let s1_end = bytes.iter().position(|b| *b == 0)?;
let (s1_bytes, rest) = bytes.split_at(s1_end);
let s2_bytes = &rest[1..];
let s1 = str::from_utf8(s1_bytes).ok()?;
let s2 = str::from_utf8(s2_bytes).ok()?;
Some((*n, s1, s2))
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
let (n, bytes) = bytes.split_first().ok_or(SliceTooShortError)?;
let cstr = CStr::from_bytes_until_nul(bytes)?;
let s1 = cstr.to_str()?;
// skip '\0' byte between the two strings.
let s2 = str::from_utf8(&bytes[s1.len() + 1..])?;
Ok((*n, s1, s2))
}
}
impl<'a> heed::BytesEncode<'a> for U8StrStrCodec {
type EItem = (u8, &'a str, &'a str);
fn bytes_encode((n, s1, s2): &Self::EItem) -> Option<Cow<[u8]>> {
fn bytes_encode((n, s1, s2): &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
let mut bytes = Vec::with_capacity(s1.len() + s2.len() + 1);
bytes.push(*n);
bytes.extend_from_slice(s1.as_bytes());
bytes.push(0);
bytes.extend_from_slice(s2.as_bytes());
Some(Cow::Owned(bytes))
Ok(Cow::Owned(bytes))
}
}
pub struct UncheckedU8StrStrCodec;
@ -34,24 +38,25 @@ pub struct UncheckedU8StrStrCodec;
impl<'a> heed::BytesDecode<'a> for UncheckedU8StrStrCodec {
type DItem = (u8, &'a [u8], &'a [u8]);
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
let (n, bytes) = bytes.split_first()?;
let s1_end = bytes.iter().position(|b| *b == 0)?;
let (s1_bytes, rest) = bytes.split_at(s1_end);
let s2_bytes = &rest[1..];
Some((*n, s1_bytes, s2_bytes))
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
let (n, bytes) = bytes.split_first().ok_or(SliceTooShortError)?;
let cstr = CStr::from_bytes_until_nul(bytes)?;
let s1_bytes = cstr.to_bytes();
// skip '\0' byte between the two strings.
let s2_bytes = &bytes[s1_bytes.len() + 1..];
Ok((*n, s1_bytes, s2_bytes))
}
}
impl<'a> heed::BytesEncode<'a> for UncheckedU8StrStrCodec {
type EItem = (u8, &'a [u8], &'a [u8]);
fn bytes_encode((n, s1, s2): &Self::EItem) -> Option<Cow<[u8]>> {
fn bytes_encode((n, s1, s2): &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
let mut bytes = Vec::with_capacity(s1.len() + s2.len() + 1);
bytes.push(*n);
bytes.extend_from_slice(s1);
bytes.push(0);
bytes.extend_from_slice(s2);
Some(Cow::Owned(bytes))
Ok(Cow::Owned(bytes))
}
}

View File

@ -4,9 +4,8 @@ use std::fs::File;
use std::path::Path;
use charabia::{Language, Script};
use heed::flags::Flags;
use heed::types::*;
use heed::{CompactionOption, Database, PolyDatabase, RoTxn, RwTxn};
use heed::{CompactionOption, Database, RoTxn, RwTxn, Unspecified};
use roaring::RoaringBitmap;
use rstar::RTree;
use time::OffsetDateTime;
@ -22,12 +21,13 @@ use crate::heed_codec::facet::{
use crate::heed_codec::{
BEU16StrCodec, FstSetCodec, ScriptLanguageCodec, StrBEU16Codec, StrRefCodec,
};
use crate::proximity::ProximityPrecision;
use crate::readable_slices::ReadableSlices;
use crate::{
default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds,
FacetDistribution, FieldDistribution, FieldId, FieldIdWordCountCodec, GeoPoint, ObkvCodec,
OrderBy, Result, RoaringBitmapCodec, RoaringBitmapLenCodec, Search, U8StrStrCodec, BEU16,
BEU32,
BEU32, BEU64,
};
/// The HNSW data-structure that we serialize, fill and search in.
@ -73,6 +73,7 @@ pub mod main_key {
pub const MAX_VALUES_PER_FACET: &str = "max-values-per-facet";
pub const SORT_FACET_VALUES_BY: &str = "sort-facet-values-by";
pub const PAGINATION_MAX_TOTAL_HITS: &str = "pagination-max-total-hits";
pub const PROXIMITY_PRECISION: &str = "proximity-precision";
}
pub mod db_name {
@ -109,10 +110,10 @@ pub struct Index {
pub(crate) env: heed::Env,
/// Contains many different types (e.g. the fields ids map).
pub(crate) main: PolyDatabase,
pub(crate) main: Database<Unspecified, Unspecified>,
    /// Maps the external document ids to the internal document id.
pub external_documents_ids: Database<Str, OwnedType<BEU32>>,
pub external_documents_ids: Database<Str, BEU32>,
/// A word and all the documents ids containing the word.
pub word_docids: Database<Str, CboRoaringBitmapCodec>,
@ -158,7 +159,7 @@ pub struct Index {
    /// Maps the facet field id of the normalized-for-search string facets to their original versions.
pub facet_id_normalized_string_strings: Database<BEU16StrCodec, SerdeJson<BTreeSet<String>>>,
/// Maps the facet field id of the string facets with an FST containing all the facets values.
pub facet_id_string_fst: Database<OwnedType<BEU16>, FstSetCodec>,
pub facet_id_string_fst: Database<BEU16, FstSetCodec>,
/// Maps the document id, the facet field id and the numbers.
pub field_id_docid_facet_f64s: Database<FieldDocIdFacetF64Codec, Unit>,
@ -166,10 +167,10 @@ pub struct Index {
pub field_id_docid_facet_strings: Database<FieldDocIdFacetStringCodec, Str>,
/// Maps a vector id to the document id that have it.
pub vector_id_docid: Database<OwnedType<BEU32>, OwnedType<BEU32>>,
pub vector_id_docid: Database<BEU32, BEU32>,
/// Maps the document id to the document as an obkv store.
pub(crate) documents: Database<OwnedType<BEU32>, ObkvCodec>,
pub(crate) documents: Database<BEU32, ObkvCodec>,
}
impl Index {
@ -182,11 +183,10 @@ impl Index {
use db_name::*;
options.max_dbs(24);
unsafe { options.flag(Flags::MdbAlwaysFreePages) };
let env = options.open(path)?;
let mut wtxn = env.write_txn()?;
let main = env.create_poly_database(&mut wtxn, Some(MAIN))?;
let main = env.database_options().name(MAIN).create(&mut wtxn)?;
let word_docids = env.create_database(&mut wtxn, Some(WORD_DOCIDS))?;
let external_documents_ids =
env.create_database(&mut wtxn, Some(EXTERNAL_DOCUMENTS_IDS))?;
@ -264,24 +264,16 @@ impl Index {
fn set_creation_dates(
env: &heed::Env,
main: PolyDatabase,
main: Database<Unspecified, Unspecified>,
created_at: OffsetDateTime,
updated_at: OffsetDateTime,
) -> heed::Result<()> {
let mut txn = env.write_txn()?;
// The db was just created, we update its metadata with the relevant information.
if main.get::<_, Str, SerdeJson<OffsetDateTime>>(&txn, main_key::CREATED_AT_KEY)?.is_none()
{
main.put::<_, Str, SerdeJson<OffsetDateTime>>(
&mut txn,
main_key::UPDATED_AT_KEY,
&updated_at,
)?;
main.put::<_, Str, SerdeJson<OffsetDateTime>>(
&mut txn,
main_key::CREATED_AT_KEY,
&created_at,
)?;
let main = main.remap_types::<Str, SerdeJson<OffsetDateTime>>();
if main.get(&txn, main_key::CREATED_AT_KEY)?.is_none() {
main.put(&mut txn, main_key::UPDATED_AT_KEY, &updated_at)?;
main.put(&mut txn, main_key::CREATED_AT_KEY, &created_at)?;
txn.commit()?;
}
Ok(())
@ -318,12 +310,12 @@ impl Index {
///
/// This value is the maximum between the map size passed during the opening of the index
/// and the on-disk size of the index at the time of opening.
pub fn map_size(&self) -> Result<usize> {
Ok(self.env.map_size()?)
pub fn map_size(&self) -> usize {
self.env.info().map_size
}
pub fn copy_to_path<P: AsRef<Path>>(&self, path: P, option: CompactionOption) -> Result<File> {
self.env.copy_to_path(path, option).map_err(Into::into)
pub fn copy_to_file<P: AsRef<Path>>(&self, path: P, option: CompactionOption) -> Result<File> {
self.env.copy_to_file(path, option).map_err(Into::into)
}
/// Returns an `EnvClosingEvent` that can be used to wait for the closing event,
@ -343,21 +335,28 @@ impl Index {
wtxn: &mut RwTxn,
docids: &RoaringBitmap,
) -> heed::Result<()> {
self.main.put::<_, Str, RoaringBitmapCodec>(wtxn, main_key::DOCUMENTS_IDS_KEY, docids)
self.main.remap_types::<Str, RoaringBitmapCodec>().put(
wtxn,
main_key::DOCUMENTS_IDS_KEY,
docids,
)
}
/// Returns the internal documents ids.
pub fn documents_ids(&self, rtxn: &RoTxn) -> heed::Result<RoaringBitmap> {
Ok(self
.main
.get::<_, Str, RoaringBitmapCodec>(rtxn, main_key::DOCUMENTS_IDS_KEY)?
.remap_types::<Str, RoaringBitmapCodec>()
.get(rtxn, main_key::DOCUMENTS_IDS_KEY)?
.unwrap_or_default())
}
/// Returns the number of documents indexed in the database.
pub fn number_of_documents(&self, rtxn: &RoTxn) -> Result<u64> {
let count =
self.main.get::<_, Str, RoaringBitmapLenCodec>(rtxn, main_key::DOCUMENTS_IDS_KEY)?;
let count = self
.main
.remap_types::<Str, RoaringBitmapLenCodec>()
.get(rtxn, main_key::DOCUMENTS_IDS_KEY)?;
Ok(count.unwrap_or_default())
}
@ -366,17 +365,17 @@ impl Index {
/// Writes the documents primary key, this is the field name that is used to store the id.
pub(crate) fn put_primary_key(&self, wtxn: &mut RwTxn, primary_key: &str) -> heed::Result<()> {
self.set_updated_at(wtxn, &OffsetDateTime::now_utc())?;
self.main.put::<_, Str, Str>(wtxn, main_key::PRIMARY_KEY_KEY, primary_key)
self.main.remap_types::<Str, Str>().put(wtxn, main_key::PRIMARY_KEY_KEY, primary_key)
}
/// Deletes the primary key of the documents, this can be done to reset indexes settings.
pub(crate) fn delete_primary_key(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
self.main.delete::<_, Str>(wtxn, main_key::PRIMARY_KEY_KEY)
self.main.remap_key_type::<Str>().delete(wtxn, main_key::PRIMARY_KEY_KEY)
}
/// Returns the documents primary key, `None` if it hasn't been defined.
pub fn primary_key<'t>(&self, rtxn: &'t RoTxn) -> heed::Result<Option<&'t str>> {
self.main.get::<_, Str, Str>(rtxn, main_key::PRIMARY_KEY_KEY)
self.main.remap_types::<Str, Str>().get(rtxn, main_key::PRIMARY_KEY_KEY)
}
/* external documents ids */
@ -396,7 +395,11 @@ impl Index {
wtxn: &mut RwTxn,
map: &FieldsIdsMap,
) -> heed::Result<()> {
self.main.put::<_, Str, SerdeJson<FieldsIdsMap>>(wtxn, main_key::FIELDS_IDS_MAP_KEY, map)
self.main.remap_types::<Str, SerdeJson<FieldsIdsMap>>().put(
wtxn,
main_key::FIELDS_IDS_MAP_KEY,
map,
)
}
/// Returns the fields ids map which associate the documents keys with an internal field id
@ -404,7 +407,8 @@ impl Index {
pub fn fields_ids_map(&self, rtxn: &RoTxn) -> heed::Result<FieldsIdsMap> {
Ok(self
.main
.get::<_, Str, SerdeJson<FieldsIdsMap>>(rtxn, main_key::FIELDS_IDS_MAP_KEY)?
.remap_types::<Str, SerdeJson<FieldsIdsMap>>()
.get(rtxn, main_key::FIELDS_IDS_MAP_KEY)?
.unwrap_or_default())
}
@ -416,19 +420,24 @@ impl Index {
wtxn: &mut RwTxn,
rtree: &RTree<GeoPoint>,
) -> heed::Result<()> {
self.main.put::<_, Str, SerdeBincode<RTree<GeoPoint>>>(wtxn, main_key::GEO_RTREE_KEY, rtree)
self.main.remap_types::<Str, SerdeBincode<RTree<GeoPoint>>>().put(
wtxn,
main_key::GEO_RTREE_KEY,
rtree,
)
}
/// Delete the `rtree` which associates coordinates to documents ids.
pub(crate) fn delete_geo_rtree(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
self.main.delete::<_, Str>(wtxn, main_key::GEO_RTREE_KEY)
self.main.remap_key_type::<Str>().delete(wtxn, main_key::GEO_RTREE_KEY)
}
/// Returns the `rtree` which associates coordinates to documents ids.
pub fn geo_rtree(&self, rtxn: &RoTxn) -> Result<Option<RTree<GeoPoint>>> {
match self
.main
.get::<_, Str, SerdeBincode<RTree<GeoPoint>>>(rtxn, main_key::GEO_RTREE_KEY)?
.remap_types::<Str, SerdeBincode<RTree<GeoPoint>>>()
.get(rtxn, main_key::GEO_RTREE_KEY)?
{
Some(rtree) => Ok(Some(rtree)),
None => Ok(None),
@ -443,7 +452,7 @@ impl Index {
wtxn: &mut RwTxn,
docids: &RoaringBitmap,
) -> heed::Result<()> {
self.main.put::<_, Str, RoaringBitmapCodec>(
self.main.remap_types::<Str, RoaringBitmapCodec>().put(
wtxn,
main_key::GEO_FACETED_DOCUMENTS_IDS_KEY,
docids,
@ -452,14 +461,15 @@ impl Index {
/// Delete the documents ids that are faceted with a _geo field.
pub(crate) fn delete_geo_faceted_documents_ids(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
self.main.delete::<_, Str>(wtxn, main_key::GEO_FACETED_DOCUMENTS_IDS_KEY)
self.main.remap_key_type::<Str>().delete(wtxn, main_key::GEO_FACETED_DOCUMENTS_IDS_KEY)
}
/// Retrieve all the documents ids that are faceted with a _geo field.
pub fn geo_faceted_documents_ids(&self, rtxn: &RoTxn) -> heed::Result<RoaringBitmap> {
match self
.main
.get::<_, Str, RoaringBitmapCodec>(rtxn, main_key::GEO_FACETED_DOCUMENTS_IDS_KEY)?
.remap_types::<Str, RoaringBitmapCodec>()
.get(rtxn, main_key::GEO_FACETED_DOCUMENTS_IDS_KEY)?
{
Some(docids) => Ok(docids),
None => Ok(RoaringBitmap::new()),
@ -474,22 +484,22 @@ impl Index {
self.delete_vector_hnsw(wtxn)?;
let chunk_size = 1024 * 1024 * (1024 + 512); // 1.5 GiB
let bytes = bincode::serialize(hnsw).map_err(|_| heed::Error::Encoding)?;
let bytes = bincode::serialize(hnsw).map_err(Into::into).map_err(heed::Error::Encoding)?;
for (i, chunk) in bytes.chunks(chunk_size).enumerate() {
let i = i as u32;
let mut key = main_key::VECTOR_HNSW_KEY_PREFIX.as_bytes().to_vec();
key.extend_from_slice(&i.to_be_bytes());
self.main.put::<_, ByteSlice, ByteSlice>(wtxn, &key, chunk)?;
self.main.remap_types::<Bytes, Bytes>().put(wtxn, &key, chunk)?;
}
Ok(())
}
/// Delete the `hnsw`.
pub(crate) fn delete_vector_hnsw(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
let mut iter = self.main.prefix_iter_mut::<_, ByteSlice, DecodeIgnore>(
wtxn,
main_key::VECTOR_HNSW_KEY_PREFIX.as_bytes(),
)?;
let mut iter = self
.main
.remap_types::<Bytes, DecodeIgnore>()
.prefix_iter_mut(wtxn, main_key::VECTOR_HNSW_KEY_PREFIX.as_bytes())?;
let mut deleted = false;
while iter.next().transpose()?.is_some() {
// We do not keep a reference to the key or the value.
@ -501,8 +511,10 @@ impl Index {
/// Returns the `hnsw`.
pub fn vector_hnsw(&self, rtxn: &RoTxn) -> Result<Option<Hnsw>> {
let mut slices = Vec::new();
for result in
self.main.prefix_iter::<_, Str, ByteSlice>(rtxn, main_key::VECTOR_HNSW_KEY_PREFIX)?
for result in self
.main
.remap_types::<Str, Bytes>()
.prefix_iter(rtxn, main_key::VECTOR_HNSW_KEY_PREFIX)?
{
let (_, slice) = result?;
slices.push(slice);
@ -512,7 +524,11 @@ impl Index {
Ok(None)
} else {
let readable_slices: ReadableSlices<_> = slices.into_iter().collect();
Ok(Some(bincode::deserialize_from(readable_slices).map_err(|_| heed::Error::Decoding)?))
Ok(Some(
bincode::deserialize_from(readable_slices)
.map_err(Into::into)
.map_err(heed::Error::Decoding)?,
))
}
}
@ -525,7 +541,7 @@ impl Index {
wtxn: &mut RwTxn,
distribution: &FieldDistribution,
) -> heed::Result<()> {
self.main.put::<_, Str, SerdeJson<FieldDistribution>>(
self.main.remap_types::<Str, SerdeJson<FieldDistribution>>().put(
wtxn,
main_key::FIELD_DISTRIBUTION_KEY,
distribution,
@ -537,7 +553,8 @@ impl Index {
pub fn field_distribution(&self, rtxn: &RoTxn) -> heed::Result<FieldDistribution> {
Ok(self
.main
.get::<_, Str, SerdeJson<FieldDistribution>>(rtxn, main_key::FIELD_DISTRIBUTION_KEY)?
.remap_types::<Str, SerdeJson<FieldDistribution>>()
.get(rtxn, main_key::FIELD_DISTRIBUTION_KEY)?
.unwrap_or_default())
}
@ -550,7 +567,7 @@ impl Index {
wtxn: &mut RwTxn,
fields: &[&str],
) -> heed::Result<()> {
self.main.put::<_, Str, SerdeBincode<&[&str]>>(
self.main.remap_types::<Str, SerdeBincode<&[&str]>>().put(
wtxn,
main_key::DISPLAYED_FIELDS_KEY,
&fields,
@ -560,13 +577,15 @@ impl Index {
    /// Deletes the displayed fields ids; this will make the engine display
    /// all the document attributes in the order of the `FieldsIdsMap`.
pub(crate) fn delete_displayed_fields(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
self.main.delete::<_, Str>(wtxn, main_key::DISPLAYED_FIELDS_KEY)
self.main.remap_key_type::<Str>().delete(wtxn, main_key::DISPLAYED_FIELDS_KEY)
}
/// Returns the displayed fields in the order they were set by the user. If it returns
/// `None` it means that all the attributes are set as displayed in the order of the `FieldsIdsMap`.
pub fn displayed_fields<'t>(&self, rtxn: &'t RoTxn) -> heed::Result<Option<Vec<&'t str>>> {
self.main.get::<_, Str, SerdeBincode<Vec<&'t str>>>(rtxn, main_key::DISPLAYED_FIELDS_KEY)
self.main
.remap_types::<Str, SerdeBincode<Vec<&'t str>>>()
.get(rtxn, main_key::DISPLAYED_FIELDS_KEY)
}
/// Identical to `displayed_fields`, but returns the ids instead.
@ -646,7 +665,7 @@ impl Index {
/// Writes the searchable fields, when this list is specified, only these are indexed.
fn put_searchable_fields(&self, wtxn: &mut RwTxn, fields: &[&str]) -> heed::Result<()> {
self.main.put::<_, Str, SerdeBincode<&[&str]>>(
self.main.remap_types::<Str, SerdeBincode<&[&str]>>().put(
wtxn,
main_key::SEARCHABLE_FIELDS_KEY,
&fields,
@ -655,13 +674,15 @@ impl Index {
/// Deletes the searchable fields, when no fields are specified, all fields are indexed.
fn delete_searchable_fields(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
self.main.delete::<_, Str>(wtxn, main_key::SEARCHABLE_FIELDS_KEY)
self.main.remap_key_type::<Str>().delete(wtxn, main_key::SEARCHABLE_FIELDS_KEY)
}
/// Returns the searchable fields, those are the fields that are indexed,
/// if the searchable fields aren't there it means that **all** the fields are indexed.
pub fn searchable_fields<'t>(&self, rtxn: &'t RoTxn) -> heed::Result<Option<Vec<&'t str>>> {
self.main.get::<_, Str, SerdeBincode<Vec<&'t str>>>(rtxn, main_key::SEARCHABLE_FIELDS_KEY)
self.main
.remap_types::<Str, SerdeBincode<Vec<&'t str>>>()
.get(rtxn, main_key::SEARCHABLE_FIELDS_KEY)
}
/// Identical to `searchable_fields`, but returns the ids instead.
@ -687,7 +708,7 @@ impl Index {
wtxn: &mut RwTxn,
fields: &[&str],
) -> heed::Result<()> {
self.main.put::<_, Str, SerdeBincode<_>>(
self.main.remap_types::<Str, SerdeBincode<_>>().put(
wtxn,
main_key::USER_DEFINED_SEARCHABLE_FIELDS_KEY,
&fields,
@ -699,7 +720,7 @@ impl Index {
&self,
wtxn: &mut RwTxn,
) -> heed::Result<bool> {
self.main.delete::<_, Str>(wtxn, main_key::USER_DEFINED_SEARCHABLE_FIELDS_KEY)
self.main.remap_key_type::<Str>().delete(wtxn, main_key::USER_DEFINED_SEARCHABLE_FIELDS_KEY)
}
/// Returns the user defined searchable fields.
@ -708,7 +729,8 @@ impl Index {
rtxn: &'t RoTxn,
) -> heed::Result<Option<Vec<&'t str>>> {
self.main
.get::<_, Str, SerdeBincode<Vec<_>>>(rtxn, main_key::USER_DEFINED_SEARCHABLE_FIELDS_KEY)
.remap_types::<Str, SerdeBincode<Vec<_>>>()
.get(rtxn, main_key::USER_DEFINED_SEARCHABLE_FIELDS_KEY)
}
/* filterable fields */
@ -719,19 +741,24 @@ impl Index {
wtxn: &mut RwTxn,
fields: &HashSet<String>,
) -> heed::Result<()> {
self.main.put::<_, Str, SerdeJson<_>>(wtxn, main_key::FILTERABLE_FIELDS_KEY, fields)
self.main.remap_types::<Str, SerdeJson<_>>().put(
wtxn,
main_key::FILTERABLE_FIELDS_KEY,
fields,
)
}
/// Deletes the filterable fields ids in the database.
pub(crate) fn delete_filterable_fields(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
self.main.delete::<_, Str>(wtxn, main_key::FILTERABLE_FIELDS_KEY)
self.main.remap_key_type::<Str>().delete(wtxn, main_key::FILTERABLE_FIELDS_KEY)
}
/// Returns the filterable fields names.
pub fn filterable_fields(&self, rtxn: &RoTxn) -> heed::Result<HashSet<String>> {
Ok(self
.main
.get::<_, Str, SerdeJson<_>>(rtxn, main_key::FILTERABLE_FIELDS_KEY)?
.remap_types::<Str, SerdeJson<_>>()
.get(rtxn, main_key::FILTERABLE_FIELDS_KEY)?
.unwrap_or_default())
}
@ -758,19 +785,24 @@ impl Index {
wtxn: &mut RwTxn,
fields: &HashSet<String>,
) -> heed::Result<()> {
self.main.put::<_, Str, SerdeJson<_>>(wtxn, main_key::SORTABLE_FIELDS_KEY, fields)
self.main.remap_types::<Str, SerdeJson<_>>().put(
wtxn,
main_key::SORTABLE_FIELDS_KEY,
fields,
)
}
/// Deletes the sortable fields ids in the database.
pub(crate) fn delete_sortable_fields(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
self.main.delete::<_, Str>(wtxn, main_key::SORTABLE_FIELDS_KEY)
self.main.remap_key_type::<Str>().delete(wtxn, main_key::SORTABLE_FIELDS_KEY)
}
/// Returns the sortable field names.
pub fn sortable_fields(&self, rtxn: &RoTxn) -> heed::Result<HashSet<String>> {
Ok(self
.main
.get::<_, Str, SerdeJson<_>>(rtxn, main_key::SORTABLE_FIELDS_KEY)?
.remap_types::<Str, SerdeJson<_>>()
.get(rtxn, main_key::SORTABLE_FIELDS_KEY)?
.unwrap_or_default())
}
@ -789,14 +821,19 @@ impl Index {
wtxn: &mut RwTxn,
fields: &HashSet<String>,
) -> heed::Result<()> {
self.main.put::<_, Str, SerdeJson<_>>(wtxn, main_key::HIDDEN_FACETED_FIELDS_KEY, fields)
self.main.remap_types::<Str, SerdeJson<_>>().put(
wtxn,
main_key::HIDDEN_FACETED_FIELDS_KEY,
fields,
)
}
/// Returns the faceted field names.
pub fn faceted_fields(&self, rtxn: &RoTxn) -> heed::Result<HashSet<String>> {
Ok(self
.main
.get::<_, Str, SerdeJson<_>>(rtxn, main_key::HIDDEN_FACETED_FIELDS_KEY)?
.remap_types::<Str, SerdeJson<_>>()
.get(rtxn, main_key::HIDDEN_FACETED_FIELDS_KEY)?
.unwrap_or_default())
}
@ -863,7 +900,7 @@ impl Index {
rtxn: &RoTxn,
field_id: FieldId,
) -> heed::Result<RoaringBitmap> {
match self.facet_id_is_null_docids.get(rtxn, &BEU16::new(field_id))? {
match self.facet_id_is_null_docids.get(rtxn, &field_id)? {
Some(docids) => Ok(docids),
None => Ok(RoaringBitmap::new()),
}
@ -875,7 +912,7 @@ impl Index {
rtxn: &RoTxn,
field_id: FieldId,
) -> heed::Result<RoaringBitmap> {
match self.facet_id_is_empty_docids.get(rtxn, &BEU16::new(field_id))? {
match self.facet_id_is_empty_docids.get(rtxn, &field_id)? {
Some(docids) => Ok(docids),
None => Ok(RoaringBitmap::new()),
}
@ -887,7 +924,7 @@ impl Index {
rtxn: &RoTxn,
field_id: FieldId,
) -> heed::Result<RoaringBitmap> {
match self.facet_id_exists_docids.get(rtxn, &BEU16::new(field_id))? {
match self.facet_id_exists_docids.get(rtxn, &field_id)? {
Some(docids) => Ok(docids),
None => Ok(RoaringBitmap::new()),
}
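
As an aside, the repeated match in these three getters is equivalent to `unwrap_or_default()`, since `RoaringBitmap::default()` is the empty bitmap. A sketch, assuming this crate's `CboRoaringBitmapCodec` and the `BEU16` alias:

```rust
use roaring::RoaringBitmap;

fn facet_id_docids(
    db: heed::Database<crate::BEU16, crate::CboRoaringBitmapCodec>,
    rtxn: &heed::RoTxn,
    field_id: u16,
) -> heed::Result<RoaringBitmap> {
    // An absent entry means no document has the facet: return the empty bitmap.
    Ok(db.get(rtxn, &field_id)?.unwrap_or_default())
}
```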
@ -900,15 +937,15 @@ impl Index {
wtxn: &mut RwTxn,
distinct_field: &str,
) -> heed::Result<()> {
self.main.put::<_, Str, Str>(wtxn, main_key::DISTINCT_FIELD_KEY, distinct_field)
self.main.remap_types::<Str, Str>().put(wtxn, main_key::DISTINCT_FIELD_KEY, distinct_field)
}
pub fn distinct_field<'a>(&self, rtxn: &'a RoTxn) -> heed::Result<Option<&'a str>> {
self.main.get::<_, Str, Str>(rtxn, main_key::DISTINCT_FIELD_KEY)
self.main.remap_types::<Str, Str>().get(rtxn, main_key::DISTINCT_FIELD_KEY)
}
pub(crate) fn delete_distinct_field(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
self.main.delete::<_, Str>(wtxn, main_key::DISTINCT_FIELD_KEY)
self.main.remap_key_type::<Str>().delete(wtxn, main_key::DISTINCT_FIELD_KEY)
}
/* criteria */
@ -918,15 +955,23 @@ impl Index {
wtxn: &mut RwTxn,
criteria: &[Criterion],
) -> heed::Result<()> {
self.main.put::<_, Str, SerdeJson<&[Criterion]>>(wtxn, main_key::CRITERIA_KEY, &criteria)
self.main.remap_types::<Str, SerdeJson<&[Criterion]>>().put(
wtxn,
main_key::CRITERIA_KEY,
&criteria,
)
}
pub(crate) fn delete_criteria(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
self.main.delete::<_, Str>(wtxn, main_key::CRITERIA_KEY)
self.main.remap_key_type::<Str>().delete(wtxn, main_key::CRITERIA_KEY)
}
pub fn criteria(&self, rtxn: &RoTxn) -> heed::Result<Vec<Criterion>> {
match self.main.get::<_, Str, SerdeJson<Vec<Criterion>>>(rtxn, main_key::CRITERIA_KEY)? {
match self
.main
.remap_types::<Str, SerdeJson<Vec<Criterion>>>()
.get(rtxn, main_key::CRITERIA_KEY)?
{
Some(criteria) => Ok(criteria),
None => Ok(default_criteria()),
}
@ -940,12 +985,16 @@ impl Index {
wtxn: &mut RwTxn,
fst: &fst::Set<A>,
) -> heed::Result<()> {
self.main.put::<_, Str, ByteSlice>(wtxn, main_key::WORDS_FST_KEY, fst.as_fst().as_bytes())
self.main.remap_types::<Str, Bytes>().put(
wtxn,
main_key::WORDS_FST_KEY,
fst.as_fst().as_bytes(),
)
}
/// Returns the FST which is the words dictionary of the engine.
pub fn words_fst<'t>(&self, rtxn: &'t RoTxn) -> Result<fst::Set<Cow<'t, [u8]>>> {
match self.main.get::<_, Str, ByteSlice>(rtxn, main_key::WORDS_FST_KEY)? {
match self.main.remap_types::<Str, Bytes>().get(rtxn, main_key::WORDS_FST_KEY)? {
Some(bytes) => Ok(fst::Set::new(bytes)?.map_data(Cow::Borrowed)?),
None => Ok(fst::Set::default().map_data(Cow::Owned)?),
}
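
Since the dictionary is persisted through the raw `Bytes` codec, a round trip looks roughly like this — a sketch assuming the `fst` crate, with an illustrative key in place of `main_key::WORDS_FST_KEY`:

```rust
use std::borrow::Cow;

use heed::types::{Bytes, Str};

fn fst_roundtrip(
    main: heed::Database<Str, Bytes>,
    wtxn: &mut heed::RwTxn,
) -> Result<(), Box<dyn std::error::Error>> {
    // The builder requires keys in lexicographic order.
    let set = fst::Set::from_iter(["alpha", "beta"])?;
    main.put(wtxn, "words-fst", set.as_fst().as_bytes())?;

    // Reading back is zero-copy: the set borrows the LMDB page directly.
    let bytes = main.get(wtxn, "words-fst")?.expect("just written");
    let set = fst::Set::new(bytes)?.map_data(Cow::Borrowed)?;
    assert!(set.contains("beta"));
    Ok(())
}
```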
@ -958,15 +1007,19 @@ impl Index {
wtxn: &mut RwTxn,
fst: &fst::Set<A>,
) -> heed::Result<()> {
self.main.put::<_, Str, ByteSlice>(wtxn, main_key::STOP_WORDS_KEY, fst.as_fst().as_bytes())
self.main.remap_types::<Str, Bytes>().put(
wtxn,
main_key::STOP_WORDS_KEY,
fst.as_fst().as_bytes(),
)
}
pub(crate) fn delete_stop_words(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
self.main.delete::<_, Str>(wtxn, main_key::STOP_WORDS_KEY)
self.main.remap_key_type::<Str>().delete(wtxn, main_key::STOP_WORDS_KEY)
}
pub fn stop_words<'t>(&self, rtxn: &'t RoTxn) -> Result<Option<fst::Set<&'t [u8]>>> {
match self.main.get::<_, Str, ByteSlice>(rtxn, main_key::STOP_WORDS_KEY)? {
match self.main.remap_types::<Str, Bytes>().get(rtxn, main_key::STOP_WORDS_KEY)? {
Some(bytes) => Ok(Some(fst::Set::new(bytes)?)),
None => Ok(None),
}
@ -979,18 +1032,22 @@ impl Index {
wtxn: &mut RwTxn,
set: &BTreeSet<String>,
) -> heed::Result<()> {
self.main.put::<_, Str, SerdeBincode<_>>(wtxn, main_key::NON_SEPARATOR_TOKENS_KEY, set)
self.main.remap_types::<Str, SerdeBincode<_>>().put(
wtxn,
main_key::NON_SEPARATOR_TOKENS_KEY,
set,
)
}
pub(crate) fn delete_non_separator_tokens(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
self.main.delete::<_, Str>(wtxn, main_key::NON_SEPARATOR_TOKENS_KEY)
self.main.remap_key_type::<Str>().delete(wtxn, main_key::NON_SEPARATOR_TOKENS_KEY)
}
pub fn non_separator_tokens(&self, rtxn: &RoTxn) -> Result<Option<BTreeSet<String>>> {
Ok(self.main.get::<_, Str, SerdeBincode<BTreeSet<String>>>(
rtxn,
main_key::NON_SEPARATOR_TOKENS_KEY,
)?)
Ok(self
.main
.remap_types::<Str, SerdeBincode<BTreeSet<String>>>()
.get(rtxn, main_key::NON_SEPARATOR_TOKENS_KEY)?)
}
/* separator tokens */
@ -1000,17 +1057,22 @@ impl Index {
wtxn: &mut RwTxn,
set: &BTreeSet<String>,
) -> heed::Result<()> {
self.main.put::<_, Str, SerdeBincode<_>>(wtxn, main_key::SEPARATOR_TOKENS_KEY, set)
self.main.remap_types::<Str, SerdeBincode<_>>().put(
wtxn,
main_key::SEPARATOR_TOKENS_KEY,
set,
)
}
pub(crate) fn delete_separator_tokens(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
self.main.delete::<_, Str>(wtxn, main_key::SEPARATOR_TOKENS_KEY)
self.main.remap_key_type::<Str>().delete(wtxn, main_key::SEPARATOR_TOKENS_KEY)
}
pub fn separator_tokens(&self, rtxn: &RoTxn) -> Result<Option<BTreeSet<String>>> {
Ok(self
.main
.get::<_, Str, SerdeBincode<BTreeSet<String>>>(rtxn, main_key::SEPARATOR_TOKENS_KEY)?)
.remap_types::<Str, SerdeBincode<BTreeSet<String>>>()
.get(rtxn, main_key::SEPARATOR_TOKENS_KEY)?)
}
/* separators easing method */
@ -1040,17 +1102,18 @@ impl Index {
wtxn: &mut RwTxn,
set: &BTreeSet<String>,
) -> heed::Result<()> {
self.main.put::<_, Str, SerdeBincode<_>>(wtxn, main_key::DICTIONARY_KEY, set)
self.main.remap_types::<Str, SerdeBincode<_>>().put(wtxn, main_key::DICTIONARY_KEY, set)
}
pub(crate) fn delete_dictionary(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
self.main.delete::<_, Str>(wtxn, main_key::DICTIONARY_KEY)
self.main.remap_key_type::<Str>().delete(wtxn, main_key::DICTIONARY_KEY)
}
pub fn dictionary(&self, rtxn: &RoTxn) -> Result<Option<BTreeSet<String>>> {
Ok(self
.main
.get::<_, Str, SerdeBincode<BTreeSet<String>>>(rtxn, main_key::DICTIONARY_KEY)?)
.remap_types::<Str, SerdeBincode<BTreeSet<String>>>()
.get(rtxn, main_key::DICTIONARY_KEY)?)
}
/* synonyms */
@ -1061,8 +1124,12 @@ impl Index {
synonyms: &HashMap<Vec<String>, Vec<Vec<String>>>,
user_defined_synonyms: &BTreeMap<String, Vec<String>>,
) -> heed::Result<()> {
self.main.put::<_, Str, SerdeBincode<_>>(wtxn, main_key::SYNONYMS_KEY, synonyms)?;
self.main.put::<_, Str, SerdeBincode<_>>(
self.main.remap_types::<Str, SerdeBincode<_>>().put(
wtxn,
main_key::SYNONYMS_KEY,
synonyms,
)?;
self.main.remap_types::<Str, SerdeBincode<_>>().put(
wtxn,
main_key::USER_DEFINED_SYNONYMS_KEY,
user_defined_synonyms,
@ -1070,8 +1137,8 @@ impl Index {
}
pub(crate) fn delete_synonyms(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
self.main.delete::<_, Str>(wtxn, main_key::SYNONYMS_KEY)?;
self.main.delete::<_, Str>(wtxn, main_key::USER_DEFINED_SYNONYMS_KEY)
self.main.remap_key_type::<Str>().delete(wtxn, main_key::SYNONYMS_KEY)?;
self.main.remap_key_type::<Str>().delete(wtxn, main_key::USER_DEFINED_SYNONYMS_KEY)
}
pub fn user_defined_synonyms(
@ -1080,14 +1147,16 @@ impl Index {
) -> heed::Result<BTreeMap<String, Vec<String>>> {
Ok(self
.main
.get::<_, Str, SerdeBincode<_>>(rtxn, main_key::USER_DEFINED_SYNONYMS_KEY)?
.remap_types::<Str, SerdeBincode<_>>()
.get(rtxn, main_key::USER_DEFINED_SYNONYMS_KEY)?
.unwrap_or_default())
}
pub fn synonyms(&self, rtxn: &RoTxn) -> heed::Result<HashMap<Vec<String>, Vec<Vec<String>>>> {
Ok(self
.main
.get::<_, Str, SerdeBincode<_>>(rtxn, main_key::SYNONYMS_KEY)?
.remap_types::<Str, SerdeBincode<_>>()
.get(rtxn, main_key::SYNONYMS_KEY)?
.unwrap_or_default())
}
@ -1108,7 +1177,7 @@ impl Index {
wtxn: &mut RwTxn,
fst: &fst::Set<A>,
) -> heed::Result<()> {
self.main.put::<_, Str, ByteSlice>(
self.main.remap_types::<Str, Bytes>().put(
wtxn,
main_key::WORDS_PREFIXES_FST_KEY,
fst.as_fst().as_bytes(),
@ -1117,7 +1186,7 @@ impl Index {
/// Returns the FST which is the words prefixes dictionary of the engine.
pub fn words_prefixes_fst<'t>(&self, rtxn: &'t RoTxn) -> Result<fst::Set<Cow<'t, [u8]>>> {
match self.main.get::<_, Str, ByteSlice>(rtxn, main_key::WORDS_PREFIXES_FST_KEY)? {
match self.main.remap_types::<Str, Bytes>().get(rtxn, main_key::WORDS_PREFIXES_FST_KEY)? {
Some(bytes) => Ok(fst::Set::new(bytes)?.map_data(Cow::Borrowed)?),
None => Ok(fst::Set::default().map_data(Cow::Owned)?),
}
@ -1142,7 +1211,7 @@ impl Index {
Ok(ids.into_iter().map(move |id| {
let kv = self
.documents
.get(rtxn, &BEU32::new(id))?
.get(rtxn, &id)?
.ok_or(UserError::UnknownInternalDocumentId { document_id: id })?;
Ok((id, kv))
}))
@ -1207,7 +1276,8 @@ impl Index {
pub fn created_at(&self, rtxn: &RoTxn) -> Result<OffsetDateTime> {
Ok(self
.main
.get::<_, Str, SerdeJson<OffsetDateTime>>(rtxn, main_key::CREATED_AT_KEY)?
.remap_types::<Str, SerdeJson<OffsetDateTime>>()
.get(rtxn, main_key::CREATED_AT_KEY)?
.ok_or(InternalError::DatabaseMissingEntry {
db_name: db_name::MAIN,
key: Some(main_key::CREATED_AT_KEY),
@ -1218,7 +1288,8 @@ impl Index {
pub fn updated_at(&self, rtxn: &RoTxn) -> Result<OffsetDateTime> {
Ok(self
.main
.get::<_, Str, SerdeJson<OffsetDateTime>>(rtxn, main_key::UPDATED_AT_KEY)?
.remap_types::<Str, SerdeJson<OffsetDateTime>>()
.get(rtxn, main_key::UPDATED_AT_KEY)?
.ok_or(InternalError::DatabaseMissingEntry {
db_name: db_name::MAIN,
key: Some(main_key::UPDATED_AT_KEY),
@ -1230,14 +1301,18 @@ impl Index {
wtxn: &mut RwTxn,
time: &OffsetDateTime,
) -> heed::Result<()> {
self.main.put::<_, Str, SerdeJson<OffsetDateTime>>(wtxn, main_key::UPDATED_AT_KEY, time)
self.main.remap_types::<Str, SerdeJson<OffsetDateTime>>().put(
wtxn,
main_key::UPDATED_AT_KEY,
time,
)
}
pub fn authorize_typos(&self, txn: &RoTxn) -> heed::Result<bool> {
// heed has no boolean codec, so we store a u8 instead: 0 is false, anything else
// is true. The absence of a value also means true, because typos are authorized
// by default.
match self.main.get::<_, Str, OwnedType<u8>>(txn, main_key::AUTHORIZE_TYPOS)? {
match self.main.remap_types::<Str, U8>().get(txn, main_key::AUTHORIZE_TYPOS)? {
Some(0) => Ok(false),
_ => Ok(true),
}
@ -1247,7 +1322,7 @@ impl Index {
// heed has no boolean codec, so we store a u8 instead: 0 is false, anything else
// is true. The absence of a value also means true, because typos are authorized
// by default.
self.main.put::<_, Str, OwnedType<u8>>(txn, main_key::AUTHORIZE_TYPOS, &(flag as u8))?;
self.main.remap_types::<Str, U8>().put(txn, main_key::AUTHORIZE_TYPOS, &(flag as u8))?;
Ok(())
}
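
A minimal sketch of the flag encoding described above, using the heed 0.20 `U8` codec (key name illustrative, standing in for `main_key::AUTHORIZE_TYPOS`):

```rust
use heed::types::{Str, U8};

// 0 decodes to false, anything else to true; a missing entry defaults to true.
fn read_authorize_typos(
    main: heed::Database<Str, U8>,
    rtxn: &heed::RoTxn,
) -> heed::Result<bool> {
    Ok(!matches!(main.get(rtxn, "authorize-typos")?, Some(0)))
}
```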
@ -1258,7 +1333,8 @@ impl Index {
// When absent, the default minimum word length for one typo applies.
Ok(self
.main
.get::<_, Str, OwnedType<u8>>(txn, main_key::ONE_TYPO_WORD_LEN)?
.remap_types::<Str, U8>()
.get(txn, main_key::ONE_TYPO_WORD_LEN)?
.unwrap_or(DEFAULT_MIN_WORD_LEN_ONE_TYPO))
}
@ -1266,7 +1342,7 @@ impl Index {
// The minimum word length for one typo is stored as a plain u8.
self.main.put::<_, Str, OwnedType<u8>>(txn, main_key::ONE_TYPO_WORD_LEN, &val)?;
self.main.remap_types::<Str, U8>().put(txn, main_key::ONE_TYPO_WORD_LEN, &val)?;
Ok(())
}
@ -1276,7 +1352,8 @@ impl Index {
// When absent, the default minimum word length for two typos applies.
Ok(self
.main
.get::<_, Str, OwnedType<u8>>(txn, main_key::TWO_TYPOS_WORD_LEN)?
.remap_types::<Str, U8>()
.get(txn, main_key::TWO_TYPOS_WORD_LEN)?
.unwrap_or(DEFAULT_MIN_WORD_LEN_TWO_TYPOS))
}
@ -1284,13 +1361,13 @@ impl Index {
// The minimum word length for two typos is stored as a plain u8.
self.main.put::<_, Str, OwnedType<u8>>(txn, main_key::TWO_TYPOS_WORD_LEN, &val)?;
self.main.remap_types::<Str, U8>().put(txn, main_key::TWO_TYPOS_WORD_LEN, &val)?;
Ok(())
}
/// Lists the words on which typos are not allowed.
pub fn exact_words<'t>(&self, txn: &'t RoTxn) -> Result<Option<fst::Set<Cow<'t, [u8]>>>> {
match self.main.get::<_, Str, ByteSlice>(txn, main_key::EXACT_WORDS)? {
match self.main.remap_types::<Str, Bytes>().get(txn, main_key::EXACT_WORDS)? {
Some(bytes) => Ok(Some(fst::Set::new(bytes)?.map_data(Cow::Borrowed)?)),
None => Ok(None),
}
@ -1301,7 +1378,7 @@ impl Index {
txn: &mut RwTxn,
words: &fst::Set<A>,
) -> Result<()> {
self.main.put::<_, Str, ByteSlice>(
self.main.remap_types::<Str, Bytes>().put(
txn,
main_key::EXACT_WORDS,
words.as_fst().as_bytes(),
@ -1313,7 +1390,8 @@ impl Index {
pub fn exact_attributes<'t>(&self, txn: &'t RoTxn) -> Result<Vec<&'t str>> {
Ok(self
.main
.get::<_, Str, SerdeBincode<Vec<&str>>>(txn, main_key::EXACT_ATTRIBUTES)?
.remap_types::<Str, SerdeBincode<Vec<&str>>>()
.get(txn, main_key::EXACT_ATTRIBUTES)?
.unwrap_or_default())
}
@ -1326,34 +1404,36 @@ impl Index {
/// Writes the exact attributes to the database.
pub(crate) fn put_exact_attributes(&self, txn: &mut RwTxn, attrs: &[&str]) -> Result<()> {
self.main.put::<_, Str, SerdeBincode<&[&str]>>(txn, main_key::EXACT_ATTRIBUTES, &attrs)?;
self.main.remap_types::<Str, SerdeBincode<&[&str]>>().put(
txn,
main_key::EXACT_ATTRIBUTES,
&attrs,
)?;
Ok(())
}
/// Clears the exact attributes from the store.
pub(crate) fn delete_exact_attributes(&self, txn: &mut RwTxn) -> heed::Result<bool> {
self.main.delete::<_, Str>(txn, main_key::EXACT_ATTRIBUTES)
self.main.remap_key_type::<Str>().delete(txn, main_key::EXACT_ATTRIBUTES)
}
pub fn max_values_per_facet(&self, txn: &RoTxn) -> heed::Result<Option<usize>> {
self.main.get::<_, Str, OwnedType<usize>>(txn, main_key::MAX_VALUES_PER_FACET)
pub fn max_values_per_facet(&self, txn: &RoTxn) -> heed::Result<Option<u64>> {
self.main.remap_types::<Str, BEU64>().get(txn, main_key::MAX_VALUES_PER_FACET)
}
pub(crate) fn put_max_values_per_facet(&self, txn: &mut RwTxn, val: usize) -> heed::Result<()> {
self.main.put::<_, Str, OwnedType<usize>>(txn, main_key::MAX_VALUES_PER_FACET, &val)
pub(crate) fn put_max_values_per_facet(&self, txn: &mut RwTxn, val: u64) -> heed::Result<()> {
self.main.remap_types::<Str, BEU64>().put(txn, main_key::MAX_VALUES_PER_FACET, &val)
}
pub(crate) fn delete_max_values_per_facet(&self, txn: &mut RwTxn) -> heed::Result<bool> {
self.main.delete::<_, Str>(txn, main_key::MAX_VALUES_PER_FACET)
self.main.remap_key_type::<Str>().delete(txn, main_key::MAX_VALUES_PER_FACET)
}
pub fn sort_facet_values_by(&self, txn: &RoTxn) -> heed::Result<HashMap<String, OrderBy>> {
let mut orders = self
.main
.get::<_, Str, SerdeJson<HashMap<String, OrderBy>>>(
txn,
main_key::SORT_FACET_VALUES_BY,
)?
.remap_types::<Str, SerdeJson<HashMap<String, OrderBy>>>()
.get(txn, main_key::SORT_FACET_VALUES_BY)?
.unwrap_or_default();
// Insert the default ordering if it has not already been overridden by the user.
orders.entry("*".to_string()).or_insert(OrderBy::Lexicographic);
@ -1365,27 +1445,49 @@ impl Index {
txn: &mut RwTxn,
val: &HashMap<String, OrderBy>,
) -> heed::Result<()> {
self.main.put::<_, Str, SerdeJson<_>>(txn, main_key::SORT_FACET_VALUES_BY, &val)
self.main.remap_types::<Str, SerdeJson<_>>().put(txn, main_key::SORT_FACET_VALUES_BY, &val)
}
pub(crate) fn delete_sort_facet_values_by(&self, txn: &mut RwTxn) -> heed::Result<bool> {
self.main.delete::<_, Str>(txn, main_key::SORT_FACET_VALUES_BY)
self.main.remap_key_type::<Str>().delete(txn, main_key::SORT_FACET_VALUES_BY)
}
pub fn pagination_max_total_hits(&self, txn: &RoTxn) -> heed::Result<Option<usize>> {
self.main.get::<_, Str, OwnedType<usize>>(txn, main_key::PAGINATION_MAX_TOTAL_HITS)
pub fn pagination_max_total_hits(&self, txn: &RoTxn) -> heed::Result<Option<u64>> {
self.main.remap_types::<Str, BEU64>().get(txn, main_key::PAGINATION_MAX_TOTAL_HITS)
}
pub(crate) fn put_pagination_max_total_hits(
&self,
txn: &mut RwTxn,
val: usize,
val: u64,
) -> heed::Result<()> {
self.main.put::<_, Str, OwnedType<usize>>(txn, main_key::PAGINATION_MAX_TOTAL_HITS, &val)
self.main.remap_types::<Str, BEU64>().put(txn, main_key::PAGINATION_MAX_TOTAL_HITS, &val)
}
pub(crate) fn delete_pagination_max_total_hits(&self, txn: &mut RwTxn) -> heed::Result<bool> {
self.main.delete::<_, Str>(txn, main_key::PAGINATION_MAX_TOTAL_HITS)
self.main.remap_key_type::<Str>().delete(txn, main_key::PAGINATION_MAX_TOTAL_HITS)
}
pub fn proximity_precision(&self, txn: &RoTxn) -> heed::Result<Option<ProximityPrecision>> {
self.main
.remap_types::<Str, SerdeBincode<ProximityPrecision>>()
.get(txn, main_key::PROXIMITY_PRECISION)
}
pub(crate) fn put_proximity_precision(
&self,
txn: &mut RwTxn,
val: ProximityPrecision,
) -> heed::Result<()> {
self.main.remap_types::<Str, SerdeBincode<ProximityPrecision>>().put(
txn,
main_key::PROXIMITY_PRECISION,
&val,
)
}
pub(crate) fn delete_proximity_precision(&self, txn: &mut RwTxn) -> heed::Result<bool> {
self.main.remap_key_type::<Str>().delete(txn, main_key::PROXIMITY_PRECISION)
}
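
A hedged, crate-internal usage sketch for the three new accessors (the setter is `pub(crate)`; the `Index`/`Env` wiring and the `From<heed::Error>` conversion into this crate's error type are assumptions here):

```rust
fn set_and_read(index: &Index, env: &heed::Env) -> crate::Result<ProximityPrecision> {
    let mut wtxn = env.write_txn()?;
    index.put_proximity_precision(&mut wtxn, ProximityPrecision::AttributeScale)?;
    wtxn.commit()?;

    let rtxn = env.read_txn()?;
    // An absent value falls back to the default, WordScale.
    Ok(index.proximity_precision(&rtxn)?.unwrap_or_default())
}
```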
/* script language docids */
@ -1479,7 +1581,7 @@ pub(crate) mod tests {
}
pub fn add_documents_using_wtxn<'t, R>(
&'t self,
wtxn: &mut RwTxn<'t, '_>,
wtxn: &mut RwTxn<'t>,
documents: DocumentsBatchReader<R>,
) -> Result<(), crate::error::Error>
where
@ -1523,7 +1625,7 @@ pub(crate) mod tests {
}
pub fn update_settings_using_wtxn<'t>(
&'t self,
wtxn: &mut RwTxn<'t, '_>,
wtxn: &mut RwTxn<'t>,
update: impl Fn(&mut Settings),
) -> Result<(), crate::error::Error> {
let mut builder = update::Settings::new(wtxn, &self.inner, &self.indexer_config);
@ -1534,7 +1636,7 @@ pub(crate) mod tests {
pub fn delete_documents_using_wtxn<'t>(
&'t self,
wtxn: &mut RwTxn<'t, '_>,
wtxn: &mut RwTxn<'t>,
external_document_ids: Vec<String>,
) {
let builder = IndexDocuments::new(

View File

@ -66,9 +66,9 @@ pub use self::search::{
pub type Result<T> = std::result::Result<T, error::Error>;
pub type Attribute = u32;
pub type BEU16 = heed::zerocopy::U16<heed::byteorder::BE>;
pub type BEU32 = heed::zerocopy::U32<heed::byteorder::BE>;
pub type BEU64 = heed::zerocopy::U64<heed::byteorder::BE>;
pub type BEU16 = heed::types::U16<heed::byteorder::BE>;
pub type BEU32 = heed::types::U32<heed::byteorder::BE>;
pub type BEU64 = heed::types::U64<heed::byteorder::BE>;
pub type DocumentId = u32;
pub type FastMap4<K, V> = HashMap<K, V, BuildHasherDefault<FxHasher32>>;
pub type FastMap8<K, V> = HashMap<K, V, BuildHasherDefault<FxHasher64>>;
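
With the move from `heed::zerocopy` to `heed::types`, these aliases are now integer codecs whose item type is the native integer, which is why the `BEU16::new(...)`/`BEU32::new(...)` wrappers disappear from call sites elsewhere in this diff. A sketch:

```rust
use heed::byteorder::BE;
use heed::types::{Str, U32};

fn lookup<'t>(
    db: heed::Database<U32<BE>, Str>,
    rtxn: &'t heed::RoTxn,
    id: u32,
) -> heed::Result<Option<&'t str>> {
    // Before: db.get(rtxn, &BEU32::new(id)). Big-endian now only describes
    // the stored byte layout, not the Rust-side type.
    db.get(rtxn, &id)
}
```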

View File

@ -1,5 +1,7 @@
use std::cmp;
use serde::{Deserialize, Serialize};
use crate::{relative_from_absolute_position, Position};
pub const MAX_DISTANCE: u32 = 4;
@ -25,3 +27,11 @@ pub fn positions_proximity(lhs: Position, rhs: Position) -> u32 {
pub fn path_proximity(path: &[Position]) -> u32 {
path.windows(2).map(|w| positions_proximity(w[0], w[1])).sum::<u32>()
}
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default)]
#[serde(rename_all = "camelCase")]
pub enum ProximityPrecision {
#[default]
WordScale,
AttributeScale,
}
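
Because of the `camelCase` rename, the setting round-trips through JSON as `"wordScale"`/`"attributeScale"`. A small check of that wire format, assuming `serde_json` is available as a dev-dependency:

```rust
#[test]
fn proximity_precision_wire_format() {
    let p: ProximityPrecision = serde_json::from_str("\"attributeScale\"").unwrap();
    assert_eq!(p, ProximityPrecision::AttributeScale);
    let s = serde_json::to_string(&ProximityPrecision::default()).unwrap();
    assert_eq!(s, "\"wordScale\"");
}
```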

View File

@ -2,7 +2,7 @@ use std::collections::{BTreeMap, HashMap, HashSet};
use std::ops::ControlFlow;
use std::{fmt, mem};
use heed::types::ByteSlice;
use heed::types::Bytes;
use heed::BytesDecode;
use indexmap::IndexMap;
use roaring::RoaringBitmap;
@ -13,7 +13,7 @@ use crate::facet::FacetType;
use crate::heed_codec::facet::{
FacetGroupKeyCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec, OrderedF64Codec,
};
use crate::heed_codec::{ByteSliceRefCodec, StrRefCodec};
use crate::heed_codec::{BytesRefCodec, StrRefCodec};
use crate::search::facet::facet_distribution_iter::{
count_iterate_over_facet_distribution, lexicographically_iterate_over_facet_distribution,
};
@ -105,7 +105,7 @@ impl<'a> FacetDistribution<'a> {
key_buffer.truncate(mem::size_of::<FieldId>());
key_buffer.extend_from_slice(&docid.to_be_bytes());
let iter = db
.remap_key_type::<ByteSlice>()
.remap_key_type::<Bytes>()
.prefix_iter(self.rtxn, &key_buffer)?
.remap_key_type::<FieldDocIdFacetF64Codec>();
@ -129,7 +129,7 @@ impl<'a> FacetDistribution<'a> {
key_buffer.truncate(mem::size_of::<FieldId>());
key_buffer.extend_from_slice(&docid.to_be_bytes());
let iter = db
.remap_key_type::<ByteSlice>()
.remap_key_type::<Bytes>()
.prefix_iter(self.rtxn, &key_buffer)?
.remap_key_type::<FieldDocIdFacetStringCodec>();
@ -172,9 +172,7 @@ impl<'a> FacetDistribution<'a> {
search_function(
self.rtxn,
self.index
.facet_id_f64_docids
.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(),
self.index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>(),
field_id,
candidates,
|facet_key, nbr_docids, _| {
@ -203,9 +201,7 @@ impl<'a> FacetDistribution<'a> {
search_function(
self.rtxn,
self.index
.facet_id_string_docids
.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(),
self.index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>(),
field_id,
candidates,
|facet_key, nbr_docids, any_docid| {

View File

@ -7,7 +7,7 @@ use roaring::RoaringBitmap;
use super::{get_first_facet_value, get_highest_level};
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec};
use crate::heed_codec::ByteSliceRefCodec;
use crate::heed_codec::BytesRefCodec;
use crate::DocumentId;
/// Call the given closure on the facet distribution of the candidate documents.
@ -23,7 +23,7 @@ use crate::DocumentId;
/// keep iterating over the different facet values or stop.
pub fn lexicographically_iterate_over_facet_distribution<'t, CB>(
rtxn: &'t heed::RoTxn<'t>,
db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
field_id: u16,
candidates: &RoaringBitmap,
callback: CB,
@ -34,11 +34,11 @@ where
let mut fd = LexicographicFacetDistribution { rtxn, db, field_id, callback };
let highest_level = get_highest_level(
rtxn,
db.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(),
db.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>(),
field_id,
)?;
if let Some(first_bound) = get_first_facet_value::<ByteSliceRefCodec>(rtxn, db, field_id)? {
if let Some(first_bound) = get_first_facet_value::<BytesRefCodec>(rtxn, db, field_id)? {
fd.iterate(candidates, highest_level, first_bound, usize::MAX)?;
Ok(())
} else {
@ -48,7 +48,7 @@ where
pub fn count_iterate_over_facet_distribution<'t, CB>(
rtxn: &'t heed::RoTxn<'t>,
db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
field_id: u16,
candidates: &RoaringBitmap,
mut callback: CB,
@ -77,11 +77,11 @@ where
let mut heap = BinaryHeap::new();
let highest_level = get_highest_level(
rtxn,
db.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(),
db.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>(),
field_id,
)?;
if let Some(first_bound) = get_first_facet_value::<ByteSliceRefCodec>(rtxn, db, field_id)? {
if let Some(first_bound) = get_first_facet_value::<BytesRefCodec>(rtxn, db, field_id)? {
// We first fill the heap with values from the highest level
let starting_key =
FacetGroupKey { field_id, level: highest_level, left_bound: first_bound };
@ -146,7 +146,7 @@ where
CB: FnMut(&'t [u8], u64, DocumentId) -> Result<ControlFlow<()>>,
{
rtxn: &'t heed::RoTxn<'t>,
db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
field_id: u16,
callback: CB,
}

View File

@ -5,7 +5,7 @@ use roaring::RoaringBitmap;
use super::{get_first_facet_value, get_highest_level, get_last_facet_value};
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec};
use crate::heed_codec::ByteSliceRefCodec;
use crate::heed_codec::BytesRefCodec;
use crate::Result;
/// Find all the document ids for which the given field contains a value contained within
@ -25,11 +25,11 @@ where
let inner;
let left = match left {
Bound::Included(left) => {
inner = BoundCodec::bytes_encode(left).ok_or(heed::Error::Encoding)?;
inner = BoundCodec::bytes_encode(left).map_err(heed::Error::Encoding)?;
Bound::Included(inner.as_ref())
}
Bound::Excluded(left) => {
inner = BoundCodec::bytes_encode(left).ok_or(heed::Error::Encoding)?;
inner = BoundCodec::bytes_encode(left).map_err(heed::Error::Encoding)?;
Bound::Excluded(inner.as_ref())
}
Bound::Unbounded => Bound::Unbounded,
@ -37,25 +37,22 @@ where
let inner;
let right = match right {
Bound::Included(right) => {
inner = BoundCodec::bytes_encode(right).ok_or(heed::Error::Encoding)?;
inner = BoundCodec::bytes_encode(right).map_err(heed::Error::Encoding)?;
Bound::Included(inner.as_ref())
}
Bound::Excluded(right) => {
inner = BoundCodec::bytes_encode(right).ok_or(heed::Error::Encoding)?;
inner = BoundCodec::bytes_encode(right).map_err(heed::Error::Encoding)?;
Bound::Excluded(inner.as_ref())
}
Bound::Unbounded => Bound::Unbounded,
};
let db = db.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>();
let db = db.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>();
let mut f = FacetRangeSearch { rtxn, db, field_id, left, right, docids };
let highest_level = get_highest_level(rtxn, db, field_id)?;
if let Some(starting_left_bound) =
get_first_facet_value::<ByteSliceRefCodec>(rtxn, db, field_id)?
{
let rightmost_bound = Bound::Included(
get_last_facet_value::<ByteSliceRefCodec>(rtxn, db, field_id)?.unwrap(),
); // will not fail because get_first_facet_value succeeded
if let Some(starting_left_bound) = get_first_facet_value::<BytesRefCodec>(rtxn, db, field_id)? {
let rightmost_bound =
Bound::Included(get_last_facet_value::<BytesRefCodec>(rtxn, db, field_id)?.unwrap()); // will not fail because get_first_facet_value succeeded
let group_size = usize::MAX;
f.run(highest_level, starting_left_bound, rightmost_bound, group_size)?;
Ok(())
@ -67,7 +64,7 @@ where
/// Fetch the document ids that have a facet with a value between the two given bounds
struct FacetRangeSearch<'t, 'b, 'bitmap> {
rtxn: &'t heed::RoTxn<'t>,
db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
field_id: u16,
left: Bound<&'b [u8]>,
right: Bound<&'b [u8]>,

View File

@ -5,7 +5,7 @@ use super::{get_first_facet_value, get_highest_level};
use crate::heed_codec::facet::{
FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec,
};
use crate::heed_codec::ByteSliceRefCodec;
use crate::heed_codec::BytesRefCodec;
/// Return an iterator which iterates over the given candidate documents in
/// ascending order of their facet value for the given field id.
@ -31,12 +31,12 @@ use crate::heed_codec::ByteSliceRefCodec;
/// Note that once a document id is returned by the iterator, it is never returned again.
pub fn ascending_facet_sort<'t>(
rtxn: &'t heed::RoTxn<'t>,
db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
field_id: u16,
candidates: RoaringBitmap,
) -> Result<impl Iterator<Item = Result<(RoaringBitmap, &'t [u8])>> + 't> {
let highest_level = get_highest_level(rtxn, db, field_id)?;
if let Some(first_bound) = get_first_facet_value::<ByteSliceRefCodec>(rtxn, db, field_id)? {
if let Some(first_bound) = get_first_facet_value::<BytesRefCodec>(rtxn, db, field_id)? {
let first_key = FacetGroupKey { field_id, level: highest_level, left_bound: first_bound };
let iter = db.range(rtxn, &(first_key..)).unwrap().take(usize::MAX);
@ -53,14 +53,12 @@ pub fn ascending_facet_sort<'t>(
struct AscendingFacetSort<'t, 'e> {
rtxn: &'t heed::RoTxn<'e>,
db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
field_id: u16,
#[allow(clippy::type_complexity)]
stack: Vec<(
RoaringBitmap,
std::iter::Take<
heed::RoRange<'t, FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
>,
std::iter::Take<heed::RoRange<'t, FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>>,
)>,
}

View File

@ -7,21 +7,21 @@ use super::{get_first_facet_value, get_highest_level, get_last_facet_value};
use crate::heed_codec::facet::{
FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec,
};
use crate::heed_codec::ByteSliceRefCodec;
use crate::heed_codec::BytesRefCodec;
/// See the documentation for [`ascending_facet_sort`](super::ascending_facet_sort).
///
/// This function does the same thing, but in the opposite order.
pub fn descending_facet_sort<'t>(
rtxn: &'t heed::RoTxn<'t>,
db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
field_id: u16,
candidates: RoaringBitmap,
) -> Result<impl Iterator<Item = Result<(RoaringBitmap, &'t [u8])>> + 't> {
let highest_level = get_highest_level(rtxn, db, field_id)?;
if let Some(first_bound) = get_first_facet_value::<ByteSliceRefCodec>(rtxn, db, field_id)? {
if let Some(first_bound) = get_first_facet_value::<BytesRefCodec>(rtxn, db, field_id)? {
let first_key = FacetGroupKey { field_id, level: highest_level, left_bound: first_bound };
let last_bound = get_last_facet_value::<ByteSliceRefCodec>(rtxn, db, field_id)?.unwrap();
let last_bound = get_last_facet_value::<BytesRefCodec>(rtxn, db, field_id)?.unwrap();
let last_key = FacetGroupKey { field_id, level: highest_level, left_bound: last_bound };
let iter = db.rev_range(rtxn, &(first_key..=last_key))?.take(usize::MAX);
Ok(itertools::Either::Left(DescendingFacetSort {
@ -37,13 +37,13 @@ pub fn descending_facet_sort<'t>(
struct DescendingFacetSort<'t> {
rtxn: &'t heed::RoTxn<'t>,
db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
field_id: u16,
#[allow(clippy::type_complexity)]
stack: Vec<(
RoaringBitmap,
std::iter::Take<
heed::RoRevRange<'t, FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
heed::RoRevRange<'t, FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
>,
Bound<&'t [u8]>,
)>,
@ -100,7 +100,7 @@ impl<'t> Iterator for DescendingFacetSort<'t> {
*right_bound = Bound::Excluded(left_bound);
let iter = match self
.db
.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>()
.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>()
.rev_range(self.rtxn, &(Bound::Included(starting_key_below), end_key_kelow))
{
Ok(iter) => iter,
@ -123,7 +123,7 @@ mod tests {
use roaring::RoaringBitmap;
use crate::heed_codec::facet::FacetGroupKeyCodec;
use crate::heed_codec::ByteSliceRefCodec;
use crate::heed_codec::BytesRefCodec;
use crate::milli_snap;
use crate::search::facet::facet_sort_descending::descending_facet_sort;
use crate::search::facet::tests::{
@ -144,7 +144,7 @@ mod tests {
let txn = index.env.read_txn().unwrap();
let candidates = (200..=300).collect::<RoaringBitmap>();
let mut results = String::new();
let db = index.content.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>();
let db = index.content.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>();
let iter = descending_facet_sort(&txn, db, 0, candidates).unwrap();
for el in iter {
let (docids, _) = el.unwrap();
@ -167,7 +167,7 @@ mod tests {
let txn = index.env.read_txn().unwrap();
let candidates = (200..=300).collect::<RoaringBitmap>();
let mut results = String::new();
let db = index.content.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>();
let db = index.content.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>();
let iter = descending_facet_sort(&txn, db, 0, candidates.clone()).unwrap();
for el in iter {
let (docids, _) = el.unwrap();

View File

@ -1,13 +1,13 @@
pub use facet_sort_ascending::ascending_facet_sort;
pub use facet_sort_descending::descending_facet_sort;
use heed::types::{ByteSlice, DecodeIgnore};
use heed::types::{Bytes, DecodeIgnore};
use heed::{BytesDecode, RoTxn};
use roaring::RoaringBitmap;
pub use self::facet_distribution::{FacetDistribution, OrderBy, DEFAULT_VALUES_PER_FACET};
pub use self::filter::{BadGeoError, Filter};
use crate::heed_codec::facet::{FacetGroupKeyCodec, FacetGroupValueCodec, OrderedF64Codec};
use crate::heed_codec::ByteSliceRefCodec;
use crate::heed_codec::BytesRefCodec;
use crate::{Index, Result};
mod facet_distribution;
mod facet_distribution_iter;
@ -22,8 +22,10 @@ fn facet_extreme_value<'t>(
let extreme_value =
if let Some(extreme_value) = extreme_it.next() { extreme_value } else { return Ok(None) };
let (_, extreme_value) = extreme_value?;
Ok(OrderedF64Codec::bytes_decode(extreme_value))
OrderedF64Codec::bytes_decode(extreme_value)
.map(Some)
.map_err(heed::Error::Decoding)
.map_err(Into::into)
}
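
The new error plumbing follows from heed 0.20's `BytesDecode` returning `Result<_, BoxedError>` instead of `Option<_>`. A minimal sketch of the bridge into `heed::Error`, reusing this crate's `OrderedF64Codec`:

```rust
use heed::BytesDecode;

use crate::heed_codec::facet::OrderedF64Codec;

fn decode_f64(bytes: &[u8]) -> heed::Result<f64> {
    // heed 0.12 codecs returned Option<_>, hence the old ok_or(...) calls;
    // a failed 0.20 decode wraps its BoxedError into Error::Decoding.
    OrderedF64Codec::bytes_decode(bytes).map_err(heed::Error::Decoding)
}
```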
pub fn facet_min_value<'t>(
@ -32,7 +34,7 @@ pub fn facet_min_value<'t>(
field_id: u16,
candidates: RoaringBitmap,
) -> Result<Option<f64>> {
let db = index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>();
let db = index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>();
let it = ascending_facet_sort(rtxn, db, field_id, candidates)?;
facet_extreme_value(it)
}
@ -43,7 +45,7 @@ pub fn facet_max_value<'t>(
field_id: u16,
candidates: RoaringBitmap,
) -> Result<Option<f64>> {
let db = index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>();
let db = index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>();
let it = descending_facet_sort(rtxn, db, field_id, candidates)?;
facet_extreme_value(it)
}
@ -51,7 +53,7 @@ pub fn facet_max_value<'t>(
/// Get the first facet value in the facet database
pub(crate) fn get_first_facet_value<'t, BoundCodec>(
txn: &'t RoTxn,
db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
field_id: u16,
) -> heed::Result<Option<BoundCodec::DItem>>
where
@ -60,13 +62,12 @@ where
let mut level0prefix = vec![];
level0prefix.extend_from_slice(&field_id.to_be_bytes());
level0prefix.push(0);
let mut level0_iter_forward = db
.as_polymorph()
.prefix_iter::<_, ByteSlice, DecodeIgnore>(txn, level0prefix.as_slice())?;
let mut level0_iter_forward =
db.remap_types::<Bytes, DecodeIgnore>().prefix_iter(txn, level0prefix.as_slice())?;
if let Some(first) = level0_iter_forward.next() {
let (first_key, _) = first?;
let first_key = FacetGroupKeyCodec::<BoundCodec>::bytes_decode(first_key)
.ok_or(heed::Error::Encoding)?;
.map_err(heed::Error::Decoding)?;
Ok(Some(first_key.left_bound))
} else {
Ok(None)
@ -76,7 +77,7 @@ where
/// Get the last facet value in the facet database
pub(crate) fn get_last_facet_value<'t, BoundCodec>(
txn: &'t RoTxn,
db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
field_id: u16,
) -> heed::Result<Option<BoundCodec::DItem>>
where
@ -85,13 +86,12 @@ where
let mut level0prefix = vec![];
level0prefix.extend_from_slice(&field_id.to_be_bytes());
level0prefix.push(0);
let mut level0_iter_backward = db
.as_polymorph()
.rev_prefix_iter::<_, ByteSlice, DecodeIgnore>(txn, level0prefix.as_slice())?;
let mut level0_iter_backward =
db.remap_types::<Bytes, DecodeIgnore>().rev_prefix_iter(txn, level0prefix.as_slice())?;
if let Some(last) = level0_iter_backward.next() {
let (last_key, _) = last?;
let last_key = FacetGroupKeyCodec::<BoundCodec>::bytes_decode(last_key)
.ok_or(heed::Error::Encoding)?;
.map_err(heed::Error::Decoding)?;
Ok(Some(last_key.left_bound))
} else {
Ok(None)
@ -101,17 +101,17 @@ where
/// Get the height of the highest level in the facet database
pub(crate) fn get_highest_level<'t>(
txn: &'t RoTxn<'t>,
db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
field_id: u16,
) -> heed::Result<u8> {
let field_id_prefix = &field_id.to_be_bytes();
Ok(db
.as_polymorph()
.rev_prefix_iter::<_, ByteSlice, DecodeIgnore>(txn, field_id_prefix)?
.remap_types::<Bytes, DecodeIgnore>()
.rev_prefix_iter(txn, field_id_prefix)?
.next()
.map(|el| {
let (key, _) = el.unwrap();
let key = FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_decode(key).unwrap();
let key = FacetGroupKeyCodec::<BytesRefCodec>::bytes_decode(key).unwrap();
key.level
})
.unwrap_or(0))

View File

@ -17,8 +17,7 @@ use crate::error::UserError;
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupValue};
use crate::score_details::{ScoreDetails, ScoringStrategy};
use crate::{
execute_search, AscDesc, DefaultSearchLogger, DocumentId, FieldId, Index, Result,
SearchContext, BEU16,
execute_search, AscDesc, DefaultSearchLogger, DocumentId, FieldId, Index, Result, SearchContext,
};
// Building these factories is not free.
@ -299,7 +298,7 @@ impl<'a> SearchForFacetValues<'a> {
None => return Ok(Vec::new()),
};
let fst = match self.search_query.index.facet_id_string_fst.get(rtxn, &BEU16::new(fid))? {
let fst = match self.search_query.index.facet_id_string_fst.get(rtxn, &fid)? {
Some(fst) => fst,
None => return Ok(vec![]),
};

View File

@ -3,13 +3,14 @@ use std::collections::hash_map::Entry;
use std::hash::Hash;
use fxhash::FxHashMap;
use heed::types::ByteSlice;
use heed::types::Bytes;
use heed::{BytesEncode, Database, RoTxn};
use roaring::RoaringBitmap;
use super::interner::Interned;
use super::Word;
use crate::heed_codec::{BytesDecodeOwned, StrBEU16Codec};
use crate::proximity::ProximityPrecision;
use crate::update::{merge_cbo_roaring_bitmaps, MergeFn};
use crate::{
CboRoaringBitmapCodec, CboRoaringBitmapLenCodec, Result, SearchContext, U8StrStrCodec,
@ -50,7 +51,7 @@ impl<'ctx> DatabaseCache<'ctx> {
cache_key: K1,
db_key: &'v KC::EItem,
cache: &mut FxHashMap<K1, Option<Cow<'ctx, [u8]>>>,
db: Database<KC, ByteSlice>,
db: Database<KC, Bytes>,
) -> Result<Option<DC::DItem>>
where
K1: Copy + Eq + Hash,
@ -63,12 +64,14 @@ impl<'ctx> DatabaseCache<'ctx> {
}
match cache.get(&cache_key).unwrap() {
Some(Cow::Borrowed(bytes)) => {
DC::bytes_decode_owned(bytes).ok_or(heed::Error::Decoding.into()).map(Some)
}
Some(Cow::Owned(bytes)) => {
DC::bytes_decode_owned(bytes).ok_or(heed::Error::Decoding.into()).map(Some)
}
Some(Cow::Borrowed(bytes)) => DC::bytes_decode_owned(bytes)
.map(Some)
.map_err(heed::Error::Decoding)
.map_err(Into::into),
Some(Cow::Owned(bytes)) => DC::bytes_decode_owned(bytes)
.map(Some)
.map_err(heed::Error::Decoding)
.map_err(Into::into),
None => Ok(None),
}
}
@ -78,7 +81,7 @@ impl<'ctx> DatabaseCache<'ctx> {
cache_key: K1,
db_keys: &'v [KC::EItem],
cache: &mut FxHashMap<K1, Option<Cow<'ctx, [u8]>>>,
db: Database<KC, ByteSlice>,
db: Database<KC, Bytes>,
merger: MergeFn,
) -> Result<Option<DC::DItem>>
where
@ -110,12 +113,14 @@ impl<'ctx> DatabaseCache<'ctx> {
}
match cache.get(&cache_key).unwrap() {
Some(Cow::Borrowed(bytes)) => {
DC::bytes_decode_owned(bytes).ok_or(heed::Error::Decoding.into()).map(Some)
}
Some(Cow::Owned(bytes)) => {
DC::bytes_decode_owned(bytes).ok_or(heed::Error::Decoding.into()).map(Some)
}
Some(Cow::Borrowed(bytes)) => DC::bytes_decode_owned(bytes)
.map(Some)
.map_err(heed::Error::Decoding)
.map_err(Into::into),
Some(Cow::Owned(bytes)) => DC::bytes_decode_owned(bytes)
.map(Some)
.map_err(heed::Error::Decoding)
.map_err(Into::into),
None => Ok(None),
}
}
@ -164,7 +169,7 @@ impl<'ctx> SearchContext<'ctx> {
word,
&keys[..],
&mut self.db_cache.word_docids,
self.index.word_fid_docids.remap_data_type::<ByteSlice>(),
self.index.word_fid_docids.remap_data_type::<Bytes>(),
merge_cbo_roaring_bitmaps,
)
}
@ -173,7 +178,7 @@ impl<'ctx> SearchContext<'ctx> {
word,
self.word_interner.get(word).as_str(),
&mut self.db_cache.word_docids,
self.index.word_docids.remap_data_type::<ByteSlice>(),
self.index.word_docids.remap_data_type::<Bytes>(),
),
}
}
@ -187,7 +192,7 @@ impl<'ctx> SearchContext<'ctx> {
word,
self.word_interner.get(word).as_str(),
&mut self.db_cache.exact_word_docids,
self.index.exact_word_docids.remap_data_type::<ByteSlice>(),
self.index.exact_word_docids.remap_data_type::<Bytes>(),
)
}
@ -226,7 +231,7 @@ impl<'ctx> SearchContext<'ctx> {
prefix,
&keys[..],
&mut self.db_cache.word_prefix_docids,
self.index.word_prefix_fid_docids.remap_data_type::<ByteSlice>(),
self.index.word_prefix_fid_docids.remap_data_type::<Bytes>(),
merge_cbo_roaring_bitmaps,
)
}
@ -235,7 +240,7 @@ impl<'ctx> SearchContext<'ctx> {
prefix,
self.word_interner.get(prefix).as_str(),
&mut self.db_cache.word_prefix_docids,
self.index.word_prefix_docids.remap_data_type::<ByteSlice>(),
self.index.word_prefix_docids.remap_data_type::<Bytes>(),
),
}
}
@ -249,7 +254,7 @@ impl<'ctx> SearchContext<'ctx> {
prefix,
self.word_interner.get(prefix).as_str(),
&mut self.db_cache.exact_word_prefix_docids,
self.index.exact_word_prefix_docids.remap_data_type::<ByteSlice>(),
self.index.exact_word_prefix_docids.remap_data_type::<Bytes>(),
)
}
@ -259,17 +264,67 @@ impl<'ctx> SearchContext<'ctx> {
word2: Interned<String>,
proximity: u8,
) -> Result<Option<RoaringBitmap>> {
DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
self.txn,
(proximity, word1, word2),
&(
proximity,
self.word_interner.get(word1).as_str(),
self.word_interner.get(word2).as_str(),
),
&mut self.db_cache.word_pair_proximity_docids,
self.index.word_pair_proximity_docids.remap_data_type::<ByteSlice>(),
)
match self.index.proximity_precision(self.txn)?.unwrap_or_default() {
ProximityPrecision::AttributeScale => {
// Force proximity to 0 because in AttributeScale there are only 2 possible distances:
// 1. words in the same attribute: in that case the DB contains (0, word1, word2)
// 2. words in different attributes: no DB entry exists for these two words.
let proximity = 0;
let docids = if let Some(docids) =
self.db_cache.word_pair_proximity_docids.get(&(proximity, word1, word2))
{
docids
.as_ref()
.map(|d| CboRoaringBitmapCodec::bytes_decode_owned(d))
.transpose()
.map_err(heed::Error::Decoding)?
} else {
// Compute the distance at the attribute level and store it in the cache.
let fids = if let Some(fids) = self.index.searchable_fields_ids(self.txn)? {
fids
} else {
self.index.fields_ids_map(self.txn)?.ids().collect()
};
let mut docids = RoaringBitmap::new();
for fid in fids {
// For each field, intersect the left and right word bitmaps,
// then merge the result into a global bitmap before storing it in the cache.
let word1_docids = self.get_db_word_fid_docids(word1, fid)?;
let word2_docids = self.get_db_word_fid_docids(word2, fid)?;
if let (Some(word1_docids), Some(word2_docids)) =
(word1_docids, word2_docids)
{
docids |= word1_docids & word2_docids;
}
}
let encoded = CboRoaringBitmapCodec::bytes_encode(&docids)
.map(Cow::into_owned)
.map(Cow::Owned)
.map(Some)
.map_err(heed::Error::Encoding)?;
self.db_cache
.word_pair_proximity_docids
.insert((proximity, word1, word2), encoded);
Some(docids)
};
Ok(docids)
}
ProximityPrecision::WordScale => {
DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
self.txn,
(proximity, word1, word2),
&(
proximity,
self.word_interner.get(word1).as_str(),
self.word_interner.get(word2).as_str(),
),
&mut self.db_cache.word_pair_proximity_docids,
self.index.word_pair_proximity_docids.remap_data_type::<Bytes>(),
)
}
}
}
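
Distilled, the AttributeScale branch computes pair docids as the union over searchable fields of the intersection of both words' per-field bitmaps. A hypothetical standalone version of that loop:

```rust
use roaring::RoaringBitmap;

fn attribute_scale_pair_docids(
    per_field: &[(Option<RoaringBitmap>, Option<RoaringBitmap>)],
) -> RoaringBitmap {
    // Two words are "close" at attribute scale iff some field contains both.
    let mut docids = RoaringBitmap::new();
    for (word1_docids, word2_docids) in per_field {
        if let (Some(w1), Some(w2)) = (word1_docids, word2_docids) {
            docids |= w1 & w2;
        }
    }
    docids
}
```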
pub fn get_db_word_pair_proximity_docids_len(
@ -278,54 +333,95 @@ impl<'ctx> SearchContext<'ctx> {
word2: Interned<String>,
proximity: u8,
) -> Result<Option<u64>> {
DatabaseCache::get_value::<_, _, CboRoaringBitmapLenCodec>(
self.txn,
(proximity, word1, word2),
&(
proximity,
self.word_interner.get(word1).as_str(),
self.word_interner.get(word2).as_str(),
),
&mut self.db_cache.word_pair_proximity_docids,
self.index.word_pair_proximity_docids.remap_data_type::<ByteSlice>(),
)
match self.index.proximity_precision(self.txn)?.unwrap_or_default() {
ProximityPrecision::AttributeScale => Ok(self
.get_db_word_pair_proximity_docids(word1, word2, proximity)?
.map(|d| d.len())),
ProximityPrecision::WordScale => {
DatabaseCache::get_value::<_, _, CboRoaringBitmapLenCodec>(
self.txn,
(proximity, word1, word2),
&(
proximity,
self.word_interner.get(word1).as_str(),
self.word_interner.get(word2).as_str(),
),
&mut self.db_cache.word_pair_proximity_docids,
self.index.word_pair_proximity_docids.remap_data_type::<Bytes>(),
)
}
}
}
pub fn get_db_word_prefix_pair_proximity_docids(
&mut self,
word1: Interned<String>,
prefix2: Interned<String>,
proximity: u8,
mut proximity: u8,
) -> Result<Option<RoaringBitmap>> {
let docids = match self
.db_cache
.word_prefix_pair_proximity_docids
.entry((proximity, word1, prefix2))
{
Entry::Occupied(docids) => docids.get().clone(),
Entry::Vacant(entry) => {
// compute docids using prefix iter and store the result in the cache.
let key = U8StrStrCodec::bytes_encode(&(
proximity,
self.word_interner.get(word1).as_str(),
self.word_interner.get(prefix2).as_str(),
))
.unwrap()
.into_owned();
let mut prefix_docids = RoaringBitmap::new();
let remap_key_type = self
.index
.word_pair_proximity_docids
.remap_key_type::<ByteSlice>()
.prefix_iter(self.txn, &key)?;
for result in remap_key_type {
let (_, docids) = result?;
let proximity_precision = self.index.proximity_precision(self.txn)?.unwrap_or_default();
if proximity_precision == ProximityPrecision::AttributeScale {
// Force proximity to 0 because in AttributeScale there are only 2 possible distances:
// 1. words in the same attribute: in that case the DB contains (0, word1, word2)
// 2. words in different attributes: no DB entry exists for these two words.
proximity = 0;
}
prefix_docids |= docids;
let docids = if let Some(docids) =
self.db_cache.word_prefix_pair_proximity_docids.get(&(proximity, word1, prefix2))
{
docids.clone()
} else {
let prefix_docids = match proximity_precision {
ProximityPrecision::AttributeScale => {
// Compute the distance at the attribute level and store it in the cache.
let fids = if let Some(fids) = self.index.searchable_fields_ids(self.txn)? {
fids
} else {
self.index.fields_ids_map(self.txn)?.ids().collect()
};
let mut prefix_docids = RoaringBitmap::new();
// For each field, intersect the left and right word bitmaps,
// then merge the result into a global bitmap before storing it in the cache.
for fid in fids {
let word1_docids = self.get_db_word_fid_docids(word1, fid)?;
let prefix2_docids = self.get_db_word_prefix_fid_docids(prefix2, fid)?;
if let (Some(word1_docids), Some(prefix2_docids)) =
(word1_docids, prefix2_docids)
{
prefix_docids |= word1_docids & prefix2_docids;
}
}
prefix_docids
}
entry.insert(Some(prefix_docids.clone()));
Some(prefix_docids)
}
ProximityPrecision::WordScale => {
// compute docids using prefix iter and store the result in the cache.
let key = U8StrStrCodec::bytes_encode(&(
proximity,
self.word_interner.get(word1).as_str(),
self.word_interner.get(prefix2).as_str(),
))
.unwrap()
.into_owned();
let mut prefix_docids = RoaringBitmap::new();
let remap_key_type = self
.index
.word_pair_proximity_docids
.remap_key_type::<Bytes>()
.prefix_iter(self.txn, &key)?;
for result in remap_key_type {
let (_, docids) = result?;
prefix_docids |= docids;
}
prefix_docids
}
};
self.db_cache
.word_prefix_pair_proximity_docids
.insert((proximity, word1, prefix2), Some(prefix_docids.clone()));
Some(prefix_docids)
};
Ok(docids)
}
@ -355,7 +451,7 @@ impl<'ctx> SearchContext<'ctx> {
(word, fid),
&(self.word_interner.get(word).as_str(), fid),
&mut self.db_cache.word_fid_docids,
self.index.word_fid_docids.remap_data_type::<ByteSlice>(),
self.index.word_fid_docids.remap_data_type::<Bytes>(),
)
}
@ -374,7 +470,7 @@ impl<'ctx> SearchContext<'ctx> {
(word_prefix, fid),
&(self.word_interner.get(word_prefix).as_str(), fid),
&mut self.db_cache.word_prefix_fid_docids,
self.index.word_prefix_fid_docids.remap_data_type::<ByteSlice>(),
self.index.word_prefix_fid_docids.remap_data_type::<Bytes>(),
)
}
@ -388,7 +484,7 @@ impl<'ctx> SearchContext<'ctx> {
let remap_key_type = self
.index
.word_fid_docids
.remap_types::<ByteSlice, ByteSlice>()
.remap_types::<Bytes, Bytes>()
.prefix_iter(self.txn, &key)?
.remap_key_type::<StrBEU16Codec>();
for result in remap_key_type {
@ -414,7 +510,7 @@ impl<'ctx> SearchContext<'ctx> {
let remap_key_type = self
.index
.word_prefix_fid_docids
.remap_types::<ByteSlice, ByteSlice>()
.remap_types::<Bytes, Bytes>()
.prefix_iter(self.txn, &key)?
.remap_key_type::<StrBEU16Codec>();
for result in remap_key_type {
@ -442,7 +538,7 @@ impl<'ctx> SearchContext<'ctx> {
(word, position),
&(self.word_interner.get(word).as_str(), position),
&mut self.db_cache.word_position_docids,
self.index.word_position_docids.remap_data_type::<ByteSlice>(),
self.index.word_position_docids.remap_data_type::<Bytes>(),
)
}
@ -456,7 +552,7 @@ impl<'ctx> SearchContext<'ctx> {
(word_prefix, position),
&(self.word_interner.get(word_prefix).as_str(), position),
&mut self.db_cache.word_prefix_position_docids,
self.index.word_prefix_position_docids.remap_data_type::<ByteSlice>(),
self.index.word_prefix_position_docids.remap_data_type::<Bytes>(),
)
}
@ -470,7 +566,7 @@ impl<'ctx> SearchContext<'ctx> {
let remap_key_type = self
.index
.word_position_docids
.remap_types::<ByteSlice, ByteSlice>()
.remap_types::<Bytes, Bytes>()
.prefix_iter(self.txn, &key)?
.remap_key_type::<StrBEU16Codec>();
for result in remap_key_type {
@ -501,7 +597,7 @@ impl<'ctx> SearchContext<'ctx> {
let remap_key_type = self
.index
.word_prefix_position_docids
.remap_types::<ByteSlice, ByteSlice>()
.remap_types::<Bytes, Bytes>()
.prefix_iter(self.txn, &key)?
.remap_key_type::<StrBEU16Codec>();
for result in remap_key_type {

View File

@ -1,4 +1,4 @@
use heed::types::{ByteSlice, Str, Unit};
use heed::types::{Bytes, Str, Unit};
use heed::{Database, RoPrefix, RoTxn};
use roaring::RoaringBitmap;
@ -8,7 +8,7 @@ const DOCID_SIZE: usize = 4;
use crate::heed_codec::facet::{
FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec, FieldDocIdFacetCodec,
};
use crate::heed_codec::ByteSliceRefCodec;
use crate::heed_codec::BytesRefCodec;
use crate::{Index, Result, SearchContext};
pub struct DistinctOutput {
@ -71,7 +71,7 @@ pub fn distinct_single_docid(
/// Return all the docids containing the given value in the given field
fn facet_value_docids(
database: Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
database: Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
txn: &RoTxn,
field_id: u16,
facet_value: &[u8],
@ -87,12 +87,12 @@ fn facet_number_values<'a>(
field_id: u16,
index: &Index,
txn: &'a RoTxn,
) -> Result<RoPrefix<'a, FieldDocIdFacetCodec<ByteSliceRefCodec>, Unit>> {
) -> Result<RoPrefix<'a, FieldDocIdFacetCodec<BytesRefCodec>, Unit>> {
let key = facet_values_prefix_key(field_id, docid);
let iter = index
.field_id_docid_facet_f64s
.remap_key_type::<ByteSlice>()
.remap_key_type::<Bytes>()
.prefix_iter(txn, &key)?
.remap_key_type();
@ -105,12 +105,12 @@ pub fn facet_string_values<'a>(
field_id: u16,
index: &Index,
txn: &'a RoTxn,
) -> Result<RoPrefix<'a, FieldDocIdFacetCodec<ByteSliceRefCodec>, Str>> {
) -> Result<RoPrefix<'a, FieldDocIdFacetCodec<BytesRefCodec>, Str>> {
let key = facet_values_prefix_key(field_id, docid);
let iter = index
.field_id_docid_facet_strings
.remap_key_type::<ByteSlice>()
.remap_key_type::<Bytes>()
.prefix_iter(txn, &key)?
.remap_types();

View File

@ -1,7 +1,7 @@
use std::collections::VecDeque;
use std::iter::FromIterator;
use heed::types::{ByteSlice, Unit};
use heed::types::{Bytes, Unit};
use heed::{RoPrefix, RoTxn};
use roaring::RoaringBitmap;
use rstar::RTree;
@ -34,7 +34,7 @@ fn facet_number_values<'a>(
let iter = index
.field_id_docid_facet_f64s
.remap_key_type::<ByteSlice>()
.remap_key_type::<Bytes>()
.prefix_iter(txn, &key)?
.remap_key_type();
@ -163,7 +163,7 @@ impl<Q: RankingRuleQueryTrait> GeoSort<Q> {
// Computing the distance between two points is expensive, thus we cache the result.
documents
.sort_by_cached_key(|(_, p)| distance_between_two_points(&self.point, p) as usize);
self.cached_sorted_docids.extend(documents.into_iter());
self.cached_sorted_docids.extend(documents);
};
Ok(())

View File

@ -228,7 +228,7 @@ impl<T> Ord for Interned<T> {
impl<T> PartialOrd for Interned<T> {
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
self.idx.partial_cmp(&other.idx)
Some(self.cmp(other))
}
}
@ -241,7 +241,7 @@ impl<T> PartialEq for Interned<T> {
}
impl<T> Clone for Interned<T> {
fn clone(&self) -> Self {
Self { idx: self.idx, _phantom: PhantomData }
*self
}
}

View File

@ -50,9 +50,7 @@ use crate::distance::NDotProductPoint;
use crate::error::FieldIdMapMissingEntry;
use crate::score_details::{ScoreDetails, ScoringStrategy};
use crate::search::new::distinct::apply_distinct_rule;
use crate::{
AscDesc, DocumentId, Filter, Index, Member, Result, TermsMatchingStrategy, UserError, BEU32,
};
use crate::{AscDesc, DocumentId, Filter, Index, Member, Result, TermsMatchingStrategy, UserError};
/// A structure used throughout the execution of a search query.
pub struct SearchContext<'ctx> {
@ -451,8 +449,8 @@ pub fn execute_search(
let mut docids = Vec::new();
let mut uniq_docids = RoaringBitmap::new();
for instant_distance::Item { distance: _, pid, point: _ } in neighbors {
let index = BEU32::new(pid.into_inner());
let docid = ctx.index.vector_id_docid.get(ctx.txn, &index)?.unwrap().get();
let index = pid.into_inner();
let docid = ctx.index.vector_id_docid.get(ctx.txn, &index)?.unwrap();
if universe.contains(docid) && uniq_docids.insert(docid) {
docids.push(docid);
if docids.len() == (from + length) {
@ -609,7 +607,8 @@ fn check_sort_criteria(ctx: &SearchContext, sort_criteria: Option<&Vec<AscDesc>>
field: field.to_string(),
valid_fields,
hidden_fields,
})?;
}
.into());
}
Member::Geo(_) if !sortable_fields.contains("_geo") => {
let (valid_fields, hidden_fields) =
@ -619,7 +618,8 @@ fn check_sort_criteria(ctx: &SearchContext, sort_criteria: Option<&Vec<AscDesc>>
field: "_geo".to_string(),
valid_fields,
hidden_fields,
})?;
}
.into());
}
_ => (),
}

View File

@ -175,7 +175,7 @@ impl QueryTermSubset {
pub fn use_prefix_db(&self, ctx: &SearchContext) -> Option<Word> {
let original = ctx.term_interner.get(self.original);
let Some(use_prefix_db) = original.zero_typo.use_prefix_db else { return None };
let use_prefix_db = original.zero_typo.use_prefix_db?;
let word = match &self.zero_typo_subset {
NTypoTermSubset::All => Some(use_prefix_db),
NTypoTermSubset::Subset { words, phrases: _ } => {

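The change above swaps a `let ... else { return None }` for the `?` operator, which early-returns `None` when applied to an `Option` inside an `Option`-returning function. A minimal illustration:

```rust
fn first_char_upper(s: Option<&str>) -> Option<char> {
    // was: let Some(s) = s else { return None };
    let c = s?.chars().next()?;
    Some(c.to_ascii_uppercase())
}
```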
View File

@ -4,7 +4,7 @@ use roaring::RoaringBitmap;
use super::logger::SearchLogger;
use super::{RankingRule, RankingRuleOutput, RankingRuleQueryTrait, SearchContext};
use crate::heed_codec::facet::{FacetGroupKeyCodec, OrderedF64Codec};
use crate::heed_codec::{ByteSliceRefCodec, StrRefCodec};
use crate::heed_codec::{BytesRefCodec, StrRefCodec};
use crate::score_details::{self, ScoreDetails};
use crate::search::facet::{ascending_facet_sort, descending_facet_sort};
use crate::{FieldId, Index, Result};
@ -100,11 +100,11 @@ impl<'ctx, Query: RankingRuleQueryTrait> RankingRule<'ctx, Query> for Sort<'ctx,
let number_db = ctx
.index
.facet_id_f64_docids
.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>();
.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>();
let string_db = ctx
.index
.facet_id_string_docids
.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>();
.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>();
let (number_iter, string_iter) = if self.is_ascending {
let number_iter = ascending_facet_sort(

View File

@ -124,8 +124,7 @@ fn test_attribute_fid_simple() {
s.query("the quick brown fox jumps over the lazy dog");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
let document_ids_scores: Vec<_> =
documents_ids.iter().zip(document_scores.into_iter()).collect();
let document_ids_scores: Vec<_> = documents_ids.iter().zip(document_scores).collect();
insta::assert_snapshot!(format!("{document_ids_scores:#?}"));
}
@ -142,7 +141,6 @@ fn test_attribute_fid_ngrams() {
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
let document_ids_scores: Vec<_> =
documents_ids.iter().zip(document_scores.into_iter()).collect();
let document_ids_scores: Vec<_> = documents_ids.iter().zip(document_scores).collect();
insta::assert_snapshot!(format!("{document_ids_scores:#?}"));
}
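The long run of test hunks that follows is one mechanical clippy fix repeated: `zip` already takes any `IntoIterator`, so the explicit `.into_iter()` on the `Vec` was redundant. For the record:

```rust
fn paired(documents_ids: &[u32], document_scores: Vec<f32>) -> Vec<(u32, f32)> {
    // Same behaviour as `.zip(document_scores.into_iter())`.
    documents_ids.iter().copied().zip(document_scores).collect()
}
```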

View File

@ -141,8 +141,7 @@ fn test_attribute_position_simple() {
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
let document_ids_scores: Vec<_> =
documents_ids.iter().zip(document_scores.into_iter()).collect();
let document_ids_scores: Vec<_> = documents_ids.iter().zip(document_scores).collect();
insta::assert_snapshot!(format!("{document_ids_scores:#?}"));
}
#[test]
@ -158,8 +157,7 @@ fn test_attribute_position_repeated() {
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
let document_ids_scores: Vec<_> =
documents_ids.iter().zip(document_scores.into_iter()).collect();
let document_ids_scores: Vec<_> = documents_ids.iter().zip(document_scores).collect();
insta::assert_snapshot!(format!("{document_ids_scores:#?}"));
}
@ -176,8 +174,7 @@ fn test_attribute_position_different_fields() {
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
let document_ids_scores: Vec<_> =
documents_ids.iter().zip(document_scores.into_iter()).collect();
let document_ids_scores: Vec<_> = documents_ids.iter().zip(document_scores).collect();
insta::assert_snapshot!(format!("{document_ids_scores:#?}"));
}
@ -194,7 +191,6 @@ fn test_attribute_position_ngrams() {
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
let document_ids_scores: Vec<_> =
documents_ids.iter().zip(document_scores.into_iter()).collect();
let document_ids_scores: Vec<_> = documents_ids.iter().zip(document_scores).collect();
insta::assert_snapshot!(format!("{document_ids_scores:#?}"));
}

View File

@ -478,8 +478,7 @@ fn test_exactness_simple_ordered() {
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
let document_ids_scores: Vec<_> =
documents_ids.iter().zip(document_scores.into_iter()).collect();
let document_ids_scores: Vec<_> = documents_ids.iter().zip(document_scores).collect();
insta::assert_snapshot!(format!("{document_ids_scores:#?}"));
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
@ -511,8 +510,7 @@ fn test_exactness_simple_reversed() {
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
let document_ids_scores: Vec<_> =
documents_ids.iter().zip(document_scores.into_iter()).collect();
let document_ids_scores: Vec<_> = documents_ids.iter().zip(document_scores).collect();
insta::assert_snapshot!(format!("{document_ids_scores:#?}"));
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
@ -535,8 +533,7 @@ fn test_exactness_simple_reversed() {
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
let document_ids_scores: Vec<_> =
documents_ids.iter().zip(document_scores.into_iter()).collect();
let document_ids_scores: Vec<_> = documents_ids.iter().zip(document_scores).collect();
insta::assert_snapshot!(format!("{document_ids_scores:#?}"));
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
@ -566,8 +563,7 @@ fn test_exactness_simple_random() {
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
let document_ids_scores: Vec<_> =
documents_ids.iter().zip(document_scores.into_iter()).collect();
let document_ids_scores: Vec<_> = documents_ids.iter().zip(document_scores).collect();
insta::assert_snapshot!(format!("{document_ids_scores:#?}"));
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
@ -596,8 +592,7 @@ fn test_exactness_attribute_starts_with_simple() {
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
let document_ids_scores: Vec<_> =
documents_ids.iter().zip(document_scores.into_iter()).collect();
let document_ids_scores: Vec<_> = documents_ids.iter().zip(document_scores).collect();
insta::assert_snapshot!(format!("{document_ids_scores:#?}"));
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
@ -623,8 +618,7 @@ fn test_exactness_attribute_starts_with_phrase() {
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
let document_ids_scores: Vec<_> =
documents_ids.iter().zip(document_scores.into_iter()).collect();
let document_ids_scores: Vec<_> = documents_ids.iter().zip(document_scores).collect();
insta::assert_snapshot!(format!("{document_ids_scores:#?}"));
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
@ -644,8 +638,7 @@ fn test_exactness_attribute_starts_with_phrase() {
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
let document_ids_scores: Vec<_> =
documents_ids.iter().zip(document_scores.into_iter()).collect();
let document_ids_scores: Vec<_> = documents_ids.iter().zip(document_scores).collect();
insta::assert_snapshot!(format!("{document_ids_scores:#?}"));
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
@ -674,8 +667,7 @@ fn test_exactness_all_candidates_with_typo() {
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
let document_ids_scores: Vec<_> =
documents_ids.iter().zip(document_scores.into_iter()).collect();
let document_ids_scores: Vec<_> = documents_ids.iter().zip(document_scores).collect();
insta::assert_snapshot!(format!("{document_ids_scores:#?}"));
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
@ -711,8 +703,7 @@ fn test_exactness_after_words() {
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
let document_ids_scores: Vec<_> =
documents_ids.iter().zip(document_scores.into_iter()).collect();
let document_ids_scores: Vec<_> = documents_ids.iter().zip(document_scores).collect();
insta::assert_snapshot!(format!("{document_ids_scores:#?}"));
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
@ -760,8 +751,7 @@ fn test_words_after_exactness() {
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
let document_ids_scores: Vec<_> =
documents_ids.iter().zip(document_scores.into_iter()).collect();
let document_ids_scores: Vec<_> = documents_ids.iter().zip(document_scores).collect();
insta::assert_snapshot!(format!("{document_ids_scores:#?}"));
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[19, 9, 18, 8, 17, 16, 6, 7, 15, 5, 14, 4, 13, 3, 12, 2, 1, 11]");
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
@ -809,8 +799,7 @@ fn test_proximity_after_exactness() {
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
let document_ids_scores: Vec<_> =
documents_ids.iter().zip(document_scores.into_iter()).collect();
let document_ids_scores: Vec<_> = documents_ids.iter().zip(document_scores).collect();
insta::assert_snapshot!(format!("{document_ids_scores:#?}"));
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[2, 1, 0, 4, 5, 8, 7, 3, 6]");
@ -847,8 +836,7 @@ fn test_proximity_after_exactness() {
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
let document_ids_scores: Vec<_> =
documents_ids.iter().zip(document_scores.into_iter()).collect();
let document_ids_scores: Vec<_> = documents_ids.iter().zip(document_scores).collect();
insta::assert_snapshot!(format!("{document_ids_scores:#?}"));
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 1, 2]");
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
@ -881,8 +869,7 @@ fn test_exactness_followed_by_typo_prefer_no_typo_prefix() {
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
let document_ids_scores: Vec<_> =
documents_ids.iter().zip(document_scores.into_iter()).collect();
let document_ids_scores: Vec<_> = documents_ids.iter().zip(document_scores).collect();
insta::assert_snapshot!(format!("{document_ids_scores:#?}"));
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[2, 1, 0, 4, 3]");
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
@ -917,8 +904,7 @@ fn test_typo_followed_by_exactness() {
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
let document_ids_scores: Vec<_> =
documents_ids.iter().zip(document_scores.into_iter()).collect();
let document_ids_scores: Vec<_> = documents_ids.iter().zip(document_scores).collect();
insta::assert_snapshot!(format!("{document_ids_scores:#?}"));
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[1, 0, 4, 3]");
let texts = collect_field_values(&index, &txn, "text", &documents_ids);

View File

@ -1,15 +1,16 @@
use heed::RwTxn;
use roaring::RoaringBitmap;
use time::OffsetDateTime;
use crate::{FieldDistribution, Index, Result};
pub struct ClearDocuments<'t, 'u, 'i> {
wtxn: &'t mut heed::RwTxn<'i, 'u>,
pub struct ClearDocuments<'t, 'i> {
wtxn: &'t mut RwTxn<'i>,
index: &'i Index,
}
impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> {
pub fn new(wtxn: &'t mut heed::RwTxn<'i, 'u>, index: &'i Index) -> ClearDocuments<'t, 'u, 'i> {
impl<'t, 'i> ClearDocuments<'t, 'i> {
pub fn new(wtxn: &'t mut RwTxn<'i>, index: &'i Index) -> ClearDocuments<'t, 'i> {
ClearDocuments { wtxn, index }
}

View File

@ -2,8 +2,8 @@ use std::fs::File;
use std::io::BufReader;
use grenad::CompressionType;
use heed::types::ByteSlice;
use heed::{BytesDecode, BytesEncode, Error, RoTxn, RwTxn};
use heed::types::Bytes;
use heed::{BytesDecode, BytesEncode, Error, PutFlags, RoTxn, RwTxn};
use roaring::RoaringBitmap;
use super::{FACET_GROUP_SIZE, FACET_MIN_LEVEL_SIZE};
@ -11,7 +11,7 @@ use crate::facet::FacetType;
use crate::heed_codec::facet::{
FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec,
};
use crate::heed_codec::ByteSliceRefCodec;
use crate::heed_codec::BytesRefCodec;
use crate::update::del_add::{DelAdd, KvReaderDelAdd};
use crate::update::index_documents::{create_writer, valid_lmdb_key, writer_into_reader};
use crate::{CboRoaringBitmapCodec, CboRoaringBitmapLenCodec, FieldId, Index, Result};
@ -70,11 +70,11 @@ impl<'i> FacetsUpdateBulk<'i> {
let Self { index, field_ids, group_size, min_level_size, facet_type, delta_data } = self;
let db = match facet_type {
FacetType::String => index
.facet_id_string_docids
.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(),
FacetType::String => {
index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>()
}
FacetType::Number => {
index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>()
index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>()
}
};
@ -88,7 +88,7 @@ impl<'i> FacetsUpdateBulk<'i> {
/// Implementation of `FacetsUpdateBulk` that is independent of milli's `Index` type
pub(crate) struct FacetsUpdateBulkInner<R: std::io::Read + std::io::Seek> {
pub db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
pub db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
pub delta_data: Option<grenad::Reader<R>>,
pub group_size: u8,
pub min_level_size: u8,
@ -106,7 +106,7 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
for level_reader in level_readers {
let mut cursor = level_reader.into_cursor()?;
while let Some((k, v)) = cursor.move_on_next()? {
self.db.remap_types::<ByteSlice, ByteSlice>().put(wtxn, k, v)?;
self.db.remap_types::<Bytes, Bytes>().put(wtxn, k, v)?;
}
}
}
@ -128,7 +128,7 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
};
if self.db.is_empty(wtxn)? {
let mut buffer = Vec::new();
let mut database = self.db.iter_mut(wtxn)?.remap_types::<ByteSlice, ByteSlice>();
let mut database = self.db.iter_mut(wtxn)?.remap_types::<Bytes, Bytes>();
let mut cursor = delta_data.into_cursor()?;
while let Some((key, value)) = cursor.move_on_next()? {
if !valid_lmdb_key(key) {
@ -146,11 +146,13 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
buffer.push(1);
// then we extend the buffer with the docids bitmap
buffer.extend_from_slice(value);
unsafe { database.append(key, &buffer)? };
unsafe {
database.put_current_with_options::<Bytes>(PutFlags::APPEND, key, &buffer)?
};
}
} else {
let mut buffer = Vec::new();
let database = self.db.remap_types::<ByteSlice, ByteSlice>();
let database = self.db.remap_types::<Bytes, Bytes>();
let mut cursor = delta_data.into_cursor()?;
while let Some((key, value)) = cursor.move_on_next()? {
@ -219,9 +221,9 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
let level_0_iter = self
.db
.as_polymorph()
.prefix_iter::<_, ByteSlice, ByteSlice>(rtxn, level_0_prefix.as_slice())?
.remap_types::<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>();
.remap_types::<Bytes, Bytes>()
.prefix_iter(rtxn, level_0_prefix.as_slice())?
.remap_types::<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>();
let mut left_bound: &[u8] = &[];
let mut first_iteration_for_new_group = true;
@ -307,11 +309,11 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
bitmaps.drain(..).zip(left_bounds.drain(..)).zip(group_sizes.drain(..))
{
let key = FacetGroupKey { field_id, level, left_bound };
let key = FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_encode(&key)
.ok_or(Error::Encoding)?;
let key = FacetGroupKeyCodec::<BytesRefCodec>::bytes_encode(&key)
.map_err(Error::Encoding)?;
let value = FacetGroupValue { size: group_size, bitmap };
let value =
FacetGroupValueCodec::bytes_encode(&value).ok_or(Error::Encoding)?;
FacetGroupValueCodec::bytes_encode(&value).map_err(Error::Encoding)?;
cur_writer.insert(key, value)?;
cur_writer_len += 1;
}
@ -336,10 +338,10 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
bitmaps.drain(..).zip(left_bounds.drain(..)).zip(group_sizes.drain(..))
{
let key = FacetGroupKey { field_id, level, left_bound };
let key = FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_encode(&key)
.ok_or(Error::Encoding)?;
let key = FacetGroupKeyCodec::<BytesRefCodec>::bytes_encode(&key)
.map_err(Error::Encoding)?;
let value = FacetGroupValue { size: group_size, bitmap };
let value = FacetGroupValueCodec::bytes_encode(&value).ok_or(Error::Encoding)?;
let value = FacetGroupValueCodec::bytes_encode(&value).map_err(Error::Encoding)?;
cur_writer.insert(key, value)?;
cur_writer_len += 1;
}
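The removed `append` method resurfaces as an explicit put with `PutFlags::APPEND`: inside the mutable iterator above it is `put_current_with_options`, and on a standalone database it is `put_with_flags`, as in this sketch:

```rust
use heed::types::Bytes;
use heed::{Database, PutFlags, RwTxn};

fn write_sorted_entries(
    wtxn: &mut RwTxn,
    db: Database<Bytes, Bytes>,
    sorted: &[(&[u8], &[u8])],
) -> heed::Result<()> {
    for &(key, value) in sorted {
        // APPEND promises LMDB the keys arrive in order, turning each write
        // into a cheap append at the end of the B-tree instead of a lookup;
        // an out-of-order key makes LMDB return an error, hence the `?`.
        db.put_with_flags(wtxn, PutFlags::APPEND, key, value)?;
    }
    Ok(())
}
```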

View File

@ -1,7 +1,7 @@
use std::fs::File;
use std::io::BufReader;
use heed::types::{ByteSlice, DecodeIgnore};
use heed::types::{Bytes, DecodeIgnore};
use heed::{BytesDecode, Error, RoTxn, RwTxn};
use obkv::KvReader;
use roaring::RoaringBitmap;
@ -10,7 +10,7 @@ use crate::facet::FacetType;
use crate::heed_codec::facet::{
FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec,
};
use crate::heed_codec::ByteSliceRefCodec;
use crate::heed_codec::BytesRefCodec;
use crate::search::facet::get_highest_level;
use crate::update::del_add::DelAdd;
use crate::update::index_documents::valid_lmdb_key;
@ -48,10 +48,10 @@ impl FacetsUpdateIncremental {
db: match facet_type {
FacetType::String => index
.facet_id_string_docids
.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(),
.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>(),
FacetType::Number => index
.facet_id_f64_docids
.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(),
.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>(),
},
group_size,
max_group_size,
@ -67,19 +67,19 @@ impl FacetsUpdateIncremental {
if !valid_lmdb_key(key) {
continue;
}
let key = FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_decode(key)
.ok_or(heed::Error::Encoding)?;
let key = FacetGroupKeyCodec::<BytesRefCodec>::bytes_decode(key)
.map_err(heed::Error::Encoding)?;
let value = KvReader::new(value);
let docids_to_delete = value
.get(DelAdd::Deletion)
.map(CboRoaringBitmapCodec::bytes_decode)
.map(|o| o.ok_or(heed::Error::Encoding));
.map(|o| o.map_err(heed::Error::Encoding));
let docids_to_add = value
.get(DelAdd::Addition)
.map(CboRoaringBitmapCodec::bytes_decode)
.map(|o| o.ok_or(heed::Error::Encoding));
.map(|o| o.map_err(heed::Error::Encoding));
if let Some(docids_to_delete) = docids_to_delete {
let docids_to_delete = docids_to_delete?;
@ -98,7 +98,7 @@ impl FacetsUpdateIncremental {
/// Implementation of `FacetsUpdateIncremental` that is independent of milli's `Index` type
pub struct FacetsUpdateIncrementalInner {
pub db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
pub db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
pub group_size: u8,
pub min_level_size: u8,
pub max_group_size: u8,
@ -134,15 +134,14 @@ impl FacetsUpdateIncrementalInner {
prefix.extend_from_slice(&field_id.to_be_bytes());
prefix.push(level);
let mut iter =
self.db.as_polymorph().prefix_iter::<_, ByteSlice, FacetGroupValueCodec>(
txn,
prefix.as_slice(),
)?;
let mut iter = self
.db
.remap_types::<Bytes, FacetGroupValueCodec>()
.prefix_iter(txn, prefix.as_slice())?;
let (key_bytes, value) = iter.next().unwrap()?;
Ok((
FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_decode(key_bytes)
.ok_or(Error::Encoding)?
FacetGroupKeyCodec::<BytesRefCodec>::bytes_decode(key_bytes)
.map_err(Error::Encoding)?
.into_owned(),
value,
))
@ -177,10 +176,8 @@ impl FacetsUpdateIncrementalInner {
level0_prefix.extend_from_slice(&field_id.to_be_bytes());
level0_prefix.push(0);
let mut iter = self
.db
.as_polymorph()
.prefix_iter::<_, ByteSlice, DecodeIgnore>(txn, &level0_prefix)?;
let mut iter =
self.db.remap_types::<Bytes, DecodeIgnore>().prefix_iter(txn, &level0_prefix)?;
if iter.next().is_none() {
drop(iter);
@ -382,11 +379,8 @@ impl FacetsUpdateIncrementalInner {
highest_level_prefix.extend_from_slice(&field_id.to_be_bytes());
highest_level_prefix.push(highest_level);
let size_highest_level = self
.db
.as_polymorph()
.prefix_iter::<_, ByteSlice, ByteSlice>(txn, &highest_level_prefix)?
.count();
let size_highest_level =
self.db.remap_types::<Bytes, Bytes>().prefix_iter(txn, &highest_level_prefix)?.count();
if size_highest_level < self.group_size as usize * self.min_level_size as usize {
return Ok(());
@ -394,8 +388,8 @@ impl FacetsUpdateIncrementalInner {
let mut groups_iter = self
.db
.as_polymorph()
.prefix_iter::<_, ByteSlice, FacetGroupValueCodec>(txn, &highest_level_prefix)?;
.remap_types::<Bytes, FacetGroupValueCodec>()
.prefix_iter(txn, &highest_level_prefix)?;
let nbr_new_groups = size_highest_level / self.group_size as usize;
let nbr_leftover_elements = size_highest_level % self.group_size as usize;
@ -406,8 +400,8 @@ impl FacetsUpdateIncrementalInner {
let mut values = RoaringBitmap::new();
for _ in 0..group_size {
let (key_bytes, value_i) = groups_iter.next().unwrap()?;
let key_i = FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_decode(key_bytes)
.ok_or(Error::Encoding)?;
let key_i = FacetGroupKeyCodec::<BytesRefCodec>::bytes_decode(key_bytes)
.map_err(Error::Encoding)?;
if first_key.is_none() {
first_key = Some(key_i);
@ -429,8 +423,8 @@ impl FacetsUpdateIncrementalInner {
let mut values = RoaringBitmap::new();
for _ in 0..nbr_leftover_elements {
let (key_bytes, value_i) = groups_iter.next().unwrap()?;
let key_i = FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_decode(key_bytes)
.ok_or(Error::Encoding)?;
let key_i = FacetGroupKeyCodec::<BytesRefCodec>::bytes_decode(key_bytes)
.map_err(Error::Encoding)?;
if first_key.is_none() {
first_key = Some(key_i);
@ -597,23 +591,21 @@ impl FacetsUpdateIncrementalInner {
if highest_level == 0
|| self
.db
.as_polymorph()
.prefix_iter::<_, ByteSlice, ByteSlice>(txn, &highest_level_prefix)?
.remap_types::<Bytes, Bytes>()
.prefix_iter(txn, &highest_level_prefix)?
.count()
>= self.min_level_size as usize
{
return Ok(());
}
let mut to_delete = vec![];
let mut iter = self
.db
.as_polymorph()
.prefix_iter::<_, ByteSlice, ByteSlice>(txn, &highest_level_prefix)?;
let mut iter =
self.db.remap_types::<Bytes, Bytes>().prefix_iter(txn, &highest_level_prefix)?;
for el in iter.by_ref() {
let (k, _) = el?;
to_delete.push(
FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_decode(k)
.ok_or(Error::Encoding)?
FacetGroupKeyCodec::<BytesRefCodec>::bytes_decode(k)
.map_err(Error::Encoding)?
.into_owned(),
);
}
@ -1121,7 +1113,7 @@ mod fuzz {
#[no_coverage]
fn compare_with_trivial_database(tempdir: Rc<TempDir>, operations: &[Operation]) {
let index = FacetIndex::<ByteSliceRefCodec>::open_from_tempdir(tempdir, 4, 8, 5); // dummy params, they'll be overwritten
let index = FacetIndex::<BytesRefCodec>::open_from_tempdir(tempdir, 4, 8, 5); // dummy params, they'll be overwritten
let mut txn = index.env.write_txn().unwrap();
let mut trivial_db = TrivialDatabase::<Vec<u8>>::default();
@ -1167,16 +1159,13 @@ mod fuzz {
let level0iter = index
.content
.as_polymorph()
.prefix_iter::<_, ByteSlice, FacetGroupValueCodec>(
&mut txn,
&field_id.to_be_bytes(),
)
.prefix_iter::<_, Bytes, FacetGroupValueCodec>(&mut txn, &field_id.to_be_bytes())
.unwrap();
for ((key, values), group) in values_field_id.iter().zip(level0iter) {
let (group_key, group_values) = group.unwrap();
let group_key =
FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_decode(group_key).unwrap();
FacetGroupKeyCodec::<BytesRefCodec>::bytes_decode(group_key).unwrap();
assert_eq!(key, &group_key.left_bound);
assert_eq!(values, &group_values.bitmap);
}
@ -1186,13 +1175,13 @@ mod fuzz {
let level0iter = index
.content
.as_polymorph()
.prefix_iter::<_, ByteSlice, FacetGroupValueCodec>(&txn, &field_id.to_be_bytes())
.prefix_iter::<_, Bytes, FacetGroupValueCodec>(&txn, &field_id.to_be_bytes())
.unwrap();
for ((key, values), group) in values_field_id.iter().zip(level0iter) {
let (group_key, group_values) = group.unwrap();
let group_key =
FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_decode(group_key).unwrap();
FacetGroupKeyCodec::<BytesRefCodec>::bytes_decode(group_key).unwrap();
assert_eq!(key, &group_key.left_bound);
assert_eq!(values, &group_values.bitmap);
}
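All the `ok_or(Error::Encoding)` to `map_err(Error::Encoding)` churn in this file follows from the codec traits now returning `Result<_, BoxedError>` instead of `Option<_>`: the decoding failure is a real error value that heed's `Error::Encoding` variant can carry. A sketch of a codec written against the new trait shape (a hypothetical `U16BeCodec`, not one of milli's codecs):

```rust
use std::borrow::Cow;

use heed::{BoxedError, BytesDecode, BytesEncode};

struct U16BeCodec;

impl<'a> BytesEncode<'a> for U16BeCodec {
    type EItem = u16;

    fn bytes_encode(item: &'a u16) -> Result<Cow<'a, [u8]>, BoxedError> {
        Ok(Cow::Owned(item.to_be_bytes().to_vec()))
    }
}

impl<'a> BytesDecode<'a> for U16BeCodec {
    type DItem = u16;

    fn bytes_decode(bytes: &'a [u8]) -> Result<u16, BoxedError> {
        // `TryFromSliceError` converts into `BoxedError`, so callers see why
        // the decode failed instead of a bare `None`.
        let array: [u8; 2] = bytes.try_into()?;
        Ok(u16::from_be_bytes(array))
    }
}
```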

View File

@ -83,7 +83,7 @@ use std::iter::FromIterator;
use charabia::normalizer::{Normalize, NormalizerOption};
use grenad::{CompressionType, SortAlgorithm};
use heed::types::{ByteSlice, DecodeIgnore, SerdeJson};
use heed::types::{Bytes, DecodeIgnore, SerdeJson};
use heed::BytesEncode;
use log::debug;
use time::OffsetDateTime;
@ -92,10 +92,10 @@ use self::incremental::FacetsUpdateIncremental;
use super::FacetsUpdateBulk;
use crate::facet::FacetType;
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec};
use crate::heed_codec::ByteSliceRefCodec;
use crate::heed_codec::BytesRefCodec;
use crate::update::index_documents::create_sorter;
use crate::update::merge_btreeset_string;
use crate::{BEU16StrCodec, Index, Result, BEU16, MAX_FACET_VALUE_LENGTH};
use crate::{BEU16StrCodec, Index, Result, MAX_FACET_VALUE_LENGTH};
pub mod bulk;
pub mod incremental;
@ -106,7 +106,7 @@ pub mod incremental;
/// a bulk update method or an incremental update method.
pub struct FacetsUpdate<'i> {
index: &'i Index,
database: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
database: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
facet_type: FacetType,
delta_data: grenad::Reader<BufReader<File>>,
group_size: u8,
@ -120,11 +120,11 @@ impl<'i> FacetsUpdate<'i> {
delta_data: grenad::Reader<BufReader<File>>,
) -> Self {
let database = match facet_type {
FacetType::String => index
.facet_id_string_docids
.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(),
FacetType::String => {
index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>()
}
FacetType::Number => {
index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>()
index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>()
}
};
Self {
@ -146,7 +146,7 @@ impl<'i> FacetsUpdate<'i> {
self.index.set_updated_at(wtxn, &OffsetDateTime::now_utc())?;
// See self::comparison_bench::benchmark_facet_indexing
if self.delta_data.len() >= (self.database.len(wtxn)? as u64 / 50) {
if self.delta_data.len() >= (self.database.len(wtxn)? / 50) {
let field_ids =
self.index.faceted_fields_ids(wtxn)?.iter().copied().collect::<Vec<_>>();
let bulk_update = FacetsUpdateBulk::new(
@ -207,8 +207,8 @@ impl<'i> FacetsUpdate<'i> {
}
let set = BTreeSet::from_iter(std::iter::once(left_bound));
let key = (field_id, normalized_facet.as_ref());
let key = BEU16StrCodec::bytes_encode(&key).ok_or(heed::Error::Encoding)?;
let val = SerdeJson::bytes_encode(&set).ok_or(heed::Error::Encoding)?;
let key = BEU16StrCodec::bytes_encode(&key).map_err(heed::Error::Encoding)?;
let val = SerdeJson::bytes_encode(&set).map_err(heed::Error::Encoding)?;
sorter.insert(key, val)?;
}
}
@ -217,10 +217,11 @@ impl<'i> FacetsUpdate<'i> {
// as the grenad sorter already merged them for us.
let mut merger_iter = sorter.into_stream_merger_iter()?;
while let Some((key_bytes, btreeset_bytes)) = merger_iter.next()? {
self.index
.facet_id_normalized_string_strings
.remap_types::<ByteSlice, ByteSlice>()
.put(wtxn, key_bytes, btreeset_bytes)?;
self.index.facet_id_normalized_string_strings.remap_types::<Bytes, Bytes>().put(
wtxn,
key_bytes,
btreeset_bytes,
)?;
}
// We compute one FST per string facet
@ -252,7 +253,7 @@ impl<'i> FacetsUpdate<'i> {
// We write those FSTs in LMDB now
for (field_id, fst) in text_fsts {
self.index.facet_id_string_fst.put(wtxn, &BEU16::new(field_id), &fst)?;
self.index.facet_id_string_fst.put(wtxn, &field_id, &fst)?;
}
Ok(())
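`put(wtxn, &field_id, &fst)` shows the zerocopy wrapper types going away: heed's big-endian integer codecs now take plain `u16`/`u32` values, so the `BEU16::new(..)`/`.get()` wrapping disappears (and, same origin, `database.len()` above now returns `u64` directly, dropping the `as u64` cast). A reduced sketch, with `Bytes` standing in for the real FST value codec:

```rust
use heed::byteorder::BigEndian;
use heed::types::{Bytes, U16};
use heed::{Database, RwTxn};

fn store_fst_bytes(
    wtxn: &mut RwTxn,
    db: Database<U16<BigEndian>, Bytes>,
    field_id: u16,
    fst_bytes: &[u8],
) -> heed::Result<()> {
    // Before: db.put(wtxn, &BEU16::new(field_id), fst_bytes)?;
    db.put(wtxn, &field_id, fst_bytes)?;
    Ok(())
}
```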
@ -267,7 +268,7 @@ pub(crate) mod test_helpers {
use std::marker::PhantomData;
use std::rc::Rc;
use heed::types::ByteSlice;
use heed::types::Bytes;
use heed::{BytesDecode, BytesEncode, Env, RoTxn, RwTxn};
use roaring::RoaringBitmap;
@ -275,7 +276,7 @@ pub(crate) mod test_helpers {
use crate::heed_codec::facet::{
FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec,
};
use crate::heed_codec::ByteSliceRefCodec;
use crate::heed_codec::BytesRefCodec;
use crate::search::facet::get_highest_level;
use crate::snapshot_tests::display_bitmap;
use crate::update::del_add::{DelAdd, KvWriterDelAdd};
@ -306,7 +307,7 @@ pub(crate) mod test_helpers {
BytesEncode<'a> + BytesDecode<'a, DItem = <BoundCodec as BytesEncode<'a>>::EItem>,
{
pub env: Env,
pub content: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
pub content: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
pub group_size: Cell<u8>,
pub min_level_size: Cell<u8>,
pub max_group_size: Cell<u8>,
@ -454,7 +455,7 @@ pub(crate) mod test_helpers {
let left_bound_bytes = BoundCodec::bytes_encode(left_bound).unwrap().into_owned();
let key: FacetGroupKey<&[u8]> =
FacetGroupKey { field_id: *field_id, level: 0, left_bound: &left_bound_bytes };
let key = FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_encode(&key).unwrap();
let key = FacetGroupKeyCodec::<BytesRefCodec>::bytes_encode(&key).unwrap();
let mut inner_writer = KvWriterDelAdd::memory();
let value = CboRoaringBitmapCodec::bytes_encode(docids).unwrap();
inner_writer.insert(DelAdd::Addition, value).unwrap();
@ -486,12 +487,12 @@ pub(crate) mod test_helpers {
let iter = self
.content
.as_polymorph()
.prefix_iter::<_, ByteSlice, FacetGroupValueCodec>(txn, &level_no_prefix)
.remap_types::<Bytes, FacetGroupValueCodec>()
.prefix_iter(txn, &level_no_prefix)
.unwrap();
for el in iter {
let (key, value) = el.unwrap();
let key = FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_decode(key).unwrap();
let key = FacetGroupKeyCodec::<BytesRefCodec>::bytes_decode(key).unwrap();
let mut prefix_start_below = vec![];
prefix_start_below.extend_from_slice(&field_id.to_be_bytes());
@ -501,14 +502,11 @@ pub(crate) mod test_helpers {
let start_below = {
let mut start_below_iter = self
.content
.as_polymorph()
.prefix_iter::<_, ByteSlice, FacetGroupValueCodec>(
txn,
&prefix_start_below,
)
.remap_types::<Bytes, FacetGroupValueCodec>()
.prefix_iter(txn, &prefix_start_below)
.unwrap();
let (key_bytes, _) = start_below_iter.next().unwrap().unwrap();
FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_decode(key_bytes).unwrap()
FacetGroupKeyCodec::<BytesRefCodec>::bytes_decode(key_bytes).unwrap()
};
assert!(value.size > 0);
@ -612,7 +610,7 @@ mod comparison_bench {
}
let time_spent = timer.elapsed().as_millis();
println!(" add {nbr_doc} : {time_spent}ms");
txn.abort().unwrap();
txn.abort();
}
}
}

View File

@ -309,8 +309,7 @@ fn tokens_from_document<'a>(
// if a language has been detected for the token, we update the counter.
if let Some(language) = token.language {
let script = token.script;
let entry =
script_language_word_count.entry(script).or_insert_with(Vec::new);
let entry = script_language_word_count.entry(script).or_default();
match entry.iter_mut().find(|(l, _)| *l == language) {
Some((_, n)) => *n += 1,
None => entry.push((language, 1)),
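A one-liner for completeness: `or_default()` is the idiomatic spelling of `or_insert_with(Vec::new)`; both create the empty `Vec` lazily. Sketch with `char` standing in for the `Script`/`Language` types:

```rust
use std::collections::HashMap;

fn count_word(counts: &mut HashMap<char, Vec<(char, u32)>>, script: char, language: char) {
    let entry = counts.entry(script).or_default();
    match entry.iter_mut().find(|(l, _)| *l == language) {
        Some((_, n)) => *n += 1,
        None => entry.push((language, 1)),
    }
}
```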

View File

@ -6,8 +6,8 @@ use std::io::{self, BufReader};
use std::mem::size_of;
use std::result::Result as StdResult;
use bytemuck::bytes_of;
use grenad::Sorter;
use heed::zerocopy::AsBytes;
use heed::BytesEncode;
use itertools::EitherOrBoth;
use ordered_float::OrderedFloat;
@ -20,9 +20,7 @@ use crate::error::InternalError;
use crate::facet::value_encoding::f64_into_bytes;
use crate::update::del_add::{DelAdd, KvWriterDelAdd};
use crate::update::index_documents::{create_writer, writer_into_reader};
use crate::{
CboRoaringBitmapCodec, DocumentId, Error, FieldId, Result, BEU32, MAX_FACET_VALUE_LENGTH,
};
use crate::{CboRoaringBitmapCodec, DocumentId, Error, FieldId, Result, MAX_FACET_VALUE_LENGTH};
/// The length of the elements that are always in the buffer when inserting new values.
const TRUNCATE_SIZE: usize = size_of::<FieldId>() + size_of::<DocumentId>();
@ -94,7 +92,7 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
strings_key_buffer.extend_from_slice(&field_id.to_be_bytes());
let document: [u8; 4] = docid_bytes[..4].try_into().ok().unwrap();
let document = BEU32::from(document).get();
let document = DocumentId::from_be_bytes(document);
// For the other extraction tasks, prefix the key with the field_id and the document_id
numbers_key_buffer.extend_from_slice(docid_bytes);
@ -323,7 +321,7 @@ where
// We insert only the Del part of the Obkv to inform
// that we only want to remove all those numbers.
let mut obkv = KvWriterDelAdd::memory();
obkv.insert(DelAdd::Deletion, ().as_bytes())?;
obkv.insert(DelAdd::Deletion, bytes_of(&()))?;
let bytes = obkv.into_inner()?;
fid_docid_facet_numbers_sorter.insert(&key_buffer, bytes)?;
}
@ -336,7 +334,7 @@ where
// We insert only the Add part of the Obkv to inform
// that we only want to add all those numbers.
let mut obkv = KvWriterDelAdd::memory();
obkv.insert(DelAdd::Addition, ().as_bytes())?;
obkv.insert(DelAdd::Addition, bytes_of(&()))?;
let bytes = obkv.into_inner()?;
fid_docid_facet_numbers_sorter.insert(&key_buffer, bytes)?;
}
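With heed no longer re-exporting `zerocopy`, the unit placeholder payload goes through `bytemuck::bytes_of` instead of `AsBytes` (and the docid above is rebuilt with plain `u32::from_be_bytes`). Both spellings of the payload produce the same empty slice:

```rust
use bytemuck::bytes_of;

fn unit_payload() {
    // `()` is zero-sized, so the marker carries no data: the presence of the
    // DelAdd entry is the whole signal.
    let marker: &[u8] = bytes_of(&());
    assert!(marker.is_empty());
}
```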

View File

@ -118,7 +118,7 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(
}
let (word, fid) = StrBEU16Codec::bytes_decode(key)
.ok_or(SerializationError::Decoding { db_name: Some(DOCID_WORD_POSITIONS) })?;
.map_err(|_| SerializationError::Decoding { db_name: Some(DOCID_WORD_POSITIONS) })?;
// every word contained in an attribute set to exact must be pushed into the exact_words list.
if exact_attributes.contains(&fid) {

View File

@ -32,6 +32,7 @@ use super::helpers::{
MergeFn, MergeableReader,
};
use super::{helpers, TypedChunk};
use crate::proximity::ProximityPrecision;
use crate::{FieldId, Result};
/// Extract data for each database from obkv documents in parallel.
@ -52,6 +53,7 @@ pub(crate) fn data_from_obkv_documents(
dictionary: Option<&[&str]>,
max_positions_per_attributes: Option<u32>,
exact_attributes: HashSet<FieldId>,
proximity_precision: ProximityPrecision,
) -> Result<()> {
puffin::profile_function!();
@ -150,15 +152,17 @@ pub(crate) fn data_from_obkv_documents(
});
}
spawn_extraction_task::<_, _, Vec<grenad::Reader<BufReader<File>>>>(
docid_word_positions_chunks.clone(),
indexer,
lmdb_writer_sx.clone(),
extract_word_pair_proximity_docids,
merge_deladd_cbo_roaring_bitmaps,
TypedChunk::WordPairProximityDocids,
"word-pair-proximity-docids",
);
if proximity_precision == ProximityPrecision::WordScale {
spawn_extraction_task::<_, _, Vec<grenad::Reader<BufReader<File>>>>(
docid_word_positions_chunks.clone(),
indexer,
lmdb_writer_sx.clone(),
extract_word_pair_proximity_docids,
merge_deladd_cbo_roaring_bitmaps,
TypedChunk::WordPairProximityDocids,
"word-pair-proximity-docids",
);
}
spawn_extraction_task::<_, _, Vec<grenad::Reader<BufReader<File>>>>(
docid_word_positions_chunks.clone(),
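Word-pair-proximity extraction, the heaviest extractor, is now gated on the new setting. A sketch of the gating idea; treating `WordScale` as the default and `AttributeScale` as the sibling variant are assumptions, since the diff only shows `WordScale` plus an `unwrap_or_default()` call site:

```rust
#[derive(Clone, Copy, Default, PartialEq, Eq)]
enum ProximityPrecision {
    #[default]
    WordScale, // assumed default; the only variant visible in the diff
    AttributeScale, // assumed sibling variant
}

fn extraction_tasks(precision: ProximityPrecision) -> Vec<&'static str> {
    let mut tasks = vec!["word-docids", "word-position-docids"];
    if precision == ProximityPrecision::WordScale {
        // Skipped entirely at attribute scale, saving the costliest pass.
        tasks.push("word-pair-proximity-docids");
    }
    tasks
}
```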

View File

@ -3,7 +3,7 @@ use std::fs::File;
use std::io::{self, BufReader, BufWriter, Seek};
use grenad::{CompressionType, Sorter};
use heed::types::ByteSlice;
use heed::types::Bytes;
use super::{ClonableMmap, MergeFn};
use crate::update::index_documents::valid_lmdb_key;
@ -255,7 +255,7 @@ where
puffin::profile_function!();
let mut buffer = Vec::new();
let database = database.remap_types::<ByteSlice, ByteSlice>();
let database = database.remap_types::<Bytes, Bytes>();
let mut merger_iter = sorter.into_stream_merger_iter()?;
while let Some((key, value)) = merger_iter.next()? {

View File

@ -68,8 +68,8 @@ impl Default for IndexDocumentsMethod {
}
}
pub struct IndexDocuments<'t, 'u, 'i, 'a, FP, FA> {
wtxn: &'t mut heed::RwTxn<'i, 'u>,
pub struct IndexDocuments<'t, 'i, 'a, FP, FA> {
wtxn: &'t mut heed::RwTxn<'i>,
index: &'i Index,
config: IndexDocumentsConfig,
indexer_config: &'a IndexerConfig,
@ -90,19 +90,19 @@ pub struct IndexDocumentsConfig {
pub autogenerate_docids: bool,
}
impl<'t, 'u, 'i, 'a, FP, FA> IndexDocuments<'t, 'u, 'i, 'a, FP, FA>
impl<'t, 'i, 'a, FP, FA> IndexDocuments<'t, 'i, 'a, FP, FA>
where
FP: Fn(UpdateIndexingStep) + Sync,
FA: Fn() -> bool + Sync,
{
pub fn new(
wtxn: &'t mut heed::RwTxn<'i, 'u>,
wtxn: &'t mut heed::RwTxn<'i>,
index: &'i Index,
indexer_config: &'a IndexerConfig,
config: IndexDocumentsConfig,
progress: FP,
should_abort: FA,
) -> Result<IndexDocuments<'t, 'u, 'i, 'a, FP, FA>> {
) -> Result<IndexDocuments<'t, 'i, 'a, FP, FA>> {
let transform = Some(Transform::new(
wtxn,
index,
@ -352,6 +352,7 @@ where
let dictionary: Option<Vec<_>> =
dictionary.as_ref().map(|x| x.iter().map(String::as_str).collect());
let exact_attributes = self.index.exact_attributes_ids(self.wtxn)?;
let proximity_precision = self.index.proximity_precision(self.wtxn)?.unwrap_or_default();
let pool_params = GrenadParameters {
chunk_compression_type: self.indexer_config.chunk_compression_type,
@ -392,6 +393,7 @@ where
dictionary.as_deref(),
max_positions_per_attributes,
exact_attributes,
proximity_precision,
)
});
@ -701,7 +703,7 @@ mod tests {
use crate::documents::documents_batch_reader_from_objects;
use crate::index::tests::TempIndex;
use crate::search::TermsMatchingStrategy;
use crate::{db_snap, Filter, Search, BEU16};
use crate::{db_snap, Filter, Search};
#[test]
fn simple_document_replacement() {
@ -1743,14 +1745,11 @@ mod tests {
let colour_green_id = index.fields_ids_map(&rtxn).unwrap().id("colour.green").unwrap();
let bitmap_colour =
index.facet_id_exists_docids.get(&rtxn, &BEU16::new(colour_id)).unwrap().unwrap();
index.facet_id_exists_docids.get(&rtxn, &colour_id).unwrap().unwrap();
assert_eq!(bitmap_colour.into_iter().collect::<Vec<_>>(), vec![0, 1, 2, 3, 4, 6, 7]);
let bitmap_colour_green = index
.facet_id_exists_docids
.get(&rtxn, &BEU16::new(colour_green_id))
.unwrap()
.unwrap();
let bitmap_colour_green =
index.facet_id_exists_docids.get(&rtxn, &colour_green_id).unwrap().unwrap();
assert_eq!(bitmap_colour_green.into_iter().collect::<Vec<_>>(), vec![6, 7]);
};
@ -1848,21 +1847,15 @@ mod tests {
index.fields_ids_map(&rtxn).unwrap().id("colour.green.blue").unwrap();
let bitmap_null_colour =
index.facet_id_is_null_docids.get(&rtxn, &BEU16::new(colour_id)).unwrap().unwrap();
index.facet_id_is_null_docids.get(&rtxn, &colour_id).unwrap().unwrap();
assert_eq!(bitmap_null_colour.into_iter().collect::<Vec<_>>(), vec![0]);
let bitmap_colour_green = index
.facet_id_is_null_docids
.get(&rtxn, &BEU16::new(colour_green_id))
.unwrap()
.unwrap();
let bitmap_colour_green =
index.facet_id_is_null_docids.get(&rtxn, &colour_green_id).unwrap().unwrap();
assert_eq!(bitmap_colour_green.into_iter().collect::<Vec<_>>(), vec![2]);
let bitmap_colour_blue = index
.facet_id_is_null_docids
.get(&rtxn, &BEU16::new(colour_blue_id))
.unwrap()
.unwrap();
let bitmap_colour_blue =
index.facet_id_is_null_docids.get(&rtxn, &colour_blue_id).unwrap().unwrap();
assert_eq!(bitmap_colour_blue.into_iter().collect::<Vec<_>>(), vec![3]);
};
@ -1917,21 +1910,15 @@ mod tests {
let tags_blue_id = index.fields_ids_map(&rtxn).unwrap().id("tags.green.blue").unwrap();
let bitmap_empty_tags =
index.facet_id_is_empty_docids.get(&rtxn, &BEU16::new(tags_id)).unwrap().unwrap();
index.facet_id_is_empty_docids.get(&rtxn, &tags_id).unwrap().unwrap();
assert_eq!(bitmap_empty_tags.into_iter().collect::<Vec<_>>(), vec![2, 6, 9]);
let bitmap_tags_green = index
.facet_id_is_empty_docids
.get(&rtxn, &BEU16::new(tags_green_id))
.unwrap()
.unwrap();
let bitmap_tags_green =
index.facet_id_is_empty_docids.get(&rtxn, &tags_green_id).unwrap().unwrap();
assert_eq!(bitmap_tags_green.into_iter().collect::<Vec<_>>(), vec![8]);
let bitmap_tags_blue = index
.facet_id_is_empty_docids
.get(&rtxn, &BEU16::new(tags_blue_id))
.unwrap()
.unwrap();
let bitmap_tags_blue =
index.facet_id_is_empty_docids.get(&rtxn, &tags_blue_id).unwrap().unwrap();
assert_eq!(bitmap_tags_blue.into_iter().collect::<Vec<_>>(), vec![12]);
};
@ -2684,7 +2671,7 @@ mod tests {
}
fn delete_documents<'t>(
wtxn: &mut RwTxn<'t, '_>,
wtxn: &mut RwTxn<'t>,
index: &'t TempIndex,
external_ids: &[&str],
) -> Vec<u32> {

View File

@ -24,9 +24,7 @@ use crate::index::{db_name, main_key};
use crate::update::del_add::{into_del_add_obkv, DelAdd, DelAddOperation, KvReaderDelAdd};
use crate::update::index_documents::GrenadParameters;
use crate::update::{AvailableDocumentsIds, ClearDocuments, UpdateIndexingStep};
use crate::{
FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldsIdsMap, Index, Result, BEU32,
};
use crate::{FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldsIdsMap, Index, Result};
pub struct TransformOutput {
pub primary_key: String,
@ -245,11 +243,11 @@ impl<'a, 'i> Transform<'a, 'i> {
let mut skip_insertion = false;
if let Some(original_docid) = original_docid {
let original_key = BEU32::new(original_docid);
let original_key = original_docid;
let base_obkv = self
.index
.documents
.remap_data_type::<heed::types::ByteSlice>()
.remap_data_type::<heed::types::Bytes>()
.get(wtxn, &original_key)?
.ok_or(InternalError::DatabaseMissingEntry {
db_name: db_name::DOCUMENTS,
@ -499,11 +497,11 @@ impl<'a, 'i> Transform<'a, 'i> {
self.replaced_documents_ids.insert(internal_docid);
// fetch the obkv document
let original_key = BEU32::new(internal_docid);
let original_key = internal_docid;
let base_obkv = self
.index
.documents
.remap_data_type::<heed::types::ByteSlice>()
.remap_data_type::<heed::types::Bytes>()
.get(txn, &original_key)?
.ok_or(InternalError::DatabaseMissingEntry {
db_name: db_name::DOCUMENTS,
@ -811,7 +809,7 @@ impl<'a, 'i> Transform<'a, 'i> {
// TODO this can be done in parallel by using the rayon `ThreadPool`.
pub fn prepare_for_documents_reindexing(
self,
wtxn: &mut heed::RwTxn<'i, '_>,
wtxn: &mut heed::RwTxn<'i>,
old_fields_ids_map: FieldsIdsMap,
mut new_fields_ids_map: FieldsIdsMap,
) -> Result<TransformOutput> {
@ -857,7 +855,6 @@ impl<'a, 'i> Transform<'a, 'i> {
let obkv = self.index.documents.get(wtxn, &docid)?.ok_or(
InternalError::DatabaseMissingEntry { db_name: db_name::DOCUMENTS, key: None },
)?;
let docid = docid.get();
obkv_buffer.clear();
let mut obkv_writer = KvWriter::<_, FieldId>::new(&mut obkv_buffer);

View File

@ -6,8 +6,8 @@ use std::io::{self, BufReader};
use bytemuck::allocation::pod_collect_to_vec;
use charabia::{Language, Script};
use grenad::MergerBuilder;
use heed::types::ByteSlice;
use heed::RwTxn;
use heed::types::Bytes;
use heed::{PutFlags, RwTxn};
use log::error;
use obkv::{KvReader, KvWriter};
use ordered_float::OrderedFloat;
@ -27,9 +27,7 @@ use crate::index::Hnsw;
use crate::update::del_add::{deladd_serialize_add_side, DelAdd, KvReaderDelAdd};
use crate::update::facet::FacetsUpdate;
use crate::update::index_documents::helpers::{as_cloneable_grenad, try_split_array_at};
use crate::{
lat_lng_to_xyz, DocumentId, FieldId, GeoPoint, Index, Result, SerializationError, BEU32,
};
use crate::{lat_lng_to_xyz, DocumentId, FieldId, GeoPoint, Index, Result, SerializationError};
pub(crate) enum TypedChunk {
FieldIdDocidFacetStrings(grenad::Reader<CursorClonableMmap>),
@ -146,10 +144,10 @@ pub(crate) fn write_typed_chunk_into_index(
}
}
let db = index.documents.remap_data_type::<ByteSlice>();
let db = index.documents.remap_data_type::<Bytes>();
if !writer.is_empty() {
db.put(wtxn, &BEU32::new(docid), &writer.into_inner().unwrap())?;
db.put(wtxn, &docid, &writer.into_inner().unwrap())?;
operations.push(DocumentOperation {
external_id: external_id.to_string(),
internal_id: docid,
@ -157,7 +155,7 @@ pub(crate) fn write_typed_chunk_into_index(
});
docids.insert(docid);
} else {
db.delete(wtxn, &BEU32::new(docid))?;
db.delete(wtxn, &docid)?;
operations.push(DocumentOperation {
external_id: external_id.to_string(),
internal_id: docid,
@ -295,7 +293,7 @@ pub(crate) fn write_typed_chunk_into_index(
}
TypedChunk::FieldIdDocidFacetNumbers(fid_docid_facet_number) => {
let index_fid_docid_facet_numbers =
index.field_id_docid_facet_f64s.remap_types::<ByteSlice, ByteSlice>();
index.field_id_docid_facet_f64s.remap_types::<Bytes, Bytes>();
let mut cursor = fid_docid_facet_number.into_cursor()?;
while let Some((key, value)) = cursor.move_on_next()? {
let reader = KvReaderDelAdd::new(value);
@ -315,7 +313,7 @@ pub(crate) fn write_typed_chunk_into_index(
}
TypedChunk::FieldIdDocidFacetStrings(fid_docid_facet_string) => {
let index_fid_docid_facet_strings =
index.field_id_docid_facet_strings.remap_types::<ByteSlice, ByteSlice>();
index.field_id_docid_facet_strings.remap_types::<Bytes, Bytes>();
let mut cursor = fid_docid_facet_string.into_cursor()?;
while let Some((key, value)) = cursor.move_on_next()? {
let reader = KvReaderDelAdd::new(value);
@ -362,8 +360,8 @@ pub(crate) fn write_typed_chunk_into_index(
// We extract and store the previous vectors
if let Some(hnsw) = index.vector_hnsw(wtxn)? {
for (pid, point) in hnsw.iter() {
let pid_key = BEU32::new(pid.into_inner());
let docid = index.vector_id_docid.get(wtxn, &pid_key)?.unwrap().get();
let pid_key = pid.into_inner();
let docid = index.vector_id_docid.get(wtxn, &pid_key)?.unwrap();
let vector: Vec<_> = point.iter().copied().map(OrderedFloat).collect();
vectors_set.insert((docid, vector));
}
@ -424,11 +422,7 @@ pub(crate) fn write_typed_chunk_into_index(
// Store the vectors in the point-docid relation database
index.vector_id_docid.clear(wtxn)?;
for (docid, pid) in docids.into_iter().zip(pids) {
index.vector_id_docid.put(
wtxn,
&BEU32::new(pid.into_inner()),
&BEU32::new(docid),
)?;
index.vector_id_docid.put(wtxn, &pid.into_inner(), &docid)?;
}
log::debug!("There are {} entries in the HNSW so far", hnsw_length);
@ -504,7 +498,7 @@ where
puffin::profile_function!(format!("number of entries: {}", data.len()));
let mut buffer = Vec::new();
let database = database.remap_types::<ByteSlice, ByteSlice>();
let database = database.remap_types::<Bytes, Bytes>();
let mut cursor = data.into_cursor()?;
while let Some((key, value)) = cursor.move_on_next()? {
@ -562,20 +556,23 @@ where
}
let mut buffer = Vec::new();
let mut database = database.iter_mut(wtxn)?.remap_types::<ByteSlice, ByteSlice>();
let mut database = database.iter_mut(wtxn)?.remap_types::<Bytes, Bytes>();
let mut cursor = data.into_cursor()?;
while let Some((key, value)) = cursor.move_on_next()? {
if valid_lmdb_key(key) {
debug_assert!(
K::bytes_decode(key).is_some(),
K::bytes_decode(key).is_ok(),
"Couldn't decode key with the database decoder, key length: {} - key bytes: {:x?}",
key.len(),
&key
);
buffer.clear();
let value = serialize_value(value, &mut buffer)?;
unsafe { database.append(key, value)? };
unsafe {
// safety: We do not keep a reference to anything that lives inside the database
database.put_current_with_options::<Bytes>(PutFlags::APPEND, key, value)?
};
}
}

View File

@ -12,6 +12,7 @@ use super::IndexerConfig;
use crate::criterion::Criterion;
use crate::error::UserError;
use crate::index::{DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS};
use crate::proximity::ProximityPrecision;
use crate::update::index_documents::IndexDocumentsMethod;
use crate::update::{IndexDocuments, UpdateIndexingStep};
use crate::{FieldsIdsMap, Index, OrderBy, Result};
@ -100,8 +101,8 @@ impl<'de, T: Deserialize<'de>> Deserialize<'de> for Setting<T> {
}
}
pub struct Settings<'a, 't, 'u, 'i> {
wtxn: &'t mut heed::RwTxn<'i, 'u>,
pub struct Settings<'a, 't, 'i> {
wtxn: &'t mut heed::RwTxn<'i>,
index: &'i Index,
indexer_config: &'a IndexerConfig,
@ -127,14 +128,15 @@ pub struct Settings<'a, 't, 'u, 'i> {
max_values_per_facet: Setting<usize>,
sort_facet_values_by: Setting<HashMap<String, OrderBy>>,
pagination_max_total_hits: Setting<usize>,
proximity_precision: Setting<ProximityPrecision>,
}
impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
impl<'a, 't, 'i> Settings<'a, 't, 'i> {
pub fn new(
wtxn: &'t mut heed::RwTxn<'i, 'u>,
wtxn: &'t mut heed::RwTxn<'i>,
index: &'i Index,
indexer_config: &'a IndexerConfig,
) -> Settings<'a, 't, 'u, 'i> {
) -> Settings<'a, 't, 'i> {
Settings {
wtxn,
index,
@ -158,6 +160,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
max_values_per_facet: Setting::NotSet,
sort_facet_values_by: Setting::NotSet,
pagination_max_total_hits: Setting::NotSet,
proximity_precision: Setting::NotSet,
indexer_config,
}
}
@ -332,6 +335,14 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
self.pagination_max_total_hits = Setting::Reset;
}
pub fn set_proximity_precision(&mut self, value: ProximityPrecision) {
self.proximity_precision = Setting::Set(value);
}
pub fn reset_proximity_precision(&mut self) {
self.proximity_precision = Setting::Reset;
}
fn reindex<FP, FA>(
&mut self,
progress_callback: &FP,
@ -822,7 +833,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
fn update_max_values_per_facet(&mut self) -> Result<()> {
match self.max_values_per_facet {
Setting::Set(max) => {
self.index.put_max_values_per_facet(self.wtxn, max)?;
self.index.put_max_values_per_facet(self.wtxn, max as u64)?;
}
Setting::Reset => {
self.index.delete_max_values_per_facet(self.wtxn)?;
@ -850,7 +861,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
fn update_pagination_max_total_hits(&mut self) -> Result<()> {
match self.pagination_max_total_hits {
Setting::Set(max) => {
self.index.put_pagination_max_total_hits(self.wtxn, max)?;
self.index.put_pagination_max_total_hits(self.wtxn, max as u64)?;
}
Setting::Reset => {
self.index.delete_pagination_max_total_hits(self.wtxn)?;
@ -861,6 +872,24 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
Ok(())
}
fn update_proximity_precision(&mut self) -> Result<bool> {
let changed = match self.proximity_precision {
Setting::Set(new) => {
let old = self.index.proximity_precision(self.wtxn)?;
if old == Some(new) {
false
} else {
self.index.put_proximity_precision(self.wtxn, new)?;
true
}
}
Setting::Reset => self.index.delete_proximity_precision(self.wtxn)?,
Setting::NotSet => false,
};
Ok(changed)
}
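`update_proximity_precision` follows the shape of every updater in this file: write only when the value changes, and report whether the change requires a reindex. A reduced sketch of the pattern against a simplified store (names hypothetical):

```rust
#[derive(Clone, Copy, PartialEq, Eq)]
enum Setting<T> {
    Set(T),
    Reset,
    NotSet,
}

fn update_setting<T: Copy + PartialEq>(stored: &mut Option<T>, requested: Setting<T>) -> bool {
    match requested {
        // Writing an unchanged value must not trigger a reindex.
        Setting::Set(new) if *stored == Some(new) => false,
        Setting::Set(new) => {
            *stored = Some(new);
            true
        }
        // `delete_*` in the real code reports whether a value was removed.
        Setting::Reset => stored.take().is_some(),
        Setting::NotSet => false,
    }
}
```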
pub fn execute<FP, FA>(mut self, progress_callback: FP, should_abort: FA) -> Result<()>
where
FP: Fn(UpdateIndexingStep) + Sync,
@ -897,6 +926,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
let synonyms_updated = self.update_synonyms()?;
let searchable_updated = self.update_searchable()?;
let exact_attributes_updated = self.update_exact_attributes()?;
let proximity_precision = self.update_proximity_precision()?;
if stop_words_updated
|| non_separator_tokens_updated
@ -906,6 +936,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|| synonyms_updated
|| searchable_updated
|| exact_attributes_updated
|| proximity_precision
{
self.reindex(&progress_callback, &should_abort, old_fields_ids_map)?;
}
@ -917,7 +948,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
#[cfg(test)]
mod tests {
use big_s::S;
use heed::types::ByteSlice;
use heed::types::Bytes;
use maplit::{btreemap, btreeset, hashset};
use super::*;
@ -1130,7 +1161,7 @@ mod tests {
}
let count = index
.facet_id_f64_docids
.remap_key_type::<ByteSlice>()
.remap_key_type::<Bytes>()
// The faceted field id is 1u16
.prefix_iter(&rtxn, &[0, 1, 0])
.unwrap()
@ -1151,7 +1182,7 @@ mod tests {
// Only count the field_id 0 and level 0 facet values.
let count = index
.facet_id_f64_docids
.remap_key_type::<ByteSlice>()
.remap_key_type::<Bytes>()
.prefix_iter(&rtxn, &[0, 1, 0])
.unwrap()
.count();
@ -1565,7 +1596,7 @@ mod tests {
})
.unwrap_err();
assert!(matches!(error, Error::UserError(UserError::PrimaryKeyCannotBeChanged(_))));
wtxn.abort().unwrap();
wtxn.abort();
// But if we clear the database...
let mut wtxn = index.write_txn().unwrap();
@ -1731,6 +1762,7 @@ mod tests {
max_values_per_facet,
sort_facet_values_by,
pagination_max_total_hits,
proximity_precision,
} = settings;
assert!(matches!(searchable_fields, Setting::NotSet));
assert!(matches!(displayed_fields, Setting::NotSet));
@ -1752,6 +1784,7 @@ mod tests {
assert!(matches!(max_values_per_facet, Setting::NotSet));
assert!(matches!(sort_facet_values_by, Setting::NotSet));
assert!(matches!(pagination_max_total_hits, Setting::NotSet));
assert!(matches!(proximity_precision, Setting::NotSet));
})
.unwrap();
}

View File

@ -1,7 +1,7 @@
use std::collections::{HashMap, HashSet};
use grenad::CompressionType;
use heed::types::{ByteSlice, Str};
use heed::types::{Bytes, Str};
use heed::Database;
use crate::update::del_add::{deladd_serialize_add_side, DelAdd, KvWriterDelAdd};
@ -12,8 +12,8 @@ use crate::update::index_documents::{
};
use crate::{CboRoaringBitmapCodec, Result};
pub struct WordPrefixDocids<'t, 'u, 'i> {
wtxn: &'t mut heed::RwTxn<'i, 'u>,
pub struct WordPrefixDocids<'t, 'i> {
wtxn: &'t mut heed::RwTxn<'i>,
word_docids: Database<Str, CboRoaringBitmapCodec>,
word_prefix_docids: Database<Str, CboRoaringBitmapCodec>,
pub(crate) chunk_compression_type: CompressionType,
@ -22,12 +22,12 @@ pub struct WordPrefixDocids<'t, 'u, 'i> {
pub(crate) max_memory: Option<usize>,
}
impl<'t, 'u, 'i> WordPrefixDocids<'t, 'u, 'i> {
impl<'t, 'i> WordPrefixDocids<'t, 'i> {
pub fn new(
wtxn: &'t mut heed::RwTxn<'i, 'u>,
wtxn: &'t mut heed::RwTxn<'i>,
word_docids: Database<Str, CboRoaringBitmapCodec>,
word_prefix_docids: Database<Str, CboRoaringBitmapCodec>,
) -> WordPrefixDocids<'t, 'u, 'i> {
) -> WordPrefixDocids<'t, 'i> {
WordPrefixDocids {
wtxn,
word_docids,
@ -93,7 +93,7 @@ impl<'t, 'u, 'i> WordPrefixDocids<'t, 'u, 'i> {
}
// We fetch the docids associated to the newly added word prefix fst only.
let db = self.word_docids.remap_data_type::<ByteSlice>();
let db = self.word_docids.remap_data_type::<Bytes>();
let mut buffer = Vec::new();
for prefix in new_prefix_fst_words {
let prefix = std::str::from_utf8(prefix.as_bytes())?;

View File

@ -2,7 +2,7 @@ use std::collections::{HashMap, HashSet};
use std::str;
use grenad::CompressionType;
use heed::types::ByteSlice;
use heed::types::Bytes;
use heed::{BytesDecode, BytesEncode, Database};
use log::debug;
@ -17,8 +17,8 @@ use crate::update::index_documents::{
};
use crate::{CboRoaringBitmapCodec, Result};
pub struct WordPrefixIntegerDocids<'t, 'u, 'i> {
wtxn: &'t mut heed::RwTxn<'i, 'u>,
pub struct WordPrefixIntegerDocids<'t, 'i> {
wtxn: &'t mut heed::RwTxn<'i>,
prefix_database: Database<StrBEU16Codec, CboRoaringBitmapCodec>,
word_database: Database<StrBEU16Codec, CboRoaringBitmapCodec>,
pub(crate) chunk_compression_type: CompressionType,
@ -27,12 +27,12 @@ pub struct WordPrefixIntegerDocids<'t, 'u, 'i> {
pub(crate) max_memory: Option<usize>,
}
impl<'t, 'u, 'i> WordPrefixIntegerDocids<'t, 'u, 'i> {
impl<'t, 'i> WordPrefixIntegerDocids<'t, 'i> {
pub fn new(
wtxn: &'t mut heed::RwTxn<'i, 'u>,
wtxn: &'t mut heed::RwTxn<'i>,
prefix_database: Database<StrBEU16Codec, CboRoaringBitmapCodec>,
word_database: Database<StrBEU16Codec, CboRoaringBitmapCodec>,
) -> WordPrefixIntegerDocids<'t, 'u, 'i> {
) -> WordPrefixIntegerDocids<'t, 'i> {
WordPrefixIntegerDocids {
wtxn,
prefix_database,
@ -72,7 +72,8 @@ impl<'t, 'u, 'i> WordPrefixIntegerDocids<'t, 'u, 'i> {
let mut current_prefixes: Option<&&[String]> = None;
let mut prefixes_cache = HashMap::new();
while let Some((key, data)) = new_word_integer_docids_iter.move_on_next()? {
let (word, pos) = StrBEU16Codec::bytes_decode(key).ok_or(heed::Error::Decoding)?;
let (word, pos) =
StrBEU16Codec::bytes_decode(key).map_err(heed::Error::Decoding)?;
current_prefixes = match current_prefixes.take() {
Some(prefixes) if word.starts_with(&prefixes[0]) => Some(prefixes),
@ -109,7 +110,7 @@ impl<'t, 'u, 'i> WordPrefixIntegerDocids<'t, 'u, 'i> {
}
// We fetch the docids associated to the newly added word prefix fst only.
let db = self.word_database.remap_data_type::<ByteSlice>();
let db = self.word_database.remap_data_type::<Bytes>();
let mut buffer = Vec::new();
for prefix_bytes in new_prefix_fst_words {
let prefix = str::from_utf8(prefix_bytes.as_bytes()).map_err(|_| {
@ -118,7 +119,7 @@ impl<'t, 'u, 'i> WordPrefixIntegerDocids<'t, 'u, 'i> {
// iter over all lines of the DB where the key is prefixed by the current prefix.
let iter = db
.remap_key_type::<ByteSlice>()
.remap_key_type::<Bytes>()
.prefix_iter(self.wtxn, prefix_bytes.as_bytes())?
.remap_key_type::<StrBEU16Codec>();
for result in iter {

View File

@ -2,21 +2,19 @@ use std::iter::{repeat_with, FromIterator};
use std::str;
use fst::{SetBuilder, Streamer};
use heed::RwTxn;
use crate::{Index, Result, SmallString32};
pub struct WordsPrefixesFst<'t, 'u, 'i> {
wtxn: &'t mut heed::RwTxn<'i, 'u>,
pub struct WordsPrefixesFst<'t, 'i> {
wtxn: &'t mut RwTxn<'i>,
index: &'i Index,
threshold: u32,
max_prefix_length: usize,
}
impl<'t, 'u, 'i> WordsPrefixesFst<'t, 'u, 'i> {
pub fn new(
wtxn: &'t mut heed::RwTxn<'i, 'u>,
index: &'i Index,
) -> WordsPrefixesFst<'t, 'u, 'i> {
impl<'t, 'i> WordsPrefixesFst<'t, 'i> {
pub fn new(wtxn: &'t mut RwTxn<'i>, index: &'i Index) -> WordsPrefixesFst<'t, 'i> {
WordsPrefixesFst { wtxn, index, threshold: 100, max_prefix_length: 4 }
}