Compare commits

..

55 Commits

Author SHA1 Message Date
Tamo
dfb84f80da bump strois version 2023-10-10 19:25:12 +02:00
Tamo
98b67f217a move to our new S3 lib 2023-09-28 11:24:18 +02:00
Tamo
6325cda74f bump charabia 2023-09-21 15:18:44 +02:00
Tamo
c71ba72f73 fix build in release mode 2023-09-21 11:05:57 +02:00
Tamo
ecd36b15f0 exposes all the s3 arguments 2023-09-13 18:17:56 +02:00
Clément Renault
8a2e8a887f Load the latest snapshot when we start the engine 2023-09-12 18:08:24 +02:00
Clément Renault
309c33a418 Fix again the dots 2023-09-12 17:55:01 +02:00
Clément Renault
9b01506cee Move the load snapshot step into a function 2023-09-12 16:05:02 +02:00
Clément Renault
f37fdceb15 Use slashes instead of dots for the s3 paths separators 2023-09-12 15:46:15 +02:00
Clément Renault
f544cfa444 Remove tasks and content file on the s3 2023-09-12 15:19:45 +02:00
Clément Renault
c158d03337 Fix internal error 2023-09-12 14:46:13 +02:00
Tamo
b7109c0fd2 start a script to run everything 2023-09-12 11:34:59 +02:00
Kerollmops
a53a0fdb77 Store content files into the S3 2023-09-11 18:17:22 +02:00
Clément Renault
719fdd701b Fix and crash when the tasks path is unknown 2023-09-07 11:31:18 +02:00
Kerollmops
01c13c98ac Mastering minio 2023-09-06 17:54:21 +02:00
Tamo
5b89276fcc starts using s3 2023-09-05 19:25:09 +02:00
Kerollmops
41697c4d65 Introduce the zk-tasks folder 2023-09-04 18:24:34 +02:00
Kerollmops
7d85753573 Make the snapshot download work 2023-09-04 17:38:56 +02:00
Kerollmops
76657af1f9 Add the options into the IndexScheduler 2023-09-04 16:38:05 +02:00
Tamo
966cbdab69 make the tests compile again 2023-09-04 15:39:54 +02:00
Clément Renault
0c68b9ed4c WIP making the final snapshot swap 2023-08-31 15:56:42 +02:00
Clément Renault
d7233ecdb8 Make things to compile again 2023-08-31 14:55:14 +02:00
Clément Renault
95a011af13 Wrap the IndexScheduler fields into an inner struct 2023-08-31 10:36:33 +02:00
Clément Renault
e257710961 WIP fix the tests 2023-08-30 18:03:24 +02:00
Clément Renault
9dd4423054 Fix the watcher ordering of the auth/ node 2023-08-30 17:51:22 +02:00
Clément Renault
8c3ad57ef9 React to changes towards the cluster members 2023-08-30 17:40:12 +02:00
Clément Renault
2d1434da81 Keep the ZK flow when enqueuing tasks 2023-08-30 17:15:15 +02:00
Clément Renault
c488a4a351 Fixup a lot of small issues on the ZK config 2023-08-30 16:42:55 +02:00
Kerollmops
0c7d7c68bc WIP moving to the sync zookeeper API 2023-08-30 15:06:12 +02:00
Tamo
854745c670 wip: starts working on importing the snapshots 2023-08-16 18:41:05 +02:00
Tamo
777eebb759 starts creating snapshot, the import is still missing 2023-08-10 15:00:25 +02:00
Tamo
61ccfaf9bc wake up after registering a task 2023-08-10 09:39:39 +02:00
Tamo
f0c4d36ff7 implement the deletion of tasks after processing a batch
add a lot of comments and logs
2023-08-10 09:36:43 +02:00
Tamo
8c20d6e2fe fix the leader election 2023-08-09 17:23:13 +02:00
ManyTheFish
8e437ed76c Start leader election and task processing (WIP) 2023-08-09 16:52:38 +02:00
Tamo
1191ec5939 fix the register task watcher 2023-08-08 13:18:55 +02:00
Tamo
0d20d08daf fix a few warnings 2023-08-08 11:39:48 +02:00
ManyTheFish
b66bf049b5 Create a task on zookeeper side when a task is created locally 2023-08-07 17:02:51 +02:00
ManyTheFish
b2f36b9b97 Comment Meilisearch container by default 2023-08-07 17:02:00 +02:00
ManyTheFish
b311089435 Update zookeeper client 2023-08-07 14:20:01 +02:00
ManyTheFish
3d46e84d97 update docker compose as an example 2023-08-03 17:15:24 +02:00
ManyTheFish
ad7f8edff8 fix auto-synchronization with zk 2023-08-03 14:43:29 +02:00
Tamo
5ce01bcb53 add logs 2023-08-03 13:59:05 +02:00
Tamo
d5523cc6ac fix the tests 2023-08-03 12:28:08 +02:00
Tamo
fe7a312ec6 Import the already existing api keys on startup 2023-08-03 12:25:32 +02:00
Tamo
57dc4b148c implement the watcher for all kind of operations 2023-08-03 10:52:13 +02:00
Tamo
a325ddfe6a Forward the key deletions to zookeeper 2023-08-03 10:36:49 +02:00
Tamo
0cd81573b4 Forward the keys update to zookeeper 2023-08-03 10:22:34 +02:00
ManyTheFish
b0ff595f60 Event Listener: delete local key if deleted on ZK 2023-08-02 18:36:36 +02:00
ManyTheFish
3eb6f4b56f Create api keys 2023-08-02 16:52:45 +02:00
ManyTheFish
49f976c8d8 fix analitics compilation 2023-08-02 14:17:03 +02:00
Tamo
84d56f3320 send the creation of api-key to zookeeper 2023-08-02 13:57:30 +02:00
Tamo
97e3dfd99d makes zk available inside the auth-controller with config coming from the cli, it compiles 2023-08-02 13:17:40 +02:00
ManyTheFish
dc38da95c4 WIP 2023-08-02 12:00:02 +02:00
ManyTheFish
2ce8b42757 REMOVE: add docker compôse for tests 2023-08-02 11:59:54 +02:00
55 changed files with 2147 additions and 2566 deletions

View File

@@ -35,7 +35,7 @@ jobs:
- name: Build deb package
run: cargo deb -p meilisearch -o target/debian/meilisearch.deb
- name: Upload debian pkg to release
uses: svenstaro/upload-release-action@2.7.0
uses: svenstaro/upload-release-action@2.6.1
with:
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
file: target/debian/meilisearch.deb

View File

@@ -54,7 +54,7 @@ jobs:
# No need to upload binaries for dry run (cron)
- name: Upload binaries to release
if: github.event_name == 'release'
uses: svenstaro/upload-release-action@2.7.0
uses: svenstaro/upload-release-action@2.6.1
with:
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
file: target/release/meilisearch
@@ -87,7 +87,7 @@ jobs:
# No need to upload binaries for dry run (cron)
- name: Upload binaries to release
if: github.event_name == 'release'
uses: svenstaro/upload-release-action@2.7.0
uses: svenstaro/upload-release-action@2.6.1
with:
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
file: target/release/${{ matrix.artifact_name }}
@@ -121,7 +121,7 @@ jobs:
- name: Upload the binary to release
# No need to upload binaries for dry run (cron)
if: github.event_name == 'release'
uses: svenstaro/upload-release-action@2.7.0
uses: svenstaro/upload-release-action@2.6.1
with:
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
file: target/${{ matrix.target }}/release/meilisearch
@@ -183,7 +183,7 @@ jobs:
- name: Upload the binary to release
# No need to upload binaries for dry run (cron)
if: github.event_name == 'release'
uses: svenstaro/upload-release-action@2.7.0
uses: svenstaro/upload-release-action@2.6.1
with:
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
file: target/${{ matrix.target }}/release/meilisearch

View File

@@ -43,7 +43,7 @@ jobs:
toolchain: nightly
override: true
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.5.1
uses: Swatinem/rust-cache@v2.5.0
- name: Run cargo check without any default features
uses: actions-rs/cargo@v1
with:
@@ -65,7 +65,7 @@ jobs:
steps:
- uses: actions/checkout@v3
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.5.1
uses: Swatinem/rust-cache@v2.5.0
- name: Run cargo check without any default features
uses: actions-rs/cargo@v1
with:
@@ -146,7 +146,7 @@ jobs:
toolchain: stable
override: true
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.5.1
uses: Swatinem/rust-cache@v2.5.0
- name: Run tests in debug
uses: actions-rs/cargo@v1
with:
@@ -165,7 +165,7 @@ jobs:
override: true
components: clippy
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.5.1
uses: Swatinem/rust-cache@v2.5.0
- name: Run cargo clippy
uses: actions-rs/cargo@v1
with:
@@ -184,7 +184,7 @@ jobs:
override: true
components: rustfmt
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.5.1
uses: Swatinem/rust-cache@v2.5.0
- name: Run cargo fmt
# Since we never ran the `build.rs` script in the benchmark directory we are missing one auto-generated import file.
# Since we want to trigger (and fail) this action as fast as possible, instead of building the benchmark crate

447
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -36,24 +36,3 @@ opt-level = 3
opt-level = 3
[profile.dev.package.roaring]
opt-level = 3
[profile.dev.package.lindera-ipadic-builder]
opt-level = 3
[profile.dev.package.encoding]
opt-level = 3
[profile.dev.package.yada]
opt-level = 3
[profile.release.package.lindera-ipadic-builder]
opt-level = 3
[profile.release.package.encoding]
opt-level = 3
[profile.release.package.yada]
opt-level = 3
[profile.bench.package.lindera-ipadic-builder]
opt-level = 3
[profile.bench.package.encoding]
opt-level = 3
[profile.bench.package.yada]
opt-level = 3

59
docker-compose.yml Normal file
View File

@@ -0,0 +1,59 @@
version: "3.9"
services:
zk1:
container_name: zk1
hostname: zk1
image: bitnami/zookeeper:3.7.1
ports:
- 21811:2181
environment:
- ALLOW_ANONYMOUS_LOGIN=yes
- ZOO_SERVER_ID=1
- ZOO_SERVERS=0.0.0.0:2888:3888,zk2:2888:3888,zk3:2888:3888
zk2:
container_name: zk2
hostname: zk2
image: bitnami/zookeeper:3.7.1
ports:
- 21812:2181
environment:
- ALLOW_ANONYMOUS_LOGIN=yes
- ZOO_SERVER_ID=2
- ZOO_SERVERS=zk1:2888:3888,0.0.0.0:2888:3888,zk3:2888:3888
zk3:
container_name: zk3
hostname: zk3
image: bitnami/zookeeper:3.7.1
ports:
- 21813:2181
environment:
- ALLOW_ANONYMOUS_LOGIN=yes
- ZOO_SERVER_ID=3
- ZOO_SERVERS=zk1:2888:3888,zk2:2888:3888,0.0.0.0:2888:3888
zoonavigator:
container_name: zoonavigator
image: elkozmon/zoonavigator
ports:
- 9000:9000
# Meilisearch instances
# m1:
# container_name: m1
# hostname: m1
# image: getmeili/meilisearch:prototype-zookeeper-ha-0
# ports:
# - 7700:7700
# environment:
# - MEILI_ZK_URL=zk1:2181
# - MEILI_MASTER_KEY=masterkey
# restart: always
# m2:
# container_name: m2
# hostname: m2
# image: getmeili/meilisearch:prototype-zookeeper-ha-0
# ports:
# - 7701:7700
# environment:
# - MEILI_ZK_URL=zk2:2181
# - MEILI_MASTER_KEY=masterkey
# restart: always

View File

@@ -262,9 +262,6 @@ pub(crate) mod test {
sortable_attributes: Setting::Set(btreeset! { S("age") }),
ranking_rules: Setting::NotSet,
stop_words: Setting::NotSet,
non_separator_tokens: Setting::NotSet,
separator_tokens: Setting::NotSet,
dictionary: Setting::NotSet,
synonyms: Setting::NotSet,
distinct_attribute: Setting::NotSet,
typo_tolerance: Setting::NotSet,

View File

@@ -340,9 +340,6 @@ impl<T> From<v5::Settings<T>> for v6::Settings<v6::Unchecked> {
}
},
stop_words: settings.stop_words.into(),
non_separator_tokens: v6::Setting::NotSet,
separator_tokens: v6::Setting::NotSet,
dictionary: v6::Setting::NotSet,
synonyms: settings.synonyms.into(),
distinct_attribute: settings.distinct_attribute.into(),
typo_tolerance: match settings.typo_tolerance {

View File

@@ -22,20 +22,6 @@ pub enum Error {
pub type Result<T> = std::result::Result<T, Error>;
impl Deref for File {
type Target = NamedTempFile;
fn deref(&self) -> &Self::Target {
&self.file
}
}
impl DerefMut for File {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.file
}
}
#[derive(Clone, Debug)]
pub struct FileStore {
path: PathBuf,
@@ -146,6 +132,20 @@ impl File {
}
}
impl Deref for File {
type Target = NamedTempFile;
fn deref(&self) -> &Self::Target {
&self.file
}
}
impl DerefMut for File {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.file
}
}
#[cfg(test)]
mod test {
use std::io::Write;

61
ha_test/run.sh Normal file
View File

@@ -0,0 +1,61 @@
#!/bin/bash
function is_everything_installed {
everything_ok=yes
if hash zkli 2>/dev/null; then
echo "✅ zkli installed"
else
everything_ok=no
echo "🥺 zkli is missing, please run \`cargo install zkli\`"
fi
if hash s3cmd 2>/dev/null; then
echo "✅ s3cmd installed"
else
everything_ok=no
echo "🥺 s3cmd is missing, see how to install it here https://s3tools.org/s3cmd"
fi
if [ $everything_ok = "no" ]; then
echo "Exiting..."
exit 1
fi
}
# param: addr of zookeeper
function connect_to_zookeeper {
if ! zkli -a "$1" ls > /dev/null; then
echo "zkli can't connect"
return 1
fi
}
# param: addr of the s3 bucket
function connect_to_s3 {
# S3_SECRET_KEY
# S3_ACCESS_KEY
# S3_HOST
# S3_BUCKET
s3cmd --host="$S3_HOST" --host-bucket="$S3_BUCKET" --access_key="$ACCESS_KEY" --secret_key="$S3_SECRET_KEY" ls
if $?; then
echo "s3cmd can't connect"
return 1
fi
}
is_everything_installed
ZOOKEEPER_ADDR="localhost:2181"
if ! connect_to_zookeeper $ZOOKEEPER_ADDR; then
ZOOKEEPER_ADDR="localhost:21811"
if ! connect_to_zookeeper $ZOOKEEPER_ADDR; then
echo "Can't connect to zkli"
exit 1
fi
fi
connect_to_s3

View File

@@ -31,6 +31,10 @@ tempfile = "3.5.0"
thiserror = "1.0.40"
time = { version = "0.3.20", features = ["serde-well-known", "formatting", "parsing", "macros"] }
uuid = { version = "1.3.1", features = ["serde", "v4"] }
tokio = { version = "1.27.0", features = ["full"] }
zookeeper = "0.8.0"
parking_lot = "0.12.1"
strois = "0.0.4"
[dev-dependencies]
big_s = "1.0.2"

View File

@@ -43,7 +43,7 @@ use uuid::Uuid;
use crate::autobatcher::{self, BatchKind};
use crate::utils::{self, swap_index_uid_in_task};
use crate::{Error, IndexScheduler, ProcessingTasks, Result, TaskId};
use crate::{Error, IndexSchedulerInner, ProcessingTasks, Result, TaskId};
/// Represents a combination of tasks that can all be processed at the same time.
///
@@ -198,6 +198,35 @@ impl Batch {
| IndexDocumentDeletionByFilter { index_uid, .. } => Some(index_uid),
}
}
/// Return the content fields uuids associated with this batch.
pub fn content_uuids(&self) -> Vec<Uuid> {
match self {
Batch::TaskCancelation { .. }
| Batch::TaskDeletion(_)
| Batch::Dump(_)
| Batch::IndexCreation { .. }
| Batch::IndexDocumentDeletionByFilter { .. }
| Batch::IndexUpdate { .. }
| Batch::SnapshotCreation(_)
| Batch::IndexDeletion { .. }
| Batch::IndexSwap { .. } => vec![],
Batch::IndexOperation { op, .. } => match op {
IndexOperation::DocumentOperation { operations, .. } => operations
.iter()
.flat_map(|op| match op {
DocumentOperation::Add(uuid) => Some(*uuid),
DocumentOperation::Delete(_) => None,
})
.collect(),
IndexOperation::DocumentDeletion { .. }
| IndexOperation::Settings { .. }
| IndexOperation::DocumentClear { .. }
| IndexOperation::SettingsAndDocumentOperation { .. }
| IndexOperation::DocumentClearAndSetting { .. } => vec![],
},
}
}
}
impl IndexOperation {
@@ -213,7 +242,7 @@ impl IndexOperation {
}
}
impl IndexScheduler {
impl IndexSchedulerInner {
/// Convert an [`BatchKind`](crate::autobatcher::BatchKind) into a [`Batch`].
///
/// ## Arguments
@@ -480,8 +509,7 @@ impl IndexScheduler {
if let Some(task_id) = to_cancel.max() {
// We retrieve the tasks that were processing before this tasks cancelation started.
// We must *not* reset the processing tasks before calling this method.
let ProcessingTasks { started_at, processing } =
&*self.processing_tasks.read().unwrap();
let ProcessingTasks { started_at, processing, .. } = &*self.processing_tasks.read();
return Ok(Some(Batch::TaskCancelation {
task: self.get_task(rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?,
previous_started_at: *started_at,
@@ -1392,7 +1420,7 @@ impl IndexScheduler {
fn delete_matched_tasks(&self, wtxn: &mut RwTxn, matched_tasks: &RoaringBitmap) -> Result<u64> {
// 1. Remove from this list the tasks that we are not allowed to delete
let enqueued_tasks = self.get_status(wtxn, Status::Enqueued)?;
let processing_tasks = &self.processing_tasks.read().unwrap().processing.clone();
let processing_tasks = &self.processing_tasks.read().processing.clone();
let all_task_ids = self.all_task_ids(wtxn)?;
let mut to_delete_tasks = all_task_ids & matched_tasks;

View File

@@ -295,6 +295,11 @@ impl IndexMap {
"Attempt to finish deletion of an index that was being closed"
);
}
/// Returns the indexes that were opened by the `IndexMap`.
pub fn clear(&mut self) -> Vec<Index> {
self.available.clear().into_iter().map(|(_, (_, index))| index).collect()
}
}
/// Create or open an index in the specified path.
@@ -335,7 +340,8 @@ mod tests {
impl IndexMapper {
fn test() -> (Self, Env, IndexSchedulerHandle) {
let (index_scheduler, handle) = IndexScheduler::test(true, vec![]);
(index_scheduler.index_mapper, index_scheduler.env, handle)
let index_scheduler = index_scheduler.inner();
(index_scheduler.index_mapper.clone(), index_scheduler.env.clone(), handle)
}
}

View File

@@ -61,7 +61,7 @@ pub struct IndexMapper {
pub(crate) index_stats: Database<UuidCodec, SerdeJson<IndexStats>>,
/// Path to the folder where the LMDB environments of each index are.
base_path: PathBuf,
pub(crate) base_path: PathBuf,
/// The map size an index is opened with on the first time.
index_base_map_size: usize,
/// The quantity by which the map size of an index is incremented upon reopening, in bytes.
@@ -135,7 +135,7 @@ impl IndexMapper {
index_growth_amount: usize,
index_count: usize,
enable_mdb_writemap: bool,
indexer_config: IndexerConfig,
indexer_config: Arc<IndexerConfig>,
) -> Result<Self> {
let mut wtxn = env.write_txn()?;
let index_mapping = env.create_database(&mut wtxn, Some(INDEX_MAPPING))?;
@@ -150,7 +150,7 @@ impl IndexMapper {
index_base_map_size,
index_growth_amount,
enable_mdb_writemap,
indexer_config: Arc::new(indexer_config),
indexer_config,
})
}
@@ -428,6 +428,11 @@ impl IndexMapper {
Ok(())
}
/// Returns the indexes that were opened by the `IndexMapper`.
pub fn clear(&mut self) -> Vec<Index> {
self.index_map.write().unwrap().clear()
}
/// The stats of an index.
///
/// If available in the cache, they are directly returned.

View File

@@ -1,5 +1,6 @@
use std::collections::BTreeSet;
use std::fmt::Write;
use std::ops::Deref;
use meilisearch_types::heed::types::{OwnedType, SerdeBincode, SerdeJson, Str};
use meilisearch_types::heed::{Database, RoTxn};
@@ -8,12 +9,13 @@ use meilisearch_types::tasks::{Details, Task};
use roaring::RoaringBitmap;
use crate::index_mapper::IndexMapper;
use crate::{IndexScheduler, Kind, Status, BEI128};
use crate::{IndexScheduler, IndexSchedulerInner, Kind, Status, BEI128};
pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
scheduler.assert_internally_consistent();
let IndexScheduler {
let inner = scheduler.inner();
let IndexSchedulerInner {
autobatching_enabled,
must_stop_processing: _,
processing_tasks,
@@ -38,13 +40,15 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
test_breakpoint_sdr: _,
planned_failures: _,
run_loop_iteration: _,
} = scheduler;
zookeeper: _,
options: _,
} = inner.deref();
let rtxn = env.read_txn().unwrap();
let mut snap = String::new();
let processing_tasks = processing_tasks.read().unwrap().processing.clone();
let processing_tasks = processing_tasks.read().processing.clone();
snap.push_str(&format!("### Autobatching Enabled = {autobatching_enabled}\n"));
snap.push_str("### Processing Tasks:\n");
snap.push_str(&snapshot_bitmap(&processing_tasks));

File diff suppressed because it is too large Load Diff

View File

@@ -1,5 +1,6 @@
//! Thread-safe `Vec`-backend LRU cache using [`std::sync::atomic::AtomicU64`] for synchronization.
use std::mem;
use std::sync::atomic::{AtomicU64, Ordering};
/// Thread-safe `Vec`-backend LRU cache
@@ -190,6 +191,11 @@ where
}
None
}
/// Returns the generation associated to the key and values of the `LruMap`.
pub fn clear(&mut self) -> Vec<(AtomicU64, (K, V))> {
mem::take(&mut self.0.data)
}
}
/// The result of an insertion in a LRU map.

View File

@@ -10,9 +10,9 @@ use meilisearch_types::tasks::{Details, IndexSwap, Kind, KindWithContent, Status
use roaring::{MultiOps, RoaringBitmap};
use time::OffsetDateTime;
use crate::{Error, IndexScheduler, Result, Task, TaskId, BEI128};
use crate::{Error, IndexSchedulerInner, Result, Task, TaskId, BEI128};
impl IndexScheduler {
impl IndexSchedulerInner {
pub(crate) fn all_task_ids(&self, rtxn: &RoTxn) -> Result<RoaringBitmap> {
enum_iterator::all().map(|s| self.get_status(rtxn, s)).union()
}
@@ -331,11 +331,12 @@ pub fn clamp_to_page_size(size: usize) -> usize {
}
#[cfg(test)]
impl IndexScheduler {
impl crate::IndexScheduler {
/// Asserts that the index scheduler's content is internally consistent.
pub fn assert_internally_consistent(&self) {
let rtxn = self.env.read_txn().unwrap();
for task in self.all_tasks.iter(&rtxn).unwrap() {
let this = self.inner();
let rtxn = this.env.read_txn().unwrap();
for task in this.all_tasks.iter(&rtxn).unwrap() {
let (task_id, task) = task.unwrap();
let task_id = task_id.get();
@@ -354,21 +355,21 @@ impl IndexScheduler {
} = task;
assert_eq!(uid, task.uid);
if let Some(task_index_uid) = &task_index_uid {
assert!(self
assert!(this
.index_tasks
.get(&rtxn, task_index_uid.as_str())
.unwrap()
.unwrap()
.contains(task.uid));
}
let db_enqueued_at = self
let db_enqueued_at = this
.enqueued_at
.get(&rtxn, &BEI128::new(enqueued_at.unix_timestamp_nanos()))
.unwrap()
.unwrap();
assert!(db_enqueued_at.contains(task_id));
if let Some(started_at) = started_at {
let db_started_at = self
let db_started_at = this
.started_at
.get(&rtxn, &BEI128::new(started_at.unix_timestamp_nanos()))
.unwrap()
@@ -376,7 +377,7 @@ impl IndexScheduler {
assert!(db_started_at.contains(task_id));
}
if let Some(finished_at) = finished_at {
let db_finished_at = self
let db_finished_at = this
.finished_at
.get(&rtxn, &BEI128::new(finished_at.unix_timestamp_nanos()))
.unwrap()
@@ -384,9 +385,9 @@ impl IndexScheduler {
assert!(db_finished_at.contains(task_id));
}
if let Some(canceled_by) = canceled_by {
let db_canceled_tasks = self.get_status(&rtxn, Status::Canceled).unwrap();
let db_canceled_tasks = this.get_status(&rtxn, Status::Canceled).unwrap();
assert!(db_canceled_tasks.contains(uid));
let db_canceling_task = self.get_task(&rtxn, canceled_by).unwrap().unwrap();
let db_canceling_task = this.get_task(&rtxn, canceled_by).unwrap().unwrap();
assert_eq!(db_canceling_task.status, Status::Succeeded);
match db_canceling_task.kind {
KindWithContent::TaskCancelation { query: _, tasks } => {
@@ -427,7 +428,7 @@ impl IndexScheduler {
Details::IndexInfo { primary_key: pk1 } => match &kind {
KindWithContent::IndexCreation { index_uid, primary_key: pk2 }
| KindWithContent::IndexUpdate { index_uid, primary_key: pk2 } => {
self.index_tasks
this.index_tasks
.get(&rtxn, index_uid.as_str())
.unwrap()
.unwrap()
@@ -535,23 +536,23 @@ impl IndexScheduler {
}
}
assert!(self.get_status(&rtxn, status).unwrap().contains(uid));
assert!(self.get_kind(&rtxn, kind.as_kind()).unwrap().contains(uid));
assert!(this.get_status(&rtxn, status).unwrap().contains(uid));
assert!(this.get_kind(&rtxn, kind.as_kind()).unwrap().contains(uid));
if let KindWithContent::DocumentAdditionOrUpdate { content_file, .. } = kind {
match status {
Status::Enqueued | Status::Processing => {
assert!(self
assert!(this
.file_store
.all_uuids()
.unwrap()
.any(|uuid| uuid.as_ref().unwrap() == &content_file),
"Could not find uuid `{content_file}` in the file_store. Available uuids are {:?}.",
self.file_store.all_uuids().unwrap().collect::<std::result::Result<Vec<_>, file_store::Error>>().unwrap(),
this.file_store.all_uuids().unwrap().collect::<std::result::Result<Vec<_>, file_store::Error>>().unwrap(),
);
}
Status::Succeeded | Status::Failed | Status::Canceled => {
assert!(self
assert!(this
.file_store
.all_uuids()
.unwrap()

View File

@@ -14,6 +14,7 @@ license.workspace = true
base64 = "0.21.0"
enum-iterator = "1.4.0"
hmac = "0.12.1"
log = "0.4.19"
maplit = "1.0.2"
meilisearch-types = { path = "../meilisearch-types" }
rand = "0.8.5"
@@ -24,3 +25,4 @@ sha2 = "0.10.6"
thiserror = "1.0.40"
time = { version = "0.3.20", features = ["serde-well-known", "formatting", "parsing", "macros"] }
uuid = { version = "1.3.1", features = ["serde", "v4"] }
zookeeper = "0.8.0"

View File

@@ -19,6 +19,7 @@ internal_error!(
AuthControllerError: meilisearch_types::milli::heed::Error,
std::io::Error,
serde_json::Error,
zookeeper::ZkError,
std::str::Utf8Error
);

View File

@@ -16,22 +16,113 @@ pub use store::open_auth_store_env;
use store::{generate_key_as_hexa, HeedAuthStore};
use time::OffsetDateTime;
use uuid::Uuid;
use zookeeper::{
Acl, AddWatchMode, CreateMode, WatchedEvent, WatchedEventType, ZkError, ZooKeeper,
};
#[derive(Clone)]
pub struct AuthController {
store: Arc<HeedAuthStore>,
master_key: Option<String>,
zookeeper: Option<Arc<ZooKeeper>>,
}
impl AuthController {
pub fn new(db_path: impl AsRef<Path>, master_key: &Option<String>) -> Result<Self> {
pub fn new(
db_path: impl AsRef<Path>,
master_key: &Option<String>,
zookeeper: Option<Arc<ZooKeeper>>,
) -> Result<Self> {
let store = HeedAuthStore::new(db_path)?;
let controller = Self { store: Arc::new(store), master_key: master_key.clone(), zookeeper };
if store.is_empty()? {
generate_default_keys(&store)?;
match controller.zookeeper {
// setup the auth zk environment, the `auth` node
Some(ref zookeeper) => {
// Zookeeper Event listener loop
let controller_clone = controller.clone();
let zkk = zookeeper.clone();
zookeeper.add_watch("/auth", AddWatchMode::PersistentRecursive, move |event| {
let WatchedEvent { event_type, path, keeper_state: _ } = dbg!(event);
match event_type {
WatchedEventType::NodeDeleted => {
// TODO: ugly unwraps
let path = path.unwrap();
let uuid = path.strip_prefix("/auth/").unwrap();
let uuid = Uuid::parse_str(&uuid).unwrap();
log::info!("The key {} has been deleted", uuid);
controller_clone.store.delete_api_key(uuid).unwrap();
}
WatchedEventType::NodeCreated | WatchedEventType::NodeDataChanged => {
let path = path.unwrap();
if path.strip_prefix("/auth/").map_or(false, |s| !s.is_empty()) {
let (key, _stat) = zkk.get_data(&path, false).unwrap();
let key: Key = serde_json::from_slice(&key).unwrap();
log::info!("The key {} has been deleted", key.uid);
controller_clone.store.put_api_key(key).unwrap();
}
}
otherwise => panic!("Got the unexpected `{otherwise:?}` event!"),
}
})?;
// TODO: we should catch the potential unexpected errors here https://docs.rs/zookeeper-client/latest/zookeeper_client/struct.Client.html#method.create
// for the moment we consider that `create` only returns Error::NodeExists.
match zookeeper.create(
"/auth",
vec![],
Acl::open_unsafe().clone(),
CreateMode::Persistent,
) {
// If the store is empty, we must generate and push the default api-keys.
Ok(_) => generate_default_keys(&controller)?,
// If the node exist we should clear our DB and download all the existing api-keys
Err(ZkError::NodeExists) => {
log::warn!("Auth directory already exists, we need to clear our keys + import the one in zookeeper");
let store = controller.store.clone();
store.delete_all_keys()?;
let children = zookeeper
.get_children("/auth", false)
.expect("Internal, the auth directory was deleted during execution.");
log::info!("Importing {} api-keys", children.len());
for path in children {
log::info!(" Importing {}", path);
match zookeeper.get_data(&format!("/auth/{}", &path), false) {
Ok((key, _stat)) => {
let key = serde_json::from_slice(&key).unwrap();
let store = controller.store.clone();
store.put_api_key(key)?;
}
Err(e) => panic!("{e}"),
}
// else the file was deleted while we were inserting the key. We ignore it.
// TODO: What happens if someone updates the files before we have the time
// to setup the watcher
}
}
e @ Err(
ZkError::NoNode | ZkError::NoChildrenForEphemerals | ZkError::InvalidACL,
) => unreachable!("{e:?}"),
Err(e) => panic!("{e}"),
}
// TODO: Race condition above:
// What happens if two node join exactly at the same moment:
// One will create the dir
// The second one will delete its DB, load nothing and install a watcher
// The first one will push its keys and should wake up and update the second one.
// / BUT, if the second one delete its DB and the first one push its files before the second one install the watcher we're fucked
}
None => {
if controller.store.is_empty()? {
generate_default_keys(&controller)?;
}
}
}
Ok(Self { store: Arc::new(store), master_key: master_key.clone() })
Ok(controller)
}
/// Return `Ok(())` if the auth controller is able to access one of its database.
@@ -53,7 +144,24 @@ impl AuthController {
pub fn create_key(&self, create_key: CreateApiKey) -> Result<Key> {
match self.store.get_api_key(create_key.uid)? {
Some(_) => Err(AuthControllerError::ApiKeyAlreadyExists(create_key.uid.to_string())),
None => self.store.put_api_key(create_key.to_key()),
None => self.put_key(create_key.to_key()),
}
}
pub fn put_key(&self, key: Key) -> Result<Key> {
let store = self.store.clone();
match &self.zookeeper {
Some(zookeeper) => {
zookeeper.create(
&format!("/auth/{}", key.uid),
serde_json::to_vec_pretty(&key)?,
Acl::open_unsafe().clone(),
CreateMode::Persistent,
)?;
Ok(key)
}
None => store.put_api_key(key),
}
}
@@ -68,7 +176,20 @@ impl AuthController {
name => key.name = name.set(),
};
key.updated_at = OffsetDateTime::now_utc();
self.store.put_api_key(key)
let store = self.store.clone();
// TODO: we may commit only after zk persisted the keys
match &self.zookeeper {
Some(zookeeper) => {
zookeeper.set_data(
&format!("/auth/{}", key.uid),
serde_json::to_vec_pretty(&key)?,
None,
)?;
Ok(key)
}
None => store.put_api_key(key),
}
}
pub fn get_key(&self, uid: Uuid) -> Result<Key> {
@@ -110,7 +231,19 @@ impl AuthController {
}
pub fn delete_key(&self, uid: Uuid) -> Result<()> {
if self.store.delete_api_key(uid)? {
let deleted = match &self.zookeeper {
Some(zookeeper) => match zookeeper.delete(&format!("/auth/{}", uid), None) {
Ok(()) => true,
Err(ZkError::NoNode) => false,
Err(e) => return Err(e.into()),
},
None => {
let store = self.store.clone();
store.delete_api_key(uid)?
}
};
if deleted {
Ok(())
} else {
Err(AuthControllerError::ApiKeyNotFound(uid.to_string()))
@@ -159,7 +292,7 @@ impl AuthController {
self.store.delete_all_keys()
}
/// Delete all the keys in the DB.
/// Insert a key in the DB without any check on its validity
pub fn raw_insert_key(&mut self, key: Key) -> Result<()> {
self.store.put_api_key(key)?;
Ok(())
@@ -304,10 +437,9 @@ pub struct IndexSearchRules {
pub filter: Option<serde_json::Value>,
}
fn generate_default_keys(store: &HeedAuthStore) -> Result<()> {
store.put_api_key(Key::default_admin())?;
store.put_api_key(Key::default_search())?;
fn generate_default_keys(controller: &AuthController) -> Result<()> {
controller.put_key(Key::default_admin())?;
controller.put_key(Key::default_search())?;
Ok(())
}

View File

@@ -1,4 +1,3 @@
use std::borrow::Borrow;
use std::fmt::{self, Debug, Display};
use std::fs::File;
use std::io::{self, Seek, Write};
@@ -42,7 +41,7 @@ impl Display for DocumentFormatError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::Io(e) => write!(f, "{e}"),
Self::MalformedPayload(me, b) => match me.borrow() {
Self::MalformedPayload(me, b) => match me {
Error::Json(se) => {
let mut message = match se.classify() {
Category::Data => {

View File

@@ -175,6 +175,7 @@ macro_rules! make_error_codes {
// An exhaustive list of all the error codes used by meilisearch.
make_error_codes! {
S3Error , System , INTERNAL_SERVER_ERROR;
ApiKeyAlreadyExists , InvalidRequest , CONFLICT ;
ApiKeyNotFound , InvalidRequest , NOT_FOUND ;
BadParameter , InvalidRequest , BAD_REQUEST;
@@ -259,9 +260,6 @@ InvalidSettingsRankingRules , InvalidRequest , BAD_REQUEST ;
InvalidSettingsSearchableAttributes , InvalidRequest , BAD_REQUEST ;
InvalidSettingsSortableAttributes , InvalidRequest , BAD_REQUEST ;
InvalidSettingsStopWords , InvalidRequest , BAD_REQUEST ;
InvalidSettingsNonSeparatorTokens , InvalidRequest , BAD_REQUEST ;
InvalidSettingsSeparatorTokens , InvalidRequest , BAD_REQUEST ;
InvalidSettingsDictionary , InvalidRequest , BAD_REQUEST ;
InvalidSettingsSynonyms , InvalidRequest , BAD_REQUEST ;
InvalidSettingsTypoTolerance , InvalidRequest , BAD_REQUEST ;
InvalidState , Internal , INTERNAL_SERVER_ERROR ;

View File

@@ -171,15 +171,6 @@ pub struct Settings<T> {
#[deserr(default, error = DeserrJsonError<InvalidSettingsStopWords>)]
pub stop_words: Setting<BTreeSet<String>>,
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
#[deserr(default, error = DeserrJsonError<InvalidSettingsNonSeparatorTokens>)]
pub non_separator_tokens: Setting<BTreeSet<String>>,
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
#[deserr(default, error = DeserrJsonError<InvalidSettingsSeparatorTokens>)]
pub separator_tokens: Setting<BTreeSet<String>>,
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
#[deserr(default, error = DeserrJsonError<InvalidSettingsDictionary>)]
pub dictionary: Setting<BTreeSet<String>>,
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
#[deserr(default, error = DeserrJsonError<InvalidSettingsSynonyms>)]
pub synonyms: Setting<BTreeMap<String, Vec<String>>>,
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
@@ -210,9 +201,6 @@ impl Settings<Checked> {
ranking_rules: Setting::Reset,
stop_words: Setting::Reset,
synonyms: Setting::Reset,
non_separator_tokens: Setting::Reset,
separator_tokens: Setting::Reset,
dictionary: Setting::Reset,
distinct_attribute: Setting::Reset,
typo_tolerance: Setting::Reset,
faceting: Setting::Reset,
@@ -229,9 +217,6 @@ impl Settings<Checked> {
sortable_attributes,
ranking_rules,
stop_words,
non_separator_tokens,
separator_tokens,
dictionary,
synonyms,
distinct_attribute,
typo_tolerance,
@@ -247,9 +232,6 @@ impl Settings<Checked> {
sortable_attributes,
ranking_rules,
stop_words,
non_separator_tokens,
separator_tokens,
dictionary,
synonyms,
distinct_attribute,
typo_tolerance,
@@ -292,9 +274,6 @@ impl Settings<Unchecked> {
ranking_rules: self.ranking_rules,
stop_words: self.stop_words,
synonyms: self.synonyms,
non_separator_tokens: self.non_separator_tokens,
separator_tokens: self.separator_tokens,
dictionary: self.dictionary,
distinct_attribute: self.distinct_attribute,
typo_tolerance: self.typo_tolerance,
faceting: self.faceting,
@@ -356,28 +335,6 @@ pub fn apply_settings_to_builder(
Setting::NotSet => (),
}
match settings.non_separator_tokens {
Setting::Set(ref non_separator_tokens) => {
builder.set_non_separator_tokens(non_separator_tokens.clone())
}
Setting::Reset => builder.reset_non_separator_tokens(),
Setting::NotSet => (),
}
match settings.separator_tokens {
Setting::Set(ref separator_tokens) => {
builder.set_separator_tokens(separator_tokens.clone())
}
Setting::Reset => builder.reset_separator_tokens(),
Setting::NotSet => (),
}
match settings.dictionary {
Setting::Set(ref dictionary) => builder.set_dictionary(dictionary.clone()),
Setting::Reset => builder.reset_dictionary(),
Setting::NotSet => (),
}
match settings.synonyms {
Setting::Set(ref synonyms) => builder.set_synonyms(synonyms.clone().into_iter().collect()),
Setting::Reset => builder.reset_synonyms(),
@@ -502,14 +459,15 @@ pub fn settings(
})
.transpose()?
.unwrap_or_default();
let non_separator_tokens = index.non_separator_tokens(rtxn)?.unwrap_or_default();
let separator_tokens = index.separator_tokens(rtxn)?.unwrap_or_default();
let dictionary = index.dictionary(rtxn)?.unwrap_or_default();
let distinct_field = index.distinct_field(rtxn)?.map(String::from);
let synonyms = index.user_defined_synonyms(rtxn)?;
// in milli each word in the synonyms map were split on their separator. Since we lost
// this information we are going to put space between words.
let synonyms = index
.synonyms(rtxn)?
.iter()
.map(|(key, values)| (key.join(" "), values.iter().map(|value| value.join(" ")).collect()))
.collect();
let min_typo_word_len = MinWordSizeTyposSetting {
one_typo: Setting::Set(index.min_word_len_one_typo(rtxn)?),
@@ -562,9 +520,6 @@ pub fn settings(
sortable_attributes: Setting::Set(sortable_attributes),
ranking_rules: Setting::Set(criteria.iter().map(|c| c.clone().into()).collect()),
stop_words: Setting::Set(stop_words),
non_separator_tokens: Setting::Set(non_separator_tokens),
separator_tokens: Setting::Set(separator_tokens),
dictionary: Setting::Set(dictionary),
distinct_attribute: match distinct_field {
Some(field) => Setting::Set(field),
None => Setting::Reset,
@@ -687,9 +642,6 @@ pub(crate) mod test {
sortable_attributes: Setting::NotSet,
ranking_rules: Setting::NotSet,
stop_words: Setting::NotSet,
non_separator_tokens: Setting::NotSet,
separator_tokens: Setting::NotSet,
dictionary: Setting::NotSet,
synonyms: Setting::NotSet,
distinct_attribute: Setting::NotSet,
typo_tolerance: Setting::NotSet,
@@ -711,9 +663,6 @@ pub(crate) mod test {
sortable_attributes: Setting::NotSet,
ranking_rules: Setting::NotSet,
stop_words: Setting::NotSet,
non_separator_tokens: Setting::NotSet,
separator_tokens: Setting::NotSet,
dictionary: Setting::NotSet,
synonyms: Setting::NotSet,
distinct_attribute: Setting::NotSet,
typo_tolerance: Setting::NotSet,

View File

@@ -80,6 +80,7 @@ reqwest = { version = "0.11.16", features = [
], default-features = false }
rustls = "0.20.8"
rustls-pemfile = "1.0.2"
strois = "0.0.4"
segment = { version = "0.2.2", optional = true }
serde = { version = "1.0.160", features = ["derive"] }
serde_json = { version = "1.0.95", features = ["preserve_order"] }
@@ -105,6 +106,7 @@ walkdir = "2.3.3"
yaup = "0.2.1"
serde_urlencoded = "0.7.1"
termcolor = "1.2.0"
zookeeper = "0.8.0"
[dev-dependencies]
actix-rt = "2.8.0"

View File

@@ -312,6 +312,13 @@ impl From<Opt> for Infos {
config_file_path,
#[cfg(all(not(debug_assertions), feature = "analytics"))]
no_analytics: _,
zk_url: _,
s3_url: _,
s3_region: _,
s3_bucket: _,
s3_access_key: _,
s3_secret_key: _,
s3_security_token: _,
} = options;
let schedule_snapshot = match schedule_snapshot {

View File

@@ -33,6 +33,8 @@ pub enum MeilisearchHttpError {
.0.iter().map(|uid| format!("\"{uid}\"")).collect::<Vec<_>>().join(", "), .0.len()
)]
SwapIndexPayloadWrongLength(Vec<IndexUid>),
#[error("S3 Error: {0}")]
S3Error(#[from] strois::Error),
#[error(transparent)]
IndexUid(#[from] IndexUidFormatError),
#[error(transparent)]
@@ -65,6 +67,7 @@ impl ErrorCode for MeilisearchHttpError {
MeilisearchHttpError::InvalidExpression(_, _) => Code::InvalidSearchFilter,
MeilisearchHttpError::PayloadTooLarge(_) => Code::PayloadTooLarge,
MeilisearchHttpError::SwapIndexPayloadWrongLength(_) => Code::InvalidSwapIndexes,
MeilisearchHttpError::S3Error(_) => Code::S3Error,
MeilisearchHttpError::IndexUid(e) => e.error_code(),
MeilisearchHttpError::SerdeJson(_) => Code::Internal,
MeilisearchHttpError::HeedError(_) => Code::Internal,

View File

@@ -39,6 +39,8 @@ use meilisearch_types::versioning::{check_version_file, create_version_file};
use meilisearch_types::{compression, milli, VERSION_FILE_NAME};
pub use option::Opt;
use option::ScheduleSnapshot;
use strois::Client;
use zookeeper::ZooKeeper;
use crate::error::MeilisearchHttpError;
@@ -136,14 +138,17 @@ enum OnFailure {
KeepDb,
}
pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc<IndexScheduler>, Arc<AuthController>)> {
pub async fn setup_meilisearch(
opt: &Opt,
zookeeper: Option<Arc<ZooKeeper>>,
) -> anyhow::Result<(Arc<IndexScheduler>, Arc<AuthController>)> {
let empty_db = is_empty_db(&opt.db_path);
let (index_scheduler, auth_controller) = if let Some(ref snapshot_path) = opt.import_snapshot {
let snapshot_path_exists = snapshot_path.exists();
// the db is empty and the snapshot exists, import it
if empty_db && snapshot_path_exists {
match compression::from_tar_gz(snapshot_path, &opt.db_path) {
Ok(()) => open_or_create_database_unchecked(opt, OnFailure::RemoveDb)?,
Ok(()) => open_or_create_database_unchecked(opt, OnFailure::RemoveDb, zookeeper)?,
Err(e) => {
std::fs::remove_dir_all(&opt.db_path)?;
return Err(e);
@@ -160,14 +165,14 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc<IndexScheduler>, Arc<
bail!("snapshot doesn't exist at {}", snapshot_path.display())
// the snapshot and the db exist, and we can ignore the snapshot because of the ignore_snapshot_if_db_exists flag
} else {
open_or_create_database(opt, empty_db)?
open_or_create_database(opt, empty_db, zookeeper)?
}
} else if let Some(ref path) = opt.import_dump {
let src_path_exists = path.exists();
// the db is empty and the dump exists, import it
if empty_db && src_path_exists {
let (mut index_scheduler, mut auth_controller) =
open_or_create_database_unchecked(opt, OnFailure::RemoveDb)?;
open_or_create_database_unchecked(opt, OnFailure::RemoveDb, zookeeper)?;
match import_dump(&opt.db_path, path, &mut index_scheduler, &mut auth_controller) {
Ok(()) => (index_scheduler, auth_controller),
Err(e) => {
@@ -187,10 +192,10 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc<IndexScheduler>, Arc<
// the dump and the db exist and we can ignore the dump because of the ignore_dump_if_db_exists flag
// or, the dump is missing but we can ignore that because of the ignore_missing_dump flag
} else {
open_or_create_database(opt, empty_db)?
open_or_create_database(opt, empty_db, zookeeper)?
}
} else {
open_or_create_database(opt, empty_db)?
open_or_create_database(opt, empty_db, zookeeper)?
};
// We create a loop in a thread that registers snapshotCreation tasks
@@ -199,15 +204,12 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc<IndexScheduler>, Arc<
if let ScheduleSnapshot::Enabled(snapshot_delay) = opt.schedule_snapshot {
let snapshot_delay = Duration::from_secs(snapshot_delay);
let index_scheduler = index_scheduler.clone();
thread::Builder::new()
.name(String::from("register-snapshot-tasks"))
.spawn(move || loop {
thread::sleep(snapshot_delay);
if let Err(e) = index_scheduler.register(KindWithContent::SnapshotCreation) {
error!("Error while registering snapshot: {}", e);
}
})
.unwrap();
thread::spawn(move || loop {
thread::sleep(snapshot_delay);
if let Err(e) = index_scheduler.register(KindWithContent::SnapshotCreation) {
error!("Error while registering snapshot: {}", e);
}
});
}
Ok((index_scheduler, auth_controller))
@@ -217,34 +219,49 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc<IndexScheduler>, Arc<
fn open_or_create_database_unchecked(
opt: &Opt,
on_failure: OnFailure,
zookeeper: Option<Arc<ZooKeeper>>,
) -> anyhow::Result<(IndexScheduler, AuthController)> {
// we don't want to create anything in the data.ms yet, thus we
// wrap our two builders in a closure that'll be executed later.
let auth_controller = AuthController::new(&opt.db_path, &opt.master_key);
let auth_controller = AuthController::new(&opt.db_path, &opt.master_key, zookeeper.clone());
let instance_features = opt.to_instance_features();
let index_scheduler_builder = || -> anyhow::Result<_> {
Ok(IndexScheduler::new(IndexSchedulerOptions {
version_file_path: opt.db_path.join(VERSION_FILE_NAME),
auth_path: opt.db_path.join("auth"),
tasks_path: opt.db_path.join("tasks"),
update_file_path: opt.db_path.join("update_files"),
indexes_path: opt.db_path.join("indexes"),
snapshots_path: opt.snapshot_dir.clone(),
dumps_path: opt.dump_dir.clone(),
task_db_size: opt.max_task_db_size.get_bytes() as usize,
index_base_map_size: opt.max_index_size.get_bytes() as usize,
enable_mdb_writemap: opt.experimental_reduce_indexing_memory_usage,
indexer_config: (&opt.indexer_options).try_into()?,
autobatching_enabled: true,
max_number_of_tasks: 1_000_000,
index_growth_amount: byte_unit::Byte::from_str("10GiB").unwrap().get_bytes() as usize,
index_count: DEFAULT_INDEX_COUNT,
instance_features,
})?)
};
let index_scheduler = IndexScheduler::new(Arc::new(IndexSchedulerOptions {
version_file_path: opt.db_path.join(VERSION_FILE_NAME),
auth_path: opt.db_path.join("auth"),
tasks_path: opt.db_path.join("tasks"),
update_file_path: opt.db_path.join("update_files"),
indexes_path: opt.db_path.join("indexes"),
snapshots_path: opt.snapshot_dir.clone(),
dumps_path: opt.dump_dir.clone(),
task_db_size: opt.max_task_db_size.get_bytes() as usize,
index_base_map_size: opt.max_index_size.get_bytes() as usize,
enable_mdb_writemap: opt.experimental_reduce_indexing_memory_usage,
indexer_config: (&opt.indexer_options).try_into().map(Arc::new)?,
autobatching_enabled: true,
max_number_of_tasks: 1_000_000,
index_growth_amount: byte_unit::Byte::from_str("10GiB").unwrap().get_bytes() as usize,
index_count: DEFAULT_INDEX_COUNT,
instance_features,
zookeeper: zookeeper.clone(),
s3: opt.s3_url.as_ref().map(|url| {
Arc::new(
Client::builder(url)
.unwrap()
.key(opt.s3_access_key.as_ref().expect("Need s3 key to work").clone())
.secret(opt.s3_secret_key.as_ref().expect("Need s3 secret to work").clone())
.maybe_token(opt.s3_security_token.clone())
.region(&opt.s3_region)
.bucket(opt.s3_bucket.as_ref().expect("Need an s3 bucket to work"))
.unwrap()
.get_or_create()
.unwrap(),
)
}),
}))
.map_err(anyhow::Error::from);
match (
index_scheduler_builder(),
index_scheduler,
auth_controller.map_err(anyhow::Error::from),
create_version_file(&opt.db_path).map_err(anyhow::Error::from),
) {
@@ -262,12 +279,13 @@ fn open_or_create_database_unchecked(
fn open_or_create_database(
opt: &Opt,
empty_db: bool,
zookeeper: Option<Arc<ZooKeeper>>,
) -> anyhow::Result<(IndexScheduler, AuthController)> {
if !empty_db {
check_version_file(&opt.db_path)?;
}
open_or_create_database_unchecked(opt, OnFailure::KeepDb)
open_or_create_database_unchecked(opt, OnFailure::KeepDb, zookeeper)
}
fn import_dump(
@@ -277,6 +295,7 @@ fn import_dump(
auth: &mut AuthController,
) -> Result<(), anyhow::Error> {
let reader = File::open(dump_path)?;
let index_scheduler = index_scheduler.inner();
let mut dump_reader = dump::DumpReader::open(reader)?;
if let Some(date) = dump_reader.date() {

View File

@@ -2,6 +2,7 @@ use std::env;
use std::io::{stderr, Write};
use std::path::PathBuf;
use std::sync::Arc;
use std::time::Duration;
use actix_web::http::KeepAlive;
use actix_web::web::Data;
@@ -12,6 +13,7 @@ use meilisearch::analytics::Analytics;
use meilisearch::{analytics, create_app, prototype_name, setup_meilisearch, Opt};
use meilisearch_auth::{generate_master_key, AuthController, MASTER_KEY_MIN_SIZE};
use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor};
use zookeeper::ZooKeeper;
#[global_allocator]
static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;
@@ -63,7 +65,10 @@ async fn main() -> anyhow::Result<()> {
_ => (),
}
let (index_scheduler, auth_controller) = setup_meilisearch(&opt)?;
let timeout = Duration::from_millis(2500);
let zookeeper =
opt.zk_url.as_ref().map(|url| Arc::new(ZooKeeper::connect(url, timeout, drop).unwrap()));
let (index_scheduler, auth_controller) = setup_meilisearch(&opt, zookeeper).await?;
#[cfg(all(not(debug_assertions), feature = "analytics"))]
let analytics = if !opt.no_analytics {

View File

@@ -28,6 +28,13 @@ const MEILI_DB_PATH: &str = "MEILI_DB_PATH";
const MEILI_HTTP_ADDR: &str = "MEILI_HTTP_ADDR";
const MEILI_MASTER_KEY: &str = "MEILI_MASTER_KEY";
const MEILI_ENV: &str = "MEILI_ENV";
const MEILI_ZK_URL: &str = "MEILI_ZK_URL";
const MEILI_S3_URL: &str = "MEILI_S3_URL";
const MEILI_S3_BUCKET: &str = "MEILI_S3_BUCKET";
const MEILI_S3_ACCESS_KEY: &str = "MEILI_S3_ACCESS_KEY";
const MEILI_S3_SECRET_KEY: &str = "MEILI_S3_SECRET_KEY";
const MEILI_S3_SECURITY_TOKEN: &str = "MEILI_S3_SECURITY_TOKEN";
const MEILI_S3_REGION: &str = "MEILI_S3_REGION";
#[cfg(all(not(debug_assertions), feature = "analytics"))]
const MEILI_NO_ANALYTICS: &str = "MEILI_NO_ANALYTICS";
const MEILI_HTTP_PAYLOAD_SIZE_LIMIT: &str = "MEILI_HTTP_PAYLOAD_SIZE_LIMIT";
@@ -56,6 +63,7 @@ const DEFAULT_CONFIG_FILE_PATH: &str = "./config.toml";
const DEFAULT_DB_PATH: &str = "./data.ms";
const DEFAULT_HTTP_ADDR: &str = "localhost:7700";
const DEFAULT_ENV: &str = "development";
const DEFAULT_S3_REGION: &str = "eu-central-1";
const DEFAULT_HTTP_PAYLOAD_SIZE_LIMIT: &str = "100 MB";
const DEFAULT_SNAPSHOT_DIR: &str = "snapshots/";
const DEFAULT_SNAPSHOT_INTERVAL_SEC: u64 = 86400;
@@ -154,6 +162,36 @@ pub struct Opt {
#[serde(default = "default_env")]
pub env: String,
/// Sets the HTTP address and port used to communicate with the zookeeper cluster.
/// If ran locally, the default url is `http://localhost:2181/`.
#[clap(long, env = MEILI_ZK_URL)]
pub zk_url: Option<String>,
/// Sets the address and port used to communicate with the S3 bucket.
#[clap(long, env = MEILI_S3_URL)]
pub s3_url: Option<String>,
/// Sets the region used to communicate with the s3 bucket.
#[clap(long, env = MEILI_S3_REGION, default_value_t = default_s3_region())]
#[serde(default = "default_s3_region")]
pub s3_region: String,
/// Sets the S3 bucket name to use.
#[clap(long, env = MEILI_S3_BUCKET)]
pub s3_bucket: Option<String>,
/// Set the S3 access key. If used you must also set the secret key.
#[clap(long, env = MEILI_S3_ACCESS_KEY)]
pub s3_access_key: Option<String>,
/// Set the S3 secret key. If used you must also set the access key.
#[clap(long, env = MEILI_S3_SECRET_KEY)]
pub s3_secret_key: Option<String>,
/// Security token, can't be used with access key and secret key.
#[clap(long, env = MEILI_S3_SECURITY_TOKEN)]
pub s3_security_token: Option<String>,
/// Deactivates Meilisearch's built-in telemetry when provided.
///
/// Meilisearch automatically collects data from all instances that do not opt out using this flag.
@@ -368,6 +406,13 @@ impl Opt {
http_addr,
master_key,
env,
zk_url,
s3_url,
s3_region,
s3_bucket,
s3_access_key,
s3_secret_key,
s3_security_token,
max_index_size: _,
max_task_db_size: _,
http_payload_size_limit,
@@ -401,6 +446,25 @@ impl Opt {
export_to_env_if_not_present(MEILI_MASTER_KEY, master_key);
}
export_to_env_if_not_present(MEILI_ENV, env);
if let Some(zk_url) = zk_url {
export_to_env_if_not_present(MEILI_ZK_URL, zk_url);
}
if let Some(s3_url) = s3_url {
export_to_env_if_not_present(MEILI_S3_URL, s3_url);
}
export_to_env_if_not_present(MEILI_S3_REGION, s3_region);
if let Some(s3_bucket) = s3_bucket {
export_to_env_if_not_present(MEILI_S3_BUCKET, s3_bucket);
}
if let Some(s3_access_key) = s3_access_key {
export_to_env_if_not_present(MEILI_S3_ACCESS_KEY, s3_access_key);
}
if let Some(s3_secret_key) = s3_secret_key {
export_to_env_if_not_present(MEILI_S3_SECRET_KEY, s3_secret_key);
}
if let Some(s3_security_token) = s3_security_token {
export_to_env_if_not_present(MEILI_S3_SECURITY_TOKEN, s3_security_token);
}
#[cfg(all(not(debug_assertions), feature = "analytics"))]
{
export_to_env_if_not_present(MEILI_NO_ANALYTICS, no_analytics.to_string());
@@ -547,7 +611,7 @@ impl TryFrom<&IndexerOpts> for IndexerConfig {
Ok(Self {
log_every_n: Some(DEFAULT_LOG_EVERY_N),
max_memory: other.max_indexing_memory.map(|b| b.get_bytes() as usize),
thread_pool: Some(thread_pool),
thread_pool: Some(Arc::new(thread_pool)),
max_positions_per_attributes: None,
skip_index_budget: other.skip_index_budget,
..Default::default()
@@ -715,6 +779,10 @@ fn default_env() -> String {
DEFAULT_ENV.to_string()
}
fn default_s3_region() -> String {
DEFAULT_S3_REGION.to_string()
}
fn default_max_index_size() -> Byte {
Byte::from_bytes(INDEX_SIZE)
}

View File

@@ -41,14 +41,10 @@ pub async fn create_api_key(
_req: HttpRequest,
) -> Result<HttpResponse, ResponseError> {
let v = body.into_inner();
let res = tokio::task::spawn_blocking(move || -> Result<_, AuthControllerError> {
let key = auth_controller.create_key(v)?;
Ok(KeyView::from_key(key, &auth_controller))
})
.await
.map_err(|e| ResponseError::from_msg(e.to_string(), Code::Internal))??;
let key = auth_controller.create_key(v)?;
let key = KeyView::from_key(key, &auth_controller);
Ok(HttpResponse::Created().json(res))
Ok(HttpResponse::Created().json(key))
}
#[derive(Deserr, Debug, Clone, Copy)]
@@ -110,17 +106,11 @@ pub async fn patch_api_key(
) -> Result<HttpResponse, ResponseError> {
let key = path.into_inner().key;
let patch_api_key = body.into_inner();
let res = tokio::task::spawn_blocking(move || -> Result<_, AuthControllerError> {
let uid =
Uuid::parse_str(&key).or_else(|_| auth_controller.get_uid_from_encoded_key(&key))?;
let key = auth_controller.update_key(uid, patch_api_key)?;
let uid = Uuid::parse_str(&key).or_else(|_| auth_controller.get_uid_from_encoded_key(&key))?;
let key = auth_controller.update_key(uid, patch_api_key)?;
let key = KeyView::from_key(key, &auth_controller);
Ok(KeyView::from_key(key, &auth_controller))
})
.await
.map_err(|e| ResponseError::from_msg(e.to_string(), Code::Internal))??;
Ok(HttpResponse::Ok().json(res))
Ok(HttpResponse::Ok().json(key))
}
pub async fn delete_api_key(
@@ -128,13 +118,8 @@ pub async fn delete_api_key(
path: web::Path<AuthParam>,
) -> Result<HttpResponse, ResponseError> {
let key = path.into_inner().key;
tokio::task::spawn_blocking(move || {
let uid =
Uuid::parse_str(&key).or_else(|_| auth_controller.get_uid_from_encoded_key(&key))?;
auth_controller.delete_key(uid)
})
.await
.map_err(|e| ResponseError::from_msg(e.to_string(), Code::Internal))??;
let uid = Uuid::parse_str(&key).or_else(|_| auth_controller.get_uid_from_encoded_key(&key))?;
auth_controller.delete_key(uid)?;
Ok(HttpResponse::NoContent().finish())
}

View File

@@ -29,8 +29,7 @@ pub async fn create_dump(
keys: auth_controller.list_keys()?,
instance_uid: analytics.instance_uid().cloned(),
};
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
let task: SummarizedTaskView = index_scheduler.register(task)?.into();
debug!("returns: {:?}", task);
Ok(HttpResponse::Accepted().json(task))

View File

@@ -78,6 +78,6 @@ async fn patch_features(
}),
Some(&req),
);
index_scheduler.put_runtime_features(new_features)?;
index_scheduler.inner().put_runtime_features(new_features)?;
Ok(HttpResponse::Ok().json(new_features))
}

View File

@@ -1,4 +1,4 @@
use std::io::ErrorKind;
use std::io::{BufReader, ErrorKind, Seek, SeekFrom};
use actix_web::http::header::CONTENT_TYPE;
use actix_web::web::Data;
@@ -129,8 +129,7 @@ pub async fn delete_document(
index_uid: index_uid.to_string(),
documents_ids: vec![document_id],
};
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
let task: SummarizedTaskView = index_scheduler.register(task)?.into();
debug!("returns: {:?}", task);
Ok(HttpResponse::Accepted().json(task))
}
@@ -396,11 +395,12 @@ async fn document_addition(
return Err(MeilisearchHttpError::MissingPayload(format));
}
if let Err(e) = buffer.seek(std::io::SeekFrom::Start(0)).await {
if let Err(e) = buffer.seek(SeekFrom::Start(0)).await {
return Err(MeilisearchHttpError::Payload(ReceivePayload(Box::new(e))));
}
let read_file = buffer.into_inner().into_std().await;
let s3 = index_scheduler.s3.clone();
let documents_count = tokio::task::spawn_blocking(move || {
let documents_count = match format {
PayloadType::Json => read_json(&read_file, update_file.as_file_mut())?,
@@ -409,8 +409,16 @@ async fn document_addition(
}
PayloadType::Ndjson => read_ndjson(&read_file, update_file.as_file_mut())?,
};
if let Some(s3) = s3 {
update_file.seek(SeekFrom::Start(0)).unwrap();
let mut reader = BufReader::new(&*update_file);
s3.put_object_multipart(format!("update-files/{}", uuid), &mut reader)?;
}
// we NEED to persist the file here because we moved the `udpate_file` in another task.
update_file.persist()?;
Ok(documents_count)
})
.await;
@@ -445,7 +453,7 @@ async fn document_addition(
};
let scheduler = index_scheduler.clone();
let task = match tokio::task::spawn_blocking(move || scheduler.register(task)).await? {
let task = match scheduler.register(task) {
Ok(task) => task,
Err(e) => {
index_scheduler.delete_update_file(uuid)?;
@@ -476,8 +484,7 @@ pub async fn delete_documents_batch(
let task =
KindWithContent::DocumentDeletion { index_uid: index_uid.to_string(), documents_ids: ids };
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
let task: SummarizedTaskView = index_scheduler.register(task)?.into();
debug!("returns: {:?}", task);
Ok(HttpResponse::Accepted().json(task))
@@ -512,8 +519,7 @@ pub async fn delete_documents_by_filter(
.map_err(|err| ResponseError::from_msg(err.message, Code::InvalidDocumentFilter))?;
let task = KindWithContent::DocumentDeletionByFilter { index_uid, filter_expr: filter };
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
let task: SummarizedTaskView = index_scheduler.register(task)?.into();
debug!("returns: {:?}", task);
Ok(HttpResponse::Accepted().json(task))
@@ -529,8 +535,7 @@ pub async fn clear_all_documents(
analytics.delete_documents(DocumentDeletionKind::ClearAll, &req);
let task = KindWithContent::DocumentClear { index_uid: index_uid.to_string() };
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
let task: SummarizedTaskView = index_scheduler.register(task)?.into();
debug!("returns: {:?}", task);
Ok(HttpResponse::Accepted().json(task))

View File

@@ -135,8 +135,7 @@ pub async fn create_index(
);
let task = KindWithContent::IndexCreation { index_uid: uid.to_string(), primary_key };
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
let task: SummarizedTaskView = index_scheduler.register(task)?.into();
Ok(HttpResponse::Accepted().json(task))
} else {
@@ -203,8 +202,7 @@ pub async fn update_index(
primary_key: body.primary_key,
};
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
let task: SummarizedTaskView = index_scheduler.register(task)?.into();
debug!("returns: {:?}", task);
Ok(HttpResponse::Accepted().json(task))
@@ -216,8 +214,7 @@ pub async fn delete_index(
) -> Result<HttpResponse, ResponseError> {
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let task = KindWithContent::IndexDeletion { index_uid: index_uid.into_inner() };
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
let task: SummarizedTaskView = index_scheduler.register(task)?.into();
Ok(HttpResponse::Accepted().json(task))
}

View File

@@ -55,10 +55,7 @@ macro_rules! make_setting_route {
is_deletion: true,
allow_index_creation,
};
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task))
.await??
.into();
let task: SummarizedTaskView = index_scheduler.register(task)?.into();
debug!("returns: {:?}", task);
Ok(HttpResponse::Accepted().json(task))
@@ -97,10 +94,7 @@ macro_rules! make_setting_route {
is_deletion: false,
allow_index_creation,
};
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task))
.await??
.into();
let task: SummarizedTaskView = index_scheduler.register(task)?.into();
debug!("returns: {:?}", task);
Ok(HttpResponse::Accepted().json(task))
@@ -310,81 +304,6 @@ make_setting_route!(
}
);
make_setting_route!(
"/non-separator-tokens",
put,
std::collections::BTreeSet<String>,
meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsNonSeparatorTokens,
>,
non_separator_tokens,
"nonSeparatorTokens",
analytics,
|non_separator_tokens: &Option<std::collections::BTreeSet<String>>, req: &HttpRequest| {
use serde_json::json;
analytics.publish(
"nonSeparatorTokens Updated".to_string(),
json!({
"non_separator_tokens": {
"total": non_separator_tokens.as_ref().map(|non_separator_tokens| non_separator_tokens.len()),
},
}),
Some(req),
);
}
);
make_setting_route!(
"/separator-tokens",
put,
std::collections::BTreeSet<String>,
meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsSeparatorTokens,
>,
separator_tokens,
"separatorTokens",
analytics,
|separator_tokens: &Option<std::collections::BTreeSet<String>>, req: &HttpRequest| {
use serde_json::json;
analytics.publish(
"separatorTokens Updated".to_string(),
json!({
"separator_tokens": {
"total": separator_tokens.as_ref().map(|separator_tokens| separator_tokens.len()),
},
}),
Some(req),
);
}
);
make_setting_route!(
"/dictionary",
put,
std::collections::BTreeSet<String>,
meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsDictionary,
>,
dictionary,
"dictionary",
analytics,
|dictionary: &Option<std::collections::BTreeSet<String>>, req: &HttpRequest| {
use serde_json::json;
analytics.publish(
"dictionary Updated".to_string(),
json!({
"dictionary": {
"total": dictionary.as_ref().map(|dictionary| dictionary.len()),
},
}),
Some(req),
);
}
);
make_setting_route!(
"/synonyms",
put,
@@ -541,9 +460,6 @@ generate_configure!(
searchable_attributes,
distinct_attribute,
stop_words,
separator_tokens,
non_separator_tokens,
dictionary,
synonyms,
ranking_rules,
typo_tolerance,
@@ -664,8 +580,7 @@ pub async fn update_all(
is_deletion: false,
allow_index_creation,
};
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
let task: SummarizedTaskView = index_scheduler.register(task)?.into();
debug!("returns: {:?}", task);
Ok(HttpResponse::Accepted().json(task))
@@ -700,8 +615,7 @@ pub async fn delete_all(
is_deletion: true,
allow_index_creation,
};
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
let task: SummarizedTaskView = index_scheduler.register(task)?.into();
debug!("returns: {:?}", task);
Ok(HttpResponse::Accepted().json(task))

View File

@@ -18,7 +18,6 @@ use serde_json::json;
use time::format_description::well_known::Rfc3339;
use time::macros::format_description;
use time::{Date, Duration, OffsetDateTime, Time};
use tokio::task;
use super::SummarizedTaskView;
use crate::analytics::Analytics;
@@ -325,15 +324,12 @@ async fn cancel_tasks(
let query = params.into_query();
let (tasks, _) = index_scheduler.get_task_ids_from_authorized_indexes(
&index_scheduler.read_txn()?,
&query,
index_scheduler.filters(),
)?;
let (tasks, _) =
index_scheduler.get_task_ids_from_authorized_indexes(&query, index_scheduler.filters())?;
let task_cancelation =
KindWithContent::TaskCancelation { query: format!("?{}", req.query_string()), tasks };
let task = task::spawn_blocking(move || index_scheduler.register(task_cancelation)).await??;
let task = index_scheduler.register(task_cancelation)?;
let task: SummarizedTaskView = task.into();
Ok(HttpResponse::Ok().json(task))
@@ -370,15 +366,12 @@ async fn delete_tasks(
);
let query = params.into_query();
let (tasks, _) = index_scheduler.get_task_ids_from_authorized_indexes(
&index_scheduler.read_txn()?,
&query,
index_scheduler.filters(),
)?;
let (tasks, _) =
index_scheduler.get_task_ids_from_authorized_indexes(&query, index_scheduler.filters())?;
let task_deletion =
KindWithContent::TaskDeletion { query: format!("?{}", req.query_string()), tasks };
let task = task::spawn_blocking(move || index_scheduler.register(task_deletion)).await??;
let task = index_scheduler.register(task_deletion)?;
let task: SummarizedTaskView = task.into();
Ok(HttpResponse::Ok().json(task))

View File

@@ -491,20 +491,6 @@ pub fn perform_search(
tokenizer_builder.allow_list(&script_lang_map);
}
let separators = index.allowed_separators(&rtxn)?;
let separators: Option<Vec<_>> =
separators.as_ref().map(|x| x.iter().map(String::as_str).collect());
if let Some(ref separators) = separators {
tokenizer_builder.separators(separators);
}
let dictionary = index.dictionary(&rtxn)?;
let dictionary: Option<Vec<_>> =
dictionary.as_ref().map(|x| x.iter().map(String::as_str).collect());
if let Some(ref dictionary) = dictionary {
tokenizer_builder.words_dict(dictionary);
}
let mut formatter_builder = MatcherBuilder::new(matching_words, tokenizer_builder.build());
formatter_builder.crop_marker(query.crop_marker);
formatter_builder.highlight_prefix(query.highlight_pre_tag);

View File

@@ -39,7 +39,7 @@ impl Server {
let options = default_settings(dir.path());
let (index_scheduler, auth) = setup_meilisearch(&options).unwrap();
let (index_scheduler, auth) = setup_meilisearch(&options, None).await.unwrap();
let service = Service { index_scheduler, auth, options, api_key: None };
Server { service, _dir: Some(dir) }
@@ -54,7 +54,7 @@ impl Server {
options.master_key = Some("MASTER_KEY".to_string());
let (index_scheduler, auth) = setup_meilisearch(&options).unwrap();
let (index_scheduler, auth) = setup_meilisearch(&options, None).await.unwrap();
let service = Service { index_scheduler, auth, options, api_key: None };
Server { service, _dir: Some(dir) }
@@ -67,7 +67,7 @@ impl Server {
}
pub async fn new_with_options(options: Opt) -> Result<Self, anyhow::Error> {
let (index_scheduler, auth) = setup_meilisearch(&options)?;
let (index_scheduler, auth) = setup_meilisearch(&options, None).await?;
let service = Service { index_scheduler, auth, options, api_key: None };
Ok(Server { service, _dir: None })

File diff suppressed because it is too large Load Diff

View File

@@ -16,9 +16,6 @@ static DEFAULT_SETTINGS_VALUES: Lazy<HashMap<&'static str, Value>> = Lazy::new(|
json!(["words", "typo", "proximity", "attribute", "sort", "exactness"]),
);
map.insert("stop_words", json!([]));
map.insert("non_separator_tokens", json!([]));
map.insert("separator_tokens", json!([]));
map.insert("dictionary", json!([]));
map.insert("synonyms", json!({}));
map.insert(
"faceting",
@@ -54,7 +51,7 @@ async fn get_settings() {
let (response, code) = index.settings().await;
assert_eq!(code, 200);
let settings = response.as_object().unwrap();
assert_eq!(settings.keys().len(), 14);
assert_eq!(settings.keys().len(), 11);
assert_eq!(settings["displayedAttributes"], json!(["*"]));
assert_eq!(settings["searchableAttributes"], json!(["*"]));
assert_eq!(settings["filterableAttributes"], json!([]));
@@ -65,9 +62,6 @@ async fn get_settings() {
json!(["words", "typo", "proximity", "attribute", "sort", "exactness"])
);
assert_eq!(settings["stopWords"], json!([]));
assert_eq!(settings["nonSeparatorTokens"], json!([]));
assert_eq!(settings["separatorTokens"], json!([]));
assert_eq!(settings["dictionary"], json!([]));
assert_eq!(
settings["faceting"],
json!({
@@ -278,9 +272,6 @@ test_setting_routes!(
searchable_attributes put,
distinct_attribute put,
stop_words put,
separator_tokens put,
non_separator_tokens put,
dictionary put,
ranking_rules put,
synonyms put,
pagination patch,

View File

@@ -1,4 +1,3 @@
mod distinct;
mod errors;
mod get_settings;
mod tokenizer_customization;

View File

@@ -1,467 +0,0 @@
use meili_snap::{json_string, snapshot};
use serde_json::json;
use crate::common::Server;
#[actix_rt::test]
async fn set_and_reset() {
let server = Server::new().await;
let index = server.index("test");
let (_response, _code) = index
.update_settings(json!({
"nonSeparatorTokens": ["#", "&"],
"separatorTokens": ["&sep", "<br/>"],
"dictionary": ["J.R.R.", "J. R. R."],
}))
.await;
index.wait_task(0).await;
let (response, _) = index.settings().await;
snapshot!(json_string!(response["nonSeparatorTokens"]), @r###"
[
"#",
"&"
]
"###);
snapshot!(json_string!(response["separatorTokens"]), @r###"
[
"&sep",
"<br/>"
]
"###);
snapshot!(json_string!(response["dictionary"]), @r###"
[
"J. R. R.",
"J.R.R."
]
"###);
index
.update_settings(json!({
"nonSeparatorTokens": null,
"separatorTokens": null,
"dictionary": null,
}))
.await;
index.wait_task(1).await;
let (response, _) = index.settings().await;
snapshot!(json_string!(response["nonSeparatorTokens"]), @"[]");
snapshot!(json_string!(response["separatorTokens"]), @"[]");
snapshot!(json_string!(response["dictionary"]), @"[]");
}
#[actix_rt::test]
async fn set_and_search() {
let documents = json!([
{
"id": 1,
"content": "Mac & cheese",
},
{
"id": 2,
"content": "G#D#G#D#G#C#D#G#C#",
},
{
"id": 3,
"content": "Mac&sep&&sepcheese",
},
]);
let server = Server::new().await;
let index = server.index("test");
index.add_documents(documents, None).await;
index.wait_task(0).await;
let (_response, _code) = index
.update_settings(json!({
"nonSeparatorTokens": ["#", "&"],
"separatorTokens": ["<br/>", "&sep"],
"dictionary": ["#", "A#", "B#", "C#", "D#", "E#", "F#", "G#"],
}))
.await;
index.wait_task(1).await;
index
.search(json!({"q": "&", "attributesToHighlight": ["content"]}), |response, code| {
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @r###"
[
{
"id": 1,
"content": "Mac & cheese",
"_formatted": {
"id": "1",
"content": "Mac <em>&</em> cheese"
}
},
{
"id": 3,
"content": "Mac&sep&&sepcheese",
"_formatted": {
"id": "3",
"content": "Mac&sep<em>&</em>&sepcheese"
}
}
]
"###);
})
.await;
index
.search(
json!({"q": "Mac & cheese", "attributesToHighlight": ["content"]}),
|response, code| {
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @r###"
[
{
"id": 1,
"content": "Mac & cheese",
"_formatted": {
"id": "1",
"content": "<em>Mac</em> <em>&</em> <em>cheese</em>"
}
},
{
"id": 3,
"content": "Mac&sep&&sepcheese",
"_formatted": {
"id": "3",
"content": "<em>Mac</em>&sep<em>&</em>&sep<em>cheese</em>"
}
}
]
"###);
},
)
.await;
index
.search(
json!({"q": "Mac&sep&&sepcheese", "attributesToHighlight": ["content"]}),
|response, code| {
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @r###"
[
{
"id": 1,
"content": "Mac & cheese",
"_formatted": {
"id": "1",
"content": "<em>Mac</em> <em>&</em> <em>cheese</em>"
}
},
{
"id": 3,
"content": "Mac&sep&&sepcheese",
"_formatted": {
"id": "3",
"content": "<em>Mac</em>&sep<em>&</em>&sep<em>cheese</em>"
}
}
]
"###);
},
)
.await;
index
.search(json!({"q": "C#D#G", "attributesToHighlight": ["content"]}), |response, code| {
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @r###"
[
{
"id": 2,
"content": "G#D#G#D#G#C#D#G#C#",
"_formatted": {
"id": "2",
"content": "<em>G</em>#<em>D#</em><em>G</em>#<em>D#</em><em>G</em>#<em>C#</em><em>D#</em><em>G</em>#<em>C#</em>"
}
}
]
"###);
})
.await;
index
.search(json!({"q": "#", "attributesToHighlight": ["content"]}), |response, code| {
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @"[]");
})
.await;
}
#[actix_rt::test]
async fn advanced_synergies() {
let documents = json!([
{
"id": 1,
"content": "J.R.R. Tolkien",
},
{
"id": 2,
"content": "J. R. R. Tolkien",
},
{
"id": 3,
"content": "jrr Tolkien",
},
{
"id": 4,
"content": "J.K. Rowlings",
},
{
"id": 5,
"content": "J. K. Rowlings",
},
{
"id": 6,
"content": "jk Rowlings",
},
]);
let server = Server::new().await;
let index = server.index("test");
index.add_documents(documents, None).await;
index.wait_task(0).await;
let (_response, _code) = index
.update_settings(json!({
"dictionary": ["J.R.R.", "J. R. R."],
"synonyms": {
"J.R.R.": ["jrr", "J. R. R."],
"J. R. R.": ["jrr", "J.R.R."],
"jrr": ["J.R.R.", "J. R. R."],
"J.K.": ["jk", "J. K."],
"J. K.": ["jk", "J.K."],
"jk": ["J.K.", "J. K."],
}
}))
.await;
index.wait_task(1).await;
index
.search(json!({"q": "J.R.R.", "attributesToHighlight": ["content"]}), |response, code| {
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @r###"
[
{
"id": 1,
"content": "J.R.R. Tolkien",
"_formatted": {
"id": "1",
"content": "<em>J.R.R.</em> Tolkien"
}
},
{
"id": 2,
"content": "J. R. R. Tolkien",
"_formatted": {
"id": "2",
"content": "<em>J. R. R.</em> Tolkien"
}
},
{
"id": 3,
"content": "jrr Tolkien",
"_formatted": {
"id": "3",
"content": "<em>jrr</em> Tolkien"
}
}
]
"###);
})
.await;
index
.search(json!({"q": "jrr", "attributesToHighlight": ["content"]}), |response, code| {
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @r###"
[
{
"id": 3,
"content": "jrr Tolkien",
"_formatted": {
"id": "3",
"content": "<em>jrr</em> Tolkien"
}
},
{
"id": 1,
"content": "J.R.R. Tolkien",
"_formatted": {
"id": "1",
"content": "<em>J.R.R.</em> Tolkien"
}
},
{
"id": 2,
"content": "J. R. R. Tolkien",
"_formatted": {
"id": "2",
"content": "<em>J. R. R.</em> Tolkien"
}
}
]
"###);
})
.await;
index
.search(json!({"q": "J. R. R.", "attributesToHighlight": ["content"]}), |response, code| {
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @r###"
[
{
"id": 2,
"content": "J. R. R. Tolkien",
"_formatted": {
"id": "2",
"content": "<em>J. R. R.</em> Tolkien"
}
},
{
"id": 1,
"content": "J.R.R. Tolkien",
"_formatted": {
"id": "1",
"content": "<em>J.R.R.</em> Tolkien"
}
},
{
"id": 3,
"content": "jrr Tolkien",
"_formatted": {
"id": "3",
"content": "<em>jrr</em> Tolkien"
}
}
]
"###);
})
.await;
// Only update dictionary, the synonyms should be recomputed.
let (_response, _code) = index
.update_settings(json!({
"dictionary": ["J.R.R.", "J. R. R.", "J.K.", "J. K."],
}))
.await;
index.wait_task(2).await;
index
.search(json!({"q": "jk", "attributesToHighlight": ["content"]}), |response, code| {
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @r###"
[
{
"id": 6,
"content": "jk Rowlings",
"_formatted": {
"id": "6",
"content": "<em>jk</em> Rowlings"
}
},
{
"id": 4,
"content": "J.K. Rowlings",
"_formatted": {
"id": "4",
"content": "<em>J.K.</em> Rowlings"
}
},
{
"id": 5,
"content": "J. K. Rowlings",
"_formatted": {
"id": "5",
"content": "<em>J. K.</em> Rowlings"
}
}
]
"###);
})
.await;
index
.search(json!({"q": "J.K.", "attributesToHighlight": ["content"]}), |response, code| {
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @r###"
[
{
"id": 4,
"content": "J.K. Rowlings",
"_formatted": {
"id": "4",
"content": "<em>J.K.</em> Rowlings"
}
},
{
"id": 5,
"content": "J. K. Rowlings",
"_formatted": {
"id": "5",
"content": "<em>J. K.</em> Rowlings"
}
},
{
"id": 6,
"content": "jk Rowlings",
"_formatted": {
"id": "6",
"content": "<em>jk</em> Rowlings"
}
}
]
"###);
})
.await;
index
.search(json!({"q": "J. K.", "attributesToHighlight": ["content"]}), |response, code| {
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @r###"
[
{
"id": 5,
"content": "J. K. Rowlings",
"_formatted": {
"id": "5",
"content": "<em>J. K.</em> Rowlings"
}
},
{
"id": 4,
"content": "J.K. Rowlings",
"_formatted": {
"id": "4",
"content": "<em>J.K.</em> Rowlings"
}
},
{
"id": 6,
"content": "jk Rowlings",
"_formatted": {
"id": "6",
"content": "<em>jk</em> Rowlings"
}
},
{
"id": 2,
"content": "J. R. R. Tolkien",
"_formatted": {
"id": "2",
"content": "<em>J. R.</em> R. Tolkien"
}
}
]
"###);
})
.await;
}

View File

@@ -17,7 +17,7 @@ bincode = "1.3.3"
bstr = "1.4.0"
bytemuck = { version = "1.13.1", features = ["extern_crate_alloc"] }
byteorder = "1.4.3"
charabia = { version = "0.8.2", default-features = false }
charabia = { version = "0.8.4", default-features = false }
concat-arrays = "0.1.2"
crossbeam-channel = "0.5.8"
deserr = "0.5.0"

View File

@@ -1,5 +1,5 @@
use std::borrow::Cow;
use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
use std::collections::{BTreeSet, HashMap, HashSet};
use std::fs::File;
use std::mem::size_of;
use std::path::Path;
@@ -61,12 +61,8 @@ pub mod main_key {
pub const USER_DEFINED_SEARCHABLE_FIELDS_KEY: &str = "user-defined-searchable-fields";
pub const SOFT_EXTERNAL_DOCUMENTS_IDS_KEY: &str = "soft-external-documents-ids";
pub const STOP_WORDS_KEY: &str = "stop-words";
pub const NON_SEPARATOR_TOKENS_KEY: &str = "non-separator-tokens";
pub const SEPARATOR_TOKENS_KEY: &str = "separator-tokens";
pub const DICTIONARY_KEY: &str = "dictionary";
pub const STRING_FACETED_DOCUMENTS_IDS_PREFIX: &str = "string-faceted-documents-ids";
pub const SYNONYMS_KEY: &str = "synonyms";
pub const USER_DEFINED_SYNONYMS_KEY: &str = "user-defined-synonyms";
pub const WORDS_FST_KEY: &str = "words-fst";
pub const WORDS_PREFIXES_FST_KEY: &str = "words-prefixes-fst";
pub const CREATED_AT_KEY: &str = "created-at";
@@ -1059,116 +1055,18 @@ impl Index {
}
}
/* non separator tokens */
pub(crate) fn put_non_separator_tokens(
&self,
wtxn: &mut RwTxn,
set: &BTreeSet<String>,
) -> heed::Result<()> {
self.main.put::<_, Str, SerdeBincode<_>>(wtxn, main_key::NON_SEPARATOR_TOKENS_KEY, set)
}
pub(crate) fn delete_non_separator_tokens(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
self.main.delete::<_, Str>(wtxn, main_key::NON_SEPARATOR_TOKENS_KEY)
}
pub fn non_separator_tokens(&self, rtxn: &RoTxn) -> Result<Option<BTreeSet<String>>> {
Ok(self.main.get::<_, Str, SerdeBincode<BTreeSet<String>>>(
rtxn,
main_key::NON_SEPARATOR_TOKENS_KEY,
)?)
}
/* separator tokens */
pub(crate) fn put_separator_tokens(
&self,
wtxn: &mut RwTxn,
set: &BTreeSet<String>,
) -> heed::Result<()> {
self.main.put::<_, Str, SerdeBincode<_>>(wtxn, main_key::SEPARATOR_TOKENS_KEY, set)
}
pub(crate) fn delete_separator_tokens(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
self.main.delete::<_, Str>(wtxn, main_key::SEPARATOR_TOKENS_KEY)
}
pub fn separator_tokens(&self, rtxn: &RoTxn) -> Result<Option<BTreeSet<String>>> {
Ok(self
.main
.get::<_, Str, SerdeBincode<BTreeSet<String>>>(rtxn, main_key::SEPARATOR_TOKENS_KEY)?)
}
/* separators easing method */
pub fn allowed_separators(&self, rtxn: &RoTxn) -> Result<Option<BTreeSet<String>>> {
let default_separators =
charabia::separators::DEFAULT_SEPARATORS.iter().map(|s| s.to_string());
let mut separators: Option<BTreeSet<_>> = None;
if let Some(mut separator_tokens) = self.separator_tokens(rtxn)? {
separator_tokens.extend(default_separators.clone());
separators = Some(separator_tokens);
}
if let Some(non_separator_tokens) = self.non_separator_tokens(rtxn)? {
separators = separators
.or_else(|| Some(default_separators.collect()))
.map(|separators| &separators - &non_separator_tokens);
}
Ok(separators)
}
/* dictionary */
pub(crate) fn put_dictionary(
&self,
wtxn: &mut RwTxn,
set: &BTreeSet<String>,
) -> heed::Result<()> {
self.main.put::<_, Str, SerdeBincode<_>>(wtxn, main_key::DICTIONARY_KEY, set)
}
pub(crate) fn delete_dictionary(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
self.main.delete::<_, Str>(wtxn, main_key::DICTIONARY_KEY)
}
pub fn dictionary(&self, rtxn: &RoTxn) -> Result<Option<BTreeSet<String>>> {
Ok(self
.main
.get::<_, Str, SerdeBincode<BTreeSet<String>>>(rtxn, main_key::DICTIONARY_KEY)?)
}
/* synonyms */
pub(crate) fn put_synonyms(
&self,
wtxn: &mut RwTxn,
synonyms: &HashMap<Vec<String>, Vec<Vec<String>>>,
user_defined_synonyms: &BTreeMap<String, Vec<String>>,
) -> heed::Result<()> {
self.main.put::<_, Str, SerdeBincode<_>>(wtxn, main_key::SYNONYMS_KEY, synonyms)?;
self.main.put::<_, Str, SerdeBincode<_>>(
wtxn,
main_key::USER_DEFINED_SYNONYMS_KEY,
user_defined_synonyms,
)
self.main.put::<_, Str, SerdeBincode<_>>(wtxn, main_key::SYNONYMS_KEY, synonyms)
}
pub(crate) fn delete_synonyms(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
self.main.delete::<_, Str>(wtxn, main_key::SYNONYMS_KEY)?;
self.main.delete::<_, Str>(wtxn, main_key::USER_DEFINED_SYNONYMS_KEY)
}
pub fn user_defined_synonyms(
&self,
rtxn: &RoTxn,
) -> heed::Result<BTreeMap<String, Vec<String>>> {
Ok(self
.main
.get::<_, Str, SerdeBincode<_>>(rtxn, main_key::USER_DEFINED_SYNONYMS_KEY)?
.unwrap_or_default())
self.main.delete::<_, Str>(wtxn, main_key::SYNONYMS_KEY)
}
pub fn synonyms(&self, rtxn: &RoTxn) -> heed::Result<HashMap<Vec<String>, Vec<Vec<String>>>> {

View File

@@ -488,20 +488,6 @@ pub fn execute_search(
tokbuilder.stop_words(stop_words);
}
let separators = ctx.index.allowed_separators(ctx.txn)?;
let separators: Option<Vec<_>> =
separators.as_ref().map(|x| x.iter().map(String::as_str).collect());
if let Some(ref separators) = separators {
tokbuilder.separators(separators);
}
let dictionary = ctx.index.dictionary(ctx.txn)?;
let dictionary: Option<Vec<_>> =
dictionary.as_ref().map(|x| x.iter().map(String::as_str).collect());
if let Some(ref dictionary) = dictionary {
tokbuilder.words_dict(dictionary);
}
let script_lang_map = ctx.index.script_language(ctx.txn)?;
if !script_lang_map.is_empty() {
tokbuilder.allow_list(&script_lang_map);

View File

@@ -2,7 +2,7 @@ use std::io::Cursor;
use big_s::S;
use heed::EnvOpenOptions;
use maplit::{btreemap, hashset};
use maplit::{hashmap, hashset};
use crate::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
use crate::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings};
@@ -33,7 +33,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
S("tag"),
S("asc_desc_rank"),
});
builder.set_synonyms(btreemap! {
builder.set_synonyms(hashmap! {
S("hello") => vec![S("good morning")],
S("world") => vec![S("earth")],
S("america") => vec![S("the united states")],

View File

@@ -15,7 +15,7 @@ they store fewer sprximities than the regular word sprximity DB.
*/
use std::collections::BTreeMap;
use std::collections::HashMap;
use crate::index::tests::TempIndex;
use crate::search::new::tests::collect_field_values;
@@ -336,7 +336,7 @@ fn test_proximity_split_word() {
index
.update_settings(|s| {
let mut syns = BTreeMap::new();
let mut syns = HashMap::new();
syns.insert("xyz".to_owned(), vec!["sun flower".to_owned()]);
s.set_synonyms(syns);
})

View File

@@ -18,7 +18,7 @@ if `words` doesn't exist before it.
14. Synonyms cost nothing according to the typo ranking rule
*/
use std::collections::BTreeMap;
use std::collections::HashMap;
use crate::index::tests::TempIndex;
use crate::search::new::tests::collect_field_values;
@@ -591,7 +591,7 @@ fn test_typo_synonyms() {
.update_settings(|s| {
s.set_criteria(vec![Criterion::Typo]);
let mut synonyms = BTreeMap::new();
let mut synonyms = HashMap::new();
synonyms.insert("lackadaisical".to_owned(), vec!["lazy".to_owned()]);
synonyms.insert("fast brownish".to_owned(), vec!["quick brown".to_owned()]);

View File

@@ -28,8 +28,6 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
indexer: GrenadParameters,
searchable_fields: &Option<HashSet<FieldId>>,
stop_words: Option<&fst::Set<&[u8]>>,
allowed_separators: Option<&Vec<&str>>,
dictionary: Option<&Vec<&str>>,
max_positions_per_attributes: Option<u32>,
) -> Result<(RoaringBitmap, grenad::Reader<File>, ScriptLanguageDocidsMap)> {
puffin::profile_function!();
@@ -54,14 +52,6 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
if let Some(stop_words) = stop_words {
tokenizer_builder.stop_words(stop_words);
}
if let Some(dictionary) = dictionary {
// let dictionary: Vec<_> = dictionary.iter().map(String::as_str).collect();
tokenizer_builder.words_dict(dictionary.as_slice());
}
if let Some(separators) = allowed_separators {
// let separators: Vec<_> = separators.iter().map(String::as_str).collect();
tokenizer_builder.separators(separators.as_slice());
}
let tokenizer = tokenizer_builder.build();
let mut cursor = obkv_documents.into_cursor()?;

View File

@@ -49,8 +49,6 @@ pub(crate) fn data_from_obkv_documents(
geo_fields_ids: Option<(FieldId, FieldId)>,
vectors_field_id: Option<FieldId>,
stop_words: Option<fst::Set<&[u8]>>,
allowed_separators: Option<Vec<&str>>,
dictionary: Option<Vec<&str>>,
max_positions_per_attributes: Option<u32>,
exact_attributes: HashSet<FieldId>,
) -> Result<()> {
@@ -78,8 +76,6 @@ pub(crate) fn data_from_obkv_documents(
geo_fields_ids,
vectors_field_id,
&stop_words,
&allowed_separators,
&dictionary,
max_positions_per_attributes,
)
})
@@ -293,8 +289,6 @@ fn send_and_extract_flattened_documents_data(
geo_fields_ids: Option<(FieldId, FieldId)>,
vectors_field_id: Option<FieldId>,
stop_words: &Option<fst::Set<&[u8]>>,
allowed_separators: &Option<Vec<&str>>,
dictionary: &Option<Vec<&str>>,
max_positions_per_attributes: Option<u32>,
) -> Result<(
grenad::Reader<CursorClonableMmap>,
@@ -350,8 +344,6 @@ fn send_and_extract_flattened_documents_data(
indexer,
searchable_fields,
stop_words.as_ref(),
allowed_separators.as_ref(),
dictionary.as_ref(),
max_positions_per_attributes,
)?;

View File

@@ -316,12 +316,6 @@ where
let vectors_field_id = self.index.fields_ids_map(self.wtxn)?.id("_vectors");
let stop_words = self.index.stop_words(self.wtxn)?;
let separators = self.index.allowed_separators(self.wtxn)?;
let separators: Option<Vec<_>> =
separators.as_ref().map(|x| x.iter().map(String::as_str).collect());
let dictionary = self.index.dictionary(self.wtxn)?;
let dictionary: Option<Vec<_>> =
dictionary.as_ref().map(|x| x.iter().map(String::as_str).collect());
let exact_attributes = self.index.exact_attributes_ids(self.wtxn)?;
let pool_params = GrenadParameters {
@@ -359,8 +353,6 @@ where
geo_fields_ids,
vectors_field_id,
stop_words,
separators,
dictionary,
max_positions_per_attributes,
exact_attributes,
)

View File

@@ -1,3 +1,5 @@
use std::sync::Arc;
use grenad::CompressionType;
use rayon::ThreadPool;
@@ -9,7 +11,7 @@ pub struct IndexerConfig {
pub max_memory: Option<usize>,
pub chunk_compression_type: CompressionType,
pub chunk_compression_level: Option<u32>,
pub thread_pool: Option<ThreadPool>,
pub thread_pool: Option<Arc<ThreadPool>>,
pub max_positions_per_attributes: Option<u32>,
pub skip_index_budget: bool,
}

View File

@@ -1,4 +1,4 @@
use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
use std::collections::{BTreeSet, HashMap, HashSet};
use std::result::Result as StdResult;
use charabia::{Normalize, Tokenizer, TokenizerBuilder};
@@ -112,11 +112,8 @@ pub struct Settings<'a, 't, 'u, 'i> {
sortable_fields: Setting<HashSet<String>>,
criteria: Setting<Vec<Criterion>>,
stop_words: Setting<BTreeSet<String>>,
non_separator_tokens: Setting<BTreeSet<String>>,
separator_tokens: Setting<BTreeSet<String>>,
dictionary: Setting<BTreeSet<String>>,
distinct_field: Setting<String>,
synonyms: Setting<BTreeMap<String, Vec<String>>>,
synonyms: Setting<HashMap<String, Vec<String>>>,
primary_key: Setting<String>,
authorize_typos: Setting<bool>,
min_word_len_two_typos: Setting<u8>,
@@ -144,9 +141,6 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
sortable_fields: Setting::NotSet,
criteria: Setting::NotSet,
stop_words: Setting::NotSet,
non_separator_tokens: Setting::NotSet,
separator_tokens: Setting::NotSet,
dictionary: Setting::NotSet,
distinct_field: Setting::NotSet,
synonyms: Setting::NotSet,
primary_key: Setting::NotSet,
@@ -211,39 +205,6 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
if stop_words.is_empty() { Setting::Reset } else { Setting::Set(stop_words) }
}
pub fn reset_non_separator_tokens(&mut self) {
self.non_separator_tokens = Setting::Reset;
}
pub fn set_non_separator_tokens(&mut self, non_separator_tokens: BTreeSet<String>) {
self.non_separator_tokens = if non_separator_tokens.is_empty() {
Setting::Reset
} else {
Setting::Set(non_separator_tokens)
}
}
pub fn reset_separator_tokens(&mut self) {
self.separator_tokens = Setting::Reset;
}
pub fn set_separator_tokens(&mut self, separator_tokens: BTreeSet<String>) {
self.separator_tokens = if separator_tokens.is_empty() {
Setting::Reset
} else {
Setting::Set(separator_tokens)
}
}
pub fn reset_dictionary(&mut self) {
self.dictionary = Setting::Reset;
}
pub fn set_dictionary(&mut self, dictionary: BTreeSet<String>) {
self.dictionary =
if dictionary.is_empty() { Setting::Reset } else { Setting::Set(dictionary) }
}
pub fn reset_distinct_field(&mut self) {
self.distinct_field = Setting::Reset;
}
@@ -256,7 +217,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
self.synonyms = Setting::Reset;
}
pub fn set_synonyms(&mut self, synonyms: BTreeMap<String, Vec<String>>) {
pub fn set_synonyms(&mut self, synonyms: HashMap<String, Vec<String>>) {
self.synonyms = if synonyms.is_empty() { Setting::Reset } else { Setting::Set(synonyms) }
}
@@ -491,84 +452,9 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
}
}
fn update_non_separator_tokens(&mut self) -> Result<bool> {
let changes = match self.non_separator_tokens {
Setting::Set(ref non_separator_tokens) => {
let current = self.index.non_separator_tokens(self.wtxn)?;
// Does the new list differ from the previous one?
if current.map_or(true, |current| &current != non_separator_tokens) {
self.index.put_non_separator_tokens(self.wtxn, non_separator_tokens)?;
true
} else {
false
}
}
Setting::Reset => self.index.delete_non_separator_tokens(self.wtxn)?,
Setting::NotSet => false,
};
// the synonyms must be updated if non separator tokens have been updated.
if changes && self.synonyms == Setting::NotSet {
self.synonyms = Setting::Set(self.index.user_defined_synonyms(self.wtxn)?);
}
Ok(changes)
}
fn update_separator_tokens(&mut self) -> Result<bool> {
let changes = match self.separator_tokens {
Setting::Set(ref separator_tokens) => {
let current = self.index.separator_tokens(self.wtxn)?;
// Does the new list differ from the previous one?
if current.map_or(true, |current| &current != separator_tokens) {
self.index.put_separator_tokens(self.wtxn, separator_tokens)?;
true
} else {
false
}
}
Setting::Reset => self.index.delete_separator_tokens(self.wtxn)?,
Setting::NotSet => false,
};
// the synonyms must be updated if separator tokens have been updated.
if changes && self.synonyms == Setting::NotSet {
self.synonyms = Setting::Set(self.index.user_defined_synonyms(self.wtxn)?);
}
Ok(changes)
}
fn update_dictionary(&mut self) -> Result<bool> {
let changes = match self.dictionary {
Setting::Set(ref dictionary) => {
let current = self.index.dictionary(self.wtxn)?;
// Does the new list differ from the previous one?
if current.map_or(true, |current| &current != dictionary) {
self.index.put_dictionary(self.wtxn, dictionary)?;
true
} else {
false
}
}
Setting::Reset => self.index.delete_dictionary(self.wtxn)?,
Setting::NotSet => false,
};
// the synonyms must be updated if dictionary has been updated.
if changes && self.synonyms == Setting::NotSet {
self.synonyms = Setting::Set(self.index.user_defined_synonyms(self.wtxn)?);
}
Ok(changes)
}
fn update_synonyms(&mut self) -> Result<bool> {
match self.synonyms {
Setting::Set(ref user_synonyms) => {
Setting::Set(ref synonyms) => {
fn normalize(tokenizer: &Tokenizer, text: &str) -> Vec<String> {
tokenizer
.tokenize(text)
@@ -587,25 +473,10 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
if let Some(ref stop_words) = stop_words {
builder.stop_words(stop_words);
}
let separators = self.index.allowed_separators(self.wtxn)?;
let separators: Option<Vec<_>> =
separators.as_ref().map(|x| x.iter().map(String::as_str).collect());
if let Some(ref separators) = separators {
builder.separators(separators);
}
let dictionary = self.index.dictionary(self.wtxn)?;
let dictionary: Option<Vec<_>> =
dictionary.as_ref().map(|x| x.iter().map(String::as_str).collect());
if let Some(ref dictionary) = dictionary {
builder.words_dict(dictionary);
}
let tokenizer = builder.build();
let mut new_synonyms = HashMap::new();
for (word, synonyms) in user_synonyms {
for (word, synonyms) in synonyms {
// Normalize both the word and associated synonyms.
let normalized_word = normalize(&tokenizer, word);
let normalized_synonyms =
@@ -626,7 +497,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
let old_synonyms = self.index.synonyms(self.wtxn)?;
if new_synonyms != old_synonyms {
self.index.put_synonyms(self.wtxn, &new_synonyms, user_synonyms)?;
self.index.put_synonyms(self.wtxn, &new_synonyms)?;
Ok(true)
} else {
Ok(false)
@@ -886,17 +757,11 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
let faceted_updated = old_faceted_fields != new_faceted_fields;
let stop_words_updated = self.update_stop_words()?;
let non_separator_tokens_updated = self.update_non_separator_tokens()?;
let separator_tokens_updated = self.update_separator_tokens()?;
let dictionary_updated = self.update_dictionary()?;
let synonyms_updated = self.update_synonyms()?;
let searchable_updated = self.update_searchable()?;
let exact_attributes_updated = self.update_exact_attributes()?;
if stop_words_updated
|| non_separator_tokens_updated
|| separator_tokens_updated
|| dictionary_updated
|| faceted_updated
|| synonyms_updated
|| searchable_updated
@@ -913,7 +778,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
mod tests {
use big_s::S;
use heed::types::ByteSlice;
use maplit::{btreemap, btreeset, hashset};
use maplit::{btreeset, hashmap, hashset};
use super::*;
use crate::error::Error;
@@ -1379,7 +1244,7 @@ mod tests {
// In the same transaction provide some synonyms
index
.update_settings_using_wtxn(&mut wtxn, |settings| {
settings.set_synonyms(btreemap! {
settings.set_synonyms(hashmap! {
"blini".to_string() => vec!["crepes".to_string()],
"super like".to_string() => vec!["love".to_string()],
"puppies".to_string() => vec!["dogs".to_string(), "doggos".to_string()]
@@ -1675,9 +1540,6 @@ mod tests {
sortable_fields,
criteria,
stop_words,
non_separator_tokens,
separator_tokens,
dictionary,
distinct_field,
synonyms,
primary_key,
@@ -1696,9 +1558,6 @@ mod tests {
assert!(matches!(sortable_fields, Setting::NotSet));
assert!(matches!(criteria, Setting::NotSet));
assert!(matches!(stop_words, Setting::NotSet));
assert!(matches!(non_separator_tokens, Setting::NotSet));
assert!(matches!(separator_tokens, Setting::NotSet));
assert!(matches!(dictionary, Setting::NotSet));
assert!(matches!(distinct_field, Setting::NotSet));
assert!(matches!(synonyms, Setting::NotSet));
assert!(matches!(primary_key, Setting::NotSet));

View File

@@ -5,7 +5,7 @@ use std::io::Cursor;
use big_s::S;
use either::{Either, Left, Right};
use heed::EnvOpenOptions;
use maplit::{btreemap, hashset};
use maplit::{hashmap, hashset};
use milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
use milli::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings};
use milli::{AscDesc, Criterion, DocumentId, Index, Member, Object, TermsMatchingStrategy};
@@ -51,7 +51,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
S("tag"),
S("asc_desc_rank"),
});
builder.set_synonyms(btreemap! {
builder.set_synonyms(hashmap! {
S("hello") => vec![S("good morning")],
S("world") => vec![S("earth")],
S("america") => vec![S("the united states")],