Compare commits

..

18 Commits

Author SHA1 Message Date
Kerollmops
83616bc03e Expose a new indexer parameter to enable the creation of a document dictionary 2025-01-21 14:44:07 +01:00
Kerollmops
bbbc4410ac Fix the dump creation process 2025-01-21 14:44:07 +01:00
Kerollmops
46dfa9f7c1 Remove unused code 2025-01-21 14:44:07 +01:00
Kerollmops
5dcd5d8797 Introduce a new experimental document compression parameter 2025-01-21 14:44:07 +01:00
Kerollmops
bc62cb0801 Remove span that is called too many times 2025-01-21 14:44:06 +01:00
Kerollmops
9109fbaeb0 Add more spans to debug compression 2025-01-21 14:44:06 +01:00
Kerollmops
78c9f67550 Generate the dictionary only when necessary 2025-01-21 14:44:06 +01:00
Kerollmops
523733db0a Clean up the tests 2025-01-21 14:44:06 +01:00
Clément Renault
6d7415a25f Remove last warning by storing rtxn and compressor on each thread 2025-01-21 14:44:05 +01:00
Clément Renault
3a32a58d6c Remove TODO and rely on the PR checklist 2025-01-21 14:44:05 +01:00
Clément Renault
ecc7741212 Fix some issues after a rebase 2025-01-21 14:44:05 +01:00
Clément Renault
d43ddd7205 Fetch the compression dictionary only once to decompress documents 2025-01-21 14:44:05 +01:00
Clément Renault
0dcbd2fe07 Fix the usage of compressed documents 2025-01-21 14:44:04 +01:00
Clément Renault
df80aaefc9 Compress and send compressed documents to the writer 2025-01-21 14:44:04 +01:00
Clément Renault
afec94d1f3 Compress the right documents when a new dictionary is computed 2025-01-21 14:44:04 +01:00
Clément Renault
e122970570 Move the compression extractor into a dedicated module 2025-01-21 14:44:03 +01:00
Kerollmops
19b0bf7121 Allocate the decompressed documents in the extractor allocator 2025-01-21 14:44:03 +01:00
Clément Renault
beef5b5f98 Squash in a single commit and rebase 2025-01-21 14:44:03 +01:00
208 changed files with 3103 additions and 15854 deletions

View File

@@ -52,7 +52,7 @@ jobs:
- name: Setup .NET Core
uses: actions/setup-dotnet@v4
with:
dotnet-version: "8.0.x"
dotnet-version: "6.0.x"
- name: Install dependencies
run: dotnet restore
- name: Build

436
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -22,7 +22,7 @@ members = [
]
[workspace.package]
version = "1.13.0"
version = "1.12.2"
authors = [
"Quentin de Quelen <quentin@dequelen.me>",
"Clément Renault <clement@meilisearch.com>",

View File

@@ -38,7 +38,7 @@ fn setup_index() -> Index {
let mut options = EnvOpenOptions::new();
options.map_size(100 * 1024 * 1024 * 1024); // 100 GB
options.max_readers(100);
Index::new(options, path, true).unwrap()
Index::new(options, path).unwrap()
}
fn setup_settings<'t>(

View File

@@ -68,7 +68,7 @@ pub fn base_setup(conf: &Conf) -> Index {
let mut options = EnvOpenOptions::new();
options.map_size(100 * 1024 * 1024 * 1024); // 100 GB
options.max_readers(100);
let index = Index::new(options, conf.database_name, true).unwrap();
let index = Index::new(options, conf.database_name).unwrap();
let config = IndexerConfig::default();
let mut wtxn = index.write_txn().unwrap();

View File

@@ -10,10 +10,8 @@ dump
├── instance-uid.uuid
├── keys.jsonl
├── metadata.json
── tasks
├── update_files
│ └── [task_id].jsonl
│ └── queue.jsonl
└── batches
── tasks
├── update_files
│ └── [task_id].jsonl
└── queue.jsonl
```
```

View File

@@ -141,9 +141,6 @@ pub enum KindDump {
instance_uid: Option<InstanceUid>,
},
SnapshotCreation,
UpgradeDatabase {
from: (u32, u32, u32),
},
}
impl From<Task> for TaskDump {
@@ -213,9 +210,6 @@ impl From<KindWithContent> for KindDump {
KindDump::DumpCreation { keys, instance_uid }
}
KindWithContent::SnapshotCreation => KindDump::SnapshotCreation,
KindWithContent::UpgradeDatabase { from: version } => {
KindDump::UpgradeDatabase { from: version }
}
}
}
}
@@ -228,16 +222,14 @@ pub(crate) mod test {
use big_s::S;
use maplit::{btreemap, btreeset};
use meilisearch_types::batches::{Batch, BatchEnqueuedAt, BatchStats};
use meilisearch_types::facet_values_sort::FacetValuesSort;
use meilisearch_types::features::{Network, Remote, RuntimeTogglableFeatures};
use meilisearch_types::features::RuntimeTogglableFeatures;
use meilisearch_types::index_uid_pattern::IndexUidPattern;
use meilisearch_types::keys::{Action, Key};
use meilisearch_types::milli;
use meilisearch_types::milli::update::Setting;
use meilisearch_types::settings::{Checked, FacetingSettings, Settings};
use meilisearch_types::task_view::DetailsView;
use meilisearch_types::tasks::{Details, Kind, Status};
use meilisearch_types::tasks::{Details, Status};
use serde_json::{json, Map, Value};
use time::macros::datetime;
use uuid::Uuid;
@@ -307,30 +299,6 @@ pub(crate) mod test {
settings.check()
}
pub fn create_test_batches() -> Vec<Batch> {
vec![Batch {
uid: 0,
details: DetailsView {
received_documents: Some(12),
indexed_documents: Some(Some(10)),
..DetailsView::default()
},
progress: None,
stats: BatchStats {
total_nb_tasks: 1,
status: maplit::btreemap! { Status::Succeeded => 1 },
types: maplit::btreemap! { Kind::DocumentAdditionOrUpdate => 1 },
index_uids: maplit::btreemap! { "doggo".to_string() => 1 },
},
enqueued_at: Some(BatchEnqueuedAt {
earliest: datetime!(2022-11-11 0:00 UTC),
oldest: datetime!(2022-11-11 0:00 UTC),
}),
started_at: datetime!(2022-11-20 0:00 UTC),
finished_at: Some(datetime!(2022-11-21 0:00 UTC)),
}]
}
pub fn create_test_tasks() -> Vec<(TaskDump, Option<Vec<Document>>)> {
vec![
(
@@ -453,15 +421,6 @@ pub(crate) mod test {
index.flush().unwrap();
index.settings(&settings).unwrap();
// ========== pushing the batch queue
let batches = create_test_batches();
let mut batch_queue = dump.create_batches_queue().unwrap();
for batch in &batches {
batch_queue.push_batch(batch).unwrap();
}
batch_queue.flush().unwrap();
// ========== pushing the task queue
let tasks = create_test_tasks();
@@ -490,10 +449,6 @@ pub(crate) mod test {
dump.create_experimental_features(features).unwrap();
// ========== network
let network = create_test_network();
dump.create_network(network).unwrap();
// create the dump
let mut file = tempfile::tempfile().unwrap();
dump.persist_to(&mut file).unwrap();
@@ -506,13 +461,6 @@ pub(crate) mod test {
RuntimeTogglableFeatures::default()
}
fn create_test_network() -> Network {
Network {
local: Some("myself".to_string()),
remotes: maplit::btreemap! {"other".to_string() => Remote { url: "http://test".to_string(), search_api_key: Some("apiKey".to_string()) }},
}
}
#[test]
fn test_creating_and_read_dump() {
let mut file = create_test_dump();
@@ -561,9 +509,5 @@ pub(crate) mod test {
// ==== checking the features
let expected = create_test_features();
assert_eq!(dump.features().unwrap().unwrap(), expected);
// ==== checking the network
let expected = create_test_network();
assert_eq!(&expected, dump.network().unwrap().unwrap());
}
}

View File

@@ -196,10 +196,6 @@ impl CompatV5ToV6 {
pub fn features(&self) -> Result<Option<v6::RuntimeTogglableFeatures>> {
Ok(None)
}
pub fn network(&self) -> Result<Option<&v6::Network>> {
Ok(None)
}
}
pub enum CompatIndexV5ToV6 {

View File

@@ -23,7 +23,6 @@ mod v6;
pub type Document = serde_json::Map<String, serde_json::Value>;
pub type UpdateFile = dyn Iterator<Item = Result<Document>>;
#[allow(clippy::large_enum_variant)]
pub enum DumpReader {
Current(V6Reader),
Compat(CompatV5ToV6),
@@ -102,13 +101,6 @@ impl DumpReader {
}
}
pub fn batches(&mut self) -> Result<Box<dyn Iterator<Item = Result<v6::Batch>> + '_>> {
match self {
DumpReader::Current(current) => Ok(current.batches()),
DumpReader::Compat(_compat) => Ok(Box::new(std::iter::empty())),
}
}
pub fn keys(&mut self) -> Result<Box<dyn Iterator<Item = Result<v6::Key>> + '_>> {
match self {
DumpReader::Current(current) => Ok(current.keys()),
@@ -122,13 +114,6 @@ impl DumpReader {
DumpReader::Compat(compat) => compat.features(),
}
}
pub fn network(&self) -> Result<Option<&v6::Network>> {
match self {
DumpReader::Current(current) => Ok(current.network()),
DumpReader::Compat(compat) => compat.network(),
}
}
}
impl From<V6Reader> for DumpReader {
@@ -234,10 +219,6 @@ pub(crate) mod test {
insta::assert_snapshot!(dump.date().unwrap(), @"2024-05-16 15:51:34.151044 +00:00:00");
insta::assert_debug_snapshot!(dump.instance_uid().unwrap(), @"None");
// batches didn't exists at the time
let batches = dump.batches().unwrap().collect::<Result<Vec<_>>>().unwrap();
meili_snap::snapshot!(meili_snap::json_string!(batches), @"[]");
// tasks
let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap();
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
@@ -347,7 +328,6 @@ pub(crate) mod test {
}
assert_eq!(dump.features().unwrap().unwrap(), RuntimeTogglableFeatures::default());
assert_eq!(dump.network().unwrap(), None);
}
#[test]
@@ -359,10 +339,6 @@ pub(crate) mod test {
insta::assert_snapshot!(dump.date().unwrap(), @"2023-07-06 7:10:27.21958 +00:00:00");
insta::assert_debug_snapshot!(dump.instance_uid().unwrap(), @"None");
// batches didn't exists at the time
let batches = dump.batches().unwrap().collect::<Result<Vec<_>>>().unwrap();
meili_snap::snapshot!(meili_snap::json_string!(batches), @"[]");
// tasks
let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap();
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
@@ -397,27 +373,6 @@ pub(crate) mod test {
assert_eq!(dump.features().unwrap().unwrap(), RuntimeTogglableFeatures::default());
}
#[test]
fn import_dump_v6_network() {
let dump = File::open("tests/assets/v6-with-network.dump").unwrap();
let dump = DumpReader::open(dump).unwrap();
// top level infos
insta::assert_snapshot!(dump.date().unwrap(), @"2025-01-29 15:45:32.738676 +00:00:00");
insta::assert_debug_snapshot!(dump.instance_uid().unwrap(), @"None");
// network
let network = dump.network().unwrap().unwrap();
insta::assert_snapshot!(network.local.as_ref().unwrap(), @"ms-0");
insta::assert_snapshot!(network.remotes.get("ms-0").as_ref().unwrap().url, @"http://localhost:7700");
insta::assert_snapshot!(network.remotes.get("ms-0").as_ref().unwrap().search_api_key.is_none(), @"true");
insta::assert_snapshot!(network.remotes.get("ms-1").as_ref().unwrap().url, @"http://localhost:7701");
insta::assert_snapshot!(network.remotes.get("ms-1").as_ref().unwrap().search_api_key.is_none(), @"true");
insta::assert_snapshot!(network.remotes.get("ms-2").as_ref().unwrap().url, @"http://ms-5679.example.meilisearch.io");
insta::assert_snapshot!(network.remotes.get("ms-2").as_ref().unwrap().search_api_key.as_ref().unwrap(), @"foo");
}
#[test]
fn import_dump_v5() {
let dump = File::open("tests/assets/v5.dump").unwrap();
@@ -427,10 +382,6 @@ pub(crate) mod test {
insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-04 15:55:10.344982459 +00:00:00");
insta::assert_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d");
// batches didn't exists at the time
let batches = dump.batches().unwrap().collect::<Result<Vec<_>>>().unwrap();
meili_snap::snapshot!(meili_snap::json_string!(batches), @"[]");
// tasks
let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap();
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
@@ -511,10 +462,6 @@ pub(crate) mod test {
insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-06 12:53:49.131989609 +00:00:00");
insta::assert_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d");
// batches didn't exists at the time
let batches = dump.batches().unwrap().collect::<Result<Vec<_>>>().unwrap();
meili_snap::snapshot!(meili_snap::json_string!(batches), @"[]");
// tasks
let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap();
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
@@ -592,10 +539,6 @@ pub(crate) mod test {
insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-07 11:39:03.709153554 +00:00:00");
assert_eq!(dump.instance_uid().unwrap(), None);
// batches didn't exists at the time
let batches = dump.batches().unwrap().collect::<Result<Vec<_>>>().unwrap();
meili_snap::snapshot!(meili_snap::json_string!(batches), @"[]");
// tasks
let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap();
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
@@ -689,10 +632,6 @@ pub(crate) mod test {
insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-09 20:27:59.904096267 +00:00:00");
assert_eq!(dump.instance_uid().unwrap(), None);
// batches didn't exists at the time
let batches = dump.batches().unwrap().collect::<Result<Vec<_>>>().unwrap();
meili_snap::snapshot!(meili_snap::json_string!(batches), @"[]");
// tasks
let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap();
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
@@ -786,10 +725,6 @@ pub(crate) mod test {
insta::assert_snapshot!(dump.date().unwrap(), @"2023-01-30 16:26:09.247261 +00:00:00");
assert_eq!(dump.instance_uid().unwrap(), None);
// batches didn't exists at the time
let batches = dump.batches().unwrap().collect::<Result<Vec<_>>>().unwrap();
meili_snap::snapshot!(meili_snap::json_string!(batches), @"[]");
// tasks
let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap();
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
@@ -866,10 +801,6 @@ pub(crate) mod test {
assert_eq!(dump.date(), None);
assert_eq!(dump.instance_uid().unwrap(), None);
// batches didn't exists at the time
let batches = dump.batches().unwrap().collect::<Result<Vec<_>>>().unwrap();
meili_snap::snapshot!(meili_snap::json_string!(batches), @"[]");
// tasks
let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap();
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();

View File

@@ -18,10 +18,8 @@ pub type Checked = meilisearch_types::settings::Checked;
pub type Unchecked = meilisearch_types::settings::Unchecked;
pub type Task = crate::TaskDump;
pub type Batch = meilisearch_types::batches::Batch;
pub type Key = meilisearch_types::keys::Key;
pub type RuntimeTogglableFeatures = meilisearch_types::features::RuntimeTogglableFeatures;
pub type Network = meilisearch_types::features::Network;
// ===== Other types to clarify the code of the compat module
// everything related to the tasks
@@ -50,10 +48,8 @@ pub struct V6Reader {
instance_uid: Option<Uuid>,
metadata: Metadata,
tasks: BufReader<File>,
batches: Option<BufReader<File>>,
keys: BufReader<File>,
features: Option<RuntimeTogglableFeatures>,
network: Option<Network>,
}
impl V6Reader {
@@ -81,38 +77,13 @@ impl V6Reader {
} else {
None
};
let batches = match File::open(dump.path().join("batches").join("queue.jsonl")) {
Ok(file) => Some(BufReader::new(file)),
// The batch file was only introduced during the v1.13, anything prior to that won't have batches
Err(err) if err.kind() == ErrorKind::NotFound => None,
Err(e) => return Err(e.into()),
};
let network_file = match fs::read(dump.path().join("network.json")) {
Ok(network_file) => Some(network_file),
Err(error) => match error.kind() {
// Allows the file to be missing, this will only result in all experimental features disabled.
ErrorKind::NotFound => {
debug!("`network.json` not found in dump");
None
}
_ => return Err(error.into()),
},
};
let network = if let Some(network_file) = network_file {
Some(serde_json::from_reader(&*network_file)?)
} else {
None
};
Ok(V6Reader {
metadata: serde_json::from_reader(&*meta_file)?,
instance_uid,
tasks: BufReader::new(File::open(dump.path().join("tasks").join("queue.jsonl"))?),
batches,
keys: BufReader::new(File::open(dump.path().join("keys.jsonl"))?),
features,
network,
dump,
})
}
@@ -153,7 +124,7 @@ impl V6Reader {
&mut self,
) -> Box<dyn Iterator<Item = Result<(Task, Option<Box<super::UpdateFile>>)>> + '_> {
Box::new((&mut self.tasks).lines().map(|line| -> Result<_> {
let task: Task = serde_json::from_str(&line?)?;
let task: Task = serde_json::from_str(&line?).unwrap();
let update_file_path = self
.dump
@@ -165,7 +136,8 @@ impl V6Reader {
if update_file_path.exists() {
Ok((
task,
Some(Box::new(UpdateFile::new(&update_file_path)?) as Box<super::UpdateFile>),
Some(Box::new(UpdateFile::new(&update_file_path).unwrap())
as Box<super::UpdateFile>),
))
} else {
Ok((task, None))
@@ -173,16 +145,6 @@ impl V6Reader {
}))
}
pub fn batches(&mut self) -> Box<dyn Iterator<Item = Result<Batch>> + '_> {
match self.batches.as_mut() {
Some(batches) => Box::new((batches).lines().map(|line| -> Result<_> {
let batch = serde_json::from_str(&line?)?;
Ok(batch)
})),
None => Box::new(std::iter::empty()) as Box<dyn Iterator<Item = Result<Batch>> + '_>,
}
}
pub fn keys(&mut self) -> Box<dyn Iterator<Item = Result<Key>> + '_> {
Box::new(
(&mut self.keys).lines().map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) }),
@@ -192,10 +154,6 @@ impl V6Reader {
pub fn features(&self) -> Option<RuntimeTogglableFeatures> {
self.features
}
pub fn network(&self) -> Option<&Network> {
self.network.as_ref()
}
}
pub struct UpdateFile {

View File

@@ -4,8 +4,7 @@ use std::path::PathBuf;
use flate2::write::GzEncoder;
use flate2::Compression;
use meilisearch_types::batches::Batch;
use meilisearch_types::features::{Network, RuntimeTogglableFeatures};
use meilisearch_types::features::RuntimeTogglableFeatures;
use meilisearch_types::keys::Key;
use meilisearch_types::settings::{Checked, Settings};
use serde_json::{Map, Value};
@@ -55,10 +54,6 @@ impl DumpWriter {
TaskWriter::new(self.dir.path().join("tasks"))
}
pub fn create_batches_queue(&self) -> Result<BatchWriter> {
BatchWriter::new(self.dir.path().join("batches"))
}
pub fn create_experimental_features(&self, features: RuntimeTogglableFeatures) -> Result<()> {
Ok(std::fs::write(
self.dir.path().join("experimental-features.json"),
@@ -66,10 +61,6 @@ impl DumpWriter {
)?)
}
pub fn create_network(&self, network: Network) -> Result<()> {
Ok(std::fs::write(self.dir.path().join("network.json"), serde_json::to_string(&network)?)?)
}
pub fn persist_to(self, mut writer: impl Write) -> Result<()> {
let gz_encoder = GzEncoder::new(&mut writer, Compression::default());
let mut tar_encoder = tar::Builder::new(gz_encoder);
@@ -93,7 +84,7 @@ impl KeyWriter {
}
pub fn push_key(&mut self, key: &Key) -> Result<()> {
serde_json::to_writer(&mut self.keys, &key)?;
self.keys.write_all(&serde_json::to_vec(key)?)?;
self.keys.write_all(b"\n")?;
Ok(())
}
@@ -123,7 +114,7 @@ impl TaskWriter {
/// Pushes tasks in the dump.
/// If the tasks has an associated `update_file` it'll use the `task_id` as its name.
pub fn push_task(&mut self, task: &TaskDump) -> Result<UpdateFile> {
serde_json::to_writer(&mut self.queue, &task)?;
self.queue.write_all(&serde_json::to_vec(task)?)?;
self.queue.write_all(b"\n")?;
Ok(UpdateFile::new(self.update_files.join(format!("{}.jsonl", task.uid))))
@@ -135,30 +126,6 @@ impl TaskWriter {
}
}
pub struct BatchWriter {
queue: BufWriter<File>,
}
impl BatchWriter {
pub(crate) fn new(path: PathBuf) -> Result<Self> {
std::fs::create_dir(&path)?;
let queue = File::create(path.join("queue.jsonl"))?;
Ok(BatchWriter { queue: BufWriter::new(queue) })
}
/// Pushes batches in the dump.
pub fn push_batch(&mut self, batch: &Batch) -> Result<()> {
serde_json::to_writer(&mut self.queue, &batch)?;
self.queue.write_all(b"\n")?;
Ok(())
}
pub fn flush(mut self) -> Result<()> {
self.queue.flush()?;
Ok(())
}
}
pub struct UpdateFile {
path: PathBuf,
writer: Option<BufWriter<File>>,
@@ -170,8 +137,8 @@ impl UpdateFile {
}
pub fn push_document(&mut self, document: &Document) -> Result<()> {
if let Some(mut writer) = self.writer.as_mut() {
serde_json::to_writer(&mut writer, &document)?;
if let Some(writer) = self.writer.as_mut() {
writer.write_all(&serde_json::to_vec(document)?)?;
writer.write_all(b"\n")?;
} else {
let file = File::create(&self.path).unwrap();
@@ -238,8 +205,8 @@ pub(crate) mod test {
use super::*;
use crate::reader::Document;
use crate::test::{
create_test_api_keys, create_test_batches, create_test_documents, create_test_dump,
create_test_instance_uid, create_test_settings, create_test_tasks,
create_test_api_keys, create_test_documents, create_test_dump, create_test_instance_uid,
create_test_settings, create_test_tasks,
};
fn create_directory_hierarchy(dir: &Path) -> String {
@@ -314,10 +281,8 @@ pub(crate) mod test {
let dump_path = dump.path();
// ==== checking global file hierarchy (we want to be sure there isn't too many files or too few)
insta::assert_snapshot!(create_directory_hierarchy(dump_path), @r"
insta::assert_snapshot!(create_directory_hierarchy(dump_path), @r###"
.
├---- batches/
│ └---- queue.jsonl
├---- indexes/
│ └---- doggos/
│ │ ├---- documents.jsonl
@@ -330,9 +295,8 @@ pub(crate) mod test {
├---- experimental-features.json
├---- instance_uid.uuid
├---- keys.jsonl
---- metadata.json
└---- network.json
");
---- metadata.json
"###);
// ==== checking the top level infos
let metadata = fs::read_to_string(dump_path.join("metadata.json")).unwrap();
@@ -385,16 +349,6 @@ pub(crate) mod test {
}
}
// ==== checking the batch queue
let batches_queue = fs::read_to_string(dump_path.join("batches/queue.jsonl")).unwrap();
for (batch, expected) in batches_queue.lines().zip(create_test_batches()) {
let mut batch = serde_json::from_str::<Batch>(batch).unwrap();
if batch.details.settings == Some(Box::new(Settings::<Unchecked>::default())) {
batch.details.settings = None;
}
assert_eq!(batch, expected, "{batch:#?}{expected:#?}");
}
// ==== checking the keys
let keys = fs::read_to_string(dump_path.join("keys.jsonl")).unwrap();
for (key, expected) in keys.lines().zip(create_test_api_keys()) {

View File

@@ -63,7 +63,7 @@ fn main() {
Some(path) => TempDir::new_in(path).unwrap(),
None => TempDir::new().unwrap(),
};
let index = Index::new(options, tempdir.path(), true).unwrap();
let index = Index::new(options, tempdir.path()).unwrap();
let indexer_config = IndexerConfig::default();
std::thread::scope(|s| {
@@ -150,7 +150,7 @@ fn main() {
// after executing a batch we check if the database is corrupted
let res = index.search(&wtxn).execute().unwrap();
index.documents(&wtxn, res.documents_ids).unwrap();
index.compressed_documents(&wtxn, res.documents_ids).unwrap();
progression.fetch_add(1, Ordering::Relaxed);
}
wtxn.abort();

View File

@@ -1,10 +1,8 @@
use std::collections::HashMap;
use std::io;
use dump::{KindDump, TaskDump, UpdateFile};
use meilisearch_types::batches::{Batch, BatchId};
use meilisearch_types::heed::RwTxn;
use meilisearch_types::milli;
use meilisearch_types::milli::documents::DocumentsBatchBuilder;
use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task};
use roaring::RoaringBitmap;
use uuid::Uuid;
@@ -15,15 +13,9 @@ pub struct Dump<'a> {
index_scheduler: &'a IndexScheduler,
wtxn: RwTxn<'a>,
batch_to_task_mapping: HashMap<BatchId, RoaringBitmap>,
indexes: HashMap<String, RoaringBitmap>,
statuses: HashMap<Status, RoaringBitmap>,
kinds: HashMap<Kind, RoaringBitmap>,
batch_indexes: HashMap<String, RoaringBitmap>,
batch_statuses: HashMap<Status, RoaringBitmap>,
batch_kinds: HashMap<Kind, RoaringBitmap>,
}
impl<'a> Dump<'a> {
@@ -34,72 +26,12 @@ impl<'a> Dump<'a> {
Ok(Dump {
index_scheduler,
wtxn,
batch_to_task_mapping: HashMap::new(),
indexes: HashMap::new(),
statuses: HashMap::new(),
kinds: HashMap::new(),
batch_indexes: HashMap::new(),
batch_statuses: HashMap::new(),
batch_kinds: HashMap::new(),
})
}
/// Register a new batch coming from a dump in the scheduler.
/// By taking a mutable ref we're pretty sure no one will ever import a dump while actix is running.
pub fn register_dumped_batch(&mut self, batch: Batch) -> Result<()> {
self.index_scheduler.queue.batches.all_batches.put(&mut self.wtxn, &batch.uid, &batch)?;
if let Some(enqueued_at) = batch.enqueued_at {
utils::insert_task_datetime(
&mut self.wtxn,
self.index_scheduler.queue.batches.enqueued_at,
enqueued_at.earliest,
batch.uid,
)?;
utils::insert_task_datetime(
&mut self.wtxn,
self.index_scheduler.queue.batches.enqueued_at,
enqueued_at.oldest,
batch.uid,
)?;
}
utils::insert_task_datetime(
&mut self.wtxn,
self.index_scheduler.queue.batches.started_at,
batch.started_at,
batch.uid,
)?;
if let Some(finished_at) = batch.finished_at {
utils::insert_task_datetime(
&mut self.wtxn,
self.index_scheduler.queue.batches.finished_at,
finished_at,
batch.uid,
)?;
}
for index in batch.stats.index_uids.keys() {
match self.batch_indexes.get_mut(index) {
Some(bitmap) => {
bitmap.insert(batch.uid);
}
None => {
let mut bitmap = RoaringBitmap::new();
bitmap.insert(batch.uid);
self.batch_indexes.insert(index.to_string(), bitmap);
}
};
}
for status in batch.stats.status.keys() {
self.batch_statuses.entry(*status).or_default().insert(batch.uid);
}
for kind in batch.stats.types.keys() {
self.batch_kinds.entry(*kind).or_default().insert(batch.uid);
}
Ok(())
}
/// Register a new task coming from a dump in the scheduler.
/// By taking a mutable ref we're pretty sure no one will ever import a dump while actix is running.
pub fn register_dumped_task(
@@ -107,19 +39,14 @@ impl<'a> Dump<'a> {
task: TaskDump,
content_file: Option<Box<UpdateFile>>,
) -> Result<Task> {
let task_has_no_docs = matches!(task.kind, KindDump::DocumentImport { documents_count, .. } if documents_count == 0);
let content_uuid = match content_file {
Some(content_file) if task.status == Status::Enqueued => {
let (uuid, file) = self.index_scheduler.queue.create_update_file(false)?;
let mut writer = io::BufWriter::new(file);
let (uuid, mut file) = self.index_scheduler.queue.create_update_file(false)?;
let mut builder = DocumentsBatchBuilder::new(&mut file);
for doc in content_file {
let doc = doc?;
serde_json::to_writer(&mut writer, &doc).map_err(|e| {
Error::from_milli(milli::InternalError::SerdeJson(e).into(), None)
})?;
builder.append_json_object(&doc?)?;
}
let file = writer.into_inner().map_err(|e| e.into_error())?;
builder.into_inner()?;
file.persist()?;
Some(uuid)
@@ -127,12 +54,6 @@ impl<'a> Dump<'a> {
// If the task isn't `Enqueued` then just generate a recognisable `Uuid`
// in case we try to open it later.
_ if task.status != Status::Enqueued => Some(Uuid::nil()),
None if task.status == Status::Enqueued && task_has_no_docs => {
let (uuid, file) = self.index_scheduler.queue.create_update_file(false)?;
file.persist()?;
Some(uuid)
}
_ => None,
};
@@ -211,14 +132,10 @@ impl<'a> Dump<'a> {
KindWithContent::DumpCreation { keys, instance_uid }
}
KindDump::SnapshotCreation => KindWithContent::SnapshotCreation,
KindDump::UpgradeDatabase { from } => KindWithContent::UpgradeDatabase { from },
},
};
self.index_scheduler.queue.tasks.all_tasks.put(&mut self.wtxn, &task.uid, &task)?;
if let Some(batch_id) = task.batch_uid {
self.batch_to_task_mapping.entry(batch_id).or_default().insert(task.uid);
}
for index in task.indexes() {
match self.indexes.get_mut(index) {
@@ -268,14 +185,6 @@ impl<'a> Dump<'a> {
/// Commit all the changes and exit the importing dump state
pub fn finish(mut self) -> Result<()> {
for (batch_id, task_ids) in self.batch_to_task_mapping {
self.index_scheduler.queue.batch_to_tasks_mapping.put(
&mut self.wtxn,
&batch_id,
&task_ids,
)?;
}
for (index, bitmap) in self.indexes {
self.index_scheduler.queue.tasks.index_tasks.put(&mut self.wtxn, &index, &bitmap)?;
}
@@ -286,16 +195,6 @@ impl<'a> Dump<'a> {
self.index_scheduler.queue.tasks.put_kind(&mut self.wtxn, kind, &bitmap)?;
}
for (index, bitmap) in self.batch_indexes {
self.index_scheduler.queue.batches.index_tasks.put(&mut self.wtxn, &index, &bitmap)?;
}
for (status, bitmap) in self.batch_statuses {
self.index_scheduler.queue.batches.put_status(&mut self.wtxn, status, &bitmap)?;
}
for (kind, bitmap) in self.batch_kinds {
self.index_scheduler.queue.batches.put_kind(&mut self.wtxn, kind, &bitmap)?;
}
self.wtxn.commit()?;
self.index_scheduler.scheduler.wake_up.signal();

View File

@@ -109,8 +109,6 @@ pub enum Error {
InvalidIndexUid { index_uid: String },
#[error("Task `{0}` not found.")]
TaskNotFound(TaskId),
#[error("Task `{0}` does not contain any documents. Only `documentAdditionOrUpdate` tasks with the statuses `enqueued` or `processing` contain documents")]
TaskFileNotFound(TaskId),
#[error("Batch `{0}` not found.")]
BatchNotFound(BatchId),
#[error("Query parameters to filter the tasks to delete are missing. Available query parameters are: `uids`, `indexUids`, `statuses`, `types`, `canceledBy`, `beforeEnqueuedAt`, `afterEnqueuedAt`, `beforeStartedAt`, `afterStartedAt`, `beforeFinishedAt`, `afterFinishedAt`.")]
@@ -129,8 +127,8 @@ pub enum Error {
_ => format!("{error}")
})]
Milli { error: milli::Error, index_uid: Option<String> },
#[error("An unexpected crash occurred when processing the task: {0}")]
ProcessBatchPanicked(String),
#[error("An unexpected crash occurred when processing the task.")]
ProcessBatchPanicked,
#[error(transparent)]
FileStore(#[from] file_store::Error),
#[error(transparent)]
@@ -149,9 +147,7 @@ pub enum Error {
#[error("Corrupted task queue.")]
CorruptedTaskQueue,
#[error(transparent)]
DatabaseUpgrade(Box<Self>),
#[error(transparent)]
UnrecoverableError(Box<Self>),
TaskDatabaseUpdate(Box<Self>),
#[error(transparent)]
HeedTransaction(heed::Error),
@@ -191,7 +187,6 @@ impl Error {
| Error::InvalidTaskCanceledBy { .. }
| Error::InvalidIndexUid { .. }
| Error::TaskNotFound(_)
| Error::TaskFileNotFound(_)
| Error::BatchNotFound(_)
| Error::TaskDeletionWithEmptyQuery
| Error::TaskCancelationWithEmptyQuery
@@ -199,7 +194,7 @@ impl Error {
| Error::Dump(_)
| Error::Heed(_)
| Error::Milli { .. }
| Error::ProcessBatchPanicked(_)
| Error::ProcessBatchPanicked
| Error::FileStore(_)
| Error::IoError(_)
| Error::Persist(_)
@@ -207,8 +202,7 @@ impl Error {
| Error::Anyhow(_) => true,
Error::CreateBatch(_)
| Error::CorruptedTaskQueue
| Error::DatabaseUpgrade(_)
| Error::UnrecoverableError(_)
| Error::TaskDatabaseUpdate(_)
| Error::HeedTransaction(_) => false,
#[cfg(test)]
Error::PlannedFailure => false,
@@ -253,7 +247,6 @@ impl ErrorCode for Error {
Error::InvalidTaskCanceledBy { .. } => Code::InvalidTaskCanceledBy,
Error::InvalidIndexUid { .. } => Code::InvalidIndexUid,
Error::TaskNotFound(_) => Code::TaskNotFound,
Error::TaskFileNotFound(_) => Code::TaskFileNotFound,
Error::BatchNotFound(_) => Code::BatchNotFound,
Error::TaskDeletionWithEmptyQuery => Code::MissingTaskFilters,
Error::TaskCancelationWithEmptyQuery => Code::MissingTaskFilters,
@@ -261,7 +254,7 @@ impl ErrorCode for Error {
Error::NoSpaceLeftInTaskQueue => Code::NoSpaceLeftOnDevice,
Error::Dump(e) => e.error_code(),
Error::Milli { error, .. } => error.error_code(),
Error::ProcessBatchPanicked(_) => Code::Internal,
Error::ProcessBatchPanicked => Code::Internal,
Error::Heed(e) => e.error_code(),
Error::HeedTransaction(e) => e.error_code(),
Error::FileStore(e) => e.error_code(),
@@ -273,8 +266,7 @@ impl ErrorCode for Error {
Error::Anyhow(_) => Code::Internal,
Error::CorruptedTaskQueue => Code::Internal,
Error::CorruptedDump => Code::Internal,
Error::DatabaseUpgrade(_) => Code::Internal,
Error::UnrecoverableError(_) => Code::Internal,
Error::TaskDatabaseUpdate(_) => Code::Internal,
Error::CreateBatch(_) => Code::Internal,
// This one should never be seen by the end user

View File

@@ -1,29 +1,18 @@
use std::sync::{Arc, RwLock};
use meilisearch_types::features::{InstanceTogglableFeatures, Network, RuntimeTogglableFeatures};
use meilisearch_types::features::{InstanceTogglableFeatures, RuntimeTogglableFeatures};
use meilisearch_types::heed::types::{SerdeJson, Str};
use meilisearch_types::heed::{Database, Env, RwTxn};
use crate::error::FeatureNotEnabledError;
use crate::Result;
/// The number of database used by features
const NUMBER_OF_DATABASES: u32 = 1;
/// Database const names for the `FeatureData`.
mod db_name {
pub const EXPERIMENTAL_FEATURES: &str = "experimental-features";
}
mod db_keys {
pub const EXPERIMENTAL_FEATURES: &str = "experimental-features";
pub const NETWORK: &str = "network";
}
const EXPERIMENTAL_FEATURES: &str = "experimental-features";
#[derive(Clone)]
pub(crate) struct FeatureData {
persisted: Database<Str, SerdeJson<RuntimeTogglableFeatures>>,
runtime: Arc<RwLock<RuntimeTogglableFeatures>>,
network: Arc<RwLock<Network>>,
}
#[derive(Debug, Clone, Copy)]
@@ -92,49 +81,17 @@ impl RoFeatures {
.into())
}
}
pub fn check_network(&self, disabled_action: &'static str) -> Result<()> {
if self.runtime.network {
Ok(())
} else {
Err(FeatureNotEnabledError {
disabled_action,
feature: "network",
issue_link: "https://github.com/orgs/meilisearch/discussions/805",
}
.into())
}
}
pub fn check_get_task_documents_route(&self) -> Result<()> {
if self.runtime.get_task_documents_route {
Ok(())
} else {
Err(FeatureNotEnabledError {
disabled_action: "Getting the documents of an enqueued task",
feature: "get task documents route",
issue_link: "https://github.com/orgs/meilisearch/discussions/808",
}
.into())
}
}
}
impl FeatureData {
pub(crate) const fn nb_db() -> u32 {
NUMBER_OF_DATABASES
}
pub fn new(
env: &Env,
wtxn: &mut RwTxn,
instance_features: InstanceTogglableFeatures,
) -> Result<Self> {
let runtime_features_db =
env.create_database(wtxn, Some(db_name::EXPERIMENTAL_FEATURES))?;
pub fn new(env: &Env, instance_features: InstanceTogglableFeatures) -> Result<Self> {
let mut wtxn = env.write_txn()?;
let runtime_features_db = env.create_database(&mut wtxn, Some(EXPERIMENTAL_FEATURES))?;
wtxn.commit()?;
let txn = env.read_txn()?;
let persisted_features: RuntimeTogglableFeatures =
runtime_features_db.get(wtxn, db_keys::EXPERIMENTAL_FEATURES)?.unwrap_or_default();
runtime_features_db.get(&txn, EXPERIMENTAL_FEATURES)?.unwrap_or_default();
let InstanceTogglableFeatures { metrics, logs_route, contains_filter } = instance_features;
let runtime = Arc::new(RwLock::new(RuntimeTogglableFeatures {
metrics: metrics || persisted_features.metrics,
@@ -143,14 +100,7 @@ impl FeatureData {
..persisted_features
}));
let network_db = runtime_features_db.remap_data_type::<SerdeJson<Network>>();
let network: Network = network_db.get(wtxn, db_keys::NETWORK)?.unwrap_or_default();
Ok(Self {
persisted: runtime_features_db,
runtime,
network: Arc::new(RwLock::new(network)),
})
Ok(Self { persisted: runtime_features_db, runtime })
}
pub fn put_runtime_features(
@@ -158,7 +108,7 @@ impl FeatureData {
mut wtxn: RwTxn,
features: RuntimeTogglableFeatures,
) -> Result<()> {
self.persisted.put(&mut wtxn, db_keys::EXPERIMENTAL_FEATURES, &features)?;
self.persisted.put(&mut wtxn, EXPERIMENTAL_FEATURES, &features)?;
wtxn.commit()?;
// safe to unwrap, the lock will only fail if:
@@ -179,21 +129,4 @@ impl FeatureData {
pub fn features(&self) -> RoFeatures {
RoFeatures::new(self)
}
pub fn put_network(&self, mut wtxn: RwTxn, new_network: Network) -> Result<()> {
self.persisted.remap_data_type::<SerdeJson<Network>>().put(
&mut wtxn,
db_keys::NETWORK,
&new_network,
)?;
wtxn.commit()?;
let mut network = self.network.write().unwrap();
*network = new_network;
Ok(())
}
pub fn network(&self) -> Network {
Network::clone(&*self.network.read().unwrap())
}
}

View File

@@ -1,7 +1,5 @@
use std::collections::BTreeMap;
use std::env::VarError;
use std::path::Path;
use std::str::FromStr;
use std::time::Duration;
use meilisearch_types::heed::{EnvClosingEvent, EnvFlags, EnvOpenOptions};
@@ -104,7 +102,7 @@ impl ReopenableIndex {
return Ok(());
}
map.unavailable.remove(&self.uuid);
map.create(&self.uuid, path, None, self.enable_mdb_writemap, self.map_size, false)?;
map.create(&self.uuid, path, None, self.enable_mdb_writemap, self.map_size)?;
}
Ok(())
}
@@ -173,12 +171,11 @@ impl IndexMap {
date: Option<(OffsetDateTime, OffsetDateTime)>,
enable_mdb_writemap: bool,
map_size: usize,
creation: bool,
) -> Result<Index> {
if !matches!(self.get_unavailable(uuid), Missing) {
panic!("Attempt to open an index that was unavailable");
}
let index = create_or_open_index(path, date, enable_mdb_writemap, map_size, creation)?;
let index = create_or_open_index(path, date, enable_mdb_writemap, map_size)?;
match self.available.insert(*uuid, index.clone()) {
InsertionOutcome::InsertedNew => (),
InsertionOutcome::Evicted(evicted_uuid, evicted_index) => {
@@ -302,30 +299,18 @@ fn create_or_open_index(
date: Option<(OffsetDateTime, OffsetDateTime)>,
enable_mdb_writemap: bool,
map_size: usize,
creation: bool,
) -> Result<Index> {
let mut options = EnvOpenOptions::new();
options.map_size(clamp_to_page_size(map_size));
// You can find more details about this experimental
// environment variable on the following GitHub discussion:
// <https://github.com/orgs/meilisearch/discussions/806>
let max_readers = match std::env::var("MEILI_EXPERIMENTAL_INDEX_MAX_READERS") {
Ok(value) => u32::from_str(&value).unwrap(),
Err(VarError::NotPresent) => 1024,
Err(VarError::NotUnicode(value)) => panic!(
"Invalid unicode for the `MEILI_EXPERIMENTAL_INDEX_MAX_READERS` env var: {value:?}"
),
};
options.max_readers(max_readers);
options.max_readers(1024);
if enable_mdb_writemap {
unsafe { options.flags(EnvFlags::WRITE_MAP) };
}
if let Some((created, updated)) = date {
Ok(Index::new_with_creation_dates(options, path, created, updated, creation)?)
Ok(Index::new_with_creation_dates(options, path, created, updated)?)
} else {
Ok(Index::new(options, path, creation)?)
Ok(Index::new(options, path)?)
}
}

View File

@@ -20,13 +20,8 @@ use crate::{Error, IndexBudget, IndexSchedulerOptions, Result};
mod index_map;
/// The number of database used by index mapper
const NUMBER_OF_DATABASES: u32 = 2;
/// Database const names for the `IndexMapper`.
mod db_name {
pub const INDEX_MAPPING: &str = "index-mapping";
pub const INDEX_STATS: &str = "index-stats";
}
const INDEX_MAPPING: &str = "index-mapping";
const INDEX_STATS: &str = "index-stats";
/// Structure managing meilisearch's indexes.
///
@@ -106,12 +101,6 @@ pub struct IndexStats {
/// are not returned to the disk after a deletion, this number is typically larger than
/// `used_database_size` that only includes the size of the used pages.
pub database_size: u64,
/// Number of embeddings in the index.
/// Option: retrocompatible with the stats of the pre-v1.13.0 versions of meilisearch
pub number_of_embeddings: Option<u64>,
/// Number of embedded documents in the index.
/// Option: retrocompatible with the stats of the pre-v1.13.0 versions of meilisearch
pub number_of_embedded_documents: Option<u64>,
/// Size taken by the used pages of the index' DB, in bytes.
///
/// As the DB backend does not return to the disk the pages that are not currently used by the DB,
@@ -136,11 +125,8 @@ impl IndexStats {
///
/// - rtxn: a RO transaction for the index, obtained from `Index::read_txn()`.
pub fn new(index: &Index, rtxn: &RoTxn) -> milli::Result<Self> {
let arroy_stats = index.arroy_stats(rtxn)?;
Ok(IndexStats {
number_of_documents: index.number_of_documents(rtxn)?,
number_of_embeddings: Some(arroy_stats.number_of_embeddings),
number_of_embedded_documents: Some(arroy_stats.documents.len()),
database_size: index.on_disk_size()?,
used_database_size: index.used_size()?,
primary_key: index.primary_key(rtxn)?.map(|s| s.to_string()),
@@ -152,10 +138,6 @@ impl IndexStats {
}
impl IndexMapper {
pub(crate) const fn nb_db() -> u32 {
NUMBER_OF_DATABASES
}
pub fn new(
env: &Env,
wtxn: &mut RwTxn,
@@ -164,8 +146,8 @@ impl IndexMapper {
) -> Result<Self> {
Ok(Self {
index_map: Arc::new(RwLock::new(IndexMap::new(budget.index_count))),
index_mapping: env.create_database(wtxn, Some(db_name::INDEX_MAPPING))?,
index_stats: env.create_database(wtxn, Some(db_name::INDEX_STATS))?,
index_mapping: env.create_database(wtxn, Some(INDEX_MAPPING))?,
index_stats: env.create_database(wtxn, Some(INDEX_STATS))?,
base_path: options.indexes_path.clone(),
index_base_map_size: budget.map_size,
index_growth_amount: options.index_growth_amount,
@@ -207,7 +189,6 @@ impl IndexMapper {
date,
self.enable_mdb_writemap,
self.index_base_map_size,
true,
)
.map_err(|e| Error::from_milli(e, Some(uuid.to_string())))?;
let index_rtxn = index.read_txn()?;
@@ -406,7 +387,6 @@ impl IndexMapper {
None,
self.enable_mdb_writemap,
self.index_base_map_size,
false,
)
.map_err(|e| Error::from_milli(e, Some(uuid.to_string())))?;
}

View File

@@ -1,12 +1,11 @@
use std::collections::BTreeSet;
use std::fmt::Write;
use meilisearch_types::batches::{Batch, BatchEnqueuedAt};
use meilisearch_types::batches::Batch;
use meilisearch_types::heed::types::{SerdeBincode, SerdeJson, Str};
use meilisearch_types::heed::{Database, RoTxn};
use meilisearch_types::milli::{CboRoaringBitmapCodec, RoaringBitmapCodec, BEU32};
use meilisearch_types::tasks::{Details, Kind, Status, Task};
use meilisearch_types::versioning;
use roaring::RoaringBitmap;
use crate::index_mapper::IndexMapper;
@@ -22,7 +21,6 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
cleanup_enabled: _,
processing_tasks,
env,
version,
queue,
scheduler,
@@ -40,16 +38,6 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
let mut snap = String::new();
let indx_sched_version = version.get_version(&rtxn).unwrap();
let latest_version = (
versioning::VERSION_MAJOR.parse().unwrap(),
versioning::VERSION_MINOR.parse().unwrap(),
versioning::VERSION_PATCH.parse().unwrap(),
);
if indx_sched_version != Some(latest_version) {
snap.push_str(&format!("index scheduler running on version {indx_sched_version:?}\n"));
}
let processing = processing_tasks.read().unwrap().clone();
snap.push_str(&format!("### Autobatching Enabled = {}\n", scheduler.autobatching_enabled));
snap.push_str(&format!(
@@ -291,9 +279,6 @@ fn snapshot_details(d: &Details) -> String {
Details::IndexSwap { swaps } => {
format!("{{ swaps: {swaps:?} }}")
}
Details::UpgradeDatabase { from, to } => {
format!("{{ from: {from:?}, to: {to:?} }}")
}
}
}
@@ -341,14 +326,10 @@ pub fn snapshot_canceled_by(rtxn: &RoTxn, db: Database<BEU32, RoaringBitmapCodec
pub fn snapshot_batch(batch: &Batch) -> String {
let mut snap = String::new();
let Batch { uid, details, stats, started_at, finished_at, progress: _, enqueued_at } = batch;
let Batch { uid, details, stats, started_at, finished_at, progress: _ } = batch;
if let Some(finished_at) = finished_at {
assert!(finished_at > started_at);
}
let BatchEnqueuedAt { earliest, oldest } = enqueued_at.unwrap();
assert!(*started_at > earliest);
assert!(earliest >= oldest);
snap.push('{');
snap.push_str(&format!("uid: {uid}, "));
snap.push_str(&format!("details: {}, ", serde_json::to_string(details).unwrap()));

View File

@@ -30,10 +30,8 @@ mod queue;
mod scheduler;
#[cfg(test)]
mod test_utils;
pub mod upgrade;
mod utils;
pub mod uuid_codec;
pub mod versioning;
pub type Result<T, E = Error> = std::result::Result<T, E>;
pub type TaskId = u32;
@@ -51,7 +49,7 @@ pub use features::RoFeatures;
use flate2::bufread::GzEncoder;
use flate2::Compression;
use meilisearch_types::batches::Batch;
use meilisearch_types::features::{InstanceTogglableFeatures, Network, RuntimeTogglableFeatures};
use meilisearch_types::features::{InstanceTogglableFeatures, RuntimeTogglableFeatures};
use meilisearch_types::heed::byteorder::BE;
use meilisearch_types::heed::types::I128;
use meilisearch_types::heed::{self, Env, RoTxn};
@@ -67,7 +65,6 @@ use queue::Queue;
use roaring::RoaringBitmap;
use scheduler::Scheduler;
use time::OffsetDateTime;
use versioning::Versioning;
use crate::index_mapper::IndexMapper;
use crate::utils::clamp_to_page_size;
@@ -123,8 +120,6 @@ pub struct IndexSchedulerOptions {
pub batched_tasks_size_limit: u64,
/// The experimental features enabled for this instance.
pub instance_features: InstanceTogglableFeatures,
/// The experimental features enabled for this instance.
pub auto_upgrade: bool,
}
/// Structure which holds meilisearch's indexes and schedules the tasks
@@ -136,18 +131,17 @@ pub struct IndexScheduler {
/// The list of tasks currently processing
pub(crate) processing_tasks: Arc<RwLock<ProcessingTasks>>,
/// A database containing only the version of the index-scheduler
pub version: versioning::Versioning,
/// The queue containing both the tasks and the batches.
pub queue: queue::Queue,
pub scheduler: scheduler::Scheduler,
/// In charge of creating, opening, storing and returning indexes.
pub(crate) index_mapper: IndexMapper,
/// In charge of fetching and setting the status of experimental features.
features: features::FeatureData,
/// Everything related to the processing of the tasks
pub scheduler: scheduler::Scheduler,
/// Whether we should automatically cleanup the task queue or not.
pub(crate) cleanup_enabled: bool,
@@ -182,7 +176,6 @@ impl IndexScheduler {
IndexScheduler {
env: self.env.clone(),
processing_tasks: self.processing_tasks.clone(),
version: self.version.clone(),
queue: self.queue.private_clone(),
scheduler: self.scheduler.private_clone(),
@@ -201,15 +194,10 @@ impl IndexScheduler {
}
}
pub(crate) const fn nb_db() -> u32 {
Versioning::nb_db() + Queue::nb_db() + IndexMapper::nb_db() + features::FeatureData::nb_db()
}
/// Create an index scheduler and start its run loop.
#[allow(private_interfaces)] // because test_utils is private
pub fn new(
options: IndexSchedulerOptions,
from_db_version: (u32, u32, u32),
#[cfg(test)] test_breakpoint_sdr: crossbeam_channel::Sender<(test_utils::Breakpoint, bool)>,
#[cfg(test)] planned_failures: Vec<(usize, test_utils::FailureLocation)>,
) -> Result<Self> {
@@ -241,16 +229,14 @@ impl IndexScheduler {
let env = unsafe {
heed::EnvOpenOptions::new()
.max_dbs(Self::nb_db())
.max_dbs(19)
.map_size(budget.task_db_size)
.open(&options.tasks_path)
}?;
// We **must** starts by upgrading the version because it'll also upgrade the required database before we can open them
let version = versioning::Versioning::new(&env, from_db_version)?;
let features = features::FeatureData::new(&env, options.instance_features)?;
let mut wtxn = env.write_txn()?;
let features = features::FeatureData::new(&env, &mut wtxn, options.instance_features)?;
let queue = Queue::new(&env, &mut wtxn, &options)?;
let index_mapper = IndexMapper::new(&env, &mut wtxn, &options, budget)?;
wtxn.commit()?;
@@ -258,7 +244,6 @@ impl IndexScheduler {
// allow unreachable_code to get rids of the warning in the case of a test build.
let this = Self {
processing_tasks: Arc::new(RwLock::new(ProcessingTasks::new())),
version,
queue,
scheduler: Scheduler::new(&options),
@@ -381,7 +366,6 @@ impl IndexScheduler {
match ret {
Ok(Ok(TickOutcome::TickAgain(_))) => (),
Ok(Ok(TickOutcome::WaitForSignal)) => run.scheduler.wake_up.wait(),
Ok(Ok(TickOutcome::StopProcessingForever)) => break,
Ok(Err(e)) => {
tracing::error!("{e}");
// Wait one second when an irrecoverable error occurs.
@@ -770,16 +754,7 @@ impl IndexScheduler {
Ok(())
}
pub fn put_network(&self, network: Network) -> Result<()> {
let wtxn = self.env.write_txn().map_err(Error::HeedTransaction)?;
self.features.put_network(wtxn, network)?;
Ok(())
}
pub fn network(&self) -> Network {
self.features.network()
}
// TODO: consider using a type alias or a struct embedder/template
pub fn embedders(
&self,
index_uid: String,
@@ -838,8 +813,6 @@ pub enum TickOutcome {
TickAgain(u64),
/// The scheduler should wait for an external signal before attempting another `tick`.
WaitForSignal,
/// The scheduler exits the run-loop and will never process tasks again
StopProcessingForever,
}
/// How many indexes we can afford to have open simultaneously.

View File

@@ -1,6 +1,8 @@
use std::borrow::Cow;
use std::sync::Arc;
use meilisearch_types::milli::progress::{AtomicSubStep, NamedStep, Progress, ProgressView};
use enum_iterator::Sequence;
use meilisearch_types::milli::progress::{AtomicSubStep, NamedStep, Progress, ProgressView, Step};
use meilisearch_types::milli::{make_atomic_progress, make_enum_progress};
use roaring::RoaringBitmap;
@@ -96,7 +98,6 @@ make_enum_progress! {
StartTheDumpCreation,
DumpTheApiKeys,
DumpTheTasks,
DumpTheBatches,
DumpTheIndexes,
DumpTheExperimentalFeatures,
CompressTheDump,
@@ -172,6 +173,32 @@ make_atomic_progress!(Document alias AtomicDocumentStep => "document" );
make_atomic_progress!(Batch alias AtomicBatchStep => "batch" );
make_atomic_progress!(UpdateFile alias AtomicUpdateFileStep => "update file" );
pub struct VariableNameStep {
name: String,
current: u32,
total: u32,
}
impl VariableNameStep {
pub fn new(name: impl Into<String>, current: u32, total: u32) -> Self {
Self { name: name.into(), current, total }
}
}
impl Step for VariableNameStep {
fn name(&self) -> Cow<'static, str> {
self.name.clone().into()
}
fn current(&self) -> u32 {
self.current
}
fn total(&self) -> u32 {
self.total
}
}
#[cfg(test)]
mod test {
use std::sync::atomic::Ordering;

View File

@@ -1,4 +1,3 @@
use std::collections::HashSet;
use std::ops::{Bound, RangeBounds};
use meilisearch_types::batches::{Batch, BatchId};
@@ -11,14 +10,9 @@ use time::OffsetDateTime;
use super::{Query, Queue};
use crate::processing::ProcessingTasks;
use crate::utils::{
insert_task_datetime, keep_ids_within_datetimes, map_bound,
remove_n_tasks_datetime_earlier_than, remove_task_datetime, ProcessingBatch,
};
use crate::utils::{insert_task_datetime, keep_ids_within_datetimes, map_bound, ProcessingBatch};
use crate::{Error, Result, BEI128};
/// The number of database used by the batch queue
const NUMBER_OF_DATABASES: u32 = 7;
/// Database const names for the `IndexScheduler`.
mod db_name {
pub const ALL_BATCHES: &str = "all-batches";
@@ -62,10 +56,6 @@ impl BatchQueue {
}
}
pub(crate) const fn nb_db() -> u32 {
NUMBER_OF_DATABASES
}
pub(super) fn new(env: &Env, wtxn: &mut RwTxn) -> Result<Self> {
Ok(Self {
all_batches: env.create_database(wtxn, Some(db_name::ALL_BATCHES))?,
@@ -169,8 +159,6 @@ impl BatchQueue {
}
pub(crate) fn write_batch(&self, wtxn: &mut RwTxn, batch: ProcessingBatch) -> Result<()> {
let old_batch = self.all_batches.get(wtxn, &batch.uid)?;
self.all_batches.put(
wtxn,
&batch.uid,
@@ -181,86 +169,32 @@ impl BatchQueue {
stats: batch.stats,
started_at: batch.started_at,
finished_at: batch.finished_at,
enqueued_at: batch.enqueued_at,
},
)?;
// Update the statuses
if let Some(ref old_batch) = old_batch {
for status in old_batch.stats.status.keys() {
self.update_status(wtxn, *status, |bitmap| {
bitmap.remove(batch.uid);
})?;
}
}
for status in batch.statuses {
self.update_status(wtxn, status, |bitmap| {
bitmap.insert(batch.uid);
})?;
}
// Update the kinds / types
if let Some(ref old_batch) = old_batch {
let kinds: HashSet<_> = old_batch.stats.types.keys().cloned().collect();
for kind in kinds.difference(&batch.kinds) {
self.update_kind(wtxn, *kind, |bitmap| {
bitmap.remove(batch.uid);
})?;
}
}
for kind in batch.kinds {
self.update_kind(wtxn, kind, |bitmap| {
bitmap.insert(batch.uid);
})?;
}
// Update the indexes
if let Some(ref old_batch) = old_batch {
let indexes: HashSet<_> = old_batch.stats.index_uids.keys().cloned().collect();
for index in indexes.difference(&batch.indexes) {
self.update_index(wtxn, index, |bitmap| {
bitmap.remove(batch.uid);
})?;
}
}
for index in batch.indexes {
self.update_index(wtxn, &index, |bitmap| {
bitmap.insert(batch.uid);
})?;
}
// Update the enqueued_at: we cannot retrieve the previous enqueued at from the previous batch, and
// must instead go through the db looking for it. We cannot look at the task contained in this batch either
// because they may have been removed.
// What we know, though, is that the task date is from before the enqueued_at, and max two timestamps have been written
// to the DB per batches.
if let Some(ref old_batch) = old_batch {
if let Some(enqueued_at) = old_batch.enqueued_at {
remove_task_datetime(wtxn, self.enqueued_at, enqueued_at.earliest, old_batch.uid)?;
remove_task_datetime(wtxn, self.enqueued_at, enqueued_at.oldest, old_batch.uid)?;
} else {
// If we don't have the enqueued at in the batch it means the database comes from the v1.12
// and we still need to find the date by scrolling the database
remove_n_tasks_datetime_earlier_than(
wtxn,
self.enqueued_at,
old_batch.started_at,
old_batch.stats.total_nb_tasks.clamp(1, 2) as usize,
old_batch.uid,
)?;
}
if let Some(enqueued_at) = batch.oldest_enqueued_at {
insert_task_datetime(wtxn, self.enqueued_at, enqueued_at, batch.uid)?;
}
// A finished batch MUST contains at least one task and have an enqueued_at
let enqueued_at = batch.enqueued_at.as_ref().unwrap();
insert_task_datetime(wtxn, self.enqueued_at, enqueued_at.earliest, batch.uid)?;
insert_task_datetime(wtxn, self.enqueued_at, enqueued_at.oldest, batch.uid)?;
// Update the started at and finished at
if let Some(ref old_batch) = old_batch {
remove_task_datetime(wtxn, self.started_at, old_batch.started_at, old_batch.uid)?;
if let Some(finished_at) = old_batch.finished_at {
remove_task_datetime(wtxn, self.finished_at, finished_at, old_batch.uid)?;
}
if let Some(enqueued_at) = batch.earliest_enqueued_at {
insert_task_datetime(wtxn, self.enqueued_at, enqueued_at, batch.uid)?;
}
insert_task_datetime(wtxn, self.started_at, batch.started_at, batch.uid)?;
insert_task_datetime(wtxn, self.finished_at, batch.finished_at.unwrap(), batch.uid)?;

View File

@@ -102,33 +102,30 @@ fn query_batches_simple() {
.unwrap();
assert_eq!(batches.len(), 1);
batches[0].started_at = OffsetDateTime::UNIX_EPOCH;
assert!(batches[0].enqueued_at.is_some());
batches[0].enqueued_at = None;
// Insta cannot snapshot our batches because the batch stats contains an enum as key: https://github.com/mitsuhiko/insta/issues/689
let batch = serde_json::to_string_pretty(&batches[0]).unwrap();
snapshot!(batch, @r#"
{
"uid": 0,
"details": {
"primaryKey": "mouse"
},
"stats": {
"totalNbTasks": 1,
"status": {
"processing": 1
},
"types": {
"indexCreation": 1
},
"indexUids": {
"catto": 1
{
"uid": 0,
"details": {
"primaryKey": "mouse"
},
"stats": {
"totalNbTasks": 1,
"status": {
"processing": 1
},
"types": {
"indexCreation": 1
},
"indexUids": {
"catto": 1
}
},
"startedAt": "1970-01-01T00:00:00Z",
"finishedAt": null
}
},
"startedAt": "1970-01-01T00:00:00Z",
"finishedAt": null,
"enqueuedAt": null
}
"#);
"#);
let query = Query { statuses: Some(vec![Status::Enqueued]), ..Default::default() };
let (batches, _) = index_scheduler

View File

@@ -8,7 +8,6 @@ mod tasks_test;
mod test;
use std::collections::BTreeMap;
use std::fs::File as StdFile;
use std::time::Duration;
use file_store::FileStore;
@@ -21,16 +20,14 @@ use time::format_description::well_known::Rfc3339;
use time::OffsetDateTime;
use uuid::Uuid;
pub(crate) use self::batches::BatchQueue;
pub(crate) use self::tasks::TaskQueue;
use self::batches::BatchQueue;
use self::tasks::TaskQueue;
use crate::processing::ProcessingTasks;
use crate::utils::{
check_index_swap_validity, filter_out_references_to_newer_tasks, ProcessingBatch,
};
use crate::{Error, IndexSchedulerOptions, Result, TaskId};
/// The number of database used by queue itself
const NUMBER_OF_DATABASES: u32 = 1;
/// Database const names for the `IndexScheduler`.
mod db_name {
pub const BATCH_TO_TASKS_MAPPING: &str = "batch-to-tasks-mapping";
@@ -151,10 +148,6 @@ impl Queue {
}
}
pub(crate) const fn nb_db() -> u32 {
tasks::TaskQueue::nb_db() + batches::BatchQueue::nb_db() + NUMBER_OF_DATABASES
}
/// Create an index scheduler and start its run loop.
pub(crate) fn new(
env: &Env,
@@ -217,11 +210,6 @@ impl Queue {
}
}
/// Open and returns the task's content File.
pub fn update_file(&self, uuid: Uuid) -> file_store::Result<StdFile> {
self.file_store.get_update(uuid)
}
/// Delete a file from the index scheduler.
///
/// Counterpart to the [`create_update_file`](IndexScheduler::create_update_file) method.

View File

@@ -9,17 +9,12 @@ use time::OffsetDateTime;
use super::{Query, Queue};
use crate::processing::ProcessingTasks;
use crate::utils::{
self, insert_task_datetime, keep_ids_within_datetimes, map_bound, remove_task_datetime,
};
use crate::utils::{self, insert_task_datetime, keep_ids_within_datetimes, map_bound};
use crate::{Error, Result, TaskId, BEI128};
/// The number of database used by the task queue
const NUMBER_OF_DATABASES: u32 = 8;
/// Database const names for the `IndexScheduler`.
mod db_name {
pub const ALL_TASKS: &str = "all-tasks";
pub const STATUS: &str = "status";
pub const KIND: &str = "kind";
pub const INDEX_TASKS: &str = "index-tasks";
@@ -64,11 +59,7 @@ impl TaskQueue {
}
}
pub(crate) const fn nb_db() -> u32 {
NUMBER_OF_DATABASES
}
pub(crate) fn new(env: &Env, wtxn: &mut RwTxn) -> Result<Self> {
pub(super) fn new(env: &Env, wtxn: &mut RwTxn) -> Result<Self> {
Ok(Self {
all_tasks: env.create_database(wtxn, Some(db_name::ALL_TASKS))?,
status: env.create_database(wtxn, Some(db_name::STATUS))?,
@@ -99,14 +90,12 @@ impl TaskQueue {
pub(crate) fn update_task(&self, wtxn: &mut RwTxn, task: &Task) -> Result<()> {
let old_task = self.get_task(wtxn, task.uid)?.ok_or(Error::CorruptedTaskQueue)?;
let reprocessing = old_task.status != Status::Enqueued;
debug_assert!(old_task != *task);
debug_assert_eq!(old_task.uid, task.uid);
// If we're processing a task that failed it may already contains a batch_uid
debug_assert!(old_task.batch_uid.is_none() && task.batch_uid.is_some());
debug_assert!(
reprocessing || (old_task.batch_uid.is_none() && task.batch_uid.is_some()),
old_task.batch_uid.is_none() && task.batch_uid.is_some(),
"\n==> old: {old_task:?}\n==> new: {task:?}"
);
@@ -133,25 +122,13 @@ impl TaskQueue {
"Cannot update a task's enqueued_at time"
);
if old_task.started_at != task.started_at {
assert!(
reprocessing || old_task.started_at.is_none(),
"Cannot update a task's started_at time"
);
if let Some(started_at) = old_task.started_at {
remove_task_datetime(wtxn, self.started_at, started_at, task.uid)?;
}
assert!(old_task.started_at.is_none(), "Cannot update a task's started_at time");
if let Some(started_at) = task.started_at {
insert_task_datetime(wtxn, self.started_at, started_at, task.uid)?;
}
}
if old_task.finished_at != task.finished_at {
assert!(
reprocessing || old_task.finished_at.is_none(),
"Cannot update a task's finished_at time"
);
if let Some(finished_at) = old_task.finished_at {
remove_task_datetime(wtxn, self.finished_at, finished_at, task.uid)?;
}
assert!(old_task.finished_at.is_none(), "Cannot update a task's finished_at time");
if let Some(finished_at) = task.finished_at {
insert_task_datetime(wtxn, self.finished_at, finished_at, task.uid)?;
}

View File

@@ -165,7 +165,6 @@ fn test_disable_auto_deletion_of_tasks() {
let (index_scheduler, mut handle) = IndexScheduler::test_with_custom_config(vec![], |config| {
config.cleanup_enabled = false;
config.max_number_of_tasks = 2;
None
});
index_scheduler
@@ -229,7 +228,6 @@ fn test_disable_auto_deletion_of_tasks() {
fn test_auto_deletion_of_tasks() {
let (index_scheduler, mut handle) = IndexScheduler::test_with_custom_config(vec![], |config| {
config.max_number_of_tasks = 2;
None
});
index_scheduler
@@ -326,8 +324,7 @@ fn test_auto_deletion_of_tasks() {
fn test_task_queue_is_full() {
let (index_scheduler, mut handle) = IndexScheduler::test_with_custom_config(vec![], |config| {
// that's the minimum map size possible
config.task_db_size = 1048576 * 3;
None
config.task_db_size = 1048576;
});
index_scheduler

View File

@@ -85,7 +85,6 @@ impl From<KindWithContent> for AutobatchKind {
KindWithContent::TaskCancelation { .. }
| KindWithContent::TaskDeletion { .. }
| KindWithContent::DumpCreation { .. }
| KindWithContent::UpgradeDatabase { .. }
| KindWithContent::SnapshotCreation => {
panic!("The autobatcher should never be called with tasks that don't apply to an index.")
}

View File

@@ -47,9 +47,6 @@ pub(crate) enum Batch {
IndexSwap {
task: Task,
},
UpgradeDatabase {
tasks: Vec<Task>,
},
}
#[derive(Debug)]
@@ -108,7 +105,6 @@ impl Batch {
}
Batch::SnapshotCreation(tasks)
| Batch::TaskDeletions(tasks)
| Batch::UpgradeDatabase { tasks }
| Batch::IndexDeletion { tasks, .. } => {
RoaringBitmap::from_iter(tasks.iter().map(|task| task.uid))
}
@@ -142,7 +138,6 @@ impl Batch {
| TaskDeletions(_)
| SnapshotCreation(_)
| Dump(_)
| UpgradeDatabase { .. }
| IndexSwap { .. } => None,
IndexOperation { op, .. } => Some(op.index_uid()),
IndexCreation { index_uid, .. }
@@ -167,7 +162,6 @@ impl fmt::Display for Batch {
Batch::IndexUpdate { .. } => f.write_str("IndexUpdate")?,
Batch::IndexDeletion { .. } => f.write_str("IndexDeletion")?,
Batch::IndexSwap { .. } => f.write_str("IndexSwap")?,
Batch::UpgradeDatabase { .. } => f.write_str("UpgradeDatabase")?,
};
match index_uid {
Some(name) => f.write_fmt(format_args!(" on {name:?} from tasks: {tasks:?}")),
@@ -433,24 +427,9 @@ impl IndexScheduler {
let mut current_batch = ProcessingBatch::new(batch_id);
let enqueued = &self.queue.tasks.get_status(rtxn, Status::Enqueued)?;
let failed = &self.queue.tasks.get_status(rtxn, Status::Failed)?;
// 0. The priority over everything is to upgrade the instance
// There shouldn't be multiple upgrade tasks but just in case we're going to batch all of them at the same time
let upgrade = self.queue.tasks.get_kind(rtxn, Kind::UpgradeDatabase)? & (enqueued | failed);
if !upgrade.is_empty() {
let mut tasks = self.queue.tasks.get_existing_tasks(rtxn, upgrade)?;
// In the case of an upgrade database batch, we want to find back the original batch that tried processing it
// and re-use its id
if let Some(batch_uid) = tasks.last().unwrap().batch_uid {
current_batch.uid = batch_uid;
}
current_batch.processing(&mut tasks);
return Ok(Some((Batch::UpgradeDatabase { tasks }, current_batch)));
}
let to_cancel = self.queue.tasks.get_kind(rtxn, Kind::TaskCancelation)? & enqueued;
// 1. we get the last task to cancel.
let to_cancel = self.queue.tasks.get_kind(rtxn, Kind::TaskCancelation)? & enqueued;
if let Some(task_id) = to_cancel.max() {
let mut task =
self.queue.tasks.get_task(rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?;

View File

@@ -6,7 +6,6 @@ mod process_batch;
mod process_dump_creation;
mod process_index_operation;
mod process_snapshot_creation;
mod process_upgrade;
#[cfg(test)]
mod test;
#[cfg(test)]
@@ -166,41 +165,13 @@ impl IndexScheduler {
let processing_batch = &mut processing_batch;
let progress = progress.clone();
std::thread::scope(|s| {
let p = progress.clone();
let handle = std::thread::Builder::new()
.name(String::from("batch-operation"))
.spawn_scoped(s, move || {
cloned_index_scheduler.process_batch(batch, processing_batch, p)
cloned_index_scheduler.process_batch(batch, processing_batch, progress)
})
.unwrap();
match handle.join() {
Ok(ret) => {
if ret.is_err() {
if let Ok(progress_view) =
serde_json::to_string(&progress.as_progress_view())
{
tracing::warn!("Batch failed while doing: {progress_view}")
}
}
ret
}
Err(panic) => {
if let Ok(progress_view) =
serde_json::to_string(&progress.as_progress_view())
{
tracing::warn!("Batch failed while doing: {progress_view}")
}
let msg = match panic.downcast_ref::<&'static str>() {
Some(s) => *s,
None => match panic.downcast_ref::<String>() {
Some(s) => &s[..],
None => "Box<dyn Any>",
},
};
Err(Error::ProcessBatchPanicked(msg.to_string()))
}
}
handle.join().unwrap_or(Err(Error::ProcessBatchPanicked))
})
};
@@ -212,7 +183,6 @@ impl IndexScheduler {
progress.update_progress(BatchProgress::WritingTasksToDisk);
processing_batch.finished();
let mut stop_scheduler_forever = false;
let mut wtxn = self.env.write_txn().map_err(Error::HeedTransaction)?;
let mut canceled = RoaringBitmap::new();
@@ -251,7 +221,7 @@ impl IndexScheduler {
self.queue
.tasks
.update_task(&mut wtxn, &task)
.map_err(|e| Error::UnrecoverableError(Box::new(e)))?;
.map_err(|e| Error::TaskDatabaseUpdate(Box::new(e)))?;
}
if let Some(canceled_by) = canceled_by {
self.queue.tasks.canceled_by.put(&mut wtxn, &canceled_by, &canceled)?;
@@ -302,12 +272,6 @@ impl IndexScheduler {
let (task_progress, task_progress_obj) = AtomicTaskStep::new(ids.len() as u32);
progress.update_progress(task_progress_obj);
if matches!(err, Error::DatabaseUpgrade(_)) {
tracing::error!(
"Upgrade task failed, tasks won't be processed until the following issue is fixed: {err}"
);
stop_scheduler_forever = true;
}
let error: ResponseError = err.into();
for id in ids.iter() {
task_progress.fetch_add(1, Ordering::Relaxed);
@@ -315,7 +279,7 @@ impl IndexScheduler {
.queue
.tasks
.get_task(&wtxn, id)
.map_err(|e| Error::UnrecoverableError(Box::new(e)))?
.map_err(|e| Error::TaskDatabaseUpdate(Box::new(e)))?
.ok_or(Error::CorruptedTaskQueue)?;
task.status = Status::Failed;
task.error = Some(error.clone());
@@ -332,7 +296,7 @@ impl IndexScheduler {
self.queue
.tasks
.update_task(&mut wtxn, &task)
.map_err(|e| Error::UnrecoverableError(Box::new(e)))?;
.map_err(|e| Error::TaskDatabaseUpdate(Box::new(e)))?;
}
}
}
@@ -362,7 +326,7 @@ impl IndexScheduler {
.queue
.tasks
.get_task(&rtxn, id)
.map_err(|e| Error::UnrecoverableError(Box::new(e)))?
.map_err(|e| Error::TaskDatabaseUpdate(Box::new(e)))?
.ok_or(Error::CorruptedTaskQueue)?;
if let Err(e) = self.queue.delete_persisted_task_data(&task) {
tracing::error!(
@@ -380,10 +344,6 @@ impl IndexScheduler {
#[cfg(test)]
self.breakpoint(crate::test_utils::Breakpoint::AfterProcessing);
if stop_scheduler_forever {
Ok(TickOutcome::StopProcessingForever)
} else {
Ok(TickOutcome::TickAgain(processed_tasks))
}
Ok(TickOutcome::TickAgain(processed_tasks))
}
}

View File

@@ -1,10 +1,9 @@
use std::collections::{BTreeSet, HashMap, HashSet};
use std::panic::{catch_unwind, AssertUnwindSafe};
use std::sync::atomic::Ordering;
use meilisearch_types::batches::{BatchEnqueuedAt, BatchId};
use meilisearch_types::batches::BatchId;
use meilisearch_types::heed::{RoTxn, RwTxn};
use meilisearch_types::milli::progress::{Progress, VariableNameStep};
use meilisearch_types::milli::progress::Progress;
use meilisearch_types::milli::{self};
use meilisearch_types::tasks::{Details, IndexSwap, KindWithContent, Status, Task};
use milli::update::Settings as MilliSettings;
@@ -14,12 +13,9 @@ use super::create_batch::Batch;
use crate::processing::{
AtomicBatchStep, AtomicTaskStep, CreateIndexProgress, DeleteIndexProgress,
InnerSwappingTwoIndexes, SwappingTheIndexes, TaskCancelationProgress, TaskDeletionProgress,
UpdateIndexProgress,
};
use crate::utils::{
self, remove_n_tasks_datetime_earlier_than, remove_task_datetime, swap_index_uid_in_task,
ProcessingBatch,
UpdateIndexProgress, VariableNameStep,
};
use crate::utils::{self, swap_index_uid_in_task, ProcessingBatch};
use crate::{Error, IndexScheduler, Result, TaskId};
impl IndexScheduler {
@@ -301,7 +297,7 @@ impl IndexScheduler {
}
progress.update_progress(SwappingTheIndexes::SwappingTheIndexes);
for (step, swap) in swaps.iter().enumerate() {
progress.update_progress(VariableNameStep::<SwappingTheIndexes>::new(
progress.update_progress(VariableNameStep::new(
format!("swapping index {} and {}", swap.indexes.0, swap.indexes.1),
step as u32,
swaps.len() as u32,
@@ -318,36 +314,6 @@ impl IndexScheduler {
task.status = Status::Succeeded;
Ok(vec![task])
}
Batch::UpgradeDatabase { mut tasks } => {
let KindWithContent::UpgradeDatabase { from } = tasks.last().unwrap().kind else {
unreachable!();
};
let ret = catch_unwind(AssertUnwindSafe(|| self.process_upgrade(from, progress)));
match ret {
Ok(Ok(())) => (),
Ok(Err(e)) => return Err(Error::DatabaseUpgrade(Box::new(e))),
Err(e) => {
let msg = match e.downcast_ref::<&'static str>() {
Some(s) => *s,
None => match e.downcast_ref::<String>() {
Some(s) => &s[..],
None => "Box<dyn Any>",
},
};
return Err(Error::DatabaseUpgrade(Box::new(Error::ProcessBatchPanicked(
msg.to_string(),
))));
}
}
for task in tasks.iter_mut() {
task.status = Status::Succeeded;
// Since this task can be retried we must reset its error status
task.error = None;
}
Ok(tasks)
}
}
}
@@ -430,6 +396,7 @@ impl IndexScheduler {
to_delete_tasks -= &enqueued_tasks;
// 2. We now have a list of tasks to delete, delete them
let mut affected_indexes = HashSet::new();
let mut affected_statuses = HashSet::new();
let mut affected_kinds = HashSet::new();
@@ -526,51 +493,9 @@ impl IndexScheduler {
tasks -= &to_delete_tasks;
// We must remove the batch entirely
if tasks.is_empty() {
if let Some(batch) = self.queue.batches.get_batch(wtxn, batch_id)? {
if let Some(BatchEnqueuedAt { earliest, oldest }) = batch.enqueued_at {
remove_task_datetime(
wtxn,
self.queue.batches.enqueued_at,
earliest,
batch_id,
)?;
remove_task_datetime(
wtxn,
self.queue.batches.enqueued_at,
oldest,
batch_id,
)?;
} else {
// If we don't have the enqueued at in the batch it means the database comes from the v1.12
// and we still need to find the date by scrolling the database
remove_n_tasks_datetime_earlier_than(
wtxn,
self.queue.batches.enqueued_at,
batch.started_at,
batch.stats.total_nb_tasks.clamp(1, 2) as usize,
batch_id,
)?;
}
remove_task_datetime(
wtxn,
self.queue.batches.started_at,
batch.started_at,
batch_id,
)?;
if let Some(finished_at) = batch.finished_at {
remove_task_datetime(
wtxn,
self.queue.batches.finished_at,
finished_at,
batch_id,
)?;
}
self.queue.batches.all_batches.delete(wtxn, &batch_id)?;
self.queue.batch_to_tasks_mapping.delete(wtxn, &batch_id)?;
}
self.queue.batches.all_batches.delete(wtxn, &batch_id)?;
self.queue.batch_to_tasks_mapping.delete(wtxn, &batch_id)?;
}
// Anyway, we must remove the batch from all its reverse indexes.
// The only way to do that is to check

View File

@@ -1,11 +1,11 @@
use std::collections::BTreeMap;
use std::fs::File;
use std::io::BufWriter;
use std::sync::atomic::Ordering;
use dump::IndexMetadata;
use meilisearch_types::milli::constants::RESERVED_VECTORS_FIELD_NAME;
use meilisearch_types::milli::progress::{Progress, VariableNameStep};
use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader};
use meilisearch_types::milli::progress::Progress;
use meilisearch_types::milli::vector::parsed_vectors::{ExplicitVectors, VectorOrArrayOfVectors};
use meilisearch_types::milli::{self};
use meilisearch_types::tasks::{Details, KindWithContent, Status, Task};
@@ -13,7 +13,7 @@ use time::macros::format_description;
use time::OffsetDateTime;
use crate::processing::{
AtomicBatchStep, AtomicDocumentStep, AtomicTaskStep, DumpCreationProgress,
AtomicDocumentStep, AtomicTaskStep, DumpCreationProgress, VariableNameStep,
};
use crate::{Error, IndexScheduler, Result};
@@ -72,13 +72,6 @@ impl IndexScheduler {
t.started_at = Some(started_at);
t.finished_at = Some(finished_at);
}
// Patch the task to remove the batch uid, because as of v1.12.5 batches are not persisted.
// This prevent from referencing *future* batches not actually associated with the task.
//
// See <https://github.com/meilisearch/meilisearch/issues/5247> for details.
t.batch_uid = None;
let mut dump_content_file = dump_tasks.push_task(&t.into())?;
// 2.1. Dump the `content_file` associated with the task if there is one and the task is not finished yet.
@@ -89,15 +82,19 @@ impl IndexScheduler {
if status == Status::Enqueued {
let content_file = self.queue.file_store.get_update(content_file)?;
for document in
serde_json::de::Deserializer::from_reader(content_file).into_iter()
{
let document = document.map_err(|e| {
Error::from_milli(milli::InternalError::SerdeJson(e).into(), None)
})?;
dump_content_file.push_document(&document)?;
}
let reader = DocumentsBatchReader::from_reader(content_file)
.map_err(|e| Error::from_milli(e.into(), None))?;
let (mut cursor, documents_batch_index) = reader.into_cursor_and_fields_index();
while let Some(doc) =
cursor.next_document().map_err(|e| Error::from_milli(e.into(), None))?
{
dump_content_file.push_document(
&obkv_to_object(doc, &documents_batch_index)
.map_err(|e| Error::from_milli(e, None))?,
)?;
}
dump_content_file.flush()?;
}
}
@@ -105,49 +102,12 @@ impl IndexScheduler {
}
dump_tasks.flush()?;
// 3. dump the batches
progress.update_progress(DumpCreationProgress::DumpTheBatches);
let mut dump_batches = dump.create_batches_queue()?;
let (atomic_batch_progress, update_batch_progress) =
AtomicBatchStep::new(self.queue.batches.all_batches.len(&rtxn)? as u32);
progress.update_progress(update_batch_progress);
for ret in self.queue.batches.all_batches.iter(&rtxn)? {
if self.scheduler.must_stop_processing.get() {
return Err(Error::AbortedTask);
}
let (_, mut b) = ret?;
// In the case we're dumping ourselves we want to be marked as finished
// to not loop over ourselves indefinitely.
if b.uid == task.uid {
let finished_at = OffsetDateTime::now_utc();
// We're going to fake the date because we don't know if everything is going to go well.
// But we need to dump the task as finished and successful.
// If something fail everything will be set appropriately in the end.
let mut statuses = BTreeMap::new();
statuses.insert(Status::Succeeded, b.stats.total_nb_tasks);
b.stats.status = statuses;
b.finished_at = Some(finished_at);
}
dump_batches.push_batch(&b)?;
atomic_batch_progress.fetch_add(1, Ordering::Relaxed);
}
dump_batches.flush()?;
// 4. Dump the indexes
// 3. Dump the indexes
progress.update_progress(DumpCreationProgress::DumpTheIndexes);
let nb_indexes = self.index_mapper.index_mapping.len(&rtxn)? as u32;
let mut count = 0;
let () = self.index_mapper.try_for_each_index(&rtxn, |uid, index| -> Result<()> {
progress.update_progress(VariableNameStep::<DumpCreationProgress>::new(
uid.to_string(),
count,
nb_indexes,
));
progress.update_progress(VariableNameStep::new(uid.to_string(), count, nb_indexes));
count += 1;
let rtxn = index.read_txn()?;
@@ -168,6 +128,7 @@ impl IndexScheduler {
let embedding_configs = index
.embedding_configs(&rtxn)
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
let decompression_dictionary = index.document_decompression_dictionary(&rtxn)?;
let nb_documents = index
.number_of_documents(&rtxn)
@@ -175,16 +136,21 @@ impl IndexScheduler {
as u32;
let (atomic, update_document_progress) = AtomicDocumentStep::new(nb_documents);
progress.update_progress(update_document_progress);
let doc_alloc = bumpalo::Bump::new();
let documents = index
.all_documents(&rtxn)
.all_compressed_documents(&rtxn)
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
// 4.1. Dump the documents
// 3.1. Dump the documents
for ret in documents {
if self.scheduler.must_stop_processing.get() {
return Err(Error::AbortedTask);
}
let (id, doc) = ret.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
let doc = match decompression_dictionary.as_ref() {
Some(dict) => doc.decompress_into_bump(&doc_alloc, dict)?,
None => doc.as_non_compressed(),
};
let mut document = milli::obkv_to_json(&all_fields, &fields_ids_map, doc)
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
@@ -240,7 +206,7 @@ impl IndexScheduler {
atomic.fetch_add(1, Ordering::Relaxed);
}
// 4.2. Dump the settings
// 3.2. Dump the settings
let settings = meilisearch_types::settings::settings(
index,
&rtxn,
@@ -251,12 +217,10 @@ impl IndexScheduler {
Ok(())
})?;
// 5. Dump experimental feature settings
// 4. Dump experimental feature settings
progress.update_progress(DumpCreationProgress::DumpTheExperimentalFeatures);
let features = self.features().runtime_features();
dump.create_experimental_features(features)?;
let network = self.network();
dump.create_network(network)?;
let dump_uid = started_at.format(format_description!(
"[year repr:full][month repr:numerical][day padding:zero]-[hour padding:zero][minute padding:zero][second padding:zero][subsecond digits:3]"

View File

@@ -3,12 +3,12 @@ use std::fs;
use std::sync::atomic::Ordering;
use meilisearch_types::heed::CompactionOption;
use meilisearch_types::milli::progress::{Progress, VariableNameStep};
use meilisearch_types::milli::progress::Progress;
use meilisearch_types::milli::{self};
use meilisearch_types::tasks::{Status, Task};
use meilisearch_types::{compression, VERSION_FILE_NAME};
use crate::processing::{AtomicUpdateFileStep, SnapshotCreationProgress};
use crate::processing::{AtomicUpdateFileStep, SnapshotCreationProgress, VariableNameStep};
use crate::{Error, IndexScheduler, Result};
impl IndexScheduler {
@@ -74,9 +74,7 @@ impl IndexScheduler {
for (i, result) in index_mapping.iter(&rtxn)?.enumerate() {
let (name, uuid) = result?;
progress.update_progress(VariableNameStep::<SnapshotCreationProgress>::new(
name, i as u32, nb_indexes,
));
progress.update_progress(VariableNameStep::new(name, i as u32, nb_indexes));
let index = self.index_mapper.index(&rtxn, name)?;
let dst = temp_snapshot_dir.path().join("indexes").join(uuid.to_string());
fs::create_dir_all(&dst)?;

View File

@@ -1,49 +0,0 @@
use meilisearch_types::milli;
use meilisearch_types::milli::progress::{Progress, VariableNameStep};
use crate::{Error, IndexScheduler, Result};
impl IndexScheduler {
pub(super) fn process_upgrade(
&self,
db_version: (u32, u32, u32),
progress: Progress,
) -> Result<()> {
#[cfg(test)]
self.maybe_fail(crate::test_utils::FailureLocation::ProcessUpgrade)?;
enum UpgradeIndex {}
let indexes = self.index_names()?;
for (i, uid) in indexes.iter().enumerate() {
progress.update_progress(VariableNameStep::<UpgradeIndex>::new(
format!("Upgrading index `{uid}`"),
i as u32,
indexes.len() as u32,
));
let index = self.index(uid)?;
let mut index_wtxn = index.write_txn()?;
let regen_stats = milli::update::upgrade::upgrade(
&mut index_wtxn,
&index,
db_version,
progress.clone(),
)
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
if regen_stats {
let stats = crate::index_mapper::IndexStats::new(&index, &index_wtxn)
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
index_wtxn.commit()?;
// Release wtxn as soon as possible because it stops us from registering tasks
let mut index_schd_wtxn = self.env.write_txn()?;
self.index_mapper.store_stats_of(&mut index_schd_wtxn, uid, &stats)?;
index_schd_wtxn.commit()?;
} else {
index_wtxn.commit()?;
}
}
Ok(())
}
}

View File

@@ -56,13 +56,16 @@ succeeded [1,]
### Batches Index Tasks:
----------------------------------------------------------------------
### Batches Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
[timestamp] [1,]
----------------------------------------------------------------------
### Batches Started At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Batches Finished At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### File Store:

View File

@@ -54,12 +54,15 @@ succeeded [1,]
### Batches Index Tasks:
----------------------------------------------------------------------
### Batches Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Batches Started At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Batches Finished At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### File Store:

View File

@@ -7,7 +7,7 @@ snapshot_kind: text
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "An unexpected crash occurred when processing the task: simulated panic", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "An unexpected crash occurred when processing the task.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
----------------------------------------------------------------------
### Status:
enqueued []

View File

@@ -1,110 +0,0 @@
---
source: crates/index-scheduler/src/scheduler/test_failure.rs
snapshot_kind: text
---
### Autobatching Enabled = true
### Processing batch None:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 13, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
1 {uid: 1, batch_uid: 1, status: succeeded, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
2 {uid: 2, batch_uid: 2, status: succeeded, details: { primary_key: Some("bone") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
3 {uid: 3, batch_uid: 3, status: failed, error: ResponseError { code: 200, message: "Index `doggo` already exists.", error_code: "index_already_exists", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_already_exists" }, details: { primary_key: Some("bone") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
4 {uid: 4, batch_uid: 4, status: succeeded, details: { primary_key: Some("leaves") }, kind: IndexCreation { index_uid: "girafo", primary_key: Some("leaves") }}
----------------------------------------------------------------------
### Status:
enqueued []
succeeded [0,1,2,4,]
failed [3,]
----------------------------------------------------------------------
### Kind:
"indexCreation" [1,2,3,4,]
"upgradeDatabase" [0,]
----------------------------------------------------------------------
### Index Tasks:
catto [1,]
doggo [2,3,]
girafo [4,]
----------------------------------------------------------------------
### Index Mapper:
catto: { number_of_documents: 0, field_distribution: {} }
doggo: { number_of_documents: 0, field_distribution: {} }
girafo: { number_of_documents: 0, field_distribution: {} }
----------------------------------------------------------------------
### Canceled By:
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
[timestamp] [2,]
[timestamp] [3,]
[timestamp] [4,]
----------------------------------------------------------------------
### Started At:
[timestamp] [0,]
[timestamp] [1,]
[timestamp] [2,]
[timestamp] [3,]
[timestamp] [4,]
----------------------------------------------------------------------
### Finished At:
[timestamp] [0,]
[timestamp] [1,]
[timestamp] [2,]
[timestamp] [3,]
[timestamp] [4,]
----------------------------------------------------------------------
### All Batches:
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.13.0"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, }
1 {uid: 1, details: {"primaryKey":"mouse"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"catto":1}}, }
2 {uid: 2, details: {"primaryKey":"bone"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"doggo":1}}, }
3 {uid: 3, details: {"primaryKey":"bone"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"indexCreation":1},"indexUids":{"doggo":1}}, }
4 {uid: 4, details: {"primaryKey":"leaves"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"girafo":1}}, }
----------------------------------------------------------------------
### Batch to tasks mapping:
0 [0,]
1 [1,]
2 [2,]
3 [3,]
4 [4,]
----------------------------------------------------------------------
### Batches Status:
succeeded [0,1,2,4,]
failed [3,]
----------------------------------------------------------------------
### Batches Kind:
"indexCreation" [1,2,3,4,]
"upgradeDatabase" [0,]
----------------------------------------------------------------------
### Batches Index Tasks:
catto [1,]
doggo [2,3,]
girafo [4,]
----------------------------------------------------------------------
### Batches Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
[timestamp] [2,]
[timestamp] [3,]
[timestamp] [4,]
----------------------------------------------------------------------
### Batches Started At:
[timestamp] [0,]
[timestamp] [1,]
[timestamp] [2,]
[timestamp] [3,]
[timestamp] [4,]
----------------------------------------------------------------------
### Batches Finished At:
[timestamp] [0,]
[timestamp] [1,]
[timestamp] [2,]
[timestamp] [3,]
[timestamp] [4,]
----------------------------------------------------------------------
### File Store:
----------------------------------------------------------------------

View File

@@ -1,112 +0,0 @@
---
source: crates/index-scheduler/src/scheduler/test_failure.rs
snapshot_kind: text
---
### Autobatching Enabled = true
### Processing batch None:
[]
----------------------------------------------------------------------
### All Tasks:
1 {uid: 1, batch_uid: 1, status: succeeded, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
2 {uid: 2, batch_uid: 2, status: succeeded, details: { primary_key: Some("bone") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
3 {uid: 3, batch_uid: 3, status: failed, error: ResponseError { code: 200, message: "Index `doggo` already exists.", error_code: "index_already_exists", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_already_exists" }, details: { primary_key: Some("bone") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
4 {uid: 4, batch_uid: 4, status: succeeded, details: { primary_key: Some("leaves") }, kind: IndexCreation { index_uid: "girafo", primary_key: Some("leaves") }}
5 {uid: 5, batch_uid: 5, status: succeeded, details: { matched_tasks: 1, deleted_tasks: Some(1), original_filter: "types=upgradeDatabase" }, kind: TaskDeletion { query: "types=upgradeDatabase", tasks: RoaringBitmap<[0]> }}
----------------------------------------------------------------------
### Status:
enqueued []
succeeded [1,2,4,5,]
failed [3,]
----------------------------------------------------------------------
### Kind:
"indexCreation" [1,2,3,4,]
"taskDeletion" [5,]
"upgradeDatabase" []
----------------------------------------------------------------------
### Index Tasks:
catto [1,]
doggo [2,3,]
girafo [4,]
----------------------------------------------------------------------
### Index Mapper:
catto: { number_of_documents: 0, field_distribution: {} }
doggo: { number_of_documents: 0, field_distribution: {} }
girafo: { number_of_documents: 0, field_distribution: {} }
----------------------------------------------------------------------
### Canceled By:
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [1,]
[timestamp] [2,]
[timestamp] [3,]
[timestamp] [4,]
[timestamp] [5,]
----------------------------------------------------------------------
### Started At:
[timestamp] [1,]
[timestamp] [2,]
[timestamp] [3,]
[timestamp] [4,]
[timestamp] [5,]
----------------------------------------------------------------------
### Finished At:
[timestamp] [1,]
[timestamp] [2,]
[timestamp] [3,]
[timestamp] [4,]
[timestamp] [5,]
----------------------------------------------------------------------
### All Batches:
1 {uid: 1, details: {"primaryKey":"mouse"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"catto":1}}, }
2 {uid: 2, details: {"primaryKey":"bone"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"doggo":1}}, }
3 {uid: 3, details: {"primaryKey":"bone"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"indexCreation":1},"indexUids":{"doggo":1}}, }
4 {uid: 4, details: {"primaryKey":"leaves"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"girafo":1}}, }
5 {uid: 5, details: {"matchedTasks":1,"deletedTasks":1,"originalFilter":"types=upgradeDatabase"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"taskDeletion":1},"indexUids":{}}, }
----------------------------------------------------------------------
### Batch to tasks mapping:
1 [1,]
2 [2,]
3 [3,]
4 [4,]
5 [5,]
----------------------------------------------------------------------
### Batches Status:
succeeded [1,2,4,5,]
failed [3,]
----------------------------------------------------------------------
### Batches Kind:
"indexCreation" [1,2,3,4,]
"taskDeletion" [5,]
"upgradeDatabase" []
----------------------------------------------------------------------
### Batches Index Tasks:
catto [1,]
doggo [2,3,]
girafo [4,]
----------------------------------------------------------------------
### Batches Enqueued At:
[timestamp] [1,]
[timestamp] [2,]
[timestamp] [3,]
[timestamp] [4,]
[timestamp] [5,]
----------------------------------------------------------------------
### Batches Started At:
[timestamp] [1,]
[timestamp] [2,]
[timestamp] [3,]
[timestamp] [4,]
[timestamp] [5,]
----------------------------------------------------------------------
### Batches Finished At:
[timestamp] [1,]
[timestamp] [2,]
[timestamp] [3,]
[timestamp] [4,]
[timestamp] [5,]
----------------------------------------------------------------------
### File Store:
----------------------------------------------------------------------

View File

@@ -1,51 +0,0 @@
---
source: crates/index-scheduler/src/scheduler/test_failure.rs
snapshot_kind: text
---
### Autobatching Enabled = true
### Processing batch None:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 13, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
----------------------------------------------------------------------
### Status:
enqueued [0,]
----------------------------------------------------------------------
### Kind:
"upgradeDatabase" [0,]
----------------------------------------------------------------------
### Index Tasks:
----------------------------------------------------------------------
### Index Mapper:
----------------------------------------------------------------------
### Canceled By:
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
----------------------------------------------------------------------
### Started At:
----------------------------------------------------------------------
### Finished At:
----------------------------------------------------------------------
### All Batches:
----------------------------------------------------------------------
### Batch to tasks mapping:
----------------------------------------------------------------------
### Batches Status:
----------------------------------------------------------------------
### Batches Kind:
----------------------------------------------------------------------
### Batches Index Tasks:
----------------------------------------------------------------------
### Batches Enqueued At:
----------------------------------------------------------------------
### Batches Started At:
----------------------------------------------------------------------
### Batches Finished At:
----------------------------------------------------------------------
### File Store:
----------------------------------------------------------------------

View File

@@ -1,55 +0,0 @@
---
source: crates/index-scheduler/src/scheduler/test_failure.rs
snapshot_kind: text
---
### Autobatching Enabled = true
### Processing batch None:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 13, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
----------------------------------------------------------------------
### Status:
enqueued [0,1,]
----------------------------------------------------------------------
### Kind:
"indexCreation" [1,]
"upgradeDatabase" [0,]
----------------------------------------------------------------------
### Index Tasks:
catto [1,]
----------------------------------------------------------------------
### Index Mapper:
----------------------------------------------------------------------
### Canceled By:
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Started At:
----------------------------------------------------------------------
### Finished At:
----------------------------------------------------------------------
### All Batches:
----------------------------------------------------------------------
### Batch to tasks mapping:
----------------------------------------------------------------------
### Batches Status:
----------------------------------------------------------------------
### Batches Kind:
----------------------------------------------------------------------
### Batches Index Tasks:
----------------------------------------------------------------------
### Batches Enqueued At:
----------------------------------------------------------------------
### Batches Started At:
----------------------------------------------------------------------
### Batches Finished At:
----------------------------------------------------------------------
### File Store:
----------------------------------------------------------------------

View File

@@ -1,65 +0,0 @@
---
source: crates/index-scheduler/src/scheduler/test_failure.rs
snapshot_kind: text
---
### Autobatching Enabled = true
### Processing batch None:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 13, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
----------------------------------------------------------------------
### Status:
enqueued [1,]
failed [0,]
----------------------------------------------------------------------
### Kind:
"indexCreation" [1,]
"upgradeDatabase" [0,]
----------------------------------------------------------------------
### Index Tasks:
catto [1,]
----------------------------------------------------------------------
### Index Mapper:
----------------------------------------------------------------------
### Canceled By:
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Started At:
[timestamp] [0,]
----------------------------------------------------------------------
### Finished At:
[timestamp] [0,]
----------------------------------------------------------------------
### All Batches:
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.13.0"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, }
----------------------------------------------------------------------
### Batch to tasks mapping:
0 [0,]
----------------------------------------------------------------------
### Batches Status:
failed [0,]
----------------------------------------------------------------------
### Batches Kind:
"upgradeDatabase" [0,]
----------------------------------------------------------------------
### Batches Index Tasks:
----------------------------------------------------------------------
### Batches Enqueued At:
[timestamp] [0,]
----------------------------------------------------------------------
### Batches Started At:
[timestamp] [0,]
----------------------------------------------------------------------
### Batches Finished At:
[timestamp] [0,]
----------------------------------------------------------------------
### File Store:
----------------------------------------------------------------------

View File

@@ -1,68 +0,0 @@
---
source: crates/index-scheduler/src/scheduler/test_failure.rs
snapshot_kind: text
---
### Autobatching Enabled = true
### Processing batch None:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 13, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
2 {uid: 2, status: enqueued, details: { primary_key: Some("bone") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
----------------------------------------------------------------------
### Status:
enqueued [1,2,]
failed [0,]
----------------------------------------------------------------------
### Kind:
"indexCreation" [1,2,]
"upgradeDatabase" [0,]
----------------------------------------------------------------------
### Index Tasks:
catto [1,]
doggo [2,]
----------------------------------------------------------------------
### Index Mapper:
----------------------------------------------------------------------
### Canceled By:
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
[timestamp] [2,]
----------------------------------------------------------------------
### Started At:
[timestamp] [0,]
----------------------------------------------------------------------
### Finished At:
[timestamp] [0,]
----------------------------------------------------------------------
### All Batches:
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.13.0"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, }
----------------------------------------------------------------------
### Batch to tasks mapping:
0 [0,]
----------------------------------------------------------------------
### Batches Status:
failed [0,]
----------------------------------------------------------------------
### Batches Kind:
"upgradeDatabase" [0,]
----------------------------------------------------------------------
### Batches Index Tasks:
----------------------------------------------------------------------
### Batches Enqueued At:
[timestamp] [0,]
----------------------------------------------------------------------
### Batches Started At:
[timestamp] [0,]
----------------------------------------------------------------------
### Batches Finished At:
[timestamp] [0,]
----------------------------------------------------------------------
### File Store:
----------------------------------------------------------------------

View File

@@ -1,72 +0,0 @@
---
source: crates/index-scheduler/src/scheduler/test_failure.rs
snapshot_kind: text
---
### Autobatching Enabled = true
### Processing batch None:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 13, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
2 {uid: 2, status: enqueued, details: { primary_key: Some("bone") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
3 {uid: 3, status: enqueued, details: { primary_key: Some("bone") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
----------------------------------------------------------------------
### Status:
enqueued [1,2,3,]
succeeded [0,]
failed []
----------------------------------------------------------------------
### Kind:
"indexCreation" [1,2,3,]
"upgradeDatabase" [0,]
----------------------------------------------------------------------
### Index Tasks:
catto [1,]
doggo [2,3,]
----------------------------------------------------------------------
### Index Mapper:
----------------------------------------------------------------------
### Canceled By:
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
[timestamp] [2,]
[timestamp] [3,]
----------------------------------------------------------------------
### Started At:
[timestamp] [0,]
----------------------------------------------------------------------
### Finished At:
[timestamp] [0,]
----------------------------------------------------------------------
### All Batches:
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.13.0"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, }
----------------------------------------------------------------------
### Batch to tasks mapping:
0 [0,]
----------------------------------------------------------------------
### Batches Status:
succeeded [0,]
failed []
----------------------------------------------------------------------
### Batches Kind:
"upgradeDatabase" [0,]
----------------------------------------------------------------------
### Batches Index Tasks:
----------------------------------------------------------------------
### Batches Enqueued At:
[timestamp] [0,]
----------------------------------------------------------------------
### Batches Started At:
[timestamp] [0,]
----------------------------------------------------------------------
### Batches Finished At:
[timestamp] [0,]
----------------------------------------------------------------------
### File Store:
----------------------------------------------------------------------

View File

@@ -713,70 +713,68 @@ fn basic_get_stats() {
let kind = index_creation_task("whalo", "fish");
let _task = index_scheduler.register(kind, None, false).unwrap();
snapshot!(json_string!(index_scheduler.get_stats().unwrap()), @r#"
{
"indexes": {
"catto": 1,
"doggo": 1,
"whalo": 1
},
"statuses": {
"canceled": 0,
"enqueued": 3,
"failed": 0,
"processing": 0,
"succeeded": 0
},
"types": {
"documentAdditionOrUpdate": 0,
"documentDeletion": 0,
"documentEdition": 0,
"dumpCreation": 0,
"indexCreation": 3,
"indexDeletion": 0,
"indexSwap": 0,
"indexUpdate": 0,
"settingsUpdate": 0,
"snapshotCreation": 0,
"taskCancelation": 0,
"taskDeletion": 0,
"upgradeDatabase": 0
}
}
"#);
snapshot!(json_string!(index_scheduler.get_stats().unwrap()), @r###"
{
"indexes": {
"catto": 1,
"doggo": 1,
"whalo": 1
},
"statuses": {
"canceled": 0,
"enqueued": 3,
"failed": 0,
"processing": 0,
"succeeded": 0
},
"types": {
"documentAdditionOrUpdate": 0,
"documentDeletion": 0,
"documentEdition": 0,
"dumpCreation": 0,
"indexCreation": 3,
"indexDeletion": 0,
"indexSwap": 0,
"indexUpdate": 0,
"settingsUpdate": 0,
"snapshotCreation": 0,
"taskCancelation": 0,
"taskDeletion": 0
}
}
"###);
handle.advance_till([Start, BatchCreated]);
snapshot!(json_string!(index_scheduler.get_stats().unwrap()), @r#"
{
"indexes": {
"catto": 1,
"doggo": 1,
"whalo": 1
},
"statuses": {
"canceled": 0,
"enqueued": 2,
"failed": 0,
"processing": 1,
"succeeded": 0
},
"types": {
"documentAdditionOrUpdate": 0,
"documentDeletion": 0,
"documentEdition": 0,
"dumpCreation": 0,
"indexCreation": 3,
"indexDeletion": 0,
"indexSwap": 0,
"indexUpdate": 0,
"settingsUpdate": 0,
"snapshotCreation": 0,
"taskCancelation": 0,
"taskDeletion": 0,
"upgradeDatabase": 0
}
}
"#);
snapshot!(json_string!(index_scheduler.get_stats().unwrap()), @r###"
{
"indexes": {
"catto": 1,
"doggo": 1,
"whalo": 1
},
"statuses": {
"canceled": 0,
"enqueued": 2,
"failed": 0,
"processing": 1,
"succeeded": 0
},
"types": {
"documentAdditionOrUpdate": 0,
"documentDeletion": 0,
"documentEdition": 0,
"dumpCreation": 0,
"indexCreation": 3,
"indexDeletion": 0,
"indexSwap": 0,
"indexUpdate": 0,
"settingsUpdate": 0,
"snapshotCreation": 0,
"taskCancelation": 0,
"taskDeletion": 0
}
}
"###);
handle.advance_till([
InsideProcessBatch,
@@ -786,37 +784,36 @@ fn basic_get_stats() {
Start,
BatchCreated,
]);
snapshot!(json_string!(index_scheduler.get_stats().unwrap()), @r#"
{
"indexes": {
"catto": 1,
"doggo": 1,
"whalo": 1
},
"statuses": {
"canceled": 0,
"enqueued": 1,
"failed": 0,
"processing": 1,
"succeeded": 1
},
"types": {
"documentAdditionOrUpdate": 0,
"documentDeletion": 0,
"documentEdition": 0,
"dumpCreation": 0,
"indexCreation": 3,
"indexDeletion": 0,
"indexSwap": 0,
"indexUpdate": 0,
"settingsUpdate": 0,
"snapshotCreation": 0,
"taskCancelation": 0,
"taskDeletion": 0,
"upgradeDatabase": 0
}
}
"#);
snapshot!(json_string!(index_scheduler.get_stats().unwrap()), @r###"
{
"indexes": {
"catto": 1,
"doggo": 1,
"whalo": 1
},
"statuses": {
"canceled": 0,
"enqueued": 1,
"failed": 0,
"processing": 1,
"succeeded": 1
},
"types": {
"documentAdditionOrUpdate": 0,
"documentDeletion": 0,
"documentEdition": 0,
"dumpCreation": 0,
"indexCreation": 3,
"indexDeletion": 0,
"indexSwap": 0,
"indexUpdate": 0,
"settingsUpdate": 0,
"snapshotCreation": 0,
"taskCancelation": 0,
"taskDeletion": 0
}
}
"###);
// now we make one more batch, the started_at field of the new tasks will be past `second_start_time`
handle.advance_till([
@@ -827,37 +824,36 @@ fn basic_get_stats() {
Start,
BatchCreated,
]);
snapshot!(json_string!(index_scheduler.get_stats().unwrap()), @r#"
{
"indexes": {
"catto": 1,
"doggo": 1,
"whalo": 1
},
"statuses": {
"canceled": 0,
"enqueued": 0,
"failed": 0,
"processing": 1,
"succeeded": 2
},
"types": {
"documentAdditionOrUpdate": 0,
"documentDeletion": 0,
"documentEdition": 0,
"dumpCreation": 0,
"indexCreation": 3,
"indexDeletion": 0,
"indexSwap": 0,
"indexUpdate": 0,
"settingsUpdate": 0,
"snapshotCreation": 0,
"taskCancelation": 0,
"taskDeletion": 0,
"upgradeDatabase": 0
}
}
"#);
snapshot!(json_string!(index_scheduler.get_stats().unwrap()), @r###"
{
"indexes": {
"catto": 1,
"doggo": 1,
"whalo": 1
},
"statuses": {
"canceled": 0,
"enqueued": 0,
"failed": 0,
"processing": 1,
"succeeded": 2
},
"types": {
"documentAdditionOrUpdate": 0,
"documentDeletion": 0,
"documentEdition": 0,
"dumpCreation": 0,
"indexCreation": 3,
"indexDeletion": 0,
"indexSwap": 0,
"indexUpdate": 0,
"settingsUpdate": 0,
"snapshotCreation": 0,
"taskCancelation": 0,
"taskDeletion": 0
}
}
"###);
}
#[test]
@@ -903,7 +899,7 @@ fn create_and_list_index() {
index_scheduler.index("kefir").unwrap();
let list = index_scheduler.get_paginated_indexes_stats(&AuthFilter::default(), 0, 20).unwrap();
snapshot!(json_string!(list, { "[1][0][1].created_at" => "[date]", "[1][0][1].updated_at" => "[date]", "[1][0][1].used_database_size" => "[bytes]", "[1][0][1].database_size" => "[bytes]" }), @r###"
snapshot!(json_string!(list, { "[1][0][1].created_at" => "[date]", "[1][0][1].updated_at" => "[date]" }), @r#"
[
1,
[
@@ -911,10 +907,8 @@ fn create_and_list_index() {
"kefir",
{
"number_of_documents": 0,
"database_size": "[bytes]",
"number_of_embeddings": 0,
"number_of_embedded_documents": 0,
"used_database_size": "[bytes]",
"database_size": 24576,
"used_database_size": 8192,
"primary_key": null,
"field_distribution": {},
"created_at": "[date]",
@@ -923,5 +917,5 @@ fn create_and_list_index() {
]
]
]
"###);
"#);
}

View File

@@ -6,7 +6,7 @@ use meili_snap::snapshot;
use meilisearch_types::milli::obkv_to_json;
use meilisearch_types::milli::update::IndexDocumentsMethod::*;
use meilisearch_types::milli::update::Setting;
use meilisearch_types::tasks::{Kind, KindWithContent};
use meilisearch_types::tasks::KindWithContent;
use crate::insta_snapshot::snapshot_index_scheduler;
use crate::test_utils::Breakpoint::*;
@@ -249,78 +249,3 @@ fn panic_in_process_batch_for_index_creation() {
// No matter what happens in process_batch, the index_scheduler should be internally consistent
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "index_creation_failed");
}
#[test]
fn upgrade_failure() {
// By starting the index-scheduler at the v1.12.0 an upgrade task should be automatically enqueued
let (index_scheduler, mut handle) =
IndexScheduler::test_with_custom_config(vec![(1, FailureLocation::ProcessUpgrade)], |_| {
Some((1, 12, 0))
});
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "register_automatic_upgrade_task");
let kind = index_creation_task("catto", "mouse");
let _task = index_scheduler.register(kind, None, false).unwrap();
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_a_task_while_the_upgrade_task_is_enqueued");
handle.advance_one_failed_batch();
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "upgrade_task_failed");
// We can still register tasks
let kind = index_creation_task("doggo", "bone");
let _task = index_scheduler.register(kind, None, false).unwrap();
// But the scheduler is down and won't process anything ever again
handle.scheduler_is_down();
// =====> After a restart is it still working as expected?
let (index_scheduler, mut handle) =
handle.restart(index_scheduler, true, vec![(1, FailureLocation::ProcessUpgrade)], |_| {
Some((1, 12, 0)) // the upgrade task should be rerun automatically and nothing else should be enqueued
});
handle.advance_one_failed_batch();
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "upgrade_task_failed_again");
// We can still register tasks
let kind = index_creation_task("doggo", "bone");
let _task = index_scheduler.register(kind, None, false).unwrap();
// And the scheduler is still down and won't process anything ever again
handle.scheduler_is_down();
// =====> After a rerestart and without failure can we upgrade the indexes and process the tasks
let (index_scheduler, mut handle) =
handle.restart(index_scheduler, true, vec![], |_| Some((1, 12, 0)));
handle.advance_one_successful_batch();
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "upgrade_task_succeeded");
// We can still register tasks
let kind = index_creation_task("girafo", "leaves");
let _task = index_scheduler.register(kind, None, false).unwrap();
// The scheduler is up and running
handle.advance_one_successful_batch();
handle.advance_one_successful_batch();
handle.advance_one_failed_batch(); // doggo already exists
handle.advance_one_successful_batch();
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_processing_everything");
let (upgrade_tasks_ids, _) = index_scheduler
.get_task_ids_from_authorized_indexes(
&crate::Query { types: Some(vec![Kind::UpgradeDatabase]), ..Default::default() },
&Default::default(),
)
.unwrap();
// When deleting the single upgrade task it should remove the associated batch
let _task = index_scheduler
.register(
KindWithContent::TaskDeletion {
query: String::from("types=upgradeDatabase"),
tasks: upgrade_tasks_ids,
},
None,
false,
)
.unwrap();
handle.advance_one_successful_batch();
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_removing_the_upgrade_tasks");
}

View File

@@ -1,18 +1,10 @@
use std::io::{BufWriter, Write};
use std::sync::Arc;
use std::time::Duration;
use big_s::S;
use crossbeam_channel::RecvTimeoutError;
use file_store::File;
use meilisearch_types::document_formats::DocumentFormatError;
use meilisearch_types::milli::update::IndexDocumentsMethod::ReplaceDocuments;
use meilisearch_types::milli::update::IndexerConfig;
use meilisearch_types::tasks::KindWithContent;
use meilisearch_types::{versioning, VERSION_FILE_NAME};
use tempfile::{NamedTempFile, TempDir};
use uuid::Uuid;
use Breakpoint::*;
use crate::insta_snapshot::snapshot_index_scheduler;
use crate::{Error, IndexScheduler, IndexSchedulerOptions};
@@ -36,13 +28,20 @@ pub(crate) enum FailureLocation {
InsideCreateBatch,
InsideProcessBatch,
PanicInsideProcessBatch,
ProcessUpgrade,
AcquiringWtxn,
UpdatingTaskAfterProcessBatchSuccess { task_uid: u32 },
UpdatingTaskAfterProcessBatchFailure,
CommittingWtxn,
}
use big_s::S;
use crossbeam_channel::RecvTimeoutError;
use meilisearch_types::milli::update::IndexerConfig;
use meilisearch_types::tasks::KindWithContent;
use meilisearch_types::VERSION_FILE_NAME;
use tempfile::{NamedTempFile, TempDir};
use Breakpoint::*;
impl IndexScheduler {
/// Blocks the thread until the test handle asks to progress to/through this breakpoint.
///
@@ -56,6 +55,7 @@ impl IndexScheduler {
/// As soon as we find it, the index scheduler is unblocked but then wait again on the call to
/// `test_breakpoint_sdr.send(b, true)`. This message will only be able to send once the
/// test asks to progress to the next `(b2, false)`.
#[cfg(test)]
pub(crate) fn breakpoint(&self, b: Breakpoint) {
// We send two messages. The first one will sync with the call
// to `handle.wait_until(b)`. The second one will block until the
@@ -75,13 +75,12 @@ impl IndexScheduler {
) -> (Self, IndexSchedulerHandle) {
Self::test_with_custom_config(planned_failures, |config| {
config.autobatching_enabled = autobatching_enabled;
None
})
}
pub(crate) fn test_with_custom_config(
planned_failures: Vec<(usize, FailureLocation)>,
configuration: impl Fn(&mut IndexSchedulerOptions) -> Option<(u32, u32, u32)>,
configuration: impl Fn(&mut IndexSchedulerOptions),
) -> (Self, IndexSchedulerHandle) {
let tempdir = TempDir::new().unwrap();
let (sender, receiver) = crossbeam_channel::bounded(0);
@@ -110,17 +109,10 @@ impl IndexScheduler {
max_number_of_batched_tasks: usize::MAX,
batched_tasks_size_limit: u64::MAX,
instance_features: Default::default(),
auto_upgrade: true, // Don't cost much and will ensure the happy path works
};
let version = configuration(&mut options).unwrap_or_else(|| {
(
versioning::VERSION_MAJOR.parse().unwrap(),
versioning::VERSION_MINOR.parse().unwrap(),
versioning::VERSION_PATCH.parse().unwrap(),
)
});
configuration(&mut options);
let index_scheduler = Self::new(options, version, sender, planned_failures).unwrap();
let index_scheduler = Self::new(options, sender, planned_failures).unwrap();
// To be 100% consistent between all test we're going to start the scheduler right now
// and ensure it's in the expected starting state.
@@ -232,55 +224,6 @@ pub struct IndexSchedulerHandle {
}
impl IndexSchedulerHandle {
/// Restarts the index-scheduler on the same database.
/// To use this function you must give back the index-scheduler that was given to you when
/// creating the handle the first time.
/// If the index-scheduler has been cloned in the meantime you must drop all copy otherwise
/// the function will panic.
pub(crate) fn restart(
self,
index_scheduler: IndexScheduler,
autobatching_enabled: bool,
planned_failures: Vec<(usize, FailureLocation)>,
configuration: impl Fn(&mut IndexSchedulerOptions) -> Option<(u32, u32, u32)>,
) -> (IndexScheduler, Self) {
drop(index_scheduler);
let Self { _tempdir: tempdir, index_scheduler, test_breakpoint_rcv, last_breakpoint: _ } =
self;
let env = index_scheduler.env.clone();
drop(index_scheduler);
// We must ensure that the `run` function has stopped running before restarting the index scheduler
loop {
match test_breakpoint_rcv.recv_timeout(Duration::from_secs(5)) {
Ok((_, true)) => continue,
Ok((b, false)) => {
panic!("Scheduler is not stopped and passed {b:?}")
}
Err(RecvTimeoutError::Timeout) => panic!("The indexing loop is stuck somewhere"),
Err(RecvTimeoutError::Disconnected) => break,
}
}
let closed = env.prepare_for_closing().wait_timeout(Duration::from_secs(5));
assert!(closed, "The index scheduler couldn't close itself, it seems like someone else is holding the env somewhere");
let (scheduler, mut handle) =
IndexScheduler::test_with_custom_config(planned_failures, |config| {
let version = configuration(config);
config.autobatching_enabled = autobatching_enabled;
config.version_file_path = tempdir.path().join(VERSION_FILE_NAME);
config.auth_path = tempdir.path().join("auth");
config.tasks_path = tempdir.path().join("db_path");
config.update_file_path = tempdir.path().join("file_store");
config.indexes_path = tempdir.path().join("indexes");
config.snapshots_path = tempdir.path().join("snapshots");
config.dumps_path = tempdir.path().join("dumps");
version
});
handle._tempdir = tempdir;
(scheduler, handle)
}
/// Advance the scheduler to the next tick.
/// Panic
/// * If the scheduler is waiting for a task to be registered.
@@ -406,18 +349,4 @@ impl IndexSchedulerHandle {
}
self.advance_till([AfterProcessing]);
}
// Wait for one failed batch.
#[track_caller]
pub(crate) fn scheduler_is_down(&mut self) {
loop {
match self
.test_breakpoint_rcv
.recv_timeout(std::time::Duration::from_secs(1)) {
Ok((_, true)) => continue,
Ok((b, false)) => panic!("The scheduler was supposed to be down but successfully moved to the next breakpoint: {b:?}"),
Err(RecvTimeoutError::Timeout | RecvTimeoutError::Disconnected) => break,
}
}
}
}

View File

@@ -1,108 +0,0 @@
use anyhow::bail;
use meilisearch_types::heed::{Env, RwTxn};
use meilisearch_types::tasks::{Details, KindWithContent, Status, Task};
use meilisearch_types::versioning::{VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH};
use time::OffsetDateTime;
use tracing::info;
use crate::queue::TaskQueue;
use crate::versioning::Versioning;
trait UpgradeIndexScheduler {
fn upgrade(&self, env: &Env, wtxn: &mut RwTxn, original: (u32, u32, u32))
-> anyhow::Result<()>;
fn target_version(&self) -> (u32, u32, u32);
}
pub fn upgrade_index_scheduler(
env: &Env,
versioning: &Versioning,
from: (u32, u32, u32),
to: (u32, u32, u32),
) -> anyhow::Result<()> {
let current_major = to.0;
let current_minor = to.1;
let current_patch = to.2;
let upgrade_functions: &[&dyn UpgradeIndexScheduler] = &[&V1_12_ToCurrent {}];
let start = match from {
(1, 12, _) => 0,
(major, minor, patch) => {
if major > current_major
|| (major == current_major && minor > current_minor)
|| (major == current_major && minor == current_minor && patch > current_patch)
{
bail!(
"Database version {major}.{minor}.{patch} is higher than the Meilisearch version {current_major}.{current_minor}.{current_patch}. Downgrade is not supported",
);
} else if major < 1 || (major == current_major && minor < 12) {
bail!(
"Database version {major}.{minor}.{patch} is too old for the experimental dumpless upgrade feature. Please generate a dump using the v{major}.{minor}.{patch} and import it in the v{current_major}.{current_minor}.{current_patch}",
);
} else {
bail!("Unknown database version: v{major}.{minor}.{patch}");
}
}
};
let mut current_version = from;
info!("Upgrading the task queue");
for upgrade in upgrade_functions[start..].iter() {
let target = upgrade.target_version();
info!(
"Upgrading from v{}.{}.{} to v{}.{}.{}",
from.0, from.1, from.2, current_version.0, current_version.1, current_version.2
);
let mut wtxn = env.write_txn()?;
upgrade.upgrade(env, &mut wtxn, from)?;
versioning.set_version(&mut wtxn, target)?;
wtxn.commit()?;
current_version = target;
}
let mut wtxn = env.write_txn()?;
let queue = TaskQueue::new(env, &mut wtxn)?;
let uid = queue.next_task_id(&wtxn)?;
queue.register(
&mut wtxn,
&Task {
uid,
batch_uid: None,
enqueued_at: OffsetDateTime::now_utc(),
started_at: None,
finished_at: None,
error: None,
canceled_by: None,
details: Some(Details::UpgradeDatabase { from, to }),
status: Status::Enqueued,
kind: KindWithContent::UpgradeDatabase { from },
},
)?;
wtxn.commit()?;
Ok(())
}
#[allow(non_camel_case_types)]
struct V1_12_ToCurrent {}
impl UpgradeIndexScheduler for V1_12_ToCurrent {
fn upgrade(
&self,
_env: &Env,
_wtxn: &mut RwTxn,
_original: (u32, u32, u32),
) -> anyhow::Result<()> {
Ok(())
}
fn target_version(&self) -> (u32, u32, u32) {
(
VERSION_MAJOR.parse().unwrap(),
VERSION_MINOR.parse().unwrap(),
VERSION_PATCH.parse().unwrap(),
)
}
}

View File

@@ -3,7 +3,7 @@
use std::collections::{BTreeSet, HashSet};
use std::ops::Bound;
use meilisearch_types::batches::{Batch, BatchEnqueuedAt, BatchId, BatchStats};
use meilisearch_types::batches::{Batch, BatchId, BatchStats};
use meilisearch_types::heed::{Database, RoTxn, RwTxn};
use meilisearch_types::milli::CboRoaringBitmapCodec;
use meilisearch_types::task_view::DetailsView;
@@ -30,7 +30,8 @@ pub struct ProcessingBatch {
pub kinds: HashSet<Kind>,
pub indexes: HashSet<String>,
pub canceled_by: HashSet<TaskId>,
pub enqueued_at: Option<BatchEnqueuedAt>,
pub oldest_enqueued_at: Option<OffsetDateTime>,
pub earliest_enqueued_at: Option<OffsetDateTime>,
pub started_at: OffsetDateTime,
pub finished_at: Option<OffsetDateTime>,
}
@@ -50,7 +51,8 @@ impl ProcessingBatch {
kinds: HashSet::default(),
indexes: HashSet::default(),
canceled_by: HashSet::default(),
enqueued_at: None,
oldest_enqueued_at: None,
earliest_enqueued_at: None,
started_at: OffsetDateTime::now_utc(),
finished_at: None,
}
@@ -78,18 +80,14 @@ impl ProcessingBatch {
if let Some(canceled_by) = task.canceled_by {
self.canceled_by.insert(canceled_by);
}
match self.enqueued_at.as_mut() {
Some(BatchEnqueuedAt { earliest, oldest }) => {
*oldest = task.enqueued_at.min(*oldest);
*earliest = task.enqueued_at.max(*earliest);
}
None => {
self.enqueued_at = Some(BatchEnqueuedAt {
earliest: task.enqueued_at,
oldest: task.enqueued_at,
});
}
}
self.oldest_enqueued_at =
Some(self.oldest_enqueued_at.map_or(task.enqueued_at, |oldest_enqueued_at| {
task.enqueued_at.min(oldest_enqueued_at)
}));
self.earliest_enqueued_at =
Some(self.earliest_enqueued_at.map_or(task.enqueued_at, |earliest_enqueued_at| {
task.enqueued_at.max(earliest_enqueued_at)
}));
}
}
@@ -140,7 +138,6 @@ impl ProcessingBatch {
stats: self.stats.clone(),
started_at: self.started_at,
finished_at: self.finished_at,
enqueued_at: self.enqueued_at,
}
}
}
@@ -177,33 +174,6 @@ pub(crate) fn remove_task_datetime(
Ok(())
}
pub(crate) fn remove_n_tasks_datetime_earlier_than(
wtxn: &mut RwTxn,
database: Database<BEI128, CboRoaringBitmapCodec>,
earlier_than: OffsetDateTime,
mut count: usize,
task_id: TaskId,
) -> Result<()> {
let earlier_than = earlier_than.unix_timestamp_nanos();
let mut iter = database.rev_range_mut(wtxn, &(..earlier_than))?;
while let Some((current, mut existing)) = iter.next().transpose()? {
count -= existing.remove(task_id) as usize;
if existing.is_empty() {
// safety: We don't keep references to the database
unsafe { iter.del_current()? };
} else {
// safety: We don't keep references to the database
unsafe { iter.put_current(&current, &existing)? };
}
if count == 0 {
break;
}
}
Ok(())
}
pub(crate) fn keep_ids_within_datetimes(
rtxn: &RoTxn,
ids: &mut RoaringBitmap,
@@ -264,7 +234,6 @@ pub fn swap_index_uid_in_task(task: &mut Task, swap: (&str, &str)) {
K::TaskCancelation { .. }
| K::TaskDeletion { .. }
| K::DumpCreation { .. }
| K::UpgradeDatabase { .. }
| K::SnapshotCreation => (),
};
if let Some(Details::IndexSwap { swaps }) = &mut task.details {
@@ -359,27 +328,14 @@ impl crate::IndexScheduler {
kind,
} = task;
assert_eq!(uid, task.uid);
if task.status != Status::Enqueued {
let batch_uid = batch_uid.expect("All non enqueued tasks must be part of a batch");
if let Some(ref batch) = batch_uid {
assert!(self
.queue
.batch_to_tasks_mapping
.get(&rtxn, &batch_uid)
.get(&rtxn, batch)
.unwrap()
.unwrap()
.contains(uid));
let batch = self.queue.batches.get_batch(&rtxn, batch_uid).unwrap().unwrap();
assert_eq!(batch.uid, batch_uid);
if task.status == Status::Processing {
assert!(batch.progress.is_some());
} else {
assert!(batch.progress.is_none());
}
assert_eq!(batch.started_at, task.started_at.unwrap());
assert_eq!(batch.finished_at, task.finished_at);
let enqueued_at = batch.enqueued_at.unwrap();
assert!(task.enqueued_at >= enqueued_at.oldest);
assert!(task.enqueued_at <= enqueued_at.earliest);
}
if let Some(task_index_uid) = &task_index_uid {
assert!(self
@@ -591,9 +547,6 @@ impl crate::IndexScheduler {
Details::Dump { dump_uid: _ } => {
assert_eq!(kind.as_kind(), Kind::DumpCreation);
}
Details::UpgradeDatabase { from: _, to: _ } => {
assert_eq!(kind.as_kind(), Kind::UpgradeDatabase);
}
}
}

View File

@@ -1,84 +0,0 @@
use meilisearch_types::heed::types::Str;
use meilisearch_types::heed::{self, Database, Env, RoTxn, RwTxn};
use meilisearch_types::milli::heed_codec::version::VersionCodec;
use meilisearch_types::versioning;
use crate::upgrade::upgrade_index_scheduler;
use crate::Result;
/// The number of database used by queue itself
const NUMBER_OF_DATABASES: u32 = 1;
/// Database const names for the `IndexScheduler`.
mod db_name {
pub const VERSION: &str = "version";
}
mod entry_name {
pub const MAIN: &str = "main";
}
#[derive(Clone)]
pub struct Versioning {
pub version: Database<Str, VersionCodec>,
}
impl Versioning {
pub const fn nb_db() -> u32 {
NUMBER_OF_DATABASES
}
pub fn get_version(&self, rtxn: &RoTxn) -> Result<Option<(u32, u32, u32)>, heed::Error> {
self.version.get(rtxn, entry_name::MAIN)
}
pub fn set_version(
&self,
wtxn: &mut RwTxn,
version: (u32, u32, u32),
) -> Result<(), heed::Error> {
self.version.put(wtxn, entry_name::MAIN, &version)
}
pub fn set_current_version(&self, wtxn: &mut RwTxn) -> Result<(), heed::Error> {
let major = versioning::VERSION_MAJOR.parse().unwrap();
let minor = versioning::VERSION_MINOR.parse().unwrap();
let patch = versioning::VERSION_PATCH.parse().unwrap();
self.set_version(wtxn, (major, minor, patch))
}
/// Return `Self` without checking anything about the version
pub fn raw_new(env: &Env, wtxn: &mut RwTxn) -> Result<Self, heed::Error> {
let version = env.create_database(wtxn, Some(db_name::VERSION))?;
Ok(Self { version })
}
pub(crate) fn new(env: &Env, db_version: (u32, u32, u32)) -> Result<Self> {
let mut wtxn = env.write_txn()?;
let this = Self::raw_new(env, &mut wtxn)?;
let from = match this.get_version(&wtxn)? {
Some(version) => version,
// fresh DB: use the db version
None => {
this.set_version(&mut wtxn, db_version)?;
db_version
}
};
wtxn.commit()?;
let bin_major: u32 = versioning::VERSION_MAJOR.parse().unwrap();
let bin_minor: u32 = versioning::VERSION_MINOR.parse().unwrap();
let bin_patch: u32 = versioning::VERSION_PATCH.parse().unwrap();
let to = (bin_major, bin_minor, bin_patch);
if from != to {
upgrade_index_scheduler(env, &this, from, to)?;
}
// Once we reach this point it means the upgrade process, if there was one is entirely finished
// we can safely say we reached the latest version of the index scheduler
let mut wtxn = env.write_txn()?;
this.set_current_version(&mut wtxn)?;
wtxn.commit()?;
Ok(this)
}
}

View File

@@ -24,35 +24,9 @@ pub struct Batch {
pub started_at: OffsetDateTime,
#[serde(with = "time::serde::rfc3339::option")]
pub finished_at: Option<OffsetDateTime>,
// Enqueued at is never displayed and is only required when removing a batch.
// It's always some except when upgrading from a database pre v1.12
pub enqueued_at: Option<BatchEnqueuedAt>,
}
impl PartialEq for Batch {
fn eq(&self, other: &Self) -> bool {
let Self { uid, progress, details, stats, started_at, finished_at, enqueued_at } = self;
*uid == other.uid
&& progress.is_none() == other.progress.is_none()
&& details == &other.details
&& stats == &other.stats
&& started_at == &other.started_at
&& finished_at == &other.finished_at
&& enqueued_at == &other.enqueued_at
}
}
#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub struct BatchEnqueuedAt {
#[serde(with = "time::serde::rfc3339")]
pub earliest: OffsetDateTime,
#[serde(with = "time::serde::rfc3339")]
pub oldest: OffsetDateTime,
}
#[derive(Default, Debug, Clone, PartialEq, Eq, Serialize, Deserialize, ToSchema)]
#[derive(Default, Debug, Clone, Serialize, Deserialize, ToSchema)]
#[serde(rename_all = "camelCase")]
#[schema(rename_all = "camelCase")]
pub struct BatchStats {

View File

@@ -193,8 +193,6 @@ merge_with_error_impl_take_error_message!(ParseTaskKindError);
merge_with_error_impl_take_error_message!(ParseTaskStatusError);
merge_with_error_impl_take_error_message!(IndexUidFormatError);
merge_with_error_impl_take_error_message!(InvalidMultiSearchWeight);
merge_with_error_impl_take_error_message!(InvalidNetworkUrl);
merge_with_error_impl_take_error_message!(InvalidNetworkSearchApiKey);
merge_with_error_impl_take_error_message!(InvalidSearchSemanticRatio);
merge_with_error_impl_take_error_message!(InvalidSearchRankingScoreThreshold);
merge_with_error_impl_take_error_message!(InvalidSimilarRankingScoreThreshold);

View File

@@ -260,13 +260,7 @@ InvalidMultiSearchMergeFacets , InvalidRequest , BAD_REQUEST ;
InvalidMultiSearchQueryFacets , InvalidRequest , BAD_REQUEST ;
InvalidMultiSearchQueryPagination , InvalidRequest , BAD_REQUEST ;
InvalidMultiSearchQueryRankingRules , InvalidRequest , BAD_REQUEST ;
InvalidMultiSearchQueryPosition , InvalidRequest , BAD_REQUEST ;
InvalidMultiSearchRemote , InvalidRequest , BAD_REQUEST ;
InvalidMultiSearchWeight , InvalidRequest , BAD_REQUEST ;
InvalidNetworkRemotes , InvalidRequest , BAD_REQUEST ;
InvalidNetworkSelf , InvalidRequest , BAD_REQUEST ;
InvalidNetworkSearchApiKey , InvalidRequest , BAD_REQUEST ;
InvalidNetworkUrl , InvalidRequest , BAD_REQUEST ;
InvalidSearchAttributesToSearchOn , InvalidRequest , BAD_REQUEST ;
InvalidSearchAttributesToCrop , InvalidRequest , BAD_REQUEST ;
InvalidSearchAttributesToHighlight , InvalidRequest , BAD_REQUEST ;
@@ -357,22 +351,14 @@ MissingDocumentId , InvalidRequest , BAD_REQUEST ;
MissingFacetSearchFacetName , InvalidRequest , BAD_REQUEST ;
MissingIndexUid , InvalidRequest , BAD_REQUEST ;
MissingMasterKey , Auth , UNAUTHORIZED ;
MissingNetworkUrl , InvalidRequest , BAD_REQUEST ;
MissingPayload , InvalidRequest , BAD_REQUEST ;
MissingSearchHybrid , InvalidRequest , BAD_REQUEST ;
MissingSwapIndexes , InvalidRequest , BAD_REQUEST ;
MissingTaskFilters , InvalidRequest , BAD_REQUEST ;
NoSpaceLeftOnDevice , System , UNPROCESSABLE_ENTITY;
PayloadTooLarge , InvalidRequest , PAYLOAD_TOO_LARGE ;
RemoteBadResponse , System , BAD_GATEWAY ;
RemoteBadRequest , InvalidRequest , BAD_REQUEST ;
RemoteCouldNotSendRequest , System , BAD_GATEWAY ;
RemoteInvalidApiKey , Auth , FORBIDDEN ;
RemoteRemoteError , System , BAD_GATEWAY ;
RemoteTimeout , System , BAD_GATEWAY ;
TooManySearchRequests , System , SERVICE_UNAVAILABLE ;
TaskNotFound , InvalidRequest , NOT_FOUND ;
TaskFileNotFound , InvalidRequest , NOT_FOUND ;
BatchNotFound , InvalidRequest , NOT_FOUND ;
TooManyOpenFiles , System , UNPROCESSABLE_ENTITY ;
TooManyVectors , InvalidRequest , BAD_REQUEST ;
@@ -597,18 +583,6 @@ impl fmt::Display for deserr_codes::InvalidSimilarRankingScoreThreshold {
}
}
impl fmt::Display for deserr_codes::InvalidNetworkUrl {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "the value of `url` is invalid, expected a string.")
}
}
impl fmt::Display for deserr_codes::InvalidNetworkSearchApiKey {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "the value of `searchApiKey` is invalid, expected a string.")
}
}
#[macro_export]
macro_rules! internal_error {
($target:ty : $($other:path), *) => {

View File

@@ -1,5 +1,3 @@
use std::collections::BTreeMap;
use serde::{Deserialize, Serialize};
#[derive(Serialize, Deserialize, Debug, Clone, Copy, Default, PartialEq, Eq)]
@@ -9,8 +7,6 @@ pub struct RuntimeTogglableFeatures {
pub logs_route: bool,
pub edit_documents_by_function: bool,
pub contains_filter: bool,
pub network: bool,
pub get_task_documents_route: bool,
}
#[derive(Default, Debug, Clone, Copy)]
@@ -19,20 +15,3 @@ pub struct InstanceTogglableFeatures {
pub logs_route: bool,
pub contains_filter: bool,
}
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
#[serde(rename_all = "camelCase")]
pub struct Remote {
pub url: String,
#[serde(default)]
pub search_api_key: Option<String>,
}
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, Default)]
#[serde(rename_all = "camelCase")]
pub struct Network {
#[serde(default, rename = "self")]
pub local: Option<String>,
#[serde(default)]
pub remotes: BTreeMap<String, Remote>,
}

View File

@@ -4,14 +4,13 @@ use std::fmt;
use std::str::FromStr;
use deserr::Deserr;
use serde::Serialize;
use utoipa::ToSchema;
use crate::error::{Code, ErrorCode};
/// An index uid is composed of only ascii alphanumeric characters, - and _, between 1 and 400
/// bytes long
#[derive(Debug, Clone, PartialEq, Eq, Deserr, PartialOrd, Ord, Serialize, ToSchema)]
#[derive(Debug, Clone, PartialEq, Eq, Deserr, PartialOrd, Ord, ToSchema)]
#[deserr(try_from(String) = IndexUid::try_from -> IndexUidFormatError)]
#[schema(value_type = String, example = "movies")]
pub struct IndexUid(String);

View File

@@ -302,12 +302,6 @@ pub enum Action {
#[serde(rename = "experimental.update")]
#[deserr(rename = "experimental.update")]
ExperimentalFeaturesUpdate,
#[serde(rename = "network.get")]
#[deserr(rename = "network.get")]
NetworkGet,
#[serde(rename = "network.update")]
#[deserr(rename = "network.update")]
NetworkUpdate,
}
impl Action {
@@ -347,8 +341,6 @@ impl Action {
KEYS_DELETE => Some(Self::KeysDelete),
EXPERIMENTAL_FEATURES_GET => Some(Self::ExperimentalFeaturesGet),
EXPERIMENTAL_FEATURES_UPDATE => Some(Self::ExperimentalFeaturesUpdate),
NETWORK_GET => Some(Self::NetworkGet),
NETWORK_UPDATE => Some(Self::NetworkUpdate),
_otherwise => None,
}
}
@@ -394,7 +386,4 @@ pub mod actions {
pub const KEYS_DELETE: u8 = KeysDelete.repr();
pub const EXPERIMENTAL_FEATURES_GET: u8 = ExperimentalFeaturesGet.repr();
pub const EXPERIMENTAL_FEATURES_UPDATE: u8 = ExperimentalFeaturesUpdate.repr();
pub const NETWORK_GET: u8 = NetworkGet.repr();
pub const NETWORK_UPDATE: u8 = NetworkUpdate.repr();
}

View File

@@ -15,7 +15,7 @@ pub mod star_or;
pub mod task_view;
pub mod tasks;
pub mod versioning;
pub use milli::{heed, Index};
pub use milli::{heed, zstd, Index};
use uuid::Uuid;
pub use versioning::VERSION_FILE_NAME;
pub use {milli, serde_cs};

View File

@@ -114,10 +114,6 @@ pub struct DetailsView {
pub settings: Option<Box<Settings<Unchecked>>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub swaps: Option<Vec<IndexSwap>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub upgrade_from: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub upgrade_to: Option<String>,
}
impl DetailsView {
@@ -238,18 +234,6 @@ impl DetailsView {
Some(left)
}
},
// We want the earliest version
upgrade_from: match (self.upgrade_from.clone(), other.upgrade_from.clone()) {
(None, None) => None,
(None, Some(from)) | (Some(from), None) => Some(from),
(Some(from), Some(_)) => Some(from),
},
// And the latest
upgrade_to: match (self.upgrade_to.clone(), other.upgrade_to.clone()) {
(None, None) => None,
(None, Some(to)) | (Some(to), None) => Some(to),
(Some(_), Some(to)) => Some(to),
},
}
}
}
@@ -327,11 +311,6 @@ impl From<Details> for DetailsView {
Details::IndexSwap { swaps } => {
DetailsView { swaps: Some(swaps), ..Default::default() }
}
Details::UpgradeDatabase { from, to } => DetailsView {
upgrade_from: Some(format!("v{}.{}.{}", from.0, from.1, from.2)),
upgrade_to: Some(format!("v{}.{}.{}", to.0, to.1, to.2)),
..Default::default()
},
}
}
}

View File

@@ -16,7 +16,7 @@ use crate::batches::BatchId;
use crate::error::ResponseError;
use crate::keys::Key;
use crate::settings::{Settings, Unchecked};
use crate::{versioning, InstanceUid};
use crate::InstanceUid;
pub type TaskId = u32;
@@ -50,7 +50,6 @@ impl Task {
| SnapshotCreation
| TaskCancelation { .. }
| TaskDeletion { .. }
| UpgradeDatabase { .. }
| IndexSwap { .. } => None,
DocumentAdditionOrUpdate { index_uid, .. }
| DocumentEdition { index_uid, .. }
@@ -85,8 +84,7 @@ impl Task {
| KindWithContent::TaskCancelation { .. }
| KindWithContent::TaskDeletion { .. }
| KindWithContent::DumpCreation { .. }
| KindWithContent::SnapshotCreation
| KindWithContent::UpgradeDatabase { .. } => None,
| KindWithContent::SnapshotCreation => None,
}
}
}
@@ -152,9 +150,6 @@ pub enum KindWithContent {
instance_uid: Option<InstanceUid>,
},
SnapshotCreation,
UpgradeDatabase {
from: (u32, u32, u32),
},
}
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, ToSchema)]
@@ -180,7 +175,6 @@ impl KindWithContent {
KindWithContent::TaskDeletion { .. } => Kind::TaskDeletion,
KindWithContent::DumpCreation { .. } => Kind::DumpCreation,
KindWithContent::SnapshotCreation => Kind::SnapshotCreation,
KindWithContent::UpgradeDatabase { .. } => Kind::UpgradeDatabase,
}
}
@@ -191,8 +185,7 @@ impl KindWithContent {
DumpCreation { .. }
| SnapshotCreation
| TaskCancelation { .. }
| TaskDeletion { .. }
| UpgradeDatabase { .. } => vec![],
| TaskDeletion { .. } => vec![],
DocumentAdditionOrUpdate { index_uid, .. }
| DocumentEdition { index_uid, .. }
| DocumentDeletion { index_uid, .. }
@@ -269,14 +262,6 @@ impl KindWithContent {
}),
KindWithContent::DumpCreation { .. } => Some(Details::Dump { dump_uid: None }),
KindWithContent::SnapshotCreation => None,
KindWithContent::UpgradeDatabase { from } => Some(Details::UpgradeDatabase {
from: (from.0, from.1, from.2),
to: (
versioning::VERSION_MAJOR.parse().unwrap(),
versioning::VERSION_MINOR.parse().unwrap(),
versioning::VERSION_PATCH.parse().unwrap(),
),
}),
}
}
@@ -335,14 +320,6 @@ impl KindWithContent {
}),
KindWithContent::DumpCreation { .. } => Some(Details::Dump { dump_uid: None }),
KindWithContent::SnapshotCreation => None,
KindWithContent::UpgradeDatabase { from } => Some(Details::UpgradeDatabase {
from: *from,
to: (
versioning::VERSION_MAJOR.parse().unwrap(),
versioning::VERSION_MINOR.parse().unwrap(),
versioning::VERSION_PATCH.parse().unwrap(),
),
}),
}
}
}
@@ -383,14 +360,6 @@ impl From<&KindWithContent> for Option<Details> {
}),
KindWithContent::DumpCreation { .. } => Some(Details::Dump { dump_uid: None }),
KindWithContent::SnapshotCreation => None,
KindWithContent::UpgradeDatabase { from } => Some(Details::UpgradeDatabase {
from: *from,
to: (
versioning::VERSION_MAJOR.parse().unwrap(),
versioning::VERSION_MINOR.parse().unwrap(),
versioning::VERSION_PATCH.parse().unwrap(),
),
}),
}
}
}
@@ -499,7 +468,6 @@ pub enum Kind {
TaskDeletion,
DumpCreation,
SnapshotCreation,
UpgradeDatabase,
}
impl Kind {
@@ -516,7 +484,6 @@ impl Kind {
| Kind::TaskCancelation
| Kind::TaskDeletion
| Kind::DumpCreation
| Kind::UpgradeDatabase
| Kind::SnapshotCreation => false,
}
}
@@ -536,7 +503,6 @@ impl Display for Kind {
Kind::TaskDeletion => write!(f, "taskDeletion"),
Kind::DumpCreation => write!(f, "dumpCreation"),
Kind::SnapshotCreation => write!(f, "snapshotCreation"),
Kind::UpgradeDatabase => write!(f, "upgradeDatabase"),
}
}
}
@@ -568,8 +534,6 @@ impl FromStr for Kind {
Ok(Kind::DumpCreation)
} else if kind.eq_ignore_ascii_case("snapshotCreation") {
Ok(Kind::SnapshotCreation)
} else if kind.eq_ignore_ascii_case("upgradeDatabase") {
Ok(Kind::UpgradeDatabase)
} else {
Err(ParseTaskKindError(kind.to_owned()))
}
@@ -643,10 +607,6 @@ pub enum Details {
IndexSwap {
swaps: Vec<IndexSwap>,
},
UpgradeDatabase {
from: (u32, u32, u32),
to: (u32, u32, u32),
},
}
impl Details {
@@ -667,7 +627,6 @@ impl Details {
Self::SettingsUpdate { .. }
| Self::IndexInfo { .. }
| Self::Dump { .. }
| Self::UpgradeDatabase { .. }
| Self::IndexSwap { .. } => (),
}
@@ -728,9 +687,7 @@ pub fn serialize_duration<S: Serializer>(
#[cfg(test)]
mod tests {
use std::str::FromStr;
use super::{Details, Kind};
use super::Details;
use crate::heed::types::SerdeJson;
use crate::heed::{BytesDecode, BytesEncode};
@@ -746,13 +703,4 @@ mod tests {
meili_snap::snapshot!(format!("{:?}", details), @r###"TaskDeletion { matched_tasks: 1, deleted_tasks: None, original_filter: "hello" }"###);
meili_snap::snapshot!(format!("{:?}", deserialised), @r###"TaskDeletion { matched_tasks: 1, deleted_tasks: None, original_filter: "hello" }"###);
}
#[test]
fn all_kind_can_be_from_str() {
for kind in enum_iterator::all::<Kind>() {
let s = kind.to_string();
let k = Kind::from_str(&s).map_err(|e| format!("Could not from_str {s}: {e}")).unwrap();
assert_eq!(kind, k, "{kind}.to_string() returned {s} which was parsed as {k}");
}
}
}

View File

@@ -1,19 +1,16 @@
use std::fs;
use std::io::{ErrorKind, Write};
use std::io::{self, ErrorKind};
use std::path::Path;
use milli::heed;
use tempfile::NamedTempFile;
/// The name of the file that contains the version of the database.
pub const VERSION_FILE_NAME: &str = "VERSION";
pub static VERSION_MAJOR: &str = env!("CARGO_PKG_VERSION_MAJOR");
pub static VERSION_MINOR: &str = env!("CARGO_PKG_VERSION_MINOR");
pub static VERSION_PATCH: &str = env!("CARGO_PKG_VERSION_PATCH");
static VERSION_MAJOR: &str = env!("CARGO_PKG_VERSION_MAJOR");
static VERSION_MINOR: &str = env!("CARGO_PKG_VERSION_MINOR");
static VERSION_PATCH: &str = env!("CARGO_PKG_VERSION_PATCH");
/// Persists the version of the current Meilisearch binary to a VERSION file
pub fn create_current_version_file(db_path: &Path) -> anyhow::Result<()> {
pub fn create_current_version_file(db_path: &Path) -> io::Result<()> {
create_version_file(db_path, VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH)
}
@@ -22,50 +19,39 @@ pub fn create_version_file(
major: &str,
minor: &str,
patch: &str,
) -> anyhow::Result<()> {
) -> io::Result<()> {
let version_path = db_path.join(VERSION_FILE_NAME);
// In order to persist the file later we must create it in the `data.ms` and not in `/tmp`
let mut file = NamedTempFile::new_in(db_path)?;
file.write_all(format!("{}.{}.{}", major, minor, patch).as_bytes())?;
file.flush()?;
file.persist(version_path)?;
fs::write(version_path, format!("{}.{}.{}", major, minor, patch))
}
/// Ensures Meilisearch version is compatible with the database, returns an error versions mismatch.
pub fn check_version_file(db_path: &Path) -> anyhow::Result<()> {
let (major, minor, patch) = get_version(db_path)?;
if major != VERSION_MAJOR || minor != VERSION_MINOR {
return Err(VersionFileError::VersionMismatch { major, minor, patch }.into());
}
Ok(())
}
pub fn get_version(db_path: &Path) -> Result<(u32, u32, u32), VersionFileError> {
pub fn get_version(db_path: &Path) -> Result<(String, String, String), VersionFileError> {
let version_path = db_path.join(VERSION_FILE_NAME);
match fs::read_to_string(version_path) {
Ok(version) => parse_version(&version),
Err(error) => match error.kind() {
ErrorKind::NotFound => Err(VersionFileError::MissingVersionFile),
_ => Err(anyhow::Error::from(error).into()),
_ => Err(error.into()),
},
}
}
pub fn parse_version(version: &str) -> Result<(u32, u32, u32), VersionFileError> {
let version_components = version.trim().split('.').collect::<Vec<_>>();
pub fn parse_version(version: &str) -> Result<(String, String, String), VersionFileError> {
let version_components = version.split('.').collect::<Vec<_>>();
let (major, minor, patch) = match &version_components[..] {
[major, minor, patch] => (
major.parse().map_err(|e| VersionFileError::MalformedVersionFile {
context: format!("Could not parse the major: {e}"),
})?,
minor.parse().map_err(|e| VersionFileError::MalformedVersionFile {
context: format!("Could not parse the minor: {e}"),
})?,
patch.parse().map_err(|e| VersionFileError::MalformedVersionFile {
context: format!("Could not parse the patch: {e}"),
})?,
),
_ => {
return Err(VersionFileError::MalformedVersionFile {
context: format!(
"The version contains {} parts instead of 3 (major, minor and patch)",
version_components.len()
),
})
}
[major, minor, patch] => (major.to_string(), minor.to_string(), patch.to_string()),
_ => return Err(VersionFileError::MalformedVersionFile),
};
Ok((major, minor, patch))
}
@@ -78,21 +64,15 @@ pub enum VersionFileError {
env!("CARGO_PKG_VERSION").to_string()
)]
MissingVersionFile,
#[error("Version file is corrupted and thus Meilisearch is unable to determine the version of the database. {context}")]
MalformedVersionFile { context: String },
#[error("Version file is corrupted and thus Meilisearch is unable to determine the version of the database.")]
MalformedVersionFile,
#[error(
"Your database version ({major}.{minor}.{patch}) is incompatible with your current engine version ({}).\n\
To migrate data between Meilisearch versions, please follow our guide on https://www.meilisearch.com/docs/learn/update_and_migration/updating.",
env!("CARGO_PKG_VERSION").to_string()
)]
VersionMismatch { major: u32, minor: u32, patch: u32 },
#[error("Database version {major}.{minor}.{patch} is higher than the Meilisearch version {VERSION_MAJOR}.{VERSION_MINOR}.{VERSION_PATCH}. Downgrade is not supported")]
DowngradeNotSupported { major: u32, minor: u32, patch: u32 },
#[error("Database version {major}.{minor}.{patch} is too old for the experimental dumpless upgrade feature. Please generate a dump using the v{major}.{minor}.{patch} and import it in the v{VERSION_MAJOR}.{VERSION_MINOR}.{VERSION_PATCH}")]
TooOldForAutomaticUpgrade { major: u32, minor: u32, patch: u32 },
#[error("Error while modifying the database: {0}")]
ErrorWhileModifyingTheDatabase(#[from] heed::Error),
VersionMismatch { major: String, minor: String, patch: String },
#[error(transparent)]
AnyhowError(#[from] anyhow::Error),
IoError(#[from] std::io::Error),
}

View File

@@ -105,16 +105,8 @@ tracing-actix-web = "0.7.15"
build-info = { version = "1.7.0", path = "../build-info" }
roaring = "0.10.10"
mopa-maintained = "0.2.3"
utoipa = { version = "5.3.1", features = [
"actix_extras",
"macros",
"non_strict_integers",
"preserve_order",
"uuid",
"time",
"openapi_extensions",
] }
utoipa-scalar = { version = "0.3.0", optional = true, features = ["actix-web"] }
utoipa = { version = "5.3.1", features = ["actix_extras", "macros", "non_strict_integers", "preserve_order", "uuid", "time", "openapi_extensions"] }
utoipa-scalar = { version = "0.2.1", optional = true, features = ["actix-web"] }
[dev-dependencies]
actix-rt = "2.10.0"
@@ -140,7 +132,7 @@ reqwest = { version = "0.12.12", features = [
sha-1 = { version = "0.10.1", optional = true }
static-files = { version = "0.2.4", optional = true }
tempfile = { version = "3.15.0", optional = true }
zip = { version = "2.2.2", optional = true }
zip = { version = "2.2.2", default-features = false, features = ["deflate"], optional = true }
[features]
default = ["meilisearch-types/all-tokenizations", "mini-dashboard"]

View File

@@ -31,7 +31,6 @@ use crate::routes::{create_all_stats, Stats};
use crate::Opt;
const ANALYTICS_HEADER: &str = "X-Meilisearch-Client";
const MEILI_SERVER_PROVIDER: &str = "MEILI_SERVER_PROVIDER";
/// Write the instance-uid in the `data.ms` and in `~/.config/MeiliSearch/path-to-db-instance-uid`. Ignore the errors.
fn write_user_id(db_path: &Path, user_id: &InstanceUid) {
@@ -190,14 +189,12 @@ struct Infos {
experimental_drop_search_after: usize,
experimental_nb_searches_per_core: usize,
experimental_logs_mode: LogMode,
experimental_dumpless_upgrade: bool,
experimental_replication_parameters: bool,
experimental_enable_logs_route: bool,
experimental_reduce_indexing_memory_usage: bool,
experimental_max_number_of_batched_tasks: usize,
experimental_limit_batched_tasks_total_size: u64,
experimental_network: bool,
experimental_get_task_documents_route: bool,
experimental_enable_document_compression: bool,
gpu_enabled: bool,
db_path: bool,
import_dump: bool,
@@ -239,12 +236,12 @@ impl Infos {
experimental_drop_search_after,
experimental_nb_searches_per_core,
experimental_logs_mode,
experimental_dumpless_upgrade,
experimental_replication_parameters,
experimental_enable_logs_route,
experimental_reduce_indexing_memory_usage,
experimental_max_number_of_batched_tasks,
experimental_limit_batched_tasks_total_size,
experimental_enable_document_compression,
http_addr,
master_key: _,
env,
@@ -288,8 +285,6 @@ impl Infos {
logs_route,
edit_documents_by_function,
contains_filter,
network,
get_task_documents_route,
} = features;
// We're going to override every sensible information.
@@ -303,12 +298,10 @@ impl Infos {
experimental_drop_search_after: experimental_drop_search_after.into(),
experimental_nb_searches_per_core: experimental_nb_searches_per_core.into(),
experimental_logs_mode,
experimental_dumpless_upgrade,
experimental_replication_parameters,
experimental_enable_logs_route: experimental_enable_logs_route | logs_route,
experimental_reduce_indexing_memory_usage,
experimental_network: network,
experimental_get_task_documents_route: get_task_documents_route,
experimental_enable_document_compression,
gpu_enabled: meilisearch_types::milli::vector::is_cuda_enabled(),
db_path: db_path != PathBuf::from("./data.ms"),
import_dump: import_dump.is_some(),
@@ -364,7 +357,7 @@ impl Segment {
"cores": sys.cpus().len(),
"ram_size": sys.total_memory(),
"disk_size": disks.iter().map(|disk| disk.total_space()).max(),
"server_provider": std::env::var(MEILI_SERVER_PROVIDER).ok(),
"server_provider": std::env::var("MEILI_SERVER_PROVIDER").ok(),
})
});
let number_of_documents =
@@ -387,18 +380,10 @@ impl Segment {
index_scheduler: Arc<IndexScheduler>,
auth_controller: Arc<AuthController>,
) {
let interval: Duration = match std::env::var(MEILI_SERVER_PROVIDER) {
Ok(provider) if provider.starts_with("meili_cloud:") => {
Duration::from_secs(60 * 60) // one hour
}
_ => {
// We're an open source instance
Duration::from_secs(60 * 60 * 24) // one day
}
};
const INTERVAL: Duration = Duration::from_secs(60 * 60); // one hour
// The first batch must be sent after one hour.
let mut interval =
tokio::time::interval_at(tokio::time::Instant::now() + interval, interval);
tokio::time::interval_at(tokio::time::Instant::now() + INTERVAL, INTERVAL);
loop {
select! {

View File

@@ -32,18 +32,14 @@ use analytics::Analytics;
use anyhow::bail;
use error::PayloadError;
use extractors::payload::PayloadConfig;
use index_scheduler::versioning::Versioning;
use index_scheduler::{IndexScheduler, IndexSchedulerOptions};
use meilisearch_auth::AuthController;
use meilisearch_types::milli::constants::VERSION_MAJOR;
use meilisearch_types::milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
use meilisearch_types::milli::update::{IndexDocumentsConfig, IndexDocumentsMethod};
use meilisearch_types::settings::apply_settings_to_builder;
use meilisearch_types::tasks::KindWithContent;
use meilisearch_types::versioning::{
create_current_version_file, get_version, VersionFileError, VERSION_MINOR, VERSION_PATCH,
};
use meilisearch_types::{compression, heed, milli, VERSION_FILE_NAME};
use meilisearch_types::versioning::{check_version_file, create_current_version_file};
use meilisearch_types::{compression, milli, VERSION_FILE_NAME};
pub use option::Opt;
use option::ScheduleSnapshot;
use search_queue::SearchQueue;
@@ -210,47 +206,13 @@ enum OnFailure {
}
pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc<IndexScheduler>, Arc<AuthController>)> {
let index_scheduler_opt = IndexSchedulerOptions {
version_file_path: opt.db_path.join(VERSION_FILE_NAME),
auth_path: opt.db_path.join("auth"),
tasks_path: opt.db_path.join("tasks"),
update_file_path: opt.db_path.join("update_files"),
indexes_path: opt.db_path.join("indexes"),
snapshots_path: opt.snapshot_dir.clone(),
dumps_path: opt.dump_dir.clone(),
webhook_url: opt.task_webhook_url.as_ref().map(|url| url.to_string()),
webhook_authorization_header: opt.task_webhook_authorization_header.clone(),
task_db_size: opt.max_task_db_size.as_u64() as usize,
index_base_map_size: opt.max_index_size.as_u64() as usize,
enable_mdb_writemap: opt.experimental_reduce_indexing_memory_usage,
indexer_config: Arc::new((&opt.indexer_options).try_into()?),
autobatching_enabled: true,
cleanup_enabled: !opt.experimental_replication_parameters,
max_number_of_tasks: 1_000_000,
max_number_of_batched_tasks: opt.experimental_max_number_of_batched_tasks,
batched_tasks_size_limit: opt.experimental_limit_batched_tasks_total_size,
index_growth_amount: byte_unit::Byte::from_str("10GiB").unwrap().as_u64() as usize,
index_count: DEFAULT_INDEX_COUNT,
instance_features: opt.to_instance_features(),
auto_upgrade: opt.experimental_dumpless_upgrade,
};
let bin_major: u32 = VERSION_MAJOR.parse().unwrap();
let bin_minor: u32 = VERSION_MINOR.parse().unwrap();
let bin_patch: u32 = VERSION_PATCH.parse().unwrap();
let binary_version = (bin_major, bin_minor, bin_patch);
let empty_db = is_empty_db(&opt.db_path);
let (index_scheduler, auth_controller) = if let Some(ref snapshot_path) = opt.import_snapshot {
let snapshot_path_exists = snapshot_path.exists();
// the db is empty and the snapshot exists, import it
if empty_db && snapshot_path_exists {
match compression::from_tar_gz(snapshot_path, &opt.db_path) {
Ok(()) => open_or_create_database_unchecked(
opt,
index_scheduler_opt,
OnFailure::RemoveDb,
binary_version, // the db is empty
)?,
Ok(()) => open_or_create_database_unchecked(opt, OnFailure::RemoveDb)?,
Err(e) => {
std::fs::remove_dir_all(&opt.db_path)?;
return Err(e);
@@ -267,18 +229,14 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc<IndexScheduler>, Arc<
bail!("snapshot doesn't exist at {}", snapshot_path.display())
// the snapshot and the db exist, and we can ignore the snapshot because of the ignore_snapshot_if_db_exists flag
} else {
open_or_create_database(opt, index_scheduler_opt, empty_db, binary_version)?
open_or_create_database(opt, empty_db)?
}
} else if let Some(ref path) = opt.import_dump {
let src_path_exists = path.exists();
// the db is empty and the dump exists, import it
if empty_db && src_path_exists {
let (mut index_scheduler, mut auth_controller) = open_or_create_database_unchecked(
opt,
index_scheduler_opt,
OnFailure::RemoveDb,
binary_version, // the db is empty
)?;
let (mut index_scheduler, mut auth_controller) =
open_or_create_database_unchecked(opt, OnFailure::RemoveDb)?;
match import_dump(&opt.db_path, path, &mut index_scheduler, &mut auth_controller) {
Ok(()) => (index_scheduler, auth_controller),
Err(e) => {
@@ -298,10 +256,10 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc<IndexScheduler>, Arc<
// the dump and the db exist and we can ignore the dump because of the ignore_dump_if_db_exists flag
// or, the dump is missing but we can ignore that because of the ignore_missing_dump flag
} else {
open_or_create_database(opt, index_scheduler_opt, empty_db, binary_version)?
open_or_create_database(opt, empty_db)?
}
} else {
open_or_create_database(opt, index_scheduler_opt, empty_db, binary_version)?
open_or_create_database(opt, empty_db)?
};
// We create a loop in a thread that registers snapshotCreation tasks
@@ -329,15 +287,37 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc<IndexScheduler>, Arc<
/// Try to start the IndexScheduler and AuthController without checking the VERSION file or anything.
fn open_or_create_database_unchecked(
opt: &Opt,
index_scheduler_opt: IndexSchedulerOptions,
on_failure: OnFailure,
version: (u32, u32, u32),
) -> anyhow::Result<(IndexScheduler, AuthController)> {
// we don't want to create anything in the data.ms yet, thus we
// wrap our two builders in a closure that'll be executed later.
let auth_controller = AuthController::new(&opt.db_path, &opt.master_key);
let index_scheduler_builder =
|| -> anyhow::Result<_> { Ok(IndexScheduler::new(index_scheduler_opt, version)?) };
let instance_features = opt.to_instance_features();
let index_scheduler_builder = || -> anyhow::Result<_> {
Ok(IndexScheduler::new(IndexSchedulerOptions {
version_file_path: opt.db_path.join(VERSION_FILE_NAME),
auth_path: opt.db_path.join("auth"),
tasks_path: opt.db_path.join("tasks"),
update_file_path: opt.db_path.join("update_files"),
indexes_path: opt.db_path.join("indexes"),
snapshots_path: opt.snapshot_dir.clone(),
dumps_path: opt.dump_dir.clone(),
webhook_url: opt.task_webhook_url.as_ref().map(|url| url.to_string()),
webhook_authorization_header: opt.task_webhook_authorization_header.clone(),
task_db_size: opt.max_task_db_size.as_u64() as usize,
index_base_map_size: opt.max_index_size.as_u64() as usize,
enable_mdb_writemap: opt.experimental_reduce_indexing_memory_usage,
indexer_config: Arc::new((&opt.indexer_options).try_into()?),
autobatching_enabled: true,
cleanup_enabled: !opt.experimental_replication_parameters,
max_number_of_tasks: 1_000_000,
max_number_of_batched_tasks: opt.experimental_max_number_of_batched_tasks,
batched_tasks_size_limit: opt.experimental_limit_batched_tasks_total_size,
index_growth_amount: byte_unit::Byte::from_str("10GiB").unwrap().as_u64() as usize,
index_count: DEFAULT_INDEX_COUNT,
instance_features,
})?)
};
match (
index_scheduler_builder(),
@@ -354,102 +334,16 @@ fn open_or_create_database_unchecked(
}
}
/// Ensures Meilisearch version is compatible with the database, returns an error in case of version mismatch.
/// Returns the version that was contained in the version file
fn check_version(
opt: &Opt,
index_scheduler_opt: &IndexSchedulerOptions,
binary_version: (u32, u32, u32),
) -> anyhow::Result<(u32, u32, u32)> {
let (bin_major, bin_minor, bin_patch) = binary_version;
let (db_major, db_minor, db_patch) = get_version(&opt.db_path)?;
if db_major != bin_major || db_minor != bin_minor || db_patch > bin_patch {
if opt.experimental_dumpless_upgrade {
update_version_file_for_dumpless_upgrade(
opt,
index_scheduler_opt,
(db_major, db_minor, db_patch),
(bin_major, bin_minor, bin_patch),
)?;
} else {
return Err(VersionFileError::VersionMismatch {
major: db_major,
minor: db_minor,
patch: db_patch,
}
.into());
}
}
Ok((db_major, db_minor, db_patch))
}
/// Persists the version of the current Meilisearch binary to a VERSION file
pub fn update_version_file_for_dumpless_upgrade(
opt: &Opt,
index_scheduler_opt: &IndexSchedulerOptions,
from: (u32, u32, u32),
to: (u32, u32, u32),
) -> Result<(), VersionFileError> {
let (from_major, from_minor, from_patch) = from;
let (to_major, to_minor, to_patch) = to;
// Early exit in case of error
if from_major > to_major
|| (from_major == to_major && from_minor > to_minor)
|| (from_major == to_major && from_minor == to_minor && from_patch > to_patch)
{
return Err(VersionFileError::DowngradeNotSupported {
major: from_major,
minor: from_minor,
patch: from_patch,
});
} else if from_major < 1 || (from_major == to_major && from_minor < 12) {
return Err(VersionFileError::TooOldForAutomaticUpgrade {
major: from_major,
minor: from_minor,
patch: from_patch,
});
}
// In the case of v1.12, the index-scheduler didn't store its internal version at the time.
// => We must write it immediately **in the index-scheduler** otherwise we'll update the version file
// there is a risk of DB corruption if a restart happens after writing the version file but before
// writing the version in the index-scheduler. See <https://github.com/meilisearch/meilisearch/issues/5280>
if from_major == 1 && from_minor == 12 {
let env = unsafe {
heed::EnvOpenOptions::new()
.max_dbs(Versioning::nb_db())
.map_size(index_scheduler_opt.task_db_size)
.open(&index_scheduler_opt.tasks_path)
}?;
let mut wtxn = env.write_txn()?;
let versioning = Versioning::raw_new(&env, &mut wtxn)?;
versioning.set_version(&mut wtxn, (from_major, from_minor, from_patch))?;
wtxn.commit()?;
// Should be instant since we're the only one using the env
env.prepare_for_closing().wait();
}
create_current_version_file(&opt.db_path)?;
Ok(())
}
/// Ensure you're in a valid state and open the IndexScheduler + AuthController for you.
fn open_or_create_database(
opt: &Opt,
index_scheduler_opt: IndexSchedulerOptions,
empty_db: bool,
binary_version: (u32, u32, u32),
) -> anyhow::Result<(IndexScheduler, AuthController)> {
let version = if !empty_db {
check_version(opt, &index_scheduler_opt, binary_version)?
} else {
binary_version
};
if !empty_db {
check_version_file(&opt.db_path)?;
}
open_or_create_database_unchecked(opt, index_scheduler_opt, OnFailure::KeepDb, version)
open_or_create_database_unchecked(opt, OnFailure::KeepDb)
}
fn import_dump(
@@ -491,13 +385,10 @@ fn import_dump(
keys.push(key);
}
// 3. Import the runtime features and network
// 3. Import the runtime features.
let features = dump_reader.features()?.unwrap_or_default();
index_scheduler.put_runtime_features(features)?;
let network = dump_reader.network()?.cloned().unwrap_or_default();
index_scheduler.put_network(network)?;
let indexer_config = index_scheduler.indexer_config();
// /!\ The tasks must be imported AFTER importing the indexes or else the scheduler might
@@ -571,15 +462,9 @@ fn import_dump(
index_scheduler.refresh_index_stats(&uid)?;
}
// 5. Import the queue
let mut index_scheduler_dump = index_scheduler.register_dumped_task()?;
// 5.1. Import the batches
for ret in dump_reader.batches()? {
let batch = ret?;
index_scheduler_dump.register_dumped_batch(batch)?;
}
// 5.2. Import the tasks
// 5. Import the tasks.
for ret in dump_reader.tasks()? {
let (task, file) = ret?;
index_scheduler_dump.register_dumped_task(task, file)?;

View File

@@ -49,7 +49,6 @@ const MEILI_IGNORE_DUMP_IF_DB_EXISTS: &str = "MEILI_IGNORE_DUMP_IF_DB_EXISTS";
const MEILI_DUMP_DIR: &str = "MEILI_DUMP_DIR";
const MEILI_LOG_LEVEL: &str = "MEILI_LOG_LEVEL";
const MEILI_EXPERIMENTAL_LOGS_MODE: &str = "MEILI_EXPERIMENTAL_LOGS_MODE";
const MEILI_EXPERIMENTAL_DUMPLESS_UPGRADE: &str = "MEILI_EXPERIMENTAL_DUMPLESS_UPGRADE";
const MEILI_EXPERIMENTAL_REPLICATION_PARAMETERS: &str = "MEILI_EXPERIMENTAL_REPLICATION_PARAMETERS";
const MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE: &str = "MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE";
const MEILI_EXPERIMENTAL_CONTAINS_FILTER: &str = "MEILI_EXPERIMENTAL_CONTAINS_FILTER";
@@ -63,6 +62,8 @@ const MEILI_EXPERIMENTAL_MAX_NUMBER_OF_BATCHED_TASKS: &str =
"MEILI_EXPERIMENTAL_MAX_NUMBER_OF_BATCHED_TASKS";
const MEILI_EXPERIMENTAL_LIMIT_BATCHED_TASKS_TOTAL_SIZE: &str =
"MEILI_EXPERIMENTAL_LIMIT_BATCHED_TASKS_SIZE";
const MEILI_EXPERIMENTAL_ENABLE_DOCUMENT_COMPRESSION: &str =
"MEILI_EXPERIMENTAL_ENABLE_DOCUMENT_COMPRESSION";
const DEFAULT_CONFIG_FILE_PATH: &str = "./config.toml";
const DEFAULT_DB_PATH: &str = "./data.ms";
@@ -401,13 +402,6 @@ pub struct Opt {
#[serde(default)]
pub experimental_logs_mode: LogMode,
/// Experimental dumpless upgrade. For more information, see: <https://github.com/orgs/meilisearch/discussions/804>
///
/// When set, Meilisearch will auto-update its database without using a dump.
#[clap(long, env = MEILI_EXPERIMENTAL_DUMPLESS_UPGRADE, default_value_t)]
#[serde(default)]
pub experimental_dumpless_upgrade: bool,
/// Experimental logs route feature. For more information,
/// see: <https://github.com/orgs/meilisearch/discussions/721>
///
@@ -446,6 +440,11 @@ pub struct Opt {
#[serde(default = "default_limit_batched_tasks_total_size")]
pub experimental_limit_batched_tasks_total_size: u64,
/// Experimentally enable the document compression feature, see: <https://github.com/orgs/meilisearch/discussions/802>
#[clap(long, env = MEILI_EXPERIMENTAL_ENABLE_DOCUMENT_COMPRESSION)]
#[serde(default)]
pub experimental_enable_document_compression: bool,
#[serde(flatten)]
#[clap(flatten)]
pub indexer_options: IndexerOpts,
@@ -543,12 +542,12 @@ impl Opt {
experimental_drop_search_after,
experimental_nb_searches_per_core,
experimental_logs_mode,
experimental_dumpless_upgrade,
experimental_enable_logs_route,
experimental_replication_parameters,
experimental_reduce_indexing_memory_usage,
experimental_max_number_of_batched_tasks,
experimental_limit_batched_tasks_total_size,
experimental_enable_document_compression,
} = self;
export_to_env_if_not_present(MEILI_DB_PATH, db_path);
export_to_env_if_not_present(MEILI_HTTP_ADDR, http_addr);
@@ -617,10 +616,6 @@ impl Opt {
MEILI_EXPERIMENTAL_LOGS_MODE,
experimental_logs_mode.to_string(),
);
export_to_env_if_not_present(
MEILI_EXPERIMENTAL_DUMPLESS_UPGRADE,
experimental_dumpless_upgrade.to_string(),
);
export_to_env_if_not_present(
MEILI_EXPERIMENTAL_REPLICATION_PARAMETERS,
experimental_replication_parameters.to_string(),
@@ -641,6 +636,10 @@ impl Opt {
MEILI_EXPERIMENTAL_LIMIT_BATCHED_TASKS_TOTAL_SIZE,
experimental_limit_batched_tasks_total_size.to_string(),
);
export_to_env_if_not_present(
MEILI_EXPERIMENTAL_ENABLE_DOCUMENT_COMPRESSION,
experimental_enable_document_compression.to_string(),
);
indexer_options.export_to_env();
}

View File

@@ -50,8 +50,6 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
logs_route: Some(false),
edit_documents_by_function: Some(false),
contains_filter: Some(false),
network: Some(false),
get_task_documents_route: Some(false),
})),
(status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!(
{
@@ -90,10 +88,6 @@ pub struct RuntimeTogglableFeatures {
pub edit_documents_by_function: Option<bool>,
#[deserr(default)]
pub contains_filter: Option<bool>,
#[deserr(default)]
pub network: Option<bool>,
#[deserr(default)]
pub get_task_documents_route: Option<bool>,
}
impl From<meilisearch_types::features::RuntimeTogglableFeatures> for RuntimeTogglableFeatures {
@@ -103,8 +97,6 @@ impl From<meilisearch_types::features::RuntimeTogglableFeatures> for RuntimeTogg
logs_route,
edit_documents_by_function,
contains_filter,
network,
get_task_documents_route,
} = value;
Self {
@@ -112,8 +104,6 @@ impl From<meilisearch_types::features::RuntimeTogglableFeatures> for RuntimeTogg
logs_route: Some(logs_route),
edit_documents_by_function: Some(edit_documents_by_function),
contains_filter: Some(contains_filter),
network: Some(network),
get_task_documents_route: Some(get_task_documents_route),
}
}
}
@@ -124,8 +114,6 @@ pub struct PatchExperimentalFeatureAnalytics {
logs_route: bool,
edit_documents_by_function: bool,
contains_filter: bool,
network: bool,
get_task_documents_route: bool,
}
impl Aggregate for PatchExperimentalFeatureAnalytics {
@@ -139,8 +127,6 @@ impl Aggregate for PatchExperimentalFeatureAnalytics {
logs_route: new.logs_route,
edit_documents_by_function: new.edit_documents_by_function,
contains_filter: new.contains_filter,
network: new.network,
get_task_documents_route: new.get_task_documents_route,
})
}
@@ -163,8 +149,6 @@ impl Aggregate for PatchExperimentalFeatureAnalytics {
logs_route: Some(false),
edit_documents_by_function: Some(false),
contains_filter: Some(false),
network: Some(false),
get_task_documents_route: Some(false),
})),
(status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!(
{
@@ -197,23 +181,16 @@ async fn patch_features(
.edit_documents_by_function
.unwrap_or(old_features.edit_documents_by_function),
contains_filter: new_features.0.contains_filter.unwrap_or(old_features.contains_filter),
network: new_features.0.network.unwrap_or(old_features.network),
get_task_documents_route: new_features
.0
.get_task_documents_route
.unwrap_or(old_features.get_task_documents_route),
};
// explicitly destructure for analytics rather than using the `Serialize` implementation, because
// it renames to camelCase, which we don't want for analytics.
// the it renames to camelCase, which we don't want for analytics.
// **Do not** ignore fields with `..` or `_` here, because we want to add them in the future.
let meilisearch_types::features::RuntimeTogglableFeatures {
metrics,
logs_route,
edit_documents_by_function,
contains_filter,
network,
get_task_documents_route,
} = new_features;
analytics.publish(
@@ -222,8 +199,6 @@ async fn patch_features(
logs_route,
edit_documents_by_function,
contains_filter,
network,
get_task_documents_route,
},
&req,
);

View File

@@ -1411,43 +1411,50 @@ fn some_documents<'a, 't: 'a>(
retrieve_vectors: RetrieveVectors,
) -> Result<impl Iterator<Item = Result<Document, ResponseError>> + 'a, ResponseError> {
let fields_ids_map = index.fields_ids_map(rtxn)?;
let dictionary = index.document_decompression_dictionary(rtxn)?;
let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
let embedding_configs = index.embedding_configs(rtxn)?;
let mut buffer = Vec::new();
Ok(index.iter_documents(rtxn, doc_ids)?.map(move |ret| {
ret.map_err(ResponseError::from).and_then(|(key, document)| -> Result<_, ResponseError> {
let mut document = milli::obkv_to_json(&all_fields, &fields_ids_map, document)?;
match retrieve_vectors {
RetrieveVectors::Hide => {
document.remove("_vectors");
}
RetrieveVectors::Retrieve => {
// Clippy is simply wrong
#[allow(clippy::manual_unwrap_or_default)]
let mut vectors = match document.remove("_vectors") {
Some(Value::Object(map)) => map,
_ => Default::default(),
};
for (name, vector) in index.embeddings(rtxn, key)? {
let user_provided = embedding_configs
.iter()
.find(|conf| conf.name == name)
.is_some_and(|conf| conf.user_provided.contains(key));
let embeddings = ExplicitVectors {
embeddings: Some(vector.into()),
regenerate: !user_provided,
};
vectors.insert(
name,
serde_json::to_value(embeddings).map_err(MeilisearchHttpError::from)?,
);
Ok(index.iter_compressed_documents(rtxn, doc_ids)?.map(move |ret| {
ret.map_err(ResponseError::from).and_then(
|(key, compressed_document)| -> Result<_, ResponseError> {
let document = compressed_document
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())?;
let mut document = milli::obkv_to_json(&all_fields, &fields_ids_map, document)?;
match retrieve_vectors {
RetrieveVectors::Hide => {
document.remove("_vectors");
}
RetrieveVectors::Retrieve => {
// Clippy is simply wrong
#[allow(clippy::manual_unwrap_or_default)]
let mut vectors = match document.remove("_vectors") {
Some(Value::Object(map)) => map,
_ => Default::default(),
};
for (name, vector) in index.embeddings(rtxn, key)? {
let user_provided = embedding_configs
.iter()
.find(|conf| conf.name == name)
.is_some_and(|conf| conf.user_provided.contains(key));
let embeddings = ExplicitVectors {
embeddings: Some(vector.into()),
regenerate: !user_provided,
};
vectors.insert(
name,
serde_json::to_value(embeddings)
.map_err(MeilisearchHttpError::from)?,
);
}
document.insert("_vectors".into(), vectors.into());
}
document.insert("_vectors".into(), vectors.into());
}
}
Ok(document)
})
Ok(document)
},
)
}))
}

View File

@@ -496,12 +496,6 @@ pub struct IndexStats {
pub number_of_documents: u64,
/// Whether or not the index is currently ingesting document
pub is_indexing: bool,
/// Number of embeddings in the index
#[serde(skip_serializing_if = "Option::is_none")]
pub number_of_embeddings: Option<u64>,
/// Number of embedded documents in the index
#[serde(skip_serializing_if = "Option::is_none")]
pub number_of_embedded_documents: Option<u64>,
/// Association of every field name with the number of times it occurs in the documents.
#[schema(value_type = HashMap<String, u64>)]
pub field_distribution: FieldDistribution,
@@ -512,8 +506,6 @@ impl From<index_scheduler::IndexStats> for IndexStats {
IndexStats {
number_of_documents: stats.inner_stats.number_of_documents,
is_indexing: stats.is_indexing,
number_of_embeddings: stats.inner_stats.number_of_embeddings,
number_of_embedded_documents: stats.inner_stats.number_of_embedded_documents,
field_distribution: stats.inner_stats.field_distribution,
}
}
@@ -532,8 +524,6 @@ impl From<index_scheduler::IndexStats> for IndexStats {
(status = OK, description = "The stats of the index", body = IndexStats, content_type = "application/json", example = json!(
{
"numberOfDocuments": 10,
"numberOfEmbeddings": 10,
"numberOfEmbeddedDocuments": 10,
"isIndexing": true,
"fieldDistribution": {
"genre": 10,

View File

@@ -34,7 +34,6 @@ use crate::routes::features::RuntimeTogglableFeatures;
use crate::routes::indexes::documents::{DocumentDeletionByFilter, DocumentEditionByFunction};
use crate::routes::indexes::IndexView;
use crate::routes::multi_search::SearchResults;
use crate::routes::network::{Network, Remote};
use crate::routes::swap_indexes::SwapIndexesPayload;
use crate::search::{
FederatedSearch, FederatedSearchResult, Federation, FederationOptions, MergeFacets,
@@ -55,7 +54,6 @@ mod logs;
mod metrics;
mod multi_search;
mod multi_search_analytics;
pub mod network;
mod open_api_utils;
mod snapshot;
mod swap_indexes;
@@ -77,7 +75,6 @@ pub mod tasks;
(path = "/multi-search", api = multi_search::MultiSearchApi),
(path = "/swap-indexes", api = swap_indexes::SwapIndexesApi),
(path = "/experimental-features", api = features::ExperimentalFeaturesApi),
(path = "/network", api = network::NetworkApi),
),
paths(get_health, get_version, get_stats),
tags(
@@ -88,7 +85,7 @@ pub mod tasks;
url = "/",
description = "Local server",
)),
components(schemas(PaginationView<KeyView>, PaginationView<IndexView>, IndexView, DocumentDeletionByFilter, AllBatches, BatchStats, ProgressStepView, ProgressView, BatchView, RuntimeTogglableFeatures, SwapIndexesPayload, DocumentEditionByFunction, MergeFacets, FederationOptions, SearchQueryWithIndex, Federation, FederatedSearch, FederatedSearchResult, SearchResults, SearchResultWithIndex, SimilarQuery, SimilarResult, PaginationView<serde_json::Value>, BrowseQuery, UpdateIndexRequest, IndexUid, IndexCreateRequest, KeyView, Action, CreateApiKey, UpdateStderrLogs, LogMode, GetLogs, IndexStats, Stats, HealthStatus, HealthResponse, VersionResponse, Code, ErrorType, AllTasks, TaskView, Status, DetailsView, ResponseError, Settings<Unchecked>, Settings<Checked>, TypoSettings, MinWordSizeTyposSetting, FacetingSettings, PaginationSettings, SummarizedTaskView, Kind, Network, Remote))
components(schemas(PaginationView<KeyView>, PaginationView<IndexView>, IndexView, DocumentDeletionByFilter, AllBatches, BatchStats, ProgressStepView, ProgressView, BatchView, RuntimeTogglableFeatures, SwapIndexesPayload, DocumentEditionByFunction, MergeFacets, FederationOptions, SearchQueryWithIndex, Federation, FederatedSearch, FederatedSearchResult, SearchResults, SearchResultWithIndex, SimilarQuery, SimilarResult, PaginationView<serde_json::Value>, BrowseQuery, UpdateIndexRequest, IndexUid, IndexCreateRequest, KeyView, Action, CreateApiKey, UpdateStderrLogs, LogMode, GetLogs, IndexStats, Stats, HealthStatus, HealthResponse, VersionResponse, Code, ErrorType, AllTasks, TaskView, Status, DetailsView, ResponseError, Settings<Unchecked>, Settings<Checked>, TypoSettings, MinWordSizeTyposSetting, FacetingSettings, PaginationSettings, SummarizedTaskView, Kind))
)]
pub struct MeilisearchApi;
@@ -106,8 +103,7 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
.service(web::scope("/multi-search").configure(multi_search::configure))
.service(web::scope("/swap-indexes").configure(swap_indexes::configure))
.service(web::scope("/metrics").configure(metrics::configure))
.service(web::scope("/experimental-features").configure(features::configure))
.service(web::scope("/network").configure(network::configure));
.service(web::scope("/experimental-features").configure(features::configure));
#[cfg(feature = "swagger")]
{
@@ -363,9 +359,9 @@ pub async fn running() -> HttpResponse {
#[derive(Serialize, Debug, ToSchema)]
#[serde(rename_all = "camelCase")]
pub struct Stats {
/// The disk space used by the database, in bytes.
pub database_size: u64,
/// The size of the database, in bytes.
pub database_size: u64,
#[serde(skip)]
pub used_database_size: u64,
/// The date of the last update in the RFC 3339 formats. Can be `null` if no update has ever been processed.
#[serde(serialize_with = "time::serde::rfc3339::option::serialize")]
@@ -387,7 +383,6 @@ pub struct Stats {
(status = 200, description = "The stats of the instance", body = Stats, content_type = "application/json", example = json!(
{
"databaseSize": 567,
"usedDatabaseSize": 456,
"lastUpdate": "2019-11-20T09:40:33.711324Z",
"indexes": {
"movies": {

View File

@@ -20,7 +20,6 @@ use crate::routes::indexes::search::search_kind;
use crate::search::{
add_search_rules, perform_federated_search, perform_search, FederatedSearch,
FederatedSearchResult, RetrieveVectors, SearchQueryWithIndex, SearchResultWithIndex,
PROXY_SEARCH_HEADER, PROXY_SEARCH_HEADER_VALUE,
};
use crate::search_queue::SearchQueue;
@@ -49,7 +48,6 @@ pub struct SearchResults {
/// Bundle multiple search queries in a single API request. Use this endpoint to search through multiple indexes at once.
#[utoipa::path(
post,
request_body = FederatedSearch,
path = "",
tag = "Multi-search",
security(("Bearer" = ["search", "*"])),
@@ -188,22 +186,18 @@ pub async fn multi_search_with_post(
let response = match federation {
Some(federation) => {
// check remote header
let is_proxy = req
.headers()
.get(PROXY_SEARCH_HEADER)
.is_some_and(|value| value.as_bytes() == PROXY_SEARCH_HEADER_VALUE.as_bytes());
let search_result =
perform_federated_search(&index_scheduler, queries, federation, features, is_proxy)
.await;
let search_result = tokio::task::spawn_blocking(move || {
perform_federated_search(&index_scheduler, queries, federation, features)
})
.await;
permit.drop().await;
if search_result.is_ok() {
if let Ok(Ok(_)) = search_result {
multi_aggregate.succeed();
}
analytics.publish(multi_aggregate, &req);
HttpResponse::Ok().json(search_result?)
HttpResponse::Ok().json(search_result??)
}
None => {
// Explicitly expect a `(ResponseError, usize)` for the error type rather than `ResponseError` only,

View File

@@ -13,8 +13,6 @@ pub struct MultiSearchAggregator {
// sum of the number of distinct indexes in each single request, use with total_received to compute an avg
total_distinct_index_count: usize,
// sum of the number of distinct remotes in each single request, use with total_received to compute an avg
total_distinct_remote_count: usize,
// number of queries with a single index, use with total_received to compute a proportion
total_single_index: usize,
@@ -33,49 +31,46 @@ impl MultiSearchAggregator {
pub fn from_federated_search(federated_search: &FederatedSearch) -> Self {
let use_federation = federated_search.federation.is_some();
let mut distinct_indexes = HashSet::with_capacity(federated_search.queries.len());
let mut distinct_remotes = HashSet::with_capacity(federated_search.queries.len());
let distinct_indexes: HashSet<_> = federated_search
.queries
.iter()
.map(|query| {
let query = &query;
// make sure we get a compilation error if a field gets added to / removed from SearchQueryWithIndex
let SearchQueryWithIndex {
index_uid,
federation_options: _,
q: _,
vector: _,
offset: _,
limit: _,
page: _,
hits_per_page: _,
attributes_to_retrieve: _,
retrieve_vectors: _,
attributes_to_crop: _,
crop_length: _,
attributes_to_highlight: _,
show_ranking_score: _,
show_ranking_score_details: _,
show_matches_position: _,
filter: _,
sort: _,
distinct: _,
facets: _,
highlight_pre_tag: _,
highlight_post_tag: _,
crop_marker: _,
matching_strategy: _,
attributes_to_search_on: _,
hybrid: _,
ranking_score_threshold: _,
locales: _,
} = query;
// make sure we get a compilation error if a field gets added to / removed from SearchQueryWithIndex
for SearchQueryWithIndex {
index_uid,
federation_options,
q: _,
vector: _,
offset: _,
limit: _,
page: _,
hits_per_page: _,
attributes_to_retrieve: _,
retrieve_vectors: _,
attributes_to_crop: _,
crop_length: _,
attributes_to_highlight: _,
show_ranking_score: _,
show_ranking_score_details: _,
show_matches_position: _,
filter: _,
sort: _,
distinct: _,
facets: _,
highlight_pre_tag: _,
highlight_post_tag: _,
crop_marker: _,
matching_strategy: _,
attributes_to_search_on: _,
hybrid: _,
ranking_score_threshold: _,
locales: _,
} in &federated_search.queries
{
if let Some(federation_options) = federation_options {
if let Some(remote) = &federation_options.remote {
distinct_remotes.insert(remote.as_str());
}
}
distinct_indexes.insert(index_uid.as_str());
}
index_uid.as_str()
})
.collect();
let show_ranking_score =
federated_search.queries.iter().any(|query| query.show_ranking_score);
@@ -86,7 +81,6 @@ impl MultiSearchAggregator {
total_received: 1,
total_succeeded: 0,
total_distinct_index_count: distinct_indexes.len(),
total_distinct_remote_count: distinct_remotes.len(),
total_single_index: if distinct_indexes.len() == 1 { 1 } else { 0 },
total_search_count: federated_search.queries.len(),
show_ranking_score,
@@ -116,8 +110,6 @@ impl Aggregate for MultiSearchAggregator {
let total_succeeded = this.total_succeeded.saturating_add(new.total_succeeded);
let total_distinct_index_count =
this.total_distinct_index_count.saturating_add(new.total_distinct_index_count);
let total_distinct_remote_count =
this.total_distinct_remote_count.saturating_add(new.total_distinct_remote_count);
let total_single_index = this.total_single_index.saturating_add(new.total_single_index);
let total_search_count = this.total_search_count.saturating_add(new.total_search_count);
let show_ranking_score = this.show_ranking_score || new.show_ranking_score;
@@ -129,7 +121,6 @@ impl Aggregate for MultiSearchAggregator {
total_received,
total_succeeded,
total_distinct_index_count,
total_distinct_remote_count,
total_single_index,
total_search_count,
show_ranking_score,
@@ -143,7 +134,6 @@ impl Aggregate for MultiSearchAggregator {
total_received,
total_succeeded,
total_distinct_index_count,
total_distinct_remote_count,
total_single_index,
total_search_count,
show_ranking_score,
@@ -162,10 +152,6 @@ impl Aggregate for MultiSearchAggregator {
"total_distinct_index_count": total_distinct_index_count,
"avg_distinct_index_count": (total_distinct_index_count as f64) / (total_received as f64), // not 0 else returned early
},
"remotes": {
"total_distinct_remote_count": total_distinct_remote_count,
"avg_distinct_remote_count": (total_distinct_remote_count as f64) / (total_received as f64), // not 0 else returned early
},
"searches": {
"total_search_count": total_search_count,
"avg_search_count": (total_search_count as f64) / (total_received as f64),

View File

@@ -1,261 +0,0 @@
use std::collections::BTreeMap;
use actix_web::web::{self, Data};
use actix_web::{HttpRequest, HttpResponse};
use deserr::actix_web::AwebJson;
use deserr::Deserr;
use index_scheduler::IndexScheduler;
use itertools::{EitherOrBoth, Itertools};
use meilisearch_types::deserr::DeserrJsonError;
use meilisearch_types::error::deserr_codes::{
InvalidNetworkRemotes, InvalidNetworkSearchApiKey, InvalidNetworkSelf, InvalidNetworkUrl,
};
use meilisearch_types::error::ResponseError;
use meilisearch_types::features::{Network as DbNetwork, Remote as DbRemote};
use meilisearch_types::keys::actions;
use meilisearch_types::milli::update::Setting;
use serde::Serialize;
use tracing::debug;
use utoipa::{OpenApi, ToSchema};
use crate::analytics::{Aggregate, Analytics};
use crate::extractors::authentication::policies::ActionPolicy;
use crate::extractors::authentication::GuardedData;
use crate::extractors::sequential_extractor::SeqHandler;
#[derive(OpenApi)]
#[openapi(
paths(get_network, patch_network),
tags((
name = "Network",
description = "The `/network` route allows you to describe the topology of a network of Meilisearch instances.
This route is **synchronous**. This means that no task object will be returned, and any change to the network will be made available immediately.",
external_docs(url = "https://www.meilisearch.com/docs/reference/api/network"),
)),
)]
pub struct NetworkApi;
pub fn configure(cfg: &mut web::ServiceConfig) {
cfg.service(
web::resource("")
.route(web::get().to(get_network))
.route(web::patch().to(SeqHandler(patch_network))),
);
}
/// Get network topology
///
/// Get a list of all Meilisearch instances currently known to this instance.
#[utoipa::path(
get,
path = "",
tag = "Network",
security(("Bearer" = ["network.get", "network.*", "*"])),
responses(
(status = OK, description = "Known nodes are returned", body = Network, content_type = "application/json", example = json!(
{
"self": "ms-0",
"remotes": {
"ms-0": Remote { url: Setting::Set("http://localhost:7700".into()), search_api_key: Setting::Reset },
"ms-1": Remote { url: Setting::Set("http://localhost:7701".into()), search_api_key: Setting::Set("foo".into()) },
"ms-2": Remote { url: Setting::Set("http://localhost:7702".into()), search_api_key: Setting::Set("bar".into()) },
}
})),
(status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!(
{
"message": "The Authorization header is missing. It must use the bearer authorization method.",
"code": "missing_authorization_header",
"type": "auth",
"link": "https://docs.meilisearch.com/errors#missing_authorization_header"
}
)),
)
)]
async fn get_network(
index_scheduler: GuardedData<ActionPolicy<{ actions::NETWORK_GET }>, Data<IndexScheduler>>,
) -> Result<HttpResponse, ResponseError> {
index_scheduler.features().check_network("Using the /network route")?;
let network = index_scheduler.network();
debug!(returns = ?network, "Get network");
Ok(HttpResponse::Ok().json(network))
}
#[derive(Debug, Deserr, ToSchema, Serialize)]
#[deserr(error = DeserrJsonError<InvalidNetworkRemotes>, rename_all = camelCase, deny_unknown_fields)]
#[serde(rename_all = "camelCase")]
#[schema(rename_all = "camelCase")]
pub struct Remote {
#[schema(value_type = Option<String>, example = json!({
"ms-0": Remote { url: Setting::Set("http://localhost:7700".into()), search_api_key: Setting::Reset },
"ms-1": Remote { url: Setting::Set("http://localhost:7701".into()), search_api_key: Setting::Set("foo".into()) },
"ms-2": Remote { url: Setting::Set("http://localhost:7702".into()), search_api_key: Setting::Set("bar".into()) },
}))]
#[deserr(default, error = DeserrJsonError<InvalidNetworkUrl>)]
#[serde(default)]
pub url: Setting<String>,
#[schema(value_type = Option<String>, example = json!("XWnBI8QHUc-4IlqbKPLUDuhftNq19mQtjc6JvmivzJU"))]
#[deserr(default, error = DeserrJsonError<InvalidNetworkSearchApiKey>)]
#[serde(default)]
pub search_api_key: Setting<String>,
}
#[derive(Debug, Deserr, ToSchema, Serialize)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
#[serde(rename_all = "camelCase")]
#[schema(rename_all = "camelCase")]
pub struct Network {
#[schema(value_type = Option<BTreeMap<String, Remote>>, example = json!("http://localhost:7700"))]
#[deserr(default, error = DeserrJsonError<InvalidNetworkRemotes>)]
#[serde(default)]
pub remotes: Setting<BTreeMap<String, Option<Remote>>>,
#[schema(value_type = Option<String>, example = json!("ms-00"), rename = "self")]
#[serde(default, rename = "self")]
#[deserr(default, rename = "self", error = DeserrJsonError<InvalidNetworkSelf>)]
pub local: Setting<String>,
}
impl Remote {
pub fn try_into_db_node(self, name: &str) -> Result<DbRemote, ResponseError> {
Ok(DbRemote {
url: self.url.set().ok_or(ResponseError::from_msg(
format!("Missing field `.remotes.{name}.url`"),
meilisearch_types::error::Code::MissingNetworkUrl,
))?,
search_api_key: self.search_api_key.set(),
})
}
}
#[derive(Serialize)]
pub struct PatchNetworkAnalytics {
network_size: usize,
network_has_self: bool,
}
impl Aggregate for PatchNetworkAnalytics {
fn event_name(&self) -> &'static str {
"Network Updated"
}
fn aggregate(self: Box<Self>, new: Box<Self>) -> Box<Self> {
Box::new(Self { network_size: new.network_size, network_has_self: new.network_has_self })
}
fn into_event(self: Box<Self>) -> serde_json::Value {
serde_json::to_value(*self).unwrap_or_default()
}
}
/// Configure Network
///
/// Add or remove nodes from network.
#[utoipa::path(
patch,
path = "",
tag = "Network",
request_body = Network,
security(("Bearer" = ["network.update", "network.*", "*"])),
responses(
(status = OK, description = "New network state is returned", body = Network, content_type = "application/json", example = json!(
{
"self": "ms-0",
"remotes": {
"ms-0": Remote { url: Setting::Set("http://localhost:7700".into()), search_api_key: Setting::Reset },
"ms-1": Remote { url: Setting::Set("http://localhost:7701".into()), search_api_key: Setting::Set("foo".into()) },
"ms-2": Remote { url: Setting::Set("http://localhost:7702".into()), search_api_key: Setting::Set("bar".into()) },
}
})),
(status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!(
{
"message": "The Authorization header is missing. It must use the bearer authorization method.",
"code": "missing_authorization_header",
"type": "auth",
"link": "https://docs.meilisearch.com/errors#missing_authorization_header"
}
)),
)
)]
async fn patch_network(
index_scheduler: GuardedData<ActionPolicy<{ actions::NETWORK_UPDATE }>, Data<IndexScheduler>>,
new_network: AwebJson<Network, DeserrJsonError>,
req: HttpRequest,
analytics: Data<Analytics>,
) -> Result<HttpResponse, ResponseError> {
index_scheduler.features().check_network("Using the /network route")?;
let new_network = new_network.0;
let old_network = index_scheduler.network();
debug!(parameters = ?new_network, "Patch network");
let merged_self = match new_network.local {
Setting::Set(new_self) => Some(new_self),
Setting::Reset => None,
Setting::NotSet => old_network.local,
};
let merged_remotes = match new_network.remotes {
Setting::Set(new_remotes) => {
let mut merged_remotes = BTreeMap::new();
for either_or_both in old_network
.remotes
.into_iter()
.merge_join_by(new_remotes.into_iter(), |left, right| left.0.cmp(&right.0))
{
match either_or_both {
EitherOrBoth::Both((key, old), (_, Some(new))) => {
let DbRemote { url: old_url, search_api_key: old_search_api_key } = old;
let Remote { url: new_url, search_api_key: new_search_api_key } = new;
let merged = DbRemote {
url: match new_url {
Setting::Set(new_url) => new_url,
Setting::Reset => {
return Err(ResponseError::from_msg(
format!(
"Field `.remotes.{key}.url` cannot be set to `null`"
),
meilisearch_types::error::Code::InvalidNetworkUrl,
))
}
Setting::NotSet => old_url,
},
search_api_key: match new_search_api_key {
Setting::Set(new_search_api_key) => Some(new_search_api_key),
Setting::Reset => None,
Setting::NotSet => old_search_api_key,
},
};
merged_remotes.insert(key, merged);
}
EitherOrBoth::Both((_, _), (_, None)) | EitherOrBoth::Right((_, None)) => {}
EitherOrBoth::Left((key, node)) => {
merged_remotes.insert(key, node);
}
EitherOrBoth::Right((key, Some(node))) => {
let node = node.try_into_db_node(&key)?;
merged_remotes.insert(key, node);
}
}
}
merged_remotes
}
Setting::Reset => BTreeMap::new(),
Setting::NotSet => old_network.remotes,
};
analytics.publish(
PatchNetworkAnalytics {
network_size: merged_remotes.len(),
network_has_self: merged_self.is_some(),
},
&req,
);
let merged_network = DbNetwork { local: merged_self, remotes: merged_remotes };
index_scheduler.put_network(merged_network.clone())?;
debug!(returns = ?merged_network, "Patch network");
Ok(HttpResponse::Ok().json(merged_network))
}

View File

@@ -1,5 +1,3 @@
use std::io::ErrorKind;
use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse};
use deserr::actix_web::AwebQueryParameter;
@@ -18,7 +16,6 @@ use serde::Serialize;
use time::format_description::well_known::Rfc3339;
use time::macros::format_description;
use time::{Date, Duration, OffsetDateTime, Time};
use tokio::io::AsyncReadExt;
use tokio::task;
use utoipa::{IntoParams, OpenApi, ToSchema};
@@ -47,11 +44,7 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
.route(web::delete().to(SeqHandler(delete_tasks))),
)
.service(web::resource("/cancel").route(web::post().to(SeqHandler(cancel_tasks))))
.service(web::resource("/{task_id}").route(web::get().to(SeqHandler(get_task))))
.service(
web::resource("/{task_id}/documents")
.route(web::get().to(SeqHandler(get_task_documents_file))),
);
.service(web::resource("/{task_id}").route(web::get().to(SeqHandler(get_task))));
}
#[derive(Debug, Deserr, IntoParams)]
@@ -646,76 +639,6 @@ async fn get_task(
}
}
/// Get a task's documents.
///
/// Get a [task's documents file](https://www.meilisearch.com/docs/learn/async/asynchronous_operations).
#[utoipa::path(
get,
path = "/{taskUid}/documents",
tag = "Tasks",
security(("Bearer" = ["tasks.get", "tasks.*", "*"])),
params(("taskUid", format = UInt32, example = 0, description = "The task identifier", nullable = false)),
responses(
(status = 200, description = "The content of the task update", body = serde_json::Value, content_type = "application/x-ndjson"),
(status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!(
{
"message": "The Authorization header is missing. It must use the bearer authorization method.",
"code": "missing_authorization_header",
"type": "auth",
"link": "https://docs.meilisearch.com/errors#missing_authorization_header"
}
)),
(status = 404, description = "The task uid does not exists", body = ResponseError, content_type = "application/json", example = json!(
{
"message": "Task :taskUid not found.",
"code": "task_not_found",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors/#task_not_found"
}
))
)
)]
async fn get_task_documents_file(
index_scheduler: GuardedData<ActionPolicy<{ actions::TASKS_GET }>, Data<IndexScheduler>>,
task_uid: web::Path<String>,
) -> Result<HttpResponse, ResponseError> {
index_scheduler.features().check_get_task_documents_route()?;
let task_uid_string = task_uid.into_inner();
let task_uid: TaskId = match task_uid_string.parse() {
Ok(id) => id,
Err(_e) => {
return Err(index_scheduler::Error::InvalidTaskUid { task_uid: task_uid_string }.into())
}
};
let query = index_scheduler::Query { uids: Some(vec![task_uid]), ..Query::default() };
let filters = index_scheduler.filters();
let (tasks, _) = index_scheduler.get_tasks_from_authorized_indexes(&query, filters)?;
if let Some(task) = tasks.first() {
match task.content_uuid() {
Some(uuid) => {
let mut tfile = match index_scheduler.queue.update_file(uuid) {
Ok(file) => tokio::fs::File::from_std(file),
Err(file_store::Error::IoError(e)) if e.kind() == ErrorKind::NotFound => {
return Err(index_scheduler::Error::TaskFileNotFound(task_uid).into())
}
Err(e) => return Err(e.into()),
};
// Yes, that's awful to put everything in memory when we could have streamed it from
// disk but it's really (really) complex to do with the current state of async Rust.
let mut content = String::new();
tfile.read_to_string(&mut content).await?;
Ok(HttpResponse::Ok().content_type("application/x-ndjson").body(content))
}
None => Err(index_scheduler::Error::TaskFileNotFound(task_uid).into()),
}
} else {
Err(index_scheduler::Error::TaskNotFound(task_uid).into())
}
}
pub enum DeserializeDateOption {
Before,
After,
@@ -989,14 +912,14 @@ mod tests {
{
let params = "types=createIndex";
let err = deserr_query_params::<TaskDeletionOrCancelationQuery>(params).unwrap_err();
snapshot!(meili_snap::json_string!(err), @r#"
snapshot!(meili_snap::json_string!(err), @r###"
{
"message": "Invalid value in parameter `types`: `createIndex` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentEdition`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`, `upgradeDatabase`.",
"message": "Invalid value in parameter `types`: `createIndex` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentEdition`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`.",
"code": "invalid_task_types",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_task_types"
}
"#);
"###);
}
}
#[test]

View File

@@ -0,0 +1,923 @@
use std::cmp::Ordering;
use std::collections::BTreeMap;
use std::fmt;
use std::iter::Zip;
use std::rc::Rc;
use std::str::FromStr as _;
use std::time::Duration;
use std::vec::{IntoIter, Vec};
use actix_http::StatusCode;
use index_scheduler::{IndexScheduler, RoFeatures};
use indexmap::IndexMap;
use meilisearch_types::deserr::DeserrJsonError;
use meilisearch_types::error::deserr_codes::{
InvalidMultiSearchFacetsByIndex, InvalidMultiSearchMaxValuesPerFacet,
InvalidMultiSearchMergeFacets, InvalidMultiSearchWeight, InvalidSearchLimit,
InvalidSearchOffset,
};
use meilisearch_types::error::ResponseError;
use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::milli::score_details::{ScoreDetails, ScoreValue};
use meilisearch_types::milli::{self, DocumentId, OrderBy, TimeBudget};
use roaring::RoaringBitmap;
use serde::Serialize;
use utoipa::ToSchema;
use super::ranking_rules::{self, RankingRules};
use super::{
compute_facet_distribution_stats, prepare_search, AttributesFormat, ComputedFacets, FacetStats,
HitMaker, HitsInfo, RetrieveVectors, SearchHit, SearchKind, SearchQuery, SearchQueryWithIndex,
};
use crate::error::MeilisearchHttpError;
use crate::routes::indexes::search::search_kind;
pub const DEFAULT_FEDERATED_WEIGHT: f64 = 1.0;
#[derive(Debug, Default, Clone, Copy, PartialEq, deserr::Deserr, ToSchema)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
pub struct FederationOptions {
#[deserr(default, error = DeserrJsonError<InvalidMultiSearchWeight>)]
#[schema(value_type = f64)]
pub weight: Weight,
}
#[derive(Debug, Clone, Copy, PartialEq, deserr::Deserr)]
#[deserr(try_from(f64) = TryFrom::try_from -> InvalidMultiSearchWeight)]
pub struct Weight(f64);
impl Default for Weight {
fn default() -> Self {
Weight(DEFAULT_FEDERATED_WEIGHT)
}
}
impl std::convert::TryFrom<f64> for Weight {
type Error = InvalidMultiSearchWeight;
fn try_from(f: f64) -> Result<Self, Self::Error> {
if f < 0.0 {
Err(InvalidMultiSearchWeight)
} else {
Ok(Weight(f))
}
}
}
impl std::ops::Deref for Weight {
type Target = f64;
fn deref(&self) -> &Self::Target {
&self.0
}
}
#[derive(Debug, deserr::Deserr, ToSchema)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
#[schema(rename_all = "camelCase")]
pub struct Federation {
#[deserr(default = super::DEFAULT_SEARCH_LIMIT(), error = DeserrJsonError<InvalidSearchLimit>)]
pub limit: usize,
#[deserr(default = super::DEFAULT_SEARCH_OFFSET(), error = DeserrJsonError<InvalidSearchOffset>)]
pub offset: usize,
#[deserr(default, error = DeserrJsonError<InvalidMultiSearchFacetsByIndex>)]
pub facets_by_index: BTreeMap<IndexUid, Option<Vec<String>>>,
#[deserr(default, error = DeserrJsonError<InvalidMultiSearchMergeFacets>)]
pub merge_facets: Option<MergeFacets>,
}
#[derive(Copy, Clone, Debug, deserr::Deserr, Default, ToSchema)]
#[deserr(error = DeserrJsonError<InvalidMultiSearchMergeFacets>, rename_all = camelCase, deny_unknown_fields)]
#[schema(rename_all = "camelCase")]
pub struct MergeFacets {
#[deserr(default, error = DeserrJsonError<InvalidMultiSearchMaxValuesPerFacet>)]
pub max_values_per_facet: Option<usize>,
}
#[derive(Debug, deserr::Deserr, ToSchema)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
#[schema(rename_all = "camelCase")]
pub struct FederatedSearch {
pub queries: Vec<SearchQueryWithIndex>,
#[deserr(default)]
pub federation: Option<Federation>,
}
#[derive(Serialize, Clone, ToSchema)]
#[serde(rename_all = "camelCase")]
#[schema(rename_all = "camelCase")]
pub struct FederatedSearchResult {
pub hits: Vec<SearchHit>,
pub processing_time_ms: u128,
#[serde(flatten)]
pub hits_info: HitsInfo,
#[serde(skip_serializing_if = "Option::is_none")]
pub semantic_hit_count: Option<u32>,
#[serde(skip_serializing_if = "Option::is_none")]
#[schema(value_type = Option<BTreeMap<String, BTreeMap<String, u64>>>)]
pub facet_distribution: Option<BTreeMap<String, IndexMap<String, u64>>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub facet_stats: Option<BTreeMap<String, FacetStats>>,
#[serde(skip_serializing_if = "FederatedFacets::is_empty")]
pub facets_by_index: FederatedFacets,
// These fields are only used for analytics purposes
#[serde(skip)]
pub degraded: bool,
#[serde(skip)]
pub used_negative_operator: bool,
}
impl fmt::Debug for FederatedSearchResult {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let FederatedSearchResult {
hits,
processing_time_ms,
hits_info,
semantic_hit_count,
degraded,
used_negative_operator,
facet_distribution,
facet_stats,
facets_by_index,
} = self;
let mut debug = f.debug_struct("SearchResult");
// The most important thing when looking at a search result is the time it took to process
debug.field("processing_time_ms", &processing_time_ms);
debug.field("hits", &format!("[{} hits returned]", hits.len()));
debug.field("hits_info", &hits_info);
if *used_negative_operator {
debug.field("used_negative_operator", used_negative_operator);
}
if *degraded {
debug.field("degraded", degraded);
}
if let Some(facet_distribution) = facet_distribution {
debug.field("facet_distribution", &facet_distribution);
}
if let Some(facet_stats) = facet_stats {
debug.field("facet_stats", &facet_stats);
}
if let Some(semantic_hit_count) = semantic_hit_count {
debug.field("semantic_hit_count", &semantic_hit_count);
}
if !facets_by_index.is_empty() {
debug.field("facets_by_index", &facets_by_index);
}
debug.finish()
}
}
struct WeightedScore<'a> {
details: &'a [ScoreDetails],
weight: f64,
}
impl<'a> WeightedScore<'a> {
pub fn new(details: &'a [ScoreDetails], weight: f64) -> Self {
Self { details, weight }
}
pub fn weighted_global_score(&self) -> f64 {
ScoreDetails::global_score(self.details.iter()) * self.weight
}
pub fn compare_weighted_global_scores(&self, other: &Self) -> Ordering {
self.weighted_global_score()
.partial_cmp(&other.weighted_global_score())
// both are numbers, possibly infinite
.unwrap()
}
pub fn compare(&self, other: &Self) -> Ordering {
let mut left_it = ScoreDetails::score_values(self.details.iter());
let mut right_it = ScoreDetails::score_values(other.details.iter());
loop {
let left = left_it.next();
let right = right_it.next();
match (left, right) {
(None, None) => return Ordering::Equal,
(None, Some(_)) => return Ordering::Less,
(Some(_), None) => return Ordering::Greater,
(Some(ScoreValue::Score(left)), Some(ScoreValue::Score(right))) => {
let left = left * self.weight;
let right = right * other.weight;
if (left - right).abs() <= f64::EPSILON {
continue;
}
return left.partial_cmp(&right).unwrap();
}
(Some(ScoreValue::Sort(left)), Some(ScoreValue::Sort(right))) => {
match left.partial_cmp(right) {
Some(Ordering::Equal) => continue,
Some(order) => return order,
None => return self.compare_weighted_global_scores(other),
}
}
(Some(ScoreValue::GeoSort(left)), Some(ScoreValue::GeoSort(right))) => {
match left.partial_cmp(right) {
Some(Ordering::Equal) => continue,
Some(order) => return order,
None => {
return self.compare_weighted_global_scores(other);
}
}
}
// not comparable details, use global
(Some(ScoreValue::Score(_)), Some(_))
| (Some(_), Some(ScoreValue::Score(_)))
| (Some(ScoreValue::GeoSort(_)), Some(ScoreValue::Sort(_)))
| (Some(ScoreValue::Sort(_)), Some(ScoreValue::GeoSort(_))) => {
let left_count = left_it.count();
let right_count = right_it.count();
// compare how many remaining groups of rules each side has.
// the group with the most remaining groups wins.
return left_count
.cmp(&right_count)
// breaks ties with the global ranking score
.then_with(|| self.compare_weighted_global_scores(other));
}
}
}
}
}
struct QueryByIndex {
query: SearchQuery,
federation_options: FederationOptions,
query_index: usize,
}
struct SearchResultByQuery<'a> {
documents_ids: Vec<DocumentId>,
document_scores: Vec<Vec<ScoreDetails>>,
federation_options: FederationOptions,
hit_maker: HitMaker<'a>,
query_index: usize,
}
struct SearchResultByQueryIter<'a> {
it: Zip<IntoIter<DocumentId>, IntoIter<Vec<ScoreDetails>>>,
federation_options: FederationOptions,
hit_maker: Rc<HitMaker<'a>>,
query_index: usize,
}
impl<'a> SearchResultByQueryIter<'a> {
fn new(
SearchResultByQuery {
documents_ids,
document_scores,
federation_options,
hit_maker,
query_index,
}: SearchResultByQuery<'a>,
) -> Self {
let it = documents_ids.into_iter().zip(document_scores);
Self { it, federation_options, hit_maker: Rc::new(hit_maker), query_index }
}
}
struct SearchResultByQueryIterItem<'a> {
docid: DocumentId,
score: Vec<ScoreDetails>,
federation_options: FederationOptions,
hit_maker: Rc<HitMaker<'a>>,
query_index: usize,
}
fn merge_index_local_results(
results_by_query: Vec<SearchResultByQuery<'_>>,
) -> impl Iterator<Item = SearchResultByQueryIterItem> + '_ {
itertools::kmerge_by(
results_by_query.into_iter().map(SearchResultByQueryIter::new),
|left: &SearchResultByQueryIterItem, right: &SearchResultByQueryIterItem| {
let left_score = WeightedScore::new(&left.score, *left.federation_options.weight);
let right_score = WeightedScore::new(&right.score, *right.federation_options.weight);
match left_score.compare(&right_score) {
// the biggest score goes first
Ordering::Greater => true,
// break ties using query index
Ordering::Equal => left.query_index < right.query_index,
Ordering::Less => false,
}
},
)
}
fn merge_index_global_results(
results_by_index: Vec<SearchResultByIndex>,
) -> impl Iterator<Item = SearchHitByIndex> {
itertools::kmerge_by(
results_by_index.into_iter().map(|result_by_index| result_by_index.hits.into_iter()),
|left: &SearchHitByIndex, right: &SearchHitByIndex| {
let left_score = WeightedScore::new(&left.score, *left.federation_options.weight);
let right_score = WeightedScore::new(&right.score, *right.federation_options.weight);
match left_score.compare(&right_score) {
// the biggest score goes first
Ordering::Greater => true,
// break ties using query index
Ordering::Equal => left.query_index < right.query_index,
Ordering::Less => false,
}
},
)
}
impl<'a> Iterator for SearchResultByQueryIter<'a> {
type Item = SearchResultByQueryIterItem<'a>;
fn next(&mut self) -> Option<Self::Item> {
let (docid, score) = self.it.next()?;
Some(SearchResultByQueryIterItem {
docid,
score,
federation_options: self.federation_options,
hit_maker: Rc::clone(&self.hit_maker),
query_index: self.query_index,
})
}
}
struct SearchHitByIndex {
hit: SearchHit,
score: Vec<ScoreDetails>,
federation_options: FederationOptions,
query_index: usize,
}
struct SearchResultByIndex {
index: String,
hits: Vec<SearchHitByIndex>,
estimated_total_hits: usize,
degraded: bool,
used_negative_operator: bool,
facets: Option<ComputedFacets>,
}
#[derive(Debug, Clone, Default, Serialize, ToSchema)]
pub struct FederatedFacets(pub BTreeMap<String, ComputedFacets>);
impl FederatedFacets {
pub fn insert(&mut self, index: String, facets: Option<ComputedFacets>) {
if let Some(facets) = facets {
self.0.insert(index, facets);
}
}
pub fn is_empty(&self) -> bool {
self.0.is_empty()
}
pub fn merge(
self,
MergeFacets { max_values_per_facet }: MergeFacets,
facet_order: BTreeMap<String, (String, OrderBy)>,
) -> Option<ComputedFacets> {
if self.is_empty() {
return None;
}
let mut distribution: BTreeMap<String, _> = Default::default();
let mut stats: BTreeMap<String, FacetStats> = Default::default();
for facets_by_index in self.0.into_values() {
for (facet, index_distribution) in facets_by_index.distribution {
match distribution.entry(facet) {
std::collections::btree_map::Entry::Vacant(entry) => {
entry.insert(index_distribution);
}
std::collections::btree_map::Entry::Occupied(mut entry) => {
let distribution = entry.get_mut();
for (value, index_count) in index_distribution {
distribution
.entry(value)
.and_modify(|count| *count += index_count)
.or_insert(index_count);
}
}
}
}
for (facet, index_stats) in facets_by_index.stats {
match stats.entry(facet) {
std::collections::btree_map::Entry::Vacant(entry) => {
entry.insert(index_stats);
}
std::collections::btree_map::Entry::Occupied(mut entry) => {
let stats = entry.get_mut();
stats.min = f64::min(stats.min, index_stats.min);
stats.max = f64::max(stats.max, index_stats.max);
}
}
}
}
// fixup order
for (facet, values) in &mut distribution {
let order_by = facet_order.get(facet).map(|(_, order)| *order).unwrap_or_default();
match order_by {
OrderBy::Lexicographic => {
values.sort_unstable_by(|left, _, right, _| left.cmp(right))
}
OrderBy::Count => {
values.sort_unstable_by(|_, left, _, right| {
left.cmp(right)
// biggest first
.reverse()
})
}
}
if let Some(max_values_per_facet) = max_values_per_facet {
values.truncate(max_values_per_facet)
};
}
Some(ComputedFacets { distribution, stats })
}
}
pub fn perform_federated_search(
index_scheduler: &IndexScheduler,
queries: Vec<SearchQueryWithIndex>,
mut federation: Federation,
features: RoFeatures,
) -> Result<FederatedSearchResult, ResponseError> {
let before_search = std::time::Instant::now();
// this implementation partition the queries by index to guarantee an important property:
// - all the queries to a particular index use the same read transaction.
// This is an important property, otherwise we cannot guarantee the self-consistency of the results.
// 1. partition queries by index
let mut queries_by_index: BTreeMap<String, Vec<QueryByIndex>> = Default::default();
for (query_index, federated_query) in queries.into_iter().enumerate() {
if let Some(pagination_field) = federated_query.has_pagination() {
return Err(MeilisearchHttpError::PaginationInFederatedQuery(
query_index,
pagination_field,
)
.into());
}
if let Some(facets) = federated_query.has_facets() {
let facets = facets.to_owned();
return Err(MeilisearchHttpError::FacetsInFederatedQuery(
query_index,
federated_query.index_uid.into_inner(),
facets,
)
.into());
}
let (index_uid, query, federation_options) = federated_query.into_index_query_federation();
queries_by_index.entry(index_uid.into_inner()).or_default().push(QueryByIndex {
query,
federation_options: federation_options.unwrap_or_default(),
query_index,
})
}
// 2. perform queries, merge and make hits index by index
let required_hit_count = federation.limit + federation.offset;
// In step (2), semantic_hit_count will be set to Some(0) if any search kind uses semantic
// Then in step (3), we'll update its value if there is any semantic search
let mut semantic_hit_count = None;
let mut results_by_index = Vec::with_capacity(queries_by_index.len());
let mut previous_query_data: Option<(RankingRules, usize, String)> = None;
// remember the order and name of first index for each facet when merging with index settings
// to detect if the order is inconsistent for a facet.
let mut facet_order: Option<BTreeMap<String, (String, OrderBy)>> = match federation.merge_facets
{
Some(MergeFacets { .. }) => Some(Default::default()),
_ => None,
};
for (index_uid, queries) in queries_by_index {
let first_query_index = queries.first().map(|query| query.query_index);
let index = match index_scheduler.index(&index_uid) {
Ok(index) => index,
Err(err) => {
let mut err = ResponseError::from(err);
// Patch the HTTP status code to 400 as it defaults to 404 for `index_not_found`, but
// here the resource not found is not part of the URL.
err.code = StatusCode::BAD_REQUEST;
if let Some(query_index) = first_query_index {
err.message = format!("Inside `.queries[{}]`: {}", query_index, err.message);
}
return Err(err);
}
};
// Important: this is the only transaction we'll use for this index during this federated search
let rtxn = index.read_txn()?;
let criteria = index.criteria(&rtxn)?;
let dictionary = index.dictionary(&rtxn)?;
let dictionary: Option<Vec<_>> =
dictionary.as_ref().map(|x| x.iter().map(String::as_str).collect());
let separators = index.allowed_separators(&rtxn)?;
let separators: Option<Vec<_>> =
separators.as_ref().map(|x| x.iter().map(String::as_str).collect());
// each query gets its individual cutoff
let cutoff = index.search_cutoff(&rtxn)?;
let mut degraded = false;
let mut used_negative_operator = false;
let mut candidates = RoaringBitmap::new();
let facets_by_index = federation.facets_by_index.remove(&index_uid).flatten();
// TODO: recover the max size + facets_by_index as return value of this function so as not to ask it for all queries
if let Err(mut error) =
check_facet_order(&mut facet_order, &index_uid, &facets_by_index, &index, &rtxn)
{
error.message = format!(
"Inside `.federation.facetsByIndex.{index_uid}`: {error}{}",
if let Some(query_index) = first_query_index {
format!("\n - Note: index `{index_uid}` used in `.queries[{query_index}]`")
} else {
Default::default()
}
);
return Err(error);
}
// 2.1. Compute all candidates for each query in the index
let mut results_by_query = Vec::with_capacity(queries.len());
for QueryByIndex { query, federation_options, query_index } in queries {
// use an immediately invoked lambda to capture the result without returning from the function
let res: Result<(), ResponseError> = (|| {
let search_kind =
search_kind(&query, index_scheduler, index_uid.to_string(), &index)?;
let canonicalization_kind = match (&search_kind, &query.q) {
(SearchKind::SemanticOnly { .. }, _) => {
ranking_rules::CanonicalizationKind::Vector
}
(_, Some(q)) if !q.is_empty() => ranking_rules::CanonicalizationKind::Keyword,
_ => ranking_rules::CanonicalizationKind::Placeholder,
};
let sort = if let Some(sort) = &query.sort {
let sorts: Vec<_> =
match sort.iter().map(|s| milli::AscDesc::from_str(s)).collect() {
Ok(sorts) => sorts,
Err(asc_desc_error) => {
return Err(milli::Error::from(milli::SortError::from(
asc_desc_error,
))
.into())
}
};
Some(sorts)
} else {
None
};
let ranking_rules = ranking_rules::RankingRules::new(
criteria.clone(),
sort,
query.matching_strategy.into(),
canonicalization_kind,
);
if let Some((previous_ranking_rules, previous_query_index, previous_index_uid)) =
previous_query_data.take()
{
if let Err(error) = ranking_rules.is_compatible_with(&previous_ranking_rules) {
return Err(error.to_response_error(
&ranking_rules,
&previous_ranking_rules,
query_index,
previous_query_index,
&index_uid,
&previous_index_uid,
));
}
previous_query_data = if previous_ranking_rules.constraint_count()
> ranking_rules.constraint_count()
{
Some((previous_ranking_rules, previous_query_index, previous_index_uid))
} else {
Some((ranking_rules, query_index, index_uid.clone()))
};
} else {
previous_query_data = Some((ranking_rules, query_index, index_uid.clone()));
}
match search_kind {
SearchKind::KeywordOnly => {}
_ => semantic_hit_count = Some(0),
}
let retrieve_vectors = RetrieveVectors::new(query.retrieve_vectors);
let time_budget = match cutoff {
Some(cutoff) => TimeBudget::new(Duration::from_millis(cutoff)),
None => TimeBudget::default(),
};
let (mut search, _is_finite_pagination, _max_total_hits, _offset) =
prepare_search(&index, &rtxn, &query, &search_kind, time_budget, features)?;
search.scoring_strategy(milli::score_details::ScoringStrategy::Detailed);
search.offset(0);
search.limit(required_hit_count);
let (result, _semantic_hit_count) =
super::search_from_kind(index_uid.to_string(), search_kind, search)?;
let format = AttributesFormat {
attributes_to_retrieve: query.attributes_to_retrieve,
retrieve_vectors,
attributes_to_highlight: query.attributes_to_highlight,
attributes_to_crop: query.attributes_to_crop,
crop_length: query.crop_length,
crop_marker: query.crop_marker,
highlight_pre_tag: query.highlight_pre_tag,
highlight_post_tag: query.highlight_post_tag,
show_matches_position: query.show_matches_position,
sort: query.sort,
show_ranking_score: query.show_ranking_score,
show_ranking_score_details: query.show_ranking_score_details,
locales: query.locales.map(|l| l.iter().copied().map(Into::into).collect()),
};
let milli::SearchResult {
matching_words,
candidates: query_candidates,
documents_ids,
document_scores,
degraded: query_degraded,
used_negative_operator: query_used_negative_operator,
} = result;
candidates |= query_candidates;
degraded |= query_degraded;
used_negative_operator |= query_used_negative_operator;
let tokenizer = HitMaker::tokenizer(dictionary.as_deref(), separators.as_deref());
let formatter_builder = HitMaker::formatter_builder(matching_words, tokenizer);
let hit_maker =
HitMaker::new(&index, &rtxn, format, formatter_builder).map_err(|e| {
MeilisearchHttpError::from_milli(e, Some(index_uid.to_string()))
})?;
results_by_query.push(SearchResultByQuery {
federation_options,
hit_maker,
query_index,
documents_ids,
document_scores,
});
Ok(())
})();
if let Err(mut error) = res {
error.message = format!("Inside `.queries[{query_index}]`: {}", error.message);
return Err(error);
}
}
// 2.2. merge inside index
let mut documents_seen = RoaringBitmap::new();
let merged_result: Result<Vec<_>, ResponseError> =
merge_index_local_results(results_by_query)
// skip documents we've already seen & mark that we saw the current document
.filter(|SearchResultByQueryIterItem { docid, .. }| documents_seen.insert(*docid))
.take(required_hit_count)
// 2.3 make hits
.map(
|SearchResultByQueryIterItem {
docid,
score,
federation_options,
hit_maker,
query_index,
}| {
let mut hit = hit_maker.make_hit(docid, &score)?;
let weighted_score =
ScoreDetails::global_score(score.iter()) * (*federation_options.weight);
let _federation = serde_json::json!(
{
"indexUid": index_uid,
"queriesPosition": query_index,
"weightedRankingScore": weighted_score,
}
);
hit.document.insert("_federation".to_string(), _federation);
Ok(SearchHitByIndex { hit, score, federation_options, query_index })
},
)
.collect();
let merged_result = merged_result?;
let estimated_total_hits = candidates.len() as usize;
let facets = facets_by_index
.map(|facets_by_index| {
compute_facet_distribution_stats(
&facets_by_index,
&index,
&rtxn,
candidates,
super::Route::MultiSearch,
)
})
.transpose()
.map_err(|mut error| {
error.message = format!(
"Inside `.federation.facetsByIndex.{index_uid}`: {}{}",
error.message,
if let Some(query_index) = first_query_index {
format!("\n - Note: index `{index_uid}` used in `.queries[{query_index}]`")
} else {
Default::default()
}
);
error
})?;
results_by_index.push(SearchResultByIndex {
index: index_uid,
hits: merged_result,
estimated_total_hits,
degraded,
used_negative_operator,
facets,
});
}
// bonus step, make sure to return an error if an index wants a non-faceted field, even if no query actually uses that index.
for (index_uid, facets) in federation.facets_by_index {
let index = match index_scheduler.index(&index_uid) {
Ok(index) => index,
Err(err) => {
let mut err = ResponseError::from(err);
// Patch the HTTP status code to 400 as it defaults to 404 for `index_not_found`, but
// here the resource not found is not part of the URL.
err.code = StatusCode::BAD_REQUEST;
err.message = format!(
"Inside `.federation.facetsByIndex.{index_uid}`: {}\n - Note: index `{index_uid}` is not used in queries",
err.message
);
return Err(err);
}
};
// Important: this is the only transaction we'll use for this index during this federated search
let rtxn = index.read_txn()?;
if let Err(mut error) =
check_facet_order(&mut facet_order, &index_uid, &facets, &index, &rtxn)
{
error.message = format!(
"Inside `.federation.facetsByIndex.{index_uid}`: {error}\n - Note: index `{index_uid}` is not used in queries",
);
return Err(error);
}
if let Some(facets) = facets {
if let Err(mut error) = compute_facet_distribution_stats(
&facets,
&index,
&rtxn,
Default::default(),
super::Route::MultiSearch,
) {
error.message =
format!("Inside `.federation.facetsByIndex.{index_uid}`: {}\n - Note: index `{index_uid}` is not used in queries", error.message);
return Err(error);
}
}
}
// 3. merge hits and metadata across indexes
// 3.1 merge metadata
let (estimated_total_hits, degraded, used_negative_operator, facets) = {
let mut estimated_total_hits = 0;
let mut degraded = false;
let mut used_negative_operator = false;
let mut facets: FederatedFacets = FederatedFacets::default();
for SearchResultByIndex {
index,
hits: _,
estimated_total_hits: estimated_total_hits_by_index,
facets: facets_by_index,
degraded: degraded_by_index,
used_negative_operator: used_negative_operator_by_index,
} in &mut results_by_index
{
estimated_total_hits += *estimated_total_hits_by_index;
degraded |= *degraded_by_index;
used_negative_operator |= *used_negative_operator_by_index;
let facets_by_index = std::mem::take(facets_by_index);
let index = std::mem::take(index);
facets.insert(index, facets_by_index);
}
(estimated_total_hits, degraded, used_negative_operator, facets)
};
// 3.2 merge hits
let merged_hits: Vec<_> = merge_index_global_results(results_by_index)
.skip(federation.offset)
.take(federation.limit)
.inspect(|hit| {
if let Some(semantic_hit_count) = &mut semantic_hit_count {
if hit.score.iter().any(|score| matches!(&score, ScoreDetails::Vector(_))) {
*semantic_hit_count += 1;
}
}
})
.map(|hit| hit.hit)
.collect();
let (facet_distribution, facet_stats, facets_by_index) =
match federation.merge_facets.zip(facet_order) {
Some((merge_facets, facet_order)) => {
let facets = facets.merge(merge_facets, facet_order);
let (facet_distribution, facet_stats) = facets
.map(|ComputedFacets { distribution, stats }| (distribution, stats))
.unzip();
(facet_distribution, facet_stats, FederatedFacets::default())
}
None => (None, None, facets),
};
let search_result = FederatedSearchResult {
hits: merged_hits,
processing_time_ms: before_search.elapsed().as_millis(),
hits_info: HitsInfo::OffsetLimit {
limit: federation.limit,
offset: federation.offset,
estimated_total_hits,
},
semantic_hit_count,
degraded,
used_negative_operator,
facet_distribution,
facet_stats,
facets_by_index,
};
Ok(search_result)
}
fn check_facet_order(
facet_order: &mut Option<BTreeMap<String, (String, OrderBy)>>,
current_index: &str,
facets_by_index: &Option<Vec<String>>,
index: &milli::Index,
rtxn: &milli::heed::RoTxn<'_>,
) -> Result<(), ResponseError> {
if let (Some(facet_order), Some(facets_by_index)) = (facet_order, facets_by_index) {
let index_facet_order = index.sort_facet_values_by(rtxn)?;
for facet in facets_by_index {
let index_facet_order = index_facet_order.get(facet);
let (previous_index, previous_facet_order) = facet_order
.entry(facet.to_owned())
.or_insert_with(|| (current_index.to_owned(), index_facet_order));
if previous_facet_order != &index_facet_order {
return Err(MeilisearchHttpError::InconsistentFacetOrder {
facet: facet.clone(),
previous_facet_order: *previous_facet_order,
previous_uid: previous_index.clone(),
current_uid: current_index.to_owned(),
index_facet_order,
}
.into());
}
}
};
Ok(())
}

View File

@@ -1,10 +0,0 @@
mod perform;
mod proxy;
mod types;
mod weighted_scores;
pub use perform::perform_federated_search;
pub use proxy::{PROXY_SEARCH_HEADER, PROXY_SEARCH_HEADER_VALUE};
pub use types::{
FederatedSearch, FederatedSearchResult, Federation, FederationOptions, MergeFacets,
};

File diff suppressed because it is too large Load Diff

View File

@@ -1,267 +0,0 @@
pub use error::ProxySearchError;
use error::ReqwestErrorWithoutUrl;
use meilisearch_types::features::Remote;
use rand::Rng as _;
use reqwest::{Client, Response, StatusCode};
use serde::de::DeserializeOwned;
use serde_json::Value;
use super::types::{FederatedSearch, FederatedSearchResult, Federation};
use crate::search::SearchQueryWithIndex;
pub const PROXY_SEARCH_HEADER: &str = "Meili-Proxy-Search";
pub const PROXY_SEARCH_HEADER_VALUE: &str = "true";
mod error {
use meilisearch_types::error::ResponseError;
use reqwest::StatusCode;
#[derive(Debug, thiserror::Error)]
pub enum ProxySearchError {
#[error("{0}")]
CouldNotSendRequest(ReqwestErrorWithoutUrl),
#[error("could not authenticate against the remote host\n - hint: check that the remote instance was registered with a valid API key having the `search` action")]
AuthenticationError,
#[error(
"could not parse response from the remote host as a federated search response{}\n - hint: check that the remote instance is a Meilisearch instance running the same version",
response_from_remote(response)
)]
CouldNotParseResponse { response: Result<String, ReqwestErrorWithoutUrl> },
#[error("remote host responded with code {}{}\n - hint: check that the remote instance has the correct index configuration for that request\n - hint: check that the `network` experimental feature is enabled on the remote instance", status_code.as_u16(), response_from_remote(response))]
BadRequest { status_code: StatusCode, response: Result<String, ReqwestErrorWithoutUrl> },
#[error("remote host did not answer before the deadline")]
Timeout,
#[error("remote hit does not contain `{0}`\n - hint: check that the remote instance is a Meilisearch instance running the same version")]
MissingPathInResponse(&'static str),
#[error("remote host responded with code {}{}", status_code.as_u16(), response_from_remote(response))]
RemoteError { status_code: StatusCode, response: Result<String, ReqwestErrorWithoutUrl> },
#[error("remote hit contains an unexpected value at path `{path}`: expected {expected_type}, received `{received_value}`\n - hint: check that the remote instance is a Meilisearch instance running the same version")]
UnexpectedValueInPath {
path: &'static str,
expected_type: &'static str,
received_value: String,
},
#[error("could not parse weighted score values in the remote hit: {0}")]
CouldNotParseWeightedScoreValues(serde_json::Error),
}
impl ProxySearchError {
pub fn as_response_error(&self) -> ResponseError {
use meilisearch_types::error::Code;
let message = self.to_string();
let code = match self {
ProxySearchError::CouldNotSendRequest(_) => Code::RemoteCouldNotSendRequest,
ProxySearchError::AuthenticationError => Code::RemoteInvalidApiKey,
ProxySearchError::BadRequest { .. } => Code::RemoteBadRequest,
ProxySearchError::Timeout => Code::RemoteTimeout,
ProxySearchError::RemoteError { .. } => Code::RemoteRemoteError,
ProxySearchError::CouldNotParseResponse { .. }
| ProxySearchError::MissingPathInResponse(_)
| ProxySearchError::UnexpectedValueInPath { .. }
| ProxySearchError::CouldNotParseWeightedScoreValues(_) => Code::RemoteBadResponse,
};
ResponseError::from_msg(message, code)
}
}
#[derive(Debug, thiserror::Error)]
#[error(transparent)]
pub struct ReqwestErrorWithoutUrl(reqwest::Error);
impl ReqwestErrorWithoutUrl {
pub fn new(inner: reqwest::Error) -> Self {
Self(inner.without_url())
}
}
fn response_from_remote(response: &Result<String, ReqwestErrorWithoutUrl>) -> String {
match response {
Ok(response) => {
format!(":\n - response from remote: {}", response)
}
Err(error) => {
format!(":\n - additionally, could not retrieve response from remote: {error}")
}
}
}
}
#[derive(Clone)]
pub struct ProxySearchParams {
pub deadline: Option<std::time::Instant>,
pub try_count: u32,
pub client: reqwest::Client,
}
/// Performs a federated search on a remote host and returns the results
pub async fn proxy_search(
node: &Remote,
queries: Vec<SearchQueryWithIndex>,
federation: Federation,
params: &ProxySearchParams,
) -> Result<FederatedSearchResult, ProxySearchError> {
let url = format!("{}/multi-search", node.url);
let federated = FederatedSearch { queries, federation: Some(federation) };
let search_api_key = node.search_api_key.as_deref();
let max_deadline = std::time::Instant::now() + std::time::Duration::from_secs(5);
let deadline = if let Some(deadline) = params.deadline {
std::time::Instant::min(deadline, max_deadline)
} else {
max_deadline
};
for i in 0..params.try_count {
match try_proxy_search(&url, search_api_key, &federated, &params.client, deadline).await {
Ok(response) => return Ok(response),
Err(retry) => {
let duration = retry.into_duration(i)?;
tokio::time::sleep(duration).await;
}
}
}
try_proxy_search(&url, search_api_key, &federated, &params.client, deadline)
.await
.map_err(Retry::into_error)
}
async fn try_proxy_search(
url: &str,
search_api_key: Option<&str>,
federated: &FederatedSearch,
client: &Client,
deadline: std::time::Instant,
) -> Result<FederatedSearchResult, Retry> {
let timeout = deadline.saturating_duration_since(std::time::Instant::now());
let request = client.post(url).json(&federated).timeout(timeout);
let request = if let Some(search_api_key) = search_api_key {
request.bearer_auth(search_api_key)
} else {
request
};
let request = request.header(PROXY_SEARCH_HEADER, PROXY_SEARCH_HEADER_VALUE);
let response = request.send().await;
let response = match response {
Ok(response) => response,
Err(error) if error.is_timeout() => return Err(Retry::give_up(ProxySearchError::Timeout)),
Err(error) => {
return Err(Retry::retry_later(ProxySearchError::CouldNotSendRequest(
ReqwestErrorWithoutUrl::new(error),
)))
}
};
match response.status() {
status_code if status_code.is_success() => (),
StatusCode::UNAUTHORIZED | StatusCode::FORBIDDEN => {
return Err(Retry::give_up(ProxySearchError::AuthenticationError))
}
status_code if status_code.is_client_error() => {
let response = parse_error(response).await;
return Err(Retry::give_up(ProxySearchError::BadRequest { status_code, response }));
}
status_code if status_code.is_server_error() => {
let response = parse_error(response).await;
return Err(Retry::retry_later(ProxySearchError::RemoteError {
status_code,
response,
}));
}
status_code => {
tracing::warn!(
status_code = status_code.as_u16(),
"remote replied with unexpected status code"
);
}
}
let response = match parse_response(response).await {
Ok(response) => response,
Err(response) => {
return Err(Retry::retry_later(ProxySearchError::CouldNotParseResponse { response }))
}
};
Ok(response)
}
/// Always parse the body of the response of a failed request as JSON.
async fn parse_error(response: Response) -> Result<String, ReqwestErrorWithoutUrl> {
let bytes = match response.bytes().await {
Ok(bytes) => bytes,
Err(error) => return Err(ReqwestErrorWithoutUrl::new(error)),
};
Ok(parse_bytes_as_error(&bytes))
}
fn parse_bytes_as_error(bytes: &[u8]) -> String {
match serde_json::from_slice::<Value>(bytes) {
Ok(value) => value.to_string(),
Err(_) => String::from_utf8_lossy(bytes).into_owned(),
}
}
async fn parse_response<T: DeserializeOwned>(
response: Response,
) -> Result<T, Result<String, ReqwestErrorWithoutUrl>> {
let bytes = match response.bytes().await {
Ok(bytes) => bytes,
Err(error) => return Err(Err(ReqwestErrorWithoutUrl::new(error))),
};
match serde_json::from_slice::<T>(&bytes) {
Ok(value) => Ok(value),
Err(_) => Err(Ok(parse_bytes_as_error(&bytes))),
}
}
pub struct Retry {
error: ProxySearchError,
strategy: RetryStrategy,
}
pub enum RetryStrategy {
GiveUp,
Retry,
}
impl Retry {
pub fn give_up(error: ProxySearchError) -> Self {
Self { error, strategy: RetryStrategy::GiveUp }
}
pub fn retry_later(error: ProxySearchError) -> Self {
Self { error, strategy: RetryStrategy::Retry }
}
pub fn into_duration(self, attempt: u32) -> Result<std::time::Duration, ProxySearchError> {
match self.strategy {
RetryStrategy::GiveUp => Err(self.error),
RetryStrategy::Retry => {
let retry_duration = std::time::Duration::from_nanos((10u64).pow(attempt));
let retry_duration = retry_duration.min(std::time::Duration::from_millis(100)); // don't wait more than 100ms
// randomly up to double the retry duration
let retry_duration = retry_duration
+ rand::thread_rng().gen_range(std::time::Duration::ZERO..retry_duration);
tracing::warn!(
"Attempt #{}, failed with {}, retrying after {}ms.",
attempt,
self.error,
retry_duration.as_millis()
);
Ok(retry_duration)
}
}
}
pub fn into_error(self) -> ProxySearchError {
self.error
}
}

View File

@@ -1,322 +0,0 @@
use std::collections::btree_map::Entry;
use std::collections::BTreeMap;
use std::fmt;
use std::vec::Vec;
use indexmap::IndexMap;
use meilisearch_types::deserr::DeserrJsonError;
use meilisearch_types::error::deserr_codes::{
InvalidMultiSearchFacetsByIndex, InvalidMultiSearchMaxValuesPerFacet,
InvalidMultiSearchMergeFacets, InvalidMultiSearchQueryPosition, InvalidMultiSearchRemote,
InvalidMultiSearchWeight, InvalidSearchLimit, InvalidSearchOffset,
};
use meilisearch_types::error::ResponseError;
use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::milli::order_by_map::OrderByMap;
use meilisearch_types::milli::OrderBy;
use serde::{Deserialize, Serialize};
use utoipa::ToSchema;
use super::super::{ComputedFacets, FacetStats, HitsInfo, SearchHit, SearchQueryWithIndex};
pub const DEFAULT_FEDERATED_WEIGHT: f64 = 1.0;
// fields in the response
pub const FEDERATION_HIT: &str = "_federation";
pub const INDEX_UID: &str = "indexUid";
pub const QUERIES_POSITION: &str = "queriesPosition";
pub const WEIGHTED_RANKING_SCORE: &str = "weightedRankingScore";
pub const WEIGHTED_SCORE_VALUES: &str = "weightedScoreValues";
pub const FEDERATION_REMOTE: &str = "remote";
#[derive(Debug, Default, Clone, PartialEq, Serialize, deserr::Deserr, ToSchema)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
#[serde(rename_all = "camelCase")]
pub struct FederationOptions {
#[deserr(default, error = DeserrJsonError<InvalidMultiSearchWeight>)]
#[schema(value_type = f64)]
pub weight: Weight,
#[deserr(default, error = DeserrJsonError<InvalidMultiSearchRemote>)]
pub remote: Option<String>,
#[deserr(default, error = DeserrJsonError<InvalidMultiSearchQueryPosition>)]
pub query_position: Option<usize>,
}
#[derive(Debug, Clone, Copy, PartialEq, Serialize, deserr::Deserr)]
#[deserr(try_from(f64) = TryFrom::try_from -> InvalidMultiSearchWeight)]
pub struct Weight(f64);
impl Default for Weight {
fn default() -> Self {
Weight(DEFAULT_FEDERATED_WEIGHT)
}
}
impl std::convert::TryFrom<f64> for Weight {
type Error = InvalidMultiSearchWeight;
fn try_from(f: f64) -> Result<Self, Self::Error> {
if f < 0.0 {
Err(InvalidMultiSearchWeight)
} else {
Ok(Weight(f))
}
}
}
impl std::ops::Deref for Weight {
type Target = f64;
fn deref(&self) -> &Self::Target {
&self.0
}
}
#[derive(Debug, Clone, deserr::Deserr, Serialize, ToSchema)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
#[schema(rename_all = "camelCase")]
#[serde(rename_all = "camelCase")]
pub struct Federation {
#[deserr(default = super::super::DEFAULT_SEARCH_LIMIT(), error = DeserrJsonError<InvalidSearchLimit>)]
pub limit: usize,
#[deserr(default = super::super::DEFAULT_SEARCH_OFFSET(), error = DeserrJsonError<InvalidSearchOffset>)]
pub offset: usize,
#[deserr(default, error = DeserrJsonError<InvalidMultiSearchFacetsByIndex>)]
pub facets_by_index: BTreeMap<IndexUid, Option<Vec<String>>>,
#[deserr(default, error = DeserrJsonError<InvalidMultiSearchMergeFacets>)]
pub merge_facets: Option<MergeFacets>,
}
#[derive(Copy, Clone, Debug, deserr::Deserr, Serialize, Default, ToSchema)]
#[deserr(error = DeserrJsonError<InvalidMultiSearchMergeFacets>, rename_all = camelCase, deny_unknown_fields)]
#[schema(rename_all = "camelCase")]
#[serde(rename_all = "camelCase")]
pub struct MergeFacets {
#[deserr(default, error = DeserrJsonError<InvalidMultiSearchMaxValuesPerFacet>)]
pub max_values_per_facet: Option<usize>,
}
#[derive(Debug, deserr::Deserr, Serialize, ToSchema)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
#[schema(rename_all = "camelCase")]
#[serde(rename_all = "camelCase")]
pub struct FederatedSearch {
pub queries: Vec<SearchQueryWithIndex>,
#[deserr(default)]
pub federation: Option<Federation>,
}
#[derive(Serialize, Deserialize, Clone, ToSchema)]
#[serde(rename_all = "camelCase")]
#[schema(rename_all = "camelCase")]
pub struct FederatedSearchResult {
pub hits: Vec<SearchHit>,
pub processing_time_ms: u128,
#[serde(flatten)]
pub hits_info: HitsInfo,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub semantic_hit_count: Option<u32>,
#[serde(default, skip_serializing_if = "Option::is_none")]
#[schema(value_type = Option<BTreeMap<String, BTreeMap<String, u64>>>)]
pub facet_distribution: Option<BTreeMap<String, IndexMap<String, u64>>>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub facet_stats: Option<BTreeMap<String, FacetStats>>,
#[serde(default, skip_serializing_if = "FederatedFacets::is_empty")]
pub facets_by_index: FederatedFacets,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub remote_errors: Option<BTreeMap<String, ResponseError>>,
// These fields are only used for analytics purposes
#[serde(skip)]
pub degraded: bool,
#[serde(skip)]
pub used_negative_operator: bool,
}
impl fmt::Debug for FederatedSearchResult {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let FederatedSearchResult {
hits,
processing_time_ms,
hits_info,
semantic_hit_count,
degraded,
used_negative_operator,
facet_distribution,
facet_stats,
facets_by_index,
remote_errors,
} = self;
let mut debug = f.debug_struct("SearchResult");
// The most important thing when looking at a search result is the time it took to process
debug.field("processing_time_ms", &processing_time_ms);
debug.field("hits", &format!("[{} hits returned]", hits.len()));
debug.field("hits_info", &hits_info);
if *used_negative_operator {
debug.field("used_negative_operator", used_negative_operator);
}
if *degraded {
debug.field("degraded", degraded);
}
if let Some(facet_distribution) = facet_distribution {
debug.field("facet_distribution", &facet_distribution);
}
if let Some(facet_stats) = facet_stats {
debug.field("facet_stats", &facet_stats);
}
if let Some(semantic_hit_count) = semantic_hit_count {
debug.field("semantic_hit_count", &semantic_hit_count);
}
if !facets_by_index.is_empty() {
debug.field("facets_by_index", &facets_by_index);
}
if let Some(remote_errors) = remote_errors {
debug.field("remote_errors", &remote_errors);
}
debug.finish()
}
}
#[derive(Debug, Clone, Default, Serialize, Deserialize, ToSchema)]
pub struct FederatedFacets(pub BTreeMap<String, ComputedFacets>);
impl FederatedFacets {
pub fn insert(&mut self, index: String, facets: Option<ComputedFacets>) {
if let Some(facets) = facets {
self.0.insert(index, facets);
}
}
pub fn is_empty(&self) -> bool {
self.0.is_empty()
}
pub fn merge(
self,
MergeFacets { max_values_per_facet }: MergeFacets,
facet_order: BTreeMap<String, (String, OrderBy)>,
) -> Option<ComputedFacets> {
if self.is_empty() {
return None;
}
let mut distribution: BTreeMap<String, _> = Default::default();
let mut stats: BTreeMap<String, FacetStats> = Default::default();
for facets_by_index in self.0.into_values() {
for (facet, index_distribution) in facets_by_index.distribution {
match distribution.entry(facet) {
Entry::Vacant(entry) => {
entry.insert(index_distribution);
}
Entry::Occupied(mut entry) => {
let distribution = entry.get_mut();
for (value, index_count) in index_distribution {
distribution
.entry(value)
.and_modify(|count| *count += index_count)
.or_insert(index_count);
}
}
}
}
for (facet, index_stats) in facets_by_index.stats {
match stats.entry(facet) {
Entry::Vacant(entry) => {
entry.insert(index_stats);
}
Entry::Occupied(mut entry) => {
let stats = entry.get_mut();
stats.min = f64::min(stats.min, index_stats.min);
stats.max = f64::max(stats.max, index_stats.max);
}
}
}
}
// fixup order
for (facet, values) in &mut distribution {
let order_by = facet_order.get(facet).map(|(_, order)| *order).unwrap_or_default();
match order_by {
OrderBy::Lexicographic => {
values.sort_unstable_by(|left, _, right, _| left.cmp(right))
}
OrderBy::Count => {
values.sort_unstable_by(|_, left, _, right| {
left.cmp(right)
// biggest first
.reverse()
})
}
}
if let Some(max_values_per_facet) = max_values_per_facet {
values.truncate(max_values_per_facet)
};
}
Some(ComputedFacets { distribution, stats })
}
pub(crate) fn append(&mut self, FederatedFacets(remote_facets_by_index): FederatedFacets) {
for (index, remote_facets) in remote_facets_by_index {
let merged_facets = self.0.entry(index).or_default();
for (remote_facet, remote_stats) in remote_facets.stats {
match merged_facets.stats.entry(remote_facet) {
Entry::Vacant(vacant_entry) => {
vacant_entry.insert(remote_stats);
}
Entry::Occupied(mut occupied_entry) => {
let stats = occupied_entry.get_mut();
stats.min = f64::min(stats.min, remote_stats.min);
stats.max = f64::max(stats.max, remote_stats.max);
}
}
}
for (remote_facet, remote_values) in remote_facets.distribution {
let merged_facet = merged_facets.distribution.entry(remote_facet).or_default();
for (remote_value, remote_count) in remote_values {
let count = merged_facet.entry(remote_value).or_default();
*count += remote_count;
}
}
}
}
pub fn sort_and_truncate(&mut self, facet_order: BTreeMap<String, (OrderByMap, usize)>) {
for (index, facets) in &mut self.0 {
let Some((order_by, max_values_per_facet)) = facet_order.get(index) else {
continue;
};
for (facet, values) in &mut facets.distribution {
match order_by.get(facet) {
OrderBy::Lexicographic => {
values.sort_unstable_by(|left, _, right, _| left.cmp(right))
}
OrderBy::Count => {
values.sort_unstable_by(|_, left, _, right| {
left.cmp(right)
// biggest first
.reverse()
})
}
}
values.truncate(*max_values_per_facet);
}
}
}
}

View File

@@ -1,88 +0,0 @@
use std::cmp::Ordering;
use meilisearch_types::milli::score_details::{self, WeightedScoreValue};
pub fn compare(
mut left_it: impl Iterator<Item = WeightedScoreValue>,
left_weighted_global_score: f64,
mut right_it: impl Iterator<Item = WeightedScoreValue>,
right_weighted_global_score: f64,
) -> Ordering {
loop {
let left = left_it.next();
let right = right_it.next();
match (left, right) {
(None, None) => return Ordering::Equal,
(None, Some(_)) => return Ordering::Less,
(Some(_), None) => return Ordering::Greater,
(
Some(
WeightedScoreValue::WeightedScore(left) | WeightedScoreValue::VectorSort(left),
),
Some(
WeightedScoreValue::WeightedScore(right)
| WeightedScoreValue::VectorSort(right),
),
) => {
if (left - right).abs() <= f64::EPSILON {
continue;
}
return left.partial_cmp(&right).unwrap();
}
(
Some(WeightedScoreValue::Sort { asc: left_asc, value: left }),
Some(WeightedScoreValue::Sort { asc: right_asc, value: right }),
) => {
if left_asc != right_asc {
return left_weighted_global_score
.partial_cmp(&right_weighted_global_score)
.unwrap();
}
match score_details::compare_sort_values(left_asc, &left, &right) {
Ordering::Equal => continue,
order => return order,
}
}
(
Some(WeightedScoreValue::GeoSort { asc: left_asc, distance: left }),
Some(WeightedScoreValue::GeoSort { asc: right_asc, distance: right }),
) => {
if left_asc != right_asc {
continue;
}
match (left, right) {
(None, None) => continue,
(None, Some(_)) => return Ordering::Less,
(Some(_), None) => return Ordering::Greater,
(Some(left), Some(right)) => {
if (left - right).abs() <= f64::EPSILON {
continue;
}
return left.partial_cmp(&right).unwrap();
}
}
}
// not comparable details, use global
(Some(WeightedScoreValue::WeightedScore(_)), Some(_))
| (Some(_), Some(WeightedScoreValue::WeightedScore(_)))
| (Some(WeightedScoreValue::VectorSort(_)), Some(_))
| (Some(_), Some(WeightedScoreValue::VectorSort(_)))
| (Some(WeightedScoreValue::GeoSort { .. }), Some(WeightedScoreValue::Sort { .. }))
| (Some(WeightedScoreValue::Sort { .. }), Some(WeightedScoreValue::GeoSort { .. })) => {
let left_count = left_it.count();
let right_count = right_it.count();
// compare how many remaining groups of rules each side has.
// the group with the most remaining groups wins.
return left_count
.cmp(&right_count)
// breaks ties with the global ranking score
.then_with(|| {
left_weighted_global_score
.partial_cmp(&right_weighted_global_score)
.unwrap()
});
}
}
}
}

View File

@@ -30,7 +30,7 @@ use milli::{
MatchBounds, MatcherBuilder, SortError, TermsMatchingStrategy, DEFAULT_VALUES_PER_FACET,
};
use regex::Regex;
use serde::{Deserialize, Serialize};
use serde::Serialize;
use serde_json::{json, Value};
#[cfg(test)]
mod mod_test;
@@ -41,7 +41,7 @@ use crate::error::MeilisearchHttpError;
mod federated;
pub use federated::{
perform_federated_search, FederatedSearch, FederatedSearchResult, Federation,
FederationOptions, MergeFacets, PROXY_SEARCH_HEADER, PROXY_SEARCH_HEADER_VALUE,
FederationOptions, MergeFacets,
};
mod ranking_rules;
@@ -119,7 +119,7 @@ pub struct SearchQuery {
pub locales: Option<Vec<Locale>>,
}
#[derive(Debug, Clone, Copy, PartialEq, Deserr, ToSchema, Serialize)]
#[derive(Debug, Clone, Copy, PartialEq, Deserr, ToSchema)]
#[deserr(try_from(f64) = TryFrom::try_from -> InvalidSearchRankingScoreThreshold)]
pub struct RankingScoreThreshold(f64);
impl std::convert::TryFrom<f64> for RankingScoreThreshold {
@@ -275,13 +275,11 @@ impl fmt::Debug for SearchQuery {
}
}
#[derive(Debug, Clone, Default, PartialEq, Deserr, ToSchema, Serialize)]
#[derive(Debug, Clone, Default, PartialEq, Deserr, ToSchema)]
#[deserr(error = DeserrJsonError<InvalidSearchHybridQuery>, rename_all = camelCase, deny_unknown_fields)]
#[serde(rename_all = "camelCase")]
pub struct HybridQuery {
#[deserr(default, error = DeserrJsonError<InvalidSearchSemanticRatio>, default)]
#[schema(value_type = f32, default)]
#[serde(default)]
pub semantic_ratio: SemanticRatio,
#[deserr(error = DeserrJsonError<InvalidSearchEmbedder>)]
pub embedder: String,
@@ -371,7 +369,7 @@ impl SearchKind {
}
}
#[derive(Debug, Clone, Copy, PartialEq, Deserr, Serialize)]
#[derive(Debug, Clone, Copy, PartialEq, Deserr)]
#[deserr(try_from(f32) = TryFrom::try_from -> InvalidSearchSemanticRatio)]
pub struct SemanticRatio(f32);
@@ -413,9 +411,8 @@ impl SearchQuery {
// This struct contains the fields of `SearchQuery` inline.
// This is because neither deserr nor serde support `flatten` when using `deny_unknown_fields.
// The `From<SearchQueryWithIndex>` implementation ensures both structs remain up to date.
#[derive(Debug, Clone, Serialize, PartialEq, Deserr, ToSchema)]
#[derive(Debug, Clone, PartialEq, Deserr, ToSchema)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
#[serde(rename_all = "camelCase")]
#[schema(rename_all = "camelCase")]
pub struct SearchQueryWithIndex {
#[deserr(error = DeserrJsonError<InvalidIndexUid>, missing_field_error = DeserrJsonError::missing_index_uid)]
@@ -496,72 +493,6 @@ impl SearchQueryWithIndex {
self.facets.as_deref().filter(|v| !v.is_empty())
}
pub fn from_index_query_federation(
index_uid: IndexUid,
query: SearchQuery,
federation_options: Option<FederationOptions>,
) -> Self {
let SearchQuery {
q,
vector,
hybrid,
offset,
limit,
page,
hits_per_page,
attributes_to_retrieve,
retrieve_vectors,
attributes_to_crop,
crop_length,
attributes_to_highlight,
show_matches_position,
show_ranking_score,
show_ranking_score_details,
filter,
sort,
distinct,
facets,
highlight_pre_tag,
highlight_post_tag,
crop_marker,
matching_strategy,
attributes_to_search_on,
ranking_score_threshold,
locales,
} = query;
SearchQueryWithIndex {
index_uid,
q,
vector,
hybrid,
offset: if offset == DEFAULT_SEARCH_OFFSET() { None } else { Some(offset) },
limit: if limit == DEFAULT_SEARCH_LIMIT() { None } else { Some(limit) },
page,
hits_per_page,
attributes_to_retrieve,
retrieve_vectors,
attributes_to_crop,
crop_length,
attributes_to_highlight,
show_ranking_score,
show_ranking_score_details,
show_matches_position,
filter,
sort,
distinct,
facets,
highlight_pre_tag,
highlight_post_tag,
crop_marker,
matching_strategy,
attributes_to_search_on,
ranking_score_threshold,
locales,
federation_options,
}
}
pub fn into_index_query_federation(self) -> (IndexUid, SearchQuery, Option<FederationOptions>) {
let SearchQueryWithIndex {
index_uid,
@@ -689,9 +620,8 @@ impl TryFrom<Value> for ExternalDocumentId {
}
}
#[derive(Debug, Copy, Clone, PartialEq, Eq, Deserr, ToSchema, Serialize)]
#[derive(Debug, Copy, Clone, PartialEq, Eq, Deserr, ToSchema)]
#[deserr(rename_all = camelCase)]
#[serde(rename_all = "camelCase")]
pub enum MatchingStrategy {
/// Remove query words from last to first
Last,
@@ -737,19 +667,19 @@ impl From<FacetValuesSort> for OrderBy {
}
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, ToSchema)]
#[derive(Debug, Clone, Serialize, PartialEq, ToSchema)]
pub struct SearchHit {
#[serde(flatten)]
#[schema(additional_properties, inline, value_type = HashMap<String, Value>)]
pub document: Document,
#[serde(default, rename = "_formatted", skip_serializing_if = "Document::is_empty")]
#[serde(rename = "_formatted", skip_serializing_if = "Document::is_empty")]
#[schema(additional_properties, value_type = HashMap<String, Value>)]
pub formatted: Document,
#[serde(default, rename = "_matchesPosition", skip_serializing_if = "Option::is_none")]
#[serde(rename = "_matchesPosition", skip_serializing_if = "Option::is_none")]
pub matches_position: Option<MatchesPosition>,
#[serde(default, rename = "_rankingScore", skip_serializing_if = "Option::is_none")]
#[serde(rename = "_rankingScore", skip_serializing_if = "Option::is_none")]
pub ranking_score: Option<f64>,
#[serde(default, rename = "_rankingScoreDetails", skip_serializing_if = "Option::is_none")]
#[serde(rename = "_rankingScoreDetails", skip_serializing_if = "Option::is_none")]
pub ranking_score_details: Option<serde_json::Map<String, serde_json::Value>>,
}
@@ -837,7 +767,7 @@ pub struct SearchResultWithIndex {
pub result: SearchResult,
}
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, ToSchema)]
#[derive(Serialize, Debug, Clone, PartialEq, Eq, ToSchema)]
#[serde(untagged)]
pub enum HitsInfo {
#[serde(rename_all = "camelCase")]
@@ -848,7 +778,7 @@ pub enum HitsInfo {
OffsetLimit { limit: usize, offset: usize, estimated_total_hits: usize },
}
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, ToSchema)]
#[derive(Serialize, Debug, Clone, PartialEq, ToSchema)]
pub struct FacetStats {
pub min: f64,
pub max: f64,
@@ -1131,7 +1061,7 @@ pub fn perform_search(
Ok(result)
}
#[derive(Debug, Clone, Default, Serialize, Deserialize, ToSchema)]
#[derive(Debug, Clone, Default, Serialize, ToSchema)]
pub struct ComputedFacets {
#[schema(value_type = BTreeMap<String, BTreeMap<String, u64>>)]
pub distribution: BTreeMap<String, IndexMap<String, u64>>,
@@ -1397,12 +1327,13 @@ impl<'a> HitMaker<'a> {
}
pub fn make_hit(&self, id: u32, score: &[ScoreDetails]) -> milli::Result<SearchHit> {
let (_, obkv) =
self.index.iter_documents(self.rtxn, std::iter::once(id))?.next().unwrap()?;
let mut buffer = Vec::new();
let dict = self.index.document_decompression_dictionary(self.rtxn)?;
let compressed = self.index.compressed_document(self.rtxn, id)?.unwrap();
let doc = compressed.decompress_with_optional_dictionary(&mut buffer, dict.as_ref())?;
// First generate a document with all the displayed fields
let displayed_document = make_document(&self.displayed_ids, &self.fields_ids_map, obkv)?;
let displayed_document = make_document(&self.displayed_ids, &self.fields_ids_map, doc)?;
let add_vectors_fid =
self.vectors_fid.filter(|_fid| self.retrieve_vectors == RetrieveVectors::Retrieve);

View File

@@ -1 +0,0 @@

View File

@@ -421,7 +421,7 @@ async fn error_add_api_key_invalid_parameters_actions() {
meili_snap::snapshot!(code, @"400 Bad Request");
meili_snap::snapshot!(meili_snap::json_string!(response, { ".createdAt" => "[ignored]", ".updatedAt" => "[ignored]" }), @r###"
{
"message": "Unknown value `doc.add` at `.actions[0]`: expected one of `*`, `search`, `documents.*`, `documents.add`, `documents.get`, `documents.delete`, `indexes.*`, `indexes.create`, `indexes.get`, `indexes.update`, `indexes.delete`, `indexes.swap`, `tasks.*`, `tasks.cancel`, `tasks.delete`, `tasks.get`, `settings.*`, `settings.get`, `settings.update`, `stats.*`, `stats.get`, `metrics.*`, `metrics.get`, `dumps.*`, `dumps.create`, `snapshots.*`, `snapshots.create`, `version`, `keys.create`, `keys.get`, `keys.update`, `keys.delete`, `experimental.get`, `experimental.update`, `network.get`, `network.update`",
"message": "Unknown value `doc.add` at `.actions[0]`: expected one of `*`, `search`, `documents.*`, `documents.add`, `documents.get`, `documents.delete`, `indexes.*`, `indexes.create`, `indexes.get`, `indexes.update`, `indexes.delete`, `indexes.swap`, `tasks.*`, `tasks.cancel`, `tasks.delete`, `tasks.get`, `settings.*`, `settings.get`, `settings.update`, `stats.*`, `stats.get`, `metrics.*`, `metrics.get`, `dumps.*`, `dumps.create`, `snapshots.*`, `snapshots.create`, `version`, `keys.create`, `keys.get`, `keys.update`, `keys.delete`, `experimental.get`, `experimental.update`",
"code": "invalid_api_key_actions",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_api_key_actions"

View File

@@ -68,8 +68,6 @@ pub static AUTHORIZATIONS: Lazy<HashMap<(&'static str, &'static str), HashSet<&'
("GET", "/keys") => hashset!{"keys.get", "*"},
("GET", "/experimental-features") => hashset!{"experimental.get", "*"},
("PATCH", "/experimental-features") => hashset!{"experimental.update", "*"},
("GET", "/network") => hashset!{"network.get", "*"},
("PATCH", "/network") => hashset!{"network.update", "*"},
};
authorizations

View File

@@ -93,7 +93,7 @@ async fn create_api_key_bad_actions() {
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Unknown value `doggo` at `.actions[0]`: expected one of `*`, `search`, `documents.*`, `documents.add`, `documents.get`, `documents.delete`, `indexes.*`, `indexes.create`, `indexes.get`, `indexes.update`, `indexes.delete`, `indexes.swap`, `tasks.*`, `tasks.cancel`, `tasks.delete`, `tasks.get`, `settings.*`, `settings.get`, `settings.update`, `stats.*`, `stats.get`, `metrics.*`, `metrics.get`, `dumps.*`, `dumps.create`, `snapshots.*`, `snapshots.create`, `version`, `keys.create`, `keys.get`, `keys.update`, `keys.delete`, `experimental.get`, `experimental.update`, `network.get`, `network.update`",
"message": "Unknown value `doggo` at `.actions[0]`: expected one of `*`, `search`, `documents.*`, `documents.add`, `documents.get`, `documents.delete`, `indexes.*`, `indexes.create`, `indexes.get`, `indexes.update`, `indexes.delete`, `indexes.swap`, `tasks.*`, `tasks.cancel`, `tasks.delete`, `tasks.get`, `settings.*`, `settings.get`, `settings.update`, `stats.*`, `stats.get`, `metrics.*`, `metrics.get`, `dumps.*`, `dumps.create`, `snapshots.*`, `snapshots.create`, `version`, `keys.create`, `keys.get`, `keys.update`, `keys.delete`, `experimental.get`, `experimental.update`",
"code": "invalid_api_key_actions",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_api_key_actions"

View File

@@ -42,7 +42,7 @@ async fn batch_bad_types() {
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r#"
{
"message": "Invalid value in parameter `types`: `doggo` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentEdition`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`, `upgradeDatabase`.",
"message": "Invalid value in parameter `types`: `doggo` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentEdition`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`.",
"code": "invalid_task_types",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_task_types"

View File

@@ -41,8 +41,9 @@ async fn list_batches() {
let index = server.index("test");
let (task, _status_code) = index.create(None).await;
index.wait_task(task.uid()).await.succeeded();
let (task, _status_code) = index.create(None).await;
index.wait_task(task.uid()).await.failed();
index
.add_documents(serde_json::from_str(include_str!("../assets/test_set.json")).unwrap(), None)
.await;
let (response, code) = index.list_batches().await;
assert_eq!(code, 200);
assert_eq!(
@@ -95,12 +96,11 @@ async fn list_batches_pagination_and_reverse() {
async fn list_batches_with_star_filters() {
let server = Server::new().await;
let index = server.index("test");
let (task, _code) = index.create(None).await;
index.wait_task(task.uid()).await.succeeded();
let index = server.index("test");
let (task, _code) = index.create(None).await;
index.wait_task(task.uid()).await.failed();
let (batch, _code) = index.create(None).await;
index.wait_task(batch.uid()).await.succeeded();
index
.add_documents(serde_json::from_str(include_str!("../assets/test_set.json")).unwrap(), None)
.await;
let (response, code) = index.service.get("/batches?indexUids=test").await;
assert_eq!(code, 200);
assert_eq!(response["results"].as_array().unwrap().len(), 2);
@@ -187,6 +187,9 @@ async fn list_batches_invalid_canceled_by_filter() {
let index = server.index("test");
let (task, _status_code) = index.create(None).await;
index.wait_task(task.uid()).await.succeeded();
index
.add_documents(serde_json::from_str(include_str!("../assets/test_set.json")).unwrap(), None)
.await;
let (response, code) = index.filtered_batches(&[], &[], &["0"]).await;
assert_eq!(code, 200, "{}", response);
@@ -199,8 +202,9 @@ async fn list_batches_status_and_type_filtered() {
let index = server.index("test");
let (task, _status_code) = index.create(None).await;
index.wait_task(task.uid()).await.succeeded();
let (task, _status_code) = index.update(Some("id")).await;
index.wait_task(task.uid()).await.succeeded();
index
.add_documents(serde_json::from_str(include_str!("../assets/test_set.json")).unwrap(), None)
.await;
let (response, code) = index.filtered_batches(&["indexCreation"], &["failed"], &[]).await;
assert_eq!(code, 200, "{}", response);
@@ -208,7 +212,7 @@ async fn list_batches_status_and_type_filtered() {
let (response, code) = index
.filtered_batches(
&["indexCreation", "IndexUpdate"],
&["indexCreation", "documentAdditionOrUpdate"],
&["succeeded", "processing", "enqueued"],
&[],
)

View File

@@ -88,10 +88,6 @@ impl Server<Owned> {
self.service.api_key = Some(api_key.as_ref().to_string());
}
pub fn clear_api_key(&mut self) {
self.service.api_key = None;
}
/// Fetch and use the default admin key for nexts http requests.
pub async fn use_admin_key(&mut self, master_key: impl AsRef<str>) {
self.use_api_key(master_key);
@@ -163,18 +159,10 @@ impl Server<Owned> {
self.service.get("/tasks").await
}
pub async fn batches(&self) -> (Value, StatusCode) {
self.service.get("/batches").await
}
pub async fn set_features(&self, value: Value) -> (Value, StatusCode) {
self.service.patch("/experimental-features", value).await
}
pub async fn set_network(&self, value: Value) -> (Value, StatusCode) {
self.service.patch("/network", value).await
}
pub async fn get_metrics(&self) -> (Value, StatusCode) {
self.service.get("/metrics").await
}
@@ -400,10 +388,6 @@ impl<State> Server<State> {
pub async fn get_features(&self) -> (Value, StatusCode) {
self.service.get("/experimental-features").await
}
pub async fn get_network(&self) -> (Value, StatusCode) {
self.service.get("/network").await
}
}
pub fn default_settings(dir: impl AsRef<Path>) -> Opt {

View File

@@ -1803,275 +1803,6 @@ async fn add_documents_with_geo_field() {
"finishedAt": "[date]"
}
"###);
let (response, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }),
@r###"
{
"results": [
{
"id": "1"
},
{
"id": "2",
"_geo": null
},
{
"id": "3",
"_geo": {
"lat": 1,
"lng": 1
}
},
{
"id": "4",
"_geo": {
"lat": "1",
"lng": "1"
}
}
],
"offset": 0,
"limit": 20,
"total": 4
}
"###);
let (response, code) = index
.search_post(json!({"sort": ["_geoPoint(50.629973371633746,3.0569447399419567):desc"]}))
.await;
snapshot!(code, @"200 OK");
// we are expecting docs 4 and 3 first as they have geo
snapshot!(json_string!(response, { ".processingTimeMs" => "[time]" }),
@r###"
{
"hits": [
{
"id": "4",
"_geo": {
"lat": "1",
"lng": "1"
},
"_geoDistance": 5522018
},
{
"id": "3",
"_geo": {
"lat": 1,
"lng": 1
},
"_geoDistance": 5522018
},
{
"id": "1"
},
{
"id": "2",
"_geo": null
}
],
"query": "",
"processingTimeMs": "[time]",
"limit": 20,
"offset": 0,
"estimatedTotalHits": 4
}
"###);
}
#[actix_rt::test]
async fn update_documents_with_geo_field() {
let server = Server::new().await;
let index = server.index("doggo");
index.update_settings(json!({"sortableAttributes": ["_geo"]})).await;
let documents = json!([
{
"id": "1",
},
{
"id": "2",
"_geo": null,
},
{
"id": "3",
"_geo": { "lat": 1, "lng": 1 },
},
{
"id": "4",
"_geo": { "lat": "1", "lng": "1" },
},
]);
let (task, _status_code) = index.add_documents(documents, None).await;
let response = index.wait_task(task.uid()).await;
snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }),
@r###"
{
"uid": 1,
"batchUid": 1,
"indexUid": "doggo",
"status": "succeeded",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 4,
"indexedDocuments": 4
},
"error": null,
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
let (response, code) = index
.search_post(json!({"sort": ["_geoPoint(50.629973371633746,3.0569447399419567):desc"]}))
.await;
snapshot!(code, @"200 OK");
// we are expecting docs 4 and 3 first as they have geo
snapshot!(json_string!(response, { ".processingTimeMs" => "[time]" }),
@r###"
{
"hits": [
{
"id": "4",
"_geo": {
"lat": "1",
"lng": "1"
},
"_geoDistance": 5522018
},
{
"id": "3",
"_geo": {
"lat": 1,
"lng": 1
},
"_geoDistance": 5522018
},
{
"id": "1"
},
{
"id": "2",
"_geo": null
}
],
"query": "",
"processingTimeMs": "[time]",
"limit": 20,
"offset": 0,
"estimatedTotalHits": 4
}
"###);
let updated_documents = json!([{
"id": "3",
"doggo": "kefir",
}]);
let (task, _status_code) = index.update_documents(updated_documents, None).await;
let response = index.wait_task(task.uid()).await;
snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }),
@r###"
{
"uid": 2,
"batchUid": 2,
"indexUid": "doggo",
"status": "succeeded",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 1
},
"error": null,
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
let (response, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }),
@r###"
{
"results": [
{
"id": "1"
},
{
"id": "2",
"_geo": null
},
{
"id": "3",
"_geo": {
"lat": 1,
"lng": 1
},
"doggo": "kefir"
},
{
"id": "4",
"_geo": {
"lat": "1",
"lng": "1"
}
}
],
"offset": 0,
"limit": 20,
"total": 4
}
"###);
let (response, code) = index
.search_post(json!({"sort": ["_geoPoint(50.629973371633746,3.0569447399419567):desc"]}))
.await;
snapshot!(code, @"200 OK");
// the search response should not have changed: we are expecting docs 4 and 3 first as they have geo
snapshot!(json_string!(response, { ".processingTimeMs" => "[time]" }),
@r###"
{
"hits": [
{
"id": "4",
"_geo": {
"lat": "1",
"lng": "1"
},
"_geoDistance": 5522018
},
{
"id": "3",
"_geo": {
"lat": 1,
"lng": 1
},
"doggo": "kefir",
"_geoDistance": 5522018
},
{
"id": "1"
},
{
"id": "2",
"_geo": null
}
],
"query": "",
"processingTimeMs": "[time]",
"limit": 20,
"offset": 0,
"estimatedTotalHits": 4
}
"###);
}
#[actix_rt::test]

View File

@@ -161,8 +161,6 @@ async fn delete_document_by_filter() {
{
"numberOfDocuments": 4,
"isIndexing": false,
"numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0,
"fieldDistribution": {
"color": 3,
"id": 4
@@ -210,8 +208,6 @@ async fn delete_document_by_filter() {
{
"numberOfDocuments": 2,
"isIndexing": false,
"numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0,
"fieldDistribution": {
"color": 1,
"id": 2
@@ -278,8 +274,6 @@ async fn delete_document_by_filter() {
{
"numberOfDocuments": 1,
"isIndexing": false,
"numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0,
"fieldDistribution": {
"color": 1,
"id": 1

View File

@@ -22,7 +22,6 @@ pub enum GetDump {
TestV5,
TestV6WithExperimental,
TestV6WithBatchesAndEnqueuedTasks,
}
impl GetDump {
@@ -75,10 +74,6 @@ impl GetDump {
"tests/assets/v6_v1.6.0_use_deactivated_experimental_setting.dump"
)
.into(),
GetDump::TestV6WithBatchesAndEnqueuedTasks => {
exist_relative_path!("tests/assets/v6_v1.13.0_batches_and_enqueued_tasks.dump")
.into()
}
}
}
}

View File

@@ -27,24 +27,9 @@ async fn import_dump_v1_movie_raw() {
let (stats, code) = index.stats().await;
snapshot!(code, @"200 OK");
snapshot!(
json_string!(stats),
@r###"
{
"numberOfDocuments": 53,
"isIndexing": false,
"numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0,
"fieldDistribution": {
"genres": 53,
"id": 53,
"overview": 53,
"poster": 53,
"release_date": 53,
"title": 53
}
}
"###
assert_eq!(
stats,
json!({ "numberOfDocuments": 53, "isIndexing": false, "fieldDistribution": {"genres": 53, "id": 53, "overview": 53, "poster": 53, "release_date": 53, "title": 53 }})
);
let (settings, code) = index.settings().await;
@@ -188,8 +173,6 @@ async fn import_dump_v1_movie_with_settings() {
{
"numberOfDocuments": 53,
"isIndexing": false,
"numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0,
"fieldDistribution": {
"genres": 53,
"id": 53,
@@ -350,24 +333,9 @@ async fn import_dump_v1_rubygems_with_settings() {
let (stats, code) = index.stats().await;
snapshot!(code, @"200 OK");
snapshot!(
json_string!(stats),
@r###"
{
"numberOfDocuments": 53,
"isIndexing": false,
"numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0,
"fieldDistribution": {
"description": 53,
"id": 53,
"name": 53,
"summary": 53,
"total_downloads": 53,
"version": 53
}
}
"###
assert_eq!(
stats,
json!({ "numberOfDocuments": 53, "isIndexing": false, "fieldDistribution": {"description": 53, "id": 53, "name": 53, "summary": 53, "total_downloads": 53, "version": 53 }})
);
let (settings, code) = index.settings().await;
@@ -515,24 +483,9 @@ async fn import_dump_v2_movie_raw() {
let (stats, code) = index.stats().await;
snapshot!(code, @"200 OK");
snapshot!(
json_string!(stats),
@r###"
{
"numberOfDocuments": 53,
"isIndexing": false,
"numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0,
"fieldDistribution": {
"genres": 53,
"id": 53,
"overview": 53,
"poster": 53,
"release_date": 53,
"title": 53
}
}
"###
assert_eq!(
stats,
json!({ "numberOfDocuments": 53, "isIndexing": false, "fieldDistribution": {"genres": 53, "id": 53, "overview": 53, "poster": 53, "release_date": 53, "title": 53 }})
);
let (settings, code) = index.settings().await;
@@ -670,24 +623,9 @@ async fn import_dump_v2_movie_with_settings() {
let (stats, code) = index.stats().await;
snapshot!(code, @"200 OK");
snapshot!(
json_string!(stats),
@r###"
{
"numberOfDocuments": 53,
"isIndexing": false,
"numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0,
"fieldDistribution": {
"genres": 53,
"id": 53,
"overview": 53,
"poster": 53,
"release_date": 53,
"title": 53
}
}
"###
assert_eq!(
stats,
json!({ "numberOfDocuments": 53, "isIndexing": false, "fieldDistribution": {"genres": 53, "id": 53, "overview": 53, "poster": 53, "release_date": 53, "title": 53 }})
);
let (settings, code) = index.settings().await;
@@ -835,24 +773,9 @@ async fn import_dump_v2_rubygems_with_settings() {
let (stats, code) = index.stats().await;
snapshot!(code, @"200 OK");
snapshot!(
json_string!(stats),
@r###"
{
"numberOfDocuments": 53,
"isIndexing": false,
"numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0,
"fieldDistribution": {
"description": 53,
"id": 53,
"name": 53,
"summary": 53,
"total_downloads": 53,
"version": 53
}
}
"###
assert_eq!(
stats,
json!({ "numberOfDocuments": 53, "isIndexing": false, "fieldDistribution": {"description": 53, "id": 53, "name": 53, "summary": 53, "total_downloads": 53, "version": 53 }})
);
let (settings, code) = index.settings().await;
@@ -997,24 +920,9 @@ async fn import_dump_v3_movie_raw() {
let (stats, code) = index.stats().await;
snapshot!(code, @"200 OK");
snapshot!(
json_string!(stats),
@r###"
{
"numberOfDocuments": 53,
"isIndexing": false,
"numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0,
"fieldDistribution": {
"genres": 53,
"id": 53,
"overview": 53,
"poster": 53,
"release_date": 53,
"title": 53
}
}
"###
assert_eq!(
stats,
json!({ "numberOfDocuments": 53, "isIndexing": false, "fieldDistribution": {"genres": 53, "id": 53, "overview": 53, "poster": 53, "release_date": 53, "title": 53 }})
);
let (settings, code) = index.settings().await;
@@ -1152,24 +1060,9 @@ async fn import_dump_v3_movie_with_settings() {
let (stats, code) = index.stats().await;
snapshot!(code, @"200 OK");
snapshot!(
json_string!(stats),
@r###"
{
"numberOfDocuments": 53,
"isIndexing": false,
"numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0,
"fieldDistribution": {
"genres": 53,
"id": 53,
"overview": 53,
"poster": 53,
"release_date": 53,
"title": 53
}
}
"###
assert_eq!(
stats,
json!({ "numberOfDocuments": 53, "isIndexing": false, "fieldDistribution": {"genres": 53, "id": 53, "overview": 53, "poster": 53, "release_date": 53, "title": 53 }})
);
let (settings, code) = index.settings().await;
@@ -1317,24 +1210,9 @@ async fn import_dump_v3_rubygems_with_settings() {
let (stats, code) = index.stats().await;
snapshot!(code, @"200 OK");
snapshot!(
json_string!(stats),
@r###"
{
"numberOfDocuments": 53,
"isIndexing": false,
"numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0,
"fieldDistribution": {
"description": 53,
"id": 53,
"name": 53,
"summary": 53,
"total_downloads": 53,
"version": 53
}
}
"###
assert_eq!(
stats,
json!({ "numberOfDocuments": 53, "isIndexing": false, "fieldDistribution": {"description": 53, "id": 53, "name": 53, "summary": 53, "total_downloads": 53, "version": 53 }})
);
let (settings, code) = index.settings().await;
@@ -1479,24 +1357,9 @@ async fn import_dump_v4_movie_raw() {
let (stats, code) = index.stats().await;
snapshot!(code, @"200 OK");
snapshot!(
json_string!(stats),
@r###"
{
"numberOfDocuments": 53,
"isIndexing": false,
"numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0,
"fieldDistribution": {
"genres": 53,
"id": 53,
"overview": 53,
"poster": 53,
"release_date": 53,
"title": 53
}
}
"###
assert_eq!(
stats,
json!({ "numberOfDocuments": 53, "isIndexing": false, "fieldDistribution": {"genres": 53, "id": 53, "overview": 53, "poster": 53, "release_date": 53, "title": 53 }})
);
let (settings, code) = index.settings().await;
@@ -1634,24 +1497,9 @@ async fn import_dump_v4_movie_with_settings() {
let (stats, code) = index.stats().await;
snapshot!(code, @"200 OK");
snapshot!(
json_string!(stats),
@r###"
{
"numberOfDocuments": 53,
"isIndexing": false,
"numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0,
"fieldDistribution": {
"genres": 53,
"id": 53,
"overview": 53,
"poster": 53,
"release_date": 53,
"title": 53
}
}
"###
assert_eq!(
stats,
json!({ "numberOfDocuments": 53, "isIndexing": false, "fieldDistribution": {"genres": 53, "id": 53, "overview": 53, "poster": 53, "release_date": 53, "title": 53 }})
);
let (settings, code) = index.settings().await;
@@ -1799,24 +1647,9 @@ async fn import_dump_v4_rubygems_with_settings() {
let (stats, code) = index.stats().await;
snapshot!(code, @"200 OK");
snapshot!(
json_string!(stats),
@r###"
{
"numberOfDocuments": 53,
"isIndexing": false,
"numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0,
"fieldDistribution": {
"description": 53,
"id": 53,
"name": 53,
"summary": 53,
"total_downloads": 53,
"version": 53
}
}
"###
assert_eq!(
stats,
json!({ "numberOfDocuments": 53, "isIndexing": false, "fieldDistribution": {"description": 53, "id": 53, "name": 53, "summary": 53, "total_downloads": 53, "version": 53 }})
);
let (settings, code) = index.settings().await;
@@ -1965,35 +1798,33 @@ async fn import_dump_v5() {
server.wait_task(task["uid"].as_u64().unwrap()).await;
}
let expected_stats = json!({
"numberOfDocuments": 10,
"isIndexing": false,
"fieldDistribution": {
"cast": 10,
"director": 10,
"genres": 10,
"id": 10,
"overview": 10,
"popularity": 10,
"poster_path": 10,
"producer": 10,
"production_companies": 10,
"release_date": 10,
"tagline": 10,
"title": 10,
"vote_average": 10,
"vote_count": 10
}
});
let index1 = server.index("test");
let index2 = server.index("test2");
let (stats, code) = index1.stats().await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(stats), @r###"
{
"numberOfDocuments": 10,
"isIndexing": false,
"numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0,
"fieldDistribution": {
"cast": 10,
"director": 10,
"genres": 10,
"id": 10,
"overview": 10,
"popularity": 10,
"poster_path": 10,
"producer": 10,
"production_companies": 10,
"release_date": 10,
"tagline": 10,
"title": 10,
"vote_average": 10,
"vote_count": 10
}
}
"###);
assert_eq!(stats, expected_stats);
let (docs, code) = index2.get_all_documents(GetAllDocumentsOptions::default()).await;
snapshot!(code, @"200 OK");
@@ -2004,32 +1835,7 @@ async fn import_dump_v5() {
let (stats, code) = index2.stats().await;
snapshot!(code, @"200 OK");
snapshot!(
json_string!(stats),
@r###"
{
"numberOfDocuments": 10,
"isIndexing": false,
"numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0,
"fieldDistribution": {
"cast": 10,
"director": 10,
"genres": 10,
"id": 10,
"overview": 10,
"popularity": 10,
"poster_path": 10,
"producer": 10,
"production_companies": 10,
"release_date": 10,
"tagline": 10,
"title": 10,
"vote_average": 10,
"vote_count": 10
}
}
"###);
assert_eq!(stats, expected_stats);
let (keys, code) = server.list_api_keys("").await;
snapshot!(code, @"200 OK");
@@ -2102,9 +1908,7 @@ async fn import_dump_v6_containing_experimental_features() {
"metrics": false,
"logsRoute": false,
"editDocumentsByFunction": false,
"containsFilter": false,
"network": false,
"getTaskDocumentsRoute": false
"containsFilter": false
}
"###);
@@ -2188,63 +1992,6 @@ async fn import_dump_v6_containing_experimental_features() {
.await;
}
#[actix_rt::test]
async fn import_dump_v6_containing_batches_and_enqueued_tasks() {
let temp = tempfile::tempdir().unwrap();
let options = Opt {
import_dump: Some(GetDump::TestV6WithBatchesAndEnqueuedTasks.path()),
..default_settings(temp.path())
};
let mut server = Server::new_auth_with_options(options, temp).await;
server.use_api_key("MASTER_KEY");
server.wait_task(2).await.succeeded();
let (tasks, _) = server.tasks().await;
snapshot!(json_string!(tasks, { ".results[1].startedAt" => "[date]", ".results[1].finishedAt" => "[date]", ".results[1].duration" => "[date]" }), name: "tasks");
let (batches, _) = server.batches().await;
snapshot!(json_string!(batches, { ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].duration" => "[date]" }), name: "batches");
let (indexes, code) = server.list_indexes(None, None).await;
assert_eq!(code, 200, "{indexes}");
assert_eq!(indexes["results"].as_array().unwrap().len(), 1);
assert_eq!(indexes["results"][0]["uid"], json!("kefir"));
assert_eq!(indexes["results"][0]["primaryKey"], json!("id"));
let (response, code) = server.get_features().await;
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"metrics": false,
"logsRoute": false,
"editDocumentsByFunction": false,
"containsFilter": false,
"network": false,
"getTaskDocumentsRoute": false
}
"###);
let index = server.index("kefir");
let (documents, _) = index.get_all_documents_raw("").await;
snapshot!(documents, @r#"
{
"results": [
{
"id": 1,
"dog": "kefir"
},
{
"id": 2,
"dog": "intel"
}
],
"offset": 0,
"limit": 20,
"total": 2
}
"#);
}
// In this test we must generate the dump ourselves to ensure the
// `user provided` vectors are well set
#[actix_rt::test]
@@ -2322,9 +2069,7 @@ async fn generate_and_import_dump_containing_vectors() {
"metrics": false,
"logsRoute": false,
"editDocumentsByFunction": false,
"containsFilter": false,
"network": false,
"getTaskDocumentsRoute": false
"containsFilter": false
}
"###);

View File

@@ -1,78 +0,0 @@
---
source: crates/meilisearch/tests/dumps/mod.rs
snapshot_kind: text
---
{
"results": [
{
"uid": 2,
"progress": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 1
},
"stats": {
"totalNbTasks": 1,
"status": {
"succeeded": 1
},
"types": {
"documentAdditionOrUpdate": 1
},
"indexUids": {
"kefir": 1
}
},
"duration": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
},
{
"uid": 1,
"progress": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 1
},
"stats": {
"totalNbTasks": 1,
"status": {
"succeeded": 1
},
"types": {
"documentAdditionOrUpdate": 1
},
"indexUids": {
"kefir": 1
}
},
"duration": "PT0.144827890S",
"startedAt": "2025-02-04T10:15:21.275640274Z",
"finishedAt": "2025-02-04T10:15:21.420468164Z"
},
{
"uid": 0,
"progress": null,
"details": {},
"stats": {
"totalNbTasks": 1,
"status": {
"succeeded": 1
},
"types": {
"indexCreation": 1
},
"indexUids": {
"kefir": 1
}
},
"duration": "PT0.032902186S",
"startedAt": "2025-02-04T10:14:43.559526162Z",
"finishedAt": "2025-02-04T10:14:43.592428348Z"
}
],
"total": 3,
"limit": 20,
"from": 2,
"next": null
}

View File

@@ -1,78 +0,0 @@
---
source: crates/meilisearch/tests/dumps/mod.rs
snapshot_kind: text
---
{
"results": [
{
"uid": 3,
"batchUid": null,
"indexUid": null,
"status": "succeeded",
"type": "dumpCreation",
"canceledBy": null,
"details": {
"dumpUid": null
},
"error": null,
"duration": "PT0.000629059S",
"enqueuedAt": "2025-02-04T10:22:31.318175268Z",
"startedAt": "2025-02-04T10:22:31.331701375Z",
"finishedAt": "2025-02-04T10:22:31.332330434Z"
},
{
"uid": 2,
"batchUid": 2,
"indexUid": "kefir",
"status": "succeeded",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 1
},
"error": null,
"duration": "[date]",
"enqueuedAt": "2025-02-04T10:15:49.212484063Z",
"startedAt": "[date]",
"finishedAt": "[date]"
},
{
"uid": 1,
"batchUid": null,
"indexUid": "kefir",
"status": "succeeded",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 1
},
"error": null,
"duration": "PT0.144827890S",
"enqueuedAt": "2025-02-04T10:15:21.258630973Z",
"startedAt": "2025-02-04T10:15:21.275640274Z",
"finishedAt": "2025-02-04T10:15:21.420468164Z"
},
{
"uid": 0,
"batchUid": null,
"indexUid": "kefir",
"status": "succeeded",
"type": "indexCreation",
"canceledBy": null,
"details": {
"primaryKey": null
},
"error": null,
"duration": "PT0.032902186S",
"enqueuedAt": "2025-02-04T10:14:43.550379968Z",
"startedAt": "2025-02-04T10:14:43.559526162Z",
"finishedAt": "2025-02-04T10:14:43.592428348Z"
}
],
"total": 4,
"limit": 20,
"from": 3,
"next": null
}

View File

@@ -21,9 +21,7 @@ async fn experimental_features() {
"metrics": false,
"logsRoute": false,
"editDocumentsByFunction": false,
"containsFilter": false,
"network": false,
"getTaskDocumentsRoute": false
"containsFilter": false
}
"###);
@@ -35,9 +33,7 @@ async fn experimental_features() {
"metrics": true,
"logsRoute": false,
"editDocumentsByFunction": false,
"containsFilter": false,
"network": false,
"getTaskDocumentsRoute": false
"containsFilter": false
}
"###);
@@ -49,9 +45,7 @@ async fn experimental_features() {
"metrics": true,
"logsRoute": false,
"editDocumentsByFunction": false,
"containsFilter": false,
"network": false,
"getTaskDocumentsRoute": false
"containsFilter": false
}
"###);
@@ -64,9 +58,7 @@ async fn experimental_features() {
"metrics": true,
"logsRoute": false,
"editDocumentsByFunction": false,
"containsFilter": false,
"network": false,
"getTaskDocumentsRoute": false
"containsFilter": false
}
"###);
@@ -79,9 +71,7 @@ async fn experimental_features() {
"metrics": true,
"logsRoute": false,
"editDocumentsByFunction": false,
"containsFilter": false,
"network": false,
"getTaskDocumentsRoute": false
"containsFilter": false
}
"###);
}
@@ -101,9 +91,7 @@ async fn experimental_feature_metrics() {
"metrics": true,
"logsRoute": false,
"editDocumentsByFunction": false,
"containsFilter": false,
"network": false,
"getTaskDocumentsRoute": false
"containsFilter": false
}
"###);
@@ -158,7 +146,7 @@ async fn errors() {
meili_snap::snapshot!(code, @"400 Bad Request");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"message": "Unknown field `NotAFeature`: expected one of `metrics`, `logsRoute`, `editDocumentsByFunction`, `containsFilter`, `network`, `getTaskDocumentsRoute`",
"message": "Unknown field `NotAFeature`: expected one of `metrics`, `logsRoute`, `editDocumentsByFunction`, `containsFilter`",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"

View File

@@ -7,7 +7,6 @@ mod dumps;
mod features;
mod index;
mod logs;
mod network;
mod search;
mod settings;
mod similar;
@@ -15,7 +14,6 @@ mod snapshot;
mod stats;
mod swap_indexes;
mod tasks;
mod upgrade;
mod vector;
// Tests are isolated by features in different modules to allow better readability, test

View File

@@ -1,606 +0,0 @@
use serde_json::Value::Null;
use crate::common::Server;
use crate::json;
#[actix_rt::test]
async fn error_network_not_enabled() {
let server = Server::new().await;
let (response, code) = server.get_network().await;
meili_snap::snapshot!(code, @"400 Bad Request");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"message": "Using the /network route requires enabling the `network` experimental feature. See https://github.com/orgs/meilisearch/discussions/805",
"code": "feature_not_enabled",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#feature_not_enabled"
}
"###);
let (response, code) = server.set_network(json!({"self": "myself"})).await;
meili_snap::snapshot!(code, @"400 Bad Request");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"message": "Using the /network route requires enabling the `network` experimental feature. See https://github.com/orgs/meilisearch/discussions/805",
"code": "feature_not_enabled",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#feature_not_enabled"
}
"###);
}
#[actix_rt::test]
async fn errors_on_param() {
let server = Server::new().await;
let (response, code) = server.set_features(json!({"network": true})).await;
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response["network"]), @r#"true"#);
// non-existing param
let (response, code) = server.set_network(json!({"selfie": "myself"})).await;
meili_snap::snapshot!(code, @"400 Bad Request");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"message": "Unknown field `selfie`: expected one of `remotes`, `self`",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"
}
"###);
// self not a string
let (response, code) = server.set_network(json!({"self": 42})).await;
meili_snap::snapshot!(code, @"400 Bad Request");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"message": "Invalid value type at `.self`: expected a string, but found a positive integer: `42`",
"code": "invalid_network_self",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_network_self"
}
"###);
// remotes not an object
let (response, code) = server.set_network(json!({"remotes": 42})).await;
meili_snap::snapshot!(code, @"400 Bad Request");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"message": "Invalid value type at `.remotes`: expected an object, but found a positive integer: `42`",
"code": "invalid_network_remotes",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_network_remotes"
}
"###);
// new remote without url
let (response, code) = server
.set_network(json!({"remotes": {
"new": {
"searchApiKey": "http://localhost:7700"
}
}}))
.await;
meili_snap::snapshot!(code, @"400 Bad Request");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"message": "Missing field `.remotes.new.url`",
"code": "missing_network_url",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#missing_network_url"
}
"###);
// remote with url not a string
let (response, code) = server
.set_network(json!({"remotes": {
"new": {
"url": 7700
}
}}))
.await;
meili_snap::snapshot!(code, @"400 Bad Request");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"message": "Invalid value type at `.remotes.new.url`: expected a string, but found a positive integer: `7700`",
"code": "invalid_network_url",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_network_url"
}
"###);
// remote with non-existing param
let (response, code) = server
.set_network(json!({"remotes": {
"new": {
"url": "http://localhost:7700",
"doggo": "Intel the Beagle"
}
}}))
.await;
meili_snap::snapshot!(code, @"400 Bad Request");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"message": "Unknown field `doggo` inside `.remotes.new`: expected one of `url`, `searchApiKey`",
"code": "invalid_network_remotes",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_network_remotes"
}
"###);
// remote with non-string searchApiKey
let (response, code) = server
.set_network(json!({"remotes": {
"new": {
"url": "http://localhost:7700",
"searchApiKey": 1204664602099962445u64,
}
}}))
.await;
meili_snap::snapshot!(code, @"400 Bad Request");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"message": "Invalid value type at `.remotes.new.searchApiKey`: expected a string, but found a positive integer: `1204664602099962445`",
"code": "invalid_network_search_api_key",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_network_search_api_key"
}
"###);
// setting `null` on URL a posteriori
let (response, code) = server
.set_network(json!({"remotes": {
"kefir": {
"url": "http://localhost:7700",
}
}}))
.await;
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"self": null,
"remotes": {
"kefir": {
"url": "http://localhost:7700",
"searchApiKey": null
}
}
}
"###);
let (response, code) = server
.set_network(json!({"remotes": {
"kefir": {
"url": Null,
}
}}))
.await;
meili_snap::snapshot!(code, @"400 Bad Request");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"message": "Field `.remotes.kefir.url` cannot be set to `null`",
"code": "invalid_network_url",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_network_url"
}
"###);
}
#[actix_rt::test]
async fn auth() {
let mut server = Server::new_auth().await;
server.use_api_key("MASTER_KEY");
let (response, code) = server.set_features(json!({"network": true})).await;
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response["network"]), @r#"true"#);
let (get_network_key, code) = server
.add_api_key(json!({
"actions": ["network.get"],
"indexes": ["*"],
"expiresAt": serde_json::Value::Null
}))
.await;
meili_snap::snapshot!(code, @"201 Created");
let get_network_key = get_network_key["key"].clone();
let (update_network_key, code) = server
.add_api_key(json!({
"actions": ["network.update"],
"indexes": ["*"],
"expiresAt": serde_json::Value::Null
}))
.await;
meili_snap::snapshot!(code, @"201 Created");
let update_network_key = update_network_key["key"].clone();
let (search_api_key, code) = server
.add_api_key(json!({
"actions": ["search"],
"indexes": ["*"],
"expiresAt": serde_json::Value::Null
}))
.await;
meili_snap::snapshot!(code, @"201 Created");
let search_api_key = search_api_key["key"].clone();
// try with master key
let (response, code) = server
.set_network(json!({
"self": "master"
}))
.await;
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"self": "master",
"remotes": {}
}
"###);
let (response, code) = server.get_network().await;
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"self": "master",
"remotes": {}
}
"###);
// try get with get permission
server.use_api_key(get_network_key.as_str().unwrap());
let (response, code) = server.get_network().await;
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"self": "master",
"remotes": {}
}
"###);
// try update with update permission
server.use_api_key(update_network_key.as_str().unwrap());
let (response, code) = server
.set_network(json!({
"self": "api_key"
}))
.await;
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"self": "api_key",
"remotes": {}
}
"###);
// try with the other's permission
let (response, code) = server.get_network().await;
meili_snap::snapshot!(code, @"403 Forbidden");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"message": "The provided API key is invalid.",
"code": "invalid_api_key",
"type": "auth",
"link": "https://docs.meilisearch.com/errors#invalid_api_key"
}
"###);
server.use_api_key(get_network_key.as_str().unwrap());
let (response, code) = server
.set_network(json!({
"self": "get_api_key"
}))
.await;
meili_snap::snapshot!(code, @"403 Forbidden");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"message": "The provided API key is invalid.",
"code": "invalid_api_key",
"type": "auth",
"link": "https://docs.meilisearch.com/errors#invalid_api_key"
}
"###);
// try either with bad permission
server.use_api_key(search_api_key.as_str().unwrap());
let (response, code) = server.get_network().await;
meili_snap::snapshot!(code, @"403 Forbidden");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"message": "The provided API key is invalid.",
"code": "invalid_api_key",
"type": "auth",
"link": "https://docs.meilisearch.com/errors#invalid_api_key"
}
"###);
let (response, code) = server
.set_network(json!({
"self": "get_api_key"
}))
.await;
meili_snap::snapshot!(code, @"403 Forbidden");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"message": "The provided API key is invalid.",
"code": "invalid_api_key",
"type": "auth",
"link": "https://docs.meilisearch.com/errors#invalid_api_key"
}
"###);
}
#[actix_rt::test]
async fn get_and_set_network() {
let server = Server::new().await;
let (response, code) = server.set_features(json!({"network": true})).await;
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response["network"]), @r#"true"#);
let (response, code) = server.get_network().await;
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"self": null,
"remotes": {}
}
"###);
// adding self
let (response, code) = server.set_network(json!({"self": "myself"})).await;
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"self": "myself",
"remotes": {}
}
"###);
// adding remotes
let (response, code) = server
.set_network(json!({"remotes": {
"myself": {
"url": "http://localhost:7700"
},
"thy": {
"url": "http://localhost:7701",
"searchApiKey": "foo"
}
}}))
.await;
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"self": "myself",
"remotes": {
"myself": {
"url": "http://localhost:7700",
"searchApiKey": null
},
"thy": {
"url": "http://localhost:7701",
"searchApiKey": "foo"
}
}
}
"###);
// partially updating one remote
let (response, code) = server
.set_network(json!({"remotes": {
"thy": {
"searchApiKey": "bar"
}
}}))
.await;
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"self": "myself",
"remotes": {
"myself": {
"url": "http://localhost:7700",
"searchApiKey": null
},
"thy": {
"url": "http://localhost:7701",
"searchApiKey": "bar"
}
}
}
"###);
// adding one remote
let (response, code) = server
.set_network(json!({"remotes": {
"them": {
"url": "http://localhost:7702",
"searchApiKey": "baz"
}
}}))
.await;
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"self": "myself",
"remotes": {
"myself": {
"url": "http://localhost:7700",
"searchApiKey": null
},
"them": {
"url": "http://localhost:7702",
"searchApiKey": "baz"
},
"thy": {
"url": "http://localhost:7701",
"searchApiKey": "bar"
}
}
}
"###);
// deleting one remote
let (response, code) = server
.set_network(json!({"remotes": {
"myself": Null,
}}))
.await;
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"self": "myself",
"remotes": {
"them": {
"url": "http://localhost:7702",
"searchApiKey": "baz"
},
"thy": {
"url": "http://localhost:7701",
"searchApiKey": "bar"
}
}
}
"###);
// removing self
let (response, code) = server.set_network(json!({"self": Null})).await;
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"self": null,
"remotes": {
"them": {
"url": "http://localhost:7702",
"searchApiKey": "baz"
},
"thy": {
"url": "http://localhost:7701",
"searchApiKey": "bar"
}
}
}
"###);
// setting self again
let (response, code) = server.set_network(json!({"self": "thy"})).await;
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"self": "thy",
"remotes": {
"them": {
"url": "http://localhost:7702",
"searchApiKey": "baz"
},
"thy": {
"url": "http://localhost:7701",
"searchApiKey": "bar"
}
}
}
"###);
// doing nothing
let (response, code) = server.set_network(json!({})).await;
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"self": "thy",
"remotes": {
"them": {
"url": "http://localhost:7702",
"searchApiKey": "baz"
},
"thy": {
"url": "http://localhost:7701",
"searchApiKey": "bar"
}
}
}
"###);
// still doing nothing
let (response, code) = server.set_network(json!({"remotes": {}})).await;
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"self": "thy",
"remotes": {
"them": {
"url": "http://localhost:7702",
"searchApiKey": "baz"
},
"thy": {
"url": "http://localhost:7701",
"searchApiKey": "bar"
}
}
}
"###);
// good time to check GET
let (response, code) = server.get_network().await;
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"self": "thy",
"remotes": {
"them": {
"url": "http://localhost:7702",
"searchApiKey": "baz"
},
"thy": {
"url": "http://localhost:7701",
"searchApiKey": "bar"
}
}
}
"###);
// deleting everything
let (response, code) = server
.set_network(json!({
"remotes": Null,
}))
.await;
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"self": "thy",
"remotes": {}
}
"###);
}

View File

@@ -1524,57 +1524,3 @@ async fn change_attributes_settings() {
)
.await;
}
/// Modifying facets with different casing should work correctly
#[actix_rt::test]
async fn change_facet_casing() {
let server = Server::new().await;
let index = server.index("test");
let (response, code) = index
.update_settings(json!({
"filterableAttributes": ["dog"],
}))
.await;
assert_eq!("202", code.as_str(), "{:?}", response);
index.wait_task(response.uid()).await;
let (response, _code) = index
.add_documents(
json!([
{
"id": 1,
"dog": "Bouvier Bernois"
}
]),
None,
)
.await;
index.wait_task(response.uid()).await;
let (response, _code) = index
.add_documents(
json!([
{
"id": 1,
"dog": "bouvier bernois"
}
]),
None,
)
.await;
index.wait_task(response.uid()).await;
index
.search(json!({ "facets": ["dog"] }), |response, code| {
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response["facetDistribution"]), @r###"
{
"dog": {
"bouvier bernois": 1
}
}
"###);
})
.await;
}

View File

@@ -5,8 +5,6 @@ use crate::common::Server;
use crate::json;
use crate::search::{SCORE_DOCUMENTS, VECTOR_DOCUMENTS};
mod proxy;
#[actix_rt::test]
async fn search_empty_list() {
let server = Server::new().await;

File diff suppressed because it is too large Load Diff

View File

@@ -1,4 +1,3 @@
use meili_snap::{json_string, snapshot};
use time::format_description::well_known::Rfc3339;
use time::OffsetDateTime;
@@ -75,253 +74,3 @@ async fn stats() {
assert_eq!(response["indexes"]["test"]["fieldDistribution"]["name"], 1);
assert_eq!(response["indexes"]["test"]["fieldDistribution"]["age"], 1);
}
#[actix_rt::test]
async fn add_remove_embeddings() {
let server = Server::new().await;
let index = server.index("doggo");
let (response, code) = index
.update_settings(json!({
"embedders": {
"manual": {
"source": "userProvided",
"dimensions": 3,
},
"handcrafted": {
"source": "userProvided",
"dimensions": 3,
},
},
}))
.await;
snapshot!(code, @"202 Accepted");
server.wait_task(response.uid()).await.succeeded();
// 2 embedded documents for 5 embeddings in total
let documents = json!([
{"id": 0, "name": "kefir", "_vectors": { "manual": [0, 0, 0], "handcrafted": [0, 0, 0] }},
{"id": 1, "name": "echo", "_vectors": { "manual": [1, 1, 1], "handcrafted": [[1, 1, 1], [2, 2, 2]] }},
]);
let (response, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
index.wait_task(response.uid()).await.succeeded();
let (stats, _code) = index.stats().await;
snapshot!(json_string!(stats), @r###"
{
"numberOfDocuments": 2,
"isIndexing": false,
"numberOfEmbeddings": 5,
"numberOfEmbeddedDocuments": 2,
"fieldDistribution": {
"id": 2,
"name": 2
}
}
"###);
// 2 embedded documents for 3 embeddings in total
let documents = json!([
{"id": 1, "name": "echo", "_vectors": { "manual": [1, 1, 1], "handcrafted": null }},
]);
let (response, code) = index.update_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
index.wait_task(response.uid()).await.succeeded();
let (stats, _code) = index.stats().await;
snapshot!(json_string!(stats), @r###"
{
"numberOfDocuments": 2,
"isIndexing": false,
"numberOfEmbeddings": 3,
"numberOfEmbeddedDocuments": 2,
"fieldDistribution": {
"id": 2,
"name": 2
}
}
"###);
// 2 embedded documents for 2 embeddings in total
let documents = json!([
{"id": 0, "name": "kefir", "_vectors": { "manual": null, "handcrafted": [0, 0, 0] }},
]);
let (response, code) = index.update_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
index.wait_task(response.uid()).await.succeeded();
let (stats, _code) = index.stats().await;
snapshot!(json_string!(stats), @r###"
{
"numberOfDocuments": 2,
"isIndexing": false,
"numberOfEmbeddings": 2,
"numberOfEmbeddedDocuments": 2,
"fieldDistribution": {
"id": 2,
"name": 2
}
}
"###);
// 1 embedded documents for 2 embeddings in total
let documents = json!([
{"id": 0, "name": "kefir", "_vectors": { "manual": [0, 0, 0], "handcrafted": [0, 0, 0] }},
{"id": 1, "name": "echo", "_vectors": { "manual": null, "handcrafted": null }},
]);
let (response, code) = index.update_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
index.wait_task(response.uid()).await.succeeded();
let (stats, _code) = index.stats().await;
snapshot!(json_string!(stats), @r###"
{
"numberOfDocuments": 2,
"isIndexing": false,
"numberOfEmbeddings": 2,
"numberOfEmbeddedDocuments": 1,
"fieldDistribution": {
"id": 2,
"name": 2
}
}
"###);
}
#[actix_rt::test]
async fn add_remove_embedded_documents() {
let server = Server::new().await;
let index = server.index("doggo");
let (response, code) = index
.update_settings(json!({
"embedders": {
"manual": {
"source": "userProvided",
"dimensions": 3,
},
"handcrafted": {
"source": "userProvided",
"dimensions": 3,
},
},
}))
.await;
snapshot!(code, @"202 Accepted");
server.wait_task(response.uid()).await.succeeded();
// 2 embedded documents for 5 embeddings in total
let documents = json!([
{"id": 0, "name": "kefir", "_vectors": { "manual": [0, 0, 0], "handcrafted": [0, 0, 0] }},
{"id": 1, "name": "echo", "_vectors": { "manual": [1, 1, 1], "handcrafted": [[1, 1, 1], [2, 2, 2]] }},
]);
let (response, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
index.wait_task(response.uid()).await.succeeded();
let (stats, _code) = index.stats().await;
snapshot!(json_string!(stats), @r###"
{
"numberOfDocuments": 2,
"isIndexing": false,
"numberOfEmbeddings": 5,
"numberOfEmbeddedDocuments": 2,
"fieldDistribution": {
"id": 2,
"name": 2
}
}
"###);
// delete one embedded document, remaining 1 embedded documents for 3 embeddings in total
let (response, code) = index.delete_document(0).await;
snapshot!(code, @"202 Accepted");
index.wait_task(response.uid()).await.succeeded();
let (stats, _code) = index.stats().await;
snapshot!(json_string!(stats), @r###"
{
"numberOfDocuments": 1,
"isIndexing": false,
"numberOfEmbeddings": 3,
"numberOfEmbeddedDocuments": 1,
"fieldDistribution": {
"id": 1,
"name": 1
}
}
"###);
}
#[actix_rt::test]
async fn update_embedder_settings() {
let server = Server::new().await;
let index = server.index("doggo");
// 2 embedded documents for 3 embeddings in total
// but no embedders are added in the settings yet so we expect 0 embedded documents for 0 embeddings in total
let documents = json!([
{"id": 0, "name": "kefir", "_vectors": { "manual": [0, 0, 0], "handcrafted": [0, 0, 0] }},
{"id": 1, "name": "echo", "_vectors": { "manual": [1, 1, 1], "handcrafted": null }},
]);
let (response, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
index.wait_task(response.uid()).await.succeeded();
let (stats, _code) = index.stats().await;
snapshot!(json_string!(stats), @r###"
{
"numberOfDocuments": 2,
"isIndexing": false,
"numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0,
"fieldDistribution": {
"id": 2,
"name": 2
}
}
"###);
// add embedders to the settings
// 2 embedded documents for 3 embeddings in total
let (response, code) = index
.update_settings(json!({
"embedders": {
"manual": {
"source": "userProvided",
"dimensions": 3,
},
"handcrafted": {
"source": "userProvided",
"dimensions": 3,
},
},
}))
.await;
snapshot!(code, @"202 Accepted");
server.wait_task(response.uid()).await.succeeded();
let (stats, _code) = index.stats().await;
snapshot!(json_string!(stats), @r###"
{
"numberOfDocuments": 2,
"isIndexing": false,
"numberOfEmbeddings": 3,
"numberOfEmbeddedDocuments": 2,
"fieldDistribution": {
"id": 2,
"name": 2
}
}
"###);
}

View File

@@ -95,36 +95,36 @@ async fn task_bad_types() {
let (response, code) = server.tasks_filter("types=doggo").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r#"
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value in parameter `types`: `doggo` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentEdition`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`, `upgradeDatabase`.",
"message": "Invalid value in parameter `types`: `doggo` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentEdition`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`.",
"code": "invalid_task_types",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_task_types"
}
"#);
"###);
let (response, code) = server.cancel_tasks("types=doggo").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r#"
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value in parameter `types`: `doggo` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentEdition`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`, `upgradeDatabase`.",
"message": "Invalid value in parameter `types`: `doggo` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentEdition`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`.",
"code": "invalid_task_types",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_task_types"
}
"#);
"###);
let (response, code) = server.delete_tasks("types=doggo").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r#"
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value in parameter `types`: `doggo` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentEdition`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`, `upgradeDatabase`.",
"message": "Invalid value in parameter `types`: `doggo` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentEdition`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`.",
"code": "invalid_task_types",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_task_types"
}
"#);
"###);
}
#[actix_rt::test]

Some files were not shown because too many files have changed in this diff Show More