Mirror of https://github.com/meilisearch/meilisearch.git, synced 2025-11-03 01:16:27 +00:00
Move crates under a sub folder to clean up the code
crates/dump/src/reader/v2/errors.rs (Normal file, +14 lines)
@@ -0,0 +1,14 @@
use http::StatusCode;
use serde::Deserialize;

#[derive(Debug, Deserialize, Clone)]
#[cfg_attr(test, derive(serde::Serialize))]
#[serde(rename_all = "camelCase")]
pub struct ResponseError {
    #[serde(skip)]
    pub code: StatusCode,
    pub message: String,
    pub error_code: String,
    pub error_type: String,
    pub error_link: String,
}
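A quick illustration of how one of these error payloads deserializes; this is a hedged sketch, not part of the commit (the JSON values are made up, and serde_json is assumed to be available as it is elsewhere in this crate). Since `code` is marked #[serde(skip)], deserialization falls back to StatusCode::default():

    // Hypothetical example, not part of errors.rs itself.
    fn response_error_demo() -> serde_json::Result<()> {
        let err: ResponseError = serde_json::from_str(
            r#"{
                "message": "Index `movies` not found.",
                "errorCode": "index_not_found",
                "errorType": "invalid_request_error",
                "errorLink": "https://docs.meilisearch.com/errors#index_not_found"
            }"#,
        )?;
        assert_eq!(err.code, http::StatusCode::OK); // the skipped field takes its Default value
        assert_eq!(err.error_code, "index_not_found"); // camelCase keys map to snake_case fields
        Ok(())
    }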
crates/dump/src/reader/v2/meta.rs (Normal file, +18 lines)
@@ -0,0 +1,18 @@
use serde::Deserialize;
use uuid::Uuid;

use super::Settings;

#[derive(Deserialize, Debug, Clone)]
#[cfg_attr(test, derive(serde::Serialize))]
pub struct IndexUuid {
    pub uid: String,
    pub uuid: Uuid,
}

#[derive(Deserialize)]
#[cfg_attr(test, derive(serde::Serialize))]
pub struct DumpMeta {
    pub settings: Settings<super::Unchecked>,
    pub primary_key: Option<String>,
}
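Each line of index_uuids/data.jsonl maps onto one IndexUuid, pairing a human-readable index uid with the uuid used on disk. A minimal sketch of that mapping (hypothetical demo with illustrative values; the uuid is borrowed from the directory tree documented in mod.rs below):

    // Hypothetical example: one line of `index_uuids/data.jsonl`.
    fn index_uuid_demo() -> serde_json::Result<()> {
        let line = r#"{ "uid": "movies", "uuid": "40d14c5f-37ae-4873-9d51-b69e014a0d30" }"#;
        let index: IndexUuid = serde_json::from_str(line)?;
        assert_eq!(index.uid, "movies");
        assert_eq!(index.uuid.to_string(), "40d14c5f-37ae-4873-9d51-b69e014a0d30");
        Ok(())
    }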
crates/dump/src/reader/v2/mod.rs (Normal file, +416 lines)
@@ -0,0 +1,416 @@
//! ```text
//! .
//! ├── indexes
//! │   ├── index-40d14c5f-37ae-4873-9d51-b69e014a0d30
//! │   │   ├── documents.jsonl
//! │   │   └── meta.json
//! │   ├── index-88202369-4524-4410-9b3d-3e924c867fec
//! │   │   ├── documents.jsonl
//! │   │   └── meta.json
//! │   ├── index-b7f2d03b-bf9b-40d9-a25b-94dc5ec60c32
//! │   │   ├── documents.jsonl
//! │   │   └── meta.json
//! │   └── index-dc9070b3-572d-4f30-ab45-d4903ab71708
//! │       ├── documents.jsonl
//! │       └── meta.json
//! ├── index_uuids
//! │   └── data.jsonl
//! ├── metadata.json
//! └── updates
//!     ├── data.jsonl
//!     └── update_files
//!         └── update_202573df-718b-4d80-9a65-2ee397c23dc3
//! ```

use std::fs::{self, File};
use std::io::{BufRead, BufReader};
use std::path::Path;

use serde::{Deserialize, Serialize};
use tempfile::TempDir;
use time::OffsetDateTime;

pub mod errors;
pub mod meta;
pub mod settings;
pub mod updates;

use self::meta::{DumpMeta, IndexUuid};
use super::compat::v2_to_v3::CompatV2ToV3;
use super::Document;
use crate::{IndexMetadata, Result, Version};

pub type Settings<T> = settings::Settings<T>;
pub type Setting<T> = settings::Setting<T>;
pub type Checked = settings::Checked;
pub type Unchecked = settings::Unchecked;

pub type Task = updates::UpdateEntry;
pub type Kind = updates::UpdateMeta;

// everything related to the errors
pub type ResponseError = errors::ResponseError;

#[derive(Serialize, Deserialize, Debug)]
#[serde(rename_all = "camelCase")]
pub struct Metadata {
    db_version: String,
    index_db_size: usize,
    update_db_size: usize,
    #[serde(with = "time::serde::rfc3339")]
    dump_date: OffsetDateTime,
}

pub struct V2Reader {
    dump: TempDir,
    metadata: Metadata,
    tasks: BufReader<File>,
    pub index_uuid: Vec<IndexUuid>,
}

impl V2Reader {
    pub fn open(dump: TempDir) -> Result<Self> {
        let meta_file = fs::read(dump.path().join("metadata.json"))?;
        let metadata = serde_json::from_reader(&*meta_file)?;
        let index_uuid = File::open(dump.path().join("index_uuids/data.jsonl"))?;
        let index_uuid = BufReader::new(index_uuid);
        let index_uuid = index_uuid
            .lines()
            .map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) })
            .collect::<Result<Vec<_>>>()?;

        Ok(V2Reader {
            metadata,
            tasks: BufReader::new(
                File::open(dump.path().join("updates").join("data.jsonl")).unwrap(),
            ),
            index_uuid,
            dump,
        })
    }

    pub fn to_v3(self) -> CompatV2ToV3 {
        CompatV2ToV3::new(self)
    }

    pub fn index_uuid(&self) -> Vec<IndexUuid> {
        self.index_uuid.clone()
    }

    pub fn version(&self) -> Version {
        Version::V2
    }

    pub fn date(&self) -> Option<OffsetDateTime> {
        Some(self.metadata.dump_date)
    }

    pub fn indexes(&self) -> Result<impl Iterator<Item = Result<V2IndexReader>> + '_> {
        Ok(self.index_uuid.iter().map(|index| -> Result<_> {
            V2IndexReader::new(
                &self.dump.path().join("indexes").join(format!("index-{}", index.uuid)),
                index,
                BufReader::new(
                    File::open(self.dump.path().join("updates").join("data.jsonl")).unwrap(),
                ),
            )
        }))
    }

    pub fn tasks(&mut self) -> Box<dyn Iterator<Item = Result<(Task, Option<UpdateFile>)>> + '_> {
        Box::new((&mut self.tasks).lines().map(|line| -> Result<_> {
            let task: Task = serde_json::from_str(&line?)?;
            if !task.is_finished() {
                if let Some(uuid) = task.get_content_uuid() {
                    let update_file_path = self
                        .dump
                        .path()
                        .join("updates")
                        .join("update_files")
                        .join(format!("update_{}", uuid));
                    Ok((task, Some(UpdateFile::new(&update_file_path)?)))
                } else {
                    Ok((task, None))
                }
            } else {
                Ok((task, None))
            }
        }))
    }
}

pub struct V2IndexReader {
    metadata: IndexMetadata,
    settings: Settings<Checked>,

    documents: BufReader<File>,
}

impl V2IndexReader {
    pub fn new(path: &Path, index_uuid: &IndexUuid, tasks: BufReader<File>) -> Result<Self> {
        let meta = File::open(path.join("meta.json"))?;
        let meta: DumpMeta = serde_json::from_reader(meta)?;

        let mut created_at = None;
        let mut updated_at = None;

        for line in tasks.lines() {
            let task: Task = serde_json::from_str(&line?)?;
            if !(task.uuid == index_uuid.uuid && task.is_finished()) {
                continue;
            }

            let new_created_at = match task.update.meta() {
                Kind::DocumentsAddition { .. } | Kind::Settings(_) => task.update.finished_at(),
                _ => None,
            };
            let new_updated_at = task.update.finished_at();

            if created_at.is_none() || created_at > new_created_at {
                created_at = new_created_at;
            }

            if updated_at.is_none() || updated_at < new_updated_at {
                updated_at = new_updated_at;
            }
        }

        let current_time = OffsetDateTime::now_utc();

        let metadata = IndexMetadata {
            uid: index_uuid.uid.clone(),
            primary_key: meta.primary_key,
            created_at: created_at.unwrap_or(current_time),
            updated_at: updated_at.unwrap_or(current_time),
        };

        let ret = V2IndexReader {
            metadata,
            settings: meta.settings.check(),
            documents: BufReader::new(File::open(path.join("documents.jsonl"))?),
        };

        Ok(ret)
    }

    pub fn metadata(&self) -> &IndexMetadata {
        &self.metadata
    }

    pub fn documents(&mut self) -> Result<impl Iterator<Item = Result<Document>> + '_> {
        Ok((&mut self.documents)
            .lines()
            .map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) }))
    }

    pub fn settings(&mut self) -> Result<Settings<Checked>> {
        Ok(self.settings.clone())
    }
}

pub struct UpdateFile {
    documents: Vec<Document>,
    index: usize,
}

impl UpdateFile {
    fn new(path: &Path) -> Result<Self> {
        let reader = BufReader::new(File::open(path)?);
        Ok(UpdateFile { documents: serde_json::from_reader(reader)?, index: 0 })
    }
}

impl Iterator for UpdateFile {
    type Item = Result<Document>;

    fn next(&mut self) -> Option<Self::Item> {
        self.index += 1;
        self.documents.get(self.index - 1).cloned().map(Ok)
    }
}

#[cfg(test)]
pub(crate) mod test {
    use std::fs::File;
    use std::io::BufReader;

    use flate2::bufread::GzDecoder;
    use meili_snap::insta;
    use tempfile::TempDir;

    use super::*;

    #[test]
    fn read_dump_v2() {
        let dump = File::open("tests/assets/v2.dump").unwrap();
        let dir = TempDir::new().unwrap();
        let mut dump = BufReader::new(dump);
        let gz = GzDecoder::new(&mut dump);
        let mut archive = tar::Archive::new(gz);
        archive.unpack(dir.path()).unwrap();

        let mut dump = V2Reader::open(dir).unwrap();

        // top-level info
        insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-09 20:27:59.904096267 +00:00:00");

        // tasks
        let tasks = dump.tasks().collect::<Result<Vec<_>>>().unwrap();
        let (tasks, mut update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
        meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"ec5fc0a14bf735ad4e361d5aa8a89ac6");
        assert_eq!(update_files.len(), 9);
        assert!(update_files[0].is_some()); // the enqueued document addition
        assert!(update_files[1..].iter().all(|u| u.is_none())); // everything already processed

        let update_file = update_files.remove(0).unwrap().collect::<Result<Vec<_>>>().unwrap();
        meili_snap::snapshot_hash!(meili_snap::json_string!(update_file), @"7b8889539b669c7b9ddba448bafa385d");

        // indexes
        let mut indexes = dump.indexes().unwrap().collect::<Result<Vec<_>>>().unwrap();
        // the indexes are not ordered in any way by default
        indexes.sort_by_key(|index| index.metadata().uid.to_string());

        let mut products = indexes.pop().unwrap();
        let mut movies2 = indexes.pop().unwrap();
        let mut movies = indexes.pop().unwrap();
        let mut spells = indexes.pop().unwrap();
        assert!(indexes.is_empty());

        // products
        insta::assert_json_snapshot!(products.metadata(), @r###"
        {
          "uid": "products",
          "primaryKey": "sku",
          "createdAt": "2022-10-09T20:27:22.688964637Z",
          "updatedAt": "2022-10-09T20:27:23.951017769Z"
        }
        "###);

        insta::assert_json_snapshot!(products.settings().unwrap());
        let documents = products.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
        assert_eq!(documents.len(), 10);
        meili_snap::snapshot_hash!(format!("{:#?}", documents), @"548284a84de510f71e88e6cdea495cf5");

        // movies
        insta::assert_json_snapshot!(movies.metadata(), @r###"
        {
          "uid": "movies",
          "primaryKey": "id",
          "createdAt": "2022-10-09T20:27:22.197788495Z",
          "updatedAt": "2022-10-09T20:28:01.93111053Z"
        }
        "###);

        insta::assert_json_snapshot!(movies.settings().unwrap());
        let documents = movies.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
        assert_eq!(documents.len(), 110);
        meili_snap::snapshot_hash!(format!("{:#?}", documents), @"d153b5a81d8b3cdcbe1dec270b574022");

        // movies2
        insta::assert_json_snapshot!(movies2.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
        {
          "uid": "movies_2",
          "primaryKey": null,
          "createdAt": "[now]",
          "updatedAt": "[now]"
        }
        "###);

        insta::assert_json_snapshot!(movies2.settings().unwrap());
        let documents = movies2.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
        assert_eq!(documents.len(), 0);
        meili_snap::snapshot_hash!(format!("{:#?}", documents), @"d751713988987e9331980363e24189ce");

        // spells
        insta::assert_json_snapshot!(spells.metadata(), @r###"
        {
          "uid": "dnd_spells",
          "primaryKey": "index",
          "createdAt": "2022-10-09T20:27:24.242683494Z",
          "updatedAt": "2022-10-09T20:27:24.312809641Z"
        }
        "###);

        insta::assert_json_snapshot!(spells.settings().unwrap());
        let documents = spells.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
        assert_eq!(documents.len(), 10);
        meili_snap::snapshot_hash!(format!("{:#?}", documents), @"235016433dd04262c7f2da01d1e808ce");
    }

    #[test]
    fn read_dump_v2_from_meilisearch_v0_22_0_issue_3435() {
        let dump = File::open("tests/assets/v2-v0.22.0.dump").unwrap();
        let dir = TempDir::new().unwrap();
        let mut dump = BufReader::new(dump);
        let gz = GzDecoder::new(&mut dump);
        let mut archive = tar::Archive::new(gz);
        archive.unpack(dir.path()).unwrap();

        let mut dump = V2Reader::open(dir).unwrap();

        // top-level info
        insta::assert_snapshot!(dump.date().unwrap(), @"2023-01-30 16:26:09.247261 +00:00:00");

        // tasks
        let tasks = dump.tasks().collect::<Result<Vec<_>>>().unwrap();
        let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
        meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"aca8ba13046272664eb3ea2da3031633");
        assert_eq!(update_files.len(), 8);
        assert!(update_files[0..].iter().all(|u| u.is_none())); // everything has already been processed

        // indexes
        let mut indexes = dump.indexes().unwrap().collect::<Result<Vec<_>>>().unwrap();
        // the indexes are not ordered in any way by default
        indexes.sort_by_key(|index| index.metadata().uid.to_string());

        let mut products = indexes.pop().unwrap();
        let mut movies = indexes.pop().unwrap();
        let mut spells = indexes.pop().unwrap();
        assert!(indexes.is_empty());

        // products
        insta::assert_json_snapshot!(products.metadata(), @r###"
        {
          "uid": "products",
          "primaryKey": "sku",
          "createdAt": "2023-01-30T16:25:56.595257Z",
          "updatedAt": "2023-01-30T16:25:58.70348Z"
        }
        "###);

        insta::assert_json_snapshot!(products.settings().unwrap());
        let documents = products.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
        assert_eq!(documents.len(), 10);
        meili_snap::snapshot_hash!(format!("{:#?}", documents), @"548284a84de510f71e88e6cdea495cf5");

        // movies
        insta::assert_json_snapshot!(movies.metadata(), @r###"
        {
          "uid": "movies",
          "primaryKey": "id",
          "createdAt": "2023-01-30T16:25:56.192178Z",
          "updatedAt": "2023-01-30T16:25:56.455714Z"
        }
        "###);

        insta::assert_json_snapshot!(movies.settings().unwrap());
        let documents = movies.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
        assert_eq!(documents.len(), 10);
        meili_snap::snapshot_hash!(format!("{:#?}", documents), @"0227598af846e574139ee0b80e03a720");

        // spells
        insta::assert_json_snapshot!(spells.metadata(), @r###"
        {
          "uid": "dnd_spells",
          "primaryKey": "index",
          "createdAt": "2023-01-30T16:25:58.876405Z",
          "updatedAt": "2023-01-30T16:25:59.079906Z"
        }
        "###);

        insta::assert_json_snapshot!(spells.settings().unwrap());
        let documents = spells.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
        assert_eq!(documents.len(), 10);
        meili_snap::snapshot_hash!(format!("{:#?}", documents), @"235016433dd04262c7f2da01d1e808ce");
    }
}
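Putting the reader together, a minimal usage sketch (walk_dump is a hypothetical helper modeled on the tests above; like them, it assumes the .dump archive has already been unpacked into the TempDir):

    // Hypothetical walkthrough of the V2Reader API; `dir` holds an unpacked v2 dump.
    fn walk_dump(dir: tempfile::TempDir) -> Result<()> {
        let mut dump = V2Reader::open(dir)?;

        // Unfinished tasks come paired with their still-pending update file.
        for entry in dump.tasks() {
            let (task, update_file) = entry?;
            if let Some(documents) = update_file {
                let _pending = documents.collect::<Result<Vec<_>>>()?;
            }
            let _finished = task.is_finished();
        }

        // Every index exposes its metadata, checked settings, and documents.
        for index in dump.indexes()? {
            let mut index = index?;
            let uid = index.metadata().uid.clone();
            let count = index.documents()?.count();
            println!("{uid}: {count} documents");
            let _settings = index.settings()?;
        }
        Ok(())
    }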
crates/dump/src/reader/v2/settings.rs (Normal file, +269 lines)
@@ -0,0 +1,269 @@
use std::collections::{BTreeMap, BTreeSet};
use std::fmt;
use std::marker::PhantomData;
use std::str::FromStr;

use serde::{Deserialize, Deserializer};

#[cfg(test)]
fn serialize_with_wildcard<S>(
    field: &Setting<Vec<String>>,
    s: S,
) -> std::result::Result<S::Ok, S::Error>
where
    S: serde::Serializer,
{
    use serde::Serialize;

    let wildcard = vec!["*".to_string()];
    match field {
        Setting::Set(value) => Some(value),
        Setting::Reset => Some(&wildcard),
        Setting::NotSet => None,
    }
    .serialize(s)
}

#[derive(Clone, Default, Debug)]
#[cfg_attr(test, derive(serde::Serialize))]
pub struct Checked;

#[derive(Clone, Default, Debug, Deserialize)]
#[cfg_attr(test, derive(serde::Serialize))]
pub struct Unchecked;

#[derive(Debug, Clone, Default, Deserialize)]
#[cfg_attr(test, derive(serde::Serialize))]
#[serde(deny_unknown_fields)]
#[serde(rename_all = "camelCase")]
#[serde(bound(serialize = "T: serde::Serialize", deserialize = "T: Deserialize<'static>"))]
pub struct Settings<T> {
    #[serde(
        default,
        serialize_with = "serialize_with_wildcard",
        skip_serializing_if = "Setting::is_not_set"
    )]
    pub displayed_attributes: Setting<Vec<String>>,

    #[serde(
        default,
        serialize_with = "serialize_with_wildcard",
        skip_serializing_if = "Setting::is_not_set"
    )]
    pub searchable_attributes: Setting<Vec<String>>,

    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
    pub filterable_attributes: Setting<BTreeSet<String>>,
    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
    pub sortable_attributes: Setting<BTreeSet<String>>,
    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
    pub ranking_rules: Setting<Vec<String>>,
    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
    pub stop_words: Setting<BTreeSet<String>>,
    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
    pub synonyms: Setting<BTreeMap<String, Vec<String>>>,
    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
    pub distinct_attribute: Setting<String>,

    #[serde(skip)]
    pub _kind: PhantomData<T>,
}

impl Settings<Unchecked> {
    pub fn check(self) -> Settings<Checked> {
        let displayed_attributes = match self.displayed_attributes {
            Setting::Set(fields) => {
                if fields.iter().any(|f| f == "*") {
                    Setting::Reset
                } else {
                    Setting::Set(fields)
                }
            }
            otherwise => otherwise,
        };

        let searchable_attributes = match self.searchable_attributes {
            Setting::Set(fields) => {
                if fields.iter().any(|f| f == "*") {
                    Setting::Reset
                } else {
                    Setting::Set(fields)
                }
            }
            otherwise => otherwise,
        };

        Settings {
            displayed_attributes,
            searchable_attributes,
            filterable_attributes: self.filterable_attributes,
            sortable_attributes: self.sortable_attributes,
            ranking_rules: self.ranking_rules,
            stop_words: self.stop_words,
            synonyms: self.synonyms,
            distinct_attribute: self.distinct_attribute,
            _kind: PhantomData,
        }
    }
}

#[derive(Debug, Clone, PartialEq)]
pub enum Setting<T> {
    Set(T),
    Reset,
    NotSet,
}

impl<T> Default for Setting<T> {
    fn default() -> Self {
        Self::NotSet
    }
}

impl<T> Setting<T> {
    pub const fn is_not_set(&self) -> bool {
        matches!(self, Self::NotSet)
    }

    pub fn map<A>(self, f: fn(T) -> A) -> Setting<A> {
        match self {
            Setting::Set(a) => Setting::Set(f(a)),
            Setting::Reset => Setting::Reset,
            Setting::NotSet => Setting::NotSet,
        }
    }
}

#[cfg(test)]
impl<T: serde::Serialize> serde::Serialize for Setting<T> {
    fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        match self {
            Self::Set(value) => Some(value),
            // Usually NotSet isn't serialized, thanks to the skip_serializing_if field attribute.
            Self::NotSet | Self::Reset => None,
        }
        .serialize(serializer)
    }
}

impl<'de, T: Deserialize<'de>> Deserialize<'de> for Setting<T> {
    fn deserialize<D>(deserializer: D) -> std::result::Result<Self, D::Error>
    where
        D: Deserializer<'de>,
    {
        Deserialize::deserialize(deserializer).map(|x| match x {
            Some(x) => Self::Set(x),
            None => Self::Reset, // Reset is forced by sending a null value
        })
    }
}

#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Criterion {
    /// Sorted by decreasing number of matched query terms.
    /// Query words at the front of an attribute are considered better than words at the back.
    Words,
    /// Sorted by increasing number of typos.
    Typo,
    /// Sorted by increasing distance between matched query terms.
    Proximity,
    /// Documents with query words contained in more important
    /// attributes are considered better.
    Attribute,
    /// Dynamically sort the documents at query time. None, one, or multiple Asc/Desc sortable
    /// attributes can be used in place of this criterion at query time.
    Sort,
    /// Sorted by the similarity of the matched words with the query words.
    Exactness,
    /// Sorted by the increasing value of the field specified.
    Asc(String),
    /// Sorted by the decreasing value of the field specified.
    Desc(String),
}

impl Criterion {
    /// Returns the field name parameter of this criterion.
    pub fn field_name(&self) -> Option<&str> {
        match self {
            Criterion::Asc(name) | Criterion::Desc(name) => Some(name),
            _otherwise => None,
        }
    }
}

impl FromStr for Criterion {
    // Since we're not going to show the custom error message, we can override the
    // error type.
    type Err = ();

    fn from_str(text: &str) -> Result<Criterion, Self::Err> {
        match text {
            "words" => Ok(Criterion::Words),
            "typo" => Ok(Criterion::Typo),
            "proximity" => Ok(Criterion::Proximity),
            "attribute" => Ok(Criterion::Attribute),
            "sort" => Ok(Criterion::Sort),
            "exactness" => Ok(Criterion::Exactness),
            text => match AscDesc::from_str(text) {
                Ok(AscDesc::Asc(field)) => Ok(Criterion::Asc(field)),
                Ok(AscDesc::Desc(field)) => Ok(Criterion::Desc(field)),
                Err(_) => Err(()),
            },
        }
    }
}

#[derive(Debug, Deserialize, Clone, PartialEq, Eq)]
pub enum AscDesc {
    Asc(String),
    Desc(String),
}

impl FromStr for AscDesc {
    type Err = ();

    // Since we don't know whether this comes from the old or the new syntax, we need to
    // check for both syntaxes.
    // WARN: this code doesn't come from the original meilisearch v0.22.0 but was
    // written specifically to be able to import the dumps of both meilisearch v0.21.0
    // AND meilisearch v0.22.0.
    fn from_str(text: &str) -> Result<AscDesc, Self::Err> {
        if let Some((field_name, asc_desc)) = text.rsplit_once(':') {
            match asc_desc {
                "asc" => Ok(AscDesc::Asc(field_name.to_string())),
                "desc" => Ok(AscDesc::Desc(field_name.to_string())),
                _ => Err(()),
            }
        } else if text.starts_with("asc(") && text.ends_with(')') {
            Ok(AscDesc::Asc(
                text.strip_prefix("asc(").unwrap().strip_suffix(')').unwrap().to_string(),
            ))
        } else if text.starts_with("desc(") && text.ends_with(')') {
            Ok(AscDesc::Desc(
                text.strip_prefix("desc(").unwrap().strip_suffix(')').unwrap().to_string(),
            ))
        } else {
            Err(())
        }
    }
}

impl fmt::Display for Criterion {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        use Criterion::*;

        match self {
            Words => f.write_str("words"),
            Typo => f.write_str("typo"),
            Proximity => f.write_str("proximity"),
            Attribute => f.write_str("attribute"),
            Sort => f.write_str("sort"),
            Exactness => f.write_str("exactness"),
            Asc(attr) => write!(f, "{}:asc", attr),
            Desc(attr) => write!(f, "{}:desc", attr),
        }
    }
}
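Before the snapshot files, a short sketch of the three-state Setting semantics and of the dual sort syntax (hypothetical demo, not part of the commit; serde_json assumed). An absent field deserializes to NotSet through #[serde(default)], an explicit null becomes Reset, a concrete value becomes Set, and check() collapses a ["*"] wildcard into Reset. Criterion::from_str accepts both the v0.21 asc(field) spelling and the v0.22 field:asc spelling, which is exactly the difference between the two movies ranking-rule snapshots below:

    // Hypothetical demo of Setting's three states and the two AscDesc syntaxes.
    use std::str::FromStr;

    fn settings_demo() -> serde_json::Result<()> {
        let json = r#"{ "displayedAttributes": ["*"], "distinctAttribute": null }"#;
        let settings: Settings<Unchecked> = serde_json::from_str(json)?;
        assert!(settings.searchable_attributes.is_not_set()); // absent field => NotSet
        assert_eq!(settings.distinct_attribute, Setting::Reset); // explicit null => Reset

        let checked = settings.check();
        assert_eq!(checked.displayed_attributes, Setting::Reset); // ["*"] wildcard => Reset

        // Old (v0.21) and new (v0.22) spellings parse to the same criterion.
        let old = Criterion::from_str("asc(release_date)");
        let new = Criterion::from_str("release_date:asc");
        assert_eq!(old, Ok(Criterion::Asc("release_date".to_string())));
        assert_eq!(old, new);
        Ok(())
    }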
@@ -0,0 +1,23 @@
---
source: dump/src/reader/v2/mod.rs
expression: movies2.settings().unwrap()
---
{
  "displayedAttributes": [
    "*"
  ],
  "searchableAttributes": [
    "*"
  ],
  "filterableAttributes": [],
  "rankingRules": [
    "words",
    "typo",
    "proximity",
    "attribute",
    "exactness"
  ],
  "stopWords": [],
  "synonyms": {},
  "distinctAttribute": null
}
@@ -0,0 +1,23 @@
---
source: dump/src/reader/v2/mod.rs
expression: spells.settings().unwrap()
---
{
  "displayedAttributes": [
    "*"
  ],
  "searchableAttributes": [
    "*"
  ],
  "filterableAttributes": [],
  "rankingRules": [
    "words",
    "typo",
    "proximity",
    "attribute",
    "exactness"
  ],
  "stopWords": [],
  "synonyms": {},
  "distinctAttribute": null
}
@@ -0,0 +1,37 @@
---
source: dump/src/reader/v2/mod.rs
expression: products.settings().unwrap()
---
{
  "displayedAttributes": [
    "*"
  ],
  "searchableAttributes": [
    "*"
  ],
  "filterableAttributes": [],
  "rankingRules": [
    "words",
    "typo",
    "proximity",
    "attribute",
    "exactness"
  ],
  "stopWords": [],
  "synonyms": {
    "android": [
      "phone",
      "smartphone"
    ],
    "iphone": [
      "phone",
      "smartphone"
    ],
    "phone": [
      "android",
      "iphone",
      "smartphone"
    ]
  },
  "distinctAttribute": null
}
@@ -0,0 +1,24 @@
---
source: dump/src/reader/v2/mod.rs
expression: movies.settings().unwrap()
---
{
  "displayedAttributes": [
    "*"
  ],
  "searchableAttributes": [
    "*"
  ],
  "filterableAttributes": [],
  "rankingRules": [
    "words",
    "typo",
    "proximity",
    "attribute",
    "exactness",
    "asc(release_date)"
  ],
  "stopWords": [],
  "synonyms": {},
  "distinctAttribute": null
}
@@ -0,0 +1,25 @@
---
source: dump/src/reader/v2/mod.rs
expression: spells.settings().unwrap()
---
{
  "displayedAttributes": [
    "*"
  ],
  "searchableAttributes": [
    "*"
  ],
  "filterableAttributes": [],
  "sortableAttributes": [],
  "rankingRules": [
    "words",
    "typo",
    "proximity",
    "attribute",
    "sort",
    "exactness"
  ],
  "stopWords": [],
  "synonyms": {},
  "distinctAttribute": null
}
@@ -0,0 +1,39 @@
---
source: dump/src/reader/v2/mod.rs
expression: products.settings().unwrap()
---
{
  "displayedAttributes": [
    "*"
  ],
  "searchableAttributes": [
    "*"
  ],
  "filterableAttributes": [],
  "sortableAttributes": [],
  "rankingRules": [
    "words",
    "typo",
    "proximity",
    "attribute",
    "sort",
    "exactness"
  ],
  "stopWords": [],
  "synonyms": {
    "android": [
      "phone",
      "smartphone"
    ],
    "iphone": [
      "phone",
      "smartphone"
    ],
    "phone": [
      "android",
      "iphone",
      "smartphone"
    ]
  },
  "distinctAttribute": null
}
@@ -0,0 +1,30 @@
---
source: dump/src/reader/v2/mod.rs
expression: movies.settings().unwrap()
---
{
  "displayedAttributes": [
    "*"
  ],
  "searchableAttributes": [
    "*"
  ],
  "filterableAttributes": [
    "genres",
    "id"
  ],
  "sortableAttributes": [
    "release_date"
  ],
  "rankingRules": [
    "words",
    "typo",
    "proximity",
    "attribute",
    "exactness",
    "release_date:asc"
  ],
  "stopWords": [],
  "synonyms": {},
  "distinctAttribute": null
}
crates/dump/src/reader/v2/updates.rs (Normal file, +240 lines)
@@ -0,0 +1,240 @@
use serde::Deserialize;
use time::OffsetDateTime;
use uuid::Uuid;

use super::{ResponseError, Settings, Unchecked};

#[derive(Deserialize)]
#[cfg_attr(test, derive(serde::Serialize))]
pub struct UpdateEntry {
    pub uuid: Uuid,
    pub update: UpdateStatus,
}

impl UpdateEntry {
    pub fn is_finished(&self) -> bool {
        match self.update {
            UpdateStatus::Processing(_) | UpdateStatus::Enqueued(_) => false,
            UpdateStatus::Processed(_) | UpdateStatus::Aborted(_) | UpdateStatus::Failed(_) => true,
        }
    }

    pub fn get_content_uuid(&self) -> Option<&Uuid> {
        match &self.update {
            UpdateStatus::Enqueued(enqueued) => enqueued.content.as_ref(),
            UpdateStatus::Processing(processing) => processing.from.content.as_ref(),
            UpdateStatus::Processed(processed) => processed.from.from.content.as_ref(),
            UpdateStatus::Aborted(aborted) => aborted.from.content.as_ref(),
            UpdateStatus::Failed(failed) => failed.from.from.content.as_ref(),
        }
    }
}

#[derive(Debug, Clone, Deserialize)]
#[cfg_attr(test, derive(serde::Serialize))]
pub enum UpdateResult {
    DocumentsAddition(DocumentAdditionResult),
    DocumentDeletion { deleted: u64 },
    Other,
}

#[derive(Debug, Deserialize, Clone)]
#[cfg_attr(test, derive(serde::Serialize))]
pub struct DocumentAdditionResult {
    pub nb_documents: usize,
}

#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Deserialize)]
#[cfg_attr(test, derive(serde::Serialize))]
#[non_exhaustive]
pub enum IndexDocumentsMethod {
    /// Replace the previous document with the new one,
    /// removing all the already known attributes.
    ReplaceDocuments,

    /// Merge the previous version of the document with the new version,
    /// replacing old attribute values with the new ones and adding the new attributes.
    UpdateDocuments,
}

#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Deserialize)]
#[cfg_attr(test, derive(serde::Serialize))]
#[non_exhaustive]
pub enum UpdateFormat {
    /// The given update is a real **comma-separated** CSV with headers on the first line.
    Csv,
    /// The given update is a JSON array with documents inside.
    Json,
    /// The given update is a JSON stream with a document on each line.
    JsonStream,
}

#[allow(clippy::large_enum_variant)]
#[derive(Debug, Clone, Deserialize)]
#[cfg_attr(test, derive(serde::Serialize))]
#[serde(tag = "type")]
pub enum UpdateMeta {
    DocumentsAddition {
        method: IndexDocumentsMethod,
        format: UpdateFormat,
        primary_key: Option<String>,
    },
    ClearDocuments,
    DeleteDocuments {
        ids: Vec<String>,
    },
    Settings(Settings<Unchecked>),
}

#[derive(Debug, Deserialize, Clone)]
#[cfg_attr(test, derive(serde::Serialize))]
#[serde(rename_all = "camelCase")]
pub struct Enqueued {
    pub update_id: u64,
    pub meta: UpdateMeta,
    #[serde(with = "time::serde::rfc3339")]
    pub enqueued_at: OffsetDateTime,
    pub content: Option<Uuid>,
}

impl Enqueued {
    pub fn meta(&self) -> &UpdateMeta {
        &self.meta
    }

    pub fn id(&self) -> u64 {
        self.update_id
    }
}

#[derive(Debug, Deserialize, Clone)]
#[cfg_attr(test, derive(serde::Serialize))]
#[serde(rename_all = "camelCase")]
pub struct Processed {
    pub success: UpdateResult,
    #[serde(with = "time::serde::rfc3339")]
    pub processed_at: OffsetDateTime,
    #[serde(flatten)]
    pub from: Processing,
}

impl Processed {
    pub fn id(&self) -> u64 {
        self.from.id()
    }

    pub fn meta(&self) -> &UpdateMeta {
        self.from.meta()
    }
}

#[derive(Debug, Deserialize, Clone)]
#[cfg_attr(test, derive(serde::Serialize))]
#[serde(rename_all = "camelCase")]
pub struct Processing {
    #[serde(flatten)]
    pub from: Enqueued,
    #[serde(with = "time::serde::rfc3339")]
    pub started_processing_at: OffsetDateTime,
}

impl Processing {
    pub fn id(&self) -> u64 {
        self.from.id()
    }

    pub fn meta(&self) -> &UpdateMeta {
        self.from.meta()
    }
}

#[derive(Debug, Deserialize, Clone)]
#[cfg_attr(test, derive(serde::Serialize))]
#[serde(rename_all = "camelCase")]
pub struct Aborted {
    #[serde(flatten)]
    pub from: Enqueued,
    #[serde(with = "time::serde::rfc3339")]
    pub aborted_at: OffsetDateTime,
}

impl Aborted {
    pub fn id(&self) -> u64 {
        self.from.id()
    }

    pub fn meta(&self) -> &UpdateMeta {
        self.from.meta()
    }
}

#[derive(Debug, Deserialize)]
#[cfg_attr(test, derive(serde::Serialize))]
#[serde(rename_all = "camelCase")]
pub struct Failed {
    #[serde(flatten)]
    pub from: Processing,
    pub error: ResponseError,
    #[serde(with = "time::serde::rfc3339")]
    pub failed_at: OffsetDateTime,
}

impl Failed {
    pub fn id(&self) -> u64 {
        self.from.id()
    }

    pub fn meta(&self) -> &UpdateMeta {
        self.from.meta()
    }
}

#[derive(Debug, Deserialize)]
#[cfg_attr(test, derive(serde::Serialize))]
#[serde(tag = "status", rename_all = "camelCase")]
pub enum UpdateStatus {
    Processing(Processing),
    Enqueued(Enqueued),
    Processed(Processed),
    Aborted(Aborted),
    Failed(Failed),
}

impl UpdateStatus {
    pub fn id(&self) -> u64 {
        match self {
            UpdateStatus::Processing(u) => u.id(),
            UpdateStatus::Enqueued(u) => u.id(),
            UpdateStatus::Processed(u) => u.id(),
            UpdateStatus::Aborted(u) => u.id(),
            UpdateStatus::Failed(u) => u.id(),
        }
    }

    pub fn meta(&self) -> &UpdateMeta {
        match self {
            UpdateStatus::Processing(u) => u.meta(),
            UpdateStatus::Enqueued(u) => u.meta(),
            UpdateStatus::Processed(u) => u.meta(),
            UpdateStatus::Aborted(u) => u.meta(),
            UpdateStatus::Failed(u) => u.meta(),
        }
    }

    pub fn processed(&self) -> Option<&Processed> {
        match self {
            UpdateStatus::Processed(p) => Some(p),
            _ => None,
        }
    }

    pub fn finished_at(&self) -> Option<OffsetDateTime> {
        match self {
            UpdateStatus::Processing(_) => None,
            UpdateStatus::Enqueued(_) => None,
            UpdateStatus::Processed(u) => Some(u.processed_at),
            UpdateStatus::Aborted(_) => None,
            UpdateStatus::Failed(u) => Some(u.failed_at),
        }
    }
}
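To make the nesting of these statuses concrete, here is a sketch of one updates/data.jsonl line deserializing into an UpdateEntry (hypothetical values, not part of the commit; serde_json assumed). The "status" tag selects the UpdateStatus variant, and #[serde(flatten)] is what lets the Processed/Processing/Enqueued wrappers share one flat JSON object:

    // Hypothetical example: one enqueued entry from `updates/data.jsonl`.
    fn update_entry_demo() -> serde_json::Result<()> {
        let line = r#"{
            "uuid": "40d14c5f-37ae-4873-9d51-b69e014a0d30",
            "update": {
                "status": "enqueued",
                "updateId": 0,
                "meta": { "type": "ClearDocuments" },
                "enqueuedAt": "2022-10-09T20:27:22Z",
                "content": null
            }
        }"#;
        let entry: UpdateEntry = serde_json::from_str(line)?;
        assert!(!entry.is_finished()); // Enqueued and Processing are the unfinished states
        assert!(entry.get_content_uuid().is_none()); // no pending update file for this one
        assert_eq!(entry.update.id(), 0);
        Ok(())
    }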