mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-10-11 14:16:31 +00:00
Merge branch 'release-v0.30.0' into stable
This commit is contained in:
@@ -1,19 +0,0 @@
|
||||
# Seeds for failure cases proptest has generated in the past. It is
|
||||
# automatically read and these particular cases re-run before any
|
||||
# novel cases are generated.
|
||||
#
|
||||
# It is recommended to check this file in to source control so that
|
||||
# everyone who runs the test benefits from these saved cases.
|
||||
cc 6f3ae3cba934ba3e328e2306218c32f27a46ce2d54a1258b05fef65663208662 # shrinks to task = Task { id: 0, index_uid: IndexUid("a"), content: DocumentAddition { content_uuid: 37bc137d-2038-47f0-819f-b133233daadc, merge_strategy: ReplaceDocuments, primary_key: None, documents_count: 0 }, events: [] }
|
||||
cc b726f7d9f44a9216aad302ddba0f04e7108817e741d656a4759aea8562de4d63 # shrinks to task = Task { id: 0, index_uid: IndexUid("_"), content: IndexDeletion, events: [] }, index_exists = false, index_op_fails = false, any_int = 0
|
||||
cc 427ec2dde3260b1ab334207bdc22adef28a5b8532b9902c84b55fd2c017ea7e1 # shrinks to task = Task { id: 0, index_uid: IndexUid("A"), content: IndexDeletion, events: [] }, index_exists = true, index_op_fails = false, any_int = 0
|
||||
cc c24f3d42f0f36fbdbf4e9d4327e75529b163ac580d63a5934ca05e9b5bd23a65 # shrinks to task = Task { id: 0, index_uid: IndexUid("a"), content: IndexDeletion, events: [] }, index_exists = true, index_op_fails = true, any_int = 0
|
||||
cc 8084e2410801b997533b0bcbad75cd212873cfc2677f26847f68c568ead1604c # shrinks to task = Task { id: 0, index_uid: IndexUid("A"), content: SettingsUpdate { settings: Settings { displayed_attributes: NotSet, searchable_attributes: NotSet, filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, synonyms: NotSet, distinct_attribute: NotSet, _kind: PhantomData }, is_deletion: false }, events: [] }, index_exists = false, index_op_fails = false, any_int = 0
|
||||
cc 330085e0200a9a2ddfdd764a03d768aa95c431bcaafbd530c8c949425beed18b # shrinks to task = Task { id: 0, index_uid: IndexUid("a"), content: CreateIndex { primary_key: None }, events: [] }, index_exists = false, index_op_fails = true, any_int = 0
|
||||
cc c70e901576ef2fb9622e814bdecd11e4747cd70d71a9a6ce771b5b7256a187c0 # shrinks to task = Task { id: 0, index_uid: IndexUid("a"), content: SettingsUpdate { settings: Settings { displayed_attributes: NotSet, searchable_attributes: NotSet, filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, synonyms: NotSet, distinct_attribute: NotSet, _kind: PhantomData }, is_deletion: true }, events: [] }, index_exists = false, index_op_fails = false, any_int = 0
|
||||
cc 3fe2c38cbc2cca34ecde321472141d386056f0cd332cbf700773657715a382b5 # shrinks to task = Task { id: 0, index_uid: IndexUid("a"), content: UpdateIndex { primary_key: None }, events: [] }, index_exists = false, index_op_fails = false, any_int = 0
|
||||
cc c31cf86692968483f1ab08a6a9d4667ccb9635c306998551bf1eb1f135ef0d4b # shrinks to task = Task { id: 0, index_uid: IndexUid("a"), content: UpdateIndex { primary_key: Some("") }, events: [] }, index_exists = true, index_op_fails = false, any_int = 0
|
||||
cc 3a01c78db082434b8a4f8914abf0d1059d39f4426d16df20d72e1bd7ebb94a6a # shrinks to task = Task { id: 0, index_uid: IndexUid("0"), content: UpdateIndex { primary_key: None }, events: [] }, index_exists = true, index_op_fails = true, any_int = 0
|
||||
cc c450806df3921d1e6fe9b6af93d999e8196d0175b69b64f1810802582421e94a # shrinks to task = Task { id: 0, index_uid: IndexUid("a"), content: CreateIndex { primary_key: Some("") }, events: [] }, index_exists = false, index_op_fails = false, any_int = 0
|
||||
cc fb6b98947cbdbdee05ed3c0bf2923aad2c311edc276253642eb43a0c0ec4888a # shrinks to task = Task { id: 0, index_uid: IndexUid("A"), content: CreateIndex { primary_key: Some("") }, events: [] }, index_exists = false, index_op_fails = true, any_int = 0
|
||||
cc 1aa59d8e22484e9915efbb5818e1e1ab684aa61b166dc82130d6221663ba00bf # shrinks to task = Task { id: 0, index_uid: IndexUid("a"), content: DocumentDeletion(Clear), events: [] }, index_exists = true, index_op_fails = false, any_int = 0
|
@@ -1,7 +0,0 @@
|
||||
# Seeds for failure cases proptest has generated in the past. It is
|
||||
# automatically read and these particular cases re-run before any
|
||||
# novel cases are generated.
|
||||
#
|
||||
# It is recommended to check this file in to source control so that
|
||||
# everyone who runs the test benefits from these saved cases.
|
||||
cc 8cbd6c45ce8c5611ec3f2f94fd485f6a8eeccc470fa426e59bdfd4d9e7fce0e1 # shrinks to bytes = []
|
@@ -1,8 +0,0 @@
|
||||
use std::{fs, path::Path};
|
||||
|
||||
/// Best-effort copy of the `instance-uid` file found in the `src` directory into `dst`.
///
/// Every error is ignored: if the source file cannot be read nothing is written, and a
/// failed write is silently discarded.
pub fn copy_user_id(src: &Path, dst: &Path) {
    let instance_uid = match fs::read_to_string(src.join("instance-uid")) {
        Ok(contents) => contents,
        Err(_) => return,
    };

    // The outcome of the write is deliberately dropped.
    let _ = fs::write(dst.join("instance-uid"), &instance_uid);
}
|
@@ -1,26 +0,0 @@
|
||||
use std::fs::{create_dir_all, File};
|
||||
use std::io::Write;
|
||||
use std::path::Path;
|
||||
|
||||
use flate2::{read::GzDecoder, write::GzEncoder, Compression};
|
||||
use tar::{Archive, Builder};
|
||||
|
||||
pub fn to_tar_gz(src: impl AsRef<Path>, dest: impl AsRef<Path>) -> anyhow::Result<()> {
|
||||
let mut f = File::create(dest)?;
|
||||
let gz_encoder = GzEncoder::new(&mut f, Compression::default());
|
||||
let mut tar_encoder = Builder::new(gz_encoder);
|
||||
tar_encoder.append_dir_all(".", src)?;
|
||||
let gz_encoder = tar_encoder.into_inner()?;
|
||||
gz_encoder.finish()?;
|
||||
f.flush()?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn from_tar_gz(src: impl AsRef<Path>, dest: impl AsRef<Path>) -> anyhow::Result<()> {
|
||||
let f = File::open(&src)?;
|
||||
let gz = GzDecoder::new(f);
|
||||
let mut ar = Archive::new(gz);
|
||||
create_dir_all(&dest)?;
|
||||
ar.unpack(&dest)?;
|
||||
Ok(())
|
||||
}
|
@@ -1,155 +0,0 @@
|
||||
use std::borrow::Borrow;
|
||||
use std::fmt::{self, Debug, Display};
|
||||
use std::io::{self, BufReader, Read, Seek, Write};
|
||||
|
||||
use either::Either;
|
||||
use meilisearch_types::error::{Code, ErrorCode};
|
||||
use meilisearch_types::internal_error;
|
||||
use milli::documents::{DocumentsBatchBuilder, Error};
|
||||
use milli::Object;
|
||||
use serde::Deserialize;
|
||||
|
||||
/// Result alias used by the document readers of this module; the error is always a
/// `DocumentFormatError`.
type Result<T> = std::result::Result<T, DocumentFormatError>;
|
||||
|
||||
/// The document payload formats handled by the readers of this module.
///
/// Its `Display` implementation yields the lowercase format name.
#[derive(Debug)]
pub enum PayloadType {
    /// Newline-delimited JSON (JSON Lines).
    Ndjson,
    /// A JSON payload.
    Json,
    /// A CSV payload.
    Csv,
}
|
||||
|
||||
impl fmt::Display for PayloadType {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
match self {
|
||||
PayloadType::Ndjson => f.write_str("ndjson"),
|
||||
PayloadType::Json => f.write_str("json"),
|
||||
PayloadType::Csv => f.write_str("csv"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum DocumentFormatError {
|
||||
Internal(Box<dyn std::error::Error + Send + Sync + 'static>),
|
||||
MalformedPayload(Error, PayloadType),
|
||||
}
|
||||
|
||||
impl Display for DocumentFormatError {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
match self {
|
||||
Self::Internal(e) => write!(f, "An internal error has occurred: `{}`.", e),
|
||||
Self::MalformedPayload(me, b) => match me.borrow() {
|
||||
Error::Json(se) => {
|
||||
// https://github.com/meilisearch/meilisearch/issues/2107
|
||||
// The user input maybe insanely long. We need to truncate it.
|
||||
let mut serde_msg = se.to_string();
|
||||
let ellipsis = "...";
|
||||
if serde_msg.len() > 100 + ellipsis.len() {
|
||||
serde_msg.replace_range(50..serde_msg.len() - 85, ellipsis);
|
||||
}
|
||||
|
||||
write!(
|
||||
f,
|
||||
"The `{}` payload provided is malformed. `Couldn't serialize document value: {}`.",
|
||||
b, serde_msg
|
||||
)
|
||||
}
|
||||
_ => write!(f, "The `{}` payload provided is malformed: `{}`.", b, me),
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Marker impl: relies on the `Debug` and `Display` implementations of the type and keeps the
// default trait methods.
impl std::error::Error for DocumentFormatError {}
|
||||
|
||||
impl From<(PayloadType, Error)> for DocumentFormatError {
|
||||
fn from((ty, error): (PayloadType, Error)) -> Self {
|
||||
match error {
|
||||
Error::Io(e) => Self::Internal(Box::new(e)),
|
||||
e => Self::MalformedPayload(e, ty),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ErrorCode for DocumentFormatError {
|
||||
fn error_code(&self) -> Code {
|
||||
match self {
|
||||
DocumentFormatError::Internal(_) => Code::Internal,
|
||||
DocumentFormatError::MalformedPayload(_, _) => Code::MalformedPayload,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// NOTE(review): presumably generates the `From<io::Error>` conversion into
// `DocumentFormatError::Internal` — confirm against the macro definition.
internal_error!(DocumentFormatError: io::Error);
|
||||
|
||||
/// Reads CSV from input and write an obkv batch to writer.
|
||||
pub fn read_csv(input: impl Read, writer: impl Write + Seek) -> Result<usize> {
|
||||
let mut builder = DocumentsBatchBuilder::new(writer);
|
||||
|
||||
let csv = csv::Reader::from_reader(input);
|
||||
builder.append_csv(csv).map_err(|e| (PayloadType::Csv, e))?;
|
||||
|
||||
let count = builder.documents_count();
|
||||
let _ = builder
|
||||
.into_inner()
|
||||
.map_err(Into::into)
|
||||
.map_err(DocumentFormatError::Internal)?;
|
||||
|
||||
Ok(count as usize)
|
||||
}
|
||||
|
||||
/// Reads JSON Lines from input and write an obkv batch to writer.
|
||||
pub fn read_ndjson(input: impl Read, writer: impl Write + Seek) -> Result<usize> {
|
||||
let mut builder = DocumentsBatchBuilder::new(writer);
|
||||
let reader = BufReader::new(input);
|
||||
|
||||
for result in serde_json::Deserializer::from_reader(reader).into_iter() {
|
||||
let object = result
|
||||
.map_err(Error::Json)
|
||||
.map_err(|e| (PayloadType::Ndjson, e))?;
|
||||
builder
|
||||
.append_json_object(&object)
|
||||
.map_err(Into::into)
|
||||
.map_err(DocumentFormatError::Internal)?;
|
||||
}
|
||||
|
||||
let count = builder.documents_count();
|
||||
let _ = builder
|
||||
.into_inner()
|
||||
.map_err(Into::into)
|
||||
.map_err(DocumentFormatError::Internal)?;
|
||||
|
||||
Ok(count as usize)
|
||||
}
|
||||
|
||||
/// Reads JSON from input and write an obkv batch to writer.
|
||||
pub fn read_json(input: impl Read, writer: impl Write + Seek) -> Result<usize> {
|
||||
let mut builder = DocumentsBatchBuilder::new(writer);
|
||||
let reader = BufReader::new(input);
|
||||
|
||||
#[derive(Deserialize, Debug)]
|
||||
#[serde(transparent)]
|
||||
struct ArrayOrSingleObject {
|
||||
#[serde(with = "either::serde_untagged")]
|
||||
inner: Either<Vec<Object>, Object>,
|
||||
}
|
||||
|
||||
let content: ArrayOrSingleObject = serde_json::from_reader(reader)
|
||||
.map_err(Error::Json)
|
||||
.map_err(|e| (PayloadType::Json, e))?;
|
||||
|
||||
for object in content.inner.map_right(|o| vec![o]).into_inner() {
|
||||
builder
|
||||
.append_json_object(&object)
|
||||
.map_err(Into::into)
|
||||
.map_err(DocumentFormatError::Internal)?;
|
||||
}
|
||||
|
||||
let count = builder.documents_count();
|
||||
let _ = builder
|
||||
.into_inner()
|
||||
.map_err(Into::into)
|
||||
.map_err(DocumentFormatError::Internal)?;
|
||||
|
||||
Ok(count as usize)
|
||||
}
|
@@ -1,17 +0,0 @@
|
||||
pub mod v2;
|
||||
pub mod v3;
|
||||
pub mod v4;
|
||||
|
||||
/// Parses the v1 version of the Asc ranking rule, `asc(price)`, and returns the field name.
///
/// The field is whatever lies between the first `asc(` and the last `)` that follows it;
/// `None` is returned when either delimiter is missing.
pub fn asc_ranking_rule(text: &str) -> Option<&str> {
    let (_, after_open) = text.split_once("asc(")?;
    let (field, _) = after_open.rsplit_once(')')?;
    Some(field)
}
|
||||
|
||||
/// Parses the v1 version of the Desc ranking rule, `desc(price)`, and returns the field name.
///
/// The field is whatever lies between the first `desc(` and the last `)` that follows it;
/// `None` is returned when either delimiter is missing.
pub fn desc_ranking_rule(text: &str) -> Option<&str> {
    let (_, after_open) = text.split_once("desc(")?;
    let (field, _) = after_open.rsplit_once(')')?;
    Some(field)
}
|
@@ -1,152 +0,0 @@
|
||||
use anyhow::bail;
|
||||
use meilisearch_types::error::Code;
|
||||
use milli::update::IndexDocumentsMethod;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use time::OffsetDateTime;
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::index::{Settings, Unchecked};
|
||||
|
||||
#[derive(Serialize, Deserialize)]
|
||||
pub struct UpdateEntry {
|
||||
pub uuid: Uuid,
|
||||
pub update: UpdateStatus,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub enum UpdateFormat {
|
||||
Json,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct DocumentAdditionResult {
|
||||
pub nb_documents: usize,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub enum UpdateResult {
|
||||
DocumentsAddition(DocumentAdditionResult),
|
||||
DocumentDeletion { deleted: u64 },
|
||||
Other,
|
||||
}
|
||||
|
||||
#[allow(clippy::large_enum_variant)]
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[serde(tag = "type")]
|
||||
pub enum UpdateMeta {
|
||||
DocumentsAddition {
|
||||
method: IndexDocumentsMethod,
|
||||
format: UpdateFormat,
|
||||
primary_key: Option<String>,
|
||||
},
|
||||
ClearDocuments,
|
||||
DeleteDocuments {
|
||||
ids: Vec<String>,
|
||||
},
|
||||
Settings(Settings<Unchecked>),
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct Enqueued {
|
||||
pub update_id: u64,
|
||||
pub meta: UpdateMeta,
|
||||
#[serde(with = "time::serde::rfc3339")]
|
||||
pub enqueued_at: OffsetDateTime,
|
||||
pub content: Option<Uuid>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct Processed {
|
||||
pub success: UpdateResult,
|
||||
#[serde(with = "time::serde::rfc3339")]
|
||||
pub processed_at: OffsetDateTime,
|
||||
#[serde(flatten)]
|
||||
pub from: Processing,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct Processing {
|
||||
#[serde(flatten)]
|
||||
pub from: Enqueued,
|
||||
#[serde(with = "time::serde::rfc3339")]
|
||||
pub started_processing_at: OffsetDateTime,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct Aborted {
|
||||
#[serde(flatten)]
|
||||
pub from: Enqueued,
|
||||
#[serde(with = "time::serde::rfc3339")]
|
||||
pub aborted_at: OffsetDateTime,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct Failed {
|
||||
#[serde(flatten)]
|
||||
pub from: Processing,
|
||||
pub error: ResponseError,
|
||||
#[serde(with = "time::serde::rfc3339")]
|
||||
pub failed_at: OffsetDateTime,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
#[serde(tag = "status", rename_all = "camelCase")]
|
||||
pub enum UpdateStatus {
|
||||
Processing(Processing),
|
||||
Enqueued(Enqueued),
|
||||
Processed(Processed),
|
||||
Aborted(Aborted),
|
||||
Failed(Failed),
|
||||
}
|
||||
|
||||
// Unit placeholder for the status code; the only field of this type (`ResponseError::code`)
// is skipped by serde.
type StatusCode = ();
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct ResponseError {
|
||||
#[serde(skip)]
|
||||
pub code: StatusCode,
|
||||
pub message: String,
|
||||
pub error_code: String,
|
||||
pub error_type: String,
|
||||
pub error_link: String,
|
||||
}
|
||||
|
||||
pub fn error_code_from_str(s: &str) -> anyhow::Result<Code> {
|
||||
let code = match s {
|
||||
"index_creation_failed" => Code::CreateIndex,
|
||||
"index_already_exists" => Code::IndexAlreadyExists,
|
||||
"index_not_found" => Code::IndexNotFound,
|
||||
"invalid_index_uid" => Code::InvalidIndexUid,
|
||||
"invalid_state" => Code::InvalidState,
|
||||
"missing_primary_key" => Code::MissingPrimaryKey,
|
||||
"primary_key_already_present" => Code::PrimaryKeyAlreadyPresent,
|
||||
"invalid_request" => Code::InvalidRankingRule,
|
||||
"max_fields_limit_exceeded" => Code::MaxFieldsLimitExceeded,
|
||||
"missing_document_id" => Code::MissingDocumentId,
|
||||
"invalid_facet" => Code::Filter,
|
||||
"invalid_filter" => Code::Filter,
|
||||
"invalid_sort" => Code::Sort,
|
||||
"bad_parameter" => Code::BadParameter,
|
||||
"bad_request" => Code::BadRequest,
|
||||
"document_not_found" => Code::DocumentNotFound,
|
||||
"internal" => Code::Internal,
|
||||
"invalid_geo_field" => Code::InvalidGeoField,
|
||||
"invalid_token" => Code::InvalidToken,
|
||||
"missing_authorization_header" => Code::MissingAuthorizationHeader,
|
||||
"payload_too_large" => Code::PayloadTooLarge,
|
||||
"unretrievable_document" => Code::RetrieveDocument,
|
||||
"search_error" => Code::SearchDocuments,
|
||||
"unsupported_media_type" => Code::UnsupportedMediaType,
|
||||
"dump_already_in_progress" => Code::DumpAlreadyInProgress,
|
||||
"dump_process_failed" => Code::DumpProcessFailed,
|
||||
_ => bail!("unknow error code."),
|
||||
};
|
||||
|
||||
Ok(code)
|
||||
}
|
@@ -1,205 +0,0 @@
|
||||
use meilisearch_types::error::{Code, ResponseError};
|
||||
use meilisearch_types::index_uid::IndexUid;
|
||||
use milli::update::IndexDocumentsMethod;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use time::OffsetDateTime;
|
||||
use uuid::Uuid;
|
||||
|
||||
use super::v4::{Task, TaskContent, TaskEvent};
|
||||
use crate::index::{Settings, Unchecked};
|
||||
use crate::tasks::task::{DocumentDeletion, TaskId, TaskResult};
|
||||
|
||||
use super::v2;
|
||||
|
||||
#[derive(Serialize, Deserialize)]
|
||||
pub struct DumpEntry {
|
||||
pub uuid: Uuid,
|
||||
pub uid: String,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize)]
|
||||
pub struct UpdateEntry {
|
||||
pub uuid: Uuid,
|
||||
pub update: UpdateStatus,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
#[serde(tag = "status", rename_all = "camelCase")]
|
||||
pub enum UpdateStatus {
|
||||
Processing(Processing),
|
||||
Enqueued(Enqueued),
|
||||
Processed(Processed),
|
||||
Failed(Failed),
|
||||
}
|
||||
|
||||
impl From<v2::UpdateResult> for TaskResult {
|
||||
fn from(other: v2::UpdateResult) -> Self {
|
||||
match other {
|
||||
v2::UpdateResult::DocumentsAddition(result) => TaskResult::DocumentAddition {
|
||||
indexed_documents: result.nb_documents as u64,
|
||||
},
|
||||
v2::UpdateResult::DocumentDeletion { deleted } => TaskResult::DocumentDeletion {
|
||||
deleted_documents: deleted,
|
||||
},
|
||||
v2::UpdateResult::Other => TaskResult::Other,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(clippy::large_enum_variant)]
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub enum Update {
|
||||
DeleteDocuments(Vec<String>),
|
||||
DocumentAddition {
|
||||
primary_key: Option<String>,
|
||||
method: IndexDocumentsMethod,
|
||||
content_uuid: Uuid,
|
||||
},
|
||||
Settings(Settings<Unchecked>),
|
||||
ClearDocuments,
|
||||
}
|
||||
|
||||
impl From<Update> for super::v4::TaskContent {
|
||||
fn from(update: Update) -> Self {
|
||||
match update {
|
||||
Update::DeleteDocuments(ids) => {
|
||||
TaskContent::DocumentDeletion(DocumentDeletion::Ids(ids))
|
||||
}
|
||||
Update::DocumentAddition {
|
||||
primary_key,
|
||||
method,
|
||||
..
|
||||
} => TaskContent::DocumentAddition {
|
||||
content_uuid: Uuid::default(),
|
||||
merge_strategy: method,
|
||||
primary_key,
|
||||
// document count is unknown for legacy updates
|
||||
documents_count: 0,
|
||||
allow_index_creation: true,
|
||||
},
|
||||
Update::Settings(settings) => TaskContent::SettingsUpdate {
|
||||
settings,
|
||||
// There is no way to know now, so we assume it isn't
|
||||
is_deletion: false,
|
||||
allow_index_creation: true,
|
||||
},
|
||||
Update::ClearDocuments => TaskContent::DocumentDeletion(DocumentDeletion::Clear),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(clippy::large_enum_variant)]
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[serde(tag = "type")]
|
||||
pub enum UpdateMeta {
|
||||
DocumentsAddition {
|
||||
method: IndexDocumentsMethod,
|
||||
primary_key: Option<String>,
|
||||
},
|
||||
ClearDocuments,
|
||||
DeleteDocuments {
|
||||
ids: Vec<String>,
|
||||
},
|
||||
Settings(Settings<Unchecked>),
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct Enqueued {
|
||||
pub update_id: u64,
|
||||
pub meta: Update,
|
||||
#[serde(with = "time::serde::rfc3339")]
|
||||
pub enqueued_at: OffsetDateTime,
|
||||
}
|
||||
|
||||
impl Enqueued {
|
||||
fn update_task(self, task: &mut Task) {
|
||||
// we do not erase the `TaskId` that was given to us.
|
||||
task.content = self.meta.into();
|
||||
task.events.push(TaskEvent::Created(self.enqueued_at));
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct Processed {
|
||||
pub success: v2::UpdateResult,
|
||||
#[serde(with = "time::serde::rfc3339")]
|
||||
pub processed_at: OffsetDateTime,
|
||||
#[serde(flatten)]
|
||||
pub from: Processing,
|
||||
}
|
||||
|
||||
impl Processed {
|
||||
fn update_task(self, task: &mut Task) {
|
||||
self.from.update_task(task);
|
||||
|
||||
let event = TaskEvent::Succeded {
|
||||
result: TaskResult::from(self.success),
|
||||
timestamp: self.processed_at,
|
||||
};
|
||||
task.events.push(event);
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct Processing {
|
||||
#[serde(flatten)]
|
||||
pub from: Enqueued,
|
||||
#[serde(with = "time::serde::rfc3339")]
|
||||
pub started_processing_at: OffsetDateTime,
|
||||
}
|
||||
|
||||
impl Processing {
|
||||
fn update_task(self, task: &mut Task) {
|
||||
self.from.update_task(task);
|
||||
|
||||
let event = TaskEvent::Processing(self.started_processing_at);
|
||||
task.events.push(event);
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct Failed {
|
||||
#[serde(flatten)]
|
||||
pub from: Processing,
|
||||
pub msg: String,
|
||||
pub code: Code,
|
||||
#[serde(with = "time::serde::rfc3339")]
|
||||
pub failed_at: OffsetDateTime,
|
||||
}
|
||||
|
||||
impl Failed {
|
||||
fn update_task(self, task: &mut Task) {
|
||||
self.from.update_task(task);
|
||||
|
||||
let event = TaskEvent::Failed {
|
||||
error: ResponseError::from_msg(self.msg, self.code),
|
||||
timestamp: self.failed_at,
|
||||
};
|
||||
task.events.push(event);
|
||||
}
|
||||
}
|
||||
|
||||
impl From<(UpdateStatus, String, TaskId)> for Task {
|
||||
fn from((update, uid, task_id): (UpdateStatus, String, TaskId)) -> Self {
|
||||
// Dummy task
|
||||
let mut task = super::v4::Task {
|
||||
id: task_id,
|
||||
index_uid: IndexUid::new_unchecked(uid),
|
||||
content: super::v4::TaskContent::IndexDeletion,
|
||||
events: Vec::new(),
|
||||
};
|
||||
|
||||
match update {
|
||||
UpdateStatus::Processing(u) => u.update_task(&mut task),
|
||||
UpdateStatus::Enqueued(u) => u.update_task(&mut task),
|
||||
UpdateStatus::Processed(u) => u.update_task(&mut task),
|
||||
UpdateStatus::Failed(u) => u.update_task(&mut task),
|
||||
}
|
||||
|
||||
task
|
||||
}
|
||||
}
|
@@ -1,145 +0,0 @@
|
||||
use meilisearch_types::error::ResponseError;
|
||||
use meilisearch_types::index_uid::IndexUid;
|
||||
use milli::update::IndexDocumentsMethod;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use time::OffsetDateTime;
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::index::{Settings, Unchecked};
|
||||
use crate::tasks::batch::BatchId;
|
||||
use crate::tasks::task::{
|
||||
DocumentDeletion, TaskContent as NewTaskContent, TaskEvent as NewTaskEvent, TaskId, TaskResult,
|
||||
};
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct Task {
|
||||
pub id: TaskId,
|
||||
pub index_uid: IndexUid,
|
||||
pub content: TaskContent,
|
||||
pub events: Vec<TaskEvent>,
|
||||
}
|
||||
|
||||
impl From<Task> for crate::tasks::task::Task {
|
||||
fn from(other: Task) -> Self {
|
||||
Self {
|
||||
id: other.id,
|
||||
content: NewTaskContent::from((other.index_uid, other.content)),
|
||||
events: other.events.into_iter().map(Into::into).collect(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub enum TaskEvent {
|
||||
Created(#[serde(with = "time::serde::rfc3339")] OffsetDateTime),
|
||||
Batched {
|
||||
#[serde(with = "time::serde::rfc3339")]
|
||||
timestamp: OffsetDateTime,
|
||||
batch_id: BatchId,
|
||||
},
|
||||
Processing(#[serde(with = "time::serde::rfc3339")] OffsetDateTime),
|
||||
Succeded {
|
||||
result: TaskResult,
|
||||
#[serde(with = "time::serde::rfc3339")]
|
||||
timestamp: OffsetDateTime,
|
||||
},
|
||||
Failed {
|
||||
error: ResponseError,
|
||||
#[serde(with = "time::serde::rfc3339")]
|
||||
timestamp: OffsetDateTime,
|
||||
},
|
||||
}
|
||||
|
||||
impl From<TaskEvent> for NewTaskEvent {
|
||||
fn from(other: TaskEvent) -> Self {
|
||||
match other {
|
||||
TaskEvent::Created(x) => NewTaskEvent::Created(x),
|
||||
TaskEvent::Batched {
|
||||
timestamp,
|
||||
batch_id,
|
||||
} => NewTaskEvent::Batched {
|
||||
timestamp,
|
||||
batch_id,
|
||||
},
|
||||
TaskEvent::Processing(x) => NewTaskEvent::Processing(x),
|
||||
TaskEvent::Succeded { result, timestamp } => {
|
||||
NewTaskEvent::Succeeded { result, timestamp }
|
||||
}
|
||||
TaskEvent::Failed { error, timestamp } => NewTaskEvent::Failed { error, timestamp },
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
|
||||
#[allow(clippy::large_enum_variant)]
|
||||
pub enum TaskContent {
|
||||
DocumentAddition {
|
||||
content_uuid: Uuid,
|
||||
merge_strategy: IndexDocumentsMethod,
|
||||
primary_key: Option<String>,
|
||||
documents_count: usize,
|
||||
allow_index_creation: bool,
|
||||
},
|
||||
DocumentDeletion(DocumentDeletion),
|
||||
SettingsUpdate {
|
||||
settings: Settings<Unchecked>,
|
||||
/// Indicates whether the task was a deletion
|
||||
is_deletion: bool,
|
||||
allow_index_creation: bool,
|
||||
},
|
||||
IndexDeletion,
|
||||
IndexCreation {
|
||||
primary_key: Option<String>,
|
||||
},
|
||||
IndexUpdate {
|
||||
primary_key: Option<String>,
|
||||
},
|
||||
Dump {
|
||||
uid: String,
|
||||
},
|
||||
}
|
||||
|
||||
impl From<(IndexUid, TaskContent)> for NewTaskContent {
|
||||
fn from((index_uid, content): (IndexUid, TaskContent)) -> Self {
|
||||
match content {
|
||||
TaskContent::DocumentAddition {
|
||||
content_uuid,
|
||||
merge_strategy,
|
||||
primary_key,
|
||||
documents_count,
|
||||
allow_index_creation,
|
||||
} => NewTaskContent::DocumentAddition {
|
||||
index_uid,
|
||||
content_uuid,
|
||||
merge_strategy,
|
||||
primary_key,
|
||||
documents_count,
|
||||
allow_index_creation,
|
||||
},
|
||||
TaskContent::DocumentDeletion(deletion) => NewTaskContent::DocumentDeletion {
|
||||
index_uid,
|
||||
deletion,
|
||||
},
|
||||
TaskContent::SettingsUpdate {
|
||||
settings,
|
||||
is_deletion,
|
||||
allow_index_creation,
|
||||
} => NewTaskContent::SettingsUpdate {
|
||||
index_uid,
|
||||
settings,
|
||||
is_deletion,
|
||||
allow_index_creation,
|
||||
},
|
||||
TaskContent::IndexDeletion => NewTaskContent::IndexDeletion { index_uid },
|
||||
TaskContent::IndexCreation { primary_key } => NewTaskContent::IndexCreation {
|
||||
index_uid,
|
||||
primary_key,
|
||||
},
|
||||
TaskContent::IndexUpdate { primary_key } => NewTaskContent::IndexUpdate {
|
||||
index_uid,
|
||||
primary_key,
|
||||
},
|
||||
TaskContent::Dump { uid } => NewTaskContent::Dump { uid },
|
||||
}
|
||||
}
|
||||
}
|
@@ -1,42 +0,0 @@
|
||||
use meilisearch_auth::error::AuthControllerError;
|
||||
use meilisearch_types::error::{Code, ErrorCode};
|
||||
use meilisearch_types::internal_error;
|
||||
|
||||
use crate::{index_resolver::error::IndexResolverError, tasks::error::TaskError};
|
||||
|
||||
/// Result alias for the dump operations; the error is always a `DumpError`.
pub type Result<T> = std::result::Result<T, DumpError>;
|
||||
|
||||
#[derive(thiserror::Error, Debug)]
|
||||
pub enum DumpError {
|
||||
#[error("An internal error has occurred. `{0}`.")]
|
||||
Internal(Box<dyn std::error::Error + Send + Sync + 'static>),
|
||||
#[error("{0}")]
|
||||
IndexResolver(Box<IndexResolverError>),
|
||||
}
|
||||
|
||||
// NOTE(review): presumably generates, for each listed type, the `From` conversion into
// `DumpError::Internal` — confirm against the macro definition.
internal_error!(
    DumpError: milli::heed::Error,
    std::io::Error,
    tokio::task::JoinError,
    tokio::sync::oneshot::error::RecvError,
    serde_json::error::Error,
    tempfile::PersistError,
    fs_extra::error::Error,
    AuthControllerError,
    TaskError
);
|
||||
|
||||
impl From<IndexResolverError> for DumpError {
    /// Boxes the resolver error into the `IndexResolver` variant.
    fn from(resolver_error: IndexResolverError) -> Self {
        let boxed = Box::new(resolver_error);
        Self::IndexResolver(boxed)
    }
}
|
||||
|
||||
impl ErrorCode for DumpError {
|
||||
fn error_code(&self) -> Code {
|
||||
match self {
|
||||
DumpError::Internal(_) => Code::Internal,
|
||||
DumpError::IndexResolver(e) => e.error_code(),
|
||||
}
|
||||
}
|
||||
}
|
@@ -1,188 +0,0 @@
|
||||
#[cfg(not(test))]
|
||||
pub use real::DumpHandler;
|
||||
|
||||
#[cfg(test)]
|
||||
pub use test::MockDumpHandler as DumpHandler;
|
||||
|
||||
use time::{macros::format_description, OffsetDateTime};
|
||||
|
||||
/// Generate uid from creation date
|
||||
pub fn generate_uid() -> String {
|
||||
OffsetDateTime::now_utc()
|
||||
.format(format_description!(
|
||||
"[year repr:full][month repr:numerical][day padding:zero]-[hour padding:zero][minute padding:zero][second padding:zero][subsecond digits:3]"
|
||||
))
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
mod real {
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
|
||||
use log::{info, trace};
|
||||
use meilisearch_auth::AuthController;
|
||||
use milli::heed::Env;
|
||||
use tokio::fs::create_dir_all;
|
||||
use tokio::io::AsyncWriteExt;
|
||||
|
||||
use crate::analytics;
|
||||
use crate::compression::to_tar_gz;
|
||||
use crate::dump::error::{DumpError, Result};
|
||||
use crate::dump::{MetadataVersion, META_FILE_NAME};
|
||||
use crate::index_resolver::{
|
||||
index_store::IndexStore, meta_store::IndexMetaStore, IndexResolver,
|
||||
};
|
||||
use crate::tasks::TaskStore;
|
||||
use crate::update_file_store::UpdateFileStore;
|
||||
|
||||
pub struct DumpHandler<U, I> {
|
||||
dump_path: PathBuf,
|
||||
db_path: PathBuf,
|
||||
update_file_store: UpdateFileStore,
|
||||
task_store_size: usize,
|
||||
index_db_size: usize,
|
||||
env: Arc<Env>,
|
||||
index_resolver: Arc<IndexResolver<U, I>>,
|
||||
}
|
||||
|
||||
impl<U, I> DumpHandler<U, I>
|
||||
where
|
||||
U: IndexMetaStore + Sync + Send + 'static,
|
||||
I: IndexStore + Sync + Send + 'static,
|
||||
{
|
||||
pub fn new(
|
||||
dump_path: PathBuf,
|
||||
db_path: PathBuf,
|
||||
update_file_store: UpdateFileStore,
|
||||
task_store_size: usize,
|
||||
index_db_size: usize,
|
||||
env: Arc<Env>,
|
||||
index_resolver: Arc<IndexResolver<U, I>>,
|
||||
) -> Self {
|
||||
Self {
|
||||
dump_path,
|
||||
db_path,
|
||||
update_file_store,
|
||||
task_store_size,
|
||||
index_db_size,
|
||||
env,
|
||||
index_resolver,
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn run(&self, uid: String) -> Result<()> {
|
||||
trace!("Performing dump.");
|
||||
|
||||
create_dir_all(&self.dump_path).await?;
|
||||
|
||||
let temp_dump_dir = tokio::task::spawn_blocking(tempfile::TempDir::new).await??;
|
||||
let temp_dump_path = temp_dump_dir.path().to_owned();
|
||||
|
||||
let meta = MetadataVersion::new_v5(self.index_db_size, self.task_store_size);
|
||||
let meta_path = temp_dump_path.join(META_FILE_NAME);
|
||||
|
||||
let meta_bytes = serde_json::to_vec(&meta)?;
|
||||
let mut meta_file = tokio::fs::File::create(&meta_path).await?;
|
||||
meta_file.write_all(&meta_bytes).await?;
|
||||
|
||||
analytics::copy_user_id(&self.db_path, &temp_dump_path);
|
||||
|
||||
create_dir_all(&temp_dump_path.join("indexes")).await?;
|
||||
|
||||
let db_path = self.db_path.clone();
|
||||
let temp_dump_path_clone = temp_dump_path.clone();
|
||||
tokio::task::spawn_blocking(move || -> Result<()> {
|
||||
AuthController::dump(db_path, temp_dump_path_clone)?;
|
||||
Ok(())
|
||||
})
|
||||
.await??;
|
||||
TaskStore::dump(
|
||||
self.env.clone(),
|
||||
&temp_dump_path,
|
||||
self.update_file_store.clone(),
|
||||
)
|
||||
.await?;
|
||||
self.index_resolver.dump(&temp_dump_path).await?;
|
||||
|
||||
let dump_path = self.dump_path.clone();
|
||||
let dump_path = tokio::task::spawn_blocking(move || -> Result<PathBuf> {
|
||||
// for now we simply copy the updates/updates_files
|
||||
// FIXME: We may copy more files than necessary, if new files are added while we are
|
||||
// performing the dump. We need a way to filter them out.
|
||||
|
||||
let temp_dump_file = tempfile::NamedTempFile::new_in(&dump_path)?;
|
||||
to_tar_gz(temp_dump_path, temp_dump_file.path())
|
||||
.map_err(|e| DumpError::Internal(e.into()))?;
|
||||
|
||||
let dump_path = dump_path.join(uid).with_extension("dump");
|
||||
temp_dump_file.persist(&dump_path)?;
|
||||
|
||||
Ok(dump_path)
|
||||
})
|
||||
.await??;
|
||||
|
||||
info!("Created dump in {:?}.", dump_path);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod test {
    use std::path::PathBuf;
    use std::sync::Arc;

    use milli::heed::Env;
    use nelson::Mocker;

    use crate::dump::error::Result;
    use crate::index_resolver::IndexResolver;
    use crate::index_resolver::{index_store::IndexStore, meta_store::IndexMetaStore};
    use crate::update_file_store::UpdateFileStore;

    use super::*;

    /// Test double for the dump handler: either forwards to the real
    /// implementation or answers through a `nelson` mocker.
    pub enum MockDumpHandler<U, I> {
        Real(super::real::DumpHandler<U, I>),
        Mock(Mocker),
    }

    impl<U, I> MockDumpHandler<U, I> {
        /// Wraps `mocker` so that calls are answered by the registered mocks.
        pub fn mock(mocker: Mocker) -> Self {
            Self::Mock(mocker)
        }
    }

    impl<U, I> MockDumpHandler<U, I>
    where
        U: IndexMetaStore + Sync + Send + 'static,
        I: IndexStore + Sync + Send + 'static,
    {
        /// Same signature as the real constructor; builds the `Real` variant.
        pub fn new(
            dump_path: PathBuf,
            db_path: PathBuf,
            update_file_store: UpdateFileStore,
            task_store_size: usize,
            index_db_size: usize,
            env: Arc<Env>,
            index_resolver: Arc<IndexResolver<U, I>>,
        ) -> Self {
            let real = super::real::DumpHandler::new(
                dump_path,
                db_path,
                update_file_store,
                task_store_size,
                index_db_size,
                env,
                index_resolver,
            );
            Self::Real(real)
        }

        /// Runs the real dump, or calls the mock registered under `"run"`.
        pub async fn run(&self, uid: String) -> Result<()> {
            // NOTE(review): `DumpHandler` is presumably the test-time alias of
            // `MockDumpHandler` brought in by `use super::*` — confirm against
            // the parent module.
            match self {
                DumpHandler::Mock(mocker) => unsafe { mocker.get("run").call(uid) },
                DumpHandler::Real(real) => real.run(uid).await,
            }
        }
    }
}
|
@@ -1,4 +0,0 @@
|
||||
pub mod v2;
|
||||
pub mod v3;
|
||||
pub mod v4;
|
||||
pub mod v5;
|
@@ -1,24 +0,0 @@
|
||||
use std::path::Path;
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::index_controller::IndexMetadata;
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct MetadataV1 {
|
||||
pub db_version: String,
|
||||
indexes: Vec<IndexMetadata>,
|
||||
}
|
||||
|
||||
impl MetadataV1 {
|
||||
#[allow(dead_code, unreachable_code, unused_variables)]
|
||||
pub fn load_dump(
|
||||
self,
|
||||
src: impl AsRef<Path>,
|
||||
dst: impl AsRef<Path>,
|
||||
size: usize,
|
||||
indexer_options: &IndexerOpts,
|
||||
) -> anyhow::Result<()> {
|
||||
anyhow::bail!("The version 1 of the dumps is not supported anymore. You can re-export your dump from a version between 0.21 and 0.24, or start fresh from a version 0.25 onwards.")
|
||||
}
|
@@ -1,216 +0,0 @@
|
||||
use std::fs::{File, OpenOptions};
|
||||
use std::io::Write;
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
use serde_json::{Deserializer, Value};
|
||||
use tempfile::NamedTempFile;
|
||||
|
||||
use crate::dump::compat::{self, v2, v3};
|
||||
use crate::dump::Metadata;
|
||||
use crate::options::IndexerOpts;
|
||||
|
||||
/// The dump v2 reads the dump folder and patches all the needed file to make it compatible with a
|
||||
/// dump v3, then calls the dump v3 to actually handle the dump.
|
||||
pub fn load_dump(
|
||||
meta: Metadata,
|
||||
src: impl AsRef<Path>,
|
||||
dst: impl AsRef<Path>,
|
||||
index_db_size: usize,
|
||||
update_db_size: usize,
|
||||
indexing_options: &IndexerOpts,
|
||||
) -> anyhow::Result<()> {
|
||||
log::info!("Patching dump V2 to dump V3...");
|
||||
let indexes_path = src.as_ref().join("indexes");
|
||||
|
||||
let dir_entries = std::fs::read_dir(indexes_path)?;
|
||||
for entry in dir_entries {
|
||||
let entry = entry?;
|
||||
|
||||
// rename the index folder
|
||||
let path = entry.path();
|
||||
let new_path = patch_index_uuid_path(&path).expect("invalid index folder.");
|
||||
|
||||
std::fs::rename(path, &new_path)?;
|
||||
|
||||
let settings_path = new_path.join("meta.json");
|
||||
|
||||
patch_settings(settings_path)?;
|
||||
}
|
||||
|
||||
let update_dir = src.as_ref().join("updates");
|
||||
let update_path = update_dir.join("data.jsonl");
|
||||
patch_updates(update_dir, update_path)?;
|
||||
|
||||
super::v3::load_dump(
|
||||
meta,
|
||||
src,
|
||||
dst,
|
||||
index_db_size,
|
||||
update_db_size,
|
||||
indexing_options,
|
||||
)
|
||||
}
|
||||
|
||||
/// Computes the V3 location of a V2 index folder: same parent directory, with
/// every leading `index-` removed from the folder name.
///
/// Returns `None` when the path has no final component, when that component
/// is not valid UTF-8, or when the path has no parent.
fn patch_index_uuid_path(path: &Path) -> Option<PathBuf> {
    let folder_name = path.file_name()?.to_str()?;
    let uuid = folder_name.trim_start_matches("index-");
    path.parent().map(|parent| parent.join(uuid))
}
|
||||
|
||||
fn patch_settings(path: impl AsRef<Path>) -> anyhow::Result<()> {
|
||||
let mut meta_file = File::open(&path)?;
|
||||
let mut meta: Value = serde_json::from_reader(&mut meta_file)?;
|
||||
|
||||
// We first deserialize the dump meta into a serde_json::Value and change
|
||||
// the custom ranking rules settings from the old format to the new format.
|
||||
if let Some(ranking_rules) = meta.pointer_mut("/settings/rankingRules") {
|
||||
patch_custom_ranking_rules(ranking_rules);
|
||||
}
|
||||
|
||||
let mut meta_file = OpenOptions::new().truncate(true).write(true).open(path)?;
|
||||
|
||||
serde_json::to_writer(&mut meta_file, &meta)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn patch_updates(dir: impl AsRef<Path>, path: impl AsRef<Path>) -> anyhow::Result<()> {
|
||||
let mut output_update_file = NamedTempFile::new_in(&dir)?;
|
||||
let update_file = File::open(&path)?;
|
||||
|
||||
let stream = Deserializer::from_reader(update_file).into_iter::<v2::UpdateEntry>();
|
||||
|
||||
for update in stream {
|
||||
let update_entry = update?;
|
||||
|
||||
let update_entry = v3::UpdateEntry::from(update_entry);
|
||||
|
||||
serde_json::to_writer(&mut output_update_file, &update_entry)?;
|
||||
output_update_file.write_all(b"\n")?;
|
||||
}
|
||||
|
||||
output_update_file.flush()?;
|
||||
output_update_file.persist(path)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Converts the ranking rules from the format `asc(_)`, `desc(_)` to the format `_:asc`, `_:desc`.
|
||||
///
|
||||
/// This is done for compatibility reasons, and to avoid a new dump version,
|
||||
/// since the new syntax was introduced soon after the new dump version.
|
||||
fn patch_custom_ranking_rules(ranking_rules: &mut Value) {
|
||||
*ranking_rules = match ranking_rules.take() {
|
||||
Value::Array(values) => values
|
||||
.into_iter()
|
||||
.filter_map(|value| match value {
|
||||
Value::String(s) if s.starts_with("asc") => compat::asc_ranking_rule(&s)
|
||||
.map(|f| format!("{}:asc", f))
|
||||
.map(Value::String),
|
||||
Value::String(s) if s.starts_with("desc") => compat::desc_ranking_rule(&s)
|
||||
.map(|f| format!("{}:desc", f))
|
||||
.map(Value::String),
|
||||
otherwise => Some(otherwise),
|
||||
})
|
||||
.collect(),
|
||||
otherwise => otherwise,
|
||||
}
|
||||
}
|
||||
|
||||
/// Converts a V2 update entry to V3 by converting its status payload.
impl From<v2::UpdateEntry> for v3::UpdateEntry {
    /// # Panics
    ///
    /// Panics on an `Aborted` status.
    fn from(entry: v2::UpdateEntry) -> Self {
        let v2::UpdateEntry {
            uuid,
            update: status,
        } = entry;

        let update = match status {
            v2::UpdateStatus::Enqueued(meta) => v3::UpdateStatus::Enqueued(meta.into()),
            v2::UpdateStatus::Processing(meta) => v3::UpdateStatus::Processing(meta.into()),
            v2::UpdateStatus::Processed(meta) => v3::UpdateStatus::Processed(meta.into()),
            v2::UpdateStatus::Failed(meta) => v3::UpdateStatus::Failed(meta.into()),
            v2::UpdateStatus::Aborted(_) => unreachable!("Updates could never be aborted."),
        };

        Self { uuid, update }
    }
}
|
||||
|
||||
impl From<v2::Failed> for v3::Failed {
|
||||
fn from(other: v2::Failed) -> Self {
|
||||
let v2::Failed {
|
||||
from,
|
||||
error,
|
||||
failed_at,
|
||||
} = other;
|
||||
|
||||
Self {
|
||||
from: from.into(),
|
||||
msg: error.message,
|
||||
code: v2::error_code_from_str(&error.error_code)
|
||||
.expect("Invalid update: Invalid error code"),
|
||||
failed_at,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Converts a V2 update that is being processed; only `from` needs converting.
impl From<v2::Processing> for v3::Processing {
    fn from(other: v2::Processing) -> Self {
        Self {
            from: other.from.into(),
            started_processing_at: other.started_processing_at,
        }
    }
}
|
||||
|
||||
/// Converts an enqueued V2 update, mapping its metadata to a V3 `Update`.
impl From<v2::Enqueued> for v3::Enqueued {
    fn from(other: v2::Enqueued) -> Self {
        let v2::Enqueued {
            update_id,
            meta: legacy_meta,
            enqueued_at,
            content,
        } = other;

        let meta = match legacy_meta {
            v2::UpdateMeta::Settings(settings) => v3::Update::Settings(settings),
            v2::UpdateMeta::DeleteDocuments { ids } => v3::Update::DeleteDocuments(ids),
            v2::UpdateMeta::ClearDocuments => v3::Update::ClearDocuments,
            v2::UpdateMeta::DocumentsAddition {
                method,
                primary_key,
                ..
            } => v3::Update::DocumentAddition {
                primary_key,
                method,
                // A missing content uuid is replaced by the default value; if
                // it is needed later, an error will be thrown at that point.
                content_uuid: content.unwrap_or_default(),
            },
        };

        Self {
            update_id,
            meta,
            enqueued_at,
        }
    }
}
|
||||
|
||||
/// Converts a processed V2 update; only `from` needs converting.
impl From<v2::Processed> for v3::Processed {
    fn from(other: v2::Processed) -> Self {
        Self {
            success: other.success,
            processed_at: other.processed_at,
            from: other.from.into(),
        }
    }
}
|
@@ -1,136 +0,0 @@
|
||||
use std::collections::HashMap;
|
||||
use std::fs::{self, File};
|
||||
use std::io::{BufReader, BufWriter, Write};
|
||||
use std::path::Path;
|
||||
|
||||
use anyhow::Context;
|
||||
use fs_extra::dir::{self, CopyOptions};
|
||||
use log::info;
|
||||
use tempfile::tempdir;
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::dump::compat::{self, v3};
|
||||
use crate::dump::Metadata;
|
||||
use crate::index_resolver::meta_store::{DumpEntry, IndexMeta};
|
||||
use crate::options::IndexerOpts;
|
||||
use crate::tasks::task::TaskId;
|
||||
|
||||
/// dump structure for V3:
|
||||
/// .
|
||||
/// ├── indexes
|
||||
/// │  └── 25f10bb8-6ea8-42f0-bd48-ad5857f77648
|
||||
/// │  ├── documents.jsonl
|
||||
/// │  └── meta.json
|
||||
/// ├── index_uuids
|
||||
/// │  └── data.jsonl
|
||||
/// ├── metadata.json
|
||||
/// └── updates
|
||||
/// └── data.jsonl
|
||||
|
||||
pub fn load_dump(
|
||||
meta: Metadata,
|
||||
src: impl AsRef<Path>,
|
||||
dst: impl AsRef<Path>,
|
||||
index_db_size: usize,
|
||||
meta_env_size: usize,
|
||||
indexing_options: &IndexerOpts,
|
||||
) -> anyhow::Result<()> {
|
||||
info!("Patching dump V3 to dump V4...");
|
||||
|
||||
let patched_dir = tempdir()?;
|
||||
|
||||
let options = CopyOptions::default();
|
||||
dir::copy(src.as_ref().join("indexes"), patched_dir.path(), &options)?;
|
||||
dir::copy(
|
||||
src.as_ref().join("index_uuids"),
|
||||
patched_dir.path(),
|
||||
&options,
|
||||
)?;
|
||||
|
||||
let uuid_map = patch_index_meta(
|
||||
src.as_ref().join("index_uuids/data.jsonl"),
|
||||
patched_dir.path(),
|
||||
)?;
|
||||
|
||||
fs::copy(
|
||||
src.as_ref().join("metadata.json"),
|
||||
patched_dir.path().join("metadata.json"),
|
||||
)?;
|
||||
|
||||
patch_updates(&src, patched_dir.path(), uuid_map)?;
|
||||
|
||||
super::v4::load_dump(
|
||||
meta,
|
||||
patched_dir.path(),
|
||||
dst,
|
||||
index_db_size,
|
||||
meta_env_size,
|
||||
indexing_options,
|
||||
)
|
||||
}
|
||||
|
||||
fn patch_index_meta(
|
||||
path: impl AsRef<Path>,
|
||||
dst: impl AsRef<Path>,
|
||||
) -> anyhow::Result<HashMap<Uuid, String>> {
|
||||
let file = BufReader::new(File::open(path)?);
|
||||
let dst = dst.as_ref().join("index_uuids");
|
||||
fs::create_dir_all(&dst)?;
|
||||
let mut dst_file = File::create(dst.join("data.jsonl"))?;
|
||||
|
||||
let map = serde_json::Deserializer::from_reader(file)
|
||||
.into_iter::<v3::DumpEntry>()
|
||||
.try_fold(HashMap::new(), |mut map, entry| -> anyhow::Result<_> {
|
||||
let entry = entry?;
|
||||
map.insert(entry.uuid, entry.uid.clone());
|
||||
let meta = IndexMeta {
|
||||
uuid: entry.uuid,
|
||||
// This is lost information, we patch it to 0;
|
||||
creation_task_id: 0,
|
||||
};
|
||||
let entry = DumpEntry {
|
||||
uid: entry.uid,
|
||||
index_meta: meta,
|
||||
};
|
||||
serde_json::to_writer(&mut dst_file, &entry)?;
|
||||
dst_file.write_all(b"\n")?;
|
||||
Ok(map)
|
||||
})?;
|
||||
|
||||
dst_file.flush()?;
|
||||
|
||||
Ok(map)
|
||||
}
|
||||
|
||||
fn patch_updates(
|
||||
src: impl AsRef<Path>,
|
||||
dst: impl AsRef<Path>,
|
||||
uuid_map: HashMap<Uuid, String>,
|
||||
) -> anyhow::Result<()> {
|
||||
let dst = dst.as_ref().join("updates");
|
||||
fs::create_dir_all(&dst)?;
|
||||
|
||||
let mut dst_file = BufWriter::new(File::create(dst.join("data.jsonl"))?);
|
||||
let src_file = BufReader::new(File::open(src.as_ref().join("updates/data.jsonl"))?);
|
||||
|
||||
serde_json::Deserializer::from_reader(src_file)
|
||||
.into_iter::<v3::UpdateEntry>()
|
||||
.enumerate()
|
||||
.try_for_each(|(task_id, entry)| -> anyhow::Result<()> {
|
||||
let entry = entry?;
|
||||
let name = uuid_map
|
||||
.get(&entry.uuid)
|
||||
.with_context(|| format!("Unknown index uuid: {}", entry.uuid))?
|
||||
.clone();
|
||||
serde_json::to_writer(
|
||||
&mut dst_file,
|
||||
&compat::v4::Task::from((entry.update, name, task_id as TaskId)),
|
||||
)?;
|
||||
dst_file.write_all(b"\n")?;
|
||||
Ok(())
|
||||
})?;
|
||||
|
||||
dst_file.flush()?;
|
||||
|
||||
Ok(())
|
||||
}
|
@@ -1,103 +0,0 @@
|
||||
use std::fs::{self, create_dir_all, File};
|
||||
use std::io::{BufReader, Write};
|
||||
use std::path::Path;
|
||||
|
||||
use fs_extra::dir::{self, CopyOptions};
|
||||
use log::info;
|
||||
use serde_json::{Deserializer, Map, Value};
|
||||
use tempfile::tempdir;
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::dump::{compat, Metadata};
|
||||
use crate::options::IndexerOpts;
|
||||
use crate::tasks::task::Task;
|
||||
|
||||
pub fn load_dump(
|
||||
meta: Metadata,
|
||||
src: impl AsRef<Path>,
|
||||
dst: impl AsRef<Path>,
|
||||
index_db_size: usize,
|
||||
meta_env_size: usize,
|
||||
indexing_options: &IndexerOpts,
|
||||
) -> anyhow::Result<()> {
|
||||
info!("Patching dump V4 to dump V5...");
|
||||
|
||||
let patched_dir = tempdir()?;
|
||||
let options = CopyOptions::default();
|
||||
|
||||
// Indexes
|
||||
dir::copy(src.as_ref().join("indexes"), &patched_dir, &options)?;
|
||||
|
||||
// Index uuids
|
||||
dir::copy(src.as_ref().join("index_uuids"), &patched_dir, &options)?;
|
||||
|
||||
// Metadata
|
||||
fs::copy(
|
||||
src.as_ref().join("metadata.json"),
|
||||
patched_dir.path().join("metadata.json"),
|
||||
)?;
|
||||
|
||||
// Updates
|
||||
patch_updates(&src, &patched_dir)?;
|
||||
|
||||
// Keys
|
||||
patch_keys(&src, &patched_dir)?;
|
||||
|
||||
super::v5::load_dump(
|
||||
meta,
|
||||
&patched_dir,
|
||||
dst,
|
||||
index_db_size,
|
||||
meta_env_size,
|
||||
indexing_options,
|
||||
)
|
||||
}
|
||||
|
||||
fn patch_updates(src: impl AsRef<Path>, dst: impl AsRef<Path>) -> anyhow::Result<()> {
|
||||
let updates_path = src.as_ref().join("updates/data.jsonl");
|
||||
let output_updates_path = dst.as_ref().join("updates/data.jsonl");
|
||||
create_dir_all(output_updates_path.parent().unwrap())?;
|
||||
let udpates_file = File::open(updates_path)?;
|
||||
let mut output_update_file = File::create(output_updates_path)?;
|
||||
|
||||
serde_json::Deserializer::from_reader(udpates_file)
|
||||
.into_iter::<compat::v4::Task>()
|
||||
.try_for_each(|task| -> anyhow::Result<()> {
|
||||
let task: Task = task?.into();
|
||||
|
||||
serde_json::to_writer(&mut output_update_file, &task)?;
|
||||
output_update_file.write_all(b"\n")?;
|
||||
|
||||
Ok(())
|
||||
})?;
|
||||
|
||||
output_update_file.flush()?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn patch_keys(src: impl AsRef<Path>, dst: impl AsRef<Path>) -> anyhow::Result<()> {
|
||||
let keys_file_src = src.as_ref().join("keys");
|
||||
|
||||
if !keys_file_src.exists() {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
fs::create_dir_all(&dst)?;
|
||||
let keys_file_dst = dst.as_ref().join("keys");
|
||||
let mut writer = File::create(&keys_file_dst)?;
|
||||
|
||||
let reader = BufReader::new(File::open(&keys_file_src)?);
|
||||
for key in Deserializer::from_reader(reader).into_iter() {
|
||||
let mut key: Map<String, Value> = key?;
|
||||
|
||||
// generate a new uuid v4 and insert it in the key.
|
||||
let uid = serde_json::to_value(Uuid::new_v4()).unwrap();
|
||||
key.insert("uid".to_string(), uid);
|
||||
|
||||
serde_json::to_writer(&mut writer, &key)?;
|
||||
writer.write_all(b"\n")?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
@@ -1,47 +0,0 @@
|
||||
use std::{path::Path, sync::Arc};
|
||||
|
||||
use log::info;
|
||||
use meilisearch_auth::AuthController;
|
||||
use milli::heed::EnvOpenOptions;
|
||||
|
||||
use crate::analytics;
|
||||
use crate::dump::Metadata;
|
||||
use crate::index_resolver::IndexResolver;
|
||||
use crate::options::IndexerOpts;
|
||||
use crate::tasks::TaskStore;
|
||||
use crate::update_file_store::UpdateFileStore;
|
||||
|
||||
pub fn load_dump(
|
||||
meta: Metadata,
|
||||
src: impl AsRef<Path>,
|
||||
dst: impl AsRef<Path>,
|
||||
index_db_size: usize,
|
||||
meta_env_size: usize,
|
||||
indexing_options: &IndexerOpts,
|
||||
) -> anyhow::Result<()> {
|
||||
info!(
|
||||
"Loading dump from {}, dump database version: {}, dump version: V5",
|
||||
meta.dump_date, meta.db_version
|
||||
);
|
||||
|
||||
let mut options = EnvOpenOptions::new();
|
||||
options.map_size(meta_env_size);
|
||||
options.max_dbs(100);
|
||||
let env = Arc::new(options.open(&dst)?);
|
||||
|
||||
IndexResolver::load_dump(
|
||||
src.as_ref(),
|
||||
&dst,
|
||||
index_db_size,
|
||||
env.clone(),
|
||||
indexing_options,
|
||||
)?;
|
||||
UpdateFileStore::load_dump(src.as_ref(), &dst)?;
|
||||
TaskStore::load_dump(&src, env)?;
|
||||
AuthController::load_dump(&src, &dst)?;
|
||||
analytics::copy_user_id(src.as_ref(), dst.as_ref());
|
||||
|
||||
info!("Loading indexes.");
|
||||
|
||||
Ok(())
|
||||
}
|
@@ -1,262 +0,0 @@
|
||||
use std::fs::File;
|
||||
use std::path::Path;
|
||||
|
||||
use anyhow::bail;
|
||||
use log::info;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use time::OffsetDateTime;
|
||||
|
||||
use tempfile::TempDir;
|
||||
|
||||
use crate::compression::from_tar_gz;
|
||||
use crate::options::IndexerOpts;
|
||||
|
||||
use self::loaders::{v2, v3, v4, v5};
|
||||
|
||||
pub use handler::{generate_uid, DumpHandler};
|
||||
|
||||
mod compat;
|
||||
pub mod error;
|
||||
mod handler;
|
||||
mod loaders;
|
||||
|
||||
const META_FILE_NAME: &str = "metadata.json";
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct Metadata {
|
||||
db_version: String,
|
||||
index_db_size: usize,
|
||||
update_db_size: usize,
|
||||
#[serde(with = "time::serde::rfc3339")]
|
||||
dump_date: OffsetDateTime,
|
||||
}
|
||||
|
||||
impl Metadata {
|
||||
pub fn new(index_db_size: usize, update_db_size: usize) -> Self {
|
||||
Self {
|
||||
db_version: env!("CARGO_PKG_VERSION").to_string(),
|
||||
index_db_size,
|
||||
update_db_size,
|
||||
dump_date: OffsetDateTime::now_utc(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct MetadataV1 {
|
||||
pub db_version: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
#[serde(tag = "dumpVersion")]
|
||||
pub enum MetadataVersion {
|
||||
V1(MetadataV1),
|
||||
V2(Metadata),
|
||||
V3(Metadata),
|
||||
V4(Metadata),
|
||||
// V5 is forward compatible with V4 but not backward compatible.
|
||||
V5(Metadata),
|
||||
}
|
||||
|
||||
impl MetadataVersion {
|
||||
pub fn load_dump(
|
||||
self,
|
||||
src: impl AsRef<Path>,
|
||||
dst: impl AsRef<Path>,
|
||||
index_db_size: usize,
|
||||
meta_env_size: usize,
|
||||
indexing_options: &IndexerOpts,
|
||||
) -> anyhow::Result<()> {
|
||||
match self {
|
||||
MetadataVersion::V1(_meta) => {
|
||||
anyhow::bail!("The version 1 of the dumps is not supported anymore. You can re-export your dump from a version between 0.21 and 0.24, or start fresh from a version 0.25 onwards.")
|
||||
}
|
||||
MetadataVersion::V2(meta) => v2::load_dump(
|
||||
meta,
|
||||
src,
|
||||
dst,
|
||||
index_db_size,
|
||||
meta_env_size,
|
||||
indexing_options,
|
||||
)?,
|
||||
MetadataVersion::V3(meta) => v3::load_dump(
|
||||
meta,
|
||||
src,
|
||||
dst,
|
||||
index_db_size,
|
||||
meta_env_size,
|
||||
indexing_options,
|
||||
)?,
|
||||
MetadataVersion::V4(meta) => v4::load_dump(
|
||||
meta,
|
||||
src,
|
||||
dst,
|
||||
index_db_size,
|
||||
meta_env_size,
|
||||
indexing_options,
|
||||
)?,
|
||||
MetadataVersion::V5(meta) => v5::load_dump(
|
||||
meta,
|
||||
src,
|
||||
dst,
|
||||
index_db_size,
|
||||
meta_env_size,
|
||||
indexing_options,
|
||||
)?,
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn new_v5(index_db_size: usize, update_db_size: usize) -> Self {
|
||||
let meta = Metadata::new(index_db_size, update_db_size);
|
||||
Self::V5(meta)
|
||||
}
|
||||
|
||||
pub fn db_version(&self) -> &str {
|
||||
match self {
|
||||
Self::V1(meta) => &meta.db_version,
|
||||
Self::V2(meta) | Self::V3(meta) | Self::V4(meta) | Self::V5(meta) => &meta.db_version,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn version(&self) -> &'static str {
|
||||
match self {
|
||||
MetadataVersion::V1(_) => "V1",
|
||||
MetadataVersion::V2(_) => "V2",
|
||||
MetadataVersion::V3(_) => "V3",
|
||||
MetadataVersion::V4(_) => "V4",
|
||||
MetadataVersion::V5(_) => "V5",
|
||||
}
|
||||
}
|
||||
|
||||
pub fn dump_date(&self) -> Option<&OffsetDateTime> {
|
||||
match self {
|
||||
MetadataVersion::V1(_) => None,
|
||||
MetadataVersion::V2(meta)
|
||||
| MetadataVersion::V3(meta)
|
||||
| MetadataVersion::V4(meta)
|
||||
| MetadataVersion::V5(meta) => Some(&meta.dump_date),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum DumpStatus {
|
||||
Done,
|
||||
InProgress,
|
||||
Failed,
|
||||
}
|
||||
|
||||
pub fn load_dump(
|
||||
dst_path: impl AsRef<Path>,
|
||||
src_path: impl AsRef<Path>,
|
||||
ignore_dump_if_db_exists: bool,
|
||||
ignore_missing_dump: bool,
|
||||
index_db_size: usize,
|
||||
update_db_size: usize,
|
||||
indexer_opts: &IndexerOpts,
|
||||
) -> anyhow::Result<()> {
|
||||
let empty_db = crate::is_empty_db(&dst_path);
|
||||
let src_path_exists = src_path.as_ref().exists();
|
||||
|
||||
if empty_db && src_path_exists {
|
||||
let (tmp_src, tmp_dst, meta) = extract_dump(&dst_path, &src_path)?;
|
||||
meta.load_dump(
|
||||
tmp_src.path(),
|
||||
tmp_dst.path(),
|
||||
index_db_size,
|
||||
update_db_size,
|
||||
indexer_opts,
|
||||
)?;
|
||||
persist_dump(&dst_path, tmp_dst)?;
|
||||
Ok(())
|
||||
} else if !empty_db && !ignore_dump_if_db_exists {
|
||||
bail!(
|
||||
"database already exists at {:?}, try to delete it or rename it",
|
||||
dst_path
|
||||
.as_ref()
|
||||
.canonicalize()
|
||||
.unwrap_or_else(|_| dst_path.as_ref().to_owned())
|
||||
)
|
||||
} else if !src_path_exists && !ignore_missing_dump {
|
||||
bail!("dump doesn't exist at {:?}", src_path.as_ref())
|
||||
} else {
|
||||
// there is nothing to do
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
fn extract_dump(
|
||||
dst_path: impl AsRef<Path>,
|
||||
src_path: impl AsRef<Path>,
|
||||
) -> anyhow::Result<(TempDir, TempDir, MetadataVersion)> {
|
||||
// Setup a temp directory path in the same path as the database, to prevent cross devices
|
||||
// references.
|
||||
let temp_path = dst_path
|
||||
.as_ref()
|
||||
.parent()
|
||||
.map(ToOwned::to_owned)
|
||||
.unwrap_or_else(|| ".".into());
|
||||
|
||||
let tmp_src = tempfile::tempdir_in(temp_path)?;
|
||||
let tmp_src_path = tmp_src.path();
|
||||
|
||||
from_tar_gz(&src_path, tmp_src_path)?;
|
||||
|
||||
let meta_path = tmp_src_path.join(META_FILE_NAME);
|
||||
let mut meta_file = File::open(&meta_path)?;
|
||||
let meta: MetadataVersion = serde_json::from_reader(&mut meta_file)?;
|
||||
|
||||
if !dst_path.as_ref().exists() {
|
||||
std::fs::create_dir_all(dst_path.as_ref())?;
|
||||
}
|
||||
|
||||
let tmp_dst = tempfile::tempdir_in(dst_path.as_ref())?;
|
||||
|
||||
info!(
|
||||
"Loading dump {}, dump database version: {}, dump version: {}",
|
||||
meta.dump_date()
|
||||
.map(|t| format!("from {}", t))
|
||||
.unwrap_or_else(String::new),
|
||||
meta.db_version(),
|
||||
meta.version()
|
||||
);
|
||||
|
||||
Ok((tmp_src, tmp_dst, meta))
|
||||
}
|
||||
|
||||
/// Replaces the content of `dst_path` with the content of `tmp_dst`.
///
/// `tmp_dst` is expected to be a directory located inside `dst_path`:
/// everything else found in `dst_path` is deleted, the content of `tmp_dst`
/// is moved one level up, and the then empty directory is removed.
///
/// # Errors
///
/// Returns any I/O error met while listing, deleting or moving entries.
/// Listing failures are reported as errors instead of panicking.
fn persist_dump(dst_path: impl AsRef<Path>, tmp_dst: TempDir) -> anyhow::Result<()> {
    let dst_path = dst_path.as_ref();
    // From here on the directory is no longer deleted automatically.
    let persisted_dump = tmp_dst.into_path();

    // Delete everything in the `data.ms` except the tempdir.
    if dst_path.exists() {
        for entry in dst_path.read_dir()? {
            let entry = entry?;
            if Some(entry.file_name().as_os_str()) == persisted_dump.file_name() {
                continue;
            }

            let path = entry.path();
            if path.is_file() {
                std::fs::remove_file(&path)?;
            } else {
                std::fs::remove_dir_all(&path)?;
            }
        }
    }

    // Move the whole content of the tempdir into the `data.ms`.
    for entry in persisted_dump.read_dir()? {
        let entry = entry?;
        std::fs::rename(entry.path(), dst_path.join(entry.file_name()))?;
    }

    // Delete the empty tempdir.
    std::fs::remove_dir_all(&persisted_dump)?;

    Ok(())
}
|
@@ -1,55 +0,0 @@
|
||||
use std::error::Error;
|
||||
use std::fmt;
|
||||
|
||||
use meilisearch_types::error::{Code, ErrorCode};
|
||||
use milli::UserError;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct MilliError<'a>(pub &'a milli::Error);
|
||||
|
||||
/// Marker implementation relying on the default methods of `Error`.
impl Error for MilliError<'_> {}
|
||||
|
||||
impl fmt::Display for MilliError<'_> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
self.0.fmt(f)
|
||||
}
|
||||
}
|
||||
|
||||
impl ErrorCode for MilliError<'_> {
|
||||
fn error_code(&self) -> Code {
|
||||
match self.0 {
|
||||
milli::Error::InternalError(_) => Code::Internal,
|
||||
milli::Error::IoError(_) => Code::Internal,
|
||||
milli::Error::UserError(ref error) => {
|
||||
match error {
|
||||
// TODO: wait for spec for new error codes.
|
||||
UserError::SerdeJson(_)
|
||||
| UserError::InvalidLmdbOpenOptions
|
||||
| UserError::DocumentLimitReached
|
||||
| UserError::AccessingSoftDeletedDocument { .. }
|
||||
| UserError::UnknownInternalDocumentId { .. } => Code::Internal,
|
||||
UserError::InvalidStoreFile => Code::InvalidStore,
|
||||
UserError::NoSpaceLeftOnDevice => Code::NoSpaceLeftOnDevice,
|
||||
UserError::MaxDatabaseSizeReached => Code::DatabaseSizeLimitReached,
|
||||
UserError::AttributeLimitReached => Code::MaxFieldsLimitExceeded,
|
||||
UserError::InvalidFilter(_) => Code::Filter,
|
||||
UserError::MissingDocumentId { .. } => Code::MissingDocumentId,
|
||||
UserError::InvalidDocumentId { .. } | UserError::TooManyDocumentIds { .. } => {
|
||||
Code::InvalidDocumentId
|
||||
}
|
||||
UserError::MissingPrimaryKey => Code::MissingPrimaryKey,
|
||||
UserError::PrimaryKeyCannotBeChanged(_) => Code::PrimaryKeyAlreadyPresent,
|
||||
UserError::SortRankingRuleMissing => Code::Sort,
|
||||
UserError::InvalidFacetsDistribution { .. } => Code::BadRequest,
|
||||
UserError::InvalidSortableAttribute { .. } => Code::Sort,
|
||||
UserError::CriterionError(_) => Code::InvalidRankingRule,
|
||||
UserError::InvalidGeoField { .. } => Code::InvalidGeoField,
|
||||
UserError::SortError(_) => Code::Sort,
|
||||
UserError::InvalidMinTypoWordLenSetting(_, _) => {
|
||||
Code::InvalidMinWordLengthForTypo
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
@@ -1,160 +0,0 @@
|
||||
use std::fs::{create_dir_all, File};
|
||||
use std::io::{BufReader, Seek, SeekFrom, Write};
|
||||
use std::path::Path;
|
||||
|
||||
use anyhow::Context;
|
||||
use indexmap::IndexMap;
|
||||
use milli::documents::DocumentsBatchReader;
|
||||
use milli::heed::{EnvOpenOptions, RoTxn};
|
||||
use milli::update::{IndexDocumentsConfig, IndexerConfig};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::document_formats::read_ndjson;
|
||||
use crate::index::updates::apply_settings_to_builder;
|
||||
|
||||
use super::error::Result;
|
||||
use super::{index::Index, Settings, Unchecked};
|
||||
|
||||
#[derive(Serialize, Deserialize)]
|
||||
struct DumpMeta {
|
||||
settings: Settings<Unchecked>,
|
||||
primary_key: Option<String>,
|
||||
}
|
||||
|
||||
const META_FILE_NAME: &str = "meta.json";
|
||||
const DATA_FILE_NAME: &str = "documents.jsonl";
|
||||
|
||||
impl Index {
|
||||
pub fn dump(&self, path: impl AsRef<Path>) -> Result<()> {
|
||||
// acquire write txn make sure any ongoing write is finished before we start.
|
||||
let txn = self.write_txn()?;
|
||||
let path = path.as_ref().join(format!("indexes/{}", self.uuid));
|
||||
|
||||
create_dir_all(&path)?;
|
||||
|
||||
self.dump_documents(&txn, &path)?;
|
||||
self.dump_meta(&txn, &path)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn dump_documents(&self, txn: &RoTxn, path: impl AsRef<Path>) -> Result<()> {
|
||||
let document_file_path = path.as_ref().join(DATA_FILE_NAME);
|
||||
let mut document_file = File::create(&document_file_path)?;
|
||||
|
||||
let documents = self.all_documents(txn)?;
|
||||
let fields_ids_map = self.fields_ids_map(txn)?;
|
||||
|
||||
// dump documents
|
||||
let mut json_map = IndexMap::new();
|
||||
for document in documents {
|
||||
let (_, reader) = document?;
|
||||
|
||||
for (fid, bytes) in reader.iter() {
|
||||
if let Some(name) = fields_ids_map.name(fid) {
|
||||
json_map.insert(name, serde_json::from_slice::<serde_json::Value>(bytes)?);
|
||||
}
|
||||
}
|
||||
|
||||
serde_json::to_writer(&mut document_file, &json_map)?;
|
||||
document_file.write_all(b"\n")?;
|
||||
|
||||
json_map.clear();
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn dump_meta(&self, txn: &RoTxn, path: impl AsRef<Path>) -> Result<()> {
|
||||
let meta_file_path = path.as_ref().join(META_FILE_NAME);
|
||||
let mut meta_file = File::create(&meta_file_path)?;
|
||||
|
||||
let settings = self.settings_txn(txn)?.into_unchecked();
|
||||
let primary_key = self.primary_key(txn)?.map(String::from);
|
||||
let meta = DumpMeta {
|
||||
settings,
|
||||
primary_key,
|
||||
};
|
||||
|
||||
serde_json::to_writer(&mut meta_file, &meta)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn load_dump(
|
||||
src: impl AsRef<Path>,
|
||||
dst: impl AsRef<Path>,
|
||||
size: usize,
|
||||
indexer_config: &IndexerConfig,
|
||||
) -> anyhow::Result<()> {
|
||||
let dir_name = src
|
||||
.as_ref()
|
||||
.file_name()
|
||||
.with_context(|| format!("invalid dump index: {}", src.as_ref().display()))?;
|
||||
|
||||
let dst_dir_path = dst.as_ref().join("indexes").join(dir_name);
|
||||
create_dir_all(&dst_dir_path)?;
|
||||
|
||||
let meta_path = src.as_ref().join(META_FILE_NAME);
|
||||
let meta_file = File::open(meta_path)?;
|
||||
let DumpMeta {
|
||||
settings,
|
||||
primary_key,
|
||||
} = serde_json::from_reader(meta_file)?;
|
||||
let settings = settings.check();
|
||||
|
||||
let mut options = EnvOpenOptions::new();
|
||||
options.map_size(size);
|
||||
let index = milli::Index::new(options, &dst_dir_path)?;
|
||||
|
||||
let mut txn = index.write_txn()?;
|
||||
|
||||
// Apply settings first
|
||||
let mut builder = milli::update::Settings::new(&mut txn, &index, indexer_config);
|
||||
|
||||
if let Some(primary_key) = primary_key {
|
||||
builder.set_primary_key(primary_key);
|
||||
}
|
||||
|
||||
apply_settings_to_builder(&settings, &mut builder);
|
||||
|
||||
builder.execute(|_| ())?;
|
||||
|
||||
let document_file_path = src.as_ref().join(DATA_FILE_NAME);
|
||||
let reader = BufReader::new(File::open(&document_file_path)?);
|
||||
|
||||
let mut tmp_doc_file = tempfile::tempfile()?;
|
||||
|
||||
let empty = match read_ndjson(reader, &mut tmp_doc_file) {
|
||||
// if there was no document in the file it's because the index was empty
|
||||
Ok(0) => true,
|
||||
Ok(_) => false,
|
||||
Err(e) => return Err(e.into()),
|
||||
};
|
||||
|
||||
if !empty {
|
||||
tmp_doc_file.seek(SeekFrom::Start(0))?;
|
||||
|
||||
let documents_reader = DocumentsBatchReader::from_reader(tmp_doc_file)?;
|
||||
|
||||
//If the document file is empty, we don't perform the document addition, to prevent
|
||||
//a primary key error to be thrown.
|
||||
let config = IndexDocumentsConfig::default();
|
||||
let builder = milli::update::IndexDocuments::new(
|
||||
&mut txn,
|
||||
&index,
|
||||
indexer_config,
|
||||
config,
|
||||
|_| (),
|
||||
)?;
|
||||
let (builder, user_error) = builder.add_documents(documents_reader)?;
|
||||
user_error?;
|
||||
builder.execute()?;
|
||||
}
|
||||
|
||||
txn.commit()?;
|
||||
index.prepare_for_closing().wait();
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
@@ -1,61 +0,0 @@
|
||||
use std::error::Error;
|
||||
|
||||
use meilisearch_types::error::{Code, ErrorCode};
|
||||
use meilisearch_types::internal_error;
|
||||
use serde_json::Value;
|
||||
|
||||
use crate::{error::MilliError, update_file_store};
|
||||
|
||||
pub type Result<T> = std::result::Result<T, IndexError>;
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub enum IndexError {
|
||||
#[error("An internal error has occurred. `{0}`.")]
|
||||
Internal(Box<dyn Error + Send + Sync + 'static>),
|
||||
#[error("Document `{0}` not found.")]
|
||||
DocumentNotFound(String),
|
||||
#[error("{0}")]
|
||||
Facet(#[from] FacetError),
|
||||
#[error("{0}")]
|
||||
Milli(#[from] milli::Error),
|
||||
}
|
||||
|
||||
// NOTE(review): `internal_error!` comes from `meilisearch_types`; it presumably generates
// a `From<E> for IndexError` impl wrapping each listed error type into
// `IndexError::Internal` — confirm against the macro definition.
internal_error!(
    IndexError: std::io::Error,
    milli::heed::Error,
    fst::Error,
    serde_json::Error,
    update_file_store::UpdateFileStoreError,
    milli::documents::Error
);
|
||||
|
||||
impl ErrorCode for IndexError {
|
||||
fn error_code(&self) -> Code {
|
||||
match self {
|
||||
IndexError::Internal(_) => Code::Internal,
|
||||
IndexError::DocumentNotFound(_) => Code::DocumentNotFound,
|
||||
IndexError::Facet(e) => e.error_code(),
|
||||
IndexError::Milli(e) => MilliError(e).error_code(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<milli::UserError> for IndexError {
|
||||
fn from(error: milli::UserError) -> IndexError {
|
||||
IndexError::Milli(error.into())
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub enum FacetError {
|
||||
#[error("Invalid syntax for the filter parameter: `expected {}, found: {1}`.", .0.join(", "))]
|
||||
InvalidExpression(&'static [&'static str], Value),
|
||||
}
|
||||
|
||||
impl ErrorCode for FacetError {
|
||||
fn error_code(&self) -> Code {
|
||||
match self {
|
||||
FacetError::InvalidExpression(_, _) => Code::Filter,
|
||||
}
|
||||
}
|
||||
}
|
@@ -1,332 +0,0 @@
|
||||
use std::collections::BTreeSet;
|
||||
use std::fs::create_dir_all;
|
||||
use std::marker::PhantomData;
|
||||
use std::ops::Deref;
|
||||
use std::path::Path;
|
||||
use std::sync::Arc;
|
||||
|
||||
use fst::IntoStreamer;
|
||||
use milli::heed::{CompactionOption, EnvOpenOptions, RoTxn};
|
||||
use milli::update::{IndexerConfig, Setting};
|
||||
use milli::{obkv_to_json, FieldDistribution, DEFAULT_VALUES_PER_FACET};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_json::{Map, Value};
|
||||
use time::OffsetDateTime;
|
||||
use uuid::Uuid;
|
||||
use walkdir::WalkDir;
|
||||
|
||||
use crate::index::search::DEFAULT_PAGINATION_MAX_TOTAL_HITS;
|
||||
|
||||
use super::error::IndexError;
|
||||
use super::error::Result;
|
||||
use super::updates::{FacetingSettings, MinWordSizeTyposSetting, PaginationSettings, TypoSettings};
|
||||
use super::{Checked, Settings};
|
||||
|
||||
/// A document represented as a JSON object: field name to JSON value.
pub type Document = Map<String, Value>;
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct IndexMeta {
|
||||
#[serde(with = "time::serde::rfc3339")]
|
||||
pub created_at: OffsetDateTime,
|
||||
#[serde(with = "time::serde::rfc3339")]
|
||||
pub updated_at: OffsetDateTime,
|
||||
pub primary_key: Option<String>,
|
||||
}
|
||||
|
||||
impl IndexMeta {
|
||||
pub fn new(index: &Index) -> Result<Self> {
|
||||
let txn = index.read_txn()?;
|
||||
Self::new_txn(index, &txn)
|
||||
}
|
||||
|
||||
pub fn new_txn(index: &Index, txn: &milli::heed::RoTxn) -> Result<Self> {
|
||||
let created_at = index.created_at(txn)?;
|
||||
let updated_at = index.updated_at(txn)?;
|
||||
let primary_key = index.primary_key(txn)?.map(String::from);
|
||||
Ok(Self {
|
||||
created_at,
|
||||
updated_at,
|
||||
primary_key,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Serialize, Debug)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct IndexStats {
|
||||
#[serde(skip)]
|
||||
pub size: u64,
|
||||
pub number_of_documents: u64,
|
||||
/// Whether the current index is performing an update. It is initially `None` when the
|
||||
/// index returns it, since it is the `UpdateStore` that knows what index is currently indexing. It is
|
||||
/// later set to either true or false, we we retrieve the information from the `UpdateStore`
|
||||
pub is_indexing: Option<bool>,
|
||||
pub field_distribution: FieldDistribution,
|
||||
}
|
||||
|
||||
#[derive(Clone, derivative::Derivative)]
|
||||
#[derivative(Debug)]
|
||||
pub struct Index {
|
||||
pub uuid: Uuid,
|
||||
#[derivative(Debug = "ignore")]
|
||||
pub inner: Arc<milli::Index>,
|
||||
#[derivative(Debug = "ignore")]
|
||||
pub indexer_config: Arc<IndexerConfig>,
|
||||
}
|
||||
|
||||
impl Deref for Index {
|
||||
type Target = milli::Index;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
self.inner.as_ref()
|
||||
}
|
||||
}
|
||||
|
||||
impl Index {
|
||||
pub fn open(
|
||||
path: impl AsRef<Path>,
|
||||
size: usize,
|
||||
uuid: Uuid,
|
||||
update_handler: Arc<IndexerConfig>,
|
||||
) -> Result<Self> {
|
||||
log::debug!("opening index in {}", path.as_ref().display());
|
||||
create_dir_all(&path)?;
|
||||
let mut options = EnvOpenOptions::new();
|
||||
options.map_size(size);
|
||||
let inner = Arc::new(milli::Index::new(options, &path)?);
|
||||
Ok(Index {
|
||||
inner,
|
||||
uuid,
|
||||
indexer_config: update_handler,
|
||||
})
|
||||
}
|
||||
|
||||
/// Asynchronously close the underlying index
|
||||
pub fn close(self) {
|
||||
self.inner.as_ref().clone().prepare_for_closing();
|
||||
}
|
||||
|
||||
pub fn stats(&self) -> Result<IndexStats> {
|
||||
let rtxn = self.read_txn()?;
|
||||
|
||||
Ok(IndexStats {
|
||||
size: self.size(),
|
||||
number_of_documents: self.number_of_documents(&rtxn)?,
|
||||
is_indexing: None,
|
||||
field_distribution: self.field_distribution(&rtxn)?,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn meta(&self) -> Result<IndexMeta> {
|
||||
IndexMeta::new(self)
|
||||
}
|
||||
/// Returns the settings of the index, read through a fresh read transaction.
pub fn settings(&self) -> Result<Settings<Checked>> {
    let rtxn = self.read_txn()?;
    self.settings_txn(&rtxn)
}
|
||||
|
||||
/// Returns the uuid identifying this index.
pub fn uuid(&self) -> Uuid {
    self.uuid
}
|
||||
|
||||
pub fn settings_txn(&self, txn: &RoTxn) -> Result<Settings<Checked>> {
|
||||
let displayed_attributes = self
|
||||
.displayed_fields(txn)?
|
||||
.map(|fields| fields.into_iter().map(String::from).collect());
|
||||
|
||||
let searchable_attributes = self
|
||||
.user_defined_searchable_fields(txn)?
|
||||
.map(|fields| fields.into_iter().map(String::from).collect());
|
||||
|
||||
let filterable_attributes = self.filterable_fields(txn)?.into_iter().collect();
|
||||
|
||||
let sortable_attributes = self.sortable_fields(txn)?.into_iter().collect();
|
||||
|
||||
let criteria = self
|
||||
.criteria(txn)?
|
||||
.into_iter()
|
||||
.map(|c| c.to_string())
|
||||
.collect();
|
||||
|
||||
let stop_words = self
|
||||
.stop_words(txn)?
|
||||
.map(|stop_words| -> Result<BTreeSet<_>> {
|
||||
Ok(stop_words.stream().into_strs()?.into_iter().collect())
|
||||
})
|
||||
.transpose()?
|
||||
.unwrap_or_default();
|
||||
let distinct_field = self.distinct_field(txn)?.map(String::from);
|
||||
|
||||
// in milli each word in the synonyms map were split on their separator. Since we lost
|
||||
// this information we are going to put space between words.
|
||||
let synonyms = self
|
||||
.synonyms(txn)?
|
||||
.iter()
|
||||
.map(|(key, values)| {
|
||||
(
|
||||
key.join(" "),
|
||||
values.iter().map(|value| value.join(" ")).collect(),
|
||||
)
|
||||
})
|
||||
.collect();
|
||||
|
||||
let min_typo_word_len = MinWordSizeTyposSetting {
|
||||
one_typo: Setting::Set(self.min_word_len_one_typo(txn)?),
|
||||
two_typos: Setting::Set(self.min_word_len_two_typos(txn)?),
|
||||
};
|
||||
|
||||
let disabled_words = match self.exact_words(txn)? {
|
||||
Some(fst) => fst.into_stream().into_strs()?.into_iter().collect(),
|
||||
None => BTreeSet::new(),
|
||||
};
|
||||
|
||||
let disabled_attributes = self
|
||||
.exact_attributes(txn)?
|
||||
.into_iter()
|
||||
.map(String::from)
|
||||
.collect();
|
||||
|
||||
let typo_tolerance = TypoSettings {
|
||||
enabled: Setting::Set(self.authorize_typos(txn)?),
|
||||
min_word_size_for_typos: Setting::Set(min_typo_word_len),
|
||||
disable_on_words: Setting::Set(disabled_words),
|
||||
disable_on_attributes: Setting::Set(disabled_attributes),
|
||||
};
|
||||
|
||||
let faceting = FacetingSettings {
|
||||
max_values_per_facet: Setting::Set(
|
||||
self.max_values_per_facet(txn)?
|
||||
.unwrap_or(DEFAULT_VALUES_PER_FACET),
|
||||
),
|
||||
};
|
||||
|
||||
let pagination = PaginationSettings {
|
||||
max_total_hits: Setting::Set(
|
||||
self.pagination_max_total_hits(txn)?
|
||||
.unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS),
|
||||
),
|
||||
};
|
||||
|
||||
Ok(Settings {
|
||||
displayed_attributes: match displayed_attributes {
|
||||
Some(attrs) => Setting::Set(attrs),
|
||||
None => Setting::Reset,
|
||||
},
|
||||
searchable_attributes: match searchable_attributes {
|
||||
Some(attrs) => Setting::Set(attrs),
|
||||
None => Setting::Reset,
|
||||
},
|
||||
filterable_attributes: Setting::Set(filterable_attributes),
|
||||
sortable_attributes: Setting::Set(sortable_attributes),
|
||||
ranking_rules: Setting::Set(criteria),
|
||||
stop_words: Setting::Set(stop_words),
|
||||
distinct_attribute: match distinct_field {
|
||||
Some(field) => Setting::Set(field),
|
||||
None => Setting::Reset,
|
||||
},
|
||||
synonyms: Setting::Set(synonyms),
|
||||
typo_tolerance: Setting::Set(typo_tolerance),
|
||||
faceting: Setting::Set(faceting),
|
||||
pagination: Setting::Set(pagination),
|
||||
_kind: PhantomData,
|
||||
})
|
||||
}
|
||||
|
||||
/// Return the total number of documents contained in the index + the selected documents.
|
||||
pub fn retrieve_documents<S: AsRef<str>>(
|
||||
&self,
|
||||
offset: usize,
|
||||
limit: usize,
|
||||
attributes_to_retrieve: Option<Vec<S>>,
|
||||
) -> Result<(u64, Vec<Document>)> {
|
||||
let txn = self.read_txn()?;
|
||||
|
||||
let fields_ids_map = self.fields_ids_map(&txn)?;
|
||||
let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
|
||||
|
||||
let mut documents = Vec::new();
|
||||
for entry in self.all_documents(&txn)?.skip(offset).take(limit) {
|
||||
let (_id, obkv) = entry?;
|
||||
let document = obkv_to_json(&all_fields, &fields_ids_map, obkv)?;
|
||||
let document = match &attributes_to_retrieve {
|
||||
Some(attributes_to_retrieve) => permissive_json_pointer::select_values(
|
||||
&document,
|
||||
attributes_to_retrieve.iter().map(|s| s.as_ref()),
|
||||
),
|
||||
None => document,
|
||||
};
|
||||
documents.push(document);
|
||||
}
|
||||
|
||||
let number_of_documents = self.number_of_documents(&txn)?;
|
||||
|
||||
Ok((number_of_documents, documents))
|
||||
}
|
||||
|
||||
pub fn retrieve_document<S: AsRef<str>>(
|
||||
&self,
|
||||
doc_id: String,
|
||||
attributes_to_retrieve: Option<Vec<S>>,
|
||||
) -> Result<Document> {
|
||||
let txn = self.read_txn()?;
|
||||
|
||||
let fields_ids_map = self.fields_ids_map(&txn)?;
|
||||
let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
|
||||
|
||||
let internal_id = self
|
||||
.external_documents_ids(&txn)?
|
||||
.get(doc_id.as_bytes())
|
||||
.ok_or_else(|| IndexError::DocumentNotFound(doc_id.clone()))?;
|
||||
|
||||
let document = self
|
||||
.documents(&txn, std::iter::once(internal_id))?
|
||||
.into_iter()
|
||||
.next()
|
||||
.map(|(_, d)| d)
|
||||
.ok_or(IndexError::DocumentNotFound(doc_id))?;
|
||||
|
||||
let document = obkv_to_json(&all_fields, &fields_ids_map, document)?;
|
||||
let document = match &attributes_to_retrieve {
|
||||
Some(attributes_to_retrieve) => permissive_json_pointer::select_values(
|
||||
&document,
|
||||
attributes_to_retrieve.iter().map(|s| s.as_ref()),
|
||||
),
|
||||
None => document,
|
||||
};
|
||||
|
||||
Ok(document)
|
||||
}
|
||||
|
||||
pub fn size(&self) -> u64 {
|
||||
WalkDir::new(self.path())
|
||||
.into_iter()
|
||||
.filter_map(|entry| entry.ok())
|
||||
.filter_map(|entry| entry.metadata().ok())
|
||||
.filter(|metadata| metadata.is_file())
|
||||
.fold(0, |acc, m| acc + m.len())
|
||||
}
|
||||
|
||||
pub fn snapshot(&self, path: impl AsRef<Path>) -> Result<()> {
|
||||
let mut dst = path.as_ref().join(format!("indexes/{}/", self.uuid));
|
||||
create_dir_all(&dst)?;
|
||||
dst.push("data.mdb");
|
||||
let _txn = self.write_txn()?;
|
||||
self.inner.copy_to_path(dst, CompactionOption::Enabled)?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// When running tests, when a server instance is dropped, the environment is not actually closed,
|
||||
/// leaving a lot of open file descriptors.
|
||||
impl Drop for Index {
|
||||
fn drop(&mut self) {
|
||||
// When dropping the last instance of an index, we want to close the index
|
||||
// Note that the close is actually performed only if all the instances a effectively
|
||||
// dropped
|
||||
|
||||
if Arc::strong_count(&self.inner) == 1 {
|
||||
self.inner.as_ref().clone().prepare_for_closing();
|
||||
}
|
||||
}
|
||||
}
|
@@ -1,249 +0,0 @@
|
||||
pub use search::{
|
||||
MatchingStrategy, SearchQuery, SearchResult, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER,
|
||||
DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT,
|
||||
};
|
||||
pub use updates::{apply_settings_to_builder, Checked, Facets, Settings, Unchecked};
|
||||
|
||||
mod dump;
|
||||
pub mod error;
|
||||
mod search;
|
||||
pub mod updates;
|
||||
|
||||
#[allow(clippy::module_inception)]
|
||||
mod index;
|
||||
|
||||
pub use index::{Document, IndexMeta, IndexStats};
|
||||
|
||||
#[cfg(not(test))]
|
||||
pub use index::Index;
|
||||
|
||||
#[cfg(test)]
|
||||
pub use test::MockIndex as Index;
|
||||
|
||||
/// The index::test module provides means of mocking an index instance. It can be used throughout the
|
||||
/// code for unit testing, in places where an index would normally be used.
|
||||
#[cfg(test)]
|
||||
pub mod test {
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::sync::Arc;
|
||||
|
||||
use milli::update::{
|
||||
DocumentAdditionResult, DocumentDeletionResult, IndexDocumentsMethod, IndexerConfig,
|
||||
};
|
||||
use nelson::Mocker;
|
||||
use uuid::Uuid;
|
||||
|
||||
use super::error::Result;
|
||||
use super::index::Index;
|
||||
use super::Document;
|
||||
use super::{Checked, IndexMeta, IndexStats, SearchQuery, SearchResult, Settings};
|
||||
use crate::update_file_store::UpdateFileStore;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub enum MockIndex {
|
||||
Real(Index),
|
||||
Mock(Arc<Mocker>),
|
||||
}
|
||||
|
||||
impl MockIndex {
|
||||
pub fn mock(mocker: Mocker) -> Self {
|
||||
Self::Mock(Arc::new(mocker))
|
||||
}
|
||||
|
||||
pub fn open(
|
||||
path: impl AsRef<Path>,
|
||||
size: usize,
|
||||
uuid: Uuid,
|
||||
update_handler: Arc<IndexerConfig>,
|
||||
) -> Result<Self> {
|
||||
let index = Index::open(path, size, uuid, update_handler)?;
|
||||
Ok(Self::Real(index))
|
||||
}
|
||||
|
||||
pub fn load_dump(
|
||||
src: impl AsRef<Path>,
|
||||
dst: impl AsRef<Path>,
|
||||
size: usize,
|
||||
update_handler: &IndexerConfig,
|
||||
) -> anyhow::Result<()> {
|
||||
Index::load_dump(src, dst, size, update_handler)
|
||||
}
|
||||
|
||||
pub fn uuid(&self) -> Uuid {
|
||||
match self {
|
||||
MockIndex::Real(index) => index.uuid(),
|
||||
MockIndex::Mock(m) => unsafe { m.get("uuid").call(()) },
|
||||
}
|
||||
}
|
||||
|
||||
pub fn stats(&self) -> Result<IndexStats> {
|
||||
match self {
|
||||
MockIndex::Real(index) => index.stats(),
|
||||
MockIndex::Mock(m) => unsafe { m.get("stats").call(()) },
|
||||
}
|
||||
}
|
||||
|
||||
pub fn meta(&self) -> Result<IndexMeta> {
|
||||
match self {
|
||||
MockIndex::Real(index) => index.meta(),
|
||||
MockIndex::Mock(_) => todo!(),
|
||||
}
|
||||
}
|
||||
pub fn settings(&self) -> Result<Settings<Checked>> {
|
||||
match self {
|
||||
MockIndex::Real(index) => index.settings(),
|
||||
MockIndex::Mock(_) => todo!(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn retrieve_documents<S: AsRef<str>>(
|
||||
&self,
|
||||
offset: usize,
|
||||
limit: usize,
|
||||
attributes_to_retrieve: Option<Vec<S>>,
|
||||
) -> Result<(u64, Vec<Document>)> {
|
||||
match self {
|
||||
MockIndex::Real(index) => {
|
||||
index.retrieve_documents(offset, limit, attributes_to_retrieve)
|
||||
}
|
||||
MockIndex::Mock(_) => todo!(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn retrieve_document<S: AsRef<str>>(
|
||||
&self,
|
||||
doc_id: String,
|
||||
attributes_to_retrieve: Option<Vec<S>>,
|
||||
) -> Result<Document> {
|
||||
match self {
|
||||
MockIndex::Real(index) => index.retrieve_document(doc_id, attributes_to_retrieve),
|
||||
MockIndex::Mock(_) => todo!(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn size(&self) -> u64 {
|
||||
match self {
|
||||
MockIndex::Real(index) => index.size(),
|
||||
MockIndex::Mock(_) => todo!(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn snapshot(&self, path: impl AsRef<Path>) -> Result<()> {
|
||||
match self {
|
||||
MockIndex::Real(index) => index.snapshot(path),
|
||||
MockIndex::Mock(m) => unsafe { m.get("snapshot").call(path.as_ref()) },
|
||||
}
|
||||
}
|
||||
|
||||
pub fn close(self) {
|
||||
match self {
|
||||
MockIndex::Real(index) => index.close(),
|
||||
MockIndex::Mock(m) => unsafe { m.get("close").call(()) },
|
||||
}
|
||||
}
|
||||
|
||||
pub fn perform_search(&self, query: SearchQuery) -> Result<SearchResult> {
|
||||
match self {
|
||||
MockIndex::Real(index) => index.perform_search(query),
|
||||
MockIndex::Mock(m) => unsafe { m.get("perform_search").call(query) },
|
||||
}
|
||||
}
|
||||
|
||||
pub fn dump(&self, path: impl AsRef<Path>) -> Result<()> {
|
||||
match self {
|
||||
MockIndex::Real(index) => index.dump(path),
|
||||
MockIndex::Mock(m) => unsafe { m.get("dump").call(path.as_ref()) },
|
||||
}
|
||||
}
|
||||
|
||||
pub fn update_documents(
|
||||
&self,
|
||||
method: IndexDocumentsMethod,
|
||||
primary_key: Option<String>,
|
||||
file_store: UpdateFileStore,
|
||||
contents: impl Iterator<Item = Uuid>,
|
||||
) -> Result<Vec<Result<DocumentAdditionResult>>> {
|
||||
match self {
|
||||
MockIndex::Real(index) => {
|
||||
index.update_documents(method, primary_key, file_store, contents)
|
||||
}
|
||||
MockIndex::Mock(mocker) => unsafe {
|
||||
mocker
|
||||
.get("update_documents")
|
||||
.call((method, primary_key, file_store, contents))
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
pub fn update_settings(&self, settings: &Settings<Checked>) -> Result<()> {
|
||||
match self {
|
||||
MockIndex::Real(index) => index.update_settings(settings),
|
||||
MockIndex::Mock(m) => unsafe { m.get("update_settings").call(settings) },
|
||||
}
|
||||
}
|
||||
|
||||
pub fn update_primary_key(&self, primary_key: String) -> Result<IndexMeta> {
|
||||
match self {
|
||||
MockIndex::Real(index) => index.update_primary_key(primary_key),
|
||||
MockIndex::Mock(m) => unsafe { m.get("update_primary_key").call(primary_key) },
|
||||
}
|
||||
}
|
||||
|
||||
pub fn delete_documents(&self, ids: &[String]) -> Result<DocumentDeletionResult> {
|
||||
match self {
|
||||
MockIndex::Real(index) => index.delete_documents(ids),
|
||||
MockIndex::Mock(m) => unsafe { m.get("delete_documents").call(ids) },
|
||||
}
|
||||
}
|
||||
|
||||
pub fn clear_documents(&self) -> Result<()> {
|
||||
match self {
|
||||
MockIndex::Real(index) => index.clear_documents(),
|
||||
MockIndex::Mock(m) => unsafe { m.get("clear_documents").call(()) },
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A `snapshot` stub registered for two calls can be called twice through the mock.
#[test]
fn test_faux_index() {
    let faux = Mocker::default();
    faux.when("snapshot")
        .times(2)
        .then(|_: &Path| -> Result<()> { Ok(()) });

    let index = MockIndex::mock(faux);
    let path = PathBuf::from("hello");

    for _ in 0..2 {
        index.snapshot(&path).unwrap();
    }
}
|
||||
|
||||
/// Calling a method for which no stub was registered is expected to panic.
#[test]
#[should_panic]
fn test_faux_unexisting_method_stub() {
    let index = MockIndex::mock(Mocker::default());
    let path = PathBuf::from("hello");

    for _ in 0..2 {
        index.snapshot(&path).unwrap();
    }
}
|
||||
|
||||
/// A panic raised inside a stub is expected to reach the caller of the mocked method.
#[test]
#[should_panic]
fn test_faux_panic() {
    let faux = Mocker::default();
    faux.when("snapshot")
        .times(2)
        .then(|_: &Path| -> Result<()> {
            panic!();
        });

    let index = MockIndex::mock(faux);
    let path = PathBuf::from("hello");

    for _ in 0..2 {
        index.snapshot(&path).unwrap();
    }
}
|
||||
}
|
@@ -1,688 +0,0 @@
|
||||
use std::cmp::min;
|
||||
use std::collections::{BTreeMap, BTreeSet, HashSet};
|
||||
use std::str::FromStr;
|
||||
use std::time::Instant;
|
||||
|
||||
use either::Either;
|
||||
use milli::tokenizer::TokenizerBuilder;
|
||||
use milli::{
|
||||
AscDesc, FieldId, FieldsIdsMap, Filter, FormatOptions, MatchBounds, MatcherBuilder, SortError,
|
||||
TermsMatchingStrategy, DEFAULT_VALUES_PER_FACET,
|
||||
};
|
||||
use regex::Regex;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_json::{json, Value};
|
||||
|
||||
use crate::index::error::FacetError;
|
||||
|
||||
use super::error::{IndexError, Result};
|
||||
use super::index::Index;
|
||||
|
||||
pub type Document = serde_json::Map<String, Value>;
|
||||
type MatchesPosition = BTreeMap<String, Vec<MatchBounds>>;
|
||||
|
||||
pub const DEFAULT_SEARCH_LIMIT: fn() -> usize = || 20;
|
||||
pub const DEFAULT_CROP_LENGTH: fn() -> usize = || 10;
|
||||
pub const DEFAULT_CROP_MARKER: fn() -> String = || "…".to_string();
|
||||
pub const DEFAULT_HIGHLIGHT_PRE_TAG: fn() -> String = || "<em>".to_string();
|
||||
pub const DEFAULT_HIGHLIGHT_POST_TAG: fn() -> String = || "</em>".to_string();
|
||||
|
||||
/// The maximimum number of results that the engine
|
||||
/// will be able to return in one search call.
|
||||
pub const DEFAULT_PAGINATION_MAX_TOTAL_HITS: usize = 1000;
|
||||
|
||||
#[derive(Deserialize, Debug, Clone, PartialEq, Eq)]
|
||||
#[serde(rename_all = "camelCase", deny_unknown_fields)]
|
||||
pub struct SearchQuery {
|
||||
pub q: Option<String>,
|
||||
pub offset: Option<usize>,
|
||||
#[serde(default = "DEFAULT_SEARCH_LIMIT")]
|
||||
pub limit: usize,
|
||||
pub attributes_to_retrieve: Option<BTreeSet<String>>,
|
||||
pub attributes_to_crop: Option<Vec<String>>,
|
||||
#[serde(default = "DEFAULT_CROP_LENGTH")]
|
||||
pub crop_length: usize,
|
||||
pub attributes_to_highlight: Option<HashSet<String>>,
|
||||
// Default to false
|
||||
#[serde(default = "Default::default")]
|
||||
pub show_matches_position: bool,
|
||||
pub filter: Option<Value>,
|
||||
pub sort: Option<Vec<String>>,
|
||||
pub facets: Option<Vec<String>>,
|
||||
#[serde(default = "DEFAULT_HIGHLIGHT_PRE_TAG")]
|
||||
pub highlight_pre_tag: String,
|
||||
#[serde(default = "DEFAULT_HIGHLIGHT_POST_TAG")]
|
||||
pub highlight_post_tag: String,
|
||||
#[serde(default = "DEFAULT_CROP_MARKER")]
|
||||
pub crop_marker: String,
|
||||
#[serde(default)]
|
||||
pub matching_strategy: MatchingStrategy,
|
||||
}
|
||||
|
||||
#[derive(Deserialize, Debug, Clone, PartialEq, Eq)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub enum MatchingStrategy {
|
||||
/// Remove query words from last to first
|
||||
Last,
|
||||
/// All query words are mandatory
|
||||
All,
|
||||
}
|
||||
|
||||
impl Default for MatchingStrategy {
|
||||
fn default() -> Self {
|
||||
Self::Last
|
||||
}
|
||||
}
|
||||
|
||||
impl From<MatchingStrategy> for TermsMatchingStrategy {
|
||||
fn from(other: MatchingStrategy) -> Self {
|
||||
match other {
|
||||
MatchingStrategy::Last => Self::Last,
|
||||
MatchingStrategy::All => Self::All,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, PartialEq)]
|
||||
pub struct SearchHit {
|
||||
#[serde(flatten)]
|
||||
pub document: Document,
|
||||
#[serde(rename = "_formatted", skip_serializing_if = "Document::is_empty")]
|
||||
pub formatted: Document,
|
||||
#[serde(rename = "_matchesPosition", skip_serializing_if = "Option::is_none")]
|
||||
pub matches_position: Option<MatchesPosition>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Debug, Clone, PartialEq)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct SearchResult {
|
||||
pub hits: Vec<SearchHit>,
|
||||
pub estimated_total_hits: u64,
|
||||
pub query: String,
|
||||
pub limit: usize,
|
||||
pub offset: usize,
|
||||
pub processing_time_ms: u128,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub facet_distribution: Option<BTreeMap<String, BTreeMap<String, u64>>>,
|
||||
}
|
||||
|
||||
impl Index {
|
||||
pub fn perform_search(&self, query: SearchQuery) -> Result<SearchResult> {
|
||||
let before_search = Instant::now();
|
||||
let rtxn = self.read_txn()?;
|
||||
|
||||
let mut search = self.search(&rtxn);
|
||||
|
||||
if let Some(ref query) = query.q {
|
||||
search.query(query);
|
||||
}
|
||||
|
||||
search.terms_matching_strategy(query.matching_strategy.into());
|
||||
|
||||
let max_total_hits = self
|
||||
.pagination_max_total_hits(&rtxn)?
|
||||
.unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS);
|
||||
|
||||
// Make sure that a user can't get more documents than the hard limit,
|
||||
// we align that on the offset too.
|
||||
let offset = min(query.offset.unwrap_or(0), max_total_hits);
|
||||
let limit = min(query.limit, max_total_hits.saturating_sub(offset));
|
||||
|
||||
search.offset(offset);
|
||||
search.limit(limit);
|
||||
|
||||
if let Some(ref filter) = query.filter {
|
||||
if let Some(facets) = parse_filter(filter)? {
|
||||
search.filter(facets);
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(ref sort) = query.sort {
|
||||
let sort = match sort.iter().map(|s| AscDesc::from_str(s)).collect() {
|
||||
Ok(sorts) => sorts,
|
||||
Err(asc_desc_error) => {
|
||||
return Err(IndexError::Milli(SortError::from(asc_desc_error).into()))
|
||||
}
|
||||
};
|
||||
|
||||
search.sort_criteria(sort);
|
||||
}
|
||||
|
||||
let milli::SearchResult {
|
||||
documents_ids,
|
||||
matching_words,
|
||||
candidates,
|
||||
..
|
||||
} = search.execute()?;
|
||||
|
||||
let fields_ids_map = self.fields_ids_map(&rtxn).unwrap();
|
||||
|
||||
let displayed_ids = self
|
||||
.displayed_fields_ids(&rtxn)?
|
||||
.map(|fields| fields.into_iter().collect::<BTreeSet<_>>())
|
||||
.unwrap_or_else(|| fields_ids_map.iter().map(|(id, _)| id).collect());
|
||||
|
||||
let fids = |attrs: &BTreeSet<String>| {
|
||||
let mut ids = BTreeSet::new();
|
||||
for attr in attrs {
|
||||
if attr == "*" {
|
||||
ids = displayed_ids.clone();
|
||||
break;
|
||||
}
|
||||
|
||||
if let Some(id) = fields_ids_map.id(attr) {
|
||||
ids.insert(id);
|
||||
}
|
||||
}
|
||||
ids
|
||||
};
|
||||
|
||||
// The attributes to retrieve are the ones explicitly marked as to retrieve (all by default),
|
||||
// but these attributes must be also be present
|
||||
// - in the fields_ids_map
|
||||
// - in the the displayed attributes
|
||||
let to_retrieve_ids: BTreeSet<_> = query
|
||||
.attributes_to_retrieve
|
||||
.as_ref()
|
||||
.map(fids)
|
||||
.unwrap_or_else(|| displayed_ids.clone())
|
||||
.intersection(&displayed_ids)
|
||||
.cloned()
|
||||
.collect();
|
||||
|
||||
let attr_to_highlight = query.attributes_to_highlight.unwrap_or_default();
|
||||
|
||||
let attr_to_crop = query.attributes_to_crop.unwrap_or_default();
|
||||
|
||||
// Attributes in `formatted_options` correspond to the attributes that will be in `_formatted`
|
||||
// These attributes are:
|
||||
// - the attributes asked to be highlighted or cropped (with `attributesToCrop` or `attributesToHighlight`)
|
||||
// - the attributes asked to be retrieved: these attributes will not be highlighted/cropped
|
||||
// But these attributes must be also present in displayed attributes
|
||||
let formatted_options = compute_formatted_options(
|
||||
&attr_to_highlight,
|
||||
&attr_to_crop,
|
||||
query.crop_length,
|
||||
&to_retrieve_ids,
|
||||
&fields_ids_map,
|
||||
&displayed_ids,
|
||||
);
|
||||
|
||||
let tokenizer = TokenizerBuilder::default().build();
|
||||
|
||||
let mut formatter_builder = MatcherBuilder::new(matching_words, tokenizer);
|
||||
formatter_builder.crop_marker(query.crop_marker);
|
||||
formatter_builder.highlight_prefix(query.highlight_pre_tag);
|
||||
formatter_builder.highlight_suffix(query.highlight_post_tag);
|
||||
|
||||
let mut documents = Vec::new();
|
||||
|
||||
let documents_iter = self.documents(&rtxn, documents_ids)?;
|
||||
|
||||
for (_id, obkv) in documents_iter {
|
||||
// First generate a document with all the displayed fields
|
||||
let displayed_document = make_document(&displayed_ids, &fields_ids_map, obkv)?;
|
||||
|
||||
// select the attributes to retrieve
|
||||
let attributes_to_retrieve = to_retrieve_ids
|
||||
.iter()
|
||||
.map(|&fid| fields_ids_map.name(fid).expect("Missing field name"));
|
||||
let mut document =
|
||||
permissive_json_pointer::select_values(&displayed_document, attributes_to_retrieve);
|
||||
|
||||
let (matches_position, formatted) = format_fields(
|
||||
&displayed_document,
|
||||
&fields_ids_map,
|
||||
&formatter_builder,
|
||||
&formatted_options,
|
||||
query.show_matches_position,
|
||||
&displayed_ids,
|
||||
)?;
|
||||
|
||||
if let Some(sort) = query.sort.as_ref() {
|
||||
insert_geo_distance(sort, &mut document);
|
||||
}
|
||||
|
||||
let hit = SearchHit {
|
||||
document,
|
||||
formatted,
|
||||
matches_position,
|
||||
};
|
||||
documents.push(hit);
|
||||
}
|
||||
|
||||
let estimated_total_hits = candidates.len();
|
||||
|
||||
let facet_distribution = match query.facets {
|
||||
Some(ref fields) => {
|
||||
let mut facet_distribution = self.facets_distribution(&rtxn);
|
||||
|
||||
let max_values_by_facet = self
|
||||
.max_values_per_facet(&rtxn)?
|
||||
.unwrap_or(DEFAULT_VALUES_PER_FACET);
|
||||
facet_distribution.max_values_per_facet(max_values_by_facet);
|
||||
|
||||
if fields.iter().all(|f| f != "*") {
|
||||
facet_distribution.facets(fields);
|
||||
}
|
||||
let distribution = facet_distribution.candidates(candidates).execute()?;
|
||||
|
||||
Some(distribution)
|
||||
}
|
||||
None => None,
|
||||
};
|
||||
|
||||
let result = SearchResult {
|
||||
hits: documents,
|
||||
estimated_total_hits,
|
||||
query: query.q.clone().unwrap_or_default(),
|
||||
limit: query.limit,
|
||||
offset: query.offset.unwrap_or_default(),
|
||||
processing_time_ms: before_search.elapsed().as_millis(),
|
||||
facet_distribution,
|
||||
};
|
||||
Ok(result)
|
||||
}
|
||||
}
|
||||
|
||||
fn insert_geo_distance(sorts: &[String], document: &mut Document) {
|
||||
lazy_static::lazy_static! {
|
||||
static ref GEO_REGEX: Regex =
|
||||
Regex::new(r"_geoPoint\(\s*([[:digit:].\-]+)\s*,\s*([[:digit:].\-]+)\s*\)").unwrap();
|
||||
};
|
||||
if let Some(capture_group) = sorts.iter().find_map(|sort| GEO_REGEX.captures(sort)) {
|
||||
// TODO: TAMO: milli encountered an internal error, what do we want to do?
|
||||
let base = [
|
||||
capture_group[1].parse().unwrap(),
|
||||
capture_group[2].parse().unwrap(),
|
||||
];
|
||||
let geo_point = &document.get("_geo").unwrap_or(&json!(null));
|
||||
if let Some((lat, lng)) = geo_point["lat"].as_f64().zip(geo_point["lng"].as_f64()) {
|
||||
let distance = milli::distance_between_two_points(&base, &[lat, lng]);
|
||||
document.insert("_geoDistance".to_string(), json!(distance.round() as usize));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn compute_formatted_options(
|
||||
attr_to_highlight: &HashSet<String>,
|
||||
attr_to_crop: &[String],
|
||||
query_crop_length: usize,
|
||||
to_retrieve_ids: &BTreeSet<FieldId>,
|
||||
fields_ids_map: &FieldsIdsMap,
|
||||
displayed_ids: &BTreeSet<FieldId>,
|
||||
) -> BTreeMap<FieldId, FormatOptions> {
|
||||
let mut formatted_options = BTreeMap::new();
|
||||
|
||||
add_highlight_to_formatted_options(
|
||||
&mut formatted_options,
|
||||
attr_to_highlight,
|
||||
fields_ids_map,
|
||||
displayed_ids,
|
||||
);
|
||||
|
||||
add_crop_to_formatted_options(
|
||||
&mut formatted_options,
|
||||
attr_to_crop,
|
||||
query_crop_length,
|
||||
fields_ids_map,
|
||||
displayed_ids,
|
||||
);
|
||||
|
||||
// Should not return `_formatted` if no valid attributes to highlight/crop
|
||||
if !formatted_options.is_empty() {
|
||||
add_non_formatted_ids_to_formatted_options(&mut formatted_options, to_retrieve_ids);
|
||||
}
|
||||
|
||||
formatted_options
|
||||
}
|
||||
|
||||
fn add_highlight_to_formatted_options(
|
||||
formatted_options: &mut BTreeMap<FieldId, FormatOptions>,
|
||||
attr_to_highlight: &HashSet<String>,
|
||||
fields_ids_map: &FieldsIdsMap,
|
||||
displayed_ids: &BTreeSet<FieldId>,
|
||||
) {
|
||||
for attr in attr_to_highlight {
|
||||
let new_format = FormatOptions {
|
||||
highlight: true,
|
||||
crop: None,
|
||||
};
|
||||
|
||||
if attr == "*" {
|
||||
for id in displayed_ids {
|
||||
formatted_options.insert(*id, new_format);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
if let Some(id) = fields_ids_map.id(attr) {
|
||||
if displayed_ids.contains(&id) {
|
||||
formatted_options.insert(id, new_format);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn add_crop_to_formatted_options(
|
||||
formatted_options: &mut BTreeMap<FieldId, FormatOptions>,
|
||||
attr_to_crop: &[String],
|
||||
crop_length: usize,
|
||||
fields_ids_map: &FieldsIdsMap,
|
||||
displayed_ids: &BTreeSet<FieldId>,
|
||||
) {
|
||||
for attr in attr_to_crop {
|
||||
let mut split = attr.rsplitn(2, ':');
|
||||
let (attr_name, attr_len) = match split.next().zip(split.next()) {
|
||||
Some((len, name)) => {
|
||||
let crop_len = len.parse::<usize>().unwrap_or(crop_length);
|
||||
(name, crop_len)
|
||||
}
|
||||
None => (attr.as_str(), crop_length),
|
||||
};
|
||||
|
||||
if attr_name == "*" {
|
||||
for id in displayed_ids {
|
||||
formatted_options
|
||||
.entry(*id)
|
||||
.and_modify(|f| f.crop = Some(attr_len))
|
||||
.or_insert(FormatOptions {
|
||||
highlight: false,
|
||||
crop: Some(attr_len),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(id) = fields_ids_map.id(attr_name) {
|
||||
if displayed_ids.contains(&id) {
|
||||
formatted_options
|
||||
.entry(id)
|
||||
.and_modify(|f| f.crop = Some(attr_len))
|
||||
.or_insert(FormatOptions {
|
||||
highlight: false,
|
||||
crop: Some(attr_len),
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn add_non_formatted_ids_to_formatted_options(
|
||||
formatted_options: &mut BTreeMap<FieldId, FormatOptions>,
|
||||
to_retrieve_ids: &BTreeSet<FieldId>,
|
||||
) {
|
||||
for id in to_retrieve_ids {
|
||||
formatted_options.entry(*id).or_insert(FormatOptions {
|
||||
highlight: false,
|
||||
crop: None,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
fn make_document(
|
||||
displayed_attributes: &BTreeSet<FieldId>,
|
||||
field_ids_map: &FieldsIdsMap,
|
||||
obkv: obkv::KvReaderU16,
|
||||
) -> Result<Document> {
|
||||
let mut document = serde_json::Map::new();
|
||||
|
||||
// recreate the original json
|
||||
for (key, value) in obkv.iter() {
|
||||
let value = serde_json::from_slice(value)?;
|
||||
let key = field_ids_map
|
||||
.name(key)
|
||||
.expect("Missing field name")
|
||||
.to_string();
|
||||
|
||||
document.insert(key, value);
|
||||
}
|
||||
|
||||
// select the attributes to retrieve
|
||||
let displayed_attributes = displayed_attributes
|
||||
.iter()
|
||||
.map(|&fid| field_ids_map.name(fid).expect("Missing field name"));
|
||||
|
||||
let document = permissive_json_pointer::select_values(&document, displayed_attributes);
|
||||
Ok(document)
|
||||
}
|
||||
|
||||
fn format_fields<'a, A: AsRef<[u8]>>(
|
||||
document: &Document,
|
||||
field_ids_map: &FieldsIdsMap,
|
||||
builder: &MatcherBuilder<'a, A>,
|
||||
formatted_options: &BTreeMap<FieldId, FormatOptions>,
|
||||
compute_matches: bool,
|
||||
displayable_ids: &BTreeSet<FieldId>,
|
||||
) -> Result<(Option<MatchesPosition>, Document)> {
|
||||
let mut matches_position = compute_matches.then(BTreeMap::new);
|
||||
let mut document = document.clone();
|
||||
|
||||
// select the attributes to retrieve
|
||||
let displayable_names = displayable_ids
|
||||
.iter()
|
||||
.map(|&fid| field_ids_map.name(fid).expect("Missing field name"));
|
||||
permissive_json_pointer::map_leaf_values(&mut document, displayable_names, |key, value| {
|
||||
// To get the formatting option of each key we need to see all the rules that applies
|
||||
// to the value and merge them together. eg. If a user said he wanted to highlight `doggo`
|
||||
// and crop `doggo.name`. `doggo.name` needs to be highlighted + cropped while `doggo.age` is only
|
||||
// highlighted.
|
||||
let format = formatted_options
|
||||
.iter()
|
||||
.filter(|(field, _option)| {
|
||||
let name = field_ids_map.name(**field).unwrap();
|
||||
milli::is_faceted_by(name, key) || milli::is_faceted_by(key, name)
|
||||
})
|
||||
.map(|(_, option)| *option)
|
||||
.reduce(|acc, option| acc.merge(option));
|
||||
let mut infos = Vec::new();
|
||||
|
||||
*value = format_value(
|
||||
std::mem::take(value),
|
||||
builder,
|
||||
format,
|
||||
&mut infos,
|
||||
compute_matches,
|
||||
);
|
||||
|
||||
if let Some(matches) = matches_position.as_mut() {
|
||||
if !infos.is_empty() {
|
||||
matches.insert(key.to_owned(), infos);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
let selectors = formatted_options
|
||||
.keys()
|
||||
// This unwrap must be safe since we got the ids from the fields_ids_map just
|
||||
// before.
|
||||
.map(|&fid| field_ids_map.name(fid).unwrap());
|
||||
let document = permissive_json_pointer::select_values(&document, selectors);
|
||||
|
||||
Ok((matches_position, document))
|
||||
}
|
||||
|
||||
fn format_value<'a, A: AsRef<[u8]>>(
|
||||
value: Value,
|
||||
builder: &MatcherBuilder<'a, A>,
|
||||
format_options: Option<FormatOptions>,
|
||||
infos: &mut Vec<MatchBounds>,
|
||||
compute_matches: bool,
|
||||
) -> Value {
|
||||
match value {
|
||||
Value::String(old_string) => {
|
||||
let mut matcher = builder.build(&old_string);
|
||||
if compute_matches {
|
||||
let matches = matcher.matches();
|
||||
infos.extend_from_slice(&matches[..]);
|
||||
}
|
||||
|
||||
match format_options {
|
||||
Some(format_options) => {
|
||||
let value = matcher.format(format_options);
|
||||
Value::String(value.into_owned())
|
||||
}
|
||||
None => Value::String(old_string),
|
||||
}
|
||||
}
|
||||
Value::Array(values) => Value::Array(
|
||||
values
|
||||
.into_iter()
|
||||
.map(|v| {
|
||||
format_value(
|
||||
v,
|
||||
builder,
|
||||
format_options.map(|format_options| FormatOptions {
|
||||
highlight: format_options.highlight,
|
||||
crop: None,
|
||||
}),
|
||||
infos,
|
||||
compute_matches,
|
||||
)
|
||||
})
|
||||
.collect(),
|
||||
),
|
||||
Value::Object(object) => Value::Object(
|
||||
object
|
||||
.into_iter()
|
||||
.map(|(k, v)| {
|
||||
(
|
||||
k,
|
||||
format_value(
|
||||
v,
|
||||
builder,
|
||||
format_options.map(|format_options| FormatOptions {
|
||||
highlight: format_options.highlight,
|
||||
crop: None,
|
||||
}),
|
||||
infos,
|
||||
compute_matches,
|
||||
),
|
||||
)
|
||||
})
|
||||
.collect(),
|
||||
),
|
||||
Value::Number(number) => {
|
||||
let s = number.to_string();
|
||||
|
||||
let mut matcher = builder.build(&s);
|
||||
if compute_matches {
|
||||
let matches = matcher.matches();
|
||||
infos.extend_from_slice(&matches[..]);
|
||||
}
|
||||
|
||||
match format_options {
|
||||
Some(format_options) => {
|
||||
let value = matcher.format(format_options);
|
||||
Value::String(value.into_owned())
|
||||
}
|
||||
None => Value::Number(number),
|
||||
}
|
||||
}
|
||||
value => value,
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_filter(facets: &Value) -> Result<Option<Filter>> {
|
||||
match facets {
|
||||
Value::String(expr) => {
|
||||
let condition = Filter::from_str(expr)?;
|
||||
Ok(condition)
|
||||
}
|
||||
Value::Array(arr) => parse_filter_array(arr),
|
||||
v => Err(FacetError::InvalidExpression(&["Array"], v.clone()).into()),
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_filter_array(arr: &[Value]) -> Result<Option<Filter>> {
|
||||
let mut ands = Vec::new();
|
||||
for value in arr {
|
||||
match value {
|
||||
Value::String(s) => ands.push(Either::Right(s.as_str())),
|
||||
Value::Array(arr) => {
|
||||
let mut ors = Vec::new();
|
||||
for value in arr {
|
||||
match value {
|
||||
Value::String(s) => ors.push(s.as_str()),
|
||||
v => {
|
||||
return Err(FacetError::InvalidExpression(&["String"], v.clone()).into())
|
||||
}
|
||||
}
|
||||
}
|
||||
ands.push(Either::Left(ors));
|
||||
}
|
||||
v => {
|
||||
return Err(
|
||||
FacetError::InvalidExpression(&["String", "[String]"], v.clone()).into(),
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(Filter::from_array(ands)?)
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod test {
    use super::*;

    /// Runs `insert_geo_distance` on a copy of `document` and returns the inserted
    /// `_geoDistance`, if any.
    fn geo_distance(document: &Document, sorters: &[String]) -> Option<Value> {
        let mut document = document.clone();
        insert_geo_distance(sorters, &mut document);
        document.get("_geoDistance").cloned()
    }

    #[test]
    fn test_insert_geo_distance() {
        let value: Document = serde_json::from_str(
            r#"{
              "_geo": {
                "lat": 50.629973371633746,
                "lng": 3.0569447399419567
              },
              "city": "Lille",
              "id": "1"
            }"#,
        )
        .unwrap();

        // whitespace inside `_geoPoint(...)` does not matter
        let sorters = &["_geoPoint(50.629973371633746,3.0569447399419567):desc".to_string()];
        assert_eq!(geo_distance(&value, sorters), Some(json!(0)));

        let sorters = &["_geoPoint(50.629973371633746, 3.0569447399419567):asc".to_string()];
        assert_eq!(geo_distance(&value, sorters), Some(json!(0)));

        let sorters =
            &["_geoPoint( 50.629973371633746 , 3.0569447399419567 ):desc".to_string()];
        assert_eq!(geo_distance(&value, sorters), Some(json!(0)));

        // the geoPoint is found among other sort criteria
        let sorters = &[
            "prix:asc",
            "villeneuve:desc",
            "_geoPoint(50.629973371633746, 3.0569447399419567):asc",
            "ubu:asc",
        ]
        .map(|s| s.to_string());
        assert_eq!(geo_distance(&value, sorters), Some(json!(0)));

        // only the first geoPoint is used to compute the distance
        let sorters = &[
            "chien:desc",
            "_geoPoint(50.629973371633746, 3.0569447399419567):asc",
            "pangolin:desc",
            "_geoPoint(100.0, -80.0):asc",
            "chat:asc",
        ]
        .map(|s| s.to_string());
        assert_eq!(geo_distance(&value, sorters), Some(json!(0)));

        // there was no _geoPoint so nothing is inserted in the document
        let sorters = &["chien:asc".to_string()];
        assert_eq!(geo_distance(&value, sorters), None);
    }
}
|
@@ -1,559 +0,0 @@
|
||||
use std::collections::{BTreeMap, BTreeSet};
|
||||
use std::marker::PhantomData;
|
||||
use std::num::NonZeroUsize;
|
||||
|
||||
use log::{debug, info, trace};
|
||||
use milli::documents::DocumentsBatchReader;
|
||||
use milli::update::{
|
||||
DocumentAdditionResult, DocumentDeletionResult, IndexDocumentsConfig, IndexDocumentsMethod,
|
||||
Setting,
|
||||
};
|
||||
use serde::{Deserialize, Serialize, Serializer};
|
||||
use uuid::Uuid;
|
||||
|
||||
use super::error::{IndexError, Result};
|
||||
use super::index::{Index, IndexMeta};
|
||||
use crate::update_file_store::UpdateFileStore;
|
||||
|
||||
fn serialize_with_wildcard<S>(
|
||||
field: &Setting<Vec<String>>,
|
||||
s: S,
|
||||
) -> std::result::Result<S::Ok, S::Error>
|
||||
where
|
||||
S: Serializer,
|
||||
{
|
||||
let wildcard = vec!["*".to_string()];
|
||||
match field {
|
||||
Setting::Set(value) => Some(value),
|
||||
Setting::Reset => Some(&wildcard),
|
||||
Setting::NotSet => None,
|
||||
}
|
||||
.serialize(s)
|
||||
}
|
||||
|
||||
#[derive(Clone, Default, Debug, Serialize, PartialEq, Eq)]
|
||||
pub struct Checked;
|
||||
|
||||
#[derive(Clone, Default, Debug, Serialize, Deserialize, PartialEq, Eq)]
|
||||
pub struct Unchecked;
|
||||
|
||||
#[cfg_attr(test, derive(proptest_derive::Arbitrary))]
|
||||
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq)]
|
||||
#[serde(deny_unknown_fields)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct MinWordSizeTyposSetting {
|
||||
#[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
|
||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||
pub one_typo: Setting<u8>,
|
||||
#[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
|
||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||
pub two_typos: Setting<u8>,
|
||||
}
|
||||
|
||||
#[cfg_attr(test, derive(proptest_derive::Arbitrary))]
|
||||
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq)]
|
||||
#[serde(deny_unknown_fields)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct TypoSettings {
|
||||
#[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
|
||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||
pub enabled: Setting<bool>,
|
||||
#[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
|
||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||
pub min_word_size_for_typos: Setting<MinWordSizeTyposSetting>,
|
||||
#[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
|
||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||
pub disable_on_words: Setting<BTreeSet<String>>,
|
||||
#[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
|
||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||
pub disable_on_attributes: Setting<BTreeSet<String>>,
|
||||
}
|
||||
|
||||
#[cfg_attr(test, derive(proptest_derive::Arbitrary))]
|
||||
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq)]
|
||||
#[serde(deny_unknown_fields)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct FacetingSettings {
|
||||
#[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
|
||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||
pub max_values_per_facet: Setting<usize>,
|
||||
}
|
||||
|
||||
#[cfg_attr(test, derive(proptest_derive::Arbitrary))]
|
||||
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq)]
|
||||
#[serde(deny_unknown_fields)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct PaginationSettings {
|
||||
#[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
|
||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||
pub max_total_hits: Setting<usize>,
|
||||
}
|
||||
|
||||
/// Holds all the settings for an index. `T` can either be `Checked` if they represents settings
|
||||
/// whose validity is guaranteed, or `Unchecked` if they need to be validated. In the later case, a
|
||||
/// call to `check` will return a `Settings<Checked>` from a `Settings<Unchecked>`.
|
||||
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq)]
|
||||
#[serde(deny_unknown_fields)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
#[serde(bound(serialize = "T: Serialize", deserialize = "T: Deserialize<'static>"))]
|
||||
#[cfg_attr(test, derive(proptest_derive::Arbitrary))]
|
||||
pub struct Settings<T> {
|
||||
#[serde(
|
||||
default,
|
||||
serialize_with = "serialize_with_wildcard",
|
||||
skip_serializing_if = "Setting::is_not_set"
|
||||
)]
|
||||
#[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
|
||||
pub displayed_attributes: Setting<Vec<String>>,
|
||||
|
||||
#[serde(
|
||||
default,
|
||||
serialize_with = "serialize_with_wildcard",
|
||||
skip_serializing_if = "Setting::is_not_set"
|
||||
)]
|
||||
#[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
|
||||
pub searchable_attributes: Setting<Vec<String>>,
|
||||
|
||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||
#[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
|
||||
pub filterable_attributes: Setting<BTreeSet<String>>,
|
||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||
#[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
|
||||
pub sortable_attributes: Setting<BTreeSet<String>>,
|
||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||
#[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
|
||||
pub ranking_rules: Setting<Vec<String>>,
|
||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||
#[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
|
||||
pub stop_words: Setting<BTreeSet<String>>,
|
||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||
#[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
|
||||
pub synonyms: Setting<BTreeMap<String, Vec<String>>>,
|
||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||
#[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
|
||||
pub distinct_attribute: Setting<String>,
|
||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||
#[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
|
||||
pub typo_tolerance: Setting<TypoSettings>,
|
||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||
#[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
|
||||
pub faceting: Setting<FacetingSettings>,
|
||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||
#[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
|
||||
pub pagination: Setting<PaginationSettings>,
|
||||
|
||||
#[serde(skip)]
|
||||
pub _kind: PhantomData<T>,
|
||||
}
|
||||
|
||||
impl Settings<Checked> {
|
||||
pub fn cleared() -> Settings<Checked> {
|
||||
Settings {
|
||||
displayed_attributes: Setting::Reset,
|
||||
searchable_attributes: Setting::Reset,
|
||||
filterable_attributes: Setting::Reset,
|
||||
sortable_attributes: Setting::Reset,
|
||||
ranking_rules: Setting::Reset,
|
||||
stop_words: Setting::Reset,
|
||||
synonyms: Setting::Reset,
|
||||
distinct_attribute: Setting::Reset,
|
||||
typo_tolerance: Setting::Reset,
|
||||
faceting: Setting::Reset,
|
||||
pagination: Setting::Reset,
|
||||
_kind: PhantomData,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn into_unchecked(self) -> Settings<Unchecked> {
|
||||
let Self {
|
||||
displayed_attributes,
|
||||
searchable_attributes,
|
||||
filterable_attributes,
|
||||
sortable_attributes,
|
||||
ranking_rules,
|
||||
stop_words,
|
||||
synonyms,
|
||||
distinct_attribute,
|
||||
typo_tolerance,
|
||||
faceting,
|
||||
pagination,
|
||||
..
|
||||
} = self;
|
||||
|
||||
Settings {
|
||||
displayed_attributes,
|
||||
searchable_attributes,
|
||||
filterable_attributes,
|
||||
sortable_attributes,
|
||||
ranking_rules,
|
||||
stop_words,
|
||||
synonyms,
|
||||
distinct_attribute,
|
||||
typo_tolerance,
|
||||
faceting,
|
||||
pagination,
|
||||
_kind: PhantomData,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Settings<Unchecked> {
|
||||
pub fn check(self) -> Settings<Checked> {
|
||||
let displayed_attributes = match self.displayed_attributes {
|
||||
Setting::Set(fields) => {
|
||||
if fields.iter().any(|f| f == "*") {
|
||||
Setting::Reset
|
||||
} else {
|
||||
Setting::Set(fields)
|
||||
}
|
||||
}
|
||||
otherwise => otherwise,
|
||||
};
|
||||
|
||||
let searchable_attributes = match self.searchable_attributes {
|
||||
Setting::Set(fields) => {
|
||||
if fields.iter().any(|f| f == "*") {
|
||||
Setting::Reset
|
||||
} else {
|
||||
Setting::Set(fields)
|
||||
}
|
||||
}
|
||||
otherwise => otherwise,
|
||||
};
|
||||
|
||||
Settings {
|
||||
displayed_attributes,
|
||||
searchable_attributes,
|
||||
filterable_attributes: self.filterable_attributes,
|
||||
sortable_attributes: self.sortable_attributes,
|
||||
ranking_rules: self.ranking_rules,
|
||||
stop_words: self.stop_words,
|
||||
synonyms: self.synonyms,
|
||||
distinct_attribute: self.distinct_attribute,
|
||||
typo_tolerance: self.typo_tolerance,
|
||||
faceting: self.faceting,
|
||||
pagination: self.pagination,
|
||||
_kind: PhantomData,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[serde(deny_unknown_fields)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct Facets {
|
||||
pub level_group_size: Option<NonZeroUsize>,
|
||||
pub min_level_size: Option<NonZeroUsize>,
|
||||
}
|
||||
|
||||
impl Index {
|
||||
fn update_primary_key_txn<'a, 'b>(
|
||||
&'a self,
|
||||
txn: &mut milli::heed::RwTxn<'a, 'b>,
|
||||
primary_key: String,
|
||||
) -> Result<IndexMeta> {
|
||||
let mut builder = milli::update::Settings::new(txn, self, self.indexer_config.as_ref());
|
||||
builder.set_primary_key(primary_key);
|
||||
builder.execute(|_| ())?;
|
||||
let meta = IndexMeta::new_txn(self, txn)?;
|
||||
|
||||
Ok(meta)
|
||||
}
|
||||
|
||||
pub fn update_primary_key(&self, primary_key: String) -> Result<IndexMeta> {
|
||||
let mut txn = self.write_txn()?;
|
||||
let res = self.update_primary_key_txn(&mut txn, primary_key)?;
|
||||
txn.commit()?;
|
||||
|
||||
Ok(res)
|
||||
}
|
||||
|
||||
/// Deletes `ids` from the index, and returns how many documents were deleted.
|
||||
pub fn delete_documents(&self, ids: &[String]) -> Result<DocumentDeletionResult> {
|
||||
let mut txn = self.write_txn()?;
|
||||
let mut builder = milli::update::DeleteDocuments::new(&mut txn, self)?;
|
||||
|
||||
// We ignore unexisting document ids
|
||||
ids.iter().for_each(|id| {
|
||||
builder.delete_external_id(id);
|
||||
});
|
||||
|
||||
let deleted = builder.execute()?;
|
||||
|
||||
txn.commit()?;
|
||||
|
||||
Ok(deleted)
|
||||
}
|
||||
|
||||
pub fn clear_documents(&self) -> Result<()> {
|
||||
let mut txn = self.write_txn()?;
|
||||
milli::update::ClearDocuments::new(&mut txn, self).execute()?;
|
||||
txn.commit()?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn update_documents(
|
||||
&self,
|
||||
method: IndexDocumentsMethod,
|
||||
primary_key: Option<String>,
|
||||
file_store: UpdateFileStore,
|
||||
contents: impl IntoIterator<Item = Uuid>,
|
||||
) -> Result<Vec<Result<DocumentAdditionResult>>> {
|
||||
trace!("performing document addition");
|
||||
let mut txn = self.write_txn()?;
|
||||
|
||||
if let Some(primary_key) = primary_key {
|
||||
if self.primary_key(&txn)?.is_none() {
|
||||
self.update_primary_key_txn(&mut txn, primary_key)?;
|
||||
}
|
||||
}
|
||||
|
||||
let config = IndexDocumentsConfig {
|
||||
update_method: method,
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let indexing_callback = |indexing_step| debug!("update: {:?}", indexing_step);
|
||||
let mut builder = milli::update::IndexDocuments::new(
|
||||
&mut txn,
|
||||
self,
|
||||
self.indexer_config.as_ref(),
|
||||
config,
|
||||
indexing_callback,
|
||||
)?;
|
||||
|
||||
let mut results = Vec::new();
|
||||
for content_uuid in contents.into_iter() {
|
||||
let content_file = file_store.get_update(content_uuid)?;
|
||||
let reader = DocumentsBatchReader::from_reader(content_file)?;
|
||||
let (new_builder, user_result) = builder.add_documents(reader)?;
|
||||
builder = new_builder;
|
||||
|
||||
let user_result = match user_result {
|
||||
Ok(count) => Ok(DocumentAdditionResult {
|
||||
indexed_documents: count,
|
||||
number_of_documents: count,
|
||||
}),
|
||||
Err(e) => Err(IndexError::from(e)),
|
||||
};
|
||||
|
||||
results.push(user_result);
|
||||
}
|
||||
|
||||
if results.iter().any(Result::is_ok) {
|
||||
let addition = builder.execute()?;
|
||||
txn.commit()?;
|
||||
info!("document addition done: {:?}", addition);
|
||||
}
|
||||
|
||||
Ok(results)
|
||||
}
|
||||
|
||||
pub fn update_settings(&self, settings: &Settings<Checked>) -> Result<()> {
|
||||
// We must use the write transaction of the update here.
|
||||
let mut txn = self.write_txn()?;
|
||||
let mut builder =
|
||||
milli::update::Settings::new(&mut txn, self, self.indexer_config.as_ref());
|
||||
|
||||
apply_settings_to_builder(settings, &mut builder);
|
||||
|
||||
builder.execute(|indexing_step| debug!("update: {:?}", indexing_step))?;
|
||||
|
||||
txn.commit()?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
pub fn apply_settings_to_builder(
|
||||
settings: &Settings<Checked>,
|
||||
builder: &mut milli::update::Settings,
|
||||
) {
|
||||
match settings.searchable_attributes {
|
||||
Setting::Set(ref names) => builder.set_searchable_fields(names.clone()),
|
||||
Setting::Reset => builder.reset_searchable_fields(),
|
||||
Setting::NotSet => (),
|
||||
}
|
||||
|
||||
match settings.displayed_attributes {
|
||||
Setting::Set(ref names) => builder.set_displayed_fields(names.clone()),
|
||||
Setting::Reset => builder.reset_displayed_fields(),
|
||||
Setting::NotSet => (),
|
||||
}
|
||||
|
||||
match settings.filterable_attributes {
|
||||
Setting::Set(ref facets) => {
|
||||
builder.set_filterable_fields(facets.clone().into_iter().collect())
|
||||
}
|
||||
Setting::Reset => builder.reset_filterable_fields(),
|
||||
Setting::NotSet => (),
|
||||
}
|
||||
|
||||
match settings.sortable_attributes {
|
||||
Setting::Set(ref fields) => builder.set_sortable_fields(fields.iter().cloned().collect()),
|
||||
Setting::Reset => builder.reset_sortable_fields(),
|
||||
Setting::NotSet => (),
|
||||
}
|
||||
|
||||
match settings.ranking_rules {
|
||||
Setting::Set(ref criteria) => builder.set_criteria(criteria.clone()),
|
||||
Setting::Reset => builder.reset_criteria(),
|
||||
Setting::NotSet => (),
|
||||
}
|
||||
|
||||
match settings.stop_words {
|
||||
Setting::Set(ref stop_words) => builder.set_stop_words(stop_words.clone()),
|
||||
Setting::Reset => builder.reset_stop_words(),
|
||||
Setting::NotSet => (),
|
||||
}
|
||||
|
||||
match settings.synonyms {
|
||||
Setting::Set(ref synonyms) => builder.set_synonyms(synonyms.clone().into_iter().collect()),
|
||||
Setting::Reset => builder.reset_synonyms(),
|
||||
Setting::NotSet => (),
|
||||
}
|
||||
|
||||
match settings.distinct_attribute {
|
||||
Setting::Set(ref attr) => builder.set_distinct_field(attr.clone()),
|
||||
Setting::Reset => builder.reset_distinct_field(),
|
||||
Setting::NotSet => (),
|
||||
}
|
||||
|
||||
match settings.typo_tolerance {
|
||||
Setting::Set(ref value) => {
|
||||
match value.enabled {
|
||||
Setting::Set(val) => builder.set_autorize_typos(val),
|
||||
Setting::Reset => builder.reset_authorize_typos(),
|
||||
Setting::NotSet => (),
|
||||
}
|
||||
|
||||
match value.min_word_size_for_typos {
|
||||
Setting::Set(ref setting) => {
|
||||
match setting.one_typo {
|
||||
Setting::Set(val) => builder.set_min_word_len_one_typo(val),
|
||||
Setting::Reset => builder.reset_min_word_len_one_typo(),
|
||||
Setting::NotSet => (),
|
||||
}
|
||||
match setting.two_typos {
|
||||
Setting::Set(val) => builder.set_min_word_len_two_typos(val),
|
||||
Setting::Reset => builder.reset_min_word_len_two_typos(),
|
||||
Setting::NotSet => (),
|
||||
}
|
||||
}
|
||||
Setting::Reset => {
|
||||
builder.reset_min_word_len_one_typo();
|
||||
builder.reset_min_word_len_two_typos();
|
||||
}
|
||||
Setting::NotSet => (),
|
||||
}
|
||||
|
||||
match value.disable_on_words {
|
||||
Setting::Set(ref words) => {
|
||||
builder.set_exact_words(words.clone());
|
||||
}
|
||||
Setting::Reset => builder.reset_exact_words(),
|
||||
Setting::NotSet => (),
|
||||
}
|
||||
|
||||
match value.disable_on_attributes {
|
||||
Setting::Set(ref words) => {
|
||||
builder.set_exact_attributes(words.iter().cloned().collect())
|
||||
}
|
||||
Setting::Reset => builder.reset_exact_attributes(),
|
||||
Setting::NotSet => (),
|
||||
}
|
||||
}
|
||||
Setting::Reset => {
|
||||
// all typo settings need to be reset here.
|
||||
builder.reset_authorize_typos();
|
||||
builder.reset_min_word_len_one_typo();
|
||||
builder.reset_min_word_len_two_typos();
|
||||
builder.reset_exact_words();
|
||||
builder.reset_exact_attributes();
|
||||
}
|
||||
Setting::NotSet => (),
|
||||
}
|
||||
|
||||
match settings.faceting {
|
||||
Setting::Set(ref value) => match value.max_values_per_facet {
|
||||
Setting::Set(val) => builder.set_max_values_per_facet(val),
|
||||
Setting::Reset => builder.reset_max_values_per_facet(),
|
||||
Setting::NotSet => (),
|
||||
},
|
||||
Setting::Reset => builder.reset_max_values_per_facet(),
|
||||
Setting::NotSet => (),
|
||||
}
|
||||
|
||||
match settings.pagination {
|
||||
Setting::Set(ref value) => match value.max_total_hits {
|
||||
Setting::Set(val) => builder.set_pagination_max_total_hits(val),
|
||||
Setting::Reset => builder.reset_pagination_max_total_hits(),
|
||||
Setting::NotSet => (),
|
||||
},
|
||||
Setting::Reset => builder.reset_pagination_max_total_hits(),
|
||||
Setting::NotSet => (),
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
pub(crate) mod test {
    use proptest::prelude::*;

    use super::*;

    /// Proptest strategy producing `NotSet`, `Reset` or an arbitrary `Set` value.
    pub(super) fn setting_strategy<T: Arbitrary + Clone>() -> impl Strategy<Value = Setting<T>> {
        prop_oneof![
            Just(Setting::NotSet),
            Just(Setting::Reset),
            any::<T>().prop_map(Setting::Set)
        ]
    }

    /// Builds unchecked settings where only the displayed and searchable attributes are set.
    fn unchecked_settings(displayed: &[&str], searchable: &[&str]) -> Settings<Unchecked> {
        let owned = |names: &[&str]| names.iter().map(|name| String::from(*name)).collect();

        Settings {
            displayed_attributes: Setting::Set(owned(displayed)),
            searchable_attributes: Setting::Set(owned(searchable)),
            filterable_attributes: Setting::NotSet,
            sortable_attributes: Setting::NotSet,
            ranking_rules: Setting::NotSet,
            stop_words: Setting::NotSet,
            synonyms: Setting::NotSet,
            distinct_attribute: Setting::NotSet,
            typo_tolerance: Setting::NotSet,
            faceting: Setting::NotSet,
            pagination: Setting::NotSet,
            _kind: PhantomData::<Unchecked>,
        }
    }

    #[test]
    fn test_setting_check() {
        // test no changes
        let settings = unchecked_settings(&["hello"], &["hello"]);

        let checked = settings.clone().check();
        assert_eq!(settings.displayed_attributes, checked.displayed_attributes);
        assert_eq!(
            settings.searchable_attributes,
            checked.searchable_attributes
        );

        // test wildcard
        let settings = unchecked_settings(&["*"], &["hello", "*"]);

        let checked = settings.check();
        assert_eq!(checked.displayed_attributes, Setting::Reset);
        assert_eq!(checked.searchable_attributes, Setting::Reset);
    }
}
|
@@ -1,72 +0,0 @@
|
||||
use std::error::Error;
|
||||
|
||||
use meilisearch_types::error::{Code, ErrorCode};
|
||||
use meilisearch_types::index_uid::IndexUidFormatError;
|
||||
use meilisearch_types::internal_error;
|
||||
use tokio::task::JoinError;
|
||||
|
||||
use super::DocumentAdditionFormat;
|
||||
use crate::document_formats::DocumentFormatError;
|
||||
use crate::dump::error::DumpError;
|
||||
use crate::index::error::IndexError;
|
||||
use crate::tasks::error::TaskError;
|
||||
use crate::update_file_store::UpdateFileStoreError;
|
||||
|
||||
use crate::index_resolver::error::IndexResolverError;
|
||||
|
||||
/// Result alias whose error type is [`IndexControllerError`].
pub type Result<T> = std::result::Result<T, IndexControllerError>;
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub enum IndexControllerError {
|
||||
#[error("Index creation must have an uid")]
|
||||
MissingUid,
|
||||
#[error("{0}")]
|
||||
IndexResolver(#[from] IndexResolverError),
|
||||
#[error("{0}")]
|
||||
IndexError(#[from] IndexError),
|
||||
#[error("An internal error has occurred. `{0}`.")]
|
||||
Internal(Box<dyn Error + Send + Sync + 'static>),
|
||||
#[error("{0}")]
|
||||
TaskError(#[from] TaskError),
|
||||
#[error("{0}")]
|
||||
DumpError(#[from] DumpError),
|
||||
#[error("{0}")]
|
||||
DocumentFormatError(#[from] DocumentFormatError),
|
||||
#[error("A {0} payload is missing.")]
|
||||
MissingPayload(DocumentAdditionFormat),
|
||||
#[error("The provided payload reached the size limit.")]
|
||||
PayloadTooLarge,
|
||||
}
|
||||
|
||||
// Generates conversions from the listed error types into `IndexControllerError`.
// NOTE(review): presumably they map to the `Internal` variant — confirm against the
// `internal_error!` macro definition in `meilisearch_types`.
internal_error!(IndexControllerError: JoinError, UpdateFileStoreError);
|
||||
|
||||
impl From<actix_web::error::PayloadError> for IndexControllerError {
|
||||
fn from(other: actix_web::error::PayloadError) -> Self {
|
||||
match other {
|
||||
actix_web::error::PayloadError::Overflow => Self::PayloadTooLarge,
|
||||
_ => Self::Internal(Box::new(other)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ErrorCode for IndexControllerError {
|
||||
fn error_code(&self) -> Code {
|
||||
match self {
|
||||
IndexControllerError::MissingUid => Code::BadRequest,
|
||||
IndexControllerError::IndexResolver(e) => e.error_code(),
|
||||
IndexControllerError::IndexError(e) => e.error_code(),
|
||||
IndexControllerError::Internal(_) => Code::Internal,
|
||||
IndexControllerError::TaskError(e) => e.error_code(),
|
||||
IndexControllerError::DocumentFormatError(e) => e.error_code(),
|
||||
IndexControllerError::MissingPayload(_) => Code::MissingPayload,
|
||||
IndexControllerError::PayloadTooLarge => Code::PayloadTooLarge,
|
||||
IndexControllerError::DumpError(e) => e.error_code(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<IndexUidFormatError> for IndexControllerError {
|
||||
fn from(err: IndexUidFormatError) -> Self {
|
||||
IndexResolverError::from(err).into()
|
||||
}
|
||||
}
|
@@ -1,779 +0,0 @@
|
||||
use meilisearch_auth::SearchRules;
|
||||
use std::collections::BTreeMap;
|
||||
use std::fmt;
|
||||
use std::io::Cursor;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::str::FromStr;
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use actix_web::error::PayloadError;
|
||||
use bytes::Bytes;
|
||||
use futures::Stream;
|
||||
use futures::StreamExt;
|
||||
use meilisearch_types::index_uid::IndexUid;
|
||||
use milli::update::IndexDocumentsMethod;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use time::OffsetDateTime;
|
||||
use tokio::sync::RwLock;
|
||||
use tokio::task::spawn_blocking;
|
||||
use tokio::time::sleep;
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::document_formats::{read_csv, read_json, read_ndjson};
|
||||
use crate::dump::{self, load_dump, DumpHandler};
|
||||
use crate::index::{
|
||||
Checked, Document, IndexMeta, IndexStats, SearchQuery, SearchResult, Settings, Unchecked,
|
||||
};
|
||||
use crate::index_resolver::error::IndexResolverError;
|
||||
use crate::options::{IndexerOpts, SchedulerConfig};
|
||||
use crate::snapshot::{load_snapshot, SnapshotService};
|
||||
use crate::tasks::error::TaskError;
|
||||
use crate::tasks::task::{DocumentDeletion, Task, TaskContent, TaskId};
|
||||
use crate::tasks::{
|
||||
BatchHandler, EmptyBatchHandler, Scheduler, SnapshotHandler, TaskFilter, TaskStore,
|
||||
};
|
||||
use error::Result;
|
||||
|
||||
use self::error::IndexControllerError;
|
||||
use crate::index_resolver::index_store::{IndexStore, MapIndexStore};
|
||||
use crate::index_resolver::meta_store::{HeedMetaStore, IndexMetaStore};
|
||||
use crate::index_resolver::{create_index_resolver, IndexResolver};
|
||||
use crate::update_file_store::UpdateFileStore;
|
||||
|
||||
pub mod error;
|
||||
pub mod versioning;
|
||||
|
||||
/// Concrete implementation of the IndexController, exposed by meilisearch-lib
|
||||
pub type MeiliSearch = IndexController<HeedMetaStore, MapIndexStore>;
|
||||
|
||||
pub type Payload = Box<
|
||||
dyn Stream<Item = std::result::Result<Bytes, PayloadError>> + Send + Sync + 'static + Unpin,
|
||||
>;
|
||||
|
||||
/// Opens the heed environment located at `path` with a map size of `size`
/// and room for at most 20 named databases.
pub fn open_meta_env(path: &Path, size: usize) -> milli::heed::Result<milli::heed::Env> {
    let mut env_options = milli::heed::EnvOpenOptions::new();
    env_options.map_size(size).max_dbs(20);
    env_options.open(path)
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct IndexMetadata {
|
||||
#[serde(skip)]
|
||||
pub uuid: Uuid,
|
||||
pub uid: String,
|
||||
#[serde(flatten)]
|
||||
pub meta: IndexMeta,
|
||||
}
|
||||
|
||||
/// Optional uid and primary key for an index.
#[derive(Clone, Debug)]
pub struct IndexSettings {
    /// Uid of the index, if provided.
    pub uid: Option<String>,
    /// Primary key of the index, if provided.
    pub primary_key: Option<String>,
}
|
||||
|
||||
pub struct IndexController<U, I> {
|
||||
pub index_resolver: Arc<IndexResolver<U, I>>,
|
||||
scheduler: Arc<RwLock<Scheduler>>,
|
||||
task_store: TaskStore,
|
||||
pub update_file_store: UpdateFileStore,
|
||||
}
|
||||
|
||||
/// Need a custom implementation for clone because deriving require that U and I are clone.
|
||||
impl<U, I> Clone for IndexController<U, I> {
|
||||
fn clone(&self) -> Self {
|
||||
Self {
|
||||
index_resolver: self.index_resolver.clone(),
|
||||
scheduler: self.scheduler.clone(),
|
||||
update_file_store: self.update_file_store.clone(),
|
||||
task_store: self.task_store.clone(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Format of the payload sent with a document addition.
///
/// The type is a fieldless enum, so it also derives `Clone`, `Copy`,
/// `PartialEq` and `Eq`: values can be copied and compared freely.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DocumentAdditionFormat {
    /// JSON payload.
    Json,
    /// CSV payload.
    Csv,
    /// Newline-delimited JSON payload.
    Ndjson,
}

impl fmt::Display for DocumentAdditionFormat {
    /// Writes the lowercase name of the format: `json`, `csv` or `ndjson`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let name = match self {
            DocumentAdditionFormat::Json => "json",
            DocumentAdditionFormat::Ndjson => "ndjson",
            DocumentAdditionFormat::Csv => "csv",
        };
        // The name is a plain literal, so it is written directly rather than
        // through the formatting machinery.
        f.write_str(name)
    }
}
|
||||
|
||||
#[derive(Serialize, Debug)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct Stats {
|
||||
pub database_size: u64,
|
||||
#[serde(serialize_with = "time::serde::rfc3339::option::serialize")]
|
||||
pub last_update: Option<OffsetDateTime>,
|
||||
pub indexes: BTreeMap<String, IndexStats>,
|
||||
}
|
||||
|
||||
#[allow(clippy::large_enum_variant)]
|
||||
#[derive(derivative::Derivative)]
|
||||
#[derivative(Debug)]
|
||||
pub enum Update {
|
||||
DeleteDocuments(Vec<String>),
|
||||
ClearDocuments,
|
||||
Settings {
|
||||
settings: Settings<Unchecked>,
|
||||
/// Indicates whether the update was a deletion
|
||||
is_deletion: bool,
|
||||
allow_index_creation: bool,
|
||||
},
|
||||
DocumentAddition {
|
||||
#[derivative(Debug = "ignore")]
|
||||
payload: Payload,
|
||||
primary_key: Option<String>,
|
||||
method: IndexDocumentsMethod,
|
||||
format: DocumentAdditionFormat,
|
||||
allow_index_creation: bool,
|
||||
},
|
||||
DeleteIndex,
|
||||
CreateIndex {
|
||||
primary_key: Option<String>,
|
||||
},
|
||||
UpdateIndex {
|
||||
primary_key: Option<String>,
|
||||
},
|
||||
}
|
||||
|
||||
/// Collects the options needed by `build` to create an index controller.
#[derive(Default, Debug)]
pub struct IndexControllerBuilder {
    /// Maximum size of an index; required by `build`.
    max_index_size: Option<usize>,
    /// Maximum size of the task store; required by `build`.
    max_task_store_size: Option<usize>,
    /// Directory in which scheduled snapshots are written.
    snapshot_dir: Option<PathBuf>,
    /// Snapshot to load before opening the database.
    import_snapshot: Option<PathBuf>,
    /// Period of the scheduled snapshots.
    snapshot_interval: Option<Duration>,
    /// Passed to the snapshot loader.
    ignore_snapshot_if_db_exists: bool,
    /// Passed to the snapshot loader.
    ignore_missing_snapshot: bool,
    /// Whether `build` starts the snapshot service.
    schedule_snapshot: bool,
    /// Dump to load before opening the database.
    dump_src: Option<PathBuf>,
    /// Directory handed to the dump handler; required by `build`.
    dump_dst: Option<PathBuf>,
    /// Passed to the dump loader.
    ignore_dump_if_db_exists: bool,
    /// Passed to the dump loader.
    ignore_missing_dump: bool,
}
|
||||
|
||||
impl IndexControllerBuilder {
|
||||
pub fn build(
|
||||
self,
|
||||
db_path: impl AsRef<Path>,
|
||||
indexer_options: IndexerOpts,
|
||||
scheduler_config: SchedulerConfig,
|
||||
) -> anyhow::Result<MeiliSearch> {
|
||||
let index_size = self
|
||||
.max_index_size
|
||||
.ok_or_else(|| anyhow::anyhow!("Missing index size"))?;
|
||||
let task_store_size = self
|
||||
.max_task_store_size
|
||||
.ok_or_else(|| anyhow::anyhow!("Missing update database size"))?;
|
||||
|
||||
if let Some(ref path) = self.import_snapshot {
|
||||
log::info!("Loading from snapshot {:?}", path);
|
||||
load_snapshot(
|
||||
db_path.as_ref(),
|
||||
path,
|
||||
self.ignore_snapshot_if_db_exists,
|
||||
self.ignore_missing_snapshot,
|
||||
)?;
|
||||
} else if let Some(ref src_path) = self.dump_src {
|
||||
load_dump(
|
||||
db_path.as_ref(),
|
||||
src_path,
|
||||
self.ignore_dump_if_db_exists,
|
||||
self.ignore_missing_dump,
|
||||
index_size,
|
||||
task_store_size,
|
||||
&indexer_options,
|
||||
)?;
|
||||
} else if db_path.as_ref().exists() {
|
||||
// Directory could be pre-created without any database in.
|
||||
let db_is_empty = db_path.as_ref().read_dir()?.next().is_none();
|
||||
if !db_is_empty {
|
||||
versioning::check_version_file(db_path.as_ref())?;
|
||||
}
|
||||
}
|
||||
|
||||
std::fs::create_dir_all(db_path.as_ref())?;
|
||||
|
||||
let meta_env = Arc::new(open_meta_env(db_path.as_ref(), task_store_size)?);
|
||||
|
||||
let update_file_store = UpdateFileStore::new(&db_path)?;
|
||||
// Create or overwrite the version file for this DB
|
||||
versioning::create_version_file(db_path.as_ref())?;
|
||||
|
||||
let index_resolver = Arc::new(create_index_resolver(
|
||||
&db_path,
|
||||
index_size,
|
||||
&indexer_options,
|
||||
meta_env.clone(),
|
||||
update_file_store.clone(),
|
||||
)?);
|
||||
|
||||
let dump_path = self
|
||||
.dump_dst
|
||||
.ok_or_else(|| anyhow::anyhow!("Missing dump directory path"))?;
|
||||
|
||||
let dump_handler = Arc::new(DumpHandler::new(
|
||||
dump_path,
|
||||
db_path.as_ref().into(),
|
||||
update_file_store.clone(),
|
||||
task_store_size,
|
||||
index_size,
|
||||
meta_env.clone(),
|
||||
index_resolver.clone(),
|
||||
));
|
||||
let task_store = TaskStore::new(meta_env)?;
|
||||
|
||||
// register all the batch handlers for use with the scheduler.
|
||||
let handlers: Vec<Arc<dyn BatchHandler + Sync + Send + 'static>> = vec![
|
||||
index_resolver.clone(),
|
||||
dump_handler,
|
||||
Arc::new(SnapshotHandler),
|
||||
// dummy handler to catch all empty batches
|
||||
Arc::new(EmptyBatchHandler),
|
||||
];
|
||||
let scheduler = Scheduler::new(task_store.clone(), handlers, scheduler_config)?;
|
||||
|
||||
if self.schedule_snapshot {
|
||||
let snapshot_period = self
|
||||
.snapshot_interval
|
||||
.ok_or_else(|| anyhow::anyhow!("Snapshot interval not provided."))?;
|
||||
let snapshot_path = self
|
||||
.snapshot_dir
|
||||
.ok_or_else(|| anyhow::anyhow!("Snapshot path not provided."))?;
|
||||
|
||||
let snapshot_service = SnapshotService {
|
||||
db_path: db_path.as_ref().to_path_buf(),
|
||||
snapshot_period,
|
||||
snapshot_path,
|
||||
index_size,
|
||||
meta_env_size: task_store_size,
|
||||
scheduler: scheduler.clone(),
|
||||
};
|
||||
|
||||
tokio::task::spawn_local(snapshot_service.run());
|
||||
}
|
||||
|
||||
Ok(IndexController {
|
||||
index_resolver,
|
||||
scheduler,
|
||||
update_file_store,
|
||||
task_store,
|
||||
})
|
||||
}
|
||||
|
||||
/// Set the index controller builder's max update store size.
|
||||
pub fn set_max_task_store_size(&mut self, max_update_store_size: usize) -> &mut Self {
|
||||
let max_update_store_size = clamp_to_page_size(max_update_store_size);
|
||||
self.max_task_store_size.replace(max_update_store_size);
|
||||
self
|
||||
}
|
||||
|
||||
pub fn set_max_index_size(&mut self, size: usize) -> &mut Self {
|
||||
let size = clamp_to_page_size(size);
|
||||
self.max_index_size.replace(size);
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the index controller builder's snapshot path.
|
||||
pub fn set_snapshot_dir(&mut self, snapshot_dir: PathBuf) -> &mut Self {
|
||||
self.snapshot_dir.replace(snapshot_dir);
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the index controller builder's ignore snapshot if db exists.
|
||||
pub fn set_ignore_snapshot_if_db_exists(
|
||||
&mut self,
|
||||
ignore_snapshot_if_db_exists: bool,
|
||||
) -> &mut Self {
|
||||
self.ignore_snapshot_if_db_exists = ignore_snapshot_if_db_exists;
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the index controller builder's ignore missing snapshot.
|
||||
pub fn set_ignore_missing_snapshot(&mut self, ignore_missing_snapshot: bool) -> &mut Self {
|
||||
self.ignore_missing_snapshot = ignore_missing_snapshot;
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the index controller builder's import snapshot.
|
||||
pub fn set_import_snapshot(&mut self, import_snapshot: PathBuf) -> &mut Self {
|
||||
self.import_snapshot.replace(import_snapshot);
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the index controller builder's snapshot interval sec.
|
||||
pub fn set_snapshot_interval(&mut self, snapshot_interval: Duration) -> &mut Self {
|
||||
self.snapshot_interval = Some(snapshot_interval);
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the index controller builder's schedule snapshot.
|
||||
pub fn set_schedule_snapshot(&mut self) -> &mut Self {
|
||||
self.schedule_snapshot = true;
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the index controller builder's dump src.
|
||||
pub fn set_dump_src(&mut self, dump_src: PathBuf) -> &mut Self {
|
||||
self.dump_src.replace(dump_src);
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the index controller builder's dump dst.
|
||||
pub fn set_dump_dst(&mut self, dump_dst: PathBuf) -> &mut Self {
|
||||
self.dump_dst.replace(dump_dst);
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the index controller builder's ignore dump if db exists.
|
||||
pub fn set_ignore_dump_if_db_exists(&mut self, ignore_dump_if_db_exists: bool) -> &mut Self {
|
||||
self.ignore_dump_if_db_exists = ignore_dump_if_db_exists;
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the index controller builder's ignore missing dump.
|
||||
pub fn set_ignore_missing_dump(&mut self, ignore_missing_dump: bool) -> &mut Self {
|
||||
self.ignore_missing_dump = ignore_missing_dump;
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
impl<U, I> IndexController<U, I>
|
||||
where
|
||||
U: IndexMetaStore,
|
||||
I: IndexStore,
|
||||
{
|
||||
pub fn builder() -> IndexControllerBuilder {
|
||||
IndexControllerBuilder::default()
|
||||
}
|
||||
|
||||
pub async fn register_update(&self, uid: String, update: Update) -> Result<Task> {
|
||||
let index_uid = IndexUid::from_str(&uid).map_err(IndexResolverError::from)?;
|
||||
let content = match update {
|
||||
Update::DeleteDocuments(ids) => TaskContent::DocumentDeletion {
|
||||
index_uid,
|
||||
deletion: DocumentDeletion::Ids(ids),
|
||||
},
|
||||
Update::ClearDocuments => TaskContent::DocumentDeletion {
|
||||
index_uid,
|
||||
deletion: DocumentDeletion::Clear,
|
||||
},
|
||||
Update::Settings {
|
||||
settings,
|
||||
is_deletion,
|
||||
allow_index_creation,
|
||||
} => TaskContent::SettingsUpdate {
|
||||
settings,
|
||||
is_deletion,
|
||||
allow_index_creation,
|
||||
index_uid,
|
||||
},
|
||||
Update::DocumentAddition {
|
||||
mut payload,
|
||||
primary_key,
|
||||
format,
|
||||
method,
|
||||
allow_index_creation,
|
||||
} => {
|
||||
let mut buffer = Vec::new();
|
||||
while let Some(bytes) = payload.next().await {
|
||||
let bytes = bytes?;
|
||||
buffer.extend_from_slice(&bytes);
|
||||
}
|
||||
let (content_uuid, mut update_file) = self.update_file_store.new_update()?;
|
||||
let documents_count = tokio::task::spawn_blocking(move || -> Result<_> {
|
||||
// check if the payload is empty, and return an error
|
||||
if buffer.is_empty() {
|
||||
return Err(IndexControllerError::MissingPayload(format));
|
||||
}
|
||||
|
||||
let reader = Cursor::new(buffer);
|
||||
let count = match format {
|
||||
DocumentAdditionFormat::Json => read_json(reader, &mut *update_file)?,
|
||||
DocumentAdditionFormat::Csv => read_csv(reader, &mut *update_file)?,
|
||||
DocumentAdditionFormat::Ndjson => read_ndjson(reader, &mut *update_file)?,
|
||||
};
|
||||
|
||||
update_file.persist()?;
|
||||
|
||||
Ok(count)
|
||||
})
|
||||
.await??;
|
||||
|
||||
TaskContent::DocumentAddition {
|
||||
content_uuid,
|
||||
merge_strategy: method,
|
||||
primary_key,
|
||||
documents_count,
|
||||
allow_index_creation,
|
||||
index_uid,
|
||||
}
|
||||
}
|
||||
Update::DeleteIndex => TaskContent::IndexDeletion { index_uid },
|
||||
Update::CreateIndex { primary_key } => TaskContent::IndexCreation {
|
||||
primary_key,
|
||||
index_uid,
|
||||
},
|
||||
Update::UpdateIndex { primary_key } => TaskContent::IndexUpdate {
|
||||
primary_key,
|
||||
index_uid,
|
||||
},
|
||||
};
|
||||
|
||||
let task = self.task_store.register(content).await?;
|
||||
self.scheduler.read().await.notify();
|
||||
|
||||
Ok(task)
|
||||
}
|
||||
|
||||
pub async fn register_dump_task(&self) -> Result<Task> {
|
||||
let uid = dump::generate_uid();
|
||||
let content = TaskContent::Dump { uid };
|
||||
let task = self.task_store.register(content).await?;
|
||||
self.scheduler.read().await.notify();
|
||||
Ok(task)
|
||||
}
|
||||
|
||||
/// Asks the scheduler for the task `id`, passing `filter` along unchanged.
pub async fn get_task(&self, id: TaskId, filter: Option<TaskFilter>) -> Result<Task> {
    let scheduler = self.scheduler.read().await;
    let found = scheduler.get_task(id, filter).await?;
    Ok(found)
}
|
||||
|
||||
pub async fn get_index_task(&self, index_uid: String, task_id: TaskId) -> Result<Task> {
|
||||
let creation_task_id = self
|
||||
.index_resolver
|
||||
.get_index_creation_task_id(index_uid.clone())
|
||||
.await?;
|
||||
if task_id < creation_task_id {
|
||||
return Err(TaskError::UnexistingTask(task_id).into());
|
||||
}
|
||||
|
||||
let mut filter = TaskFilter::default();
|
||||
filter.filter_index(index_uid);
|
||||
let task = self
|
||||
.scheduler
|
||||
.read()
|
||||
.await
|
||||
.get_task(task_id, Some(filter))
|
||||
.await?;
|
||||
|
||||
Ok(task)
|
||||
}
|
||||
|
||||
pub async fn list_tasks(
|
||||
&self,
|
||||
filter: Option<TaskFilter>,
|
||||
limit: Option<usize>,
|
||||
offset: Option<TaskId>,
|
||||
) -> Result<Vec<Task>> {
|
||||
let tasks = self
|
||||
.scheduler
|
||||
.read()
|
||||
.await
|
||||
.list_tasks(offset, filter, limit)
|
||||
.await?;
|
||||
|
||||
Ok(tasks)
|
||||
}
|
||||
|
||||
pub async fn list_index_task(
|
||||
&self,
|
||||
index_uid: String,
|
||||
limit: Option<usize>,
|
||||
offset: Option<TaskId>,
|
||||
) -> Result<Vec<Task>> {
|
||||
let task_id = self
|
||||
.index_resolver
|
||||
.get_index_creation_task_id(index_uid.clone())
|
||||
.await?;
|
||||
|
||||
let mut filter = TaskFilter::default();
|
||||
filter.filter_index(index_uid);
|
||||
|
||||
let tasks = self
|
||||
.scheduler
|
||||
.read()
|
||||
.await
|
||||
.list_tasks(
|
||||
Some(offset.unwrap_or_default() + task_id),
|
||||
Some(filter),
|
||||
limit,
|
||||
)
|
||||
.await?;
|
||||
|
||||
Ok(tasks)
|
||||
}
|
||||
|
||||
pub async fn list_indexes(&self) -> Result<Vec<IndexMetadata>> {
|
||||
let indexes = self.index_resolver.list().await?;
|
||||
let mut ret = Vec::new();
|
||||
for (uid, index) in indexes {
|
||||
let meta = index.meta()?;
|
||||
let meta = IndexMetadata {
|
||||
uuid: index.uuid(),
|
||||
uid,
|
||||
meta,
|
||||
};
|
||||
ret.push(meta);
|
||||
}
|
||||
|
||||
Ok(ret)
|
||||
}
|
||||
|
||||
pub async fn settings(&self, uid: String) -> Result<Settings<Checked>> {
|
||||
let index = self.index_resolver.get_index(uid).await?;
|
||||
let settings = spawn_blocking(move || index.settings()).await??;
|
||||
Ok(settings)
|
||||
}
|
||||
|
||||
/// Return the total number of documents contained in the index + the selected documents.
|
||||
pub async fn documents(
|
||||
&self,
|
||||
uid: String,
|
||||
offset: usize,
|
||||
limit: usize,
|
||||
attributes_to_retrieve: Option<Vec<String>>,
|
||||
) -> Result<(u64, Vec<Document>)> {
|
||||
let index = self.index_resolver.get_index(uid).await?;
|
||||
let result =
|
||||
spawn_blocking(move || index.retrieve_documents(offset, limit, attributes_to_retrieve))
|
||||
.await??;
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
pub async fn document(
|
||||
&self,
|
||||
uid: String,
|
||||
doc_id: String,
|
||||
attributes_to_retrieve: Option<Vec<String>>,
|
||||
) -> Result<Document> {
|
||||
let index = self.index_resolver.get_index(uid).await?;
|
||||
let document =
|
||||
spawn_blocking(move || index.retrieve_document(doc_id, attributes_to_retrieve))
|
||||
.await??;
|
||||
Ok(document)
|
||||
}
|
||||
|
||||
pub async fn search(&self, uid: String, query: SearchQuery) -> Result<SearchResult> {
|
||||
let index = self.index_resolver.get_index(uid).await?;
|
||||
let result = spawn_blocking(move || index.perform_search(query)).await??;
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
pub async fn get_index(&self, uid: String) -> Result<IndexMetadata> {
|
||||
let index = self.index_resolver.get_index(uid.clone()).await?;
|
||||
let uuid = index.uuid();
|
||||
let meta = spawn_blocking(move || index.meta()).await??;
|
||||
let meta = IndexMetadata { uuid, uid, meta };
|
||||
Ok(meta)
|
||||
}
|
||||
|
||||
pub async fn get_index_stats(&self, uid: String) -> Result<IndexStats> {
|
||||
let processing_tasks = self.scheduler.read().await.get_processing_tasks().await?;
|
||||
// Check if the currently indexing update is from our index.
|
||||
let is_indexing = processing_tasks
|
||||
.first()
|
||||
.map_or(false, |task| task.index_uid().map_or(false, |u| u == uid));
|
||||
|
||||
let index = self.index_resolver.get_index(uid).await?;
|
||||
let mut stats = spawn_blocking(move || index.stats()).await??;
|
||||
stats.is_indexing = Some(is_indexing);
|
||||
|
||||
Ok(stats)
|
||||
}
|
||||
|
||||
pub async fn get_all_stats(&self, search_rules: &SearchRules) -> Result<Stats> {
|
||||
let mut last_task: Option<OffsetDateTime> = None;
|
||||
let mut indexes = BTreeMap::new();
|
||||
let mut database_size = 0;
|
||||
let processing_tasks = self.scheduler.read().await.get_processing_tasks().await?;
|
||||
|
||||
for (index_uid, index) in self.index_resolver.list().await? {
|
||||
if !search_rules.is_index_authorized(&index_uid) {
|
||||
continue;
|
||||
}
|
||||
|
||||
let (mut stats, meta) =
|
||||
spawn_blocking::<_, Result<(IndexStats, IndexMeta)>>(move || {
|
||||
Ok((index.stats()?, index.meta()?))
|
||||
})
|
||||
.await??;
|
||||
|
||||
database_size += stats.size;
|
||||
|
||||
last_task = last_task.map_or(Some(meta.updated_at), |last| {
|
||||
Some(last.max(meta.updated_at))
|
||||
});
|
||||
|
||||
// Check if the currently indexing update is from our index.
|
||||
stats.is_indexing = processing_tasks
|
||||
.first()
|
||||
.and_then(|p| p.index_uid().map(|u| u == index_uid))
|
||||
.or(Some(false));
|
||||
|
||||
indexes.insert(index_uid, stats);
|
||||
}
|
||||
|
||||
Ok(Stats {
|
||||
database_size,
|
||||
last_update: last_task,
|
||||
indexes,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn get_arc_ownership_blocking<T>(mut item: Arc<T>) -> T {
|
||||
loop {
|
||||
match Arc::try_unwrap(item) {
|
||||
Ok(item) => return item,
|
||||
Err(item_arc) => {
|
||||
item = item_arc;
|
||||
sleep(Duration::from_millis(100)).await;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Clamp the provided value to be a multiple of system page size.
|
||||
fn clamp_to_page_size(size: usize) -> usize {
|
||||
size / page_size::get() * page_size::get()
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod test {
    use futures::future::ok;
    use mockall::predicate::eq;
    use nelson::Mocker;

    use crate::index::error::Result as IndexResult;
    use crate::index::Index;
    use crate::index::{
        DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG,
    };
    use crate::index_resolver::index_store::MockIndexStore;
    use crate::index_resolver::meta_store::MockIndexMetaStore;
    use crate::index_resolver::IndexResolver;

    use super::*;

    impl IndexController<MockIndexMetaStore, MockIndexStore> {
        /// Assembles a controller directly from already-built (mocked) parts.
        pub fn mock(
            index_resolver: Arc<IndexResolver<MockIndexMetaStore, MockIndexStore>>,
            task_store: TaskStore,
            update_file_store: UpdateFileStore,
            scheduler: Arc<RwLock<Scheduler>>,
        ) -> Self {
            IndexController {
                index_resolver,
                task_store,
                update_file_store,
                scheduler,
            }
        }
    }

    /// `search` must forward the query untouched to the resolved index and
    /// return that index's result untouched.
    #[actix_rt::test]
    async fn test_search_simple() {
        let index_uid = "test";
        let index_uuid = Uuid::new_v4();
        let query = SearchQuery {
            q: Some(String::from("hello world")),
            offset: Some(10),
            limit: 0,
            attributes_to_retrieve: Some(vec!["string".to_owned()].into_iter().collect()),
            attributes_to_crop: None,
            crop_length: 18,
            attributes_to_highlight: None,
            show_matches_position: true,
            filter: None,
            sort: None,
            facets: None,
            highlight_pre_tag: DEFAULT_HIGHLIGHT_PRE_TAG(),
            highlight_post_tag: DEFAULT_HIGHLIGHT_POST_TAG(),
            crop_marker: DEFAULT_CROP_MARKER(),
            matching_strategy: Default::default(),
        };

        let result = SearchResult {
            hits: vec![],
            estimated_total_hits: 29,
            query: "hello world".to_string(),
            limit: 24,
            offset: 0,
            processing_time_ms: 50,
            facet_distribution: None,
        };

        // The meta store resolves the uid to `index_uuid`.
        let mut uuid_store = MockIndexMetaStore::new();
        uuid_store
            .expect_get()
            .with(eq(index_uid.to_owned()))
            .returning(move |s| {
                Box::pin(ok((
                    s,
                    Some(crate::index_resolver::meta_store::IndexMeta {
                        uuid: index_uuid,
                        creation_task_id: 0,
                    }),
                )))
            });

        // The index store hands out a mocked index whose `perform_search`
        // expects exactly `query` once and answers with `result`.
        let mut index_store = MockIndexStore::new();
        let expected_result = result.clone();
        let expected_query = query.clone();
        index_store
            .expect_get()
            .with(eq(index_uuid))
            .returning(move |_uuid| {
                let result = expected_result.clone();
                let query = expected_query.clone();
                let mocker = Mocker::default();
                mocker
                    .when::<SearchQuery, IndexResult<SearchResult>>("perform_search")
                    .once()
                    .then(move |q| {
                        assert_eq!(&q, &query);
                        Ok(result.clone())
                    });
                let index = Index::mock(mocker);
                Box::pin(ok(Some(index)))
            });

        let task_store_mocker = Mocker::default();
        let file_store_mocker = Mocker::default();
        let update_file_store = UpdateFileStore::mock(file_store_mocker);
        let index_resolver = Arc::new(IndexResolver::new(
            uuid_store,
            index_store,
            update_file_store.clone(),
        ));
        let task_store = TaskStore::mock(task_store_mocker);
        let scheduler = Scheduler::new(
            task_store.clone(),
            vec![index_resolver.clone()],
            SchedulerConfig::default(),
        )
        .unwrap();
        let index_controller =
            IndexController::mock(index_resolver, task_store, update_file_store, scheduler);

        let r = index_controller
            .search(index_uid.to_owned(), query.clone())
            .await
            .unwrap();
        assert_eq!(r, result);
    }
}
|
@@ -1,79 +0,0 @@
|
||||
use std::error::Error;
|
||||
use std::fmt;
|
||||
|
||||
use meilisearch_types::{internal_error, Code, ErrorCode};
|
||||
|
||||
use crate::{
|
||||
document_formats::DocumentFormatError,
|
||||
index::error::IndexError,
|
||||
index_controller::{update_file_store::UpdateFileStoreError, DocumentAdditionFormat},
|
||||
};
|
||||
|
||||
/// Result alias whose error type is [`UpdateLoopError`].
pub type Result<T> = std::result::Result<T, UpdateLoopError>;
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
#[allow(clippy::large_enum_variant)]
|
||||
pub enum UpdateLoopError {
|
||||
#[error("Task `{0}` not found.")]
|
||||
UnexistingUpdate(u64),
|
||||
#[error("An internal error has occurred. `{0}`.")]
|
||||
Internal(Box<dyn Error + Send + Sync + 'static>),
|
||||
#[error(
|
||||
"update store was shut down due to a fatal error, please check your logs for more info."
|
||||
)]
|
||||
FatalUpdateStoreError,
|
||||
#[error("{0}")]
|
||||
DocumentFormatError(#[from] DocumentFormatError),
|
||||
#[error("The provided payload reached the size limit.")]
|
||||
PayloadTooLarge,
|
||||
#[error("A {0} payload is missing.")]
|
||||
MissingPayload(DocumentAdditionFormat),
|
||||
#[error("{0}")]
|
||||
IndexError(#[from] IndexError),
|
||||
}
|
||||
|
||||
impl<T> From<tokio::sync::mpsc::error::SendError<T>> for UpdateLoopError
|
||||
where
|
||||
T: Sync + Send + 'static + fmt::Debug,
|
||||
{
|
||||
fn from(other: tokio::sync::mpsc::error::SendError<T>) -> Self {
|
||||
Self::Internal(Box::new(other))
|
||||
}
|
||||
}
|
||||
|
||||
impl From<tokio::sync::oneshot::error::RecvError> for UpdateLoopError {
|
||||
fn from(other: tokio::sync::oneshot::error::RecvError) -> Self {
|
||||
Self::Internal(Box::new(other))
|
||||
}
|
||||
}
|
||||
|
||||
impl From<actix_web::error::PayloadError> for UpdateLoopError {
|
||||
fn from(other: actix_web::error::PayloadError) -> Self {
|
||||
match other {
|
||||
actix_web::error::PayloadError::Overflow => Self::PayloadTooLarge,
|
||||
_ => Self::Internal(Box::new(other)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Generates conversions from the listed error types into `UpdateLoopError`.
// NOTE(review): presumably they map to the `Internal` variant — confirm against the
// `internal_error!` macro definition in `meilisearch_types`.
internal_error!(
    UpdateLoopError: heed::Error,
    std::io::Error,
    serde_json::Error,
    tokio::task::JoinError,
    UpdateFileStoreError
);
|
||||
|
||||
impl ErrorCode for UpdateLoopError {
|
||||
fn error_code(&self) -> Code {
|
||||
match self {
|
||||
Self::UnexistingUpdate(_) => Code::TaskNotFound,
|
||||
Self::Internal(_) => Code::Internal,
|
||||
Self::FatalUpdateStoreError => Code::Internal,
|
||||
Self::DocumentFormatError(error) => error.error_code(),
|
||||
Self::PayloadTooLarge => Code::PayloadTooLarge,
|
||||
Self::MissingPayload(_) => Code::MissingPayload,
|
||||
Self::IndexError(e) => e.error_code(),
|
||||
}
|
||||
}
|
||||
}
|
@@ -1,19 +0,0 @@
|
||||
#[derive(thiserror::Error, Debug)]
|
||||
pub enum VersionFileError {
|
||||
#[error(
|
||||
"Meilisearch (v{}) failed to infer the version of the database. Please consider using a dump to load your data.",
|
||||
env!("CARGO_PKG_VERSION").to_string()
|
||||
)]
|
||||
MissingVersionFile,
|
||||
#[error("Version file is corrupted and thus Meilisearch is unable to determine the version of the database.")]
|
||||
MalformedVersionFile,
|
||||
#[error(
|
||||
"Expected Meilisearch engine version: {major}.{minor}.{patch}, current engine version: {}. To update Meilisearch use a dump.",
|
||||
env!("CARGO_PKG_VERSION").to_string()
|
||||
)]
|
||||
VersionMismatch {
|
||||
major: String,
|
||||
minor: String,
|
||||
patch: String,
|
||||
},
|
||||
}
|
@@ -1,56 +0,0 @@
|
||||
use std::fs;
|
||||
use std::io::ErrorKind;
|
||||
use std::path::Path;
|
||||
|
||||
use self::error::VersionFileError;
|
||||
|
||||
mod error;
|
||||
|
||||
pub const VERSION_FILE_NAME: &str = "VERSION";
|
||||
|
||||
static VERSION_MAJOR: &str = env!("CARGO_PKG_VERSION_MAJOR");
|
||||
static VERSION_MINOR: &str = env!("CARGO_PKG_VERSION_MINOR");
|
||||
static VERSION_PATCH: &str = env!("CARGO_PKG_VERSION_PATCH");
|
||||
|
||||
// Persists the version of the current Meilisearch binary to a VERSION file
|
||||
pub fn create_version_file(db_path: &Path) -> anyhow::Result<()> {
|
||||
let version_path = db_path.join(VERSION_FILE_NAME);
|
||||
fs::write(
|
||||
version_path,
|
||||
format!("{}.{}.{}", VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH),
|
||||
)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// Ensures Meilisearch version is compatible with the database, returns an error versions mismatch.
|
||||
pub fn check_version_file(db_path: &Path) -> anyhow::Result<()> {
|
||||
let version_path = db_path.join(VERSION_FILE_NAME);
|
||||
|
||||
match fs::read_to_string(&version_path) {
|
||||
Ok(version) => {
|
||||
let version_components = version.split('.').collect::<Vec<_>>();
|
||||
let (major, minor, patch) = match &version_components[..] {
|
||||
[major, minor, patch] => (major.to_string(), minor.to_string(), patch.to_string()),
|
||||
_ => return Err(VersionFileError::MalformedVersionFile.into()),
|
||||
};
|
||||
|
||||
if major != VERSION_MAJOR || minor != VERSION_MINOR {
|
||||
return Err(VersionFileError::VersionMismatch {
|
||||
major,
|
||||
minor,
|
||||
patch,
|
||||
}
|
||||
.into());
|
||||
}
|
||||
}
|
||||
Err(error) => {
|
||||
return match error.kind() {
|
||||
ErrorKind::NotFound => Err(VersionFileError::MissingVersionFile.into()),
|
||||
_ => Err(error.into()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
@@ -1,71 +0,0 @@
|
||||
use std::fmt;
|
||||
|
||||
use meilisearch_types::error::{Code, ErrorCode};
|
||||
use meilisearch_types::index_uid::IndexUidFormatError;
|
||||
use meilisearch_types::internal_error;
|
||||
use tokio::sync::mpsc::error::SendError as MpscSendError;
|
||||
use tokio::sync::oneshot::error::RecvError as OneshotRecvError;
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::{error::MilliError, index::error::IndexError, update_file_store::UpdateFileStoreError};
|
||||
|
||||
/// Result alias whose error type is [`IndexResolverError`].
pub type Result<T> = std::result::Result<T, IndexResolverError>;
|
||||
|
||||
#[derive(thiserror::Error, Debug)]
|
||||
pub enum IndexResolverError {
|
||||
#[error("{0}")]
|
||||
IndexError(#[from] IndexError),
|
||||
#[error("Index `{0}` already exists.")]
|
||||
IndexAlreadyExists(String),
|
||||
#[error("Index `{0}` not found.")]
|
||||
UnexistingIndex(String),
|
||||
#[error("A primary key is already present. It's impossible to update it")]
|
||||
ExistingPrimaryKey,
|
||||
#[error("An internal error has occurred. `{0}`.")]
|
||||
Internal(Box<dyn std::error::Error + Send + Sync + 'static>),
|
||||
#[error("The creation of the `{0}` index has failed due to `Index uuid is already assigned`.")]
|
||||
UuidAlreadyExists(Uuid),
|
||||
#[error("{0}")]
|
||||
Milli(#[from] milli::Error),
|
||||
#[error("{0}")]
|
||||
BadlyFormatted(#[from] IndexUidFormatError),
|
||||
}
|
||||
|
||||
impl<T> From<MpscSendError<T>> for IndexResolverError
|
||||
where
|
||||
T: Send + Sync + 'static + fmt::Debug,
|
||||
{
|
||||
fn from(other: tokio::sync::mpsc::error::SendError<T>) -> Self {
|
||||
Self::Internal(Box::new(other))
|
||||
}
|
||||
}
|
||||
|
||||
/// A failed receive on a tokio oneshot channel is reported as an internal error.
impl From<OneshotRecvError> for IndexResolverError {
    fn from(recv_error: OneshotRecvError) -> Self {
        let boxed = Box::new(recv_error);
        Self::Internal(boxed)
    }
}
|
||||
|
||||
// NOTE(review): `internal_error!` comes from `meilisearch_types`; presumably it
// generates `From<E> for IndexResolverError` impls that wrap each listed error
// type as an internal error — confirm against the macro definition.
internal_error!(
    IndexResolverError: milli::heed::Error,
    uuid::Error,
    std::io::Error,
    tokio::task::JoinError,
    serde_json::Error,
    UpdateFileStoreError
);
|
||||
|
||||
impl ErrorCode for IndexResolverError {
|
||||
fn error_code(&self) -> Code {
|
||||
match self {
|
||||
IndexResolverError::IndexError(e) => e.error_code(),
|
||||
IndexResolverError::IndexAlreadyExists(_) => Code::IndexAlreadyExists,
|
||||
IndexResolverError::UnexistingIndex(_) => Code::IndexNotFound,
|
||||
IndexResolverError::ExistingPrimaryKey => Code::PrimaryKeyAlreadyPresent,
|
||||
IndexResolverError::Internal(_) => Code::Internal,
|
||||
IndexResolverError::UuidAlreadyExists(_) => Code::CreateIndex,
|
||||
IndexResolverError::Milli(e) => MilliError(e).error_code(),
|
||||
IndexResolverError::BadlyFormatted(_) => Code::InvalidIndexUid,
|
||||
}
|
||||
}
|
||||
}
|
@@ -1,108 +0,0 @@
|
||||
use std::collections::HashMap;
|
||||
use std::convert::TryFrom;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::sync::Arc;
|
||||
|
||||
use milli::update::IndexerConfig;
|
||||
use tokio::fs;
|
||||
use tokio::sync::RwLock;
|
||||
use tokio::task::spawn_blocking;
|
||||
use uuid::Uuid;
|
||||
|
||||
use super::error::{IndexResolverError, Result};
|
||||
use crate::index::Index;
|
||||
use crate::options::IndexerOpts;
|
||||
|
||||
/// A `HashMap` behind a tokio `RwLock`, shared through an `Arc`.
type AsyncMap<K, V> = Arc<RwLock<HashMap<K, V>>>;
|
||||
|
||||
#[async_trait::async_trait]
|
||||
#[cfg_attr(test, mockall::automock)]
|
||||
pub trait IndexStore {
|
||||
async fn create(&self, uuid: Uuid) -> Result<Index>;
|
||||
async fn get(&self, uuid: Uuid) -> Result<Option<Index>>;
|
||||
async fn delete(&self, uuid: Uuid) -> Result<Option<Index>>;
|
||||
}
|
||||
|
||||
pub struct MapIndexStore {
|
||||
index_store: AsyncMap<Uuid, Index>,
|
||||
path: PathBuf,
|
||||
index_size: usize,
|
||||
indexer_config: Arc<IndexerConfig>,
|
||||
}
|
||||
|
||||
impl MapIndexStore {
|
||||
pub fn new(
|
||||
path: impl AsRef<Path>,
|
||||
index_size: usize,
|
||||
indexer_opts: &IndexerOpts,
|
||||
) -> anyhow::Result<Self> {
|
||||
let indexer_config = Arc::new(IndexerConfig::try_from(indexer_opts)?);
|
||||
let path = path.as_ref().join("indexes/");
|
||||
let index_store = Arc::new(RwLock::new(HashMap::new()));
|
||||
Ok(Self {
|
||||
index_store,
|
||||
path,
|
||||
index_size,
|
||||
indexer_config,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl IndexStore for MapIndexStore {
|
||||
async fn create(&self, uuid: Uuid) -> Result<Index> {
|
||||
// We need to keep the lock until we are sure the db file has been opened correclty, to
|
||||
// ensure that another db is not created at the same time.
|
||||
let mut lock = self.index_store.write().await;
|
||||
|
||||
if let Some(index) = lock.get(&uuid) {
|
||||
return Ok(index.clone());
|
||||
}
|
||||
let path = self.path.join(format!("{}", uuid));
|
||||
if path.exists() {
|
||||
return Err(IndexResolverError::UuidAlreadyExists(uuid));
|
||||
}
|
||||
|
||||
let index_size = self.index_size;
|
||||
let update_handler = self.indexer_config.clone();
|
||||
let index = spawn_blocking(move || -> Result<Index> {
|
||||
let index = Index::open(path, index_size, uuid, update_handler)?;
|
||||
Ok(index)
|
||||
})
|
||||
.await??;
|
||||
|
||||
lock.insert(uuid, index.clone());
|
||||
|
||||
Ok(index)
|
||||
}
|
||||
|
||||
async fn get(&self, uuid: Uuid) -> Result<Option<Index>> {
|
||||
let guard = self.index_store.read().await;
|
||||
match guard.get(&uuid) {
|
||||
Some(index) => Ok(Some(index.clone())),
|
||||
None => {
|
||||
// drop the guard here so we can perform the write after without deadlocking;
|
||||
drop(guard);
|
||||
let path = self.path.join(format!("{}", uuid));
|
||||
if !path.exists() {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
let index_size = self.index_size;
|
||||
let update_handler = self.indexer_config.clone();
|
||||
let index =
|
||||
spawn_blocking(move || Index::open(path, index_size, uuid, update_handler))
|
||||
.await??;
|
||||
self.index_store.write().await.insert(uuid, index.clone());
|
||||
Ok(Some(index))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn delete(&self, uuid: Uuid) -> Result<Option<Index>> {
|
||||
let db_path = self.path.join(format!("{}", uuid));
|
||||
fs::remove_dir_all(db_path).await?;
|
||||
let index = self.index_store.write().await.remove(&uuid);
|
||||
Ok(index)
|
||||
}
|
||||
}
|
@@ -1,223 +0,0 @@
|
||||
use std::collections::HashSet;
|
||||
use std::fs::{create_dir_all, File};
|
||||
use std::io::{BufRead, BufReader, Write};
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::sync::Arc;
|
||||
use walkdir::WalkDir;
|
||||
|
||||
use milli::heed::types::{SerdeBincode, Str};
|
||||
use milli::heed::{CompactionOption, Database, Env};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use uuid::Uuid;
|
||||
|
||||
use super::error::{IndexResolverError, Result};
|
||||
use crate::tasks::task::TaskId;
|
||||
|
||||
#[derive(Serialize, Deserialize)]
|
||||
pub struct DumpEntry {
|
||||
pub uid: String,
|
||||
pub index_meta: IndexMeta,
|
||||
}
|
||||
|
||||
/// Directory name, relative to a snapshot or dump root, that holds the meta store data.
const UUIDS_DB_PATH: &str = "index_uuids";
|
||||
|
||||
#[async_trait::async_trait]
|
||||
#[cfg_attr(test, mockall::automock)]
|
||||
pub trait IndexMetaStore: Sized {
|
||||
// Create a new entry for `name`. Return an error if `err` and the entry already exists, return
|
||||
// the uuid otherwise.
|
||||
async fn get(&self, uid: String) -> Result<(String, Option<IndexMeta>)>;
|
||||
async fn delete(&self, uid: String) -> Result<Option<IndexMeta>>;
|
||||
async fn list(&self) -> Result<Vec<(String, IndexMeta)>>;
|
||||
async fn insert(&self, name: String, meta: IndexMeta) -> Result<()>;
|
||||
async fn snapshot(&self, path: PathBuf) -> Result<HashSet<Uuid>>;
|
||||
async fn get_size(&self) -> Result<u64>;
|
||||
async fn dump(&self, path: PathBuf) -> Result<()>;
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone)]
|
||||
pub struct IndexMeta {
|
||||
pub uuid: Uuid,
|
||||
pub creation_task_id: TaskId,
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct HeedMetaStore {
|
||||
env: Arc<Env>,
|
||||
db: Database<Str, SerdeBincode<IndexMeta>>,
|
||||
}
|
||||
|
||||
impl Drop for HeedMetaStore {
|
||||
fn drop(&mut self) {
|
||||
if Arc::strong_count(&self.env) == 1 {
|
||||
self.env.as_ref().clone().prepare_for_closing();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl HeedMetaStore {
|
||||
pub fn new(env: Arc<milli::heed::Env>) -> Result<Self> {
|
||||
let db = env.create_database(Some("uuids"))?;
|
||||
Ok(Self { env, db })
|
||||
}
|
||||
|
||||
/// Reads the metadata stored under `name` inside a read transaction.
fn get(&self, name: &str) -> Result<Option<IndexMeta>> {
    let rtxn = self.env.read_txn()?;
    let meta = self.db.get(&rtxn, name)?;
    Ok(meta)
}
|
||||
|
||||
/// Deletes the entry stored under `uid` and returns the metadata it held.
///
/// When there is no entry, nothing is committed and `None` is returned.
fn delete(&self, uid: String) -> Result<Option<IndexMeta>> {
    let mut wtxn = self.env.write_txn()?;

    let meta = match self.db.get(&wtxn, &uid)? {
        Some(meta) => meta,
        None => return Ok(None),
    };

    self.db.delete(&mut wtxn, &uid)?;
    wtxn.commit()?;
    Ok(Some(meta))
}
|
||||
|
||||
/// Collects every `(uid, metadata)` pair of the database inside a read transaction.
fn list(&self) -> Result<Vec<(String, IndexMeta)>> {
    let rtxn = self.env.read_txn()?;

    let mut pairs = Vec::new();
    for item in self.db.iter(&rtxn)? {
        let (uid, meta) = item?;
        pairs.push((uid.to_owned(), meta));
    }

    Ok(pairs)
}
|
||||
|
||||
pub(crate) fn insert(&self, name: String, meta: IndexMeta) -> Result<()> {
|
||||
let env = self.env.clone();
|
||||
let db = self.db;
|
||||
let mut txn = env.write_txn()?;
|
||||
|
||||
if db.get(&txn, &name)?.is_some() {
|
||||
return Err(IndexResolverError::IndexAlreadyExists(name));
|
||||
}
|
||||
|
||||
db.put(&mut txn, &name, &meta)?;
|
||||
txn.commit()?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn snapshot(&self, mut path: PathBuf) -> Result<HashSet<Uuid>> {
|
||||
// Write transaction to acquire a lock on the database.
|
||||
let txn = self.env.write_txn()?;
|
||||
let mut entries = HashSet::new();
|
||||
for entry in self.db.iter(&txn)? {
|
||||
let (_, IndexMeta { uuid, .. }) = entry?;
|
||||
entries.insert(uuid);
|
||||
}
|
||||
|
||||
// only perform snapshot if there are indexes
|
||||
if !entries.is_empty() {
|
||||
path.push(UUIDS_DB_PATH);
|
||||
create_dir_all(&path).unwrap();
|
||||
path.push("data.mdb");
|
||||
self.env.copy_to_path(path, CompactionOption::Enabled)?;
|
||||
}
|
||||
Ok(entries)
|
||||
}
|
||||
|
||||
fn get_size(&self) -> Result<u64> {
|
||||
Ok(WalkDir::new(self.env.path())
|
||||
.into_iter()
|
||||
.filter_map(|entry| entry.ok())
|
||||
.filter_map(|entry| entry.metadata().ok())
|
||||
.filter(|metadata| metadata.is_file())
|
||||
.fold(0, |acc, m| acc + m.len()))
|
||||
}
|
||||
|
||||
pub fn dump(&self, path: PathBuf) -> Result<()> {
|
||||
let dump_path = path.join(UUIDS_DB_PATH);
|
||||
create_dir_all(&dump_path)?;
|
||||
let dump_file_path = dump_path.join("data.jsonl");
|
||||
let mut dump_file = File::create(&dump_file_path)?;
|
||||
|
||||
let txn = self.env.read_txn()?;
|
||||
for entry in self.db.iter(&txn)? {
|
||||
let (uid, index_meta) = entry?;
|
||||
let uid = uid.to_string();
|
||||
|
||||
let entry = DumpEntry { uid, index_meta };
|
||||
serde_json::to_writer(&mut dump_file, &entry)?;
|
||||
dump_file.write_all(b"\n").unwrap();
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn load_dump(src: impl AsRef<Path>, env: Arc<milli::heed::Env>) -> Result<()> {
|
||||
let src_indexes = src.as_ref().join(UUIDS_DB_PATH).join("data.jsonl");
|
||||
let indexes = File::open(&src_indexes)?;
|
||||
let mut indexes = BufReader::new(indexes);
|
||||
let mut line = String::new();
|
||||
|
||||
let db = Self::new(env)?;
|
||||
let mut txn = db.env.write_txn()?;
|
||||
|
||||
loop {
|
||||
match indexes.read_line(&mut line) {
|
||||
Ok(0) => break,
|
||||
Ok(_) => {
|
||||
let DumpEntry { uid, index_meta } = serde_json::from_str(&line)?;
|
||||
db.db.put(&mut txn, &uid, &index_meta)?;
|
||||
}
|
||||
Err(e) => return Err(e.into()),
|
||||
}
|
||||
|
||||
line.clear();
|
||||
}
|
||||
txn.commit()?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl IndexMetaStore for HeedMetaStore {
|
||||
async fn get(&self, name: String) -> Result<(String, Option<IndexMeta>)> {
|
||||
let this = self.clone();
|
||||
tokio::task::spawn_blocking(move || this.get(&name).map(|res| (name, res))).await?
|
||||
}
|
||||
|
||||
async fn delete(&self, uid: String) -> Result<Option<IndexMeta>> {
|
||||
let this = self.clone();
|
||||
tokio::task::spawn_blocking(move || this.delete(uid)).await?
|
||||
}
|
||||
|
||||
async fn list(&self) -> Result<Vec<(String, IndexMeta)>> {
|
||||
let this = self.clone();
|
||||
tokio::task::spawn_blocking(move || this.list()).await?
|
||||
}
|
||||
|
||||
async fn insert(&self, name: String, meta: IndexMeta) -> Result<()> {
|
||||
let this = self.clone();
|
||||
tokio::task::spawn_blocking(move || this.insert(name, meta)).await?
|
||||
}
|
||||
|
||||
async fn snapshot(&self, path: PathBuf) -> Result<HashSet<Uuid>> {
|
||||
let this = self.clone();
|
||||
tokio::task::spawn_blocking(move || this.snapshot(path)).await?
|
||||
}
|
||||
|
||||
async fn get_size(&self) -> Result<u64> {
|
||||
self.get_size()
|
||||
}
|
||||
|
||||
async fn dump(&self, path: PathBuf) -> Result<()> {
|
||||
let this = self.clone();
|
||||
Ok(tokio::task::spawn_blocking(move || this.dump(path)).await??)
|
||||
}
|
||||
}
|
@@ -1,685 +0,0 @@
|
||||
pub mod error;
|
||||
pub mod index_store;
|
||||
pub mod meta_store;
|
||||
|
||||
use std::convert::TryFrom;
|
||||
use std::path::Path;
|
||||
use std::sync::Arc;
|
||||
|
||||
use error::{IndexResolverError, Result};
|
||||
use index_store::{IndexStore, MapIndexStore};
|
||||
use meilisearch_types::error::ResponseError;
|
||||
use meilisearch_types::index_uid::IndexUid;
|
||||
use meta_store::{HeedMetaStore, IndexMetaStore};
|
||||
use milli::heed::Env;
|
||||
use milli::update::{DocumentDeletionResult, IndexerConfig};
|
||||
use time::OffsetDateTime;
|
||||
use tokio::task::spawn_blocking;
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::index::{error::Result as IndexResult, Index};
|
||||
use crate::options::IndexerOpts;
|
||||
use crate::tasks::task::{DocumentDeletion, Task, TaskContent, TaskEvent, TaskId, TaskResult};
|
||||
use crate::update_file_store::UpdateFileStore;
|
||||
|
||||
use self::meta_store::IndexMeta;
|
||||
|
||||
/// Index resolver backed by [`HeedMetaStore`] and [`MapIndexStore`].
pub type HardStateIndexResolver = IndexResolver<HeedMetaStore, MapIndexStore>;
|
||||
|
||||
#[cfg(not(test))]
|
||||
pub use real::IndexResolver;
|
||||
|
||||
#[cfg(test)]
|
||||
pub use test::MockIndexResolver as IndexResolver;
|
||||
|
||||
pub fn create_index_resolver(
|
||||
path: impl AsRef<Path>,
|
||||
index_size: usize,
|
||||
indexer_opts: &IndexerOpts,
|
||||
meta_env: Arc<milli::heed::Env>,
|
||||
file_store: UpdateFileStore,
|
||||
) -> anyhow::Result<HardStateIndexResolver> {
|
||||
let uuid_store = HeedMetaStore::new(meta_env)?;
|
||||
let index_store = MapIndexStore::new(&path, index_size, indexer_opts)?;
|
||||
Ok(IndexResolver::new(uuid_store, index_store, file_store))
|
||||
}
|
||||
|
||||
mod real {
|
||||
use super::*;
|
||||
|
||||
pub struct IndexResolver<U, I> {
|
||||
pub(super) index_uuid_store: U,
|
||||
pub(super) index_store: I,
|
||||
pub(super) file_store: UpdateFileStore,
|
||||
}
|
||||
|
||||
impl IndexResolver<HeedMetaStore, MapIndexStore> {
|
||||
pub fn load_dump(
|
||||
src: impl AsRef<Path>,
|
||||
dst: impl AsRef<Path>,
|
||||
index_db_size: usize,
|
||||
env: Arc<Env>,
|
||||
indexer_opts: &IndexerOpts,
|
||||
) -> anyhow::Result<()> {
|
||||
HeedMetaStore::load_dump(&src, env)?;
|
||||
let indexes_path = src.as_ref().join("indexes");
|
||||
let indexes = indexes_path.read_dir()?;
|
||||
let indexer_config = IndexerConfig::try_from(indexer_opts)?;
|
||||
for index in indexes {
|
||||
Index::load_dump(&index?.path(), &dst, index_db_size, &indexer_config)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl<U, I> IndexResolver<U, I>
|
||||
where
|
||||
U: IndexMetaStore,
|
||||
I: IndexStore,
|
||||
{
|
||||
pub fn new(index_uuid_store: U, index_store: I, file_store: UpdateFileStore) -> Self {
|
||||
Self {
|
||||
index_uuid_store,
|
||||
index_store,
|
||||
file_store,
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn process_document_addition_batch(&self, tasks: &mut [Task]) {
|
||||
fn get_content_uuid(task: &Task) -> Uuid {
|
||||
match task {
|
||||
Task {
|
||||
content: TaskContent::DocumentAddition { content_uuid, .. },
|
||||
..
|
||||
} => *content_uuid,
|
||||
_ => panic!("unexpected task in the document addition batch"),
|
||||
}
|
||||
}
|
||||
|
||||
let content_uuids = tasks.iter().map(get_content_uuid).collect::<Vec<_>>();
|
||||
|
||||
match tasks.first() {
|
||||
Some(Task {
|
||||
id,
|
||||
content:
|
||||
TaskContent::DocumentAddition {
|
||||
merge_strategy,
|
||||
primary_key,
|
||||
allow_index_creation,
|
||||
index_uid,
|
||||
..
|
||||
},
|
||||
..
|
||||
}) => {
|
||||
let primary_key = primary_key.clone();
|
||||
let method = *merge_strategy;
|
||||
|
||||
let index = if *allow_index_creation {
|
||||
self.get_or_create_index(index_uid.clone(), *id).await
|
||||
} else {
|
||||
self.get_index(index_uid.as_str().to_string()).await
|
||||
};
|
||||
|
||||
// If the index doesn't exist and we are not allowed to create it with the first
|
||||
// task, we must fails the whole batch.
|
||||
let now = OffsetDateTime::now_utc();
|
||||
let index = match index {
|
||||
Ok(index) => index,
|
||||
Err(e) => {
|
||||
let error = ResponseError::from(e);
|
||||
for task in tasks.iter_mut() {
|
||||
task.events.push(TaskEvent::Failed {
|
||||
error: error.clone(),
|
||||
timestamp: now,
|
||||
});
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
let file_store = self.file_store.clone();
|
||||
let result = spawn_blocking(move || {
|
||||
index.update_documents(
|
||||
method,
|
||||
primary_key,
|
||||
file_store,
|
||||
content_uuids.into_iter(),
|
||||
)
|
||||
})
|
||||
.await;
|
||||
|
||||
match result {
|
||||
Ok(Ok(results)) => {
|
||||
for (task, result) in tasks.iter_mut().zip(results) {
|
||||
let event = match result {
|
||||
Ok(addition) => {
|
||||
TaskEvent::succeeded(TaskResult::DocumentAddition {
|
||||
indexed_documents: addition.indexed_documents,
|
||||
})
|
||||
}
|
||||
Err(error) => {
|
||||
TaskEvent::failed(IndexResolverError::from(error))
|
||||
}
|
||||
};
|
||||
task.events.push(event);
|
||||
}
|
||||
}
|
||||
Ok(Err(e)) => {
|
||||
let event = TaskEvent::failed(e);
|
||||
for task in tasks.iter_mut() {
|
||||
task.events.push(event.clone());
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
let event = TaskEvent::failed(IndexResolverError::from(e));
|
||||
for task in tasks.iter_mut() {
|
||||
task.events.push(event.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => panic!("invalid batch!"),
|
||||
}
|
||||
}
|
||||
|
||||
/// Deletes the content file identified by `content_uuid` from the file store.
pub async fn delete_content_file(&self, content_uuid: Uuid) -> Result<()> {
    let deletion = self.file_store.delete(content_uuid);
    deletion.await?;
    Ok(())
}
|
||||
|
||||
async fn process_task_inner(&self, task: &Task) -> Result<TaskResult> {
|
||||
match &task.content {
|
||||
TaskContent::DocumentAddition { .. } => {
|
||||
panic!("updates should be handled by batch")
|
||||
}
|
||||
TaskContent::DocumentDeletion {
|
||||
deletion: DocumentDeletion::Ids(ids),
|
||||
index_uid,
|
||||
} => {
|
||||
let ids = ids.clone();
|
||||
let index = self.get_index(index_uid.clone().into_inner()).await?;
|
||||
|
||||
let DocumentDeletionResult {
|
||||
deleted_documents, ..
|
||||
} = spawn_blocking(move || index.delete_documents(&ids)).await??;
|
||||
|
||||
Ok(TaskResult::DocumentDeletion { deleted_documents })
|
||||
}
|
||||
TaskContent::DocumentDeletion {
|
||||
deletion: DocumentDeletion::Clear,
|
||||
index_uid,
|
||||
} => {
|
||||
let index = self.get_index(index_uid.clone().into_inner()).await?;
|
||||
let deleted_documents = spawn_blocking(move || -> IndexResult<u64> {
|
||||
let number_documents = index.stats()?.number_of_documents;
|
||||
index.clear_documents()?;
|
||||
Ok(number_documents)
|
||||
})
|
||||
.await??;
|
||||
|
||||
Ok(TaskResult::ClearAll { deleted_documents })
|
||||
}
|
||||
TaskContent::SettingsUpdate {
|
||||
settings,
|
||||
is_deletion,
|
||||
allow_index_creation,
|
||||
index_uid,
|
||||
} => {
|
||||
let index = if *is_deletion || !*allow_index_creation {
|
||||
self.get_index(index_uid.clone().into_inner()).await?
|
||||
} else {
|
||||
self.get_or_create_index(index_uid.clone(), task.id).await?
|
||||
};
|
||||
|
||||
let settings = settings.clone();
|
||||
spawn_blocking(move || index.update_settings(&settings.check())).await??;
|
||||
|
||||
Ok(TaskResult::Other)
|
||||
}
|
||||
TaskContent::IndexDeletion { index_uid } => {
|
||||
let index = self.delete_index(index_uid.clone().into_inner()).await?;
|
||||
|
||||
let deleted_documents = spawn_blocking(move || -> IndexResult<u64> {
|
||||
Ok(index.stats()?.number_of_documents)
|
||||
})
|
||||
.await??;
|
||||
|
||||
Ok(TaskResult::ClearAll { deleted_documents })
|
||||
}
|
||||
TaskContent::IndexCreation {
|
||||
primary_key,
|
||||
index_uid,
|
||||
} => {
|
||||
let index = self.create_index(index_uid.clone(), task.id).await?;
|
||||
|
||||
if let Some(primary_key) = primary_key {
|
||||
let primary_key = primary_key.clone();
|
||||
spawn_blocking(move || index.update_primary_key(primary_key)).await??;
|
||||
}
|
||||
|
||||
Ok(TaskResult::Other)
|
||||
}
|
||||
TaskContent::IndexUpdate {
|
||||
primary_key,
|
||||
index_uid,
|
||||
} => {
|
||||
let index = self.get_index(index_uid.clone().into_inner()).await?;
|
||||
|
||||
if let Some(primary_key) = primary_key {
|
||||
let primary_key = primary_key.clone();
|
||||
spawn_blocking(move || index.update_primary_key(primary_key)).await??;
|
||||
}
|
||||
|
||||
Ok(TaskResult::Other)
|
||||
}
|
||||
_ => unreachable!("Invalid task for index resolver"),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn process_task(&self, task: &mut Task) {
|
||||
match self.process_task_inner(task).await {
|
||||
Ok(res) => task.events.push(TaskEvent::succeeded(res)),
|
||||
Err(e) => task.events.push(TaskEvent::failed(e)),
|
||||
}
|
||||
}
|
||||
|
||||
/// Dumps every listed index under `path`, then dumps the meta store there as well.
pub async fn dump(&self, path: impl AsRef<Path>) -> Result<()> {
    let indexes = self.list().await?;
    for (_uid, index) in indexes {
        index.dump(&path)?;
    }

    let meta_store_dump = self.index_uuid_store.dump(path.as_ref().to_owned());
    meta_store_dump.await?;
    Ok(())
}
|
||||
|
||||
async fn create_index(&self, uid: IndexUid, creation_task_id: TaskId) -> Result<Index> {
|
||||
match self.index_uuid_store.get(uid.into_inner()).await? {
|
||||
(uid, Some(_)) => Err(IndexResolverError::IndexAlreadyExists(uid)),
|
||||
(uid, None) => {
|
||||
let uuid = Uuid::new_v4();
|
||||
let index = self.index_store.create(uuid).await?;
|
||||
match self
|
||||
.index_uuid_store
|
||||
.insert(
|
||||
uid,
|
||||
IndexMeta {
|
||||
uuid,
|
||||
creation_task_id,
|
||||
},
|
||||
)
|
||||
.await
|
||||
{
|
||||
Err(e) => {
|
||||
match self.index_store.delete(uuid).await {
|
||||
Ok(Some(index)) => {
|
||||
index.close();
|
||||
}
|
||||
Ok(None) => (),
|
||||
Err(e) => log::error!("Error while deleting index: {:?}", e),
|
||||
}
|
||||
Err(e)
|
||||
}
|
||||
Ok(()) => Ok(index),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Get or create an index with name `uid`.
|
||||
pub async fn get_or_create_index(&self, uid: IndexUid, task_id: TaskId) -> Result<Index> {
|
||||
match self.create_index(uid, task_id).await {
|
||||
Ok(index) => Ok(index),
|
||||
Err(IndexResolverError::IndexAlreadyExists(uid)) => self.get_index(uid).await,
|
||||
Err(e) => Err(e),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn list(&self) -> Result<Vec<(String, Index)>> {
|
||||
let uuids = self.index_uuid_store.list().await?;
|
||||
let mut indexes = Vec::new();
|
||||
for (name, IndexMeta { uuid, .. }) in uuids {
|
||||
match self.index_store.get(uuid).await? {
|
||||
Some(index) => indexes.push((name, index)),
|
||||
None => {
|
||||
// we found an unexisting index, we remove it from the uuid store
|
||||
let _ = self.index_uuid_store.delete(name).await;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(indexes)
|
||||
}
|
||||
|
||||
pub async fn delete_index(&self, uid: String) -> Result<Index> {
|
||||
match self.index_uuid_store.delete(uid.clone()).await? {
|
||||
Some(IndexMeta { uuid, .. }) => match self.index_store.delete(uuid).await? {
|
||||
Some(index) => {
|
||||
index.clone().close();
|
||||
Ok(index)
|
||||
}
|
||||
None => Err(IndexResolverError::UnexistingIndex(uid)),
|
||||
},
|
||||
None => Err(IndexResolverError::UnexistingIndex(uid)),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn get_index(&self, uid: String) -> Result<Index> {
|
||||
match self.index_uuid_store.get(uid).await? {
|
||||
(name, Some(IndexMeta { uuid, .. })) => {
|
||||
match self.index_store.get(uuid).await? {
|
||||
Some(index) => Ok(index),
|
||||
None => {
|
||||
// For some reason we got a uuid to an unexisting index, we return an error,
|
||||
// and remove the uuid from the uuid store.
|
||||
let _ = self.index_uuid_store.delete(name.clone()).await;
|
||||
Err(IndexResolverError::UnexistingIndex(name))
|
||||
}
|
||||
}
|
||||
}
|
||||
(name, _) => Err(IndexResolverError::UnexistingIndex(name)),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn get_index_creation_task_id(&self, index_uid: String) -> Result<TaskId> {
|
||||
let (uid, meta) = self.index_uuid_store.get(index_uid).await?;
|
||||
meta.map(
|
||||
|IndexMeta {
|
||||
creation_task_id, ..
|
||||
}| creation_task_id,
|
||||
)
|
||||
.ok_or(IndexResolverError::UnexistingIndex(uid))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use crate::index::IndexStats;
|
||||
|
||||
use super::index_store::MockIndexStore;
|
||||
use super::meta_store::MockIndexMetaStore;
|
||||
use super::*;
|
||||
|
||||
use futures::future::ok;
|
||||
use milli::FieldDistribution;
|
||||
use nelson::Mocker;
|
||||
|
||||
pub enum MockIndexResolver<U, I> {
|
||||
Real(super::real::IndexResolver<U, I>),
|
||||
Mock(Mocker),
|
||||
}
|
||||
|
||||
impl MockIndexResolver<HeedMetaStore, MapIndexStore> {
|
||||
pub fn load_dump(
|
||||
src: impl AsRef<Path>,
|
||||
dst: impl AsRef<Path>,
|
||||
index_db_size: usize,
|
||||
env: Arc<Env>,
|
||||
indexer_opts: &IndexerOpts,
|
||||
) -> anyhow::Result<()> {
|
||||
super::real::IndexResolver::load_dump(src, dst, index_db_size, env, indexer_opts)
|
||||
}
|
||||
}
|
||||
|
||||
impl<U, I> MockIndexResolver<U, I>
|
||||
where
|
||||
U: IndexMetaStore,
|
||||
I: IndexStore,
|
||||
{
|
||||
pub fn new(index_uuid_store: U, index_store: I, file_store: UpdateFileStore) -> Self {
|
||||
Self::Real(super::real::IndexResolver {
|
||||
index_uuid_store,
|
||||
index_store,
|
||||
file_store,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn mock(mocker: Mocker) -> Self {
|
||||
Self::Mock(mocker)
|
||||
}
|
||||
|
||||
pub async fn process_document_addition_batch(&self, tasks: &mut [Task]) {
|
||||
match self {
|
||||
IndexResolver::Real(r) => r.process_document_addition_batch(tasks).await,
|
||||
IndexResolver::Mock(m) => unsafe {
|
||||
m.get("process_document_addition_batch").call(tasks)
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn process_task(&self, task: &mut Task) {
|
||||
match self {
|
||||
IndexResolver::Real(r) => r.process_task(task).await,
|
||||
IndexResolver::Mock(m) => unsafe { m.get("process_task").call(task) },
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn dump(&self, path: impl AsRef<Path>) -> Result<()> {
|
||||
match self {
|
||||
IndexResolver::Real(r) => r.dump(path).await,
|
||||
IndexResolver::Mock(_) => todo!(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Get or create an index with name `uid`.
|
||||
pub async fn get_or_create_index(&self, uid: IndexUid, task_id: TaskId) -> Result<Index> {
|
||||
match self {
|
||||
IndexResolver::Real(r) => r.get_or_create_index(uid, task_id).await,
|
||||
IndexResolver::Mock(_) => todo!(),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn list(&self) -> Result<Vec<(String, Index)>> {
|
||||
match self {
|
||||
IndexResolver::Real(r) => r.list().await,
|
||||
IndexResolver::Mock(_) => todo!(),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn delete_index(&self, uid: String) -> Result<Index> {
|
||||
match self {
|
||||
IndexResolver::Real(r) => r.delete_index(uid).await,
|
||||
IndexResolver::Mock(_) => todo!(),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn get_index(&self, uid: String) -> Result<Index> {
|
||||
match self {
|
||||
IndexResolver::Real(r) => r.get_index(uid).await,
|
||||
IndexResolver::Mock(_) => todo!(),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn get_index_creation_task_id(&self, index_uid: String) -> Result<TaskId> {
|
||||
match self {
|
||||
IndexResolver::Real(r) => r.get_index_creation_task_id(index_uid).await,
|
||||
IndexResolver::Mock(_) => todo!(),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn delete_content_file(&self, content_uuid: Uuid) -> Result<()> {
|
||||
match self {
|
||||
IndexResolver::Real(r) => r.delete_content_file(content_uuid).await,
|
||||
IndexResolver::Mock(m) => unsafe {
|
||||
m.get("delete_content_file").call(content_uuid)
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn test_remove_unknown_index() {
|
||||
let mut meta_store = MockIndexMetaStore::new();
|
||||
meta_store
|
||||
.expect_delete()
|
||||
.once()
|
||||
.returning(|_| Box::pin(ok(None)));
|
||||
|
||||
let index_store = MockIndexStore::new();
|
||||
|
||||
let mocker = Mocker::default();
|
||||
let file_store = UpdateFileStore::mock(mocker);
|
||||
|
||||
let index_resolver = IndexResolver::new(meta_store, index_store, file_store);
|
||||
|
||||
let mut task = Task {
|
||||
id: 1,
|
||||
content: TaskContent::IndexDeletion {
|
||||
index_uid: IndexUid::new_unchecked("test"),
|
||||
},
|
||||
events: Vec::new(),
|
||||
};
|
||||
|
||||
index_resolver.process_task(&mut task).await;
|
||||
|
||||
assert!(matches!(task.events[0], TaskEvent::Failed { .. }));
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn test_remove_index() {
|
||||
let mut meta_store = MockIndexMetaStore::new();
|
||||
meta_store.expect_delete().once().returning(|_| {
|
||||
Box::pin(ok(Some(IndexMeta {
|
||||
uuid: Uuid::new_v4(),
|
||||
creation_task_id: 1,
|
||||
})))
|
||||
});
|
||||
|
||||
let mut index_store = MockIndexStore::new();
|
||||
index_store.expect_delete().once().returning(|_| {
|
||||
let mocker = Mocker::default();
|
||||
mocker.when::<(), ()>("close").then(|_| ());
|
||||
mocker
|
||||
.when::<(), IndexResult<IndexStats>>("stats")
|
||||
.then(|_| {
|
||||
Ok(IndexStats {
|
||||
size: 10,
|
||||
number_of_documents: 10,
|
||||
is_indexing: None,
|
||||
field_distribution: FieldDistribution::default(),
|
||||
})
|
||||
});
|
||||
Box::pin(ok(Some(Index::mock(mocker))))
|
||||
});
|
||||
|
||||
let mocker = Mocker::default();
|
||||
let file_store = UpdateFileStore::mock(mocker);
|
||||
|
||||
let index_resolver = IndexResolver::new(meta_store, index_store, file_store);
|
||||
|
||||
let mut task = Task {
|
||||
id: 1,
|
||||
content: TaskContent::IndexDeletion {
|
||||
index_uid: IndexUid::new_unchecked("test"),
|
||||
},
|
||||
events: Vec::new(),
|
||||
};
|
||||
|
||||
index_resolver.process_task(&mut task).await;
|
||||
|
||||
assert!(matches!(task.events[0], TaskEvent::Succeeded { .. }));
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn test_delete_documents() {
|
||||
let mut meta_store = MockIndexMetaStore::new();
|
||||
meta_store.expect_get().once().returning(|_| {
|
||||
Box::pin(ok((
|
||||
"test".to_string(),
|
||||
Some(IndexMeta {
|
||||
uuid: Uuid::new_v4(),
|
||||
creation_task_id: 1,
|
||||
}),
|
||||
)))
|
||||
});
|
||||
|
||||
let mut index_store = MockIndexStore::new();
|
||||
index_store.expect_get().once().returning(|_| {
|
||||
let mocker = Mocker::default();
|
||||
mocker
|
||||
.when::<(), IndexResult<()>>("clear_documents")
|
||||
.once()
|
||||
.then(|_| Ok(()));
|
||||
mocker
|
||||
.when::<(), IndexResult<IndexStats>>("stats")
|
||||
.once()
|
||||
.then(|_| {
|
||||
Ok(IndexStats {
|
||||
size: 10,
|
||||
number_of_documents: 10,
|
||||
is_indexing: None,
|
||||
field_distribution: FieldDistribution::default(),
|
||||
})
|
||||
});
|
||||
Box::pin(ok(Some(Index::mock(mocker))))
|
||||
});
|
||||
|
||||
let mocker = Mocker::default();
|
||||
let file_store = UpdateFileStore::mock(mocker);
|
||||
|
||||
let index_resolver = IndexResolver::new(meta_store, index_store, file_store);
|
||||
|
||||
let mut task = Task {
|
||||
id: 1,
|
||||
content: TaskContent::DocumentDeletion {
|
||||
deletion: DocumentDeletion::Clear,
|
||||
index_uid: IndexUid::new_unchecked("test"),
|
||||
},
|
||||
events: Vec::new(),
|
||||
};
|
||||
|
||||
index_resolver.process_task(&mut task).await;
|
||||
|
||||
assert!(matches!(task.events[0], TaskEvent::Succeeded { .. }));
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn test_index_update() {
|
||||
let mut meta_store = MockIndexMetaStore::new();
|
||||
meta_store.expect_get().once().returning(|_| {
|
||||
Box::pin(ok((
|
||||
"test".to_string(),
|
||||
Some(IndexMeta {
|
||||
uuid: Uuid::new_v4(),
|
||||
creation_task_id: 1,
|
||||
}),
|
||||
)))
|
||||
});
|
||||
|
||||
let mut index_store = MockIndexStore::new();
|
||||
index_store.expect_get().once().returning(|_| {
|
||||
let mocker = Mocker::default();
|
||||
|
||||
mocker
|
||||
.when::<String, IndexResult<crate::index::IndexMeta>>("update_primary_key")
|
||||
.once()
|
||||
.then(|_| {
|
||||
Ok(crate::index::IndexMeta {
|
||||
created_at: OffsetDateTime::now_utc(),
|
||||
updated_at: OffsetDateTime::now_utc(),
|
||||
primary_key: Some("key".to_string()),
|
||||
})
|
||||
});
|
||||
Box::pin(ok(Some(Index::mock(mocker))))
|
||||
});
|
||||
|
||||
let mocker = Mocker::default();
|
||||
let file_store = UpdateFileStore::mock(mocker);
|
||||
|
||||
let index_resolver = IndexResolver::new(meta_store, index_store, file_store);
|
||||
|
||||
let mut task = Task {
|
||||
id: 1,
|
||||
content: TaskContent::IndexUpdate {
|
||||
primary_key: Some("key".to_string()),
|
||||
index_uid: IndexUid::new_unchecked("test"),
|
||||
},
|
||||
events: Vec::new(),
|
||||
};
|
||||
|
||||
index_resolver.process_task(&mut task).await;
|
||||
|
||||
assert!(matches!(task.events[0], TaskEvent::Succeeded { .. }));
|
||||
}
|
||||
}
|
@@ -1,37 +0,0 @@
|
||||
#[macro_use]
|
||||
pub mod error;
|
||||
pub mod options;
|
||||
|
||||
mod analytics;
|
||||
mod dump;
|
||||
pub mod index;
|
||||
pub mod index_controller;
|
||||
mod index_resolver;
|
||||
mod snapshot;
|
||||
pub mod tasks;
|
||||
mod update_file_store;
|
||||
|
||||
use std::path::Path;
|
||||
|
||||
pub use index_controller::MeiliSearch;
|
||||
pub use milli;
|
||||
pub use milli::heed;
|
||||
|
||||
mod compression;
|
||||
pub mod document_formats;
|
||||
|
||||
/// Check if a db is empty. It does not provide any information on the
/// validity of the data in it.
///
/// We consider a database as non empty when it's a non empty directory.
///
/// Returns `true` when `db_path` does not exist or is a directory without any entry.
/// Returns `false` when the directory has at least one entry, and also when `db_path`
/// exists but cannot be listed (it is a plain file, or reading it fails): in that case we
/// cannot prove the location is free, so it is reported as non empty.
pub fn is_empty_db(db_path: impl AsRef<Path>) -> bool {
    let db_path = db_path.as_ref();

    if !db_path.exists() {
        return true;
    }

    match db_path.read_dir() {
        // Looking at the first entry is enough; there is no need to walk the whole directory.
        Ok(mut entries) => entries.next().is_none(),
        // If we encounter an error or if the db is a file we consider the db non empty.
        Err(_) => false,
    }
}
|
@@ -1,166 +0,0 @@
|
||||
use core::fmt;
|
||||
use std::{convert::TryFrom, num::ParseIntError, ops::Deref, str::FromStr};
|
||||
|
||||
use byte_unit::{Byte, ByteError};
|
||||
use clap::Parser;
|
||||
use milli::update::IndexerConfig;
|
||||
use serde::Serialize;
|
||||
use sysinfo::{RefreshKind, System, SystemExt};
|
||||
|
||||
#[derive(Debug, Clone, Parser, Serialize)]
|
||||
pub struct IndexerOpts {
|
||||
/// The amount of documents to skip before printing
|
||||
/// a log regarding the indexing advancement.
|
||||
#[serde(skip)]
|
||||
#[clap(long, default_value = "100000", hide = true)] // 100k
|
||||
pub log_every_n: usize,
|
||||
|
||||
/// Grenad max number of chunks in bytes.
|
||||
#[serde(skip)]
|
||||
#[clap(long, hide = true)]
|
||||
pub max_nb_chunks: Option<usize>,
|
||||
|
||||
/// The maximum amount of memory the indexer will use. It defaults to 2/3
|
||||
/// of the available memory. It is recommended to use something like 80%-90%
|
||||
/// of the available memory, no more.
|
||||
///
|
||||
/// In case the engine is unable to retrieve the available memory the engine will
|
||||
/// try to use the memory it needs but without real limit, this can lead to
|
||||
/// Out-Of-Memory issues and it is recommended to specify the amount of memory to use.
|
||||
#[clap(long, env = "MEILI_MAX_INDEXING_MEMORY", default_value_t)]
|
||||
pub max_indexing_memory: MaxMemory,
|
||||
|
||||
/// The maximum number of threads the indexer will use.
|
||||
/// If the number set is higher than the real number of cores available in the machine,
|
||||
/// it will use the maximum number of available cores.
|
||||
///
|
||||
/// It defaults to half of the available threads.
|
||||
#[clap(long, env = "MEILI_MAX_INDEXING_THREADS", default_value_t)]
|
||||
pub max_indexing_threads: MaxThreads,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Parser, Default, Serialize)]
|
||||
pub struct SchedulerConfig {
|
||||
/// The engine will disable task auto-batching,
|
||||
/// and will sequencialy compute each task one by one.
|
||||
#[clap(long, env = "DISABLE_AUTO_BATCHING")]
|
||||
pub disable_auto_batching: bool,
|
||||
}
|
||||
|
||||
impl TryFrom<&IndexerOpts> for IndexerConfig {
|
||||
type Error = anyhow::Error;
|
||||
|
||||
fn try_from(other: &IndexerOpts) -> Result<Self, Self::Error> {
|
||||
let thread_pool = rayon::ThreadPoolBuilder::new()
|
||||
.num_threads(*other.max_indexing_threads)
|
||||
.build()?;
|
||||
|
||||
Ok(Self {
|
||||
log_every_n: Some(other.log_every_n),
|
||||
max_nb_chunks: other.max_nb_chunks,
|
||||
max_memory: other.max_indexing_memory.map(|b| b.get_bytes() as usize),
|
||||
thread_pool: Some(thread_pool),
|
||||
max_positions_per_attributes: None,
|
||||
..Default::default()
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for IndexerOpts {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
log_every_n: 100_000,
|
||||
max_nb_chunks: None,
|
||||
max_indexing_memory: MaxMemory::default(),
|
||||
max_indexing_threads: MaxThreads::default(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A type used to detect the max memory available and use 2/3 of it.
|
||||
#[derive(Debug, Clone, Copy, Serialize)]
|
||||
pub struct MaxMemory(Option<Byte>);
|
||||
|
||||
impl FromStr for MaxMemory {
|
||||
type Err = ByteError;
|
||||
|
||||
fn from_str(s: &str) -> Result<MaxMemory, ByteError> {
|
||||
Byte::from_str(s).map(Some).map(MaxMemory)
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for MaxMemory {
|
||||
fn default() -> MaxMemory {
|
||||
MaxMemory(
|
||||
total_memory_bytes()
|
||||
.map(|bytes| bytes * 2 / 3)
|
||||
.map(Byte::from_bytes),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for MaxMemory {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
match self.0 {
|
||||
Some(memory) => write!(f, "{}", memory.get_appropriate_unit(true)),
|
||||
None => f.write_str("unknown"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Deref for MaxMemory {
|
||||
type Target = Option<Byte>;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.0
|
||||
}
|
||||
}
|
||||
|
||||
impl MaxMemory {
|
||||
pub fn unlimited() -> Self {
|
||||
Self(None)
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the total amount of bytes available or `None` if this system isn't supported.
|
||||
fn total_memory_bytes() -> Option<u64> {
|
||||
if System::IS_SUPPORTED {
|
||||
let memory_kind = RefreshKind::new().with_memory();
|
||||
let mut system = System::new_with_specifics(memory_kind);
|
||||
system.refresh_memory();
|
||||
Some(system.total_memory() * 1024) // KiB into bytes
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, Serialize)]
|
||||
pub struct MaxThreads(usize);
|
||||
|
||||
impl FromStr for MaxThreads {
|
||||
type Err = ParseIntError;
|
||||
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
usize::from_str(s).map(Self)
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for MaxThreads {
|
||||
fn default() -> Self {
|
||||
MaxThreads(num_cpus::get() / 2)
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for MaxThreads {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(f, "{}", self.0)
|
||||
}
|
||||
}
|
||||
|
||||
impl Deref for MaxThreads {
|
||||
type Target = usize;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.0
|
||||
}
|
||||
}
|
@@ -1,202 +0,0 @@
|
||||
use std::fs;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use anyhow::bail;
|
||||
use fs_extra::dir::{self, CopyOptions};
|
||||
use log::{info, trace};
|
||||
use meilisearch_auth::open_auth_store_env;
|
||||
use milli::heed::CompactionOption;
|
||||
use tokio::sync::RwLock;
|
||||
use tokio::time::sleep;
|
||||
use walkdir::WalkDir;
|
||||
|
||||
use crate::compression::from_tar_gz;
|
||||
use crate::index_controller::open_meta_env;
|
||||
use crate::index_controller::versioning::VERSION_FILE_NAME;
|
||||
use crate::tasks::Scheduler;
|
||||
|
||||
pub struct SnapshotService {
|
||||
pub(crate) db_path: PathBuf,
|
||||
pub(crate) snapshot_period: Duration,
|
||||
pub(crate) snapshot_path: PathBuf,
|
||||
pub(crate) index_size: usize,
|
||||
pub(crate) meta_env_size: usize,
|
||||
pub(crate) scheduler: Arc<RwLock<Scheduler>>,
|
||||
}
|
||||
|
||||
impl SnapshotService {
|
||||
pub async fn run(self) {
|
||||
info!(
|
||||
"Snapshot scheduled every {}s.",
|
||||
self.snapshot_period.as_secs()
|
||||
);
|
||||
loop {
|
||||
let snapshot_job = SnapshotJob {
|
||||
dest_path: self.snapshot_path.clone(),
|
||||
src_path: self.db_path.clone(),
|
||||
meta_env_size: self.meta_env_size,
|
||||
index_size: self.index_size,
|
||||
};
|
||||
self.scheduler.write().await.schedule_snapshot(snapshot_job);
|
||||
sleep(self.snapshot_period).await;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn load_snapshot(
|
||||
db_path: impl AsRef<Path>,
|
||||
snapshot_path: impl AsRef<Path>,
|
||||
ignore_snapshot_if_db_exists: bool,
|
||||
ignore_missing_snapshot: bool,
|
||||
) -> anyhow::Result<()> {
|
||||
let empty_db = crate::is_empty_db(&db_path);
|
||||
let snapshot_path_exists = snapshot_path.as_ref().exists();
|
||||
|
||||
if empty_db && snapshot_path_exists {
|
||||
match from_tar_gz(snapshot_path, &db_path) {
|
||||
Ok(()) => Ok(()),
|
||||
Err(e) => {
|
||||
//clean created db folder
|
||||
std::fs::remove_dir_all(&db_path)?;
|
||||
Err(e)
|
||||
}
|
||||
}
|
||||
} else if !empty_db && !ignore_snapshot_if_db_exists {
|
||||
bail!(
|
||||
"database already exists at {:?}, try to delete it or rename it",
|
||||
db_path
|
||||
.as_ref()
|
||||
.canonicalize()
|
||||
.unwrap_or_else(|_| db_path.as_ref().to_owned())
|
||||
)
|
||||
} else if !snapshot_path_exists && !ignore_missing_snapshot {
|
||||
bail!("snapshot doesn't exist at {:?}", snapshot_path.as_ref())
|
||||
} else {
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// A single snapshot to perform: everything needed to copy a database into a snapshot file.
#[derive(Debug)]
pub struct SnapshotJob {
    /// Directory the final `<db name>.snapshot` file is written to.
    dest_path: PathBuf,
    /// Path of the database being snapshotted.
    src_path: PathBuf,

    /// Size passed to `open_meta_env` when opening the meta environment.
    meta_env_size: usize,
    /// Map size used when opening each index.
    index_size: usize,
}
|
||||
|
||||
impl SnapshotJob {
|
||||
pub async fn run(self) -> anyhow::Result<()> {
|
||||
tokio::task::spawn_blocking(|| self.run_sync()).await??;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn run_sync(self) -> anyhow::Result<()> {
|
||||
trace!("Performing snapshot.");
|
||||
|
||||
let snapshot_dir = self.dest_path.clone();
|
||||
std::fs::create_dir_all(&snapshot_dir)?;
|
||||
let temp_snapshot_dir = tempfile::tempdir()?;
|
||||
let temp_snapshot_path = temp_snapshot_dir.path();
|
||||
|
||||
self.snapshot_version_file(temp_snapshot_path)?;
|
||||
self.snapshot_meta_env(temp_snapshot_path)?;
|
||||
self.snapshot_file_store(temp_snapshot_path)?;
|
||||
self.snapshot_indexes(temp_snapshot_path)?;
|
||||
self.snapshot_auth(temp_snapshot_path)?;
|
||||
|
||||
let db_name = self
|
||||
.src_path
|
||||
.file_name()
|
||||
.and_then(|n| n.to_str())
|
||||
.unwrap_or("data.ms")
|
||||
.to_string();
|
||||
|
||||
let snapshot_path = self.dest_path.join(format!("{}.snapshot", db_name));
|
||||
let temp_snapshot_file = tempfile::NamedTempFile::new_in(&snapshot_dir)?;
|
||||
let temp_snapshot_file_path = temp_snapshot_file.path().to_owned();
|
||||
crate::compression::to_tar_gz(temp_snapshot_path, temp_snapshot_file_path)?;
|
||||
let _file = temp_snapshot_file.persist(&snapshot_path)?;
|
||||
|
||||
#[cfg(unix)]
|
||||
{
|
||||
use std::fs::Permissions;
|
||||
use std::os::unix::fs::PermissionsExt;
|
||||
|
||||
let perm = Permissions::from_mode(0o644);
|
||||
_file.set_permissions(perm)?;
|
||||
}
|
||||
|
||||
trace!("Created snapshot in {:?}.", snapshot_path);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn snapshot_version_file(&self, path: &Path) -> anyhow::Result<()> {
|
||||
let dst = path.join(VERSION_FILE_NAME);
|
||||
let src = self.src_path.join(VERSION_FILE_NAME);
|
||||
|
||||
fs::copy(src, dst)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn snapshot_meta_env(&self, path: &Path) -> anyhow::Result<()> {
|
||||
let env = open_meta_env(&self.src_path, self.meta_env_size)?;
|
||||
|
||||
let dst = path.join("data.mdb");
|
||||
env.copy_to_path(dst, milli::heed::CompactionOption::Enabled)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn snapshot_file_store(&self, path: &Path) -> anyhow::Result<()> {
|
||||
// for now we simply copy the updates/updates_files
|
||||
// FIXME(marin): We may copy more files than necessary, if new files are added while we are
|
||||
// performing the snapshop. We need a way to filter them out.
|
||||
|
||||
let dst = path.join("updates");
|
||||
fs::create_dir_all(&dst)?;
|
||||
let options = CopyOptions::default();
|
||||
dir::copy(self.src_path.join("updates/updates_files"), dst, &options)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn snapshot_indexes(&self, path: &Path) -> anyhow::Result<()> {
|
||||
let indexes_path = self.src_path.join("indexes/");
|
||||
let dst = path.join("indexes/");
|
||||
|
||||
for entry in WalkDir::new(indexes_path).max_depth(1).into_iter().skip(1) {
|
||||
let entry = entry?;
|
||||
let name = entry.file_name();
|
||||
let dst = dst.join(name);
|
||||
|
||||
std::fs::create_dir_all(&dst)?;
|
||||
|
||||
let dst = dst.join("data.mdb");
|
||||
|
||||
let mut options = milli::heed::EnvOpenOptions::new();
|
||||
options.map_size(self.index_size);
|
||||
let index = milli::Index::new(options, entry.path())?;
|
||||
index.copy_to_path(dst, CompactionOption::Enabled)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn snapshot_auth(&self, path: &Path) -> anyhow::Result<()> {
|
||||
let auth_path = self.src_path.join("auth");
|
||||
let dst = path.join("auth");
|
||||
std::fs::create_dir_all(&dst)?;
|
||||
let dst = dst.join("data.mdb");
|
||||
|
||||
let env = open_auth_store_env(&auth_path)?;
|
||||
env.copy_to_path(dst, milli::heed::CompactionOption::Enabled)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
@@ -1,75 +0,0 @@
|
||||
use time::OffsetDateTime;
|
||||
|
||||
use crate::snapshot::SnapshotJob;
|
||||
|
||||
use super::task::{Task, TaskEvent};
|
||||
|
||||
/// Identifier given to a batch (see `Batch::id`).
pub type BatchId = u32;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum BatchContent {
|
||||
DocumentsAdditionBatch(Vec<Task>),
|
||||
IndexUpdate(Task),
|
||||
Dump(Task),
|
||||
Snapshot(SnapshotJob),
|
||||
// Symbolizes a empty batch. This can occur when we were woken, but there wasn't any work to do.
|
||||
Empty,
|
||||
}
|
||||
|
||||
impl BatchContent {
|
||||
pub fn first(&self) -> Option<&Task> {
|
||||
match self {
|
||||
BatchContent::DocumentsAdditionBatch(ts) => ts.first(),
|
||||
BatchContent::Dump(t) | BatchContent::IndexUpdate(t) => Some(t),
|
||||
BatchContent::Snapshot(_) | BatchContent::Empty => None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn push_event(&mut self, event: TaskEvent) {
|
||||
match self {
|
||||
BatchContent::DocumentsAdditionBatch(ts) => {
|
||||
ts.iter_mut().for_each(|t| t.events.push(event.clone()))
|
||||
}
|
||||
BatchContent::IndexUpdate(t) | BatchContent::Dump(t) => t.events.push(event),
|
||||
BatchContent::Snapshot(_) | BatchContent::Empty => (),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct Batch {
|
||||
// Only batches that contains a persistent tasks are given an id. Snapshot batches don't have
|
||||
// an id.
|
||||
pub id: Option<BatchId>,
|
||||
pub created_at: OffsetDateTime,
|
||||
pub content: BatchContent,
|
||||
}
|
||||
|
||||
impl Batch {
|
||||
pub fn new(id: Option<BatchId>, content: BatchContent) -> Self {
|
||||
Self {
|
||||
id,
|
||||
created_at: OffsetDateTime::now_utc(),
|
||||
content,
|
||||
}
|
||||
}
|
||||
pub fn len(&self) -> usize {
|
||||
match self.content {
|
||||
BatchContent::DocumentsAdditionBatch(ref ts) => ts.len(),
|
||||
BatchContent::IndexUpdate(_) | BatchContent::Dump(_) | BatchContent::Snapshot(_) => 1,
|
||||
BatchContent::Empty => 0,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.len() == 0
|
||||
}
|
||||
|
||||
pub fn empty() -> Self {
|
||||
Self {
|
||||
id: None,
|
||||
created_at: OffsetDateTime::now_utc(),
|
||||
content: BatchContent::Empty,
|
||||
}
|
||||
}
|
||||
}
|
@@ -1,34 +0,0 @@
|
||||
use meilisearch_types::error::{Code, ErrorCode};
|
||||
use meilisearch_types::internal_error;
|
||||
use tokio::task::JoinError;
|
||||
|
||||
use crate::update_file_store::UpdateFileStoreError;
|
||||
|
||||
use super::task::TaskId;
|
||||
|
||||
/// `Result` alias whose error type is `TaskError`.
pub type Result<T> = std::result::Result<T, TaskError>;
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub enum TaskError {
|
||||
#[error("Task `{0}` not found.")]
|
||||
UnexistingTask(TaskId),
|
||||
#[error("Internal error: {0}")]
|
||||
Internal(Box<dyn std::error::Error + Send + Sync + 'static>),
|
||||
}
|
||||
|
||||
// NOTE(review): presumably generates the conversions from each listed error type into
// `TaskError::Internal` — confirm against the definition of `internal_error!`.
internal_error!(
    TaskError: milli::heed::Error,
    JoinError,
    std::io::Error,
    serde_json::Error,
    UpdateFileStoreError
);
|
||||
|
||||
impl ErrorCode for TaskError {
|
||||
fn error_code(&self) -> Code {
|
||||
match self {
|
||||
TaskError::UnexistingTask(_) => Code::TaskNotFound,
|
||||
TaskError::Internal(_) => Code::Internal,
|
||||
}
|
||||
}
|
||||
}
|
@@ -1,132 +0,0 @@
|
||||
use crate::dump::DumpHandler;
|
||||
use crate::index_resolver::index_store::IndexStore;
|
||||
use crate::index_resolver::meta_store::IndexMetaStore;
|
||||
use crate::tasks::batch::{Batch, BatchContent};
|
||||
use crate::tasks::task::{Task, TaskContent, TaskEvent, TaskResult};
|
||||
use crate::tasks::BatchHandler;
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl<U, I> BatchHandler for DumpHandler<U, I>
|
||||
where
|
||||
U: IndexMetaStore + Sync + Send + 'static,
|
||||
I: IndexStore + Sync + Send + 'static,
|
||||
{
|
||||
fn accept(&self, batch: &Batch) -> bool {
|
||||
matches!(batch.content, BatchContent::Dump { .. })
|
||||
}
|
||||
|
||||
async fn process_batch(&self, mut batch: Batch) -> Batch {
|
||||
match &batch.content {
|
||||
BatchContent::Dump(Task {
|
||||
content: TaskContent::Dump { uid },
|
||||
..
|
||||
}) => {
|
||||
match self.run(uid.clone()).await {
|
||||
Ok(_) => {
|
||||
batch
|
||||
.content
|
||||
.push_event(TaskEvent::succeeded(TaskResult::Other));
|
||||
}
|
||||
Err(e) => batch.content.push_event(TaskEvent::failed(e)),
|
||||
}
|
||||
batch
|
||||
}
|
||||
_ => unreachable!("invalid batch content for dump"),
|
||||
}
|
||||
}
|
||||
|
||||
async fn finish(&self, _: &Batch) {}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod test {
    use crate::dump::error::{DumpError, Result as DumpResult};
    use crate::index_resolver::{index_store::MockIndexStore, meta_store::MockIndexMetaStore};
    use crate::tasks::handlers::test::task_to_batch;

    use super::*;

    use nelson::Mocker;
    use proptest::prelude::*;

    proptest! {
        // `finish` must be callable on any batch without any stub being registered.
        #[test]
        fn finish_does_nothing(
            task in any::<Task>(),
        ) {
            let runtime = tokio::runtime::Runtime::new().unwrap();
            let job = runtime.spawn(async {
                let batch = task_to_batch(task);

                let handler =
                    DumpHandler::<MockIndexMetaStore, MockIndexStore>::mock(Mocker::default());

                handler.finish(&batch).await;
            });

            runtime.block_on(job).unwrap();
        }

        // A dump batch whose `run` succeeds must end with a `Succeeded` event; every other
        // batch must be refused.
        #[test]
        fn test_handle_dump_success(
            task in any::<Task>(),
        ) {
            let runtime = tokio::runtime::Runtime::new().unwrap();
            let job = runtime.spawn(async {
                let batch = task_to_batch(task);
                let is_dump = matches!(batch.content, BatchContent::Dump { .. });

                let stubs = Mocker::default();
                if is_dump {
                    stubs.when::<String, DumpResult<()>>("run")
                        .once()
                        .then(|_| Ok(()));
                }

                let handler = DumpHandler::<MockIndexMetaStore, MockIndexStore>::mock(stubs);

                let accepted = handler.accept(&batch);
                assert_eq!(accepted, is_dump);

                if accepted {
                    let processed = handler.process_batch(batch).await;
                    let last_event = processed.content.first().unwrap().events.last().unwrap();
                    assert!(matches!(last_event, TaskEvent::Succeeded { .. }));
                }
            });

            runtime.block_on(job).unwrap();
        }

        // A dump batch whose `run` fails must end with a `Failed` event; every other batch
        // must be refused.
        #[test]
        fn test_handle_dump_error(
            task in any::<Task>(),
        ) {
            let runtime = tokio::runtime::Runtime::new().unwrap();
            let job = runtime.spawn(async {
                let batch = task_to_batch(task);
                let is_dump = matches!(batch.content, BatchContent::Dump { .. });

                let stubs = Mocker::default();
                if is_dump {
                    stubs.when::<String, DumpResult<()>>("run")
                        .once()
                        .then(|_| Err(DumpError::Internal("error".into())));
                }

                let handler = DumpHandler::<MockIndexMetaStore, MockIndexStore>::mock(stubs);

                let accepted = handler.accept(&batch);
                assert_eq!(accepted, is_dump);

                if accepted {
                    let processed = handler.process_batch(batch).await;
                    let last_event = processed.content.first().unwrap().events.last().unwrap();
                    assert!(matches!(last_event, TaskEvent::Failed { .. }));
                }
            });

            runtime.block_on(job).unwrap();
        }
    }
}
|
@@ -1,18 +0,0 @@
|
||||
use crate::tasks::batch::{Batch, BatchContent};
|
||||
use crate::tasks::BatchHandler;
|
||||
|
||||
/// A sink handler for empty tasks.
///
/// It accepts empty batches only and hands them back untouched.
pub struct EmptyBatchHandler;
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl BatchHandler for EmptyBatchHandler {
|
||||
fn accept(&self, batch: &Batch) -> bool {
|
||||
matches!(batch.content, BatchContent::Empty)
|
||||
}
|
||||
|
||||
async fn process_batch(&self, batch: Batch) -> Batch {
|
||||
batch
|
||||
}
|
||||
|
||||
async fn finish(&self, _: &Batch) {}
|
||||
}
|
@@ -1,199 +0,0 @@
|
||||
use crate::index_resolver::IndexResolver;
|
||||
use crate::index_resolver::{index_store::IndexStore, meta_store::IndexMetaStore};
|
||||
use crate::tasks::batch::{Batch, BatchContent};
|
||||
use crate::tasks::BatchHandler;
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl<U, I> BatchHandler for IndexResolver<U, I>
|
||||
where
|
||||
U: IndexMetaStore + Send + Sync + 'static,
|
||||
I: IndexStore + Send + Sync + 'static,
|
||||
{
|
||||
fn accept(&self, batch: &Batch) -> bool {
|
||||
matches!(
|
||||
batch.content,
|
||||
BatchContent::DocumentsAdditionBatch(_) | BatchContent::IndexUpdate(_)
|
||||
)
|
||||
}
|
||||
|
||||
async fn process_batch(&self, mut batch: Batch) -> Batch {
|
||||
match batch.content {
|
||||
BatchContent::DocumentsAdditionBatch(ref mut tasks) => {
|
||||
self.process_document_addition_batch(tasks).await;
|
||||
}
|
||||
BatchContent::IndexUpdate(ref mut task) => {
|
||||
self.process_task(task).await;
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
|
||||
batch
|
||||
}
|
||||
|
||||
async fn finish(&self, batch: &Batch) {
|
||||
if let BatchContent::DocumentsAdditionBatch(ref tasks) = batch.content {
|
||||
for task in tasks {
|
||||
if let Some(content_uuid) = task.get_content_uuid() {
|
||||
if let Err(e) = self.delete_content_file(content_uuid).await {
|
||||
log::error!("error deleting update file: {}", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod test {
    use crate::index_resolver::index_store::MapIndexStore;
    use crate::index_resolver::meta_store::HeedMetaStore;
    use crate::index_resolver::{
        error::Result as IndexResult, index_store::MockIndexStore, meta_store::MockIndexMetaStore,
    };
    use crate::tasks::{
        handlers::test::task_to_batch,
        task::{Task, TaskContent},
    };
    use crate::update_file_store::{Result as FileStoreResult, UpdateFileStore};

    use super::*;
    use meilisearch_types::index_uid::IndexUid;
    use milli::update::IndexDocumentsMethod;
    use nelson::Mocker;
    use proptest::prelude::*;
    use uuid::Uuid;

    proptest! {
        // The resolver accepts documents-addition and index-update batches, nothing else.
        #[test]
        fn test_accept_task(
            task in any::<Task>(),
        ) {
            let batch = task_to_batch(task);

            let indexes = MockIndexStore::new();
            let metas = MockIndexMetaStore::new();
            let update_files = UpdateFileStore::mock(Mocker::default());
            let resolver = IndexResolver::new(metas, indexes, update_files);

            // Kept exhaustive on purpose: a new variant must be classified here.
            let should_accept = match batch.content {
                BatchContent::DocumentsAdditionBatch(_)
                | BatchContent::IndexUpdate(_) => true,
                BatchContent::Dump(_)
                | BatchContent::Snapshot(_)
                | BatchContent::Empty => false,
            };

            if should_accept {
                assert!(resolver.accept(&batch));
            } else {
                assert!(!resolver.accept(&batch));
            }
        }
    }

    // Finishing a documents-addition batch must delete the update file of its task.
    #[actix_rt::test]
    async fn finisher_called_on_document_update() {
        let indexes = MockIndexStore::new();
        let metas = MockIndexMetaStore::new();

        let content_uuid = Uuid::new_v4();

        // The file store expects exactly one `delete`, for the uuid of the task.
        let file_store_mocker = Mocker::default();
        file_store_mocker
            .when::<Uuid, FileStoreResult<()>>("delete")
            .once()
            .then(move |uuid| {
                assert_eq!(uuid, content_uuid);
                Ok(())
            });
        let update_files = UpdateFileStore::mock(file_store_mocker);
        let resolver = IndexResolver::new(metas, indexes, update_files);

        let addition = Task {
            id: 1,
            content: TaskContent::DocumentAddition {
                content_uuid,
                merge_strategy: IndexDocumentsMethod::ReplaceDocuments,
                primary_key: None,
                documents_count: 100,
                allow_index_creation: true,
                index_uid: IndexUid::new_unchecked("test"),
            },
            events: Vec::new(),
        };

        let batch = task_to_batch(addition);

        resolver.finish(&batch).await;
    }

    // Processing a batch kind the resolver does not accept must panic.
    #[actix_rt::test]
    #[should_panic]
    async fn panic_when_passed_unsupported_batch() {
        let indexes = MockIndexStore::new();
        let metas = MockIndexMetaStore::new();
        let update_files = UpdateFileStore::mock(Mocker::default());
        let resolver = IndexResolver::new(metas, indexes, update_files);

        let dump = Task {
            id: 1,
            content: TaskContent::Dump {
                uid: String::from("hello"),
            },
            events: Vec::new(),
        };

        let batch = task_to_batch(dump);

        resolver.process_batch(batch).await;
    }

    proptest! {
        // `finish` only needs the `delete_content_file` stub for document additions.
        #[test]
        fn index_document_task_deletes_update_file(
            task in any::<Task>(),
        ) {
            let runtime = tokio::runtime::Runtime::new().unwrap();
            let job = runtime.spawn(async {
                let stubs = Mocker::default();

                if let TaskContent::DocumentAddition{ .. } = task.content {
                    stubs.when::<Uuid, IndexResult<()>>("delete_content_file").then(|_| Ok(()));
                }

                let resolver: IndexResolver<HeedMetaStore, MapIndexStore> = IndexResolver::mock(stubs);

                let batch = task_to_batch(task);

                resolver.finish(&batch).await;
            });

            runtime.block_on(job).unwrap();
        }

        // Every accepted batch must be routed to the stub matching its task kind.
        #[test]
        fn test_handle_batch(task in any::<Task>()) {
            let runtime = tokio::runtime::Runtime::new().unwrap();
            let job = runtime.spawn(async {
                let stubs = Mocker::default();
                match task.content {
                    TaskContent::DocumentAddition { .. } => {
                        stubs.when::<&mut [Task], ()>("process_document_addition_batch").then(|_| ());
                    }
                    TaskContent::Dump { .. } => (),
                    _ => {
                        stubs.when::<&mut Task, ()>("process_task").then(|_| ());
                    }
                }
                let resolver: IndexResolver<HeedMetaStore, MapIndexStore> = IndexResolver::mock(stubs);

                let batch = task_to_batch(task);

                if resolver.accept(&batch) {
                    resolver.process_batch(batch).await;
                }
            });

            // Re-raise a panic of the spawned task on the test thread.
            if let Err(e) = runtime.block_on(job) {
                if e.is_panic() {
                    std::panic::resume_unwind(e.into_panic());
                }
            }
        }
    }
}
|
@@ -1,34 +0,0 @@
|
||||
pub mod dump_handler;
|
||||
pub mod empty_handler;
|
||||
mod index_resolver_handler;
|
||||
pub mod snapshot_handler;
|
||||
|
||||
#[cfg(test)]
mod test {
    use time::OffsetDateTime;

    use crate::tasks::{
        batch::{Batch, BatchContent},
        task::{Task, TaskContent},
    };

    /// Wraps a single task into a `Batch` with id `Some(1)`, created now.
    ///
    /// Document additions become a one-element `DocumentsAdditionBatch`, dumps become a
    /// `Dump` batch, and every other kind of task becomes an `IndexUpdate` batch.
    pub fn task_to_batch(task: Task) -> Batch {
        // Pick the batch constructor from the task kind, then hand the task over to it.
        let wrap: fn(Task) -> BatchContent = match task.content {
            TaskContent::Dump { .. } => BatchContent::Dump,
            TaskContent::DocumentAddition { .. } => {
                |task| BatchContent::DocumentsAdditionBatch(vec![task])
            }
            TaskContent::DocumentDeletion { .. }
            | TaskContent::SettingsUpdate { .. }
            | TaskContent::IndexDeletion { .. }
            | TaskContent::IndexCreation { .. }
            | TaskContent::IndexUpdate { .. } => BatchContent::IndexUpdate,
        };
        let content = wrap(task);

        Batch {
            id: Some(1),
            created_at: OffsetDateTime::now_utc(),
            content,
        }
    }
}
|
@@ -1,26 +0,0 @@
|
||||
use crate::tasks::batch::{Batch, BatchContent};
|
||||
use crate::tasks::BatchHandler;
|
||||
|
||||
/// Batch handler for `BatchContent::Snapshot` batches: it runs the snapshot job they carry.
pub struct SnapshotHandler;
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl BatchHandler for SnapshotHandler {
|
||||
fn accept(&self, batch: &Batch) -> bool {
|
||||
matches!(batch.content, BatchContent::Snapshot(_))
|
||||
}
|
||||
|
||||
async fn process_batch(&self, batch: Batch) -> Batch {
|
||||
match batch.content {
|
||||
BatchContent::Snapshot(job) => {
|
||||
if let Err(e) = job.run().await {
|
||||
log::error!("snapshot error: {e}");
|
||||
}
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
|
||||
Batch::empty()
|
||||
}
|
||||
|
||||
async fn finish(&self, _: &Batch) {}
|
||||
}
|
@@ -1,56 +0,0 @@
|
||||
use async_trait::async_trait;
|
||||
|
||||
pub use handlers::empty_handler::EmptyBatchHandler;
|
||||
pub use handlers::snapshot_handler::SnapshotHandler;
|
||||
pub use scheduler::Scheduler;
|
||||
pub use task_store::TaskFilter;
|
||||
|
||||
#[cfg(test)]
|
||||
pub use task_store::test::MockTaskStore as TaskStore;
|
||||
#[cfg(not(test))]
|
||||
pub use task_store::TaskStore;
|
||||
|
||||
use batch::Batch;
|
||||
use error::Result;
|
||||
|
||||
pub mod batch;
|
||||
pub mod error;
|
||||
mod handlers;
|
||||
mod scheduler;
|
||||
pub mod task;
|
||||
mod task_store;
|
||||
pub mod update_loop;
|
||||
|
||||
/// A handler able to process some kinds of `Batch`.
///
/// A handler is first asked through `accept` whether it supports a batch, then
/// `process_batch` is called, and finally `finish`.
#[cfg_attr(test, mockall::automock(type Error=test::DebugError;))]
|
||||
#[async_trait]
|
||||
pub trait BatchHandler: Sync + Send + 'static {
|
||||
/// Returns whether this handler can accept this batch.
|
||||
fn accept(&self, batch: &Batch) -> bool;
|
||||
|
||||
/// Processes the `Task` batch, returning the batch with the `Task` updated.
|
||||
///
|
||||
/// It is ok for this function to panic if it is handed a batch that hasn't been verified by
|
||||
/// `accept` beforehand.
|
||||
async fn process_batch(&self, batch: Batch) -> Batch;
|
||||
|
||||
/// `finish` is called when the result of `process_batch` has been committed to the task store. This
|
||||
/// method can be used to perform cleanup after the update has been completed, for example.
|
||||
async fn finish(&self, batch: &Batch);
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod test {
    use std::fmt;

    use serde::{Deserialize, Serialize};

    /// Placeholder error type used as the `Error` type of the mocked `BatchHandler`.
    #[derive(Debug, Serialize, Deserialize)]
    pub struct DebugError;

    impl fmt::Display for DebugError {
        fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
            write!(formatter, "an error")
        }
    }

    impl std::error::Error for DebugError {}
}
|
@@ -1,195 +0,0 @@
|
||||
use meilisearch_types::error::ResponseError;
|
||||
use meilisearch_types::index_uid::IndexUid;
|
||||
use milli::update::{DocumentAdditionResult, IndexDocumentsMethod};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use time::OffsetDateTime;
|
||||
use uuid::Uuid;
|
||||
|
||||
use super::batch::BatchId;
|
||||
use crate::index::{Settings, Unchecked};
|
||||
|
||||
pub type TaskId = u32;
|
||||
|
||||
/// Outcome attached to a `TaskEvent::Succeeded` event.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
|
||||
#[cfg_attr(test, derive(proptest_derive::Arbitrary))]
|
||||
pub enum TaskResult {
|
||||
/// Result of a document addition, with the number of indexed documents.
DocumentAddition { indexed_documents: u64 },
|
||||
/// Result of a document deletion, with the number of deleted documents.
DocumentDeletion { deleted_documents: u64 },
|
||||
/// Result of clearing all documents, with the number of deleted documents.
ClearAll { deleted_documents: u64 },
|
||||
/// Any result that carries no additional information.
Other,
|
||||
}
|
||||
|
||||
impl From<DocumentAdditionResult> for TaskResult {
|
||||
fn from(other: DocumentAdditionResult) -> Self {
|
||||
Self::DocumentAddition {
|
||||
indexed_documents: other.indexed_documents,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A step in the life of a task. Events are pushed to `Task::events`; the last one gives the
/// current state (see `Task::is_finished`). Every timestamp is serialized as RFC 3339.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
|
||||
#[cfg_attr(test, derive(proptest_derive::Arbitrary))]
|
||||
pub enum TaskEvent {
|
||||
/// The task was created at the given time.
Created(
|
||||
#[cfg_attr(test, proptest(strategy = "test::datetime_strategy()"))]
|
||||
#[serde(with = "time::serde::rfc3339")]
|
||||
OffsetDateTime,
|
||||
),
|
||||
/// The task was put in the batch `batch_id` at `timestamp`.
Batched {
|
||||
#[cfg_attr(test, proptest(strategy = "test::datetime_strategy()"))]
|
||||
#[serde(with = "time::serde::rfc3339")]
|
||||
timestamp: OffsetDateTime,
|
||||
batch_id: BatchId,
|
||||
},
|
||||
/// The task started being processed at the given time.
Processing(
|
||||
#[cfg_attr(test, proptest(strategy = "test::datetime_strategy()"))]
|
||||
#[serde(with = "time::serde::rfc3339")]
|
||||
OffsetDateTime,
|
||||
),
|
||||
/// The task finished successfully with `result` at `timestamp`.
Succeeded {
|
||||
result: TaskResult,
|
||||
#[cfg_attr(test, proptest(strategy = "test::datetime_strategy()"))]
|
||||
#[serde(with = "time::serde::rfc3339")]
|
||||
timestamp: OffsetDateTime,
|
||||
},
|
||||
/// The task failed with `error` at `timestamp`.
Failed {
|
||||
error: ResponseError,
|
||||
#[cfg_attr(test, proptest(strategy = "test::datetime_strategy()"))]
|
||||
#[serde(with = "time::serde::rfc3339")]
|
||||
timestamp: OffsetDateTime,
|
||||
},
|
||||
}
|
||||
|
||||
impl TaskEvent {
|
||||
pub fn succeeded(result: TaskResult) -> Self {
|
||||
Self::Succeeded {
|
||||
result,
|
||||
timestamp: OffsetDateTime::now_utc(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn failed(error: impl Into<ResponseError>) -> Self {
|
||||
Self::Failed {
|
||||
error: error.into(),
|
||||
timestamp: OffsetDateTime::now_utc(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A task represents an operation that Meilisearch must do.
|
||||
/// It's stored on disk and executed from the lowest to highest Task id.
|
||||
/// Every time a new task is created it has a higher Task id than the previous one.
|
||||
/// See also `Job`.
|
||||
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
|
||||
#[cfg_attr(test, derive(proptest_derive::Arbitrary))]
|
||||
pub struct Task {
|
||||
/// Identifier of the task.
pub id: TaskId,
|
||||
/// What the task must do. Every variant except `Dump` carries the uid of the index it
|
||||
/// targets (see `Task::index_uid`).
|
||||
// NOTE(review): an earlier TODO here asked to move the index uid inside of the TaskContent;
|
||||
// the variants now carry it, so that TODO looks done — confirm.
|
||||
pub content: TaskContent,
|
||||
/// History of the task; the last event gives its current state.
pub events: Vec<TaskEvent>,
|
||||
}
|
||||
|
||||
impl Task {
|
||||
/// Return true when a task is finished.
|
||||
/// A task is finished when its last state is either `Succeeded` or `Failed`.
|
||||
pub fn is_finished(&self) -> bool {
|
||||
self.events.last().map_or(false, |event| {
|
||||
matches!(
|
||||
event,
|
||||
TaskEvent::Succeeded { .. } | TaskEvent::Failed { .. }
|
||||
)
|
||||
})
|
||||
}
|
||||
|
||||
/// Return the content_uuid of the `Task` if there is one.
|
||||
pub fn get_content_uuid(&self) -> Option<Uuid> {
|
||||
match self {
|
||||
Task {
|
||||
content: TaskContent::DocumentAddition { content_uuid, .. },
|
||||
..
|
||||
} => Some(*content_uuid),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn index_uid(&self) -> Option<&str> {
|
||||
match &self.content {
|
||||
TaskContent::DocumentAddition { index_uid, .. }
|
||||
| TaskContent::DocumentDeletion { index_uid, .. }
|
||||
| TaskContent::SettingsUpdate { index_uid, .. }
|
||||
| TaskContent::IndexDeletion { index_uid }
|
||||
| TaskContent::IndexCreation { index_uid, .. }
|
||||
| TaskContent::IndexUpdate { index_uid, .. } => Some(index_uid.as_str()),
|
||||
TaskContent::Dump { .. } => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Payload of a `TaskContent::DocumentDeletion` task.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
|
||||
#[cfg_attr(test, derive(proptest_derive::Arbitrary))]
|
||||
pub enum DocumentDeletion {
|
||||
/// Presumably deletes every document of the index — confirm against the handler.
Clear,
|
||||
/// Carries a list of string ids — presumably the ids of the documents to delete.
Ids(Vec<String>),
|
||||
}
|
||||
|
||||
/// The operation a `Task` carries. Every variant except `Dump` names the index it targets.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
|
||||
#[cfg_attr(test, derive(proptest_derive::Arbitrary))]
|
||||
#[allow(clippy::large_enum_variant)]
|
||||
pub enum TaskContent {
|
||||
/// Addition of documents to an index.
DocumentAddition {
|
||||
index_uid: IndexUid,
|
||||
#[cfg_attr(test, proptest(value = "Uuid::new_v4()"))]
|
||||
// Returned by `Task::get_content_uuid`; presumably names the update file holding the
// documents — confirm against the update file store.
content_uuid: Uuid,
|
||||
#[cfg_attr(test, proptest(strategy = "test::index_document_method_strategy()"))]
|
||||
merge_strategy: IndexDocumentsMethod,
|
||||
primary_key: Option<String>,
|
||||
documents_count: usize,
|
||||
allow_index_creation: bool,
|
||||
},
|
||||
/// Deletion of documents from an index; see `DocumentDeletion`.
DocumentDeletion {
|
||||
index_uid: IndexUid,
|
||||
deletion: DocumentDeletion,
|
||||
},
|
||||
/// Update of the settings of an index.
SettingsUpdate {
|
||||
index_uid: IndexUid,
|
||||
settings: Settings<Unchecked>,
|
||||
/// Indicates whether the task was a deletion
|
||||
is_deletion: bool,
|
||||
allow_index_creation: bool,
|
||||
},
|
||||
/// Deletion of an index.
IndexDeletion {
|
||||
index_uid: IndexUid,
|
||||
},
|
||||
/// Creation of an index, with an optional primary key.
IndexCreation {
|
||||
index_uid: IndexUid,
|
||||
primary_key: Option<String>,
|
||||
},
|
||||
/// Update of an index, with an optional primary key.
IndexUpdate {
|
||||
index_uid: IndexUid,
|
||||
primary_key: Option<String>,
|
||||
},
|
||||
/// Dump task; it targets no index (`Task::index_uid` returns `None`).
Dump {
|
||||
uid: String,
|
||||
},
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod test {
    use proptest::prelude::*;

    use super::*;

    /// Strategy picking between the `ReplaceDocuments` and `UpdateDocuments` methods.
    pub(super) fn index_document_method_strategy() -> impl Strategy<Value = IndexDocumentsMethod> {
        use IndexDocumentsMethod::{ReplaceDocuments, UpdateDocuments};

        prop_oneof![Just(ReplaceDocuments), Just(UpdateDocuments)]
    }

    /// Strategy always yielding the UTC time at which the strategy was built.
    pub(super) fn datetime_strategy() -> impl Strategy<Value = OffsetDateTime> {
        let now = OffsetDateTime::now_utc();
        Just(now)
    }
}
|
@@ -1,377 +0,0 @@
|
||||
// Big-endian `u32`, used as the key type of the `tasks` database.
#[allow(clippy::upper_case_acronyms)]
|
||||
|
||||
type BEU32 = milli::heed::zerocopy::U32<milli::heed::byteorder::BE>;
|
||||
|
||||
// Name of the database mapping an index uid to the ids of its tasks.
const INDEX_UIDS_TASK_IDS: &str = "index-uids-task-ids";
|
||||
// Name of the database mapping a task id to its task.
const TASKS: &str = "tasks";
|
||||
|
||||
use std::collections::HashSet;
|
||||
use std::ops::Bound::{Excluded, Unbounded};
|
||||
use std::result::Result as StdResult;
|
||||
use std::sync::Arc;
|
||||
|
||||
use milli::heed::types::{OwnedType, SerdeJson, Str};
|
||||
use milli::heed::{Database, Env, RoTxn, RwTxn};
|
||||
use milli::heed_codec::RoaringBitmapCodec;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use crate::tasks::task::{Task, TaskId};
|
||||
|
||||
use super::super::Result;
|
||||
use super::TaskFilter;
|
||||
|
||||
/// Task store backed by two heed databases living in a shared environment.
pub struct Store {
|
||||
/// Shared heed environment; when the store holding the last reference is dropped, the
/// environment is prepared for closing.
env: Arc<Env>,
|
||||
/// Maps an index uid to the set of tasks ids associated to it.
|
||||
index_uid_task_ids: Database<Str, RoaringBitmapCodec>,
|
||||
/// Maps a task id (big-endian `u32` key) to its task, serialized as JSON.
tasks: Database<OwnedType<BEU32>, SerdeJson<Task>>,
|
||||
}
|
||||
|
||||
impl Drop for Store {
|
||||
fn drop(&mut self) {
|
||||
if Arc::strong_count(&self.env) == 1 {
|
||||
self.env.as_ref().clone().prepare_for_closing();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Store {
|
||||
/// Create a new store from the specified `Path`.
|
||||
/// Be really cautious when calling this function, the returned `Store` may
|
||||
/// be in an invalid state, with dangling processing tasks.
|
||||
/// You want to patch all un-finished tasks and put them in your pending
|
||||
/// queue with the `reset_and_return_unfinished_update` method.
|
||||
pub fn new(env: Arc<milli::heed::Env>) -> Result<Self> {
|
||||
let index_uid_task_ids = env.create_database(Some(INDEX_UIDS_TASK_IDS))?;
|
||||
let tasks = env.create_database(Some(TASKS))?;
|
||||
|
||||
Ok(Self {
|
||||
env,
|
||||
index_uid_task_ids,
|
||||
tasks,
|
||||
})
|
||||
}
|
||||
|
||||
/// Opens a write transaction on the store environment.
pub fn wtxn(&self) -> Result<RwTxn> {
    self.env.write_txn().map_err(Into::into)
}
|
||||
|
||||
/// Opens a read transaction on the store environment.
pub fn rtxn(&self) -> Result<RoTxn> {
    self.env.read_txn().map_err(Into::into)
}
|
||||
|
||||
/// Returns the id for the next task.
|
||||
///
|
||||
/// The required `mut txn` acts as a reservation system. It guarantees that as long as you commit
|
||||
/// the task to the store in the same transaction, no one else will hav this task id.
|
||||
pub fn next_task_id(&self, txn: &mut RwTxn) -> Result<TaskId> {
|
||||
let id = self
|
||||
.tasks
|
||||
.lazily_decode_data()
|
||||
.last(txn)?
|
||||
.map(|(id, _)| id.get() + 1)
|
||||
.unwrap_or(0);
|
||||
Ok(id)
|
||||
}
|
||||
|
||||
pub fn put(&self, txn: &mut RwTxn, task: &Task) -> Result<()> {
|
||||
self.tasks.put(txn, &BEU32::new(task.id), task)?;
|
||||
// only add the task to the indexes index if it has an index_uid
|
||||
if let Some(index_uid) = task.index_uid() {
|
||||
let mut tasks_set = self
|
||||
.index_uid_task_ids
|
||||
.get(txn, index_uid)?
|
||||
.unwrap_or_default();
|
||||
|
||||
tasks_set.insert(task.id);
|
||||
|
||||
self.index_uid_task_ids.put(txn, index_uid, &tasks_set)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn get(&self, txn: &RoTxn, id: TaskId) -> Result<Option<Task>> {
|
||||
let task = self.tasks.get(txn, &BEU32::new(id))?;
|
||||
Ok(task)
|
||||
}
|
||||
|
||||
/// Returns the unfinished tasks starting from the given taskId in ascending order.
|
||||
pub fn fetch_unfinished_tasks(&self, txn: &RoTxn, from: Option<TaskId>) -> Result<Vec<Task>> {
|
||||
// We must NEVER re-enqueue an already processed task! It's content uuid would point to an unexisting file.
|
||||
//
|
||||
// TODO(marin): This may create some latency when the first batch lazy loads the pending updates.
|
||||
let from = from.unwrap_or_default();
|
||||
|
||||
let result: StdResult<Vec<_>, milli::heed::Error> = self
|
||||
.tasks
|
||||
.range(txn, &(BEU32::new(from)..))?
|
||||
.map(|r| r.map(|(_, t)| t))
|
||||
.filter(|result| result.as_ref().map_or(true, |t| !t.is_finished()))
|
||||
.collect();
|
||||
|
||||
result.map_err(Into::into)
|
||||
}
|
||||
|
||||
/// Returns all the tasks starting from the given taskId and going in descending order.
|
||||
pub fn list_tasks(
|
||||
&self,
|
||||
txn: &RoTxn,
|
||||
from: Option<TaskId>,
|
||||
filter: Option<TaskFilter>,
|
||||
limit: Option<usize>,
|
||||
) -> Result<Vec<Task>> {
|
||||
let from = match from {
|
||||
Some(from) => from,
|
||||
None => self.tasks.last(txn)?.map_or(0, |(id, _)| id.get()),
|
||||
};
|
||||
|
||||
let filter_fn = |task: &Task| {
|
||||
filter
|
||||
.as_ref()
|
||||
.and_then(|f| f.filter_fn.as_ref())
|
||||
.map_or(true, |f| f(task))
|
||||
};
|
||||
|
||||
let result: Result<Vec<_>> = match filter.as_ref().and_then(|f| f.filtered_indexes()) {
|
||||
Some(indexes) => self
|
||||
.compute_candidates(txn, indexes, from)?
|
||||
.filter(|result| result.as_ref().map_or(true, filter_fn))
|
||||
.take(limit.unwrap_or(usize::MAX))
|
||||
.collect(),
|
||||
None => self
|
||||
.tasks
|
||||
.rev_range(txn, &(..=BEU32::new(from)))?
|
||||
.map(|r| r.map(|(_, t)| t).map_err(Into::into))
|
||||
.filter(|result| result.as_ref().map_or(true, filter_fn))
|
||||
.take(limit.unwrap_or(usize::MAX))
|
||||
.collect(),
|
||||
};
|
||||
|
||||
result.map_err(Into::into)
|
||||
}
|
||||
|
||||
fn compute_candidates<'a>(
|
||||
&'a self,
|
||||
txn: &'a RoTxn,
|
||||
indexes: &HashSet<String>,
|
||||
from: TaskId,
|
||||
) -> Result<impl Iterator<Item = Result<Task>> + 'a> {
|
||||
let mut candidates = RoaringBitmap::new();
|
||||
|
||||
for index_uid in indexes {
|
||||
if let Some(tasks_set) = self.index_uid_task_ids.get(txn, index_uid)? {
|
||||
candidates |= tasks_set;
|
||||
}
|
||||
}
|
||||
|
||||
candidates.remove_range((Excluded(from), Unbounded));
|
||||
|
||||
let iter = candidates
|
||||
.into_iter()
|
||||
.rev()
|
||||
.filter_map(|id| self.get(txn, id).transpose());
|
||||
|
||||
Ok(iter)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub mod test {
|
||||
use itertools::Itertools;
|
||||
use meilisearch_types::index_uid::IndexUid;
|
||||
use milli::heed::EnvOpenOptions;
|
||||
use nelson::Mocker;
|
||||
use tempfile::TempDir;
|
||||
|
||||
use crate::tasks::task::TaskContent;
|
||||
|
||||
use super::*;
|
||||
|
||||
/// Test double for `Store`: either wraps a real store or a `Mocker`.
///
/// TODO: use this mock to test the task store properly.
|
||||
#[allow(dead_code)]
|
||||
pub enum MockStore {
|
||||
/// Every call is forwarded to the wrapped store.
Real(Store),
|
||||
/// Not supported yet: every method hits `todo!()` for this variant.
Fake(Mocker),
|
||||
}
|
||||
|
||||
/// A heed environment opened in a temporary directory, stored together with that directory.
pub struct TmpEnv(TempDir, Arc<milli::heed::Env>);
|
||||
|
||||
impl TmpEnv {
|
||||
pub fn env(&self) -> Arc<milli::heed::Env> {
|
||||
self.1.clone()
|
||||
}
|
||||
}
|
||||
|
||||
pub fn tmp_env() -> TmpEnv {
|
||||
let tmp = tempfile::tempdir().unwrap();
|
||||
|
||||
let mut options = EnvOpenOptions::new();
|
||||
options.map_size(4096 * 100000);
|
||||
options.max_dbs(1000);
|
||||
let env = Arc::new(options.open(tmp.path()).unwrap());
|
||||
|
||||
TmpEnv(tmp, env)
|
||||
}
|
||||
|
||||
impl MockStore {
|
||||
pub fn new(env: Arc<milli::heed::Env>) -> Result<Self> {
|
||||
Ok(Self::Real(Store::new(env)?))
|
||||
}
|
||||
|
||||
pub fn wtxn(&self) -> Result<RwTxn> {
|
||||
match self {
|
||||
MockStore::Real(index) => index.wtxn(),
|
||||
MockStore::Fake(_) => todo!(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn rtxn(&self) -> Result<RoTxn> {
|
||||
match self {
|
||||
MockStore::Real(index) => index.rtxn(),
|
||||
MockStore::Fake(_) => todo!(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn next_task_id(&self, txn: &mut RwTxn) -> Result<TaskId> {
|
||||
match self {
|
||||
MockStore::Real(index) => index.next_task_id(txn),
|
||||
MockStore::Fake(_) => todo!(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn put(&self, txn: &mut RwTxn, task: &Task) -> Result<()> {
|
||||
match self {
|
||||
MockStore::Real(index) => index.put(txn, task),
|
||||
MockStore::Fake(_) => todo!(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get(&self, txn: &RoTxn, id: TaskId) -> Result<Option<Task>> {
|
||||
match self {
|
||||
MockStore::Real(index) => index.get(txn, id),
|
||||
MockStore::Fake(_) => todo!(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn fetch_unfinished_tasks(
|
||||
&self,
|
||||
txn: &RoTxn,
|
||||
from: Option<TaskId>,
|
||||
) -> Result<Vec<Task>> {
|
||||
match self {
|
||||
MockStore::Real(index) => index.fetch_unfinished_tasks(txn, from),
|
||||
MockStore::Fake(_) => todo!(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn list_tasks(
|
||||
&self,
|
||||
txn: &RoTxn,
|
||||
from: Option<TaskId>,
|
||||
filter: Option<TaskFilter>,
|
||||
limit: Option<usize>,
|
||||
) -> Result<Vec<Task>> {
|
||||
match self {
|
||||
MockStore::Real(index) => index.list_tasks(txn, from, filter, limit),
|
||||
MockStore::Fake(_) => todo!(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Tasks listed through an index filter must come out in strictly descending id order.
#[test]
fn test_ordered_filtered_updates() {
    let tmp = tmp_env();
    let store = Store::new(tmp.env()).unwrap();

    // 100 deletion tasks on the same index, with random ids.
    let tasks: Vec<Task> = std::iter::repeat_with(|| Task {
        id: rand::random(),
        content: TaskContent::IndexDeletion {
            index_uid: IndexUid::new_unchecked("test"),
        },
        events: vec![],
    })
    .take(100)
    .collect();

    let mut txn = store.env.write_txn().unwrap();
    for task in &tasks {
        store.put(&mut txn, task).unwrap();
    }

    let mut filter = TaskFilter::default();
    filter.filter_index("test".into());

    let listed = store.list_tasks(&txn, None, Some(filter), None).unwrap();

    let strictly_descending = listed
        .iter()
        .map(|task| task.id)
        .tuple_windows()
        .all(|(previous, next)| previous > next);
    assert!(strictly_descending);
}
|
||||
|
||||
/// Filtering on the index `test` must not return the tasks of the index `test1`, whatever
/// the relative order of the task ids.
#[test]
fn test_filter_same_index_prefix() {
    // Builds an index deletion task without any event.
    let deletion_task = |id: TaskId, index_uid: &'static str| Task {
        id,
        content: TaskContent::IndexDeletion {
            index_uid: IndexUid::new_unchecked(index_uid),
        },
        events: vec![],
    };

    let tmp = tmp_env();
    let store = Store::new(tmp.env()).unwrap();

    let mut txn = store.wtxn().unwrap();
    store.put(&mut txn, &deletion_task(1, "test")).unwrap();
    store.put(&mut txn, &deletion_task(0, "test1")).unwrap();

    let mut filter = TaskFilter::default();
    filter.filter_index("test".into());

    let tasks = store.list_tasks(&txn, None, Some(filter), None).unwrap();

    txn.abort().unwrap();
    assert_eq!(tasks.len(), 1);
    assert_eq!(tasks.first().as_ref().unwrap().index_uid().unwrap(), "test");

    // same thing but invert the ids
    let mut txn = store.wtxn().unwrap();
    store.put(&mut txn, &deletion_task(0, "test")).unwrap();
    store.put(&mut txn, &deletion_task(1, "test1")).unwrap();

    let mut filter = TaskFilter::default();
    filter.filter_index("test".into());

    let tasks = store.list_tasks(&txn, None, Some(filter), None).unwrap();

    assert_eq!(tasks.len(), 1);
    assert_eq!(tasks.first().as_ref().unwrap().index_uid().unwrap(), "test");
}
|
||||
}
|
@@ -1,93 +0,0 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use time::OffsetDateTime;
|
||||
use tokio::sync::{watch, RwLock};
|
||||
|
||||
use super::batch::Batch;
|
||||
use super::error::Result;
|
||||
use super::{BatchHandler, Scheduler};
|
||||
use crate::tasks::task::TaskEvent;
|
||||
|
||||
/// The update loop sequentially performs batches of updates by asking the scheduler for a batch,
|
||||
/// and handing it to the first `BatchHandler` that accepts it.
|
||||
pub struct UpdateLoop {
|
||||
/// Scheduler the batches are taken from and the task updates are reported to.
scheduler: Arc<RwLock<Scheduler>>,
|
||||
/// Handlers tried in order; the first one accepting a batch processes it.
performers: Vec<Arc<dyn BatchHandler + Send + Sync + 'static>>,
|
||||
|
||||
/// Wake-up channel; stored as an `Option` so that `run` can take ownership of it.
notifier: Option<watch::Receiver<()>>,
|
||||
}
|
||||
|
||||
impl UpdateLoop {
|
||||
pub fn new(
|
||||
scheduler: Arc<RwLock<Scheduler>>,
|
||||
performers: Vec<Arc<dyn BatchHandler + Send + Sync + 'static>>,
|
||||
notifier: watch::Receiver<()>,
|
||||
) -> Self {
|
||||
Self {
|
||||
scheduler,
|
||||
performers,
|
||||
notifier: Some(notifier),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn run(mut self) {
|
||||
let mut notifier = self.notifier.take().unwrap();
|
||||
|
||||
loop {
|
||||
if notifier.changed().await.is_err() {
|
||||
break;
|
||||
}
|
||||
|
||||
if let Err(e) = self.process_next_batch().await {
|
||||
log::error!("an error occurred while processing an update batch: {}", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn process_next_batch(&self) -> Result<()> {
|
||||
let mut batch = { self.scheduler.write().await.prepare().await? };
|
||||
let performer = self
|
||||
.performers
|
||||
.iter()
|
||||
.find(|p| p.accept(&batch))
|
||||
.expect("No performer found for batch")
|
||||
.clone();
|
||||
|
||||
batch
|
||||
.content
|
||||
.push_event(TaskEvent::Processing(OffsetDateTime::now_utc()));
|
||||
|
||||
batch.content = {
|
||||
self.scheduler
|
||||
.read()
|
||||
.await
|
||||
.update_tasks(batch.content)
|
||||
.await?
|
||||
};
|
||||
|
||||
let batch = performer.process_batch(batch).await;
|
||||
|
||||
self.handle_batch_result(batch, performer).await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Handles the result from a processed batch.
|
||||
///
|
||||
/// When a task is processed, the result of the process is pushed to its event list. The
|
||||
/// `handle_batch_result` make sure that the new state is saved to the store.
|
||||
/// The tasks are then removed from the processing queue.
|
||||
async fn handle_batch_result(
|
||||
&self,
|
||||
mut batch: Batch,
|
||||
performer: Arc<dyn BatchHandler + Sync + Send + 'static>,
|
||||
) -> Result<()> {
|
||||
let mut scheduler = self.scheduler.write().await;
|
||||
let content = scheduler.update_tasks(batch.content).await?;
|
||||
scheduler.finish();
|
||||
drop(scheduler);
|
||||
batch.content = content;
|
||||
performer.finish(&batch).await;
|
||||
Ok(())
|
||||
}
|
||||
}
|
@@ -1,258 +0,0 @@
|
||||
use std::fs::{create_dir_all, File};
|
||||
use std::io::{self, BufReader, BufWriter, Write};
|
||||
use std::ops::{Deref, DerefMut};
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
use milli::documents::DocumentsBatchReader;
|
||||
use serde_json::Map;
|
||||
use tempfile::{NamedTempFile, PersistError};
|
||||
use uuid::Uuid;
|
||||
|
||||
#[cfg(not(test))]
|
||||
pub use store::UpdateFileStore;
|
||||
#[cfg(test)]
|
||||
pub use test::MockUpdateFileStore as UpdateFileStore;
|
||||
|
||||
const UPDATE_FILES_PATH: &str = "updates/updates_files";
|
||||
|
||||
use crate::document_formats::read_ndjson;
|
||||
|
||||
/// An update file being written: a temporary file plus the path it is persisted to.
pub struct UpdateFile {
|
||||
/// Destination of the file once `persist` is called.
path: PathBuf,
|
||||
/// Temporary file receiving the content.
file: NamedTempFile,
|
||||
}
|
||||
|
||||
/// Error raised by the update file store; it boxes the underlying error.
#[derive(Debug, thiserror::Error)]
|
||||
#[error("Error while persisting update to disk: {0}")]
|
||||
pub struct UpdateFileStoreError(Box<dyn std::error::Error + Sync + Send + 'static>);
|
||||
|
||||
/// Result type of the update file store operations.
pub type Result<T> = std::result::Result<T, UpdateFileStoreError>;
|
||||
|
||||
// Implements `From<$other> for UpdateFileStoreError` for every listed error type, by boxing
// the source error.
macro_rules! into_update_store_error {
|
||||
($($other:path),*) => {
|
||||
$(
|
||||
impl From<$other> for UpdateFileStoreError {
|
||||
fn from(other: $other) -> Self {
|
||||
Self(Box::new(other))
|
||||
}
|
||||
}
|
||||
)*
|
||||
};
|
||||
}
|
||||
|
||||
// Error types that `?` can convert into an `UpdateFileStoreError`.
into_update_store_error!(
|
||||
PersistError,
|
||||
io::Error,
|
||||
serde_json::Error,
|
||||
milli::documents::Error,
|
||||
milli::documents::DocumentsBatchCursorError
|
||||
);
|
||||
|
||||
impl UpdateFile {
|
||||
pub fn persist(self) -> Result<()> {
|
||||
self.file.persist(&self.path)?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
// Gives read access to the underlying temporary file.
impl Deref for UpdateFile {
|
||||
type Target = NamedTempFile;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.file
|
||||
}
|
||||
}
|
||||
|
||||
// Gives mutable access to the underlying temporary file.
impl DerefMut for UpdateFile {
|
||||
fn deref_mut(&mut self) -> &mut Self::Target {
|
||||
&mut self.file
|
||||
}
|
||||
}
|
||||
|
||||
mod store {
|
||||
use super::*;
|
||||
|
||||
/// Store of update files, each named after its uuid and kept in a single directory.
#[derive(Clone, Debug)]
|
||||
pub struct UpdateFileStore {
|
||||
/// Directory holding the update files (the `UPDATE_FILES_PATH` sub-directory of the path
/// given to `new`).
path: PathBuf,
|
||||
}
|
||||
|
||||
impl UpdateFileStore {
|
||||
pub fn load_dump(src: impl AsRef<Path>, dst: impl AsRef<Path>) -> anyhow::Result<()> {
|
||||
let src_update_files_path = src.as_ref().join(UPDATE_FILES_PATH);
|
||||
let dst_update_files_path = dst.as_ref().join(UPDATE_FILES_PATH);
|
||||
|
||||
// No update files to load
|
||||
if !src_update_files_path.exists() {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
create_dir_all(&dst_update_files_path)?;
|
||||
|
||||
let entries = std::fs::read_dir(src_update_files_path)?;
|
||||
|
||||
for entry in entries {
|
||||
let entry = entry?;
|
||||
let update_file = BufReader::new(File::open(entry.path())?);
|
||||
let file_uuid = entry.file_name();
|
||||
let file_uuid = file_uuid
|
||||
.to_str()
|
||||
.ok_or_else(|| anyhow::anyhow!("invalid update file name"))?;
|
||||
let dst_path = dst_update_files_path.join(file_uuid);
|
||||
let dst_file = BufWriter::new(File::create(dst_path)?);
|
||||
read_ndjson(update_file, dst_file)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn new(path: impl AsRef<Path>) -> Result<Self> {
|
||||
let path = path.as_ref().join(UPDATE_FILES_PATH);
|
||||
std::fs::create_dir_all(&path)?;
|
||||
Ok(Self { path })
|
||||
}
|
||||
|
||||
/// Creates a new temporary update file.
|
||||
/// A call to `persist` is needed to persist the file in the database.
|
||||
pub fn new_update(&self) -> Result<(Uuid, UpdateFile)> {
|
||||
let file = NamedTempFile::new_in(&self.path)?;
|
||||
let uuid = Uuid::new_v4();
|
||||
let path = self.path.join(uuid.to_string());
|
||||
let update_file = UpdateFile { file, path };
|
||||
|
||||
Ok((uuid, update_file))
|
||||
}
|
||||
|
||||
/// Returns the file corresponding to the requested uuid.
|
||||
pub fn get_update(&self, uuid: Uuid) -> Result<File> {
|
||||
let path = self.path.join(uuid.to_string());
|
||||
let file = File::open(path)?;
|
||||
Ok(file)
|
||||
}
|
||||
|
||||
/// Copies the content of the update file pointed to by `uuid` to the `dst` directory.
|
||||
pub fn snapshot(&self, uuid: Uuid, dst: impl AsRef<Path>) -> Result<()> {
|
||||
let src = self.path.join(uuid.to_string());
|
||||
let mut dst = dst.as_ref().join(UPDATE_FILES_PATH);
|
||||
std::fs::create_dir_all(&dst)?;
|
||||
dst.push(uuid.to_string());
|
||||
std::fs::copy(src, dst)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Peforms a dump of the given update file uuid into the provided dump path.
|
||||
pub fn dump(&self, uuid: Uuid, dump_path: impl AsRef<Path>) -> Result<()> {
|
||||
let uuid_string = uuid.to_string();
|
||||
let update_file_path = self.path.join(&uuid_string);
|
||||
let mut dst = dump_path.as_ref().join(UPDATE_FILES_PATH);
|
||||
std::fs::create_dir_all(&dst)?;
|
||||
dst.push(&uuid_string);
|
||||
|
||||
let update_file = File::open(update_file_path)?;
|
||||
let mut dst_file = NamedTempFile::new_in(&dump_path)?;
|
||||
let (mut document_cursor, index) =
|
||||
DocumentsBatchReader::from_reader(update_file)?.into_cursor_and_fields_index();
|
||||
|
||||
let mut document_buffer = Map::new();
|
||||
// TODO: we need to find a way to do this more efficiently. (create a custom serializer
|
||||
// for jsonl for example...)
|
||||
while let Some(document) = document_cursor.next_document()? {
|
||||
for (field_id, content) in document.iter() {
|
||||
if let Some(field_name) = index.name(field_id) {
|
||||
let content = serde_json::from_slice(content)?;
|
||||
document_buffer.insert(field_name.to_string(), content);
|
||||
}
|
||||
}
|
||||
|
||||
serde_json::to_writer(&mut dst_file, &document_buffer)?;
|
||||
dst_file.write_all(b"\n")?;
|
||||
document_buffer.clear();
|
||||
}
|
||||
|
||||
dst_file.persist(dst)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Returns the size in bytes of the update file stored under `uuid`.
pub fn get_size(&self, uuid: Uuid) -> Result<u64> {
    let file = self.get_update(uuid)?;
    let metadata = file.metadata()?;
    Ok(metadata.len())
}
|
||||
|
||||
pub async fn delete(&self, uuid: Uuid) -> Result<()> {
|
||||
let path = self.path.join(uuid.to_string());
|
||||
tokio::fs::remove_file(path).await?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use std::sync::Arc;
|
||||
|
||||
use nelson::Mocker;
|
||||
|
||||
use super::*;
|
||||
|
||||
/// Test double for the update file store: either a real store or a shared `Mocker`.
#[derive(Clone)]
|
||||
pub enum MockUpdateFileStore {
|
||||
/// Every call is forwarded to the wrapped store.
Real(store::UpdateFileStore),
|
||||
/// Only `delete` is mocked; the other methods hit `todo!()` for this variant.
Mock(Arc<Mocker>),
|
||||
}
|
||||
|
||||
impl MockUpdateFileStore {
|
||||
pub fn mock(mocker: Mocker) -> Self {
|
||||
Self::Mock(Arc::new(mocker))
|
||||
}
|
||||
|
||||
pub fn load_dump(src: impl AsRef<Path>, dst: impl AsRef<Path>) -> anyhow::Result<()> {
|
||||
store::UpdateFileStore::load_dump(src, dst)
|
||||
}
|
||||
|
||||
pub fn new(path: impl AsRef<Path>) -> Result<Self> {
|
||||
store::UpdateFileStore::new(path).map(Self::Real)
|
||||
}
|
||||
|
||||
pub fn new_update(&self) -> Result<(Uuid, UpdateFile)> {
|
||||
match self {
|
||||
MockUpdateFileStore::Real(s) => s.new_update(),
|
||||
MockUpdateFileStore::Mock(_) => todo!(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get_update(&self, uuid: Uuid) -> Result<File> {
|
||||
match self {
|
||||
MockUpdateFileStore::Real(s) => s.get_update(uuid),
|
||||
MockUpdateFileStore::Mock(_) => todo!(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn snapshot(&self, uuid: Uuid, dst: impl AsRef<Path>) -> Result<()> {
|
||||
match self {
|
||||
MockUpdateFileStore::Real(s) => s.snapshot(uuid, dst),
|
||||
MockUpdateFileStore::Mock(_) => todo!(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn dump(&self, uuid: Uuid, dump_path: impl AsRef<Path>) -> Result<()> {
|
||||
match self {
|
||||
MockUpdateFileStore::Real(s) => s.dump(uuid, dump_path),
|
||||
MockUpdateFileStore::Mock(_) => todo!(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get_size(&self, uuid: Uuid) -> Result<u64> {
|
||||
match self {
|
||||
MockUpdateFileStore::Real(s) => s.get_size(uuid),
|
||||
MockUpdateFileStore::Mock(_) => todo!(),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn delete(&self, uuid: Uuid) -> Result<()> {
|
||||
match self {
|
||||
MockUpdateFileStore::Real(s) => s.delete(uuid).await,
|
||||
MockUpdateFileStore::Mock(mocker) => unsafe { mocker.get("delete").call(uuid) },
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
Reference in New Issue
Block a user