create workspace with meilisearch-error

This commit is contained in:
mpostma
2021-02-28 16:41:47 +01:00
parent 79708aeb67
commit a9a9ed6318
96 changed files with 12737 additions and 265 deletions

View File

@@ -0,0 +1,137 @@
use std::hash::{Hash, Hasher};
use std::{error, thread};
use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};
use log::error;
use serde::Serialize;
use serde_qs as qs;
use siphasher::sip::SipHasher;
use walkdir::WalkDir;
use crate::Data;
use crate::Opt;
/// API key sent as `api_key` in every request posted to Amplitude by `analytics_sender`.
const AMPLITUDE_API_KEY: &str = "f7fba398780e06d8fe6666a9be7e3d47";
/// Instance statistics attached to an analytics event as `event_properties`.
#[derive(Debug, Serialize)]
struct EventProperties {
/// Sum, in bytes, of the sizes of all regular files found under the database path.
database_size: u64,
/// Timestamp of the last update recorded by the database, if any.
last_update_timestamp: Option<i64>, //timestamp
/// Number of documents of each index that could be opened (one entry per index).
number_of_documents: Vec<u64>,
}
impl EventProperties {
    /// Gathers the statistics reported to analytics from `data`.
    ///
    /// Returns an error when the main read transaction cannot be opened, or
    /// when a document count or the last update cannot be read.
    fn from(data: Data) -> Result<EventProperties, Box<dyn error::Error>> {
        let mut documents_per_index = Vec::new();
        let rtxn = data.db.main_read_txn()?;

        for uid in data.db.indexes_uids() {
            // Indexes that cannot be opened are left out of the report.
            let index = match data.db.open_index(&uid) {
                Some(index) => index,
                None => continue,
            };
            documents_per_index.push(index.main.number_of_documents(&rtxn)?);
        }

        // Unreadable entries and entries without metadata are ignored;
        // only regular files contribute to the total.
        let database_size = WalkDir::new(&data.db_path)
            .into_iter()
            .filter_map(Result::ok)
            .filter_map(|entry| entry.metadata().ok())
            .filter(|metadata| metadata.is_file())
            .map(|metadata| metadata.len())
            .fold(0, |total, size| total + size);

        let last_update = data.db.last_update(&rtxn)?;
        let last_update_timestamp = last_update.map(|update| update.timestamp());

        Ok(EventProperties {
            database_size,
            last_update_timestamp,
            number_of_documents: documents_per_index,
        })
    }
}
/// Properties describing the running instance, attached to an analytics event as `user_properties`.
#[derive(Debug, Serialize)]
struct UserProperties<'a> {
/// Value of the `env` option the server was started with.
env: &'a str,
/// Whole days elapsed since `analytics_sender` was started.
start_since_days: u64,
/// Content of the `MEILI_USER_EMAIL` environment variable, if set.
user_email: Option<String>,
/// Content of the `MEILI_SERVER_PROVIDER` environment variable, if set.
server_provider: Option<String>,
}
/// A single analytics event, serialized to JSON before being sent to Amplitude.
#[derive(Debug, Serialize)]
struct Event<'a> {
/// Hexadecimal hash of the user name, host name and platform of the machine.
user_id: &'a str,
/// Name of the event (`analytics_sender` always sends `"runtime_tick"`).
event_type: &'a str,
/// Platform the server runs on, as reported by `whoami::platform`.
device_id: &'a str,
/// Seconds elapsed since the Unix epoch when the event was built.
time: u64,
/// Version of the crate, taken from `CARGO_PKG_VERSION` at compile time.
app_version: &'a str,
user_properties: UserProperties<'a>,
/// `None` when the statistics could not be collected.
event_properties: Option<EventProperties>,
}
/// Body of a request to the Amplitude HTTP API, encoded as a query string before sending.
#[derive(Debug, Serialize)]
struct AmplitudeRequest<'a> {
/// Amplitude API key (`AMPLITUDE_API_KEY`).
api_key: &'a str,
/// The `Event`, already serialized to a JSON string.
event: &'a str,
}
pub fn analytics_sender(data: Data, opt: Opt) {
let username = whoami::username();
let hostname = whoami::hostname();
let platform = whoami::platform();
let uid = username + &hostname + &platform.to_string();
let mut hasher = SipHasher::new();
uid.hash(&mut hasher);
let hash = hasher.finish();
let uid = format!("{:X}", hash);
let platform = platform.to_string();
let first_start = Instant::now();
loop {
let n = SystemTime::now().duration_since(UNIX_EPOCH).unwrap();
let user_id = &uid;
let device_id = &platform;
let time = n.as_secs();
let event_type = "runtime_tick";
let elapsed_since_start = first_start.elapsed().as_secs() / 86_400; // One day
let event_properties = EventProperties::from(data.clone()).ok();
let app_version = env!("CARGO_PKG_VERSION").to_string();
let app_version = app_version.as_str();
let user_email = std::env::var("MEILI_USER_EMAIL").ok();
let server_provider = std::env::var("MEILI_SERVER_PROVIDER").ok();
let user_properties = UserProperties {
env: &opt.env,
start_since_days: elapsed_since_start,
user_email,
server_provider,
};
let event = Event {
user_id,
event_type,
device_id,
time,
app_version,
user_properties,
event_properties
};
let event = serde_json::to_string(&event).unwrap();
let request = AmplitudeRequest {
api_key: AMPLITUDE_API_KEY,
event: &event,
};
let body = qs::to_string(&request).unwrap();
let response = ureq::post("https://api.amplitude.com/httpapi").send_string(&body);
if !response.ok() {
let body = response.into_string().unwrap();
error!("Unsuccessful call to Amplitude: {}", body);
}
thread::sleep(Duration::from_secs(3600)) // one hour
}
}

View File

@@ -0,0 +1,147 @@
mod search;
mod updates;
pub use search::{SearchQuery, SearchResult, DEFAULT_SEARCH_LIMIT};
use std::fs::create_dir_all;
use std::ops::Deref;
use std::sync::Arc;
use sha2::Digest;
use crate::index_controller::{IndexController, LocalIndexController, IndexMetadata, Settings, IndexSettings};
use crate::option::Opt;
/// Cheaply clonable handle on the shared application state.
///
/// Cloning only clones the inner `Arc`; the state itself lives in `DataInner`,
/// which this type dereferences to.
#[derive(Clone)]
pub struct Data {
inner: Arc<DataInner>,
}
impl Deref for Data {
    type Target = DataInner;

    /// Exposes the fields and methods of the shared `DataInner` directly on `Data`.
    fn deref(&self) -> &DataInner {
        &*self.inner
    }
}
/// Application state shared behind `Data`.
#[derive(Clone)]
pub struct DataInner {
/// Controller through which indexes are created, listed and queried.
pub index_controller: Arc<LocalIndexController>,
/// Master key and the private/public keys generated from it.
pub api_keys: ApiKeys,
/// Options the server was started with.
options: Opt,
}
/// The three API keys of an instance; each one is `None` when not set.
#[derive(Clone)]
pub struct ApiKeys {
pub public: Option<String>,
pub private: Option<String>,
pub master: Option<String>,
}
impl ApiKeys {
    /// Fills in the private and public keys that are still unset.
    ///
    /// Each missing key is the lowercase hexadecimal SHA-256 digest of
    /// `"<master key>-private"` or `"<master key>-public"`. Keys that are
    /// already set are left untouched, and nothing happens without a master key.
    pub fn generate_missing_api_keys(&mut self) {
        let master_key = match &self.master {
            Some(master_key) => master_key,
            None => return,
        };

        let derive = |suffix: &str| {
            let seed = format!("{}-{}", master_key, suffix);
            format!("{:x}", sha2::Sha256::digest(seed.as_bytes()))
        };

        if self.private.is_none() {
            self.private = Some(derive("private"));
        }
        if self.public.is_none() {
            self.public = Some(derive("public"));
        }
    }
}
impl Data {
    /// Builds the application state from the server options.
    ///
    /// Creates the database directory if needed, opens the index controller
    /// on it and derives the private/public API keys from the master key.
    ///
    /// # Errors
    ///
    /// Fails when the directory cannot be created or the index controller
    /// cannot be opened.
    pub fn new(options: Opt) -> anyhow::Result<Data> {
        create_dir_all(&options.db_path)?;

        let index_controller = LocalIndexController::new(
            &options.db_path,
            options.indexer_options.clone(),
            options.max_mdb_size.get_bytes(),
            options.max_udb_size.get_bytes(),
        )?;
        let index_controller = Arc::new(index_controller);

        // Only the master key is needed here: clone the field, not the whole `Opt`.
        let mut api_keys = ApiKeys {
            master: options.master_key.clone(),
            private: None,
            public: None,
        };
        api_keys.generate_missing_api_keys();

        let inner = DataInner { index_controller, options, api_keys };
        let inner = Arc::new(inner);

        Ok(Data { inner })
    }

    /// Returns the settings of the index `index_uid`.
    ///
    /// Displayed and searchable attributes default to `["*"]` when the index
    /// does not restrict them. `criteria` is always `None`.
    ///
    /// # Errors
    ///
    /// Fails when the index does not exist or cannot be read.
    pub fn settings<S: AsRef<str>>(&self, index_uid: S) -> anyhow::Result<Settings> {
        let index = self.index_controller
            .index(&index_uid)?
            .ok_or_else(|| anyhow::anyhow!("Index {} does not exist.", index_uid.as_ref()))?;

        let txn = index.read_txn()?;

        let displayed_attributes = index
            .displayed_fields(&txn)?
            .map(|fields| fields.into_iter().map(String::from).collect())
            .unwrap_or_else(|| vec!["*".to_string()]);

        let searchable_attributes = index
            .searchable_fields(&txn)?
            .map(|fields| fields.into_iter().map(String::from).collect())
            .unwrap_or_else(|| vec!["*".to_string()]);

        let faceted_attributes = index
            .faceted_fields(&txn)?
            .into_iter()
            .map(|(k, v)| (k, v.to_string()))
            .collect();

        Ok(Settings {
            displayed_attributes: Some(Some(displayed_attributes)),
            searchable_attributes: Some(Some(searchable_attributes)),
            faceted_attributes: Some(Some(faceted_attributes)),
            criteria: None,
        })
    }

    /// Lists the metadata of every index known to the index controller.
    pub fn list_indexes(&self) -> anyhow::Result<Vec<IndexMetadata>> {
        self.index_controller.list_indexes()
    }

    /// Returns the metadata of the index whose uid is `name`, or `None` when there is no such index.
    pub fn index(&self, name: impl AsRef<str>) -> anyhow::Result<Option<IndexMetadata>> {
        Ok(self
            .list_indexes()?
            .into_iter()
            .find(|i| i.uid == name.as_ref()))
    }

    /// Creates the index `name`, optionally with a primary key, and returns its metadata.
    pub fn create_index(&self, name: impl AsRef<str>, primary_key: Option<impl AsRef<str>>) -> anyhow::Result<IndexMetadata> {
        let settings = IndexSettings {
            name: Some(name.as_ref().to_string()),
            primary_key: primary_key.map(|s| s.as_ref().to_string()),
        };

        let meta = self.index_controller.create_index(settings)?;
        Ok(meta)
    }

    /// Maximum size, in bytes, accepted for an HTTP payload.
    #[inline]
    pub fn http_payload_size_limit(&self) -> usize {
        self.options.http_payload_size_limit.get_bytes() as usize
    }

    /// API keys of the instance.
    #[inline]
    pub fn api_keys(&self) -> &ApiKeys {
        &self.api_keys
    }
}

View File

@@ -0,0 +1,353 @@
use std::collections::{HashSet, BTreeMap};
use std::mem;
use std::time::Instant;
use anyhow::{bail, Context};
use either::Either;
use heed::RoTxn;
use meilisearch_tokenizer::{Analyzer, AnalyzerConfig};
use milli::{obkv_to_json, FacetCondition, Index, facet::FacetValue};
use serde::{Deserialize, Serialize};
use serde_json::{Map, Value};
use super::Data;
use crate::index_controller::IndexController;
/// Maximum number of hits returned when a search request does not specify `limit`.
pub const DEFAULT_SEARCH_LIMIT: usize = 20;
/// Serde default provider for `SearchQuery::limit`; returns `DEFAULT_SEARCH_LIMIT`.
const fn default_search_limit() -> usize {
DEFAULT_SEARCH_LIMIT
}
/// Parameters of a search request.
///
/// Field names are deserialized from camelCase and unknown fields are rejected.
/// `attributes_to_crop`, `crop_length`, `filters` and `matches` are accepted but
/// not read by `SearchQuery::perform`.
#[derive(Deserialize)]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
#[allow(dead_code)]
pub struct SearchQuery {
/// Query string; no query is set on the search when absent.
pub q: Option<String>,
/// Number of hits to skip; defaults to 0.
pub offset: Option<usize>,
/// Maximum number of hits; defaults to `DEFAULT_SEARCH_LIMIT`.
#[serde(default = "default_search_limit")]
pub limit: usize,
/// Fields to return for each hit; a `"*"` entry selects the default set of fields.
pub attributes_to_retrieve: Option<Vec<String>>,
pub attributes_to_crop: Option<Vec<String>>,
pub crop_length: Option<usize>,
/// Top-level fields in which matched words are wrapped in `<mark>` tags.
pub attributes_to_highlight: Option<HashSet<String>>,
pub filters: Option<String>,
pub matches: Option<bool>,
/// Facet filter expression: an array of strings and/or arrays of strings.
pub facet_filters: Option<Value>,
/// Facets to compute a distribution for; a `"*"` entry leaves the facet selection unrestricted.
pub facet_distributions: Option<Vec<String>>,
}
impl SearchQuery {
/// Runs this query against `index` and assembles the `SearchResult`.
///
/// Opens a read transaction, applies the query string, limit, offset and
/// facet filters, converts every returned document to JSON (highlighting the
/// requested attributes) and, when asked, computes the facet distributions
/// over the candidates of the search.
///
/// Errors raised while opening the transaction, parsing the facet filters,
/// executing the search or reading the documents are propagated.
pub fn perform(&self, index: impl AsRef<Index>) -> anyhow::Result<SearchResult> {
let index = index.as_ref();
// Started here so that `processing_time_ms` covers the whole method.
let before_search = Instant::now();
let rtxn = index.read_txn()?;
let mut search = index.search(&rtxn);
if let Some(ref query) = self.q {
search.query(query);
}
search.limit(self.limit);
search.offset(self.offset.unwrap_or_default());
if let Some(ref facets) = self.facet_filters {
// `parse_facets` may return `None`, in which case no facet condition is set.
if let Some(facets) = parse_facets(facets, index, &rtxn)? {
search.facet_condition(facets);
}
}
let milli::SearchResult {
documents_ids,
found_words,
candidates,
} = search.execute()?;
let mut documents = Vec::new();
let fields_ids_map = index.fields_ids_map(&rtxn)?;
let displayed_fields_ids = index.displayed_fields_ids(&rtxn)?;
// Translate the requested attribute names to field ids. A `"*"` entry means
// "no explicit selection"; names unknown to the index are dropped.
let attributes_to_retrieve_ids = match self.attributes_to_retrieve {
Some(ref attrs) if attrs.iter().any(|f| f == "*") => None,
Some(ref attrs) => attrs
.iter()
.filter_map(|f| fields_ids_map.id(f))
.collect::<Vec<_>>()
.into(),
None => None,
};
// An explicit selection wins over the displayed fields of the index, which
// in turn win over "every field".
// NOTE(review): an explicit selection is not intersected with the displayed
// fields, so a request can name fields outside of them — confirm this is intended.
let displayed_fields_ids = match (displayed_fields_ids, attributes_to_retrieve_ids) {
(_, Some(ids)) => ids,
(Some(ids), None) => ids,
(None, None) => fields_ids_map.iter().map(|(id, _)| id).collect(),
};
// The highlighter is built on a default (empty) stop-word set.
let stop_words = fst::Set::default();
let highlighter = Highlighter::new(&stop_words);
for (_id, obkv) in index.documents(&rtxn, documents_ids)? {
let mut object = obkv_to_json(&displayed_fields_ids, &fields_ids_map, obkv)?;
if let Some(ref attributes_to_highlight) = self.attributes_to_highlight {
highlighter.highlight_record(&mut object, &found_words, attributes_to_highlight);
}
documents.push(object);
}
// Read before `candidates` is moved into the facet distribution below.
let nb_hits = candidates.len();
let facet_distributions = match self.facet_distributions {
Some(ref fields) => {
let mut facet_distribution = index.facets_distribution(&rtxn);
// Restrict the facets only when no `"*"` entry was given.
if fields.iter().all(|f| f != "*") {
facet_distribution.facets(fields);
}
Some(facet_distribution.candidates(candidates).execute()?)
}
None => None,
};
Ok(SearchResult {
hits: documents,
nb_hits,
query: self.q.clone().unwrap_or_default(),
limit: self.limit,
offset: self.offset.unwrap_or_default(),
processing_time_ms: before_search.elapsed().as_millis(),
facet_distributions,
})
}
}
/// Response of a search request; field names are serialized in camelCase.
#[derive(Serialize)]
#[serde(rename_all = "camelCase")]
pub struct SearchResult {
/// Matching documents, converted to JSON objects.
hits: Vec<Map<String, Value>>,
/// Number of candidates reported by the search.
nb_hits: u64,
/// Query string of the request (empty when none was given).
query: String,
limit: usize,
offset: usize,
/// Time spent in `SearchQuery::perform`, in milliseconds.
processing_time_ms: u128,
/// Only serialized when facet distributions were requested.
#[serde(skip_serializing_if = "Option::is_none")]
facet_distributions: Option<BTreeMap<String, BTreeMap<FacetValue, u64>>>,
}
/// Wraps matched words of JSON values in `<mark>` tags, using an analyzer to split strings into tokens.
struct Highlighter<'a, A> {
analyzer: Analyzer<'a, A>,
}
impl<'a, A: AsRef<[u8]>> Highlighter<'a, A> {
    /// Builds a highlighter whose analyzer uses the default configuration with `stop_words`.
    fn new(stop_words: &'a fst::Set<A>) -> Self {
        let config = AnalyzerConfig::default_with_stopwords(stop_words);
        Self { analyzer: Analyzer::new(config) }
    }

    /// Returns `value` with every word found in `words_to_highlight` wrapped in `<mark>…</mark>`.
    ///
    /// Strings are rebuilt token by token, arrays and objects are processed
    /// recursively, and every other kind of value is returned unchanged.
    fn highlight_value(&self, value: Value, words_to_highlight: &HashSet<String>) -> Value {
        match value {
            Value::String(text) => {
                let mut highlighted = String::new();
                let analyzed = self.analyzer.analyze(&text);
                for (word, token) in analyzed.reconstruct() {
                    // Only word tokens are looked up; separators are copied as they are.
                    let marked = token.is_word() && words_to_highlight.contains(token.text());
                    if marked {
                        highlighted.push_str("<mark>");
                    }
                    highlighted.push_str(word);
                    if marked {
                        highlighted.push_str("</mark>");
                    }
                }
                Value::String(highlighted)
            }
            Value::Array(items) => {
                let items = items
                    .into_iter()
                    .map(|item| self.highlight_value(item, words_to_highlight))
                    .collect();
                Value::Array(items)
            }
            Value::Object(fields) => {
                let fields = fields
                    .into_iter()
                    .map(|(name, field)| (name, self.highlight_value(field, words_to_highlight)))
                    .collect();
                Value::Object(fields)
            }
            // Null, booleans and numbers contain nothing to highlight.
            scalar => scalar,
        }
    }

    /// Highlights, in place, the top-level fields of `object` named in `attributes_to_highlight`.
    fn highlight_record(
        &self,
        object: &mut Map<String, Value>,
        words_to_highlight: &HashSet<String>,
        attributes_to_highlight: &HashSet<String>,
    ) {
        // TODO do we need to create a string for element that are not and needs to be highlight?
        object
            .iter_mut()
            .filter(|(name, _)| attributes_to_highlight.contains(*name))
            .for_each(|(_, field)| {
                let original = mem::take(field);
                *field = self.highlight_value(original, words_to_highlight);
            });
    }
}
impl Data {
    /// Runs `search_query` against the index named `index`.
    ///
    /// # Errors
    ///
    /// Fails when the index does not exist or when the search itself fails.
    pub fn search<S: AsRef<str>>(
        &self,
        index: S,
        search_query: SearchQuery,
    ) -> anyhow::Result<SearchResult> {
        match self.index_controller.index(&index)? {
            Some(index) => search_query.perform(index),
            // Same wording as the other methods of this impl.
            None => bail!("Index {:?} doesn't exist", index.as_ref()),
        }
    }

    /// Returns up to `limit` documents of `index`, skipping the first `offset` ones.
    ///
    /// When `attributes_to_retrieve` is given, only those fields are returned
    /// (names unknown to the index are ignored); otherwise every field is.
    /// The work is done on a blocking task.
    ///
    /// # Errors
    ///
    /// Fails when the index does not exist, cannot be read, or when the
    /// blocking task cannot be joined.
    pub async fn retrieve_documents<S>(
        &self,
        index: impl AsRef<str> + Send + Sync + 'static,
        offset: usize,
        limit: usize,
        attributes_to_retrieve: Option<Vec<S>>,
    ) -> anyhow::Result<Vec<Map<String, Value>>>
    where
        S: AsRef<str> + Send + Sync + 'static,
    {
        let index_controller = self.index_controller.clone();
        let documents: anyhow::Result<_> = tokio::task::spawn_blocking(move || {
            let index = index_controller
                .index(&index)?
                .with_context(|| format!("Index {:?} doesn't exist", index.as_ref()))?;

            let txn = index.read_txn()?;

            let fields_ids_map = index.fields_ids_map(&txn)?;

            let attributes_to_retrieve_ids = match attributes_to_retrieve {
                Some(attrs) => attrs
                    .iter()
                    .filter_map(|f| fields_ids_map.id(f.as_ref()))
                    .collect::<Vec<_>>(),
                None => fields_ids_map.iter().map(|(id, _)| id).collect(),
            };

            let iter = index.documents.range(&txn, &(..))?.skip(offset).take(limit);

            let mut documents = Vec::new();

            for entry in iter {
                let (_id, obkv) = entry?;
                let object = obkv_to_json(&attributes_to_retrieve_ids, &fields_ids_map, obkv)?;
                documents.push(object);
            }

            Ok(documents)
        })
        .await?;
        documents
    }

    /// Returns the document of `index` whose external id is `document_id`.
    ///
    /// When `attributes_to_retrieve` is given, only those fields are returned
    /// (names unknown to the index are ignored); otherwise every field is.
    /// The work is done on a blocking task.
    ///
    /// # Errors
    ///
    /// Fails when the index or the document does not exist, when the index
    /// cannot be read, or when the blocking task cannot be joined.
    pub async fn retrieve_document<S>(
        &self,
        index: impl AsRef<str> + Sync + Send + 'static,
        document_id: impl AsRef<str> + Sync + Send + 'static,
        attributes_to_retrieve: Option<Vec<S>>,
    ) -> anyhow::Result<Map<String, Value>>
    where
        S: AsRef<str> + Sync + Send + 'static,
    {
        let index_controller = self.index_controller.clone();
        let document: anyhow::Result<_> = tokio::task::spawn_blocking(move || {
            let index = index_controller
                .index(&index)?
                .with_context(|| format!("Index {:?} doesn't exist", index.as_ref()))?;
            let txn = index.read_txn()?;

            let fields_ids_map = index.fields_ids_map(&txn)?;

            let attributes_to_retrieve_ids = match attributes_to_retrieve {
                Some(attrs) => attrs
                    .iter()
                    .filter_map(|f| fields_ids_map.id(f.as_ref()))
                    .collect::<Vec<_>>(),
                None => fields_ids_map.iter().map(|(id, _)| id).collect(),
            };

            // External (user-facing) id -> internal document id.
            let internal_id = index
                .external_documents_ids(&txn)?
                .get(document_id.as_ref().as_bytes())
                .with_context(|| format!("Document with id {} not found", document_id.as_ref()))?;

            let document = index
                .documents(&txn, std::iter::once(internal_id))?
                .into_iter()
                .next()
                .map(|(_, d)| d);

            match document {
                Some(document) => Ok(obkv_to_json(
                    &attributes_to_retrieve_ids,
                    &fields_ids_map,
                    document,
                )?),
                None => bail!("Document with id {} not found", document_id.as_ref()),
            }
        })
        .await?;
        document
    }
}
/// Converts a JSON array of facet filters into a `FacetCondition`.
///
/// Each element of `arr` is either a string or an array of strings. The
/// former is collected as `Either::Right`, the latter as `Either::Left`, and
/// the resulting list is handed to `FacetCondition::from_array`.
///
/// # Errors
///
/// Fails when an element is neither a string nor an array of strings, or when
/// `FacetCondition::from_array` rejects the expression.
fn parse_facets_array(
    txn: &RoTxn,
    index: &Index,
    arr: &[Value],
) -> anyhow::Result<Option<FacetCondition>> {
    let mut ands = Vec::with_capacity(arr.len());
    for value in arr {
        match value {
            Value::String(s) => ands.push(Either::Right(s.clone())),
            Value::Array(arr) => {
                let mut ors = Vec::with_capacity(arr.len());
                for value in arr {
                    match value {
                        Value::String(s) => ors.push(s.clone()),
                        v => bail!("Invalid facet expression, expected String, found: {:?}", v),
                    }
                }
                ands.push(Either::Left(ors));
            }
            v => bail!(
                "Invalid facet expression, expected String or [String], found: {:?}",
                v
            ),
        }
    }

    FacetCondition::from_array(txn, index, ands)
}
/// Parses the `facetFilters` value of a search request.
///
/// Only JSON arrays are accepted; any other kind of value is an error.
fn parse_facets(
    facets: &Value,
    index: &Index,
    txn: &RoTxn,
) -> anyhow::Result<Option<FacetCondition>> {
    // String expressions are disabled for now:
    // Value::String(expr) => Ok(Some(FacetCondition::from_str(txn, index, expr)?)),
    if let Value::Array(filters) = facets {
        return parse_facets_array(txn, index, filters);
    }

    bail!(
        "Invalid facet expression, expected Array, found: {:?}",
        facets
    )
}

View File

@@ -0,0 +1,115 @@
use std::ops::Deref;
use async_compression::tokio_02::write::GzipEncoder;
use futures_util::stream::StreamExt;
use milli::update::{IndexDocumentsMethod, UpdateFormat};
use tokio::io::AsyncWriteExt;
use crate::index_controller::UpdateStatus;
use crate::index_controller::{IndexController, Settings, IndexSettings, IndexMetadata};
use super::Data;
impl Data {
/// Registers a document addition for `index` from a stream of payload chunks.
///
/// The chunks are gzip-compressed into an anonymous temporary file, which is
/// then memory-mapped and handed to the index controller on a blocking task.
/// A stream that yields no chunk at all is forwarded as an empty payload.
///
/// Fails when the temporary file cannot be created or written, when the stream
/// yields an error, or when the index controller rejects the update.
pub async fn add_documents<B, E>(
&self,
index: impl AsRef<str> + Send + Sync + 'static,
method: IndexDocumentsMethod,
format: UpdateFormat,
mut stream: impl futures::Stream<Item=Result<B, E>> + Unpin,
primary_key: Option<String>,
) -> anyhow::Result<UpdateStatus>
where
B: Deref<Target = [u8]>,
E: std::error::Error + Send + Sync + 'static,
{
// Creating the temporary file is blocking I/O, hence the blocking task.
let file = tokio::task::spawn_blocking(tempfile::tempfile).await?;
let file = tokio::fs::File::from_std(file?);
let mut encoder = GzipEncoder::new(file);
// Stays `true` only when the stream yields no item at all.
let mut empty_update = true;
while let Some(result) = stream.next().await {
empty_update = false;
let bytes = &*result?;
encoder.write_all(&bytes[..]).await?;
}
// Finish the gzip stream before taking the file back and syncing it.
encoder.shutdown().await?;
let mut file = encoder.into_inner();
file.sync_all().await?;
let file = file.into_std().await;
let index_controller = self.index_controller.clone();
let update = tokio::task::spawn_blocking(move ||{
// Declared outside of the `if` so that the mapping outlives `bytes`.
let mmap;
let bytes = if empty_update {
// The file is not mapped for an empty update; an empty slice is passed instead.
&[][..]
} else {
// NOTE(review): this assumes nothing else modifies or truncates the
// temporary file while it is mapped — confirm.
mmap = unsafe { memmap::Mmap::map(&file)? };
&mmap
};
index_controller.add_documents(index, method, format, &bytes, primary_key)
}).await??;
Ok(update.into())
}
/// Registers a settings update for `index`; runs on a blocking task.
pub async fn update_settings(
&self,
index: impl AsRef<str> + Send + Sync + 'static,
settings: Settings
) -> anyhow::Result<UpdateStatus> {
let index_controller = self.index_controller.clone();
let update = tokio::task::spawn_blocking(move || index_controller.update_settings(index, settings)).await??;
Ok(update.into())
}
/// Asks the index controller to clear the documents of `index`; runs on a blocking task.
pub async fn clear_documents(
&self,
index: impl AsRef<str> + Sync + Send + 'static,
) -> anyhow::Result<UpdateStatus> {
let index_controller = self.index_controller.clone();
let update = tokio::task::spawn_blocking(move || index_controller.clear_documents(index)).await??;
Ok(update.into())
}
/// Asks the index controller to delete the documents `document_ids` from `index`; runs on a blocking task.
pub async fn delete_documents(
&self,
index: impl AsRef<str> + Sync + Send + 'static,
document_ids: Vec<String>,
) -> anyhow::Result<UpdateStatus> {
let index_controller = self.index_controller.clone();
let update = tokio::task::spawn_blocking(move || index_controller.delete_documents(index, document_ids)).await??;
Ok(update.into())
}
/// Asks the index controller to delete `index`; runs on a blocking task.
pub async fn delete_index(
&self,
index: impl AsRef<str> + Send + Sync + 'static,
) -> anyhow::Result<()> {
let index_controller = self.index_controller.clone();
tokio::task::spawn_blocking(move || { index_controller.delete_index(index) }).await??;
Ok(())
}
/// Returns the status of the update `uid` of `index`, as reported by the index controller.
#[inline]
pub fn get_update_status(&self, index: impl AsRef<str>, uid: u64) -> anyhow::Result<Option<UpdateStatus>> {
self.index_controller.update_status(index, uid)
}
/// Returns the status of every update of `index`, as reported by the index controller.
pub fn get_updates_status(&self, index: impl AsRef<str>) -> anyhow::Result<Vec<UpdateStatus>> {
self.index_controller.all_update_status(index)
}
/// Updates the index `name` with a new name and/or primary key; `None` values are passed on as unset.
pub fn update_index(
&self,
name: impl AsRef<str>,
primary_key: Option<impl AsRef<str>>,
new_name: Option<impl AsRef<str>>
) -> anyhow::Result<IndexMetadata> {
let settings = IndexSettings {
name: new_name.map(|s| s.as_ref().to_string()),
primary_key: primary_key.map(|s| s.as_ref().to_string()),
};
self.index_controller.update_index(name, settings)
}
}

View File

@@ -0,0 +1,423 @@
use std::fs::{create_dir_all, File};
use std::io::prelude::*;
use std::path::{Path, PathBuf};
use std::sync::Mutex;
use std::thread;
use actix_web::web;
use chrono::offset::Utc;
use indexmap::IndexMap;
use log::{error, info};
use once_cell::sync::Lazy;
use serde::{Deserialize, Serialize};
use serde_json::json;
use tempfile::TempDir;
use crate::Data;
use crate::error::{Error, ResponseError};
use crate::helpers::compression;
use crate::routes::index;
use crate::routes::setting::Settings;
use crate::routes::index::IndexResponse;
// Mutex to share dump progress: holds the `DumpInfo` last stored with
// `DumpInfo::set_current`, and `None` until one has been stored.
static DUMP_INFO: Lazy<Mutex<Option<DumpInfo>>> = Lazy::new(Mutex::default);
/// Version of the dump format, stored in the dump metadata and used to pick the import routine.
#[derive(Debug, Serialize, Deserialize, Copy, Clone)]
enum DumpVersion {
V1,
}
impl DumpVersion {
/// Version written by newly created dumps.
const CURRENT: Self = Self::V1;
}
/// Content of the `metadata.json` file of a dump; field names are serialized in camelCase.
#[derive(Debug, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct DumpMetadata {
/// Indexes contained in the dump.
indexes: Vec<crate::routes::index::IndexResponse>,
/// Version of the database the dump was created from.
db_version: String,
/// Version of the dump format.
dump_version: DumpVersion,
}
impl DumpMetadata {
    /// Build the metadata of a dump using the current dump version of meilisearch.
    pub fn new(indexes: Vec<crate::routes::index::IndexResponse>, db_version: String) -> Self {
        let dump_version = DumpVersion::CURRENT;
        Self { indexes, db_version, dump_version }
    }

    /// Read the metadata stored in the `metadata.json` file of `dir_path`.
    fn from_path(dir_path: &Path) -> Result<Self, Error> {
        let file = File::open(dir_path.join("metadata.json"))?;
        let metadata = serde_json::from_reader(std::io::BufReader::new(file))?;

        Ok(metadata)
    }

    /// Write these metadata to the `metadata.json` file of `dir_path`.
    fn to_path(&self, dir_path: &Path) -> Result<(), Error> {
        let file = File::create(dir_path.join("metadata.json"))?;
        serde_json::to_writer(file, self)?;

        Ok(())
    }
}
/// Read the `Settings` stored in the `settings.json` file of `dir_path`.
fn settings_from_path(dir_path: &Path) -> Result<Settings, Error> {
    let file = File::open(dir_path.join("settings.json"))?;
    let settings = serde_json::from_reader(std::io::BufReader::new(file))?;

    Ok(settings)
}
/// Write `settings` to the `settings.json` file of `dir_path`.
fn settings_to_path(settings: &Settings, dir_path: &Path) -> Result<(), Error> {
    let destination = File::create(dir_path.join("settings.json"))?;
    serde_json::to_writer(destination, settings)?;

    Ok(())
}
/// Import settings and documents of a dump with version `DumpVersion::V1` in specified index.
fn import_index_v1(
data: &Data,
dumps_dir: &Path,
index_uid: &str,
document_batch_size: usize,
write_txn: &mut MainWriter,
) -> Result<(), Error> {
// open index
let index = data
.db
.open_index(index_uid)
.ok_or(Error::index_not_found(index_uid))?;
// index dir path in dump dir
let index_path = &dumps_dir.join(index_uid);
// extract `settings.json` file and import content
let settings = settings_from_path(&index_path)?;
let settings = settings.to_update().map_err(|e| Error::dump_failed(format!("importing settings for index {}; {}", index_uid, e)))?;
apply_settings_update(write_txn, &index, settings)?;
// create iterator over documents in `documents.jsonl` to make batch importation
// create iterator over documents in `documents.jsonl` to make batch importation
let documents = {
let file = File::open(&index_path.join("documents.jsonl"))?;
let reader = std::io::BufReader::new(file);
let deserializer = serde_json::Deserializer::from_reader(reader);
deserializer.into_iter::<IndexMap<String, serde_json::Value>>()
};
// batch import document every `document_batch_size`:
// create a Vec to bufferize documents
let mut values = Vec::with_capacity(document_batch_size);
// iterate over documents
for document in documents {
// push document in buffer
values.push(document?);
// if buffer is full, create and apply a batch, and clean buffer
if values.len() == document_batch_size {
let batch = std::mem::replace(&mut values, Vec::with_capacity(document_batch_size));
apply_documents_addition(write_txn, &index, batch)?;
}
}
// apply documents remaining in the buffer
if !values.is_empty() {
apply_documents_addition(write_txn, &index, values)?;
}
// sync index information: stats, updated_at, last_update
if let Err(e) = crate::index_update_callback_txn(index, index_uid, data, write_txn) {
return Err(Error::Internal(e));
}
Ok(())
}
/// Import the dump archive located at `dump_path` into the database.
///
/// The archive is extracted to a temporary directory. Every index listed in
/// its metadata is then re-created empty (replacing an existing index with the
/// same uid) and filled inside a single main write transaction.
pub fn import_dump(
    data: &Data,
    dump_path: &Path,
    document_batch_size: usize,
) -> Result<(), Error> {
    info!("Importing dump from {:?}...", dump_path);

    // extract the dump in a temporary directory
    let extraction_dir = TempDir::new()?;
    let extracted_path = extraction_dir.path();
    compression::from_tar_gz(dump_path, extracted_path)?;

    // the metadata tell which indexes to import and with which routine
    let metadata = DumpMetadata::from_path(extracted_path)?;
    let import_index = match metadata.dump_version {
        DumpVersion::V1 => import_index_v1,
    };

    // start from empty indexes: drop the ones sharing a `uid` with the dump, then create them all
    let known_uids = data.db.indexes_uids();
    for dumped in &metadata.indexes {
        let already_exists = known_uids.contains(&dumped.uid);
        if already_exists {
            data.db.delete_index(dumped.uid.clone())?;
        }
        index::create_index_sync(&data.db, dumped.uid.clone(), dumped.name.clone(), dumped.primary_key.clone())?;
    }

    // import the content of each index
    data.db.main_write::<_, _, Error>(|mut writer| {
        for dumped in metadata.indexes {
            import_index(data, extracted_path, &dumped.uid, document_batch_size, &mut writer)?;
        }
        Ok(())
    })?;

    info!("Dump importation from {:?} succeed", dump_path);
    Ok(())
}
/// State of a dump; serialized in snake_case (`done`, `in_progress`, `failed`).
#[derive(Debug, Serialize, Deserialize, PartialEq, Clone)]
#[serde(rename_all = "snake_case")]
pub enum DumpStatus {
Done,
InProgress,
Failed,
}
/// Identifier and progress of a dump, as shared through `DUMP_INFO`.
#[derive(Debug, Serialize, Clone)]
#[serde(rename_all = "camelCase")]
pub struct DumpInfo {
/// Identifier of the dump, generated from its creation date.
pub uid: String,
pub status: DumpStatus,
/// Serialized error of a failed dump; flattened into the object, and omitted when `None`.
#[serde(skip_serializing_if = "Option::is_none", flatten)]
pub error: Option<serde_json::Value>,
}
impl DumpInfo {
pub fn new(uid: String, status: DumpStatus) -> Self {
Self { uid, status, error: None }
}
pub fn with_error(mut self, error: ResponseError) -> Self {
self.status = DumpStatus::Failed;
self.error = Some(json!(error));
self
}
pub fn dump_already_in_progress(&self) -> bool {
self.status == DumpStatus::InProgress
}
pub fn get_current() -> Option<Self> {
DUMP_INFO.lock().unwrap().clone()
}
pub fn set_current(&self) {
*DUMP_INFO.lock().unwrap() = Some(self.clone());
}
}
/// Build a dump uid from the current UTC date and time (`YYYYMMDD-HHMMSS` followed by the milliseconds).
fn generate_uid() -> String {
    let now = Utc::now();
    format!("{}", now.format("%Y%m%d-%H%M%S%3f"))
}
/// Path of the compressed dump `dump_uid`: the file `<dump_uid>.dump` inside `dumps_dir`.
pub fn compressed_dumps_dir(dumps_dir: &Path, dump_uid: &str) -> PathBuf {
    let mut file_name = String::with_capacity(dump_uid.len() + ".dump".len());
    file_name.push_str(dump_uid);
    file_name.push_str(".dump");
    dumps_dir.join(file_name)
}
/// Write the `metadata.json` file of a dump in `dir_path`, recording `indexes` and the database version.
fn dump_metadata(data: &web::Data<Data>, dir_path: &Path, indexes: Vec<IndexResponse>) -> Result<(), Error> {
    let (major, minor, patch) = data.db.version();
    let db_version = format!("{}.{}.{}", major, minor, patch);

    DumpMetadata::new(indexes, db_version).to_path(dir_path)
}
/// Write the settings of the index `index_uid` to the `settings.json` file of `dir_path`.
fn dump_index_settings(data: &web::Data<Data>, reader: &MainReader, dir_path: &Path, index_uid: &str) -> Result<(), Error> {
    let index_settings = crate::routes::setting::get_all_sync(data, reader, index_uid)?;
    settings_to_path(&index_settings, dir_path)?;

    Ok(())
}
/// Export updates of provided index in dump
///
/// Writes one JSON object per line to the `updates.jsonl` file of `dir_path`.
/// Fails when the updates cannot be listed or the file cannot be written.
fn dump_index_updates(data: &web::Data<Data>, reader: &UpdateReader, dir_path: &Path, index_uid: &str) -> Result<(), Error> {
    let updates_path = dir_path.join("updates.jsonl");
    let updates = crate::routes::index::get_all_updates_status_sync(data, reader, index_uid)?;

    // Buffered: the serializer emits many small writes per update.
    let mut writer = std::io::BufWriter::new(File::create(updates_path)?);
    for update in updates {
        serde_json::to_writer(&mut writer, &update)?;
        writeln!(writer)?;
    }
    // Flush explicitly: an error on the implicit flush at drop would be lost.
    writer.flush()?;

    Ok(())
}
/// Export documents of provided index in dump
///
/// Documents are fetched in batches of `data.dump_batch_size` and written, one
/// JSON object per line, to the `documents.jsonl` file of `dir_path`. Fails when
/// the documents cannot be fetched or the file cannot be written.
fn dump_index_documents(data: &web::Data<Data>, reader: &MainReader, dir_path: &Path, index_uid: &str) -> Result<(), Error> {
    let documents_path = dir_path.join("documents.jsonl");
    // Buffered: the serializer emits many small writes per document.
    let mut writer = std::io::BufWriter::new(File::create(documents_path)?);
    let dump_batch_size = data.dump_batch_size;

    let mut offset = 0;
    loop {
        let documents = crate::routes::document::get_all_documents_sync(data, reader, index_uid, offset, dump_batch_size, None)?;
        // an empty batch means every document has been exported
        if documents.is_empty() {
            break;
        }
        offset += dump_batch_size;

        for document in documents {
            serde_json::to_writer(&mut writer, &document)?;
            writeln!(writer)?;
        }
    }
    // Flush explicitly: an error on the implicit flush at drop would be lost.
    writer.flush()?;

    Ok(())
}
/// Write error with a context.
fn fail_dump_process<E: std::error::Error>(dump_info: DumpInfo, context: &str, error: E) {
let error_message = format!("{}; {}", context, error);
error!("Something went wrong during dump process: {}", &error_message);
dump_info.with_error(Error::dump_failed(error_message).into()).set_current();
}
/// Main function of dump.
///
/// Exports the metadata, then the settings, documents and updates of every
/// index into a temporary directory, and compresses that directory into
/// `<dump uid>.dump` inside `dumps_dir`.
///
/// Nothing is returned: the outcome is published through the shared dump info,
/// which is set to `Done` on success and, through `fail_dump_process`, to
/// `Failed` at the first step that fails.
fn dump_process(data: web::Data<Data>, dumps_dir: PathBuf, dump_info: DumpInfo) {
// open read transaction on Update
let update_reader = match data.db.update_read_txn() {
Ok(r) => r,
Err(e) => {
fail_dump_process(dump_info, "creating RO transaction on updates", e);
return ;
}
};
// open read transaction on Main
let main_reader = match data.db.main_read_txn() {
Ok(r) => r,
Err(e) => {
fail_dump_process(dump_info, "creating RO transaction on main", e);
return ;
}
};
// create a temporary directory
let tmp_dir = match TempDir::new() {
Ok(tmp_dir) => tmp_dir,
Err(e) => {
fail_dump_process(dump_info, "creating temporary directory", e);
return ;
}
};
let tmp_dir_path = tmp_dir.path();
// fetch indexes
let indexes = match crate::routes::index::list_indexes_sync(&data, &main_reader) {
Ok(indexes) => indexes,
Err(e) => {
fail_dump_process(dump_info, "listing indexes", e);
return ;
}
};
// create metadata
if let Err(e) = dump_metadata(&data, &tmp_dir_path, indexes.clone()) {
fail_dump_process(dump_info, "generating metadata", e);
return ;
}
// export settings, updates and documents for each index
for index in indexes {
let index_path = tmp_dir_path.join(&index.uid);
// create index sub-directory
if let Err(e) = create_dir_all(&index_path) {
fail_dump_process(dump_info, &format!("creating directory for index {}", &index.uid), e);
return ;
}
// export settings
if let Err(e) = dump_index_settings(&data, &main_reader, &index_path, &index.uid) {
fail_dump_process(dump_info, &format!("generating settings for index {}", &index.uid), e);
return ;
}
// export documents
if let Err(e) = dump_index_documents(&data, &main_reader, &index_path, &index.uid) {
fail_dump_process(dump_info, &format!("generating documents for index {}", &index.uid), e);
return ;
}
// export updates
if let Err(e) = dump_index_updates(&data, &update_reader, &index_path, &index.uid) {
fail_dump_process(dump_info, &format!("generating updates for index {}", &index.uid), e);
return ;
}
}
// compress dump in a file named `{dump_uid}.dump` in `dumps_dir`
if let Err(e) = crate::helpers::compression::to_tar_gz(&tmp_dir_path, &compressed_dumps_dir(&dumps_dir, &dump_info.uid)) {
fail_dump_process(dump_info, "compressing dump", e);
return ;
}
// update dump info to `done`
let resume = DumpInfo::new(
dump_info.uid,
DumpStatus::Done
);
resume.set_current();
}
pub fn init_dump_process(data: &web::Data<Data>, dumps_dir: &Path) -> Result<DumpInfo, Error> {
create_dir_all(dumps_dir).map_err(|e| Error::dump_failed(format!("creating temporary directory {}", e)))?;
// check if a dump is already in progress
if let Some(resume) = DumpInfo::get_current() {
if resume.dump_already_in_progress() {
return Err(Error::dump_conflict())
}
}
// generate a new dump info
let info = DumpInfo::new(
generate_uid(),
DumpStatus::InProgress
);
info.set_current();
let data = data.clone();
let dumps_dir = dumps_dir.to_path_buf();
let info_cloned = info.clone();
// run dump process in a new thread
thread::spawn(move ||
dump_process(data, dumps_dir, info_cloned)
);
Ok(info)
}

View File

@@ -0,0 +1,297 @@
use std::error;
use std::fmt;
use actix_http::ResponseBuilder;
use actix_web as aweb;
use actix_web::error::{JsonPayloadError, QueryPayloadError};
use actix_web::http::StatusCode;
use serde::ser::{Serialize, Serializer, SerializeStruct};
use meilisearch_error::{ErrorCode, Code};
/// Error returned to HTTP clients; wraps any error that provides an error code.
#[derive(Debug)]
pub struct ResponseError {
inner: Box<dyn ErrorCode>,
}
// Marker impl: only the default methods of `std::error::Error` are used.
impl error::Error for ResponseError {}
impl ErrorCode for ResponseError {
fn error_code(&self) -> Code {
self.inner.error_code()
}
}
impl fmt::Display for ResponseError {
/// Formats the error by delegating to the `fmt` method of the wrapped error.
// NOTE(review): which `fmt` is called depends on the supertraits of `ErrorCode`,
// presumably `Display` — confirm.
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
self.inner.fmt(f)
}
}
// TODO: remove this when implementing actual error handling
impl From<anyhow::Error> for ResponseError {
fn from(other: anyhow::Error) -> ResponseError {
ResponseError { inner: Box::new(Error::NotFound(other.to_string())) }
}
}
impl From<Error> for ResponseError {
fn from(error: Error) -> ResponseError {
ResponseError { inner: Box::new(error) }
}
}
impl From<FacetCountError> for ResponseError {
fn from(err: FacetCountError) -> ResponseError {
ResponseError { inner: Box::new(err) }
}
}
impl Serialize for ResponseError {
    /// Serializes the error as a struct with the four fields `message`,
    /// `errorCode`, `errorType` and `errorLink`.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        // The second argument is the number of fields written below.
        let mut payload = serializer.serialize_struct("ResponseError", 4)?;
        payload.serialize_field("message", &self.to_string())?;
        payload.serialize_field("errorCode", &self.error_name())?;
        payload.serialize_field("errorType", &self.error_type())?;
        payload.serialize_field("errorLink", &self.error_url())?;
        payload.end()
    }
}
impl aweb::error::ResponseError for ResponseError {
    /// Builds the HTTP response: the status from `status_code` and the
    /// serialized error as JSON body.
    fn error_response(&self) -> aweb::HttpResponse {
        let status = self.status_code();
        let mut response = ResponseBuilder::new(status);
        response.json(&self)
    }

    /// The HTTP status reported by the error code.
    fn status_code(&self) -> StatusCode {
        self.http_status()
    }
}
/// Errors produced by the HTTP layer.
///
/// The `String` payloads are interpolated into the user-facing message by the
/// `Display` implementation of this type.
#[derive(Debug)]
pub enum Error {
// (parameter name, error description)
BadParameter(String, String),
BadRequest(String),
CreateIndex(String),
DocumentNotFound(String),
IndexNotFound(String),
IndexAlreadyExists(String),
Internal(String),
InvalidIndexUid,
InvalidToken(String),
MissingAuthorizationHeader,
NotFound(String),
OpenIndex(String),
// (document id, error description)
RetrieveDocument(u32, String),
SearchDocuments(String),
PayloadTooLarge,
UnsupportedMediaType,
DumpAlreadyInProgress,
DumpProcessFailed(String),
}
impl error::Error for Error {}
impl ErrorCode for Error {
    /// Maps every variant of [`Error`] to its [`Code`].
    ///
    /// The match is exhaustive on purpose: adding a variant to `Error` must be
    /// a compile error here rather than a runtime panic.
    fn error_code(&self) -> Code {
        use Error::*;
        match self {
            BadParameter(_, _) => Code::BadParameter,
            BadRequest(_) => Code::BadRequest,
            CreateIndex(_) => Code::CreateIndex,
            DocumentNotFound(_) => Code::DocumentNotFound,
            IndexNotFound(_) => Code::IndexNotFound,
            IndexAlreadyExists(_) => Code::IndexAlreadyExists,
            Internal(_) => Code::Internal,
            InvalidIndexUid => Code::InvalidIndexUid,
            InvalidToken(_) => Code::InvalidToken,
            MissingAuthorizationHeader => Code::MissingAuthorizationHeader,
            NotFound(_) => Code::NotFound,
            OpenIndex(_) => Code::OpenIndex,
            RetrieveDocument(_, _) => Code::RetrieveDocument,
            SearchDocuments(_) => Code::SearchDocuments,
            PayloadTooLarge => Code::PayloadTooLarge,
            UnsupportedMediaType => Code::UnsupportedMediaType,
            // Both dump variants are constructed by `Error::dump_conflict` and
            // `Error::dump_failed`, so they do reach this function; the former
            // `unreachable!()` arm panicked as soon as such an error was turned
            // into a response. Until the dedicated `Code::DumpAlreadyInProgress`
            // and `Code::DumpProcessFailed` mappings can be enabled, fall back
            // to codes that are already used above.
            // TODO: switch to the dedicated dump codes once they are available.
            DumpAlreadyInProgress => Code::BadRequest,
            DumpProcessFailed(_) => Code::Internal,
        }
    }
}
/// Errors raised while handling a facet count request.
///
/// Every variant is reported with `Code::BadRequest`.
#[derive(Debug)]
pub enum FacetCountError {
// Payload: the attribute name that is not set as a facet.
AttributeNotSet(String),
// Payload: the syntax error message.
SyntaxError(String),
// `found` is what was encountered, `expected` lists what would have been accepted.
UnexpectedToken { found: String, expected: &'static [&'static str] },
NoFacetSet,
}
impl error::Error for FacetCountError {}
impl ErrorCode for FacetCountError {
// All facet count errors share the same error code.
fn error_code(&self) -> Code {
Code::BadRequest
}
}
impl FacetCountError {
    /// Builds an [`FacetCountError::UnexpectedToken`] from anything that can be
    /// turned into a string and the list of expected tokens.
    pub fn unexpected_token(found: impl ToString, expected: &'static [&'static str]) -> FacetCountError {
        FacetCountError::UnexpectedToken {
            found: found.to_string(),
            expected,
        }
    }
}

impl From<serde_json::error::Error> for FacetCountError {
    /// A JSON error is reported as a syntax error carrying its message.
    fn from(other: serde_json::error::Error) -> FacetCountError {
        let message = other.to_string();
        Self::SyntaxError(message)
    }
}
impl fmt::Display for FacetCountError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
use FacetCountError::*;
match self {
AttributeNotSet(attr) => write!(f, "Attribute {} is not set as facet", attr),
SyntaxError(msg) => write!(f, "Syntax error: {}", msg),
UnexpectedToken { expected, found } => write!(f, "Unexpected {} found, expected {:?}", found, expected),
NoFacetSet => write!(f, "Can't perform facet count, as no facet is set"),
}
}
}
impl Error {
pub fn internal(err: impl fmt::Display) -> Error {
Error::Internal(err.to_string())
}
pub fn bad_request(err: impl fmt::Display) -> Error {
Error::BadRequest(err.to_string())
}
pub fn missing_authorization_header() -> Error {
Error::MissingAuthorizationHeader
}
pub fn invalid_token(err: impl fmt::Display) -> Error {
Error::InvalidToken(err.to_string())
}
pub fn not_found(err: impl fmt::Display) -> Error {
Error::NotFound(err.to_string())
}
pub fn index_not_found(err: impl fmt::Display) -> Error {
Error::IndexNotFound(err.to_string())
}
pub fn document_not_found(err: impl fmt::Display) -> Error {
Error::DocumentNotFound(err.to_string())
}
pub fn bad_parameter(param: impl fmt::Display, err: impl fmt::Display) -> Error {
Error::BadParameter(param.to_string(), err.to_string())
}
pub fn open_index(err: impl fmt::Display) -> Error {
Error::OpenIndex(err.to_string())
}
pub fn create_index(err: impl fmt::Display) -> Error {
Error::CreateIndex(err.to_string())
}
pub fn invalid_index_uid() -> Error {
Error::InvalidIndexUid
}
pub fn retrieve_document(doc_id: u32, err: impl fmt::Display) -> Error {
Error::RetrieveDocument(doc_id, err.to_string())
}
pub fn search_documents(err: impl fmt::Display) -> Error {
Error::SearchDocuments(err.to_string())
}
pub fn dump_conflict() -> Error {
Error::DumpAlreadyInProgress
}
pub fn dump_failed(message: String) -> Error {
Error::DumpProcessFailed(message)
}
}
impl fmt::Display for Error {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::BadParameter(param, err) => write!(f, "Url parameter {} error: {}", param, err),
Self::BadRequest(err) => f.write_str(err),
Self::CreateIndex(err) => write!(f, "Impossible to create index; {}", err),
Self::DocumentNotFound(document_id) => write!(f, "Document with id {} not found", document_id),
Self::IndexNotFound(index_uid) => write!(f, "Index {} not found", index_uid),
Self::IndexAlreadyExists(index_uid) => write!(f, "Index {} already exists", index_uid),
Self::Internal(err) => f.write_str(err),
Self::InvalidIndexUid => f.write_str("Index must have a valid uid; Index uid can be of type integer or string only composed of alphanumeric characters, hyphens (-) and underscores (_)."),
Self::InvalidToken(err) => write!(f, "Invalid API key: {}", err),
Self::MissingAuthorizationHeader => f.write_str("You must have an authorization token"),
Self::NotFound(err) => write!(f, "{} not found", err),
Self::OpenIndex(err) => write!(f, "Impossible to open index; {}", err),
Self::RetrieveDocument(id, err) => write!(f, "Impossible to retrieve the document with id: {}; {}", id, err),
Self::SearchDocuments(err) => write!(f, "Impossible to search documents; {}", err),
Self::PayloadTooLarge => f.write_str("Payload too large"),
Self::UnsupportedMediaType => f.write_str("Unsupported media type"),
Self::DumpAlreadyInProgress => f.write_str("Another dump is already in progress"),
Self::DumpProcessFailed(message) => write!(f, "Dump process failed: {}", message),
}
}
}
impl From<std::io::Error> for Error {
    /// I/O failures are reported as internal errors carrying their message.
    fn from(err: std::io::Error) -> Error {
        let message = err.to_string();
        Self::Internal(message)
    }
}

impl From<actix_http::Error> for Error {
    /// actix-http failures are reported as internal errors carrying their message.
    fn from(err: actix_http::Error) -> Error {
        let message = err.to_string();
        Self::Internal(message)
    }
}

impl From<serde_json::error::Error> for Error {
    /// JSON failures are reported as internal errors carrying their message.
    fn from(err: serde_json::error::Error) -> Error {
        let message = err.to_string();
        Self::Internal(message)
    }
}
impl From<JsonPayloadError> for Error {
    /// Translates a JSON payload extraction failure into an [`Error`].
    fn from(err: JsonPayloadError) -> Error {
        match err {
            JsonPayloadError::Overflow => Self::PayloadTooLarge,
            JsonPayloadError::ContentType => Self::UnsupportedMediaType,
            JsonPayloadError::Deserialize(cause) => {
                Self::BadRequest(format!("Invalid JSON: {}", cause))
            }
            JsonPayloadError::Payload(cause) => {
                Self::BadRequest(format!("Problem while decoding the request: {}", cause))
            }
        }
    }
}

impl From<QueryPayloadError> for Error {
    /// Translates a query string extraction failure into an [`Error`].
    fn from(err: QueryPayloadError) -> Error {
        // `Deserialize` is the only variant, so the pattern cannot fail.
        let QueryPayloadError::Deserialize(cause) = err;
        Self::BadRequest(format!("Invalid query parameters: {}", cause))
    }
}
pub fn payload_error_handler<E: Into<Error>>(err: E) -> ResponseError {
let error: Error = err.into();
error.into()
}

View File

@@ -0,0 +1,103 @@
use std::cell::RefCell;
use std::pin::Pin;
use std::rc::Rc;
use std::task::{Context, Poll};
use actix_service::{Service, Transform};
use actix_web::{dev::ServiceRequest, dev::ServiceResponse, web};
use futures::future::{err, ok, Future, Ready};
use crate::error::{Error, ResponseError};
use crate::Data;
/// Access level required by a route.
///
/// The middleware built from this value accepts the master key for every
/// level, additionally the private key for `Private` and `Public`, and
/// additionally the public key for `Public`.
#[derive(Clone)]
pub enum Authentication {
    Public,
    Private,
    Admin,
}

impl<S: 'static, B> Transform<S> for Authentication
where
    S: Service<Request = ServiceRequest, Response = ServiceResponse<B>, Error = actix_web::Error>,
    S::Future: 'static,
    B: 'static,
{
    type Request = ServiceRequest;
    type Response = ServiceResponse<B>;
    type Error = actix_web::Error;
    type InitError = ();
    type Transform = LoggingMiddleware<S>;
    type Future = Ready<Result<Self::Transform, Self::InitError>>;

    /// Wraps `service` into a middleware enforcing this access level.
    fn new_transform(&self, service: S) -> Self::Future {
        let acl = self.clone();
        let service = Rc::new(RefCell::new(service));
        ok(LoggingMiddleware { acl, service })
    }
}
pub struct LoggingMiddleware<S> {
acl: Authentication,
service: Rc<RefCell<S>>,
}
#[allow(clippy::type_complexity)]
impl<S, B> Service for LoggingMiddleware<S>
where
S: Service<Request = ServiceRequest, Response = ServiceResponse<B>, Error = actix_web::Error> + 'static,
S::Future: 'static,
B: 'static,
{
type Request = ServiceRequest;
type Response = ServiceResponse<B>;
type Error = actix_web::Error;
type Future = Pin<Box<dyn Future<Output = Result<Self::Response, Self::Error>>>>;
fn poll_ready(&mut self, cx: &mut Context) -> Poll<Result<(), Self::Error>> {
self.service.poll_ready(cx)
}
fn call(&mut self, req: ServiceRequest) -> Self::Future {
let mut svc = self.service.clone();
// This unwrap is left because this error should never appear. If that's the case, then
// it means that actix-web has an issue or someone changes the type `Data`.
let data = req.app_data::<web::Data<Data>>().unwrap();
if data.api_keys().master.is_none() {
return Box::pin(svc.call(req));
}
let auth_header = match req.headers().get("X-Meili-API-Key") {
Some(auth) => match auth.to_str() {
Ok(auth) => auth,
Err(_) => return Box::pin(err(ResponseError::from(Error::MissingAuthorizationHeader).into())),
},
None => {
return Box::pin(err(ResponseError::from(Error::MissingAuthorizationHeader).into()));
}
};
let authenticated = match self.acl {
Authentication::Admin => data.api_keys().master.as_deref() == Some(auth_header),
Authentication::Private => {
data.api_keys().master.as_deref() == Some(auth_header)
|| data.api_keys().private.as_deref() == Some(auth_header)
}
Authentication::Public => {
data.api_keys().master.as_deref() == Some(auth_header)
|| data.api_keys().private.as_deref() == Some(auth_header)
|| data.api_keys().public.as_deref() == Some(auth_header)
}
};
if authenticated {
Box::pin(svc.call(req))
} else {
Box::pin(err(
ResponseError::from(Error::InvalidToken(auth_header.to_string())).into()
))
}
}
}

View File

@@ -0,0 +1,27 @@
use flate2::Compression;
use flate2::read::GzDecoder;
use flate2::write::GzEncoder;
use std::fs::{create_dir_all, File};
use std::path::Path;
use tar::{Builder, Archive};
use crate::error::Error;
/// Archives the content of the `src` directory into the gzip-compressed tar
/// file `dest`, which is created (or truncated) by this call.
///
/// # Errors
///
/// Any I/O failure while creating, writing or finishing the archive is
/// converted into an [`Error`].
pub fn to_tar_gz(src: &Path, dest: &Path) -> Result<(), Error> {
    let archive_file = File::create(dest)?;
    let mut archive = Builder::new(GzEncoder::new(archive_file, Compression::default()));

    // Entries are stored relative to the archive root.
    archive.append_dir_all(".", src)?;

    // Finish the tar stream first, then the gzip stream around it.
    archive.into_inner()?.finish()?;
    Ok(())
}
/// Unpacks the gzip-compressed tar file `src` into the directory `dest`,
/// creating `dest` (and its parents) when needed.
///
/// # Errors
///
/// Any I/O failure while opening the archive, creating the directory or
/// unpacking is converted into an [`Error`].
pub fn from_tar_gz(src: &Path, dest: &Path) -> Result<(), Error> {
    let compressed = File::open(src)?;
    let mut archive = Archive::new(GzDecoder::new(compressed));
    create_dir_all(dest)?;
    archive.unpack(dest)?;
    Ok(())
}

View File

@@ -0,0 +1,6 @@
pub mod authentication;
pub mod normalize_path;
pub mod compression;
pub use authentication::Authentication;
pub use normalize_path::NormalizePath;

View File

@@ -0,0 +1,86 @@
/// From https://docs.rs/actix-web/3.0.0-alpha.2/src/actix_web/middleware/normalize.rs.html#34
use actix_http::Error;
use actix_service::{Service, Transform};
use actix_web::{
dev::ServiceRequest,
dev::ServiceResponse,
http::uri::{PathAndQuery, Uri},
};
use futures::future::{ok, Ready};
use regex::Regex;
use std::task::{Context, Poll};
/// Middleware factory that collapses repeated slashes in request paths and
/// removes a trailing slash.
pub struct NormalizePath;

impl<S, B> Transform<S> for NormalizePath
where
    S: Service<Request = ServiceRequest, Response = ServiceResponse<B>, Error = Error>,
    S::Future: 'static,
{
    type Request = ServiceRequest;
    type Response = ServiceResponse<B>;
    type Error = Error;
    type InitError = ();
    type Transform = NormalizePathNormalization<S>;
    type Future = Ready<Result<Self::Transform, Self::InitError>>;

    /// Wraps `service`; the slash-merging regex is compiled once here.
    fn new_transform(&self, service: S) -> Self::Future {
        let merge_slash = Regex::new("//+").unwrap();
        ok(NormalizePathNormalization {
            service,
            merge_slash,
        })
    }
}
/// Service created by [`NormalizePath`]; rewrites the request URI before
/// handing the request to the wrapped service.
pub struct NormalizePathNormalization<S> {
    service: S,
    // Matches every run of two or more slashes.
    merge_slash: Regex,
}

impl<S, B> Service for NormalizePathNormalization<S>
where
    S: Service<Request = ServiceRequest, Response = ServiceResponse<B>, Error = Error>,
    S::Future: 'static,
{
    type Request = ServiceRequest;
    type Response = ServiceResponse<B>;
    type Error = Error;
    type Future = S::Future;

    fn poll_ready(&mut self, cx: &mut Context<'_>) -> Poll<Result<(), Self::Error>> {
        self.service.poll_ready(cx)
    }

    /// Normalizes the path of `req` when it contains repeated slashes or ends
    /// with a slash, then forwards the request.
    fn call(&mut self, mut req: ServiceRequest) -> Self::Future {
        let head = req.head_mut();

        // always add trailing slash, might be an extra one
        let mut candidate = head.uri.path().to_string();
        candidate.push('/');

        if self.merge_slash.is_match(&candidate) {
            // normalize multiple /'s to one /
            let merged = self.merge_slash.replace_all(&candidate, "/");
            // Keep a lone "/" as is, otherwise drop the trailing slash.
            let normalized: &str = if merged.len() > 1 {
                merged.trim_end_matches('/')
            } else {
                &merged
            };

            let mut parts = head.uri.clone().into_parts();
            let pq = parts.path_and_query.as_ref().unwrap();
            // Re-attach the original query string, if any.
            let rebuilt = match pq.query() {
                Some(q) => bytes::Bytes::from(format!("{}?{}", normalized, q)),
                None => bytes::Bytes::copy_from_slice(normalized.as_bytes()),
            };
            parts.path_and_query = Some(PathAndQuery::from_maybe_shared(rebuilt).unwrap());

            let uri = Uri::from_parts(parts).unwrap();
            req.match_info_mut().get_mut().update(&uri);
            req.head_mut().uri = uri;
        }

        self.service.call(req)
    }
}

View File

@@ -0,0 +1,260 @@
use std::collections::HashMap;
use std::io;
use std::fs::File;
use anyhow::Result;
use flate2::read::GzDecoder;
use grenad::CompressionType;
use log::info;
use milli::update::{IndexDocumentsMethod, UpdateBuilder, UpdateFormat};
use milli::Index;
use rayon::ThreadPool;
use crate::index_controller::updates::{Failed, Processed, Processing};
use crate::index_controller::{Facets, Settings, UpdateMeta, UpdateResult};
use crate::option::IndexerOpts;
/// Applies updates to an index.
///
/// The fields hold the indexer options copied from `IndexerOpts` in `new`;
/// they are pushed into every `UpdateBuilder` created by `update_buidler`.
pub struct UpdateHandler {
max_nb_chunks: Option<usize>,
chunk_compression_level: Option<u32>,
// Thread pool handed to the update builder.
thread_pool: ThreadPool,
log_frequency: usize,
// In bytes.
max_memory: usize,
linked_hash_map_size: usize,
chunk_compression_type: CompressionType,
// In bytes.
chunk_fusing_shrink_size: u64,
}
impl UpdateHandler {
/// Creates a handler from the indexer options.
///
/// Builds a rayon thread pool with `opt.indexing_jobs` threads (`0` when the
/// option is unset) and copies the remaining options.
///
/// # Errors
///
/// Fails when the thread pool cannot be built.
pub fn new(opt: &IndexerOpts) -> anyhow::Result<Self> {
    let num_threads = opt.indexing_jobs.unwrap_or(0);
    let thread_pool = rayon::ThreadPoolBuilder::new()
        .num_threads(num_threads)
        .build()?;

    let max_memory = opt.max_memory.get_bytes() as usize;
    let chunk_fusing_shrink_size = opt.chunk_fusing_shrink_size.get_bytes();

    let handler = Self {
        max_nb_chunks: opt.max_nb_chunks,
        chunk_compression_level: opt.chunk_compression_level,
        thread_pool,
        log_frequency: opt.log_every_n,
        max_memory,
        linked_hash_map_size: opt.linked_hash_map_size,
        chunk_compression_type: opt.chunk_compression_type,
        chunk_fusing_shrink_size,
    };
    Ok(handler)
}
/// Creates an `UpdateBuilder` for `update_id` configured with the options
/// stored in this handler. Optional settings are only applied when set.
fn update_buidler(&self, update_id: u64) -> UpdateBuilder {
    // We prepare the update by using the update builder.
    let mut builder = UpdateBuilder::new(update_id);

    // Optional settings first.
    if let Some(nb_chunks) = self.max_nb_chunks {
        builder.max_nb_chunks(nb_chunks);
    }
    if let Some(level) = self.chunk_compression_level {
        builder.chunk_compression_level(level);
    }

    // Settings that are always present.
    builder.thread_pool(&self.thread_pool);
    builder.log_every_n(self.log_frequency);
    builder.max_memory(self.max_memory);
    builder.linked_hash_map_size(self.linked_hash_map_size);
    builder.chunk_compression_type(self.chunk_compression_type);
    builder.chunk_fusing_shrink_size(self.chunk_fusing_shrink_size);

    builder
}
/// Indexes the documents read from `content` into `index`.
///
/// When the index has no primary key yet and `primary_key` is provided, the
/// key is stored first. The write transaction is committed only when the
/// indexing succeeds; on failure it is dropped without being committed.
fn update_documents(
    &self,
    format: UpdateFormat,
    method: IndexDocumentsMethod,
    content: File,
    update_builder: UpdateBuilder,
    primary_key: Option<&str>,
    index: &Index,
) -> anyhow::Result<UpdateResult> {
    info!("performing document addition");

    // We must use the write transaction of the update here.
    let mut wtxn = index.write_txn()?;

    // Set the primary key if not set already, ignore if already set.
    if let (None, Some(primary_key)) = (index.primary_key(&wtxn)?, primary_key) {
        index.put_primary_key(&mut wtxn, primary_key)?;
    }

    let mut builder = update_builder.index_documents(&mut wtxn, index);
    builder.update_format(format);
    builder.index_documents_method(method);

    // Gzipped payloads are not enabled; the content is read as is.
    let gzipped = false;
    let reader: Box<dyn io::Read> = if gzipped {
        Box::new(GzDecoder::new(content))
    } else {
        Box::new(content)
    };

    let result = builder.execute(reader, |indexing_step, update_id| {
        info!("update {}: {:?}", update_id, indexing_step)
    });

    info!("document addition done: {:?}", result);

    let addition_result = result?;
    wtxn.commit()?;
    Ok(UpdateResult::DocumentsAddition(addition_result))
}
/// Removes every document of `index`; the transaction is committed only when
/// the clearing succeeds.
fn clear_documents(&self, update_builder: UpdateBuilder, index: &Index) -> anyhow::Result<UpdateResult> {
    // We must use the write transaction of the update here.
    let mut wtxn = index.write_txn()?;
    let builder = update_builder.clear_documents(&mut wtxn, index);

    // The number of cleared documents is not reported.
    let _count = builder.execute()?;
    wtxn.commit()?;
    Ok(UpdateResult::Other)
}
fn update_settings(
&self,
settings: &Settings,
update_builder: UpdateBuilder,
index: &Index,
) -> anyhow::Result<UpdateResult> {
// We must use the write transaction of the update here.
let mut wtxn = index.write_txn()?;
let mut builder = update_builder.settings(&mut wtxn, index);
// We transpose the settings JSON struct into a real setting update.
if let Some(ref names) = settings.searchable_attributes {
match names {
Some(names) => builder.set_searchable_fields(names.clone()),
None => builder.reset_searchable_fields(),
}
}
// We transpose the settings JSON struct into a real setting update.
if let Some(ref names) = settings.displayed_attributes {
match names {
Some(names) => builder.set_displayed_fields(names.clone()),
None => builder.reset_displayed_fields(),
}
}
// We transpose the settings JSON struct into a real setting update.
if let Some(ref facet_types) = settings.faceted_attributes {
let facet_types = facet_types.clone().unwrap_or_else(|| HashMap::new());
builder.set_faceted_fields(facet_types);
}
// We transpose the settings JSON struct into a real setting update.
if let Some(ref criteria) = settings.criteria {
match criteria {
Some(criteria) => builder.set_criteria(criteria.clone()),
None => builder.reset_criteria(),
}
}
let result = builder
.execute(|indexing_step, update_id| info!("update {}: {:?}", update_id, indexing_step));
match result {
Ok(()) => wtxn
.commit()
.and(Ok(UpdateResult::Other))
.map_err(Into::into),
Err(e) => Err(e.into()),
}
}
/// Updates the facet level parameters of `index`; values left unset in
/// `levels` are not changed on the builder. The transaction is committed
/// only when the update succeeds.
fn update_facets(
    &self,
    levels: &Facets,
    update_builder: UpdateBuilder,
    index: &Index,
) -> anyhow::Result<UpdateResult> {
    // We must use the write transaction of the update here.
    let mut wtxn = index.write_txn()?;
    let mut builder = update_builder.facets(&mut wtxn, index);

    if let Some(group_size) = levels.level_group_size {
        builder.level_group_size(group_size);
    }
    if let Some(min_size) = levels.min_level_size {
        builder.min_level_size(min_size);
    }

    builder.execute()?;
    wtxn.commit()?;
    Ok(UpdateResult::Other)
}
/// Deletes from `index` the documents whose external ids are listed, as a
/// JSON array of strings, in `document_ids`. The transaction is committed
/// only when the deletion succeeds.
fn delete_documents(
    &self,
    document_ids: File,
    update_builder: UpdateBuilder,
    index: &Index,
) -> anyhow::Result<UpdateResult> {
    let ids: Vec<String> = serde_json::from_reader(document_ids)?;
    let mut txn = index.write_txn()?;
    let mut builder = update_builder.delete_documents(&mut txn, index)?;

    // We ignore unexisting document ids
    for id in &ids {
        builder.delete_external_id(id);
    }

    let deleted = builder.execute()?;
    txn.commit()?;
    Ok(UpdateResult::DocumentDeletion { deleted })
}
pub fn handle_update(
&self,
meta: Processing<UpdateMeta>,
content: File,
index: &Index,
) -> Result<Processed<UpdateMeta, UpdateResult>, Failed<UpdateMeta, String>> {
use UpdateMeta::*;
let update_id = meta.id();
let update_builder = self.update_buidler(update_id);
let result = match meta.meta() {
DocumentsAddition {
method,
format,
primary_key,
} => self.update_documents(
*format,
*method,
content,
update_builder,
primary_key.as_deref(),
index,
),
ClearDocuments => self.clear_documents(update_builder, index),
DeleteDocuments => self.delete_documents(content, update_builder, index),
Settings(settings) => self.update_settings(settings, update_builder, index),
Facets(levels) => self.update_facets(levels, update_builder, index),
};
match result {
Ok(result) => Ok(meta.process(result)),
Err(e) => Err(meta.fail(e.to_string())),
}
}
}

View File

@@ -0,0 +1,423 @@
use std::path::Path;
use std::sync::{Arc, RwLock};
use std::io::{Cursor, SeekFrom, Seek};
use crossbeam_channel::Sender;
use heed::types::{OwnedType, DecodeIgnore, SerdeJson, ByteSlice};
use heed::{EnvOpenOptions, Env, Database};
use serde::{Serialize, Deserialize};
use std::fs::File;
use uuid::Uuid;
use crate::index_controller::updates::*;
// Big-endian `u64`, used as the key (the update id) of every database below.
type BEU64 = heed::zerocopy::U64<heed::byteorder::BE>;
/// Persistent queue of updates backed by a heed environment.
///
/// Every update is identified by a `u64` id. Its meta lives in exactly one of
/// the `*_meta` databases depending on its state, and its raw content is kept
/// in `pending` until the update has been processed or aborted.
#[derive(Clone)]
pub struct UpdateStore<M, N, E> {
env: Env,
// Meta of the updates waiting to be processed.
pending_meta: Database<OwnedType<BEU64>, SerdeJson<Pending<M>>>,
// Raw content of the updates waiting to be processed.
pending: Database<OwnedType<BEU64>, ByteSlice>,
// Meta of the updates that were processed successfully.
processed_meta: Database<OwnedType<BEU64>, SerdeJson<Processed<M, N>>>,
// Meta of the updates whose handler returned an error.
failed_meta: Database<OwnedType<BEU64>, SerdeJson<Failed<M, E>>>,
// Meta of the updates that were aborted before being processed.
aborted_meta: Database<OwnedType<BEU64>, SerdeJson<Aborted<M>>>,
// The update currently handed to the handler, if any; kept in memory only.
processing: Arc<RwLock<Option<Processing<M>>>>,
// Wakes up the processing thread when an update is registered.
notification_sender: Sender<()>,
}
/// Something able to process an update.
///
/// `meta` describes the update and `content` is a file holding its payload.
/// The handler returns the processed meta on success or the failed meta
/// otherwise.
pub trait HandleUpdate<M, N, E> {
fn handle_update(&mut self, meta: Processing<M>, content: File) -> Result<Processed<M, N>, Failed<M, E>>;
}
// Any closure with the matching signature can be used as an update handler.
impl<M, N, E, F> HandleUpdate<M, N, E> for F
where F: FnMut(Processing<M>, File) -> Result<Processed<M, N>, Failed<M, E>>
{
fn handle_update(&mut self, meta: Processing<M>, content: File) -> Result<Processed<M, N>, Failed<M, E>> {
self(meta, content)
}
}
impl<M, N, E> UpdateStore<M, N, E>
where
M: for<'a> Deserialize<'a> + Serialize + 'static + Send + Sync + Clone,
N: for<'a> Deserialize<'a> + Serialize + 'static + Send + Sync,
E: for<'a> Deserialize<'a> + Serialize + 'static + Send + Sync,
{
/// Opens (or creates) the update store at `path` and spawns the thread that
/// processes pending updates with `update_handler`.
///
/// The thread only keeps a weak reference to the store and exits once the
/// store has been dropped or the notification channel is closed.
///
/// # Errors
///
/// Fails when the environment or one of its databases cannot be opened.
pub fn open<P, U>(
    mut options: EnvOpenOptions,
    path: P,
    mut update_handler: U,
) -> heed::Result<Arc<Self>>
where
    P: AsRef<Path>,
    U: HandleUpdate<M, N, E> + Send + 'static,
{
    // One named database per field of the store.
    options.max_dbs(5);

    let env = options.open(path)?;
    let pending_meta = env.create_database(Some("pending-meta"))?;
    let pending = env.create_database(Some("pending"))?;
    let processed_meta = env.create_database(Some("processed-meta"))?;
    let aborted_meta = env.create_database(Some("aborted-meta"))?;
    let failed_meta = env.create_database(Some("failed-meta"))?;
    let processing = Arc::new(RwLock::new(None));

    let (notification_sender, notification_receiver) = crossbeam_channel::bounded(1);
    // Send a first notification to trigger the process.
    let _ = notification_sender.send(());

    let update_store = Arc::new(UpdateStore {
        env,
        pending_meta,
        pending,
        processed_meta,
        failed_meta,
        aborted_meta,
        processing,
        notification_sender,
    });

    // We need a weak reference so we can take ownership on the arc later when we
    // want to close the index.
    let update_store_weak = Arc::downgrade(&update_store);
    std::thread::spawn(move || {
        // Block and wait for something to process.
        for _ in notification_receiver {
            // Drain every pending update before waiting for the next notification.
            loop {
                let store = match update_store_weak.upgrade() {
                    Some(store) => store,
                    // the ownership on the arc has been taken, we need to exit.
                    None => return,
                };
                match store.process_pending_update(&mut update_handler) {
                    Ok(Some(_)) => (),
                    Ok(None) => break,
                    Err(e) => eprintln!("error while processing update: {}", e),
                }
            }
        }
    });

    Ok(update_store)
}
/// Consumes the store and asks its environment to prepare for closing,
/// returning the event produced by the environment.
pub fn prepare_for_closing(self) -> heed::EnvClosingEvent {
    let env = self.env;
    env.prepare_for_closing()
}
/// Returns the new biggest id to use to store the new update.
///
/// The id is one more than the highest id found in the pending, processed,
/// aborted and failed stores, or `0` when all of them are empty. The failed
/// store must be part of the computation: a failed update keeps its id in
/// `failed_meta`, and ignoring it would hand the same id out again.
///
/// The update being processed does not need special care, its meta stays in
/// the pending store until its result has been written.
fn new_update_id(&self, txn: &heed::RoTxn) -> heed::Result<u64> {
    // Only the keys matter here, so the values are never decoded.
    let last_pending = self.pending_meta
        .remap_data_type::<DecodeIgnore>()
        .last(txn)?
        .map(|(k, _)| k.get());
    let last_processed = self.processed_meta
        .remap_data_type::<DecodeIgnore>()
        .last(txn)?
        .map(|(k, _)| k.get());
    let last_aborted = self.aborted_meta
        .remap_data_type::<DecodeIgnore>()
        .last(txn)?
        .map(|(k, _)| k.get());
    let last_failed = self.failed_meta
        .remap_data_type::<DecodeIgnore>()
        .last(txn)?
        .map(|(k, _)| k.get());

    let last_update_id = [last_pending, last_processed, last_aborted, last_failed]
        .iter()
        .copied()
        .flatten()
        .max();

    match last_update_id {
        Some(last_id) => Ok(last_id + 1),
        None => Ok(0),
    }
}
/// Stores `content` in the pending store and the meta in the pending-meta
/// store under a freshly allocated update id, then notifies the processing
/// thread. Returns the pending meta that was stored.
///
/// # Panics
///
/// Panics when the notification channel is disconnected.
pub fn register_update(
    &self,
    meta: M,
    content: &[u8],
    index_uuid: Uuid,
) -> heed::Result<Pending<M>> {
    let mut wtxn = self.env.write_txn()?;

    // We ask the update store to give us a new update id, this is safe,
    // no other update can have the same id because we use a write txn before
    // asking for the id and registering it so other update registering
    // will be forced to wait for a new write txn.
    let update_id = self.new_update_id(&wtxn)?;
    let update_key = BEU64::new(update_id);
    let pending_meta = Pending::new(meta, update_id, index_uuid);

    self.pending_meta.put(&mut wtxn, &update_key, &pending_meta)?;
    self.pending.put(&mut wtxn, &update_key, content)?;
    wtxn.commit()?;

    // A full channel is fine: a notification is already waiting to be consumed.
    match self.notification_sender.try_send(()) {
        Err(e) if e.is_disconnected() => panic!("update notification channel is disconnected"),
        _ => (),
    }

    Ok(pending_meta)
}
/// Runs `handler` on the pending update with the lowest id.
///
/// Only a read transaction is held while the handler runs; the outcome is
/// written to the processed-meta or failed-meta store afterwards, in a
/// separate write transaction. Returns `Ok(None)` when nothing is pending.
fn process_pending_update<U>(&self, handler: &mut U) -> heed::Result<Option<()>>
where
    U: HandleUpdate<M, N, E> + Send + 'static,
{
    // Create a read transaction to be able to retrieve the pending update in order.
    let rtxn = self.env.read_txn()?;
    let (first_id, pending) = match self.pending_meta.first(&rtxn)? {
        Some(entry) => entry,
        None => return Ok(None),
    };

    let first_content = self.pending
        .get(&rtxn, &first_id)?
        .expect("associated update content");

    // we change the state of the update from pending to processing before we pass it
    // to the update handler. Processing store is non persistent to be able recover
    // from a failure
    let processing = pending.processing();
    *self.processing.write().unwrap() = Some(processing.clone());

    // The handler expects a file: copy the content into a temporary one and rewind it.
    let mut cursor = Cursor::new(first_content);
    let mut file = tempfile::tempfile()?;
    std::io::copy(&mut cursor, &mut file)?;
    file.seek(SeekFrom::Start(0))?;

    // Process the pending update using the provided user function.
    let result = handler.handle_update(processing, file);
    drop(rtxn);

    // Once the pending update have been successfully processed
    // we must remove the content from the pending and processing stores and
    // write the *new* meta to the processed-meta store and commit.
    let mut wtxn = self.env.write_txn()?;
    *self.processing.write().unwrap() = None;
    self.pending_meta.delete(&mut wtxn, &first_id)?;
    self.pending.delete(&mut wtxn, &first_id)?;
    match result {
        Ok(processed) => {
            self.processed_meta.put(&mut wtxn, &first_id, &processed)?;
        }
        Err(failed) => {
            self.failed_meta.put(&mut wtxn, &first_id, &failed)?;
        }
    }
    wtxn.commit()?;

    Ok(Some(()))
}
/// Execute the user defined function with the meta-store iterators, the first
/// iterator is the *processed* meta one, the second the *aborted* meta one
/// and, the last is the *pending* meta one.
pub fn iter_metas<F, T>(&self, mut f: F) -> heed::Result<T>
where
F: for<'a> FnMut(
Option<Processing<M>>,
heed::RoIter<'a, OwnedType<BEU64>, SerdeJson<Processed<M, N>>>,
heed::RoIter<'a, OwnedType<BEU64>, SerdeJson<Aborted<M>>>,
heed::RoIter<'a, OwnedType<BEU64>, SerdeJson<Pending<M>>>,
heed::RoIter<'a, OwnedType<BEU64>, SerdeJson<Failed<M, E>>>,
) -> heed::Result<T>,
{
let rtxn = self.env.read_txn()?;
// We get the pending, processed and aborted meta iterators.
let processed_iter = self.processed_meta.iter(&rtxn)?;
let aborted_iter = self.aborted_meta.iter(&rtxn)?;
let pending_iter = self.pending_meta.iter(&rtxn)?;
let processing = self.processing.read().unwrap().clone();
let failed_iter = self.failed_meta.iter(&rtxn)?;
// We execute the user defined function with both iterators.
(f)(processing, processed_iter, aborted_iter, pending_iter, failed_iter)
}
/// Returns the update associated meta or `None` if the update doesn't exist.
pub fn meta(&self, update_id: u64) -> heed::Result<Option<UpdateStatus<M, N, E>>> {
let rtxn = self.env.read_txn()?;
let key = BEU64::new(update_id);
if let Some(ref meta) = *self.processing.read().unwrap() {
if meta.id() == update_id {
return Ok(Some(UpdateStatus::Processing(meta.clone())));
}
}
if let Some(meta) = self.pending_meta.get(&rtxn, &key)? {
return Ok(Some(UpdateStatus::Pending(meta)));
}
if let Some(meta) = self.processed_meta.get(&rtxn, &key)? {
return Ok(Some(UpdateStatus::Processed(meta)));
}
if let Some(meta) = self.aborted_meta.get(&rtxn, &key)? {
return Ok(Some(UpdateStatus::Aborted(meta)));
}
if let Some(meta) = self.failed_meta.get(&rtxn, &key)? {
return Ok(Some(UpdateStatus::Failed(meta)));
}
Ok(None)
}
/// Aborts a pending update: its content is deleted and its meta is moved
/// into the aborted updates database.
///
/// Returns `None` without changing anything when the update is the first
/// pending one (it is considered as being processed), when it has already
/// been processed, or when it doesn't exist.
#[allow(dead_code)]
pub fn abort_update(&self, update_id: u64) -> heed::Result<Option<Aborted<M>>> {
    let mut wtxn = self.env.write_txn()?;
    let key = BEU64::new(update_id);

    // We cannot abort an update that is currently being processed.
    let first_pending_id = self.pending_meta
        .first(&wtxn)?
        .map(|(first_key, _)| first_key.get());
    if first_pending_id == Some(update_id) {
        return Ok(None);
    }

    let aborted = match self.pending_meta.get(&wtxn, &key)? {
        Some(pending) => pending.abort(),
        None => return Ok(None),
    };

    self.aborted_meta.put(&mut wtxn, &key, &aborted)?;
    self.pending_meta.delete(&mut wtxn, &key)?;
    self.pending.delete(&mut wtxn, &key)?;
    wtxn.commit()?;

    Ok(Some(aborted))
}
/// Aborts every pending update except the first one, which is considered as
/// being processed. Returns the ids and metas of the aborted updates.
#[allow(dead_code)]
pub fn abort_pendings(&self) -> heed::Result<Vec<(u64, Aborted<M>)>> {
    let mut wtxn = self.env.write_txn()?;

    // We skip the first pending update as it is currently being processed.
    // The list is collected first because the stores cannot be modified
    // while they are being iterated over.
    let aborted_updates = self.pending_meta
        .iter(&wtxn)?
        .skip(1)
        .map(|entry| entry.map(|(key, pending)| (key.get(), pending.abort())))
        .collect::<heed::Result<Vec<_>>>()?;

    for (id, aborted) in &aborted_updates {
        let key = BEU64::new(*id);
        self.aborted_meta.put(&mut wtxn, &key, aborted)?;
        self.pending_meta.delete(&mut wtxn, &key)?;
        self.pending.delete(&mut wtxn, &key)?;
    }

    wtxn.commit()?;

    Ok(aborted_updates)
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::thread;
    use std::time::{Duration, Instant};

    // Handlers are plain closures: the blanket `HandleUpdate` implementation of
    // the parent module already covers `FnMut(Processing<M>, File)`, so no
    // extra implementation is declared here.

    /// A registered update ends up in the processed store with the meta
    /// produced by the handler.
    #[test]
    fn simple() {
        let dir = tempfile::tempdir().unwrap();
        let mut options = EnvOpenOptions::new();
        options.map_size(4096 * 100);
        // Borrow the path so the temporary directory outlives the store.
        let update_store = UpdateStore::open(options, dir.path(), |meta: Processing<String>, _content: File| -> Result<_, Failed<_, ()>> {
            let new_meta = meta.meta().to_string() + " processed";
            let processed = meta.process(new_meta);
            Ok(processed)
        }).unwrap();

        let meta = String::from("kiki");
        let update = update_store.register_update(meta, &[], Uuid::nil()).unwrap();

        // Give the processing thread some time to handle the update.
        thread::sleep(Duration::from_millis(100));

        let meta = update_store.meta(update.id()).unwrap().unwrap();
        if let UpdateStatus::Processed(Processed { success, .. }) = meta {
            assert_eq!(success, "kiki processed");
        } else {
            panic!()
        }
    }

    /// Registering updates must not block while a slow update is being
    /// processed, and all of them must eventually be processed.
    #[test]
    #[ignore]
    fn long_running_update() {
        let dir = tempfile::tempdir().unwrap();
        let mut options = EnvOpenOptions::new();
        options.map_size(4096 * 100);
        let update_store = UpdateStore::open(options, dir.path(), |meta: Processing<String>, _content: File| -> Result<_, Failed<_, ()>> {
            thread::sleep(Duration::from_millis(400));
            // The separating space matters: the assertions below expect "<name> processed".
            let new_meta = meta.meta().to_string() + " processed";
            let processed = meta.process(new_meta);
            Ok(processed)
        }).unwrap();

        let before_register = Instant::now();

        let meta = String::from("kiki");
        let update_kiki = update_store.register_update(meta, &[], Uuid::nil()).unwrap();
        assert!(before_register.elapsed() < Duration::from_millis(200));

        let meta = String::from("coco");
        let update_coco = update_store.register_update(meta, &[], Uuid::nil()).unwrap();
        assert!(before_register.elapsed() < Duration::from_millis(200));

        let meta = String::from("cucu");
        let update_cucu = update_store.register_update(meta, &[], Uuid::nil()).unwrap();
        assert!(before_register.elapsed() < Duration::from_millis(200));

        // Three updates of 400ms each, plus some slack.
        thread::sleep(Duration::from_millis(400 * 3 + 100));

        let meta = update_store.meta(update_kiki.id()).unwrap().unwrap();
        if let UpdateStatus::Processed(Processed { success, .. }) = meta {
            assert_eq!(success, "kiki processed");
        } else {
            panic!()
        }

        let meta = update_store.meta(update_coco.id()).unwrap().unwrap();
        if let UpdateStatus::Processed(Processed { success, .. }) = meta {
            assert_eq!(success, "coco processed");
        } else {
            panic!()
        }

        let meta = update_store.meta(update_cucu.id()).unwrap().unwrap();
        if let UpdateStatus::Processed(Processed { success, .. }) = meta {
            assert_eq!(success, "cucu processed");
        } else {
            panic!()
        }
    }
}

View File

@@ -0,0 +1,607 @@
use std::fs::{create_dir_all, remove_dir_all};
use std::path::{Path, PathBuf};
use std::sync::Arc;
use std::time::Duration;
use anyhow::{bail, Context};
use chrono::{DateTime, Utc};
use dashmap::{mapref::entry::Entry, DashMap};
use heed::{
types::{ByteSlice, SerdeJson, Str},
Database, Env, EnvOpenOptions, RoTxn, RwTxn,
};
use log::{error, info};
use milli::Index;
use rayon::ThreadPool;
use serde::{Deserialize, Serialize};
use uuid::Uuid;
use super::update_handler::UpdateHandler;
use super::{UpdateMeta, UpdateResult};
use crate::option::IndexerOpts;
/// The update store used by the index store: `UpdateMeta` as update metadata,
/// `UpdateResult` as success payload and `String` as error payload.
type UpdateStore = super::update_store::UpdateStore<UpdateMeta, UpdateResult, String>;
/// Metadata persisted (as JSON) for every index known to the `IndexStore`.
#[derive(Serialize, Deserialize, Debug, PartialEq)]
pub struct IndexMeta {
/// Map size, in bytes, given to the update store environment when it is opened.
update_store_size: u64,
/// Map size, in bytes, given to the index environment when it is opened.
index_store_size: u64,
/// Unique identifier of the index; the on-disk directory names are derived from it.
pub uuid: Uuid,
/// Date at which the index was created.
pub created_at: DateTime<Utc>,
/// Date of the last update; equal to `created_at` right after creation.
pub updated_at: DateTime<Utc>,
}
impl IndexMeta {
    /// Opens the index and the update store described by this metadata.
    ///
    /// Each of them lives in its own directory under `path`, named after the index uuid;
    /// missing directories are created. The update store is given an `UpdateHandler` bound
    /// to the freshly opened index.
    fn open(
        &self,
        path: impl AsRef<Path>,
        thread_pool: Arc<ThreadPool>,
        indexer_options: &IndexerOpts,
    ) -> anyhow::Result<(Arc<Index>, Arc<UpdateStore>)> {
        let update_path = make_update_db_path(&path, &self.uuid);
        let index_path = make_index_db_path(&path, &self.uuid);
        create_dir_all(&update_path)?;
        create_dir_all(&index_path)?;

        // The index must be opened first as the update handler needs a handle to it.
        let mut index_options = EnvOpenOptions::new();
        index_options.map_size(self.index_store_size as usize);
        let index = Arc::new(Index::new(index_options, index_path)?);

        let mut update_options = EnvOpenOptions::new();
        update_options.map_size(self.update_store_size as usize);
        let handler = UpdateHandler::new(indexer_options, Arc::clone(&index), thread_pool)?;
        let update_store = UpdateStore::open(update_options, update_path, handler)?;

        Ok((index, update_store))
    }
}
/// Registry of all the indexes: persists the name -> uuid -> metadata mappings and keeps
/// the indexes that have already been opened in an in-memory cache.
pub struct IndexStore {
/// Environment holding the two databases below.
env: Env,
/// Maps an index name to the bytes of its uuid.
name_to_uuid: Database<Str, ByteSlice>,
/// In-memory cache of the opened indexes and their update stores, keyed by uuid.
uuid_to_index: DashMap<Uuid, (Arc<Index>, Arc<UpdateStore>)>,
/// Maps the bytes of an index uuid to its persisted `IndexMeta`.
uuid_to_index_meta: Database<ByteSlice, SerdeJson<IndexMeta>>,
/// Thread pool handed to the update handler of every opened index.
thread_pool: Arc<ThreadPool>,
/// Indexing options handed to the update handler of every opened index.
indexer_options: IndexerOpts,
}
impl IndexStore {
/// Opens (or creates) the index store located at `path`.
///
/// No index is loaded at this point: the in-memory cache starts empty and indexes are
/// opened when they are first retrieved. The indexing thread pool is sized with
/// `indexer_options.indexing_jobs`, falling back to `0` when it is not set.
pub fn new(path: impl AsRef<Path>, indexer_options: IndexerOpts) -> anyhow::Result<Self> {
    let mut env_options = EnvOpenOptions::new();
    env_options.map_size(4096 * 100);
    env_options.max_dbs(2);
    let env = env_options.open(path)?;

    let name_to_uuid = open_or_create_database(&env, Some("name_to_uid"))?;
    let uuid_to_index_meta = open_or_create_database(&env, Some("uid_to_index_db"))?;

    let indexing_jobs = indexer_options.indexing_jobs.unwrap_or(0);
    let thread_pool = rayon::ThreadPoolBuilder::new()
        .num_threads(indexing_jobs)
        .build()?;

    Ok(Self {
        env,
        name_to_uuid,
        uuid_to_index: DashMap::new(),
        uuid_to_index_meta,
        thread_pool: Arc::new(thread_pool),
        indexer_options,
    })
}
pub fn delete(&self, index_uid: impl AsRef<str>) -> anyhow::Result<()> {
// we remove the references to the index from the index map so it is not accessible anymore
let mut txn = self.env.write_txn()?;
let uuid = self
.index_uuid(&txn, &index_uid)?
.with_context(|| format!("Index {:?} doesn't exist", index_uid.as_ref()))?;
self.name_to_uuid.delete(&mut txn, index_uid.as_ref())?;
self.uuid_to_index_meta.delete(&mut txn, uuid.as_bytes())?;
txn.commit()?;
// If the index was loaded (i.e it is present in the uuid_to_index map), then we need to
// close it. The process goes as follow:
//
// 1) We want to remove any pending updates from the store.
// 2) We try to get ownership on the update store so we can close it. It may take a
// couple of tries, but since the update store event loop only has a weak reference to
// itself, and we are the only other function holding a reference to it otherwise, we will
// get it eventually.
// 3) We request a closing of the update store.
// 4) We can take ownership on the index, and close it.
// 5) We remove all the files from the file system.
let index_uid = index_uid.as_ref().to_string();
let path = self.env.path().to_owned();
if let Some((_, (index, updates))) = self.uuid_to_index.remove(&uuid) {
std::thread::spawn(move || {
info!("Preparing for {:?} deletion.", index_uid);
// this error is non fatal, but may delay the deletion.
if let Err(e) = updates.abort_pendings() {
error!(
"error aborting pending updates when deleting index {:?}: {}",
index_uid, e
);
}
let updates = get_arc_ownership_blocking(updates);
let close_event = updates.prepare_for_closing();
close_event.wait();
info!("closed update store for {:?}", index_uid);
let index = get_arc_ownership_blocking(index);
let close_event = index.prepare_for_closing();
close_event.wait();
let update_path = make_update_db_path(&path, &uuid);
let index_path = make_index_db_path(&path, &uuid);
if let Err(e) = remove_dir_all(index_path) {
error!("error removing index {:?}: {}", index_uid, e);
}
if let Err(e) = remove_dir_all(update_path) {
error!("error removing index {:?}: {}", index_uid, e);
}
info!("index {:?} deleted.", index_uid);
});
}
Ok(())
}
/// Returns the uuid registered for the index `name`, or `None` when the name is unknown.
///
/// Fails if the stored bytes cannot be parsed as a uuid.
fn index_uuid(&self, txn: &RoTxn, name: impl AsRef<str>) -> anyhow::Result<Option<Uuid>> {
    let uuid = self
        .name_to_uuid
        .get(txn, name.as_ref())?
        .map(Uuid::from_slice)
        .transpose()?;
    Ok(uuid)
}
/// Returns the index and update store associated with `uid`.
///
/// The handles come from the in-memory cache when the index is already loaded. Otherwise
/// the index is opened from its persisted metadata and cached. `None` is returned when no
/// metadata is stored for `uid`.
fn retrieve_index(
    &self,
    txn: &RoTxn,
    uid: Uuid,
) -> anyhow::Result<Option<(Arc<Index>, Arc<UpdateStore>)>> {
    // The cache entry is kept for the whole lookup, including while the index is opened.
    let vacant = match self.uuid_to_index.entry(uid) {
        Entry::Occupied(loaded) => return Ok(Some(loaded.get().clone())),
        Entry::Vacant(vacant) => vacant,
    };

    let meta = match self.uuid_to_index_meta.get(txn, uid.as_bytes())? {
        Some(meta) => meta,
        None => return Ok(None),
    };

    let handles = meta.open(
        self.env.path(),
        self.thread_pool.clone(),
        &self.indexer_options,
    )?;
    vacant.insert(handles.clone());
    Ok(Some(handles))
}
/// Resolves `name` to its uuid and returns the matching index and update store, using the
/// given transaction. Returns `None` when the name is unknown.
fn get_index_txn(
    &self,
    txn: &RoTxn,
    name: impl AsRef<str>,
) -> anyhow::Result<Option<(Arc<Index>, Arc<UpdateStore>)>> {
    if let Some(uid) = self.index_uuid(txn, name)? {
        self.retrieve_index(txn, uid)
    } else {
        Ok(None)
    }
}
/// Returns the index named `name` along with its update store, loading it if needed.
/// Returns `None` when no index has this name.
pub fn index(
    &self,
    name: impl AsRef<str>,
) -> anyhow::Result<Option<(Arc<Index>, Arc<UpdateStore>)>> {
    let read_txn = self.env.read_txn()?;
    self.get_index_txn(&read_txn, name)
}
/// Use this function to perform an update on an index.
///
/// A write transaction on the store is held while `f` runs. When `f` succeeds, the
/// `updated_at` date of the index is refreshed and the transaction is committed; when it
/// fails, its error is returned and the metadata is left untouched.
pub fn update_index<F, T>(&self, name: impl AsRef<str>, f: F) -> anyhow::Result<(T, IndexMeta)>
where
    F: FnOnce(&Index) -> anyhow::Result<T>,
{
    let mut wtxn = self.env.write_txn()?;
    let (index, _) = self
        .get_index_txn(&wtxn, &name)?
        .with_context(|| format!("Index {:?} doesn't exist", name.as_ref()))?;

    let ret = f(&index)?;

    let meta = self.update_meta(&mut wtxn, name, |meta| meta.updated_at = Utc::now())?;
    wtxn.commit()?;
    Ok((ret, meta))
}
/// Returns the index named `name` together with its persisted metadata, or `None` when no
/// index has this name.
///
/// Fails when the name is registered but its metadata or its index cannot be retrieved.
pub fn index_with_meta(
    &self,
    name: impl AsRef<str>,
) -> anyhow::Result<Option<(Arc<Index>, IndexMeta)>> {
    let txn = self.env.read_txn()?;

    let uuid = match self.index_uuid(&txn, &name)? {
        Some(uuid) => uuid,
        None => return Ok(None),
    };

    let meta = self
        .uuid_to_index_meta
        .get(&txn, uuid.as_bytes())?
        .with_context(|| format!("unable to retrieve metadata for index {:?}", name.as_ref()))?;
    let (index, _) = self
        .retrieve_index(&txn, uuid)?
        .with_context(|| format!("unable to retrieve index {:?}", name.as_ref()))?;

    Ok(Some((index, meta)))
}
/// Applies `f` to the metadata of the index named `name`, stores the result within `txn`
/// and returns it. The caller is responsible for committing the transaction.
fn update_meta<F>(
    &self,
    txn: &mut RwTxn,
    name: impl AsRef<str>,
    f: F,
) -> anyhow::Result<IndexMeta>
where
    F: FnOnce(&mut IndexMeta),
{
    let uuid = self
        .index_uuid(txn, &name)?
        .with_context(|| format!("Index {:?} doesn't exist", name.as_ref()))?;
    let key = uuid.as_bytes();

    let mut updated = self
        .uuid_to_index_meta
        .get(txn, key)?
        .with_context(|| format!("couldn't retrieve metadata for index {:?}", name.as_ref()))?;
    f(&mut updated);

    self.uuid_to_index_meta.put(txn, key, &updated)?;
    Ok(updated)
}
/// Returns the index named `name` and its update store, creating the index with the given
/// store sizes when it doesn't exist yet.
pub fn get_or_create_index(
    &self,
    name: impl AsRef<str>,
    update_size: u64,
    index_size: u64,
) -> anyhow::Result<(Arc<Index>, Arc<UpdateStore>)> {
    let mut txn = self.env.write_txn()?;

    if let Some(existing) = self.get_index_txn(&txn, name.as_ref())? {
        return Ok(existing);
    }

    let uuid = Uuid::new_v4();
    let (index, updates, _) =
        self.create_index_txn(&mut txn, uuid, name, update_size, index_size)?;

    // If we fail to commit the transaction, we must delete the database from the
    // file-system.
    match txn.commit() {
        Ok(()) => Ok((index, updates)),
        Err(e) => {
            self.clean_db(uuid);
            Err(e.into())
        }
    }
}
// Remove all the files and data associated with a db uuid.
fn clean_db(&self, uuid: Uuid) {
let update_db_path = make_update_db_path(self.env.path(), &uuid);
let index_db_path = make_index_db_path(self.env.path(), &uuid);
remove_dir_all(update_db_path).expect("Failed to clean database");
remove_dir_all(index_db_path).expect("Failed to clean database");
self.uuid_to_index.remove(&uuid);
}
/// Registers a new index named `name` under `uuid` within `txn`, opens it and caches it.
///
/// The transaction is not committed here; this is left to the caller. When the index
/// cannot be opened, `clean_db` is called for `uuid` before the error is returned.
fn create_index_txn(
    &self,
    txn: &mut RwTxn,
    uuid: Uuid,
    name: impl AsRef<str>,
    update_store_size: u64,
    index_store_size: u64,
) -> anyhow::Result<(Arc<Index>, Arc<UpdateStore>, IndexMeta)> {
    // A new index has never been updated: both dates are the creation date.
    let now = Utc::now();
    let meta = IndexMeta {
        update_store_size,
        index_store_size,
        uuid,
        created_at: now,
        updated_at: now,
    };

    self.name_to_uuid.put(txn, name.as_ref(), uuid.as_bytes())?;
    self.uuid_to_index_meta.put(txn, uuid.as_bytes(), &meta)?;

    let opened = meta.open(
        self.env.path(),
        self.thread_pool.clone(),
        &self.indexer_options,
    );
    let (index, update_store) = opened.map_err(|e| {
        self.clean_db(uuid);
        e
    })?;

    self.uuid_to_index
        .insert(uuid, (index.clone(), update_store.clone()));
    Ok((index, update_store, meta))
}
/// Same as `get_or_create_index`, but returns an error if the index already exists.
/// The metadata of the new index is returned along with its handles.
pub fn create_index(
    &self,
    name: impl AsRef<str>,
    update_size: u64,
    index_size: u64,
) -> anyhow::Result<(Arc<Index>, Arc<UpdateStore>, IndexMeta)> {
    let uuid = Uuid::new_v4();
    let mut txn = self.env.write_txn()?;

    let already_exists = self.name_to_uuid.get(&txn, name.as_ref())?.is_some();
    if already_exists {
        bail!("index {:?} already exists", name.as_ref())
    }

    let created = self.create_index_txn(&mut txn, uuid, name, update_size, index_size)?;

    // If we fail to commit the transaction, we must delete the database from the
    // file-system.
    match txn.commit() {
        Ok(()) => Ok(created),
        Err(e) => {
            self.clean_db(uuid);
            Err(e.into())
        }
    }
}
/// Returns each index associated with its metadata:
/// (index_name, IndexMeta, primary_key)
///
/// This method will force all the indexes to be loaded. Entries of the name database that
/// cannot be decoded are logged and skipped.
pub fn list_indexes(&self) -> anyhow::Result<Vec<(String, IndexMeta, Option<String>)>> {
    let txn = self.env.read_txn()?;
    let mut indexes = Vec::new();

    for entry in self.name_to_uuid.iter(&txn)? {
        let (name, uuid) = match entry {
            Ok(pair) => pair,
            Err(e) => {
                error!("error decoding entry while listing indexes: {}", e);
                continue;
            }
        };

        // get index to retrieve primary key
        let (index, _) = self
            .get_index_txn(&txn, name)?
            .with_context(|| format!("could not load index {:?}", name))?;
        let primary_key = index.primary_key(&index.read_txn()?)?.map(String::from);

        // retrieve meta
        let meta = self
            .uuid_to_index_meta
            .get(&txn, uuid)?
            .with_context(|| format!("could not retieve meta for index {:?}", name))?;

        indexes.push((name.to_owned(), meta, primary_key));
    }

    Ok(indexes)
}
}
/// Blocks until the wrapped value can be taken out of the arc, i.e. until `item` is the
/// only remaining strong reference. Sleeps 100ms between two attempts.
fn get_arc_ownership_blocking<T>(mut item: Arc<T>) -> T {
    loop {
        item = match Arc::try_unwrap(item) {
            Ok(inner) => return inner,
            // Still shared: take the arc back and try again later.
            Err(still_shared) => still_shared,
        };
        std::thread::sleep(Duration::from_millis(100));
    }
}
/// Opens the database `name` of `env`, creating it when it doesn't exist yet.
fn open_or_create_database<K: 'static, V: 'static>(
    env: &Env,
    name: Option<&str>,
) -> anyhow::Result<Database<K, V>> {
    if let Some(existing) = env.open_database::<K, V>(name)? {
        return Ok(existing);
    }
    let created = env.create_database::<K, V>(name)?;
    Ok(created)
}
/// Returns the directory of the update store of the index `uuid`: `<path>/update<uuid>`.
fn make_update_db_path(path: impl AsRef<Path>, uuid: &Uuid) -> PathBuf {
    path.as_ref().join(format!("update{}", uuid))
}
/// Returns the directory of the index `uuid`: `<path>/index<uuid>`.
fn make_index_db_path(path: impl AsRef<Path>, uuid: &Uuid) -> PathBuf {
    path.as_ref().join(format!("index{}", uuid))
}
#[cfg(test)]
mod test {
use super::*;
use std::path::PathBuf;
/// The update store directory is `update<uuid>` under the base path.
#[test]
fn test_make_update_db_path() {
    let uuid = Uuid::new_v4();
    let expected = PathBuf::from(format!("/home/update{}", uuid));
    assert_eq!(make_update_db_path("/home", &uuid), expected);
}
/// The index directory is `index<uuid>` under the base path.
#[test]
fn test_make_index_db_path() {
    let uuid = Uuid::new_v4();
    let expected = PathBuf::from(format!("/home/index{}", uuid));
    assert_eq!(make_index_db_path("/home", &uuid), expected);
}
mod index_store {
use super::*;
/// `index_uuid` finds a uuid only once it has been registered under the name.
#[test]
fn test_index_uuid() {
    let temp = tempfile::tempdir().unwrap();
    let store = IndexStore::new(temp, IndexerOpts::default()).unwrap();
    let name = "foobar";

    // name is not found if the uuid is not present in the db
    {
        let txn = store.env.read_txn().unwrap();
        assert!(store.index_uuid(&txn, &name).unwrap().is_none());
    }

    // insert a uuid in the name_to_uuid db:
    let uuid = Uuid::new_v4();
    let mut txn = store.env.write_txn().unwrap();
    store
        .name_to_uuid
        .put(&mut txn, name, uuid.as_bytes())
        .unwrap();
    txn.commit().unwrap();

    // check that the uuid is there
    let txn = store.env.read_txn().unwrap();
    let found = store.index_uuid(&txn, &name).unwrap();
    assert_eq!(found, Some(uuid));
}
/// `retrieve_index` returns nothing for an unknown uuid, and loads and caches the index once
/// its metadata has been stored.
#[test]
fn test_retrieve_index() {
    let temp = tempfile::tempdir().unwrap();
    let store = IndexStore::new(temp, IndexerOpts::default()).unwrap();
    let uuid = Uuid::new_v4();

    let txn = store.env.read_txn().unwrap();
    assert!(store.retrieve_index(&txn, uuid).unwrap().is_none());
    // Release the read transaction before opening other transactions on this thread, as
    // `test_index_uuid` does.
    drop(txn);

    let created_at = Utc::now();
    let updated_at = created_at;
    let meta = IndexMeta {
        update_store_size: 4096 * 100,
        index_store_size: 4096 * 100,
        uuid,
        created_at,
        updated_at,
    };
    let mut txn = store.env.write_txn().unwrap();
    store
        .uuid_to_index_meta
        .put(&mut txn, uuid.as_bytes(), &meta)
        .unwrap();
    txn.commit().unwrap();

    // the index cache should be empty
    assert!(store.uuid_to_index.is_empty());

    let txn = store.env.read_txn().unwrap();
    assert!(store.retrieve_index(&txn, uuid).unwrap().is_some());
    assert_eq!(store.uuid_to_index.len(), 1);
}
/// `index` resolves a name to an index once both the name and the metadata are stored.
#[test]
fn test_index() {
    let temp = tempfile::tempdir().unwrap();
    let store = IndexStore::new(temp, IndexerOpts::default()).unwrap();
    let name = "foobar";

    // nothing is registered under this name yet
    assert!(store.index(&name).unwrap().is_none());

    let now = Utc::now();
    let uuid = Uuid::new_v4();
    let meta = IndexMeta {
        update_store_size: 4096 * 100,
        index_store_size: 4096 * 100,
        uuid,
        created_at: now,
        updated_at: now,
    };

    // register the name and the metadata in the same transaction
    let mut txn = store.env.write_txn().unwrap();
    store
        .name_to_uuid
        .put(&mut txn, name, uuid.as_bytes())
        .unwrap();
    store
        .uuid_to_index_meta
        .put(&mut txn, uuid.as_bytes(), &meta)
        .unwrap();
    txn.commit().unwrap();

    assert!(store.index(&name).unwrap().is_some());
}
/// `get_or_create_index` registers the name, persists the metadata and caches the index.
#[test]
fn test_get_or_create_index() {
    let temp = tempfile::tempdir().unwrap();
    let store = IndexStore::new(temp, IndexerOpts::default()).unwrap();
    let name = "foobar";
    let update_store_size = 4096 * 100;
    let index_store_size = 4096 * 100;

    store
        .get_or_create_index(&name, update_store_size, index_store_size)
        .unwrap();

    let txn = store.env.read_txn().unwrap();
    let stored_uuid = store.name_to_uuid.get(&txn, name).unwrap();
    assert_eq!(store.uuid_to_index.len(), 1);
    assert!(stored_uuid.is_some());

    let uuid = Uuid::from_slice(stored_uuid.unwrap()).unwrap();
    let meta = store
        .uuid_to_index_meta
        .get(&txn, uuid.as_bytes())
        .unwrap()
        .unwrap();
    assert_eq!(meta.update_store_size, update_store_size);
    assert_eq!(meta.index_store_size, index_store_size);
    assert_eq!(meta.uuid, uuid);
}
/// `create_index_txn` registers the name, stores the metadata and caches the index; the
/// entries are read back through the same, uncommitted, transaction.
#[test]
fn test_create_index() {
    let temp = tempfile::tempdir().unwrap();
    let store = IndexStore::new(temp, IndexerOpts::default()).unwrap();
    let name = "foobar";
    let update_store_size = 4096 * 100;
    let index_store_size = 4096 * 100;

    let uuid = Uuid::new_v4();
    let mut txn = store.env.write_txn().unwrap();
    store
        .create_index_txn(&mut txn, uuid, name, update_store_size, index_store_size)
        .unwrap();

    let stored_uuid = store.name_to_uuid.get(&txn, name).unwrap();
    assert_eq!(store.uuid_to_index.len(), 1);
    assert!(stored_uuid.is_some());

    let uuid = Uuid::from_slice(stored_uuid.unwrap()).unwrap();
    let meta = store
        .uuid_to_index_meta
        .get(&txn, uuid.as_bytes())
        .unwrap()
        .unwrap();
    assert_eq!(meta.update_store_size, update_store_size);
    assert_eq!(meta.index_store_size, index_store_size);
    assert_eq!(meta.uuid, uuid);
}
}
}

View File

@@ -0,0 +1,228 @@
mod update_store;
mod index_store;
mod update_handler;
use std::path::Path;
use std::sync::Arc;
use anyhow::{bail, Context};
use itertools::Itertools;
use milli::Index;
use crate::option::IndexerOpts;
use index_store::IndexStore;
use super::IndexController;
use super::updates::UpdateStatus;
use super::{UpdateMeta, UpdateResult, IndexMetadata, IndexSettings};
/// `IndexController` implementation backed by a local `IndexStore`.
pub struct LocalIndexController {
/// Store holding all the indexes managed by this controller.
indexes: IndexStore,
/// Size passed as the update store size when an index is created.
update_db_size: u64,
/// Size passed as the index store size when an index is created.
index_db_size: u64,
}
impl LocalIndexController {
    /// Creates a controller whose index store lives at `path`.
    ///
    /// Note the order of the size parameters: `index_db_size` comes before `update_db_size`.
    pub fn new(
        path: impl AsRef<Path>,
        opt: IndexerOpts,
        index_db_size: u64,
        update_db_size: u64,
    ) -> anyhow::Result<Self> {
        let controller = Self {
            indexes: IndexStore::new(path, opt)?,
            index_db_size,
            update_db_size,
        };
        Ok(controller)
    }
}
impl IndexController for LocalIndexController {
/// Registers a documents addition on `index`, creating the index if it doesn't exist.
/// Returns the status of the registered (pending) update.
fn add_documents<S: AsRef<str>>(
    &self,
    index: S,
    method: milli::update::IndexDocumentsMethod,
    format: milli::update::UpdateFormat,
    data: &[u8],
    primary_key: Option<String>,
) -> anyhow::Result<UpdateStatus<UpdateMeta, UpdateResult, String>> {
    let (_, update_store) =
        self.indexes
            .get_or_create_index(&index, self.update_db_size, self.index_db_size)?;

    let addition = UpdateMeta::DocumentsAddition {
        method,
        format,
        primary_key,
    };
    let pending = update_store.register_update(addition, data)?;
    Ok(pending.into())
}
/// Registers a settings update on `index`, creating the index if it doesn't exist.
/// Returns the status of the registered (pending) update.
fn update_settings<S: AsRef<str>>(
    &self,
    index: S,
    settings: super::Settings,
) -> anyhow::Result<UpdateStatus<UpdateMeta, UpdateResult, String>> {
    let (_, update_store) =
        self.indexes
            .get_or_create_index(&index, self.update_db_size, self.index_db_size)?;

    // A settings update carries no content besides its metadata.
    let pending = update_store.register_update(UpdateMeta::Settings(settings), &[])?;
    Ok(pending.into())
}
fn create_index(&self, index_settings: IndexSettings) -> anyhow::Result<IndexMetadata> {
let index_name = index_settings.name.context("Missing name for index")?;
let (index, _, meta) = self.indexes.create_index(&index_name, self.update_db_size, self.index_db_size)?;
if let Some(ref primary_key) = index_settings.primary_key {
if let Err(e) = update_primary_key(index, primary_key).context("error creating index") {
// TODO: creating index could not be completed, delete everything.
Err(e)?
}
}
let meta = IndexMetadata {
uid: index_name,
uuid: meta.uuid.clone(),
created_at: meta.created_at,
updated_at: meta.created_at,
primary_key: index_settings.primary_key,
};
Ok(meta)
}
/// Deletes the index named `index_uid` by delegating to `IndexStore::delete`.
fn delete_index<S: AsRef<str>>(&self, index_uid: S) -> anyhow::Result<()> {
self.indexes.delete(index_uid)
}
/// Not implemented yet: calling this method panics (`todo!()`).
fn swap_indices<S1: AsRef<str>, S2: AsRef<str>>(&self, _index1_uid: S1, _index2_uid: S2) -> anyhow::Result<()> {
todo!()
}
/// Returns the index named `name`, or `None` if it doesn't exist.
fn index(&self, name: impl AsRef<str>) -> anyhow::Result<Option<Arc<Index>>> {
    match self.indexes.index(name)? {
        Some((index, _update_store)) => Ok(Some(index)),
        None => Ok(None),
    }
}
/// Returns the status of the update `id` of `index`, or `None` if the update store has no
/// such update. Fails if the index doesn't exist.
fn update_status(&self, index: impl AsRef<str>, id: u64) -> anyhow::Result<Option<UpdateStatus<UpdateMeta, UpdateResult, String>>> {
    let (_, update_store) = match self.indexes.index(&index)? {
        Some(found) => found,
        None => bail!("index {:?} doesn't exist", index.as_ref()),
    };
    Ok(update_store.meta(id)?)
}
/// Returns the status of every update of `index` (processing, pending, aborted, processed
/// and failed), sorted by update id. Entries that fail to decode are skipped. Fails if the
/// index doesn't exist.
fn all_update_status(&self, index: impl AsRef<str>) -> anyhow::Result<Vec<UpdateStatus<UpdateMeta, UpdateResult, String>>> {
    let update_store = match self.indexes.index(&index)? {
        Some((_, update_store)) => update_store,
        None => bail!("index {} doesn't exist.", index.as_ref()),
    };

    let updates = update_store.iter_metas(|processing, processed, pending, aborted, failed| {
        Ok(processing
            .map(UpdateStatus::from)
            .into_iter()
            .chain(pending.filter_map(Result::ok).map(|(_, update)| UpdateStatus::from(update)))
            .chain(aborted.filter_map(Result::ok).map(|(_, update)| UpdateStatus::from(update)))
            .chain(processed.filter_map(Result::ok).map(|(_, update)| UpdateStatus::from(update)))
            .chain(failed.filter_map(Result::ok).map(|(_, update)| UpdateStatus::from(update)))
            .sorted_by(|left, right| left.id().cmp(&right.id()))
            .collect())
    })?;

    Ok(updates)
}
/// Returns the metadata of every index.
///
/// `updated_at` is the date of the most recently processed update of the index, or its
/// creation date when no update has been processed yet.
fn list_indexes(&self) -> anyhow::Result<Vec<IndexMetadata>> {
    self.indexes
        .list_indexes()?
        .into_iter()
        .map(|(uid, meta, primary_key)| -> anyhow::Result<IndexMetadata> {
            let created_at = meta.created_at;
            let updated_at = self
                .all_update_status(&uid)?
                .iter()
                .filter_map(|status| status.processed().map(|processed| processed.processed_at))
                .max()
                .unwrap_or(created_at);

            Ok(IndexMetadata {
                uid,
                uuid: meta.uuid,
                created_at,
                updated_at,
                primary_key,
            })
        })
        .collect()
}
/// Updates the index `uid` with `index_settings` and returns its metadata.
///
/// Only the primary key can be set, and only when the index has none yet. Renaming an index
/// is rejected. Without a primary key in the settings, nothing is modified and the current
/// metadata is returned.
fn update_index(&self, uid: impl AsRef<str>, index_settings: IndexSettings) -> anyhow::Result<IndexMetadata> {
    if index_settings.name.is_some() {
        bail!("can't update an index name.")
    }

    let (primary_key, meta) = match index_settings.primary_key {
        Some(ref primary_key) => {
            // Going through the store also refreshes the `updated_at` date of the index.
            self.indexes
                .update_index(&uid, |index| {
                    let mut txn = index.write_txn()?;
                    if index.primary_key(&txn)?.is_some() {
                        bail!("primary key already exists.")
                    }
                    index.put_primary_key(&mut txn, primary_key)?;
                    txn.commit()?;
                    Ok(Some(primary_key.clone()))
                })?
        },
        None => {
            let (index, meta) = self.indexes
                .index_with_meta(&uid)?
                .with_context(|| format!("index {:?} doesn't exist.", uid.as_ref()))?;
            let primary_key = index
                .primary_key(&index.read_txn()?)?
                .map(String::from);
            (primary_key, meta)
        },
    };

    Ok(IndexMetadata {
        uid: uid.as_ref().to_string(),
        uuid: meta.uuid,
        created_at: meta.created_at,
        updated_at: meta.updated_at,
        primary_key,
    })
}
/// Registers an update clearing all the documents of `index`. Fails if the index doesn't
/// exist.
fn clear_documents(&self, index: impl AsRef<str>) -> anyhow::Result<super::UpdateStatus> {
    let (_, update_store) = self
        .indexes
        .index(&index)?
        .with_context(|| format!("Index {:?} doesn't exist", index.as_ref()))?;

    // Clearing the documents needs no content besides the metadata.
    let pending = update_store.register_update(UpdateMeta::ClearDocuments, &[])?;
    Ok(pending.into())
}
/// Registers an update deleting the documents `document_ids` from `index`. Fails if the
/// index doesn't exist.
fn delete_documents(&self, index: impl AsRef<str>, document_ids: Vec<String>) -> anyhow::Result<super::UpdateStatus> {
    let (_, update_store) = self
        .indexes
        .index(&index)?
        .with_context(|| format!("Index {:?} doesn't exist", index.as_ref()))?;

    // The ids to delete are the content of the update, serialized as a JSON array.
    let serialized_ids = serde_json::to_vec(&document_ids)?;
    let pending = update_store.register_update(UpdateMeta::DeleteDocuments, &serialized_ids)?;
    Ok(pending.into())
}
}
/// Sets the primary key of `index`, failing if one is already set.
fn update_primary_key(index: impl AsRef<Index>, primary_key: impl AsRef<str>) -> anyhow::Result<()> {
    let index = index.as_ref();
    let mut wtxn = index.write_txn()?;

    let already_set = index.primary_key(&wtxn)?.is_some();
    if already_set {
        bail!("primary key already set.")
    }

    index.put_primary_key(&mut wtxn, primary_key.as_ref())?;
    wtxn.commit()?;
    Ok(())
}
#[cfg(test)]
mod test {
use super::*;
use tempfile::tempdir;
use crate::make_index_controller_tests;
// Instantiates the `make_index_controller_tests!` tests with a `LocalIndexController`
// stored in a temporary directory.
make_index_controller_tests!({
let options = IndexerOpts::default();
let path = tempdir().unwrap();
// The same size is used for the index and the update databases.
let size = 4096 * 100;
LocalIndexController::new(path, options, size, size).unwrap()
});
}

View File

@@ -0,0 +1,255 @@
use std::collections::HashMap;
use std::io;
use std::sync::Arc;
use anyhow::Result;
use flate2::read::GzDecoder;
use grenad::CompressionType;
use log::info;
use milli::update::{IndexDocumentsMethod, UpdateBuilder, UpdateFormat};
use milli::Index;
use rayon::ThreadPool;
use super::update_store::HandleUpdate;
use crate::index_controller::updates::{Failed, Processed, Processing};
use crate::index_controller::{Facets, Settings, UpdateMeta, UpdateResult};
use crate::option::IndexerOpts;
/// Applies the updates of an update store to a milli index.
///
/// Apart from `index`, every field is an indexing option forwarded to the `UpdateBuilder`
/// created for each update.
pub struct UpdateHandler {
/// The index the updates are applied to.
index: Arc<Index>,
/// Forwarded to `UpdateBuilder::max_nb_chunks` when set.
max_nb_chunks: Option<usize>,
/// Forwarded to `UpdateBuilder::chunk_compression_level` when set.
chunk_compression_level: Option<u32>,
/// Thread pool handed to the update builder.
thread_pool: Arc<ThreadPool>,
/// Forwarded to `UpdateBuilder::log_every_n`.
log_frequency: usize,
/// Forwarded to `UpdateBuilder::max_memory` (a number of bytes).
max_memory: usize,
/// Forwarded to `UpdateBuilder::linked_hash_map_size`.
linked_hash_map_size: usize,
/// Forwarded to `UpdateBuilder::chunk_compression_type`.
chunk_compression_type: CompressionType,
/// Forwarded to `UpdateBuilder::chunk_fusing_shrink_size` (a number of bytes).
chunk_fusing_shrink_size: u64,
}
impl UpdateHandler {
/// Creates a handler applying updates to `index` with the indexing options of `opt`.
pub fn new(
    opt: &IndexerOpts,
    index: Arc<Index>,
    thread_pool: Arc<ThreadPool>,
) -> anyhow::Result<Self> {
    // Byte sizes are converted once, when the handler is created.
    let max_memory = opt.max_memory.get_bytes() as usize;
    let chunk_fusing_shrink_size = opt.chunk_fusing_shrink_size.get_bytes();

    let handler = Self {
        index,
        max_nb_chunks: opt.max_nb_chunks,
        chunk_compression_level: opt.chunk_compression_level,
        thread_pool,
        log_frequency: opt.log_every_n,
        max_memory,
        linked_hash_map_size: opt.linked_hash_map_size,
        chunk_compression_type: opt.chunk_compression_type,
        chunk_fusing_shrink_size,
    };
    Ok(handler)
}
/// Creates the `UpdateBuilder` of the update `update_id`, configured with the indexing
/// options of this handler.
fn update_buidler(&self, update_id: u64) -> UpdateBuilder {
    let mut builder = UpdateBuilder::new(update_id);

    // Optional settings are only forwarded when they were provided.
    if let Some(max_nb_chunks) = self.max_nb_chunks {
        builder.max_nb_chunks(max_nb_chunks);
    }
    if let Some(level) = self.chunk_compression_level {
        builder.chunk_compression_level(level);
    }

    builder.thread_pool(&self.thread_pool);
    builder.log_every_n(self.log_frequency);
    builder.max_memory(self.max_memory);
    builder.linked_hash_map_size(self.linked_hash_map_size);
    builder.chunk_compression_type(self.chunk_compression_type);
    builder.chunk_fusing_shrink_size(self.chunk_fusing_shrink_size);

    builder
}
/// Indexes the documents found in `content` with the given format and method.
///
/// `primary_key` is only used when the index has no primary key yet. Non-empty content is
/// read through a gzip decoder. The write transaction is committed only if the indexing
/// succeeds.
fn update_documents(
    &self,
    format: UpdateFormat,
    method: IndexDocumentsMethod,
    content: &[u8],
    update_builder: UpdateBuilder,
    primary_key: Option<&str>,
) -> anyhow::Result<UpdateResult> {
    // We must use the write transaction of the update here.
    let mut wtxn = self.index.write_txn()?;

    // Set the primary key if not set already, ignore if already set.
    let has_primary_key = self.index.primary_key(&wtxn)?.is_some();
    if !has_primary_key {
        if let Some(primary_key) = primary_key {
            self.index.put_primary_key(&mut wtxn, primary_key)?;
        }
    }

    let mut builder = update_builder.index_documents(&mut wtxn, &self.index);
    builder.update_format(format);
    builder.index_documents_method(method);

    let gzipped = true;
    let reader: Box<dyn io::Read> = if gzipped && !content.is_empty() {
        Box::new(GzDecoder::new(content))
    } else {
        Box::new(content)
    };

    let addition_result = builder.execute(reader, |indexing_step, update_id| {
        info!("update {}: {:?}", update_id, indexing_step)
    })?;

    wtxn.commit()?;
    Ok(UpdateResult::DocumentsAddition(addition_result))
}
/// Removes all the documents of the index. The write transaction is committed only if the
/// clearing succeeds.
fn clear_documents(&self, update_builder: UpdateBuilder) -> anyhow::Result<UpdateResult> {
    // We must use the write transaction of the update here.
    let mut wtxn = self.index.write_txn()?;

    // The number of cleared documents is not reported.
    let _count = update_builder
        .clear_documents(&mut wtxn, &self.index)
        .execute()?;

    wtxn.commit()?;
    Ok(UpdateResult::Other)
}
fn update_settings(
&self,
settings: &Settings,
update_builder: UpdateBuilder,
) -> anyhow::Result<UpdateResult> {
// We must use the write transaction of the update here.
let mut wtxn = self.index.write_txn()?;
let mut builder = update_builder.settings(&mut wtxn, &self.index);
// We transpose the settings JSON struct into a real setting update.
if let Some(ref names) = settings.searchable_attributes {
match names {
Some(names) => builder.set_searchable_fields(names.clone()),
None => builder.reset_searchable_fields(),
}
}
// We transpose the settings JSON struct into a real setting update.
if let Some(ref names) = settings.displayed_attributes {
match names {
Some(names) => builder.set_displayed_fields(names.clone()),
None => builder.reset_displayed_fields(),
}
}
// We transpose the settings JSON struct into a real setting update.
if let Some(ref facet_types) = settings.faceted_attributes {
let facet_types = facet_types.clone().unwrap_or_else(|| HashMap::new());
builder.set_faceted_fields(facet_types);
}
// We transpose the settings JSON struct into a real setting update.
if let Some(ref criteria) = settings.criteria {
match criteria {
Some(criteria) => builder.set_criteria(criteria.clone()),
None => builder.reset_criteria(),
}
}
let result = builder
.execute(|indexing_step, update_id| info!("update {}: {:?}", update_id, indexing_step));
match result {
Ok(()) => wtxn
.commit()
.and(Ok(UpdateResult::Other))
.map_err(Into::into),
Err(e) => Err(e.into()),
}
}
/// Updates the facet levels of the index. Only the values present in `levels` are
/// forwarded to the builder. The write transaction is committed only if the update
/// succeeds.
fn update_facets(
    &self,
    levels: &Facets,
    update_builder: UpdateBuilder,
) -> anyhow::Result<UpdateResult> {
    // We must use the write transaction of the update here.
    let mut wtxn = self.index.write_txn()?;
    let mut builder = update_builder.facets(&mut wtxn, &self.index);

    if let Some(group_size) = levels.level_group_size {
        builder.level_group_size(group_size);
    }
    if let Some(level_size) = levels.min_level_size {
        builder.min_level_size(level_size);
    }

    builder.execute()?;

    wtxn.commit()?;
    Ok(UpdateResult::Other)
}
/// Deletes the documents whose external ids are listed in `document_ids`, a JSON array of
/// strings. The write transaction is committed only if the deletion succeeds.
fn delete_documents(
    &self,
    document_ids: &[u8],
    update_builder: UpdateBuilder,
) -> anyhow::Result<UpdateResult> {
    let ids: Vec<String> = serde_json::from_slice(document_ids)?;
    let mut txn = self.index.write_txn()?;
    let mut builder = update_builder.delete_documents(&mut txn, &self.index)?;

    // We ignore non-existing document ids: the result of each call is discarded.
    for id in ids.iter() {
        builder.delete_external_id(id);
    }

    let deleted = builder.execute()?;

    txn.commit()?;
    Ok(UpdateResult::DocumentDeletion { deleted })
}
}
impl HandleUpdate<UpdateMeta, UpdateResult, String> for UpdateHandler {
fn handle_update(
&mut self,
meta: Processing<UpdateMeta>,
content: &[u8],
) -> Result<Processed<UpdateMeta, UpdateResult>, Failed<UpdateMeta, String>> {
use UpdateMeta::*;
let update_id = meta.id();
let update_builder = self.update_buidler(update_id);
let result = match meta.meta() {
DocumentsAddition {
method,
format,
primary_key,
} => self.update_documents(
*format,
*method,
content,
update_builder,
primary_key.as_deref(),
),
ClearDocuments => self.clear_documents(update_builder),
DeleteDocuments => self.delete_documents(content, update_builder),
Settings(settings) => self.update_settings(settings, update_builder),
Facets(levels) => self.update_facets(levels, update_builder),
};
match result {
Ok(result) => Ok(meta.process(result)),
Err(e) => Err(meta.fail(e.to_string())),
}
}
}

View File

@@ -0,0 +1,407 @@
use std::path::Path;
use std::sync::{Arc, RwLock};
use crossbeam_channel::Sender;
use heed::types::{OwnedType, DecodeIgnore, SerdeJson, ByteSlice};
use heed::{EnvOpenOptions, Env, Database};
use serde::{Serialize, Deserialize};
use crate::index_controller::updates::*;
/// Big-endian `u64`, used as the key type of every database of the update store.
type BEU64 = heed::zerocopy::U64<heed::byteorder::BE>;
/// Persistent store of updates, generic over the update metadata `M`, the success payload
/// `N` and the error payload `E`. Every database is keyed by the update id.
#[derive(Clone)]
pub struct UpdateStore<M, N, E> {
/// Environment holding all the databases below.
env: Env,
/// Metadata of the pending updates.
pending_meta: Database<OwnedType<BEU64>, SerdeJson<Pending<M>>>,
/// Raw bytes attached to the pending updates (presumably the update content, to be
/// confirmed against `register_update`).
pending: Database<OwnedType<BEU64>, ByteSlice>,
/// Metadata of the successfully processed updates.
processed_meta: Database<OwnedType<BEU64>, SerdeJson<Processed<M, N>>>,
/// Metadata of the failed updates.
failed_meta: Database<OwnedType<BEU64>, SerdeJson<Failed<M, E>>>,
/// Metadata of the aborted updates.
aborted_meta: Database<OwnedType<BEU64>, SerdeJson<Aborted<M>>>,
/// Shared slot for a `Processing` update; initialised to `None` by `open`.
processing: Arc<RwLock<Option<Processing<M>>>>,
/// Sending half of the channel used to wake up the processing thread.
notification_sender: Sender<()>,
}
/// Implemented by the types able to process the updates of an `UpdateStore`.
pub trait HandleUpdate<M, N, E> {
/// Processes the update described by `meta` with the given `content`, returning the
/// processed update on success and the failed update otherwise.
fn handle_update(&mut self, meta: Processing<M>, content: &[u8]) -> Result<Processed<M, N>, Failed<M, E>>;
}
impl<M, N, E> UpdateStore<M, N, E>
where
M: for<'a> Deserialize<'a> + Serialize + 'static + Send + Sync + Clone,
N: for<'a> Deserialize<'a> + Serialize + 'static + Send + Sync,
E: for<'a> Deserialize<'a> + Serialize + 'static + Send + Sync,
{
/// Opens (creating it if needed) the update store located at `path` and spawns
/// the background thread that feeds pending updates to `update_handler`.
///
/// The thread only holds a weak reference to the store: once every strong
/// reference is gone it exits the next time it tries to process something.
pub fn open<P, U>(
    mut options: EnvOpenOptions,
    path: P,
    mut update_handler: U,
) -> heed::Result<Arc<Self>>
where
    P: AsRef<Path>,
    U: HandleUpdate<M, N, E> + Send + 'static,
{
    // Five named databases are created below.
    options.max_dbs(5);
    let env = options.open(path)?;

    let pending_meta = env.create_database(Some("pending-meta"))?;
    let pending = env.create_database(Some("pending"))?;
    let processed_meta = env.create_database(Some("processed-meta"))?;
    let aborted_meta = env.create_database(Some("aborted-meta"))?;
    let failed_meta = env.create_database(Some("failed-meta"))?;

    let processing = Arc::new(RwLock::new(None));

    let (notification_sender, notification_receiver) = crossbeam_channel::bounded(1);
    // Queue one notification right away so updates already on disk get picked up.
    let _ = notification_sender.send(());

    let update_store = Arc::new(UpdateStore {
        env,
        pending_meta,
        pending,
        processed_meta,
        failed_meta,
        aborted_meta,
        processing,
        notification_sender,
    });

    // The worker must not keep the store alive, otherwise nobody could ever
    // take ownership of the `Arc` to close the environment.
    let weak_store = Arc::downgrade(&update_store);
    std::thread::spawn(move || {
        // Each received notification means "there might be work to do".
        for _ in notification_receiver {
            // Drain the queue until no pending update is left.
            loop {
                let store = match weak_store.upgrade() {
                    Some(store) => store,
                    // Every strong reference is gone: stop the worker.
                    None => return,
                };
                match store.process_pending_update(&mut update_handler) {
                    Ok(Some(_)) => {}
                    Ok(None) => break,
                    Err(e) => eprintln!("error while processing update: {}", e),
                }
            }
        }
    });

    Ok(update_store)
}
/// Consumes the store and forwards to `Env::prepare_for_closing`, returning
/// the closing event of the underlying environment.
pub fn prepare_for_closing(self) -> heed::EnvClosingEvent {
self.env.prepare_for_closing()
}
/// Returns the new biggest id to use to store the new update.
///
/// The id is one more than the highest key found across the pending,
/// processed, aborted *and failed* meta databases, or `0` when all of them
/// are empty. The failed database must be part of the computation: when the
/// most recent update ended up there and nothing else is queued, ignoring it
/// would hand out the same id a second time.
fn new_update_id(&self, txn: &heed::RoTxn) -> heed::Result<u64> {
    // Only the keys matter here, so the values are never decoded.
    let last_pending = self.pending_meta
        .remap_data_type::<DecodeIgnore>()
        .last(txn)?
        .map(|(k, _)| k.get());
    let last_processed = self.processed_meta
        .remap_data_type::<DecodeIgnore>()
        .last(txn)?
        .map(|(k, _)| k.get());
    let last_aborted = self.aborted_meta
        .remap_data_type::<DecodeIgnore>()
        .last(txn)?
        .map(|(k, _)| k.get());
    let last_failed = self.failed_meta
        .remap_data_type::<DecodeIgnore>()
        .last(txn)?
        .map(|(k, _)| k.get());

    let last_update_id = [last_pending, last_processed, last_aborted, last_failed]
        .iter()
        .copied()
        .flatten()
        .max();

    match last_update_id {
        Some(last_id) => Ok(last_id + 1),
        None => Ok(0),
    }
}
/// Stores `content` in the pending database and its `meta` in the
/// pending-meta database under a freshly allocated update id, then wakes up
/// the processing thread. Returns the pending state that was registered.
///
/// # Panics
///
/// Panics if the notification channel is disconnected.
pub fn register_update(
    &self,
    meta: M,
    content: &[u8]
) -> heed::Result<Pending<M>> {
    let mut wtxn = self.env.write_txn()?;

    // The id is computed while the write transaction is open, so a concurrent
    // registration has to wait for its own write transaction and cannot
    // obtain the same id.
    let update_id = self.new_update_id(&wtxn)?;
    let key = BEU64::new(update_id);
    let pending = Pending::new(meta, update_id);

    self.pending_meta.put(&mut wtxn, &key, &pending)?;
    self.pending.put(&mut wtxn, &key, content)?;
    wtxn.commit()?;

    // A full channel is fine (a notification is already queued); a
    // disconnected one is not.
    match self.notification_sender.try_send(()) {
        Ok(()) => {}
        Err(e) => assert!(!e.is_disconnected(), "update notification channel is disconnected"),
    }

    Ok(pending)
}
/// Runs `handler` on the next pending update (the one with the lowest id).
///
/// The update is handled while only a read transaction is open; the resulting
/// meta is written to the processed-meta or failed-meta store in a separate
/// write transaction *after* the handler has returned.
///
/// Returns `Ok(Some(()))` when an update was handled and `Ok(None)` when
/// there was no pending update.
///
/// # Panics
///
/// Panics if a pending meta has no associated content, or if the
/// `processing` lock is poisoned.
fn process_pending_update<U>(&self, handler: &mut U) -> heed::Result<Option<()>>
where
U: HandleUpdate<M, N, E> + Send + 'static,
{
// Create a read transaction to be able to retrieve the pending update in order.
let rtxn = self.env.read_txn()?;
let first_meta = self.pending_meta.first(&rtxn)?;
// If there is a pending update we process it and only keep
// a reader while processing it, not a writer.
match first_meta {
Some((first_id, pending)) => {
let first_content = self.pending
.get(&rtxn, &first_id)?
.expect("associated update content");
// We change the state of the update from pending to processing before we pass it
// to the update handler. The processing state is kept in memory only: the update
// itself is still stored in the pending databases until the handler is done.
let processing = pending.processing();
self.processing
.write()
.unwrap()
.replace(processing.clone());
// Process the pending update using the provided user function.
let result = handler.handle_update(processing, first_content);
// `first_content` borrows from `rtxn`, so the reader is only released here.
drop(rtxn);
// Once the handler has returned, we must remove the content from the pending
// and processing stores and write the *new* meta to the processed-meta or
// failed-meta store and commit.
let mut wtxn = self.env.write_txn()?;
self.processing
.write()
.unwrap()
.take();
self.pending_meta.delete(&mut wtxn, &first_id)?;
self.pending.delete(&mut wtxn, &first_id)?;
match result {
Ok(processed) => self.processed_meta.put(&mut wtxn, &first_id, &processed)?,
Err(failed) => self.failed_meta.put(&mut wtxn, &first_id, &failed)?,
}
wtxn.commit()?;
Ok(Some(()))
},
None => Ok(None)
}
}
/// Execute the user defined function with the meta-store iterators, the first
/// iterator is the *processed* meta one, the second the *aborted* meta one
/// and, the last is the *pending* meta one.
pub fn iter_metas<F, T>(&self, mut f: F) -> heed::Result<T>
where
F: for<'a> FnMut(
Option<Processing<M>>,
heed::RoIter<'a, OwnedType<BEU64>, SerdeJson<Processed<M, N>>>,
heed::RoIter<'a, OwnedType<BEU64>, SerdeJson<Aborted<M>>>,
heed::RoIter<'a, OwnedType<BEU64>, SerdeJson<Pending<M>>>,
heed::RoIter<'a, OwnedType<BEU64>, SerdeJson<Failed<M, E>>>,
) -> heed::Result<T>,
{
let rtxn = self.env.read_txn()?;
// We get the pending, processed and aborted meta iterators.
let processed_iter = self.processed_meta.iter(&rtxn)?;
let aborted_iter = self.aborted_meta.iter(&rtxn)?;
let pending_iter = self.pending_meta.iter(&rtxn)?;
let processing = self.processing.read().unwrap().clone();
let failed_iter = self.failed_meta.iter(&rtxn)?;
// We execute the user defined function with both iterators.
(f)(processing, processed_iter, aborted_iter, pending_iter, failed_iter)
}
/// Returns the update associated meta or `None` if the update doesn't exist.
pub fn meta(&self, update_id: u64) -> heed::Result<Option<UpdateStatus<M, N, E>>> {
let rtxn = self.env.read_txn()?;
let key = BEU64::new(update_id);
if let Some(ref meta) = *self.processing.read().unwrap() {
if meta.id() == update_id {
return Ok(Some(UpdateStatus::Processing(meta.clone())));
}
}
if let Some(meta) = self.pending_meta.get(&rtxn, &key)? {
return Ok(Some(UpdateStatus::Pending(meta)));
}
if let Some(meta) = self.processed_meta.get(&rtxn, &key)? {
return Ok(Some(UpdateStatus::Processed(meta)));
}
if let Some(meta) = self.aborted_meta.get(&rtxn, &key)? {
return Ok(Some(UpdateStatus::Aborted(meta)));
}
if let Some(meta) = self.failed_meta.get(&rtxn, &key)? {
return Ok(Some(UpdateStatus::Failed(meta)));
}
Ok(None)
}
/// Aborts a pending update: its content is deleted and its meta is moved
/// into the aborted updates database.
///
/// Returns `None` without touching anything when the update is the first
/// pending one (treated as currently being processed), when it has already
/// been processed, or when it does not exist.
#[allow(dead_code)]
pub fn abort_update(&self, update_id: u64) -> heed::Result<Option<Aborted<M>>> {
    let mut wtxn = self.env.write_txn()?;
    let key = BEU64::new(update_id);

    // The head of the pending queue is the one being processed: it cannot be aborted.
    let head_id = self.pending_meta.first(&wtxn)?.map(|(head, _)| head.get());
    if head_id == Some(update_id) {
        return Ok(None);
    }

    let aborted = match self.pending_meta.get(&wtxn, &key)? {
        Some(pending) => pending.abort(),
        None => return Ok(None),
    };

    self.aborted_meta.put(&mut wtxn, &key, &aborted)?;
    self.pending_meta.delete(&mut wtxn, &key)?;
    self.pending.delete(&mut wtxn, &key)?;
    wtxn.commit()?;

    Ok(Some(aborted))
}
/// Aborts every pending update except the first one, which is considered to
/// be currently processed, and returns the `(id, aborted meta)` pairs of the
/// updates that were aborted.
#[allow(dead_code)]
pub fn abort_pendings(&self) -> heed::Result<Vec<(u64, Aborted<M>)>> {
    let mut wtxn = self.env.write_txn()?;

    // First pass: collect what must be aborted. The iterator borrows the
    // transaction, so nothing can be written until it is exhausted.
    let aborted_updates = self.pending_meta
        .iter(&wtxn)?
        .skip(1)
        .map(|entry| entry.map(|(key, pending)| (key.get(), pending.abort())))
        .collect::<heed::Result<Vec<_>>>()?;

    // Second pass: move each update from the pending stores to the aborted one.
    for (id, aborted) in aborted_updates.iter() {
        let key = BEU64::new(*id);
        self.aborted_meta.put(&mut wtxn, &key, aborted)?;
        self.pending_meta.delete(&mut wtxn, &key)?;
        self.pending.delete(&mut wtxn, &key)?;
    }

    wtxn.commit()?;
    Ok(aborted_updates)
}
}
#[cfg(test)]
mod tests {
use super::*;
use std::thread;
use std::time::{Duration, Instant};
// Test-only convenience: lets a plain closure be used as an update handler.
impl<M, N, F, E> HandleUpdate<M, N, E> for F
where F: FnMut(Processing<M>, &[u8]) -> Result<Processed<M, N>, Failed<M, E>> + Send + 'static {
// Simply forwards the call to the closure.
fn handle_update(&mut self, meta: Processing<M>, content: &[u8]) -> Result<Processed<M, N>, Failed<M, E>> {
self(meta, content)
}
}
// Registers a single update and checks that it ends up in the processed state
// with the meta produced by the handler.
#[test]
fn simple() {
    let dir = tempfile::tempdir().unwrap();
    let mut options = EnvOpenOptions::new();
    options.map_size(4096 * 100);

    let update_store = UpdateStore::open(options, dir, |meta: Processing<String>, _content: &_| -> Result<_, Failed<_, ()>> {
        let new_meta = meta.meta().to_string() + " processed";
        Ok(meta.process(new_meta))
    }).unwrap();

    let update = update_store.register_update(String::from("kiki"), &[]).unwrap();

    // Leave some time to the background thread to process the update.
    thread::sleep(Duration::from_millis(100));

    match update_store.meta(update.id()).unwrap().unwrap() {
        UpdateStatus::Processed(Processed { success, .. }) => {
            assert_eq!(success, "kiki processed");
        }
        _ => panic!(),
    }
}
// Registers three slow updates and checks that registration never blocks on
// processing and that all of them are eventually processed.
//
// Kept `#[ignore]`d: it sleeps for more than a second and relies on timing.
#[test]
#[ignore]
fn long_running_update() {
    let dir = tempfile::tempdir().unwrap();
    let mut options = EnvOpenOptions::new();
    options.map_size(4096 * 100);
    let update_store = UpdateStore::open(options, dir, |meta: Processing<String>, _content:&_| -> Result<_, Failed<_, ()>> {
        thread::sleep(Duration::from_millis(400));
        // The leading space matters: the assertions below expect "<name> processed".
        let new_meta = meta.meta().to_string() + " processed";
        let processed = meta.process(new_meta);
        Ok(processed)
    }).unwrap();

    // Registering must return long before the 400ms handler completes.
    let before_register = Instant::now();

    let meta = String::from("kiki");
    let update_kiki = update_store.register_update(meta, &[]).unwrap();
    assert!(before_register.elapsed() < Duration::from_millis(200));

    let meta = String::from("coco");
    let update_coco = update_store.register_update(meta, &[]).unwrap();
    assert!(before_register.elapsed() < Duration::from_millis(200));

    let meta = String::from("cucu");
    let update_cucu = update_store.register_update(meta, &[]).unwrap();
    assert!(before_register.elapsed() < Duration::from_millis(200));

    // Wait for the three updates to go through the handler.
    thread::sleep(Duration::from_millis(400 * 3 + 100));

    let meta = update_store.meta(update_kiki.id()).unwrap().unwrap();
    if let UpdateStatus::Processed(Processed { success, .. }) = meta {
        assert_eq!(success, "kiki processed");
    } else {
        panic!()
    }

    let meta = update_store.meta(update_coco.id()).unwrap().unwrap();
    if let UpdateStatus::Processed(Processed { success, .. }) = meta {
        assert_eq!(success, "coco processed");
    } else {
        panic!()
    }

    let meta = update_store.meta(update_cucu.id()).unwrap().unwrap();
    if let UpdateStatus::Processed(Processed { success, .. }) = meta {
        assert_eq!(success, "cucu processed");
    } else {
        panic!()
    }
}
}

View File

@@ -0,0 +1,281 @@
mod local_index_controller;
mod updates;
pub use local_index_controller::LocalIndexController;
use std::collections::HashMap;
use std::num::NonZeroUsize;
use std::sync::Arc;
use anyhow::Result;
use chrono::{DateTime, Utc};
use milli::Index;
use milli::update::{IndexDocumentsMethod, UpdateFormat, DocumentAdditionResult};
use serde::{Serialize, Deserialize, de::Deserializer};
use uuid::Uuid;
pub use updates::{Processed, Processing, Failed};
/// Update status specialised for this crate: `UpdateMeta` as metadata,
/// `UpdateResult` as success payload and a `String` as error payload.
pub type UpdateStatus = updates::UpdateStatus<UpdateMeta, UpdateResult, String>;
/// Metadata describing an index; (de)serialized with camelCase field names.
#[derive(Debug, Serialize, Deserialize, Clone)]
#[serde(rename_all = "camelCase")]
pub struct IndexMetadata {
/// User-facing identifier of the index.
pub uid: String,
/// Internal identifier of the index.
uuid: Uuid,
/// Creation date of the index.
created_at: DateTime<Utc>,
/// Date of the last modification of the index.
updated_at: DateTime<Utc>,
/// Primary key of the index, if one has been set.
primary_key: Option<String>,
}
/// Describes the kind of an update and carries its parameters.
///
/// Serialized as an internally tagged enum: the variant name is stored in a
/// `type` field.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum UpdateMeta {
/// Addition of documents; the documents themselves are the update content.
DocumentsAddition {
method: IndexDocumentsMethod,
format: UpdateFormat,
primary_key: Option<String>,
},
/// Removal of all the documents of the index.
ClearDocuments,
/// Removal of some documents; the update content carries which ones.
DeleteDocuments,
/// Modification of the index settings.
Settings(Settings),
/// Modification of the facet levels parameters.
Facets(Facets),
}
/// Parameters of a facets update. Unknown fields are rejected when
/// deserializing and field names are camelCase.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
#[serde(rename_all = "camelCase")]
pub struct Facets {
/// Optional, non-zero group size of a facet level.
pub level_group_size: Option<NonZeroUsize>,
/// Optional, non-zero minimum size of a facet level.
pub min_level_size: Option<NonZeroUsize>,
}
fn deserialize_some<'de, T, D>(deserializer: D) -> Result<Option<T>, D::Error>
where T: Deserialize<'de>,
D: Deserializer<'de>
{
Deserialize::deserialize(deserializer).map(Some)
}
/// Settings update of an index.
///
/// Every field is an `Option<Option<_>>` with the following meaning:
/// - `None`: the field is absent from the payload, the setting is left untouched;
/// - `Some(None)`: the field is explicitly `null`, the setting is reset;
/// - `Some(Some(value))`: the setting is replaced by `value`.
///
/// All fields go through `deserialize_some` so that an explicit `null` is
/// distinguished from a missing field, and absent fields are skipped when
/// serializing.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
#[serde(rename_all = "camelCase")]
pub struct Settings {
    #[serde(
        default,
        deserialize_with = "deserialize_some",
        skip_serializing_if = "Option::is_none",
    )]
    pub displayed_attributes: Option<Option<Vec<String>>>,

    #[serde(
        default,
        deserialize_with = "deserialize_some",
        skip_serializing_if = "Option::is_none",
    )]
    pub searchable_attributes: Option<Option<Vec<String>>>,

    // Same attributes as the other fields: with a bare `default`, an explicit
    // `null` was deserialized as `None` and the facets could never be reset.
    #[serde(
        default,
        deserialize_with = "deserialize_some",
        skip_serializing_if = "Option::is_none",
    )]
    pub faceted_attributes: Option<Option<HashMap<String, String>>>,

    #[serde(
        default,
        deserialize_with = "deserialize_some",
        skip_serializing_if = "Option::is_none",
    )]
    pub criteria: Option<Option<Vec<String>>>,
}
impl Settings {
/// Returns a `Settings` in which every field is `Some(None)`.
pub fn cleared() -> Self {
Self {
displayed_attributes: Some(None),
searchable_attributes: Some(None),
faceted_attributes: Some(None),
criteria: Some(None),
}
}
}
/// Success payload stored with a processed update.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum UpdateResult {
/// Result of a documents addition.
DocumentsAddition(DocumentAdditionResult),
/// Result of a documents deletion, with the number of deleted documents.
DocumentDeletion { deleted: usize },
/// Result of any update that has nothing specific to report.
Other,
}
/// Parameters accepted when creating or updating an index; both are optional.
#[derive(Clone, Debug)]
pub struct IndexSettings {
/// Name of the index.
pub name: Option<String>,
/// Primary key of the index.
pub primary_key: Option<String>,
}
/// The `IndexController` is in charge of the access to the underlying indices. It splits the logic
/// for read access, which is provided thanks to a handle to the index, and write access, which must
/// be provided. This allows the implementer to define the behaviour of write accesses to the
/// indices, and abstract the scheduling of the updates. The implementer must be able to provide an
/// instance of `IndexStore`.
pub trait IndexController {
/*
* Write operations
*
* Logic for the write operations needs to be provided by the implementer, since they can be made
* asynchronous thanks to an update_store for example.
*
* */
/// Perform document addition on the database. If the provided index does not exist, it will be
/// created when the addition is applied to the index.
fn add_documents<S: AsRef<str>>(
&self,
index: S,
method: IndexDocumentsMethod,
format: UpdateFormat,
data: &[u8],
primary_key: Option<String>,
) -> anyhow::Result<UpdateStatus>;
/// Clear all documents in the given index.
fn clear_documents(&self, index: impl AsRef<str>) -> anyhow::Result<UpdateStatus>;
/// Delete all documents in `document_ids`.
fn delete_documents(&self, index: impl AsRef<str>, document_ids: Vec<String>) -> anyhow::Result<UpdateStatus>;
/// Updates an index settings. If the index does not exist, it will be created when the update
/// is applied to the index.
fn update_settings<S: AsRef<str>>(&self, index_uid: S, settings: Settings) -> anyhow::Result<UpdateStatus>;
/// Create an index from the given `index_settings`.
fn create_index(&self, index_settings: IndexSettings) -> Result<IndexMetadata>;
/// Delete index with the given `index_uid`, attempting to close it beforehand.
fn delete_index<S: AsRef<str>>(&self, index_uid: S) -> Result<()>;
/// Swap two indexes, concretely, it simply swaps the index the names point to.
fn swap_indices<S1: AsRef<str>, S2: AsRef<str>>(&self, index1_uid: S1, index2_uid: S2) -> Result<()>;
/// Apply an update to the given index. This method can be called when an update is ready to be
/// processed.
///
/// The default implementation is a `todo!()` stub and panics when called.
fn handle_update<S: AsRef<str>>(
&self,
_index: S,
_update_id: u64,
_meta: Processing<UpdateMeta>,
_content: &[u8]
) -> Result<Processed<UpdateMeta, UpdateResult>, Failed<UpdateMeta, String>> {
todo!()
}
/// Returns, if it exists, the `Index` with the provided name.
fn index(&self, name: impl AsRef<str>) -> anyhow::Result<Option<Arc<Index>>>;
/// Returns the status of the update `id` of the given index, if it exists.
fn update_status(&self, index: impl AsRef<str>, id: u64) -> anyhow::Result<Option<UpdateStatus>>;
/// Returns all the update statuses for an index.
fn all_update_status(&self, index: impl AsRef<str>) -> anyhow::Result<Vec<UpdateStatus>>;
/// List all the indexes.
fn list_indexes(&self) -> anyhow::Result<Vec<IndexMetadata>>;
/// Updates the index `name` with `index_settings` and returns its new metadata.
fn update_index(&self, name: impl AsRef<str>, index_settings: IndexSettings) -> anyhow::Result<IndexMetadata>;
}
#[cfg(test)]
#[macro_use]
pub(crate) mod test {
use super::*;
// Generates the shared `IndexController` test-suite for one implementation.
//
// The macro takes a block that evaluates to a fresh controller; the block is
// expanded once per generated test, so every test gets its own controller.
#[macro_export]
macro_rules! make_index_controller_tests {
    ($controller_builder:block) => {
        #[test]
        fn test_create_and_list_indexes() {
            crate::index_controller::test::create_and_list_indexes($controller_builder);
        }

        #[test]
        fn test_create_index_with_no_name_is_error() {
            crate::index_controller::test::create_index_with_no_name_is_error($controller_builder);
        }

        #[test]
        fn test_update_index() {
            crate::index_controller::test::update_index($controller_builder);
        }
    };
}
/// Creates two indexes and checks that listing returns both of them, in
/// order, with the expected uid and primary key.
pub(crate) fn create_and_list_indexes(controller: impl IndexController) {
    controller
        .create_index(IndexSettings {
            name: Some(String::from("test_index")),
            primary_key: None,
        })
        .unwrap();
    controller
        .create_index(IndexSettings {
            name: Some(String::from("test_index2")),
            primary_key: Some(String::from("foo")),
        })
        .unwrap();

    let listed = controller.list_indexes().unwrap();
    assert_eq!(listed.len(), 2);
    assert_eq!(listed[0].uid, "test_index");
    assert_eq!(listed[1].uid, "test_index2");
    assert_eq!(listed[1].primary_key.clone().unwrap(), "foo");
}
/// Creating an index without a name must be rejected by the controller.
pub(crate) fn create_index_with_no_name_is_error(controller: impl IndexController) {
    let nameless = IndexSettings {
        name: None,
        primary_key: None,
    };
    let creation = controller.create_index(nameless);
    assert!(creation.is_err());
}
/// Exercises `update_index`: empty update, forbidden rename, primary key
/// definition and forbidden primary key redefinition.
pub(crate) fn update_index(controller: impl IndexController) {
    let creation = controller.create_index(IndexSettings {
        name: Some(String::from("test")),
        primary_key: None,
    });
    assert!(creation.is_ok());

    // An empty update returns the index metadata unchanged.
    let unchanged = controller
        .update_index("test", IndexSettings { name: None, primary_key: None })
        .unwrap();
    assert_eq!(unchanged.uid, "test");
    assert_eq!(unchanged.created_at, unchanged.updated_at);
    assert!(unchanged.primary_key.is_none());

    // Changing the name triggers an error.
    let rename = IndexSettings {
        name: Some(String::from("bar")),
        primary_key: None,
    };
    assert!(controller.update_index("test", rename).is_err());

    // Setting the primary key for the first time succeeds and bumps `updated_at`.
    let with_primary_key = IndexSettings {
        name: None,
        primary_key: Some(String::from("foo")),
    };
    let updated = controller
        .update_index("test", with_primary_key.clone())
        .unwrap();
    assert_eq!(updated.uid, "test");
    assert!(updated.created_at < updated.updated_at);
    assert_eq!(updated.primary_key.unwrap(), "foo");

    // Setting the primary key a second time is an error.
    assert!(controller.update_index("test", with_primary_key).is_err());
}
}

View File

@@ -0,0 +1,179 @@
use chrono::{Utc, DateTime};
use serde::{Serialize, Deserialize};
/// An update that has been registered and waits to be processed.
#[derive(Debug, PartialEq, Eq, Hash, Serialize, Deserialize, Clone)]
#[serde(rename_all = "camelCase")]
pub struct Pending<M> {
/// Identifier of the update.
pub update_id: u64,
/// User-defined metadata of the update.
pub meta: M,
/// Time at which the `Pending` value was created (see `Pending::new`).
pub enqueued_at: DateTime<Utc>,
}
impl<M> Pending<M> {
    /// Creates a pending update enqueued at the current time.
    pub fn new(meta: M, update_id: u64) -> Self {
        let enqueued_at = Utc::now();
        Self {
            update_id,
            meta,
            enqueued_at,
        }
    }

    /// Moves the update to the processing state, starting now.
    pub fn processing(self) -> Processing<M> {
        let started_processing_at = Utc::now();
        Processing {
            from: self,
            started_processing_at,
        }
    }

    /// Moves the update to the aborted state, aborted now.
    pub fn abort(self) -> Aborted<M> {
        let aborted_at = Utc::now();
        Aborted {
            from: self,
            aborted_at,
        }
    }

    /// Metadata attached to the update.
    pub fn meta(&self) -> &M {
        &self.meta
    }

    /// Identifier of the update.
    pub fn id(&self) -> u64 {
        self.update_id
    }
}
/// An update that has been processed successfully.
#[derive(Debug, PartialEq, Eq, Hash, Serialize, Deserialize, Clone)]
#[serde(rename_all = "camelCase")]
pub struct Processed<M, N> {
/// Payload produced when the update was processed.
pub success: N,
/// Time at which the `Processed` value was created (see `Processing::process`).
pub processed_at: DateTime<Utc>,
/// The processing state this update comes from; its fields are flattened
/// into this struct when (de)serializing.
#[serde(flatten)]
pub from: Processing<M>,
}
impl<M, N> Processed<M, N> {
/// Identifier of the update.
pub fn id(&self) -> u64 {
self.from.id()
}
}
/// An update that is currently being processed.
#[derive(Debug, PartialEq, Eq, Hash, Serialize, Deserialize, Clone)]
#[serde(rename_all = "camelCase")]
pub struct Processing<M> {
/// The pending state this update comes from; its fields are flattened
/// into this struct when (de)serializing.
#[serde(flatten)]
pub from: Pending<M>,
/// Time at which the `Processing` value was created (see `Pending::processing`).
pub started_processing_at: DateTime<Utc>,
}
impl<M> Processing<M> {
    /// Identifier of the update.
    pub fn id(&self) -> u64 {
        self.from.id()
    }

    /// Metadata attached to the update.
    pub fn meta(&self) -> &M {
        self.from.meta()
    }

    /// Moves the update to the processed state, carrying `meta` as its
    /// success payload and the current time as processing date.
    pub fn process<N>(self, meta: N) -> Processed<M, N> {
        let processed_at = Utc::now();
        Processed {
            success: meta,
            processed_at,
            from: self,
        }
    }

    /// Moves the update to the failed state, carrying `error` and the
    /// current time as failure date.
    pub fn fail<E>(self, error: E) -> Failed<M, E> {
        let failed_at = Utc::now();
        Failed {
            from: self,
            error,
            failed_at,
        }
    }
}
/// An update that has been aborted while it was still pending.
#[derive(Debug, PartialEq, Eq, Hash, Serialize, Deserialize, Clone)]
#[serde(rename_all = "camelCase")]
pub struct Aborted<M> {
/// The pending state this update comes from; its fields are flattened
/// into this struct when (de)serializing.
#[serde(flatten)]
from: Pending<M>,
/// Time at which the `Aborted` value was created (see `Pending::abort`).
aborted_at: DateTime<Utc>,
}
impl<M> Aborted<M> {
/// Identifier of the update.
pub fn id(&self) -> u64 {
self.from.id()
}
}
/// An update whose processing returned an error.
#[derive(Debug, PartialEq, Eq, Hash, Serialize, Deserialize, Clone)]
#[serde(rename_all = "camelCase")]
pub struct Failed<M, E> {
/// The processing state this update comes from; its fields are flattened
/// into this struct when (de)serializing.
#[serde(flatten)]
from: Processing<M>,
/// Error reported for the update.
error: E,
/// Time at which the `Failed` value was created (see `Processing::fail`).
failed_at: DateTime<Utc>,
}
impl<M, E> Failed<M, E> {
/// Identifier of the update.
pub fn id(&self) -> u64 {
self.from.id()
}
}
/// Any state an update can be in.
///
/// Serialized as an internally tagged enum: the camelCase variant name is
/// stored in a `status` field next to the fields of the wrapped state.
#[derive(Debug, PartialEq, Eq, Hash, Serialize)]
#[serde(tag = "status", rename_all = "camelCase")]
pub enum UpdateStatus<M, N, E> {
/// The update is currently being processed.
Processing(Processing<M>),
/// The update is waiting to be processed.
Pending(Pending<M>),
/// The update has been processed successfully.
Processed(Processed<M, N>),
/// The update has been aborted.
Aborted(Aborted<M>),
/// The processing of the update returned an error.
Failed(Failed<M, E>),
}
impl<M, N, E> UpdateStatus<M, N, E> {
pub fn id(&self) -> u64 {
match self {
UpdateStatus::Processing(u) => u.id(),
UpdateStatus::Pending(u) => u.id(),
UpdateStatus::Processed(u) => u.id(),
UpdateStatus::Aborted(u) => u.id(),
UpdateStatus::Failed(u) => u.id(),
}
}
pub fn processed(&self) -> Option<&Processed<M, N>> {
match self {
UpdateStatus::Processed(p) => Some(p),
_ => None,
}
}
}
// Conversions from each concrete update state into `UpdateStatus`: every impl
// wraps the state in the variant of the same name.
impl<M, N, E> From<Pending<M>> for UpdateStatus<M, N, E> {
fn from(other: Pending<M>) -> Self {
Self::Pending(other)
}
}
impl<M, N, E> From<Aborted<M>> for UpdateStatus<M, N, E> {
fn from(other: Aborted<M>) -> Self {
Self::Aborted(other)
}
}
impl<M, N, E> From<Processed<M, N>> for UpdateStatus<M, N, E> {
fn from(other: Processed<M, N>) -> Self {
Self::Processed(other)
}
}
impl<M, N, E> From<Processing<M>> for UpdateStatus<M, N, E> {
fn from(other: Processing<M>) -> Self {
Self::Processing(other)
}
}
impl<M, N, E> From<Failed<M, E>> for UpdateStatus<M, N, E> {
fn from(other: Failed<M, E>) -> Self {
Self::Failed(other)
}
}

View File

@@ -0,0 +1,60 @@
#![allow(clippy::or_fun_call)]
pub mod data;
pub mod error;
pub mod helpers;
pub mod option;
pub mod routes;
mod index_controller;
use actix_http::Error;
use actix_service::ServiceFactory;
use actix_web::{dev, web, App};
pub use option::Opt;
pub use self::data::Data;
use self::error::payload_error_handler;
/// Builds the actix `App` serving the HTTP API.
///
/// `data` is cloned into the application state and also provides the JSON
/// payload size limit. When `enable_frontend` is true the HTML and CSS
/// services of the bundled web interface are registered as well.
pub fn create_app(
    data: &Data,
    enable_frontend: bool,
) -> App<
    impl ServiceFactory<
        Config = (),
        Request = dev::ServiceRequest,
        Response = dev::ServiceResponse<actix_http::body::Body>,
        Error = Error,
        InitError = (),
    >,
    actix_http::body::Body,
> {
    // JSON extractor: bounded payload size, any mime type accepted, errors
    // reported through the crate's payload error handler.
    let json_config = web::JsonConfig::default()
        .limit(data.http_payload_size_limit())
        .content_type(|_mime| true) // Accept all mime types
        .error_handler(|err, _req| payload_error_handler(err).into());

    // Query-string extractor: same error reporting as the JSON one.
    let query_config = web::QueryConfig::default()
        .error_handler(|err, _req| payload_error_handler(err).into());

    let api = App::new()
        .data(data.clone())
        .app_data(json_config)
        .app_data(query_config)
        .configure(routes::document::services)
        .configure(routes::index::services)
        .configure(routes::search::services)
        .configure(routes::settings::services)
        .configure(routes::stop_words::services)
        .configure(routes::synonym::services)
        .configure(routes::health::services)
        .configure(routes::stats::services)
        .configure(routes::key::services);
        //.configure(routes::dump::services);

    if !enable_frontend {
        return api;
    }

    api
        .service(routes::load_html)
        .service(routes::load_css)
}

View File

@@ -0,0 +1,162 @@
use std::env;
use actix_cors::Cors;
use actix_web::{middleware, HttpServer};
use main_error::MainError;
use meilisearch_http::helpers::NormalizePath;
use meilisearch_http::{create_app, Data, Opt};
use structopt::StructOpt;
//mod analytics;
// On Linux, jemalloc is installed as the global allocator.
#[cfg(target_os = "linux")]
#[global_allocator]
static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc;
// Entry point: parses the options, sets up error reporting and logging
// according to the environment, creates the shared `Data` and runs the HTTP
// server (over TLS when an SSL configuration is provided).
#[actix_web::main]
async fn main() -> Result<(), MainError> {
let opt = Opt::from_args();
// Sentry is only compiled in for release builds with the `sentry` feature.
#[cfg(all(not(debug_assertions), feature = "sentry"))]
let _sentry = sentry::init((
if !opt.no_sentry {
Some(opt.sentry_dsn.clone())
} else {
None
},
sentry::ClientOptions {
release: sentry::release_name!(),
..Default::default()
},
));
match opt.env.as_ref() {
"production" => {
// A master key is mandatory in production.
if opt.master_key.is_none() {
return Err(
"In production mode, the environment variable MEILI_MASTER_KEY is mandatory"
.into(),
);
}
#[cfg(all(not(debug_assertions), feature = "sentry"))]
if !opt.no_sentry && _sentry.is_enabled() {
sentry::integrations::panic::register_panic_handler(); // TODO: This shouldn't be needed when upgrading to sentry 0.19.0. These integrations are turned on by default when using `sentry::init`.
sentry::integrations::env_logger::init(None, Default::default());
}
}
"development" => {
// Log at `info` level unless the environment says otherwise.
env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info")).init();
}
// NOTE(review): presumably unreachable because the option parser restricts
// `env` to the two values above — confirm against the `Opt` definition.
_ => unreachable!(),
}
//if let Some(path) = &opt.import_snapshot {
//snapshot::load_snapshot(&opt.db_path, path, opt.ignore_snapshot_if_db_exists, opt.ignore_missing_snapshot)?;
//}
let data = Data::new(opt.clone())?;
//if !opt.no_analytics {
//let analytics_data = data.clone();
//let analytics_opt = opt.clone();
//thread::spawn(move || analytics::analytics_sender(analytics_data, analytics_opt));
//}
//if let Some(path) = &opt.import_dump {
//dump::import_dump(&data, path, opt.dump_batch_size)?;
//}
//if opt.schedule_snapshot {
//snapshot::schedule_snapshot(data.clone(), &opt.snapshot_dir, opt.snapshot_interval_sec.unwrap_or(86400))?;
//}
print_launch_resume(&opt, &data);
// The bundled web interface is only served outside of production.
let enable_frontend = opt.env != "production";
let http_server = HttpServer::new(move || {
create_app(&data, enable_frontend)
.wrap(
Cors::default()
.send_wildcard()
.allowed_headers(vec!["content-type", "x-meili-api-key"])
.max_age(86_400) // 24h
)
.wrap(middleware::Logger::default())
.wrap(middleware::Compress::default())
.wrap(NormalizePath)
});
// Serve over TLS when an SSL configuration is available, plain HTTP otherwise.
if let Some(config) = opt.get_ssl_config()? {
http_server
.bind_rustls(opt.http_addr, config)?
.run()
.await?;
} else {
http_server.bind(opt.http_addr)?.run().await?;
}
Ok(())
}
/// Prints the startup banner on stderr: ASCII logo, main configuration
/// values, build information, whether a master key is set, and useful links.
pub fn print_launch_resume(opt: &Opt, data: &Data) {
let ascii_name = r#"
888b d888 d8b 888 d8b .d8888b. 888
8888b d8888 Y8P 888 Y8P d88P Y88b 888
88888b.d88888 888 Y88b. 888
888Y88888P888 .d88b. 888 888 888 "Y888b. .d88b. 8888b. 888d888 .d8888b 88888b.
888 Y888P 888 d8P Y8b 888 888 888 "Y88b. d8P Y8b "88b 888P" d88P" 888 "88b
888 Y8P 888 88888888 888 888 888 "888 88888888 .d888888 888 888 888 888
888 " 888 Y8b. 888 888 888 Y88b d88P Y8b. 888 888 888 Y88b. 888 888
888 888 "Y8888 888 888 888 "Y8888P" "Y8888 "Y888888 888 "Y8888P 888 888
"#;
eprintln!("{}", ascii_name);
eprintln!("Database path:\t\t{:?}", opt.db_path);
eprintln!("Server listening on:\t{:?}", opt.http_addr);
eprintln!("Environment:\t\t{:?}", opt.env);
// The values below are baked in at compile time through `env!`.
eprintln!("Commit SHA:\t\t{:?}", env!("VERGEN_SHA").to_string());
eprintln!(
"Build date:\t\t{:?}",
env!("VERGEN_BUILD_TIMESTAMP").to_string()
);
eprintln!(
"Package version:\t{:?}",
env!("CARGO_PKG_VERSION").to_string()
);
// Only compiled for release builds with the `sentry` feature.
#[cfg(all(not(debug_assertions), feature = "sentry"))]
eprintln!(
"Sentry DSN:\t\t{:?}",
if !opt.no_sentry {
&opt.sentry_dsn
} else {
"Disabled"
}
);
eprintln!(
"Amplitude Analytics:\t{:?}",
if !opt.no_analytics {
"Enabled"
} else {
"Disabled"
}
);
eprintln!();
// Tell the user whether requests have to be authenticated.
if data.api_keys().master.is_some() {
eprintln!("A Master Key has been set. Requests to MeiliSearch won't be authorized unless you provide an authentication key.");
} else {
eprintln!("No master key found; The server will accept unidentified requests. \
If you need some protection in development mode, please export a key: export MEILI_MASTER_KEY=xxx");
}
eprintln!();
eprintln!("Documentation:\t\thttps://docs.meilisearch.com");
eprintln!("Source code:\t\thttps://github.com/meilisearch/meilisearch");
eprintln!("Contact:\t\thttps://docs.meilisearch.com/resources/contact.html or bonjour@meilisearch.com");
eprintln!();
}

View File

@@ -0,0 +1,294 @@
use std::{error, fs};
use std::io::{BufReader, Read};
use std::path::PathBuf;
use std::sync::Arc;
use byte_unit::Byte;
use rustls::internal::pemfile::{certs, pkcs8_private_keys, rsa_private_keys};
use rustls::{
AllowAnyAnonymousOrAuthenticatedClient, AllowAnyAuthenticatedClient, NoClientAuth,
RootCertStore,
};
use grenad::CompressionType;
use structopt::StructOpt;
// Command-line options tuning the indexer.
//
// NOTE: the `///` comments on the fields are consumed by `structopt` to build
// the `--help` output, so they are user-facing text and are left untouched;
// maintainer notes in this struct use plain `//` comments instead.
#[derive(Debug, Clone, StructOpt)]
pub struct IndexerOpts {
/// The amount of documents to skip before printing
/// a log regarding the indexing advancement.
#[structopt(long, default_value = "100000")] // 100k
pub log_every_n: usize,
/// Grenad max number of chunks in bytes.
#[structopt(long)]
pub max_nb_chunks: Option<usize>,
/// The maximum amount of memory to use for the Grenad buffer. It is recommended
/// to use something like 80%-90% of the available memory.
///
/// It is automatically split by the number of jobs e.g. if you use 7 jobs
/// and 7 GB of max memory, each thread will use a maximum of 1 GB.
#[structopt(long, default_value = "7 GiB")]
pub max_memory: Byte,
/// Size of the linked hash map cache when indexing.
/// The bigger it is, the faster the indexing is but the more memory it takes.
#[structopt(long, default_value = "500")]
pub linked_hash_map_size: usize,
/// The name of the compression algorithm to use when compressing intermediate
/// Grenad chunks while indexing documents.
///
/// Choosing a fast algorithm will make the indexing faster but may consume more memory.
#[structopt(long, default_value = "snappy", possible_values = &["snappy", "zlib", "lz4", "lz4hc", "zstd"])]
pub chunk_compression_type: CompressionType,
/// The level of compression of the chosen algorithm.
// Can only be given together with `--chunk-compression-type`.
#[structopt(long, requires = "chunk-compression-type")]
pub chunk_compression_level: Option<u32>,
/// The number of bytes to remove from the begining of the chunks while reading/sorting
/// or merging them.
///
/// File fusing must only be enable on file systems that support the `FALLOC_FL_COLLAPSE_RANGE`,
/// (i.e. ext4 and XFS). File fusing will only work if the `enable-chunk-fusing` is set.
#[structopt(long, default_value = "4 GiB")]
pub chunk_fusing_shrink_size: Byte,
/// Enable the chunk fusing or not, this reduces the amount of disk space used.
#[structopt(long)]
pub enable_chunk_fusing: bool,
/// Number of parallel jobs for indexing, defaults to # of CPUs.
#[structopt(long)]
pub indexing_jobs: Option<usize>,
}
// Programmatic defaults, used when the options are not parsed from the
// command line.
//
// NOTE(review): `max_memory` (1GiB) and `chunk_compression_type` (None) differ
// from the command-line defaults declared on the struct ("7 GiB" and
// "snappy") — confirm this is intentional.
impl Default for IndexerOpts {
fn default() -> Self {
Self {
log_every_n: 100_000,
max_nb_chunks: None,
max_memory: Byte::from_str("1GiB").unwrap(),
linked_hash_map_size: 500,
chunk_compression_type: CompressionType::None,
chunk_compression_level: None,
chunk_fusing_shrink_size: Byte::from_str("4GiB").unwrap(),
enable_chunk_fusing: false,
indexing_jobs: None,
}
}
}
// Values accepted for the `env` option of `Opt`.
const POSSIBLE_ENV: [&str; 2] = ["development", "production"];
// Server configuration, parsed from command-line flags and (where an `env = "..."`
// attribute is present) from environment variables via structopt.
//
// The `///` comments below are left untouched on purpose: structopt turns them into
// the CLI help text, so they are part of the program's output.
#[derive(Debug, Clone, StructOpt)]
pub struct Opt {
/// The destination where the database must be created.
#[structopt(long, env = "MEILI_DB_PATH", default_value = "./data.ms")]
pub db_path: PathBuf,
/// The address on which the http server will listen.
#[structopt(long, env = "MEILI_HTTP_ADDR", default_value = "127.0.0.1:7700")]
pub http_addr: String,
/// The master key allowing you to do everything on the server.
#[structopt(long, env = "MEILI_MASTER_KEY")]
pub master_key: Option<String>,
// The two Sentry fields only exist when compiling without debug assertions
// and with the `sentry` cargo feature enabled.
/// The Sentry DSN to use for error reporting. This defaults to the MeiliSearch Sentry project.
/// You can disable sentry all together using the `--no-sentry` flag or `MEILI_NO_SENTRY` environment variable.
#[cfg(all(not(debug_assertions), feature = "sentry"))]
#[structopt(long, env = "SENTRY_DSN", default_value = "https://5ddfa22b95f241198be2271aaf028653@sentry.io/3060337")]
pub sentry_dsn: String,
/// Disable Sentry error reporting.
#[structopt(long, env = "MEILI_NO_SENTRY")]
#[cfg(all(not(debug_assertions), feature = "sentry"))]
pub no_sentry: bool,
// Accepted values are restricted to `POSSIBLE_ENV`.
/// This environment variable must be set to `production` if you are running in production.
/// If the server is running in development mode more logs will be displayed,
/// and the master key can be avoided which implies that there is no security on the updates routes.
/// This is useful to debug when integrating the engine with another service.
#[structopt(long, env = "MEILI_ENV", default_value = "development", possible_values = &POSSIBLE_ENV)]
pub env: String,
/// Do not send analytics to Meili.
#[structopt(long, env = "MEILI_NO_ANALYTICS")]
pub no_analytics: bool,
/// The maximum size, in bytes, of the main lmdb database directory
#[structopt(long, env = "MEILI_MAX_MDB_SIZE", default_value = "100 GiB")]
pub max_mdb_size: Byte,
/// The maximum size, in bytes, of the update lmdb database directory
#[structopt(long, env = "MEILI_MAX_UDB_SIZE", default_value = "10 GiB")]
pub max_udb_size: Byte,
/// The maximum size, in bytes, of accepted JSON payloads
#[structopt(long, env = "MEILI_HTTP_PAYLOAD_SIZE_LIMIT", default_value = "10 MiB")]
pub http_payload_size_limit: Byte,
// TLS options: `get_ssl_config` only builds a TLS configuration when both
// `ssl_cert_path` and `ssl_key_path` are set.
/// Read server certificates from CERTFILE.
/// This should contain PEM-format certificates
/// in the right order (the first certificate should
/// certify KEYFILE, the last should be a root CA).
#[structopt(long, env = "MEILI_SSL_CERT_PATH", parse(from_os_str))]
pub ssl_cert_path: Option<PathBuf>,
/// Read private key from KEYFILE. This should be a RSA
/// private key or PKCS8-encoded private key, in PEM format.
#[structopt(long, env = "MEILI_SSL_KEY_PATH", parse(from_os_str))]
pub ssl_key_path: Option<PathBuf>,
/// Enable client authentication, and accept certificates
/// signed by those roots provided in CERTFILE.
#[structopt(long, env = "MEILI_SSL_AUTH_PATH", parse(from_os_str))]
pub ssl_auth_path: Option<PathBuf>,
/// Read DER-encoded OCSP response from OCSPFILE and staple to certificate.
/// Optional
#[structopt(long, env = "MEILI_SSL_OCSP_PATH", parse(from_os_str))]
pub ssl_ocsp_path: Option<PathBuf>,
/// Send a fatal alert if the client does not complete client authentication.
#[structopt(long, env = "MEILI_SSL_REQUIRE_AUTH")]
pub ssl_require_auth: bool,
/// SSL support session resumption
#[structopt(long, env = "MEILI_SSL_RESUMPTION")]
pub ssl_resumption: bool,
/// SSL support tickets.
#[structopt(long, env = "MEILI_SSL_TICKETS")]
pub ssl_tickets: bool,
// Snapshot options. Note that the three import-related flags below have no `env`
// attribute, so they can only be set on the command line.
/// Defines the path of the snapshot file to import.
/// This option will, by default, stop the process if a database already exist or if no snapshot exists at
/// the given path. If this option is not specified no snapshot is imported.
#[structopt(long)]
pub import_snapshot: Option<PathBuf>,
/// The engine will ignore a missing snapshot and not return an error in such case.
#[structopt(long, requires = "import-snapshot")]
pub ignore_missing_snapshot: bool,
/// The engine will skip snapshot importation and not return an error in such case.
#[structopt(long, requires = "import-snapshot")]
pub ignore_snapshot_if_db_exists: bool,
/// Defines the directory path where meilisearch will create snapshot each snapshot_time_gap.
#[structopt(long, env = "MEILI_SNAPSHOT_DIR", default_value = "snapshots/")]
pub snapshot_dir: PathBuf,
/// Activate snapshot scheduling.
#[structopt(long, env = "MEILI_SCHEDULE_SNAPSHOT")]
pub schedule_snapshot: bool,
/// Defines time interval, in seconds, between each snapshot creation.
#[structopt(long, env = "MEILI_SNAPSHOT_INTERVAL_SEC")]
pub snapshot_interval_sec: Option<u64>,
// Dump options. `import_dump` is declared mutually exclusive with `import_snapshot`.
/// Folder where dumps are created when the dump route is called.
#[structopt(long, env = "MEILI_DUMPS_DIR", default_value = "dumps/")]
pub dumps_dir: PathBuf,
/// Import a dump from the specified path, must be a `.tar.gz` file.
#[structopt(long, conflicts_with = "import-snapshot")]
pub import_dump: Option<PathBuf>,
// NOTE(review): the help text below mentions both "importation" and "created";
// confirm whether the batch size applies to dump import, dump creation, or both.
/// The batch size used in the importation process, the bigger it is the faster the dump is created.
#[structopt(long, env = "MEILI_DUMP_BATCH_SIZE", default_value = "1024")]
pub dump_batch_size: usize,
// Indexer tuning flags are declared on `IndexerOpts` and flattened into this struct.
#[structopt(flatten)]
pub indexer_options: IndexerOpts,
}
impl Opt {
    /// Builds the rustls server configuration described by the `ssl_*` options.
    ///
    /// Returns `Ok(None)` unless both `ssl_cert_path` and `ssl_key_path` are set.
    /// When `ssl_auth_path` is set, its certificates become the trusted client roots;
    /// `ssl_require_auth` then decides whether anonymous clients are still accepted.
    ///
    /// # Errors
    ///
    /// Returns an error when a certificate, key or OCSP file cannot be opened or parsed,
    /// when a client root certificate is rejected by the root store, or when the
    /// certificate chain and private key are refused by rustls.
    pub fn get_ssl_config(&self) -> Result<Option<rustls::ServerConfig>, Box<dyn error::Error>> {
        if let (Some(cert_path), Some(key_path)) = (&self.ssl_cert_path, &self.ssl_key_path) {
            let client_auth = match &self.ssl_auth_path {
                Some(auth_path) => {
                    let roots = load_certs(auth_path.to_path_buf())?;
                    let mut client_auth_roots = RootCertStore::empty();
                    for root in roots {
                        // The roots come from a user-supplied file: report a bad
                        // certificate as an error instead of panicking.
                        client_auth_roots
                            .add(&root)
                            .map_err(|_| "cannot add client authentication root certificate")?;
                    }
                    if self.ssl_require_auth {
                        AllowAnyAuthenticatedClient::new(client_auth_roots)
                    } else {
                        AllowAnyAnonymousOrAuthenticatedClient::new(client_auth_roots)
                    }
                }
                None => NoClientAuth::new(),
            };

            let mut config = rustls::ServerConfig::new(client_auth);
            config.key_log = Arc::new(rustls::KeyLogFile::new());

            let certs = load_certs(cert_path.to_path_buf())?;
            let privkey = load_private_key(key_path.to_path_buf())?;
            let ocsp = load_ocsp(&self.ssl_ocsp_path)?;
            config
                .set_single_cert_with_ocsp_and_sct(certs, privkey, ocsp, vec![])
                .map_err(|_| "bad certificates/private key")?;

            if self.ssl_resumption {
                config.set_persistence(rustls::ServerSessionMemoryCache::new(256));
            }
            if self.ssl_tickets {
                config.ticketer = rustls::Ticketer::new();
            }

            Ok(Some(config))
        } else {
            Ok(None)
        }
    }
}
/// Reads every certificate found in the file at `filename`.
///
/// # Errors
///
/// Fails with "cannot open certificate file" when the file cannot be opened and with
/// "cannot read certificate file" when its content cannot be parsed.
fn load_certs(filename: PathBuf) -> Result<Vec<rustls::Certificate>, Box<dyn error::Error>> {
    let file = fs::File::open(filename).map_err(|_| "cannot open certificate file")?;
    let mut buffered = BufReader::new(file);
    let parsed = certs(&mut buffered).map_err(|_| "cannot read certificate file")?;
    Ok(parsed)
}
/// Loads the server private key from the file at `filename`.
///
/// The file is parsed twice, once as RSA keys and once as PKCS8 keys; the first
/// PKCS8 key is preferred and the first RSA key is used as a fallback.
///
/// # Errors
///
/// Fails when the file cannot be opened, when either parser rejects its content,
/// or when the file contains no private key at all.
fn load_private_key(filename: PathBuf) -> Result<rustls::PrivateKey, Box<dyn error::Error>> {
    let rsa_keys = {
        let keyfile = fs::File::open(&filename).map_err(|_| "cannot open private key file")?;
        let mut reader = BufReader::new(keyfile);
        rsa_private_keys(&mut reader).map_err(|_| "file contains invalid rsa private key")?
    };

    let pkcs8_keys = {
        let keyfile = fs::File::open(&filename).map_err(|_| "cannot open private key file")?;
        let mut reader = BufReader::new(keyfile);
        pkcs8_private_keys(&mut reader)
            .map_err(|_| "file contains invalid pkcs8 private key (encrypted keys not supported)")?
    };

    // Prefer PKCS8 keys. A key file without any key is a configuration mistake,
    // so report it as an error rather than aborting the process with a panic.
    let key = pkcs8_keys
        .into_iter()
        .next()
        .or_else(|| rsa_keys.into_iter().next())
        .ok_or("private key file contains no private key")?;
    Ok(key)
}
/// Reads the raw OCSP response stored in `filename`, if any.
///
/// Returns an empty buffer when `filename` is `None`.
///
/// # Errors
///
/// Fails with "cannot open ocsp file" when the file cannot be opened and with
/// "cannot read ocsp file" when reading it fails.
fn load_ocsp(filename: &Option<PathBuf>) -> Result<Vec<u8>, Box<dyn error::Error>> {
    let mut ret = Vec::new();

    if let Some(name) = filename {
        fs::File::open(name)
            .map_err(|_| "cannot open ocsp file")?
            .read_to_end(&mut ret)
            .map_err(|_| "cannot read ocsp file")?;
    }

    Ok(ret)
}

View File

@@ -0,0 +1,262 @@
use actix_web::web::Payload;
use actix_web::{delete, get, post, put};
use actix_web::{web, HttpResponse};
use indexmap::IndexMap;
use log::error;
use milli::update::{IndexDocumentsMethod, UpdateFormat};
use serde::Deserialize;
use serde_json::Value;
use crate::Data;
use crate::error::ResponseError;
use crate::helpers::Authentication;
use crate::routes::IndexParam;
// Pagination defaults used by `get_all_documents` when the query string
// omits `offset` or `limit`.
const DEFAULT_RETRIEVE_DOCUMENTS_OFFSET: usize = 0;
const DEFAULT_RETRIEVE_DOCUMENTS_LIMIT: usize = 20;
/// Generates a route guard function named `$fn_name`.
///
/// The generated guard accepts a request only when it carries a `Content-Type`
/// header that is valid text and contains `$guard_value`.
macro_rules! guard_content_type {
    ($fn_name:ident, $guard_value:literal) => {
        fn $fn_name(head: &actix_web::dev::RequestHead) -> bool {
            head.headers
                .get("Content-Type")
                .and_then(|content_type| content_type.to_str().ok())
                .map_or(false, |value| value.contains($guard_value))
        }
    };
}

// Guard used by the JSON document routes.
guard_content_type!(guard_json, "application/json");
/// A document as received from clients: a map from field name to JSON value.
type Document = IndexMap<String, Value>;
/// Path parameters of the single-document routes
/// (`/indexes/{index_uid}/documents/{document_id}`).
#[derive(Deserialize)]
struct DocumentParam {
index_uid: String,
document_id: String,
}
/// Registers the document routes on the given service configuration.
///
/// `add_documents_default` and `update_documents_default` are not registered here.
pub fn services(cfg: &mut web::ServiceConfig) {
    cfg.service(get_document);
    cfg.service(delete_document);
    cfg.service(get_all_documents);
    cfg.service(add_documents_json);
    cfg.service(update_documents);
    cfg.service(delete_documents);
    cfg.service(clear_all_documents);
}
#[get(
"/indexes/{index_uid}/documents/{document_id}",
wrap = "Authentication::Public"
)]
async fn get_document(
data: web::Data<Data>,
path: web::Path<DocumentParam>,
) -> Result<HttpResponse, ResponseError> {
let index = path.index_uid.clone();
let id = path.document_id.clone();
match data.retrieve_document(index, id, None as Option<Vec<String>>).await {
Ok(document) => {
let json = serde_json::to_string(&document).unwrap();
Ok(HttpResponse::Ok().body(json))
}
Err(e) => {
Ok(HttpResponse::BadRequest().body(serde_json::json!({ "error": e.to_string() })))
}
}
}
#[delete(
"/indexes/{index_uid}/documents/{document_id}",
wrap = "Authentication::Private"
)]
async fn delete_document(
data: web::Data<Data>,
path: web::Path<DocumentParam>,
) -> Result<HttpResponse, ResponseError> {
match data.delete_documents(path.index_uid.clone(), vec![path.document_id.clone()]).await {
Ok(result) => {
let json = serde_json::to_string(&result).unwrap();
Ok(HttpResponse::Ok().body(json))
}
Err(e) => {
Ok(HttpResponse::BadRequest().body(serde_json::json!({ "error": e.to_string() })))
}
}
}
/// Query-string parameters of the document listing route.
///
/// Keys are camelCase (`attributesToRetrieve`) and unknown keys are rejected.
#[derive(Deserialize)]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
struct BrowseQuery {
offset: Option<usize>,
limit: Option<usize>,
// Comma-separated list of attribute names.
attributes_to_retrieve: Option<String>,
}
#[get("/indexes/{index_uid}/documents", wrap = "Authentication::Public")]
async fn get_all_documents(
data: web::Data<Data>,
path: web::Path<IndexParam>,
params: web::Query<BrowseQuery>,
) -> Result<HttpResponse, ResponseError> {
let attributes_to_retrieve = params
.attributes_to_retrieve
.as_ref()
.map(|attrs| attrs
.split(",")
.map(String::from)
.collect::<Vec<_>>());
match data.retrieve_documents(
path.index_uid.clone(),
params.offset.unwrap_or(DEFAULT_RETRIEVE_DOCUMENTS_OFFSET),
params.limit.unwrap_or(DEFAULT_RETRIEVE_DOCUMENTS_LIMIT),
attributes_to_retrieve).await {
Ok(docs) => {
let json = serde_json::to_string(&docs).unwrap();
Ok(HttpResponse::Ok().body(json))
}
Err(e) => {
Ok(HttpResponse::BadRequest().body(serde_json::json!({ "error": e.to_string() })))
}
}
}
/// Query-string parameters of the document addition/update routes.
///
/// The only accepted key is `primaryKey`; unknown keys are rejected.
#[derive(Deserialize)]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
struct UpdateDocumentsQuery {
primary_key: Option<String>,
}
/// Route used when the payload type is "application/json"
#[post(
"/indexes/{index_uid}/documents",
wrap = "Authentication::Private",
guard = "guard_json"
)]
async fn add_documents_json(
data: web::Data<Data>,
path: web::Path<IndexParam>,
params: web::Query<UpdateDocumentsQuery>,
body: Payload,
) -> Result<HttpResponse, ResponseError> {
let addition_result = data
.add_documents(
path.into_inner().index_uid,
IndexDocumentsMethod::ReplaceDocuments,
UpdateFormat::Json,
body,
params.primary_key.clone(),
).await;
match addition_result {
Ok(update) => {
let value = serde_json::to_string(&update).unwrap();
let response = HttpResponse::Ok().body(value);
Ok(response)
}
Err(e) => {
Ok(HttpResponse::BadRequest().body(serde_json::json!({ "error": e.to_string() })))
}
}
}
/// Default route for adding documents, this should return an error and redirect to the documentation
///
/// Not implemented yet: the handler logs "Unknown document type" and then hits
/// `todo!()`, which panics. It is not registered by `services` in this file.
#[post("/indexes/{index_uid}/documents", wrap = "Authentication::Private")]
async fn add_documents_default(
_data: web::Data<Data>,
_path: web::Path<IndexParam>,
_params: web::Query<UpdateDocumentsQuery>,
_body: web::Json<Vec<Document>>,
) -> Result<HttpResponse, ResponseError> {
error!("Unknown document type");
todo!()
}
/// Default route for updating documents (`PUT`), this should return an error and redirect to the documentation
///
/// Not implemented yet: the handler logs "Unknown document type" and then hits
/// `todo!()`, which panics. It is not registered by `services` in this file.
#[put("/indexes/{index_uid}/documents", wrap = "Authentication::Private")]
async fn update_documents_default(
_data: web::Data<Data>,
_path: web::Path<IndexParam>,
_params: web::Query<UpdateDocumentsQuery>,
_body: web::Json<Vec<Document>>,
) -> Result<HttpResponse, ResponseError> {
error!("Unknown document type");
todo!()
}
#[put(
"/indexes/{index_uid}/documents",
wrap = "Authentication::Private",
guard = "guard_json",
)]
async fn update_documents(
data: web::Data<Data>,
path: web::Path<IndexParam>,
params: web::Query<UpdateDocumentsQuery>,
body: web::Payload,
) -> Result<HttpResponse, ResponseError> {
let addition_result = data
.add_documents(
path.into_inner().index_uid,
IndexDocumentsMethod::UpdateDocuments,
UpdateFormat::Json,
body,
params.primary_key.clone(),
).await;
match addition_result {
Ok(update) => {
let value = serde_json::to_string(&update).unwrap();
let response = HttpResponse::Ok().body(value);
Ok(response)
}
Err(e) => {
Ok(HttpResponse::BadRequest().body(serde_json::json!({ "error": e.to_string() })))
}
}
}
#[post(
"/indexes/{index_uid}/documents/delete-batch",
wrap = "Authentication::Private"
)]
async fn delete_documents(
data: web::Data<Data>,
path: web::Path<IndexParam>,
body: web::Json<Vec<Value>>,
) -> Result<HttpResponse, ResponseError> {
let ids = body
.iter()
.map(|v| v.as_str().map(String::from).unwrap_or_else(|| v.to_string()))
.collect();
match data.delete_documents(path.index_uid.clone(), ids).await {
Ok(result) => {
let json = serde_json::to_string(&result).unwrap();
Ok(HttpResponse::Ok().body(json))
}
Err(e) => {
Ok(HttpResponse::BadRequest().body(serde_json::json!({ "error": e.to_string() })))
}
}
}
#[delete("/indexes/{index_uid}/documents", wrap = "Authentication::Private")]
async fn clear_all_documents(
data: web::Data<Data>,
path: web::Path<IndexParam>,
) -> Result<HttpResponse, ResponseError> {
match data.clear_documents(path.index_uid.clone()).await {
Ok(update) => {
let json = serde_json::to_string(&update).unwrap();
Ok(HttpResponse::Ok().body(json))
}
Err(e) => {
Ok(HttpResponse::BadRequest().body(serde_json::json!({ "error": e.to_string() })))
}
}
}

View File

@@ -0,0 +1,42 @@
use std::fs::File;
use std::path::Path;
use actix_web::{get, post};
use actix_web::{HttpResponse, web};
use serde::{Deserialize, Serialize};
use crate::dump::{DumpInfo, DumpStatus, compressed_dumps_dir, init_dump_process};
use crate::Data;
use crate::error::{Error, ResponseError};
use crate::helpers::Authentication;
/// Registers the dump routes on the given service configuration.
pub fn services(cfg: &mut web::ServiceConfig) {
    cfg.service(trigger_dump);
    cfg.service(get_dump_status);
}
/// Handler for `POST /dumps`, intended to start the creation of a dump.
///
/// Not implemented yet: the body is `todo!()`, so calling it panics.
#[post("/dumps", wrap = "Authentication::Private")]
async fn trigger_dump(
data: web::Data<Data>,
) -> Result<HttpResponse, ResponseError> {
todo!()
}
/// Response body carrying the status of a dump, serialized with camelCase keys.
#[derive(Debug, Serialize)]
#[serde(rename_all = "camelCase")]
struct DumpStatusResponse {
status: String,
}
/// Path parameters of the dump status route (`/dumps/{dump_uid}/status`).
#[derive(Deserialize)]
struct DumpParam {
dump_uid: String,
}
/// Handler for `GET /dumps/{dump_uid}/status`, intended to report the status of a dump.
///
/// Not implemented yet: the body is `todo!()`, so calling it panics.
#[get("/dumps/{dump_uid}/status", wrap = "Authentication::Private")]
async fn get_dump_status(
data: web::Data<Data>,
path: web::Path<DumpParam>,
) -> Result<HttpResponse, ResponseError> {
todo!()
}

View File

@@ -0,0 +1,13 @@
use actix_web::get;
use actix_web::{web, HttpResponse};
use crate::error::ResponseError;
/// Registers the health route on the given service configuration.
pub fn services(cfg: &mut web::ServiceConfig) {
cfg.service(get_health);
}
/// Health probe: always answers with an empty 204 (No Content) response.
#[get("/health")]
async fn get_health() -> Result<HttpResponse, ResponseError> {
    let response = HttpResponse::NoContent().finish();
    Ok(response)
}

View File

@@ -0,0 +1,167 @@
use actix_web::{delete, get, post, put};
use actix_web::{web, HttpResponse};
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use crate::Data;
use crate::error::ResponseError;
use crate::helpers::Authentication;
use crate::routes::IndexParam;
/// Registers the index and update-status routes on the given service configuration.
pub fn services(cfg: &mut web::ServiceConfig) {
    cfg.service(list_indexes);
    cfg.service(get_index);
    cfg.service(create_index);
    cfg.service(update_index);
    cfg.service(delete_index);
    cfg.service(get_update_status);
    cfg.service(get_all_updates_status);
}
#[get("/indexes", wrap = "Authentication::Private")]
async fn list_indexes(data: web::Data<Data>) -> Result<HttpResponse, ResponseError> {
match data.list_indexes() {
Ok(indexes) => {
let json = serde_json::to_string(&indexes).unwrap();
Ok(HttpResponse::Ok().body(&json))
}
Err(e) => {
Ok(HttpResponse::BadRequest().body(serde_json::json!({ "error": e.to_string() })))
}
}
}
#[get("/indexes/{index_uid}", wrap = "Authentication::Private")]
async fn get_index(
data: web::Data<Data>,
path: web::Path<IndexParam>,
) -> Result<HttpResponse, ResponseError> {
match data.index(&path.index_uid)? {
Some(meta) => {
let json = serde_json::to_string(&meta).unwrap();
Ok(HttpResponse::Ok().body(json))
}
None => {
let e = format!("Index {:?} doesn't exist.", path.index_uid);
Ok(HttpResponse::BadRequest().body(serde_json::json!({ "error": e.to_string() })))
}
}
}
/// JSON body of the index creation route.
///
/// Keys are camelCase (`uid`, `primaryKey`) and unknown keys are rejected.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
struct IndexCreateRequest {
uid: String,
primary_key: Option<String>,
}
#[post("/indexes", wrap = "Authentication::Private")]
async fn create_index(
data: web::Data<Data>,
body: web::Json<IndexCreateRequest>,
) -> Result<HttpResponse, ResponseError> {
match data.create_index(&body.uid, body.primary_key.clone()) {
Ok(meta) => {
let json = serde_json::to_string(&meta).unwrap();
Ok(HttpResponse::Ok().body(json))
}
Err(e) => {
Ok(HttpResponse::BadRequest().body(serde_json::json!({ "error": e.to_string() })))
}
}
}
/// JSON body of the index update route.
///
/// Both fields are optional; keys are camelCase and unknown keys are rejected.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
struct UpdateIndexRequest {
name: Option<String>,
primary_key: Option<String>,
}
/// Index description serialized with camelCase keys.
///
/// NOTE(review): no handler in this file constructs this type — confirm it is still needed.
#[derive(Debug, Serialize)]
#[serde(rename_all = "camelCase")]
struct UpdateIndexResponse {
name: String,
uid: String,
created_at: DateTime<Utc>,
updated_at: DateTime<Utc>,
primary_key: Option<String>,
}
#[put("/indexes/{index_uid}", wrap = "Authentication::Private")]
async fn update_index(
data: web::Data<Data>,
path: web::Path<IndexParam>,
body: web::Json<UpdateIndexRequest>,
) -> Result<HttpResponse, ResponseError> {
match data.update_index(&path.index_uid, body.primary_key.as_ref(), body.name.as_ref()) {
Ok(meta) => {
let json = serde_json::to_string(&meta).unwrap();
Ok(HttpResponse::Ok().body(json))
}
Err(e) => {
Ok(HttpResponse::BadRequest().body(serde_json::json!({ "error": e.to_string() })))
}
}
}
#[delete("/indexes/{index_uid}", wrap = "Authentication::Private")]
async fn delete_index(
data: web::Data<Data>,
path: web::Path<IndexParam>,
) -> Result<HttpResponse, ResponseError> {
match data.delete_index(path.index_uid.clone()).await {
Ok(_) => Ok(HttpResponse::Ok().finish()),
Err(e) => {
Ok(HttpResponse::BadRequest().body(serde_json::json!({ "error": e.to_string() })))
}
}
}
/// Path parameters of the single-update status route
/// (`/indexes/{index_uid}/updates/{update_id}`).
#[derive(Deserialize)]
struct UpdateParam {
index_uid: String,
update_id: u64,
}
#[get(
"/indexes/{index_uid}/updates/{update_id}",
wrap = "Authentication::Private"
)]
async fn get_update_status(
data: web::Data<Data>,
path: web::Path<UpdateParam>,
) -> Result<HttpResponse, ResponseError> {
let result = data.get_update_status(&path.index_uid, path.update_id);
match result {
Ok(Some(meta)) => {
let json = serde_json::to_string(&meta).unwrap();
Ok(HttpResponse::Ok().body(json))
}
Ok(None) => {
let e = format!("udpate {} for index {:?} doesn't exists.", path.update_id, path.index_uid);
Ok(HttpResponse::BadRequest().body(serde_json::json!({ "error": e.to_string() })))
}
Err(e) => {
Ok(HttpResponse::BadRequest().body(serde_json::json!({ "error": e.to_string() })))
}
}
}
#[get("/indexes/{index_uid}/updates", wrap = "Authentication::Private")]
async fn get_all_updates_status(
data: web::Data<Data>,
path: web::Path<IndexParam>,
) -> Result<HttpResponse, ResponseError> {
let result = data.get_updates_status(&path.index_uid);
match result {
Ok(metas) => {
let json = serde_json::to_string(&metas).unwrap();
Ok(HttpResponse::Ok().body(json))
}
Err(e) => {
Ok(HttpResponse::BadRequest().body(serde_json::json!({ "error": e.to_string() })))
}
}
}

View File

@@ -0,0 +1,26 @@
use actix_web::web;
use actix_web::HttpResponse;
use actix_web::get;
use serde::Serialize;
use crate::helpers::Authentication;
use crate::Data;
/// Registers the API keys route on the given service configuration.
pub fn services(cfg: &mut web::ServiceConfig) {
cfg.service(list);
}
/// Response body of `GET /keys`: the private and public API keys, when set.
#[derive(Serialize)]
struct KeysResponse {
private: Option<String>,
public: Option<String>,
}
/// Returns the private and public API keys as JSON.
#[get("/keys", wrap = "Authentication::Admin")]
async fn list(data: web::Data<Data>) -> HttpResponse {
    let keys = data.api_keys.clone();
    let payload = KeysResponse {
        private: keys.private,
        public: keys.public,
    };
    HttpResponse::Ok().json(payload)
}

View File

@@ -0,0 +1,44 @@
use actix_web::{get, HttpResponse};
use serde::{Deserialize, Serialize};
pub mod document;
pub mod health;
pub mod index;
pub mod key;
pub mod search;
pub mod settings;
pub mod stats;
pub mod stop_words;
pub mod synonym;
//pub mod dump;
/// Path parameter shared by the routes addressed as `/indexes/{index_uid}/...`.
#[derive(Deserialize)]
pub struct IndexParam {
// Private field: readable from this module and its child route modules only.
index_uid: String,
}
/// Response body carrying the id of an update, serialized as `{"updateId": ...}`.
#[derive(Serialize)]
#[serde(rename_all = "camelCase")]
pub struct IndexUpdateResponse {
pub update_id: u64,
}
impl IndexUpdateResponse {
pub fn with_id(update_id: u64) -> Self {
Self { update_id }
}
}
/// Serves the embedded web interface page at `/`.
///
/// The page is compiled into the binary with `include_str!`.
#[get("/")]
pub async fn load_html() -> HttpResponse {
    let page = include_str!("../../public/interface.html").to_string();
    HttpResponse::Ok()
        .content_type("text/html; charset=utf-8")
        .body(page)
}
/// Serves the embedded stylesheet at `/bulma.min.css`.
///
/// The stylesheet is compiled into the binary with `include_str!`.
#[get("/bulma.min.css")]
pub async fn load_css() -> HttpResponse {
    let stylesheet = include_str!("../../public/bulma.min.css").to_string();
    HttpResponse::Ok()
        .content_type("text/css; charset=utf-8")
        .body(stylesheet)
}

View File

@@ -0,0 +1,114 @@
use std::collections::HashSet;
use std::convert::{TryFrom, TryInto};
use actix_web::{get, post, web, HttpResponse};
use serde::Deserialize;
use crate::data::{SearchQuery, DEFAULT_SEARCH_LIMIT};
use crate::error::ResponseError;
use crate::helpers::Authentication;
use crate::routes::IndexParam;
use crate::Data;
/// Registers the search routes (POST and GET variants) on the given service configuration.
pub fn services(cfg: &mut web::ServiceConfig) {
    cfg.service(search_with_post);
    cfg.service(search_with_url_query);
}
/// Search parameters as received in the query string of the GET search route.
///
/// Keys are camelCase and unknown keys are rejected. List-like parameters arrive
/// as plain strings and are converted by the `TryFrom<SearchQueryGet>` impl of
/// `SearchQuery`.
#[derive(Deserialize, Debug)]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
pub struct SearchQueryGet {
q: Option<String>,
offset: Option<usize>,
limit: Option<usize>,
// Comma-separated lists of attribute names.
attributes_to_retrieve: Option<String>,
attributes_to_crop: Option<String>,
crop_length: Option<usize>,
attributes_to_highlight: Option<String>,
filters: Option<String>,
matches: Option<bool>,
// JSON-encoded value, parsed with `serde_json::from_str`.
facet_filters: Option<String>,
// Comma-separated list.
facet_distributions: Option<String>,
}
impl TryFrom<SearchQueryGet> for SearchQuery {
type Error = anyhow::Error;
fn try_from(other: SearchQueryGet) -> anyhow::Result<Self> {
let attributes_to_retrieve = other
.attributes_to_retrieve
.map(|attrs| attrs.split(",").map(String::from).collect::<Vec<_>>());
let attributes_to_crop = other
.attributes_to_crop
.map(|attrs| attrs.split(",").map(String::from).collect::<Vec<_>>());
let attributes_to_highlight = other
.attributes_to_highlight
.map(|attrs| attrs.split(",").map(String::from).collect::<HashSet<_>>());
let facet_distributions = other
.facet_distributions
.map(|attrs| attrs.split(",").map(String::from).collect::<Vec<_>>());
let facet_filters = match other.facet_filters {
Some(ref f) => Some(serde_json::from_str(f)?),
None => None,
};
Ok(Self {
q: other.q,
offset: other.offset,
limit: other.limit.unwrap_or(DEFAULT_SEARCH_LIMIT),
attributes_to_retrieve,
attributes_to_crop,
crop_length: other.crop_length,
attributes_to_highlight,
filters: other.filters,
matches: other.matches,
facet_filters,
facet_distributions,
})
}
}
#[get("/indexes/{index_uid}/search", wrap = "Authentication::Public")]
async fn search_with_url_query(
data: web::Data<Data>,
path: web::Path<IndexParam>,
params: web::Query<SearchQueryGet>,
) -> Result<HttpResponse, ResponseError> {
let query: SearchQuery = match params.into_inner().try_into() {
Ok(q) => q,
Err(e) => {
return Ok(HttpResponse::BadRequest().body(serde_json::json!({ "error": e.to_string() })))
}
};
let search_result = data.search(&path.index_uid, query);
match search_result {
Ok(docs) => {
let docs = serde_json::to_string(&docs).unwrap();
Ok(HttpResponse::Ok().body(docs))
}
Err(e) => {
Ok(HttpResponse::BadRequest().body(serde_json::json!({ "error": e.to_string() })))
}
}
}
#[post("/indexes/{index_uid}/search", wrap = "Authentication::Public")]
async fn search_with_post(
data: web::Data<Data>,
path: web::Path<IndexParam>,
params: web::Json<SearchQuery>,
) -> Result<HttpResponse, ResponseError> {
let search_result = data.search(&path.index_uid, params.into_inner());
match search_result {
Ok(docs) => {
let docs = serde_json::to_string(&docs).unwrap();
Ok(HttpResponse::Ok().body(docs))
}
Err(e) => {
Ok(HttpResponse::BadRequest().body(serde_json::json!({ "error": e.to_string() })))
}
}
}

View File

@@ -0,0 +1,43 @@
use actix_web::{web, HttpResponse, get};
use crate::error::{Error, ResponseError};
use crate::helpers::Authentication;
use crate::make_update_delete_routes;
use crate::Data;
#[get(
"/indexes/{index_uid}/settings/attributes-for-faceting",
wrap = "Authentication::Private"
)]
async fn get(
data: web::Data<Data>,
index_uid: web::Path<String>,
) -> Result<HttpResponse, ResponseError> {
let index = data
.db
.load()
.open_index(&index_uid.as_ref())
.ok_or(Error::index_not_found(&index_uid.as_ref()))?;
let attributes_for_faceting = data.db.load().main_read::<_, _, ResponseError>(|reader| {
let schema = index.main.schema(reader)?;
let attrs = index.main.attributes_for_faceting(reader)?;
let attr_names = match (&schema, &attrs) {
(Some(schema), Some(attrs)) => attrs
.iter()
.filter_map(|&id| schema.name(id))
.map(str::to_string)
.collect(),
_ => vec![],
};
Ok(attr_names)
})?;
Ok(HttpResponse::Ok().json(attributes_for_faceting))
}
// Invokes `make_update_delete_routes!` (imported from the crate root; its definition
// is not visible in this file) for the attributes-for-faceting setting.
make_update_delete_routes!(
"/indexes/{index_uid}/settings/attributes-for-faceting",
Vec<String>,
attributes_for_faceting
);

View File

@@ -0,0 +1,25 @@
use std::collections::HashSet;
use actix_web::{web, HttpResponse, get};
use crate::error::{Error, ResponseError};
use crate::helpers::Authentication;
use crate::make_update_delete_routes;
use crate::Data;
/// Handler for `GET /indexes/{index_uid}/settings/displayed-attributes`.
///
/// Not implemented yet: the body is `todo!()`, so calling it panics.
#[get(
"/indexes/{index_uid}/settings/displayed-attributes",
wrap = "Authentication::Private"
)]
async fn get(
data: web::Data<Data>,
index_uid: web::Path<String>,
) -> Result<HttpResponse, ResponseError> {
todo!()
}
// Invokes `make_update_delete_routes!` (imported from the crate root; its definition
// is not visible in this file) for the displayed-attributes setting.
make_update_delete_routes!(
"/indexes/{index_uid}/settings/displayed-attributes",
HashSet<String>,
displayed_attributes
);

View File

@@ -0,0 +1,36 @@
use crate::make_update_delete_routes;
use actix_web::{web, HttpResponse, get};
use crate::error::{Error, ResponseError};
use crate::helpers::Authentication;
use crate::Data;
#[get(
"/indexes/{index_uid}/settings/distinct-attribute",
wrap = "Authentication::Private"
)]
async fn get(
data: web::Data<Data>,
index_uid: web::Path<String>,
) -> Result<HttpResponse, ResponseError> {
let index = data
.db
.load()
.open_index(&index_uid.as_ref())
.ok_or(Error::index_not_found(&index_uid.as_ref()))?;
let reader = data.db.load().main_read_txn()?;
let distinct_attribute_id = index.main.distinct_attribute(&reader)?;
let schema = index.main.schema(&reader)?;
let distinct_attribute = match (schema, distinct_attribute_id) {
(Some(schema), Some(id)) => schema.name(id).map(str::to_string),
_ => None,
};
Ok(HttpResponse::Ok().json(distinct_attribute))
}
// Invokes `make_update_delete_routes!` (imported from the crate root; its definition
// is not visible in this file) for the distinct-attribute setting.
make_update_delete_routes!(
"/indexes/{index_uid}/settings/distinct-attribute",
String,
distinct_attribute
);

View File

@@ -0,0 +1,183 @@
use actix_web::{web, HttpResponse, delete, get, post};
use crate::Data;
use crate::error::ResponseError;
use crate::index_controller::Settings;
use crate::helpers::Authentication;
/// Generates a module named `$attr` with `get`, `update` and `delete` handlers
/// for the single setting `$attr` of type `$type`, all mounted on `$route`.
///
/// * `delete` sends `Settings { $attr: Some(None), .. }`.
/// * `update` sends the JSON body (`Option<$type>`) as the new value.
/// * `get` reads the current settings and returns only the `$attr` field.
///
/// Each handler answers 200 with a serialized body, or 400 with an
/// `{"error": ...}` body on failure.
#[macro_export]
macro_rules! make_setting_route {
    ($route:literal, $type:ty, $attr:ident) => {
        mod $attr {
            use actix_web::{web, HttpResponse};

            use crate::data;
            use crate::error::ResponseError;
            use crate::helpers::Authentication;
            use crate::index_controller::Settings;

            #[actix_web::delete($route, wrap = "Authentication::Private")]
            pub async fn delete(
                data: web::Data<data::Data>,
                index_uid: web::Path<String>,
            ) -> Result<HttpResponse, ResponseError> {
                let settings = Settings {
                    $attr: Some(None),
                    ..Default::default()
                };
                let outcome = data.update_settings(index_uid.into_inner(), settings).await;
                let response = match outcome {
                    Ok(update_status) => {
                        HttpResponse::Ok().body(serde_json::to_string(&update_status).unwrap())
                    }
                    Err(err) => HttpResponse::BadRequest()
                        .body(serde_json::json!({ "error": err.to_string() })),
                };
                Ok(response)
            }

            #[actix_web::post($route, wrap = "Authentication::Private")]
            pub async fn update(
                data: web::Data<data::Data>,
                index_uid: web::Path<String>,
                body: web::Json<Option<$type>>,
            ) -> Result<HttpResponse, ResponseError> {
                let settings = Settings {
                    $attr: Some(body.into_inner()),
                    ..Default::default()
                };
                let outcome = data.update_settings(index_uid.into_inner(), settings).await;
                let response = match outcome {
                    Ok(update_status) => {
                        HttpResponse::Ok().body(serde_json::to_string(&update_status).unwrap())
                    }
                    Err(err) => HttpResponse::BadRequest()
                        .body(serde_json::json!({ "error": err.to_string() })),
                };
                Ok(response)
            }

            #[actix_web::get($route, wrap = "Authentication::Private")]
            pub async fn get(
                data: web::Data<data::Data>,
                index_uid: web::Path<String>,
            ) -> Result<HttpResponse, ResponseError> {
                let response = match data.settings(index_uid.as_ref()) {
                    Ok(settings) => {
                        let setting = settings.$attr;
                        HttpResponse::Ok().body(serde_json::to_string(&setting).unwrap())
                    }
                    Err(err) => HttpResponse::BadRequest()
                        .body(serde_json::json!({ "error": err.to_string() })),
                };
                Ok(response)
            }
        }
    };
}
// Per-setting route modules generated by `make_setting_route!`.
// Each invocation creates a module named after its last argument.

// Served under "attributes-for-faceting" but stored in the `faceted_attributes` setting.
make_setting_route!(
"/indexes/{index_uid}/settings/attributes-for-faceting",
std::collections::HashMap<String, String>,
faceted_attributes
);
make_setting_route!(
"/indexes/{index_uid}/settings/displayed-attributes",
Vec<String>,
displayed_attributes
);
make_setting_route!(
"/indexes/{index_uid}/settings/searchable-attributes",
Vec<String>,
searchable_attributes
);
//make_setting_route!(
//"/indexes/{index_uid}/settings/distinct-attribute",
//String,
//distinct_attribute
//);
//make_setting_route!(
//"/indexes/{index_uid}/settings/ranking-rules",
//Vec<String>,
//ranking_rules
//);
// Generates the public `services` function of this module.
//
// The generated function registers the three whole-settings handlers
// (`update_all`, `get_all`, `delete_all`) and, for every module name passed to the
// macro, that module's `get`, `update` and `delete` handlers.
macro_rules! create_services {
($($mod:ident),*) => {
pub fn services(cfg: &mut web::ServiceConfig) {
cfg
.service(update_all)
.service(get_all)
.service(delete_all)
$(
.service($mod::get)
.service($mod::update)
.service($mod::delete)
)*;
}
};
}
// The module names listed here must match those generated by `make_setting_route!`.
create_services!(
faceted_attributes,
displayed_attributes,
searchable_attributes
);
#[post("/indexes/{index_uid}/settings", wrap = "Authentication::Private")]
async fn update_all(
data: web::Data<Data>,
index_uid: web::Path<String>,
body: web::Json<Settings>,
) -> Result<HttpResponse, ResponseError> {
match data.update_settings(index_uid.into_inner(), body.into_inner()).await {
Ok(update_result) => {
let json = serde_json::to_string(&update_result).unwrap();
Ok(HttpResponse::Ok().body(json))
}
Err(e) => {
Ok(HttpResponse::BadRequest().body(serde_json::json!({ "error": e.to_string() })))
}
}
}
#[get("/indexes/{index_uid}/settings", wrap = "Authentication::Private")]
async fn get_all(
data: web::Data<Data>,
index_uid: web::Path<String>,
) -> Result<HttpResponse, ResponseError> {
match data.settings(index_uid.as_ref()) {
Ok(settings) => {
let json = serde_json::to_string(&settings).unwrap();
Ok(HttpResponse::Ok().body(json))
}
Err(e) => {
Ok(HttpResponse::BadRequest().body(serde_json::json!({ "error": e.to_string() })))
}
}
}
#[delete("/indexes/{index_uid}/settings", wrap = "Authentication::Private")]
async fn delete_all(
data: web::Data<Data>,
index_uid: web::Path<String>,
) -> Result<HttpResponse, ResponseError> {
let settings = Settings::cleared();
match data.update_settings(index_uid.into_inner(), settings).await {
Ok(update_result) => {
let json = serde_json::to_string(&update_result).unwrap();
Ok(HttpResponse::Ok().body(json))
}
Err(e) => {
Ok(HttpResponse::BadRequest().body(serde_json::json!({ "error": e.to_string() })))
}
}
}

View File

@@ -0,0 +1,23 @@
use crate::make_update_delete_routes;
use actix_web::{web, HttpResponse, get};
use crate::error::{Error, ResponseError};
use crate::helpers::Authentication;
use crate::Data;
/// Handler for `GET /indexes/{index_uid}/settings/ranking-rules`.
///
/// Not implemented yet: the body is `todo!()`, so calling it panics.
#[get(
"/indexes/{index_uid}/settings/ranking-rules",
wrap = "Authentication::Private"
)]
async fn get(
data: web::Data<Data>,
index_uid: web::Path<String>,
) -> Result<HttpResponse, ResponseError> {
todo!()
}
// Invokes `make_update_delete_routes!` (defined elsewhere in the crate) with the
// ranking-rules path, a `Vec<String>` type and the `ranking_rules` identifier.
// Presumably this generates the `update` and `delete` handlers for the route —
// confirm against the macro definition.
make_update_delete_routes!(
"/indexes/{index_uid}/settings/ranking-rules",
Vec<String>,
ranking_rules
);

View File

@@ -0,0 +1,34 @@
use actix_web::{web, HttpResponse, get};
use crate::data::get_indexed_attributes;
use crate::error::{Error, ResponseError};
use crate::helpers::Authentication;
use crate::make_update_delete_routes;
use crate::Data;
#[get(
"/indexes/{index_uid}/settings/searchable-attributes",
wrap = "Authentication::Private"
)]
async fn get(
data: web::Data<Data>,
index_uid: web::Path<String>,
) -> Result<HttpResponse, ResponseError> {
let index = data
.db
.load()
.open_index(&index_uid.as_ref())
.ok_or(Error::index_not_found(&index_uid.as_ref()))?;
let reader = data.db.load().main_read_txn()?;
let schema = index.main.schema(&reader)?;
let searchable_attributes: Option<Vec<String>> = schema.as_ref().map(get_indexed_attributes);
Ok(HttpResponse::Ok().json(searchable_attributes))
}
// Invokes `make_update_delete_routes!` (defined elsewhere in the crate) with the
// searchable-attributes path, a `Vec<String>` type and the `searchable_attributes`
// identifier. Presumably this generates the `update` and `delete` handlers for the
// route — confirm against the macro definition.
make_update_delete_routes!(
"/indexes/{index_uid}/settings/searchable-attributes",
Vec<String>,
searchable_attributes
);

View File

@@ -0,0 +1,33 @@
use std::collections::BTreeSet;
use crate::make_update_delete_routes;
use actix_web::{web, HttpResponse, get};
use crate::error::{Error, ResponseError};
use crate::helpers::Authentication;
use crate::Data;
#[get(
"/indexes/{index_uid}/settings/stop-words",
wrap = "Authentication::Private"
)]
async fn get(
data: web::Data<Data>,
index_uid: web::Path<String>,
) -> Result<HttpResponse, ResponseError> {
let index = data
.db
.load()
.open_index(&index_uid.as_ref())
.ok_or(Error::index_not_found(&index_uid.as_ref()))?;
let reader = data.db.load().main_read_txn()?;
let stop_words = index.main.stop_words(&reader)?;
Ok(HttpResponse::Ok().json(stop_words))
}
// Invokes `make_update_delete_routes!` (defined elsewhere in the crate) with the
// stop-words path, a `BTreeSet<String>` type and the `stop_words` identifier.
// Presumably this generates the `update` and `delete` handlers for the route —
// confirm against the macro definition.
make_update_delete_routes!(
"/indexes/{index_uid}/settings/stop-words",
BTreeSet<String>,
stop_words
);

View File

@@ -0,0 +1,43 @@
use std::collections::BTreeMap;
use actix_web::{web, HttpResponse, get};
use indexmap::IndexMap;
use crate::error::{Error, ResponseError};
use crate::helpers::Authentication;
use crate::make_update_delete_routes;
use crate::Data;
#[get(
"/indexes/{index_uid}/settings/synonyms",
wrap = "Authentication::Private"
)]
async fn get(
data: web::Data<Data>,
index_uid: web::Path<String>,
) -> Result<HttpResponse, ResponseError> {
let index = data
.db
.load()
.open_index(&index_uid.as_ref())
.ok_or(Error::index_not_found(&index_uid.as_ref()))?;
let reader = data.db.load().main_read_txn()?;
let synonyms_list = index.main.synonyms(&reader)?;
let mut synonyms = IndexMap::new();
let index_synonyms = &index.synonyms;
for synonym in synonyms_list {
let list = index_synonyms.synonyms(&reader, synonym.as_bytes())?;
synonyms.insert(synonym, list);
}
Ok(HttpResponse::Ok().json(synonyms))
}
// Invokes `make_update_delete_routes!` (defined elsewhere in the crate) with the
// synonyms path, a `BTreeMap<String, Vec<String>>` type and the `synonyms`
// identifier. Presumably this generates the `update` and `delete` handlers for the
// route — confirm against the macro definition.
make_update_delete_routes!(
"/indexes/{index_uid}/settings/synonyms",
BTreeMap<String, Vec<String>>,
synonyms
);

View File

@@ -0,0 +1,60 @@
use std::collections::{HashMap, BTreeMap};
use actix_web::web;
use actix_web::HttpResponse;
use actix_web::get;
use chrono::{DateTime, Utc};
use serde::Serialize;
use crate::error::ResponseError;
use crate::helpers::Authentication;
use crate::routes::IndexParam;
use crate::Data;
/// Registers the `index_stats`, `get_stats` and `get_version` handlers on `cfg`.
pub fn services(cfg: &mut web::ServiceConfig) {
    cfg.service(index_stats);
    cfg.service(get_stats);
    cfg.service(get_version);
}
/// Per-index statistics payload; fields are serialized with camelCase keys.
#[derive(Serialize)]
#[serde(rename_all = "camelCase")]
struct IndexStatsResponse {
/// Presumably the number of documents stored in the index — confirm once the handler is implemented.
number_of_documents: u64,
/// Presumably whether an update is currently being processed — confirm once the handler is implemented.
is_indexing: bool,
/// Map from field name to a count; presumably how many documents contain each field — confirm.
fields_distribution: BTreeMap<String, usize>,
}
/// Handler for `GET /indexes/{index_uid}/stats`, wrapped in `Authentication::Private`.
///
/// Not implemented yet: the body is `todo!()`, so invoking this route panics.
#[get("/indexes/{index_uid}/stats", wrap = "Authentication::Private")]
async fn index_stats(
_data: web::Data<Data>,
_path: web::Path<IndexParam>,
) -> Result<HttpResponse, ResponseError> {
todo!()
}
/// Global statistics payload; fields are serialized with camelCase keys.
#[derive(Serialize)]
#[serde(rename_all = "camelCase")]
struct StatsResult {
/// Presumably the on-disk size of the database in bytes — confirm once the handler is implemented.
database_size: u64,
/// Presumably the time of the most recent update, `None` if there has been none — confirm.
last_update: Option<DateTime<Utc>>,
/// Per-index statistics; presumably keyed by index uid — confirm.
indexes: HashMap<String, IndexStatsResponse>,
}
/// Handler for `GET /stats`, wrapped in `Authentication::Private`.
///
/// Not implemented yet: the body is `todo!()`, so invoking this route panics.
#[get("/stats", wrap = "Authentication::Private")]
async fn get_stats(_data: web::Data<Data>) -> Result<HttpResponse, ResponseError> {
todo!()
}
/// Version information payload; fields are serialized with camelCase keys.
#[derive(Serialize)]
#[serde(rename_all = "camelCase")]
struct VersionResponse {
/// Presumably the commit hash the binary was built from — confirm once the handler is implemented.
commit_sha: String,
/// Presumably the date of the build — confirm; the format is not specified here.
build_date: String,
/// Presumably the crate/package version — confirm.
pkg_version: String,
}
/// Handler for `GET /version`, wrapped in `Authentication::Private`.
///
/// Not implemented yet: the body is `todo!()`, so invoking this route panics.
#[get("/version", wrap = "Authentication::Private")]
async fn get_version() -> HttpResponse {
todo!()
}

View File

@@ -0,0 +1,46 @@
use actix_web::{web, HttpResponse};
use actix_web::{delete, get, post};
use std::collections::BTreeSet;
use crate::error::ResponseError;
use crate::helpers::Authentication;
use crate::routes::IndexParam;
use crate::Data;
/// Registers the stop-words `get`, `update` and `delete` handlers on `cfg`.
pub fn services(cfg: &mut web::ServiceConfig) {
    cfg.service(get);
    cfg.service(update);
    cfg.service(delete);
}
/// Handler for `GET /indexes/{index_uid}/settings/stop-words`, wrapped in
/// `Authentication::Private`.
///
/// Not implemented yet: the body is `todo!()`, so invoking this route panics.
#[get(
"/indexes/{index_uid}/settings/stop-words",
wrap = "Authentication::Private"
)]
async fn get(
_data: web::Data<Data>,
_path: web::Path<IndexParam>,
) -> Result<HttpResponse, ResponseError> {
todo!()
}
/// Handler for `POST /indexes/{index_uid}/settings/stop-words`, wrapped in
/// `Authentication::Private`. The request body is extracted as a JSON
/// `BTreeSet<String>`.
///
/// Not implemented yet: the body is `todo!()`, so invoking this route panics.
#[post(
"/indexes/{index_uid}/settings/stop-words",
wrap = "Authentication::Private"
)]
async fn update(
_data: web::Data<Data>,
_path: web::Path<IndexParam>,
_body: web::Json<BTreeSet<String>>,
) -> Result<HttpResponse, ResponseError> {
todo!()
}
/// Handler for `DELETE /indexes/{index_uid}/settings/stop-words`, wrapped in
/// `Authentication::Private`.
///
/// Not implemented yet: the body is `todo!()`, so invoking this route panics.
#[delete(
"/indexes/{index_uid}/settings/stop-words",
wrap = "Authentication::Private"
)]
async fn delete(
_data: web::Data<Data>,
_path: web::Path<IndexParam>,
) -> Result<HttpResponse, ResponseError> {
todo!()
}

View File

@@ -0,0 +1,47 @@
use std::collections::BTreeMap;
use actix_web::{web, HttpResponse};
use actix_web::{delete, get, post};
use crate::error::ResponseError;
use crate::helpers::Authentication;
use crate::routes::IndexParam;
use crate::Data;
/// Registers the synonyms `get`, `update` and `delete` handlers on `cfg`.
pub fn services(cfg: &mut web::ServiceConfig) {
    cfg.service(get);
    cfg.service(update);
    cfg.service(delete);
}
/// Handler for `GET /indexes/{index_uid}/settings/synonyms`, wrapped in
/// `Authentication::Private`.
///
/// Not implemented yet: the body is `todo!()`, so invoking this route panics.
#[get(
"/indexes/{index_uid}/settings/synonyms",
wrap = "Authentication::Private"
)]
async fn get(
_data: web::Data<Data>,
_path: web::Path<IndexParam>,
) -> Result<HttpResponse, ResponseError> {
todo!()
}
/// Handler for `POST /indexes/{index_uid}/settings/synonyms`, wrapped in
/// `Authentication::Private`. The request body is extracted as a JSON
/// `BTreeMap<String, Vec<String>>`.
///
/// Not implemented yet: the body is `todo!()`, so invoking this route panics.
#[post(
"/indexes/{index_uid}/settings/synonyms",
wrap = "Authentication::Private"
)]
async fn update(
_data: web::Data<Data>,
_path: web::Path<IndexParam>,
_body: web::Json<BTreeMap<String, Vec<String>>>,
) -> Result<HttpResponse, ResponseError> {
todo!()
}
/// Handler for `DELETE /indexes/{index_uid}/settings/synonyms`, wrapped in
/// `Authentication::Private`.
///
/// Not implemented yet: the body is `todo!()`, so invoking this route panics.
#[delete(
"/indexes/{index_uid}/settings/synonyms",
wrap = "Authentication::Private"
)]
async fn delete(
_data: web::Data<Data>,
_path: web::Path<IndexParam>,
) -> Result<HttpResponse, ResponseError> {
todo!()
}

View File

@@ -0,0 +1,96 @@
use crate::Data;
use crate::error::Error;
use crate::helpers::compression;
use log::error;
use std::fs::create_dir_all;
use std::path::Path;
use std::thread;
use std::time::{Duration};
use tempfile::TempDir;
pub fn load_snapshot(
db_path: &str,
snapshot_path: &Path,
ignore_snapshot_if_db_exists: bool,
ignore_missing_snapshot: bool
) -> Result<(), Error> {
let db_path = Path::new(db_path);
if !db_path.exists() && snapshot_path.exists() {
compression::from_tar_gz(snapshot_path, db_path)
} else if db_path.exists() && !ignore_snapshot_if_db_exists {
Err(Error::Internal(format!("database already exists at {:?}, try to delete it or rename it", db_path.canonicalize().unwrap_or(db_path.into()))))
} else if !snapshot_path.exists() && !ignore_missing_snapshot {
Err(Error::Internal(format!("snapshot doesn't exist at {:?}", snapshot_path.canonicalize().unwrap_or(snapshot_path.into()))))
} else {
Ok(())
}
}
pub fn create_snapshot(data: &Data, snapshot_path: &Path) -> Result<(), Error> {
let tmp_dir = TempDir::new()?;
data.db.copy_and_compact_to_path(tmp_dir.path())?;
compression::to_tar_gz(tmp_dir.path(), snapshot_path).map_err(|e| Error::Internal(format!("something went wrong during snapshot compression: {}", e)))
}
pub fn schedule_snapshot(data: Data, snapshot_dir: &Path, time_gap_s: u64) -> Result<(), Error> {
if snapshot_dir.file_name().is_none() {
return Err(Error::Internal("invalid snapshot file path".to_string()));
}
let db_name = Path::new(&data.db_path).file_name().ok_or_else(|| Error::Internal("invalid database name".to_string()))?;
create_dir_all(snapshot_dir)?;
let snapshot_path = snapshot_dir.join(format!("{}.snapshot", db_name.to_str().unwrap_or("data.ms")));
thread::spawn(move || loop {
if let Err(e) = create_snapshot(&data, &snapshot_path) {
error!("Unsuccessful snapshot creation: {}", e);
}
thread::sleep(Duration::from_secs(time_gap_s));
});
Ok(())
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::prelude::*;
    use std::fs;

    /// Archives a small directory tree, restores it through `load_snapshot` into a
    /// nested destination that does not exist yet, and checks the files and their
    /// contents come back unchanged.
    #[test]
    fn test_pack_unpack() {
        let tempdir = TempDir::new().unwrap();
        let root = tempdir.path();

        let src_dir = root.join("src");
        let dest_dir = root.join("complex/destination/path/");
        let archive_path = root.join("archive.snapshot");

        let subdir_relative = Path::new("subdir/");
        // (path relative to the tree root, file contents)
        let fixtures: [(&Path, &str); 2] = [
            (Path::new("file1.txt"), "Hello_file_1"),
            (Path::new("subdir/file2.txt"), "Hello_file_2"),
        ];

        create_dir_all(src_dir.join(subdir_relative)).unwrap();
        for &(relative, contents) in fixtures.iter() {
            let mut file = fs::File::create(src_dir.join(relative)).unwrap();
            file.write_all(contents.as_bytes()).unwrap();
        }

        assert!(compression::to_tar_gz(&src_dir, &archive_path).is_ok());
        assert!(archive_path.exists());
        assert!(load_snapshot(dest_dir.to_str().unwrap(), &archive_path, false, false).is_ok());

        assert!(dest_dir.exists());
        assert!(dest_dir.join(subdir_relative).exists());
        for &(relative, contents) in fixtures.iter() {
            let unpacked = dest_dir.join(relative);
            assert!(unpacked.exists());
            assert_eq!(fs::read_to_string(&unpacked).unwrap(), contents);
        }
    }
}