split meilisearch-http and meilisearch-lib

This commit is contained in:
mpostma
2021-09-21 13:23:22 +02:00
parent 09d4e37044
commit 60518449fc
63 changed files with 608 additions and 324 deletions

79
Cargo.lock generated
View File

@@ -872,6 +872,15 @@ dependencies = [
"termcolor",
]
[[package]]
name = "erased-serde"
version = "0.3.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3de9ad4541d99dc22b59134e7ff8dc3d6c988c89ecd7324bf10a8362b07a2afa"
dependencies = [
"serde",
]
[[package]]
name = "fake-simd"
version = "0.1.2"
@@ -1571,6 +1580,7 @@ name = "meilisearch-error"
version = "0.22.0"
dependencies = [
"actix-http",
"serde",
]
[[package]]
@@ -1606,6 +1616,7 @@ dependencies = [
"log",
"main_error",
"meilisearch-error",
"meilisearch-lib",
"meilisearch-tokenizer",
"memmap",
"milli",
@@ -1646,6 +1657,74 @@ dependencies = [
"zip",
]
[[package]]
name = "meilisearch-lib"
version = "0.1.0"
dependencies = [
"actix-cors",
"actix-rt",
"actix-web",
"actix-web-static-files",
"anyhow",
"arc-swap",
"assert-json-diff",
"async-stream",
"async-trait",
"byte-unit",
"bytes",
"chrono",
"crossbeam-channel",
"either",
"env_logger",
"erased-serde",
"flate2",
"fst",
"futures",
"futures-util",
"heed",
"http",
"indexmap",
"itertools",
"log",
"main_error",
"meilisearch-error",
"meilisearch-tokenizer",
"memmap",
"milli",
"mime",
"mockall",
"num_cpus",
"obkv",
"once_cell",
"parking_lot",
"paste",
"pin-project",
"rand 0.8.4",
"rayon",
"regex",
"reqwest",
"rustls",
"serde",
"serde_json",
"serde_url_params",
"serdeval",
"sha2",
"siphasher",
"slice-group-by",
"structopt",
"sysinfo",
"tar",
"tempdir",
"tempfile",
"thiserror",
"tokio",
"tokio-stream",
"urlencoding",
"uuid",
"walkdir",
"whoami",
]
[[package]]
name = "meilisearch-tokenizer"
version = "0.2.5"

View File

@@ -2,6 +2,7 @@
members = [
"meilisearch-http",
"meilisearch-error",
"meilisearch-lib",
]
[profile.release]

View File

@@ -6,3 +6,4 @@ edition = "2018"
[dependencies]
actix-http = "=3.0.0-beta.10"
serde = { version = "1.0.130", features = ["derive"] }

View File

@@ -1,6 +1,7 @@
use std::fmt;
use actix_http::http::StatusCode;
use serde::{Serialize, Deserialize};
pub trait ErrorCode: std::error::Error {
fn error_code(&self) -> Code;
@@ -45,6 +46,7 @@ impl fmt::Display for ErrorType {
}
}
#[derive(Serialize, Deserialize, Debug, Clone, Copy)]
pub enum Code {
// index related error
CreateIndex,

View File

@@ -45,6 +45,7 @@ indexmap = { version = "1.7.0", features = ["serde-1"] }
itertools = "0.10.1"
log = "0.4.14"
main_error = "0.1.1"
meilisearch-lib = { path = "../meilisearch-lib" }
meilisearch-error = { path = "../meilisearch-error" }
meilisearch-tokenizer = { git = "https://github.com/meilisearch/tokenizer.git", tag = "v0.2.5" }
memmap = "0.7.0"

View File

@@ -4,8 +4,8 @@ use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};
use log::debug;
use serde::Serialize;
use siphasher::sip::SipHasher;
use meilisearch_lib::MeiliSearch;
use crate::Data;
use crate::Opt;
const AMPLITUDE_API_KEY: &str = "f7fba398780e06d8fe6666a9be7e3d47";
@@ -18,8 +18,8 @@ struct EventProperties {
}
impl EventProperties {
async fn from(data: Data) -> anyhow::Result<EventProperties> {
let stats = data.index_controller.get_all_stats().await?;
async fn from(data: MeiliSearch) -> anyhow::Result<EventProperties> {
let stats = data.get_all_stats().await?;
let database_size = stats.database_size;
let last_update_timestamp = stats.last_update.map(|u| u.timestamp());
@@ -62,7 +62,7 @@ struct AmplitudeRequest<'a> {
events: Vec<Event<'a>>,
}
pub async fn analytics_sender(data: Data, opt: Opt) {
pub async fn analytics_sender(data: MeiliSearch, opt: Opt) {
let username = whoami::username();
let hostname = whoami::hostname();
let platform = whoami::platform();

View File

@@ -1,86 +0,0 @@
use std::ops::Deref;
use std::sync::Arc;
use crate::index::{Checked, Settings};
use crate::index_controller::{
error::Result, DumpInfo, IndexController, IndexMetadata, IndexStats, Stats,
};
use crate::option::Opt;
pub mod search;
mod updates;
#[derive(Clone)]
pub struct Data {
inner: Arc<DataInner>,
}
impl Deref for Data {
type Target = DataInner;
fn deref(&self) -> &Self::Target {
&self.inner
}
}
pub struct DataInner {
pub index_controller: IndexController,
//pub api_keys: ApiKeys,
}
impl Data {
pub fn new(options: Opt) -> anyhow::Result<Data> {
let path = options.db_path.clone();
let index_controller = IndexController::new(&path, &options)?;
let inner = DataInner {
index_controller,
};
let inner = Arc::new(inner);
Ok(Data { inner })
}
pub async fn settings(&self, uid: String) -> Result<Settings<Checked>> {
self.index_controller.settings(uid).await
}
pub async fn list_indexes(&self) -> Result<Vec<IndexMetadata>> {
self.index_controller.list_indexes().await
}
pub async fn index(&self, uid: String) -> Result<IndexMetadata> {
self.index_controller.get_index(uid).await
}
//pub async fn create_index(
//&self,
//uid: String,
//primary_key: Option<String>,
//) -> Result<IndexMetadata> {
//let settings = IndexSettings {
//uid: Some(uid),
//primary_key,
//};
//let meta = self.index_controller.create_index(settings).await?;
//Ok(meta)
//}
pub async fn get_index_stats(&self, uid: String) -> Result<IndexStats> {
Ok(self.index_controller.get_index_stats(uid).await?)
}
pub async fn get_all_stats(&self) -> Result<Stats> {
Ok(self.index_controller.get_all_stats().await?)
}
pub async fn create_dump(&self) -> Result<DumpInfo> {
Ok(self.index_controller.create_dump().await?)
}
pub async fn dump_status(&self, uid: String) -> Result<DumpInfo> {
Ok(self.index_controller.dump_info(uid).await?)
}
}

View File

@@ -1,34 +0,0 @@
use serde_json::{Map, Value};
use super::Data;
use crate::index::{SearchQuery, SearchResult};
use crate::index_controller::error::Result;
impl Data {
pub async fn search(&self, index: String, search_query: SearchQuery) -> Result<SearchResult> {
self.index_controller.search(index, search_query).await
}
pub async fn retrieve_documents(
&self,
index: String,
offset: usize,
limit: usize,
attributes_to_retrieve: Option<Vec<String>>,
) -> Result<Vec<Map<String, Value>>> {
self.index_controller
.documents(index, offset, limit, attributes_to_retrieve)
.await
}
pub async fn retrieve_document(
&self,
index: String,
document_id: String,
attributes_to_retrieve: Option<Vec<String>>,
) -> Result<Map<String, Value>> {
self.index_controller
.document(index, document_id, attributes_to_retrieve)
.await
}
}

View File

@@ -1,32 +0,0 @@
use crate::index_controller::Update;
use crate::index_controller::{error::Result, IndexMetadata, IndexSettings, UpdateStatus};
use crate::Data;
impl Data {
pub async fn register_update(&self, index_uid: &str, update: Update) -> Result<UpdateStatus> {
let status = self.index_controller.register_update(index_uid, update).await?;
Ok(status)
}
pub async fn get_update_status(&self, index: String, uid: u64) -> Result<UpdateStatus> {
self.index_controller.update_status(index, uid).await
}
pub async fn get_updates_status(&self, index: String) -> Result<Vec<UpdateStatus>> {
self.index_controller.all_update_status(index).await
}
pub async fn update_index(
&self,
uid: String,
primary_key: Option<String>,
new_uid: Option<String>,
) -> Result<IndexMetadata> {
let settings = IndexSettings {
uid: new_uid,
primary_key,
};
self.index_controller.update_index(uid, settings).await
}
}

View File

@@ -55,18 +55,6 @@ impl aweb::error::ResponseError for ResponseError {
}
}
macro_rules! internal_error {
($target:ty : $($other:path), *) => {
$(
impl From<$other> for $target {
fn from(other: $other) -> Self {
Self::Internal(Box::new(other))
}
}
)*
}
}
#[derive(Debug)]
pub struct MilliError<'a>(pub &'a milli::Error);

View File

@@ -38,7 +38,6 @@
//! Most of the routes use [extractors] to handle the authentication.
#![allow(rustdoc::private_intra_doc_links)]
pub mod data;
#[macro_use]
pub mod error;
#[macro_use]
@@ -46,11 +45,8 @@ pub mod extractors;
#[cfg(all(not(debug_assertions), feature = "analytics"))]
pub mod analytics;
pub mod helpers;
mod index;
mod index_controller;
pub mod option;
pub mod routes;
pub use self::data::Data;
use crate::extractors::authentication::AuthConfig;
pub use option::Opt;
@@ -58,6 +54,7 @@ use actix_web::web;
use extractors::authentication::policies::*;
use extractors::payload::PayloadConfig;
use meilisearch_lib::MeiliSearch;
use sha2::Digest;
#[derive(Clone)]
@@ -86,14 +83,14 @@ impl ApiKeys {
pub fn configure_data(
config: &mut web::ServiceConfig,
data: Data,
data: MeiliSearch,
opt: &Opt,
) {
let http_payload_size_limit = opt.http_payload_size_limit.get_bytes() as usize;
config
.app_data(web::Data::new(data.clone()))
// TODO!: Why are we passing the data with two different things?
.app_data(data)
// TODO!: Why are we passing the data with two different things?
//.app_data(data)
.app_data(
web::JsonConfig::default()
.limit(http_payload_size_limit)

View File

@@ -1,7 +1,8 @@
use std::env;
use actix_web::HttpServer;
use meilisearch_http::{create_app, Data, Opt};
use meilisearch_http::{create_app, Opt};
use meilisearch_lib::MeiliSearch;
use structopt::StructOpt;
#[cfg(all(not(debug_assertions), feature = "analytics"))]
@@ -39,6 +40,26 @@ fn setup(opt: &Opt) -> anyhow::Result<()> {
Ok(())
}
fn setup_meilisearch(opt: &Opt) -> anyhow::Result<MeiliSearch> {
let mut meilisearch = MeiliSearch::builder();
meilisearch
.set_max_index_size(opt.max_index_size.get_bytes() as usize)
.set_max_update_store_size(opt.max_udb_size.get_bytes() as usize)
.set_ignore_missing_snapshot(opt.ignore_missing_snapshot)
.set_ignore_snapshot_if_db_exists(opt.ignore_snapshot_if_db_exists)
.set_dump_dst(opt.dumps_dir.clone())
.set_snapshot_dir(opt.snapshot_dir.clone());
if let Some(ref path) = opt.import_snapshot {
meilisearch.set_import_snapshot(path.clone());
}
if let Some(ref path) = opt.import_dump {
meilisearch.set_dump_src(path.clone());
}
meilisearch.build(opt.db_path.clone(), opt.indexer_options.clone())
}
#[actix_web::main]
async fn main() -> anyhow::Result<()> {
let opt = Opt::from_args();
@@ -55,23 +76,23 @@ async fn main() -> anyhow::Result<()> {
_ => unreachable!(),
}
let data = Data::new(opt.clone())?;
let meilisearch = setup_meilisearch(&opt)?;
#[cfg(all(not(debug_assertions), feature = "analytics"))]
if !opt.no_analytics {
let analytics_data = data.clone();
let analytics_data = meilisearch.clone();
let analytics_opt = opt.clone();
tokio::task::spawn(analytics::analytics_sender(analytics_data, analytics_opt));
}
print_launch_resume(&opt);
run_http(data, opt).await?;
run_http(meilisearch, opt).await?;
Ok(())
}
async fn run_http(data: Data, opt: Opt) -> anyhow::Result<()> {
async fn run_http(data: MeiliSearch, opt: Opt) -> anyhow::Result<()> {
let _enable_dashboard = &opt.env == "development";
let opt_clone = opt.clone();
let http_server = HttpServer::new(move || create_app!(data, _enable_dashboard, opt_clone))

View File

@@ -8,7 +8,6 @@ use std::sync::Arc;
use std::fs;
use byte_unit::Byte;
use milli::CompressionType;
use rustls::internal::pemfile::{certs, pkcs8_private_keys, rsa_private_keys};
use rustls::{
AllowAnyAnonymousOrAuthenticatedClient, AllowAnyAuthenticatedClient, NoClientAuth,
@@ -16,56 +15,7 @@ use rustls::{
};
use structopt::StructOpt;
use sysinfo::{RefreshKind, System, SystemExt};
#[derive(Debug, Clone, StructOpt)]
pub struct IndexerOpts {
/// The amount of documents to skip before printing
/// a log regarding the indexing advancement.
#[structopt(long, default_value = "100000")] // 100k
pub log_every_n: usize,
/// Grenad max number of chunks in bytes.
#[structopt(long)]
pub max_nb_chunks: Option<usize>,
/// The maximum amount of memory the indexer will use. It defaults to 2/3
/// of the available memory. It is recommended to use something like 80%-90%
/// of the available memory, no more.
///
/// In case the engine is unable to retrieve the available memory the engine will
/// try to use the memory it needs but without real limit, this can lead to
/// Out-Of-Memory issues and it is recommended to specify the amount of memory to use.
#[structopt(long, default_value)]
pub max_memory: MaxMemory,
/// The name of the compression algorithm to use when compressing intermediate
/// Grenad chunks while indexing documents.
///
/// Choosing a fast algorithm will make the indexing faster but may consume more memory.
#[structopt(long, default_value = "snappy", possible_values = &["snappy", "zlib", "lz4", "lz4hc", "zstd"])]
pub chunk_compression_type: CompressionType,
/// The level of compression of the chosen algorithm.
#[structopt(long, requires = "chunk-compression-type")]
pub chunk_compression_level: Option<u32>,
/// Number of parallel jobs for indexing, defaults to # of CPUs.
#[structopt(long)]
pub indexing_jobs: Option<usize>,
}
impl Default for IndexerOpts {
fn default() -> Self {
Self {
log_every_n: 100_000,
max_nb_chunks: None,
max_memory: MaxMemory::default(),
chunk_compression_type: CompressionType::None,
chunk_compression_level: None,
indexing_jobs: None,
}
}
}
use meilisearch_lib::options::IndexerOpts;
const POSSIBLE_ENV: [&str; 2] = ["development", "production"];

View File

@@ -1,17 +1,17 @@
use actix_web::{web, HttpResponse};
use log::debug;
use meilisearch_lib::MeiliSearch;
use serde::{Deserialize, Serialize};
use crate::error::ResponseError;
use crate::extractors::authentication::{policies::*, GuardedData};
use crate::Data;
pub fn configure(cfg: &mut web::ServiceConfig) {
cfg.service(web::resource("").route(web::post().to(create_dump)))
.service(web::resource("/{dump_uid}/status").route(web::get().to(get_dump_status)));
}
pub async fn create_dump(data: GuardedData<Private, Data>) -> Result<HttpResponse, ResponseError> {
pub async fn create_dump(data: GuardedData<Private, MeiliSearch>) -> Result<HttpResponse, ResponseError> {
let res = data.create_dump().await?;
debug!("returns: {:?}", res);
@@ -30,10 +30,10 @@ struct DumpParam {
}
async fn get_dump_status(
data: GuardedData<Private, Data>,
data: GuardedData<Private, MeiliSearch>,
path: web::Path<DumpParam>,
) -> Result<HttpResponse, ResponseError> {
let res = data.dump_status(path.dump_uid.clone()).await?;
let res = data.dump_info(path.dump_uid.clone()).await?;
debug!("returns: {:?}", res);
Ok(HttpResponse::Ok().json(res))

View File

@@ -3,6 +3,8 @@ use actix_web::{web, HttpResponse};
use actix_web::web::Bytes;
use futures::{Stream, StreamExt};
use log::debug;
use meilisearch_lib::MeiliSearch;
use meilisearch_lib::index_controller::{DocumentAdditionFormat, Update};
use milli::update::IndexDocumentsMethod;
use serde::Deserialize;
//use serde_json::Value;
@@ -11,9 +13,7 @@ use tokio::sync::mpsc;
use crate::error::ResponseError;
use crate::extractors::authentication::{policies::*, GuardedData};
use crate::extractors::payload::Payload;
use crate::index_controller::{DocumentAdditionFormat, Update};
use crate::routes::IndexParam;
use crate::Data;
const DEFAULT_RETRIEVE_DOCUMENTS_OFFSET: usize = 0;
const DEFAULT_RETRIEVE_DOCUMENTS_LIMIT: usize = 20;
@@ -88,20 +88,20 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
}
pub async fn get_document(
data: GuardedData<Public, Data>,
data: GuardedData<Public, MeiliSearch>,
path: web::Path<DocumentParam>,
) -> Result<HttpResponse, ResponseError> {
let index = path.index_uid.clone();
let id = path.document_id.clone();
let document = data
.retrieve_document(index, id, None as Option<Vec<String>>)
.document(index, id, None as Option<Vec<String>>)
.await?;
debug!("returns: {:?}", document);
Ok(HttpResponse::Ok().json(document))
}
//pub async fn delete_document(
//data: GuardedData<Private, Data>,
//data: GuardedData<Private, MeiliSearch>,
//path: web::Path<DocumentParam>,
//) -> Result<HttpResponse, ResponseError> {
//let update_status = data
@@ -120,7 +120,7 @@ pub struct BrowseQuery {
}
pub async fn get_all_documents(
data: GuardedData<Public, Data>,
data: GuardedData<Public, MeiliSearch>,
path: web::Path<IndexParam>,
params: web::Query<BrowseQuery>,
) -> Result<HttpResponse, ResponseError> {
@@ -137,7 +137,7 @@ pub async fn get_all_documents(
});
let documents = data
.retrieve_documents(
.documents(
path.index_uid.clone(),
params.offset.unwrap_or(DEFAULT_RETRIEVE_DOCUMENTS_OFFSET),
params.limit.unwrap_or(DEFAULT_RETRIEVE_DOCUMENTS_LIMIT),
@@ -157,7 +157,7 @@ pub struct UpdateDocumentsQuery {
/// Route used when the payload type is "application/json"
/// Used to add or replace documents
pub async fn add_documents(
data: GuardedData<Private, Data>,
data: GuardedData<Private, MeiliSearch>,
path: web::Path<IndexParam>,
params: web::Query<UpdateDocumentsQuery>,
body: Payload,
@@ -180,7 +180,7 @@ pub async fn add_documents(
/// Route used when the payload type is "application/json"
/// Used to add or replace documents
pub async fn update_documents(
data: GuardedData<Private, Data>,
data: GuardedData<Private, MeiliSearch>,
path: web::Path<IndexParam>,
params: web::Query<UpdateDocumentsQuery>,
body: Payload,
@@ -201,7 +201,7 @@ pub async fn update_documents(
}
//pub async fn delete_documents(
//data: GuardedData<Private, Data>,
//data: GuardedData<Private, MeiliSearch>,
//path: web::Path<IndexParam>,
//body: web::Json<Vec<Value>>,
//) -> Result<HttpResponse, ResponseError> {
@@ -221,7 +221,7 @@ pub async fn update_documents(
//}
//pub async fn clear_all_documents(
//data: GuardedData<Private, Data>,
//data: GuardedData<Private, MeiliSearch>,
//path: web::Path<IndexParam>,
//) -> Result<HttpResponse, ResponseError> {
//let update_status = data.clear_documents(path.index_uid.clone()).await?;

View File

@@ -1,12 +1,13 @@
use actix_web::{web, HttpResponse};
use chrono::{DateTime, Utc};
use log::debug;
use meilisearch_lib::MeiliSearch;
use meilisearch_lib::index_controller::IndexSettings;
use serde::{Deserialize, Serialize};
use crate::error::ResponseError;
use crate::extractors::authentication::{policies::*, GuardedData};
use crate::routes::IndexParam;
use crate::Data;
pub mod documents;
pub mod search;
@@ -35,7 +36,7 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
);
}
pub async fn list_indexes(data: GuardedData<Private, Data>) -> Result<HttpResponse, ResponseError> {
pub async fn list_indexes(data: GuardedData<Private, MeiliSearch>) -> Result<HttpResponse, ResponseError> {
let indexes = data.list_indexes().await?;
debug!("returns: {:?}", indexes);
Ok(HttpResponse::Ok().json(indexes))
@@ -49,7 +50,7 @@ pub struct IndexCreateRequest {
}
//pub async fn create_index(
//data: GuardedData<Private, Data>,
//data: GuardedData<Private, MeiliSearch>,
//body: web::Json<IndexCreateRequest>,
//) -> Result<HttpResponse, ResponseError> {
//let body = body.into_inner();
@@ -75,30 +76,34 @@ pub struct UpdateIndexResponse {
}
pub async fn get_index(
data: GuardedData<Private, Data>,
data: GuardedData<Private, MeiliSearch>,
path: web::Path<IndexParam>,
) -> Result<HttpResponse, ResponseError> {
let meta = data.index(path.index_uid.clone()).await?;
let meta = data.get_index(path.index_uid.clone()).await?;
debug!("returns: {:?}", meta);
Ok(HttpResponse::Ok().json(meta))
}
pub async fn update_index(
data: GuardedData<Private, Data>,
data: GuardedData<Private, MeiliSearch>,
path: web::Path<IndexParam>,
body: web::Json<UpdateIndexRequest>,
) -> Result<HttpResponse, ResponseError> {
debug!("called with params: {:?}", body);
let body = body.into_inner();
let settings = IndexSettings {
uid: body.uid,
primary_key: body.primary_key,
};
let meta = data
.update_index(path.into_inner().index_uid, body.primary_key, body.uid)
.update_index(path.into_inner().index_uid, settings)
.await?;
debug!("returns: {:?}", meta);
Ok(HttpResponse::Ok().json(meta))
}
//pub async fn delete_index(
//data: GuardedData<Private, Data>,
//data: GuardedData<Private, MeiliSearch>,
//path: web::Path<IndexParam>,
//) -> Result<HttpResponse, ResponseError> {
//data.delete_index(path.index_uid.clone()).await?;
@@ -106,7 +111,7 @@ pub async fn update_index(
//}
pub async fn get_index_stats(
data: GuardedData<Private, Data>,
data: GuardedData<Private, MeiliSearch>,
path: web::Path<IndexParam>,
) -> Result<HttpResponse, ResponseError> {
let response = data.get_index_stats(path.index_uid.clone()).await?;

View File

@@ -1,13 +1,13 @@
use actix_web::{web, HttpResponse};
use log::debug;
use meilisearch_lib::MeiliSearch;
use meilisearch_lib::index::{default_crop_length, SearchQuery, DEFAULT_SEARCH_LIMIT};
use serde::Deserialize;
use serde_json::Value;
use crate::error::ResponseError;
use crate::extractors::authentication::{policies::*, GuardedData};
use crate::index::{default_crop_length, SearchQuery, DEFAULT_SEARCH_LIMIT};
use crate::routes::IndexParam;
use crate::Data;
pub fn configure(cfg: &mut web::ServiceConfig) {
cfg.service(
@@ -82,7 +82,7 @@ impl From<SearchQueryGet> for SearchQuery {
}
pub async fn search_with_url_query(
data: GuardedData<Public, Data>,
data: GuardedData<Public, MeiliSearch>,
path: web::Path<IndexParam>,
params: web::Query<SearchQueryGet>,
) -> Result<HttpResponse, ResponseError> {
@@ -99,7 +99,7 @@ pub async fn search_with_url_query(
}
pub async fn search_with_post(
data: GuardedData<Public, Data>,
data: GuardedData<Public, MeiliSearch>,
path: web::Path<IndexParam>,
params: web::Json<SearchQuery>,
) -> Result<HttpResponse, ResponseError> {

View File

@@ -148,7 +148,7 @@
//);
//pub async fn update_all(
//data: GuardedData<Private, Data>,
//data: GuardedData<Private, MeiliSearch>,
//index_uid: web::Path<String>,
//body: web::Json<Settings<Unchecked>>,
//) -> Result<HttpResponse, ResponseError> {
@@ -162,7 +162,7 @@
//}
//pub async fn get_all(
//data: GuardedData<Private, Data>,
//data: GuardedData<Private, MeiliSearch>,
//index_uid: web::Path<String>,
//) -> Result<HttpResponse, ResponseError> {
//let settings = data.settings(index_uid.into_inner()).await?;
@@ -171,7 +171,7 @@
//}
//pub async fn delete_all(
//data: GuardedData<Private, Data>,
//data: GuardedData<Private, MeiliSearch>,
//index_uid: web::Path<String>,
//) -> Result<HttpResponse, ResponseError> {
//let settings = Settings::cleared();

View File

@@ -1,12 +1,12 @@
use actix_web::{web, HttpResponse};
use chrono::{DateTime, Utc};
use log::debug;
use meilisearch_lib::MeiliSearch;
use serde::{Deserialize, Serialize};
use crate::error::ResponseError;
use crate::extractors::authentication::{policies::*, GuardedData};
use crate::routes::{IndexParam, UpdateStatusResponse};
use crate::Data;
pub fn configure(cfg: &mut web::ServiceConfig) {
cfg.service(web::resource("").route(web::get().to(get_all_updates_status)))
@@ -37,12 +37,12 @@ pub struct UpdateParam {
}
pub async fn get_update_status(
data: GuardedData<Private, Data>,
data: GuardedData<Private, MeiliSearch>,
path: web::Path<UpdateParam>,
) -> Result<HttpResponse, ResponseError> {
let params = path.into_inner();
let meta = data
.get_update_status(params.index_uid, params.update_id)
.update_status(params.index_uid, params.update_id)
.await?;
let meta = UpdateStatusResponse::from(meta);
debug!("returns: {:?}", meta);
@@ -50,10 +50,10 @@ pub async fn get_update_status(
}
pub async fn get_all_updates_status(
data: GuardedData<Private, Data>,
data: GuardedData<Private, MeiliSearch>,
path: web::Path<IndexParam>,
) -> Result<HttpResponse, ResponseError> {
let metas = data.get_updates_status(path.into_inner().index_uid).await?;
let metas = data.all_update_status(path.into_inner().index_uid).await?;
let metas = metas
.into_iter()
.map(UpdateStatusResponse::from)

View File

@@ -5,12 +5,12 @@ use chrono::{DateTime, Utc};
use log::debug;
use serde::{Deserialize, Serialize};
use meilisearch_lib::{MeiliSearch, UpdateResult, UpdateStatus, RegisterUpdate};
use meilisearch_lib::index::{Settings, Unchecked};
use crate::error::ResponseError;
use crate::extractors::authentication::{policies::*, GuardedData};
use crate::index::{Settings, Unchecked};
use crate::index_controller::update_actor::RegisterUpdate;
use crate::index_controller::{UpdateResult, UpdateStatus};
use crate::{ApiKeys, Data};
use crate::ApiKeys;
mod dump;
mod indexes;
@@ -187,15 +187,17 @@ impl From<UpdateStatus> for UpdateStatusResponse {
let duration = Duration::from_millis(duration as u64).as_secs_f64();
let update_id = failed.id();
let response = failed.error;
let processed_at = failed.failed_at;
let enqueued_at = failed.from.from.enqueued_at;
let response = failed.into();
let content = FailedUpdateResult {
update_id,
update_type,
response,
duration,
enqueued_at: failed.from.from.enqueued_at,
processed_at: failed.failed_at,
enqueued_at,
processed_at,
};
UpdateStatusResponse::Failed { content }
}
@@ -230,7 +232,7 @@ pub async fn running() -> HttpResponse {
HttpResponse::Ok().json(serde_json::json!({ "status": "MeiliSearch is running" }))
}
async fn get_stats(data: GuardedData<Private, Data>) -> Result<HttpResponse, ResponseError> {
async fn get_stats(data: GuardedData<Private, MeiliSearch>) -> Result<HttpResponse, ResponseError> {
let response = data.get_all_stats().await?;
debug!("returns: {:?}", response);
@@ -245,7 +247,7 @@ struct VersionResponse {
pkg_version: String,
}
async fn get_version(_data: GuardedData<Private, Data>) -> HttpResponse {
async fn get_version(_data: GuardedData<Private, MeiliSearch>) -> HttpResponse {
let commit_sha = option_env!("VERGEN_GIT_SHA").unwrap_or("unknown");
let commit_date = option_env!("VERGEN_GIT_COMMIT_TIMESTAMP").unwrap_or("unknown");
@@ -288,7 +290,7 @@ mod test {
macro_rules! impl_is_policy {
($($param:ident)*) => {
impl<Policy, Func, $($param,)* Res> Is<Policy, (($($param,)*), Res)> for Func
where Func: Fn(GuardedData<Policy, Data>, $($param,)*) -> Res {}
where Func: Fn(GuardedData<Policy, MeiliSearch>, $($param,)*) -> Res {}
};
}

View File

@@ -0,0 +1,72 @@
[package]
name = "meilisearch-lib"
version = "0.1.0"
edition = "2018"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
actix-cors = { git = "https://github.com/MarinPostma/actix-extras.git", rev = "963ac94d" }
actix-web = { version = "4.0.0-beta.9", features = ["rustls"] }
actix-web-static-files = { git = "https://github.com/MarinPostma/actix-web-static-files.git", rev = "39d8006", optional = true }
anyhow = { version = "1.0.43", features = ["backtrace"] }
async-stream = "0.3.2"
async-trait = "0.1.51"
arc-swap = "1.3.2"
byte-unit = { version = "4.0.12", default-features = false, features = ["std"] }
bytes = "1.1.0"
chrono = { version = "0.4.19", features = ["serde"] }
crossbeam-channel = "0.5.1"
either = "1.6.1"
env_logger = "0.9.0"
flate2 = "1.0.21"
fst = "0.4.7"
futures = "0.3.17"
futures-util = "0.3.17"
heed = { git = "https://github.com/Kerollmops/heed", tag = "v0.12.1" }
http = "0.2.4"
indexmap = { version = "1.7.0", features = ["serde-1"] }
itertools = "0.10.1"
log = "0.4.14"
main_error = "0.1.1"
meilisearch-error = { path = "../meilisearch-error" }
meilisearch-tokenizer = { git = "https://github.com/meilisearch/tokenizer.git", tag = "v0.2.5" }
memmap = "0.7.0"
milli = { git = "https://github.com/meilisearch/milli.git", rev = "6de1b41" }
mime = "0.3.16"
num_cpus = "1.13.0"
once_cell = "1.8.0"
parking_lot = "0.11.2"
rand = "0.8.4"
rayon = "1.5.1"
regex = "1.5.4"
rustls = "0.19.1"
serde = { version = "1.0.130", features = ["derive"] }
serde_json = { version = "1.0.67", features = ["preserve_order"] }
sha2 = "0.9.6"
siphasher = "0.3.7"
slice-group-by = "0.2.6"
structopt = "0.3.23"
tar = "0.4.37"
tempfile = "3.2.0"
thiserror = "1.0.28"
tokio = { version = "1.11.0", features = ["full"] }
uuid = { version = "0.8.2", features = ["serde"] }
walkdir = "2.3.2"
obkv = "0.2.0"
pin-project = "1.0.8"
whoami = { version = "1.1.3", optional = true }
reqwest = { version = "0.11.4", features = ["json", "rustls-tls"], default-features = false, optional = true }
serdeval = "0.1.0"
sysinfo = "0.20.2"
tokio-stream = "0.1.7"
erased-serde = "0.3.16"
[dev-dependencies]
actix-rt = "2.2.0"
assert-json-diff = { branch = "master", git = "https://github.com/qdequele/assert-json-diff" }
mockall = "0.10.2"
paste = "1.0.5"
serde_url_params = "0.2.1"
tempdir = "0.3.7"
urlencoding = "2.1.0"

View File

@@ -0,0 +1,62 @@
use std::error::Error;
use std::fmt;
use meilisearch_error::{Code, ErrorCode};
use milli::UserError;
macro_rules! internal_error {
($target:ty : $($other:path), *) => {
$(
impl From<$other> for $target {
fn from(other: $other) -> Self {
Self::Internal(Box::new(other))
}
}
)*
}
}
#[derive(Debug)]
pub struct MilliError<'a>(pub &'a milli::Error);
impl Error for MilliError<'_> {}
impl fmt::Display for MilliError<'_> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
self.0.fmt(f)
}
}
impl ErrorCode for MilliError<'_> {
fn error_code(&self) -> Code {
match self.0 {
milli::Error::InternalError(_) => Code::Internal,
milli::Error::IoError(_) => Code::Internal,
milli::Error::UserError(ref error) => {
match error {
// TODO: wait for spec for new error codes.
| UserError::SerdeJson(_)
| UserError::MaxDatabaseSizeReached
| UserError::InvalidCriterionName { .. }
| UserError::InvalidDocumentId { .. }
| UserError::InvalidStoreFile
| UserError::NoSpaceLeftOnDevice
| UserError::InvalidAscDescSyntax { .. }
| UserError::DocumentLimitReached => Code::Internal,
UserError::AttributeLimitReached => Code::MaxFieldsLimitExceeded,
UserError::InvalidFilter(_) => Code::Filter,
UserError::InvalidFilterAttribute(_) => Code::Filter,
UserError::InvalidSortName { .. } => Code::Sort,
UserError::MissingDocumentId { .. } => Code::MissingDocumentId,
UserError::MissingPrimaryKey => Code::MissingPrimaryKey,
UserError::PrimaryKeyCannotBeChanged => Code::PrimaryKeyAlreadyPresent,
UserError::PrimaryKeyCannotBeReset => Code::PrimaryKeyAlreadyPresent,
UserError::SortRankingRuleMissing => Code::Sort,
UserError::UnknownInternalDocumentId { .. } => Code::DocumentNotFound,
UserError::InvalidFacetsDistribution { .. } => Code::BadRequest,
UserError::InvalidSortableAttribute { .. } => Code::Sort,
}
}
}
}
}

View File

@@ -6,7 +6,7 @@ use heed::RoTxn;
use indexmap::IndexMap;
use serde::{Deserialize, Serialize};
use crate::option::IndexerOpts;
use crate::options::IndexerOpts;
use super::error::Result;
use super::{Index, Settings, Unchecked};

View File

@@ -14,7 +14,7 @@ use error::Result;
pub use search::{default_crop_length, SearchQuery, SearchResult, DEFAULT_SEARCH_LIMIT};
pub use updates::{Checked, Facets, Settings, Unchecked};
use crate::helpers::EnvSizer;
use crate::EnvSizer;
use crate::index_controller::update_file_store::UpdateFileStore;
use self::error::IndexError;

View File

@@ -5,7 +5,7 @@ use rayon::ThreadPool;
use crate::index_controller::update_actor::RegisterUpdate;
use crate::index_controller::{Failed, Processed, Processing};
use crate::option::IndexerOpts;
use crate::options::IndexerOpts;
pub struct UpdateHandler {
max_nb_chunks: Option<usize>,
@@ -66,7 +66,7 @@ impl UpdateHandler {
match result {
Ok(result) => Ok(meta.process(result)),
Err(e) => Err(meta.fail(e.into())),
Err(e) => Err(meta.fail(e)),
}
}
}

View File

@@ -11,7 +11,7 @@ use crate::index_controller::{self, uuid_resolver::HeedUuidStore, IndexMetadata}
use crate::index_controller::{asc_ranking_rule, desc_ranking_rule};
use crate::{
index::Unchecked,
option::IndexerOpts,
options::IndexerOpts,
};
#[derive(Serialize, Deserialize, Debug)]

View File

@@ -6,7 +6,7 @@ use serde::{Deserialize, Serialize};
use crate::index::Index;
use crate::index_controller::{update_actor::UpdateStore, uuid_resolver::HeedUuidStore};
use crate::option::IndexerOpts;
use crate::options::IndexerOpts;
#[derive(Serialize, Deserialize, Debug)]
#[serde(rename_all = "camelCase")]

View File

@@ -18,7 +18,7 @@ pub use message::DumpMsg;
use super::{update_actor::UpdateActorHandle, uuid_resolver::UuidResolverHandle};
use crate::index_controller::dump_actor::error::DumpActorError;
use crate::{helpers::compression, option::IndexerOpts};
use crate::options::IndexerOpts;
use error::Result;
mod actor;
@@ -112,7 +112,7 @@ pub fn load_dump(
let tmp_src = tempfile::tempdir_in(".")?;
let tmp_src_path = tmp_src.path();
compression::from_tar_gz(&src_path, tmp_src_path)?;
crate::from_tar_gz(&src_path, tmp_src_path)?;
let meta_path = tmp_src_path.join(META_FILE_NAME);
let mut meta_file = File::open(&meta_path)?;
@@ -162,6 +162,7 @@ impl<U, P> DumpTask<U, P>
where
U: UuidResolverHandle + Send + Sync + Clone + 'static,
P: UpdateActorHandle + Send + Sync + Clone + 'static,
{
async fn run(self) -> Result<()> {
trace!("Performing dump.");
@@ -186,7 +187,7 @@ where
let dump_path = tokio::task::spawn_blocking(move || -> Result<PathBuf> {
let temp_dump_file = tempfile::NamedTempFile::new_in(&self.path)?;
compression::to_tar_gz(temp_dump_path, temp_dump_file.path())
crate::to_tar_gz(temp_dump_path, temp_dump_file.path())
.map_err(|e| DumpActorError::Internal(e.into()))?;
let dump_path = self.path.join(self.uid).with_extension("dump");

View File

@@ -16,7 +16,7 @@ use crate::index::{
use crate::index_controller::{
get_arc_ownership_blocking, Failed, IndexStats, Processed, Processing,
};
use crate::option::IndexerOpts;
use crate::options::IndexerOpts;
use super::error::{IndexActorError, Result};
use super::{IndexMeta, IndexMsg, IndexSettings, IndexStore};
@@ -29,7 +29,9 @@ pub struct IndexActor<S> {
store: S,
}
impl<S: IndexStore + Sync + Send> IndexActor<S> {
impl<S> IndexActor<S>
where S: IndexStore + Sync + Send,
{
pub fn new(
receiver: mpsc::Receiver<IndexMsg>,
store: S,

View File

@@ -1,4 +1,4 @@
use crate::option::IndexerOpts;
use crate::options::IndexerOpts;
use std::path::{Path, PathBuf};
use tokio::sync::{mpsc, oneshot};

View File

@@ -22,8 +22,8 @@ use update_actor::UpdateActorHandle;
pub use updates::*;
use uuid_resolver::{error::UuidResolverError, UuidResolverHandle};
use crate::options::IndexerOpts;
use crate::index::{Checked, Document, SearchQuery, SearchResult, Settings};
use crate::option::Opt;
use error::Result;
use self::dump_actor::load_dump;
@@ -99,45 +99,58 @@ pub enum Update {
}
}
impl IndexController {
pub fn new(path: impl AsRef<Path>, options: &Opt) -> anyhow::Result<Self> {
let index_size = options.max_index_size.get_bytes() as usize;
let update_store_size = options.max_index_size.get_bytes() as usize;
#[derive(Default, Debug)]
pub struct IndexControllerBuilder {
max_index_size: Option<usize>,
max_update_store_size: Option<usize>,
snapshot_dir: Option<PathBuf>,
import_snapshot: Option<PathBuf>,
ignore_snapshot_if_db_exists: bool,
ignore_missing_snapshot: bool,
dump_src: Option<PathBuf>,
dump_dst: Option<PathBuf>,
}
if let Some(ref path) = options.import_snapshot {
impl IndexControllerBuilder {
pub fn build(self, db_path: impl AsRef<Path>, indexer_options: IndexerOpts) -> anyhow::Result<IndexController> {
let index_size = self.max_index_size.ok_or_else(|| anyhow::anyhow!("Missing index size"))?;
let update_store_size = self.max_index_size.ok_or_else(|| anyhow::anyhow!("Missing update database size"))?;
if let Some(ref path) = self.import_snapshot {
info!("Loading from snapshot {:?}", path);
load_snapshot(
&options.db_path,
db_path.as_ref(),
path,
options.ignore_snapshot_if_db_exists,
options.ignore_missing_snapshot,
self.ignore_snapshot_if_db_exists,
self.ignore_missing_snapshot,
)?;
} else if let Some(ref src_path) = options.import_dump {
} else if let Some(ref src_path) = self.dump_src {
load_dump(
&options.db_path,
db_path.as_ref(),
src_path,
options.max_index_size.get_bytes() as usize,
options.max_udb_size.get_bytes() as usize,
&options.indexer_options,
index_size,
update_store_size,
&indexer_options,
)?;
}
std::fs::create_dir_all(&path)?;
std::fs::create_dir_all(db_path.as_ref())?;
let uuid_resolver = uuid_resolver::UuidResolverHandleImpl::new(&path)?;
let uuid_resolver = uuid_resolver::UuidResolverHandleImpl::new(&db_path)?;
let index_handle =
index_actor::IndexActorHandleImpl::new(&path, index_size, &options.indexer_options)?;
index_actor::IndexActorHandleImpl::new(&db_path, index_size, &indexer_options)?;
let update_handle = update_actor::UpdateActorHandleImpl::new(
index_handle.clone(),
&path,
&db_path,
update_store_size,
)?;
let dump_handle = dump_actor::DumpActorHandleImpl::new(
&options.dumps_dir,
&self.dump_dst.ok_or_else(|| anyhow::anyhow!("Missing dump directory path"))?,
uuid_resolver.clone(),
update_handle.clone(),
options.max_index_size.get_bytes() as usize,
options.max_udb_size.get_bytes() as usize,
index_size,
update_store_size,
)?;
//if options.schedule_snapshot {
@@ -156,7 +169,7 @@ impl IndexController {
//tokio::task::spawn(snapshot_service.run());
//}
Ok(Self {
Ok(IndexController {
uuid_resolver,
index_handle,
update_handle,
@@ -164,6 +177,59 @@ impl IndexController {
})
}
/// Set the index controller builder's max update store size.
pub fn set_max_update_store_size(&mut self, max_update_store_size: usize) -> &mut Self {
self.max_update_store_size.replace(max_update_store_size);
self
}
pub fn set_max_index_size(&mut self, size: usize) -> &mut Self {
self.max_index_size.replace(size);
self
}
/// Set the index controller builder's snapshot path.
pub fn set_snapshot_dir(&mut self, snapshot_dir: PathBuf) -> &mut Self {
self.snapshot_dir.replace(snapshot_dir);
self
}
/// Set the index controller builder's ignore snapshot if db exists.
pub fn set_ignore_snapshot_if_db_exists(&mut self, ignore_snapshot_if_db_exists: bool) -> &mut Self {
self.ignore_snapshot_if_db_exists = ignore_snapshot_if_db_exists;
self
}
/// Set the index controller builder's ignore missing snapshot.
pub fn set_ignore_missing_snapshot(&mut self, ignore_missing_snapshot: bool) -> &mut Self {
self.ignore_missing_snapshot = ignore_missing_snapshot;
self
}
/// Set the index controller builder's dump src.
pub fn set_dump_src(&mut self, dump_src: PathBuf) -> &mut Self {
self.dump_src.replace(dump_src);
self
}
/// Set the index controller builder's dump dst.
pub fn set_dump_dst(&mut self, dump_dst: PathBuf) -> &mut Self {
self.dump_dst.replace(dump_dst);
self
}
/// Set the index controller builder's import snapshot.
pub fn set_import_snapshot(&mut self, import_snapshot: PathBuf) -> &mut Self {
self.import_snapshot.replace(import_snapshot);
self
}
}
impl IndexController {
pub fn builder() -> IndexControllerBuilder {
IndexControllerBuilder::default()
}
pub async fn register_update(&self, uid: &str, update: Update) -> Result<UpdateStatus> {
match self.uuid_resolver.get(uid.to_string()).await {
Ok(uuid) => {

View File

@@ -2,8 +2,6 @@ use std::path::Path;
use anyhow::bail;
use crate::helpers::compression;
//pub struct SnapshotService<U, R> {
//uuid_resolver_handle: R,
//update_handle: U,
@@ -93,7 +91,7 @@ pub fn load_snapshot(
ignore_missing_snapshot: bool,
) -> anyhow::Result<()> {
if !db_path.as_ref().exists() && snapshot_path.as_ref().exists() {
match compression::from_tar_gz(snapshot_path, &db_path) {
match crate::from_tar_gz(snapshot_path, &db_path) {
Ok(()) => Ok(()),
Err(e) => {
// clean created db folder

View File

@@ -41,6 +41,7 @@ impl UpdateActorHandle for UpdateActorHandleImpl {
self.sender.send(msg).await?;
receiver.await?
}
async fn update_status(&self, uuid: Uuid, id: u64) -> Result<UpdateStatus> {
let (ret, receiver) = oneshot::channel();
let msg = UpdateMsg::GetUpdate { uuid, id, ret };

View File

@@ -28,7 +28,7 @@ use codec::*;
use super::RegisterUpdate;
use super::error::Result;
use crate::helpers::EnvSizer;
use crate::EnvSizer;
use crate::index_controller::update_files_path;
use crate::index_controller::{index_actor::CONCURRENT_INDEX_MSG, updates::*, IndexActorHandle};
@@ -323,7 +323,7 @@ impl UpdateStore {
let result =
match handle.block_on(index_handle.update(index_uuid, processing.clone())) {
Ok(result) => result,
Err(e) => Err(processing.fail(e.into())),
Err(e) => Err(processing.fail(e)),
};
// Once the pending update have been successfully processed

View File

@@ -1,11 +1,12 @@
use std::{error::Error, fmt::Display};
use chrono::{DateTime, Utc};
use meilisearch_error::{Code, ErrorCode};
use milli::update::{DocumentAdditionResult, IndexDocumentsMethod};
use serde::{Deserialize, Serialize};
use crate::{
error::ResponseError,
index::{Settings, Unchecked},
};
use crate::index::{Settings, Unchecked};
use super::update_actor::RegisterUpdate;
@@ -115,10 +116,13 @@ impl Processing {
}
}
pub fn fail(self, error: ResponseError) -> Failed {
pub fn fail(self, error: impl ErrorCode) -> Failed {
let msg = error.to_string();
let code = error.error_code();
Failed {
from: self,
error,
msg,
code,
failed_at: Utc::now(),
}
}
@@ -147,10 +151,25 @@ impl Aborted {
pub struct Failed {
#[serde(flatten)]
pub from: Processing,
pub error: ResponseError,
pub msg: String,
pub code: Code,
pub failed_at: DateTime<Utc>,
}
impl Display for Failed {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
self.msg.fmt(f)
}
}
impl Error for Failed { }
impl ErrorCode for Failed {
fn error_code(&self) -> Code {
self.code
}
}
impl Failed {
pub fn id(&self) -> u64 {
self.from.id()

View File

@@ -9,7 +9,7 @@ use serde::{Deserialize, Serialize};
use uuid::Uuid;
use super::{error::UuidResolverError, Result, UUID_STORE_SIZE};
use crate::helpers::EnvSizer;
use crate::EnvSizer;
#[derive(Serialize, Deserialize)]
struct DumpEntry {

View File

@@ -0,0 +1,53 @@
#[macro_use]
pub mod error;
pub mod options;
pub mod index;
pub mod index_controller;
pub use index_controller::{UpdateResult, UpdateStatus, IndexController as MeiliSearch, update_actor::RegisterUpdate};
use walkdir::WalkDir;
pub trait EnvSizer {
fn size(&self) -> u64;
}
impl EnvSizer for heed::Env {
fn size(&self) -> u64 {
WalkDir::new(self.path())
.into_iter()
.filter_map(|entry| entry.ok())
.filter_map(|entry| entry.metadata().ok())
.filter(|metadata| metadata.is_file())
.fold(0, |acc, m| acc + m.len())
}
}
use std::fs::{create_dir_all, File};
use std::io::Write;
use std::path::Path;
use flate2::{read::GzDecoder, write::GzEncoder, Compression};
use tar::{Archive, Builder};
pub fn to_tar_gz(src: impl AsRef<Path>, dest: impl AsRef<Path>) -> anyhow::Result<()> {
let mut f = File::create(dest)?;
let gz_encoder = GzEncoder::new(&mut f, Compression::default());
let mut tar_encoder = Builder::new(gz_encoder);
tar_encoder.append_dir_all(".", src)?;
let gz_encoder = tar_encoder.into_inner()?;
gz_encoder.finish()?;
f.flush()?;
Ok(())
}
pub fn from_tar_gz(src: impl AsRef<Path>, dest: impl AsRef<Path>) -> anyhow::Result<()> {
let f = File::open(&src)?;
let gz = GzDecoder::new(f);
let mut ar = Archive::new(gz);
create_dir_all(&dest)?;
ar.unpack(&dest)?;
Ok(())
}

View File

@@ -0,0 +1,115 @@
use core::fmt;
use std::{ops::Deref, str::FromStr};
use byte_unit::{Byte, ByteError};
use milli::CompressionType;
use structopt::StructOpt;
use sysinfo::{RefreshKind, System, SystemExt};
#[derive(Debug, Clone, StructOpt)]
pub struct IndexerOpts {
/// The amount of documents to skip before printing
/// a log regarding the indexing advancement.
#[structopt(long, default_value = "100000")] // 100k
pub log_every_n: usize,
/// Grenad max number of chunks in bytes.
#[structopt(long)]
pub max_nb_chunks: Option<usize>,
/// The maximum amount of memory the indexer will use. It defaults to 2/3
/// of the available memory. It is recommended to use something like 80%-90%
/// of the available memory, no more.
///
/// In case the engine is unable to retrieve the available memory the engine will
/// try to use the memory it needs but without real limit, this can lead to
/// Out-Of-Memory issues and it is recommended to specify the amount of memory to use.
#[structopt(long, default_value)]
pub max_memory: MaxMemory,
/// The name of the compression algorithm to use when compressing intermediate
/// Grenad chunks while indexing documents.
///
/// Choosing a fast algorithm will make the indexing faster but may consume more memory.
#[structopt(long, default_value = "snappy", possible_values = &["snappy", "zlib", "lz4", "lz4hc", "zstd"])]
pub chunk_compression_type: CompressionType,
/// The level of compression of the chosen algorithm.
#[structopt(long, requires = "chunk-compression-type")]
pub chunk_compression_level: Option<u32>,
/// Number of parallel jobs for indexing, defaults to # of CPUs.
#[structopt(long)]
pub indexing_jobs: Option<usize>,
}
impl Default for IndexerOpts {
fn default() -> Self {
Self {
log_every_n: 100_000,
max_nb_chunks: None,
max_memory: MaxMemory::default(),
chunk_compression_type: CompressionType::None,
chunk_compression_level: None,
indexing_jobs: None,
}
}
}
/// A type used to detect the max memory available and use 2/3 of it.
#[derive(Debug, Clone, Copy)]
pub struct MaxMemory(Option<Byte>);
impl FromStr for MaxMemory {
type Err = ByteError;
fn from_str(s: &str) -> Result<MaxMemory, ByteError> {
Byte::from_str(s).map(Some).map(MaxMemory)
}
}
impl Default for MaxMemory {
fn default() -> MaxMemory {
MaxMemory(
total_memory_bytes()
.map(|bytes| bytes * 2 / 3)
.map(Byte::from_bytes),
)
}
}
impl fmt::Display for MaxMemory {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self.0 {
Some(memory) => write!(f, "{}", memory.get_appropriate_unit(true)),
None => f.write_str("unknown"),
}
}
}
impl Deref for MaxMemory {
type Target = Option<Byte>;
fn deref(&self) -> &Self::Target {
&self.0
}
}
impl MaxMemory {
pub fn unlimited() -> Self {
Self(None)
}
}
/// Returns the total amount of bytes available or `None` if this system isn't supported.
fn total_memory_bytes() -> Option<u64> {
if System::IS_SUPPORTED {
let memory_kind = RefreshKind::new().with_memory();
let mut system = System::new_with_specifics(memory_kind);
system.refresh_memory();
Some(system.total_memory() * 1024) // KiB into bytes
} else {
None
}
}