refactor index actor

This commit is contained in:
mpostma
2021-09-22 15:07:04 +02:00
parent 12542bf922
commit 5353be74c3
18 changed files with 590 additions and 596 deletions

12
Cargo.lock generated
View File

@@ -769,6 +769,17 @@ dependencies = [
"lazy_static",
]
[[package]]
name = "derivative"
version = "2.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fcc3dd5e9e9c0b295d6e1e4d811fb6f157d5ffd784b8d202fc62eac8035a770b"
dependencies = [
"proc-macro2 1.0.29",
"quote 1.0.9",
"syn 1.0.76",
]
[[package]]
name = "derive_more"
version = "0.99.16"
@@ -1674,6 +1685,7 @@ dependencies = [
"bytes",
"chrono",
"crossbeam-channel",
"derivative",
"either",
"env_logger",
"erased-serde",

View File

@@ -3,9 +3,10 @@ use std::time::Duration;
use actix_web::{web, HttpResponse};
use chrono::{DateTime, Utc};
use log::debug;
use meilisearch_lib::index_controller::updates::status::{UpdateResult, UpdateStatus};
use serde::{Deserialize, Serialize};
use meilisearch_lib::{MeiliSearch, UpdateResult, UpdateStatus, RegisterUpdate};
use meilisearch_lib::{MeiliSearch, RegisterUpdate};
use meilisearch_lib::index::{Settings, Unchecked};
use crate::error::ResponseError;

View File

@@ -61,6 +61,7 @@ serdeval = "0.1.0"
sysinfo = "0.20.2"
tokio-stream = "0.1.7"
erased-serde = "0.3.16"
derivative = "2.2.0"
[dev-dependencies]
actix-rt = "2.2.0"

View File

@@ -35,6 +35,9 @@ pub struct Checked;
#[derive(Clone, Default, Debug, Serialize, Deserialize)]
pub struct Unchecked;
/// Holds all the settings for an index. `T` can either be `Checked` if they represent settings
/// whose validity is guaranteed, or `Unchecked` if they need to be validated. In the latter case, a
/// call to `check` will return a `Settings<Checked>` from a `Settings<Unchecked>`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
#[serde(rename_all = "camelCase")]

View File

@@ -6,7 +6,7 @@ use meilisearch_error::ErrorCode;
use crate::index::error::IndexError;
use super::dump_actor::error::DumpActorError;
use super::index_actor::error::IndexActorError;
use super::indexes::error::IndexActorError;
use super::updates::error::UpdateActorError;
use super::uuid_resolver::error::UuidResolverError;

View File

@@ -1,161 +0,0 @@
use crate::{index_controller::updates::status::{Failed, Processed, Processing}, options::IndexerOpts};
use std::path::{Path, PathBuf};
use tokio::sync::{mpsc, oneshot};
use uuid::Uuid;
use crate::{
index::Checked,
index_controller::{IndexSettings, IndexStats},
};
use crate::{
index::{Document, SearchQuery, SearchResult, Settings},
};
use super::error::Result;
use super::{IndexActor, IndexActorHandle, IndexMeta, IndexMsg, MapIndexStore};
/// Cloneable handle used to send requests to the index actor task.
///
/// It only wraps the sending half of the channel created in `IndexActorHandleImpl::new`.
#[derive(Clone)]
pub struct IndexActorHandleImpl {
// Sending half of the `mpsc` channel whose receiver is given to `IndexActor::new`.
sender: mpsc::Sender<IndexMsg>,
}
/// Channel-backed implementation of [`IndexActorHandle`].
///
/// Every method wraps its arguments in the matching [`IndexMsg`] variant, sends it to the actor
/// and waits for the answer on a dedicated oneshot channel.
///
/// # Panics
///
/// Every method panics with "IndexActor has been killed" if the reply channel is dropped before
/// an answer is sent.
#[async_trait::async_trait]
impl IndexActorHandle for IndexActorHandleImpl {
    async fn create_index(&self, uuid: Uuid, primary_key: Option<String>) -> Result<IndexMeta> {
        self.request(move |ret| IndexMsg::CreateIndex {
            ret,
            uuid,
            primary_key,
        })
        .await
    }

    async fn update(
        &self,
        uuid: Uuid,
        meta: Processing,
    ) -> Result<std::result::Result<Processed, Failed>> {
        self.request(move |ret| IndexMsg::Update { ret, meta, uuid })
            .await
    }

    async fn search(&self, uuid: Uuid, query: SearchQuery) -> Result<SearchResult> {
        self.request(move |ret| IndexMsg::Search { uuid, query, ret })
            .await
    }

    async fn settings(&self, uuid: Uuid) -> Result<Settings<Checked>> {
        self.request(move |ret| IndexMsg::Settings { uuid, ret })
            .await
    }

    async fn documents(
        &self,
        uuid: Uuid,
        offset: usize,
        limit: usize,
        attributes_to_retrieve: Option<Vec<String>>,
    ) -> Result<Vec<Document>> {
        self.request(move |ret| IndexMsg::Documents {
            uuid,
            ret,
            offset,
            attributes_to_retrieve,
            limit,
        })
        .await
    }

    async fn document(
        &self,
        uuid: Uuid,
        doc_id: String,
        attributes_to_retrieve: Option<Vec<String>>,
    ) -> Result<Document> {
        self.request(move |ret| IndexMsg::Document {
            uuid,
            ret,
            doc_id,
            attributes_to_retrieve,
        })
        .await
    }

    async fn delete(&self, uuid: Uuid) -> Result<()> {
        self.request(move |ret| IndexMsg::Delete { uuid, ret }).await
    }

    async fn get_index_meta(&self, uuid: Uuid) -> Result<IndexMeta> {
        self.request(move |ret| IndexMsg::GetMeta { uuid, ret })
            .await
    }

    async fn update_index(&self, uuid: Uuid, index_settings: IndexSettings) -> Result<IndexMeta> {
        self.request(move |ret| IndexMsg::UpdateIndex {
            uuid,
            index_settings,
            ret,
        })
        .await
    }

    async fn snapshot(&self, uuid: Uuid, path: PathBuf) -> Result<()> {
        self.request(move |ret| IndexMsg::Snapshot { uuid, path, ret })
            .await
    }

    async fn dump(&self, uuid: Uuid, path: PathBuf) -> Result<()> {
        self.request(move |ret| IndexMsg::Dump { uuid, path, ret })
            .await
    }

    async fn get_index_stats(&self, uuid: Uuid) -> Result<IndexStats> {
        self.request(move |ret| IndexMsg::GetStats { uuid, ret })
            .await
    }
}

impl IndexActorHandleImpl {
    /// Sends the message produced by `build` to the actor and waits for its reply.
    ///
    /// `build` is handed the sending half of a fresh oneshot channel and must embed it in the
    /// message it returns.
    ///
    /// # Panics
    ///
    /// Panics with "IndexActor has been killed" if the reply channel is dropped without an
    /// answer.
    async fn request<T, F>(&self, build: F) -> Result<T>
    where
        F: FnOnce(oneshot::Sender<Result<T>>) -> IndexMsg,
    {
        let (ret, receiver) = oneshot::channel();
        // The send result is explicitly discarded: if the message could not be delivered, its
        // reply sender is dropped and the `expect` below fires.
        let _ = self.sender.send(build(ret)).await;
        receiver.await.expect("IndexActor has been killed")
    }
}
impl IndexActorHandleImpl {
    /// Builds the index actor, spawns it as a tokio task and returns a handle to it.
    ///
    /// The actor is given the receiving half of a channel bounded to 100 messages and a
    /// `MapIndexStore` created from `path` and `index_size`.
    ///
    /// # Errors
    ///
    /// Returns the error from `IndexActor::new` if the actor cannot be built.
    pub fn new(
        path: impl AsRef<Path>,
        index_size: usize,
        options: &IndexerOpts,
    ) -> anyhow::Result<Self> {
        let (sender, inbox) = mpsc::channel(100);
        let actor = IndexActor::new(inbox, MapIndexStore::new(&path, index_size), options)?;
        tokio::task::spawn(actor.run());
        Ok(Self { sender })
    }
}

View File

@@ -1,74 +0,0 @@
use std::path::PathBuf;
use tokio::sync::oneshot;
use uuid::Uuid;
use super::error::Result as IndexResult;
use crate::index::{Checked, Document, SearchQuery, SearchResult, Settings};
use crate::index_controller::IndexStats;
use crate::index_controller::updates::status::{Failed, Processed, Processing};
use super::{IndexMeta, IndexSettings};
/// Messages understood by the index actor.
///
/// Every variant names the target index through `uuid` and carries a `ret` oneshot sender on
/// which the outcome of the request is sent back.
// NOTE(review): the lint is silenced presumably because variant sizes differ a lot — confirm
// before boxing anything.
#[allow(clippy::large_enum_variant)]
pub enum IndexMsg {
/// Index creation request, with an optional primary key.
CreateIndex {
uuid: Uuid,
primary_key: Option<String>,
ret: oneshot::Sender<IndexResult<IndexMeta>>,
},
/// Request to process the update described by `meta`; the reply nests the update's own
/// `Processed`/`Failed` outcome inside the actor-level result.
Update {
uuid: Uuid,
meta: Processing,
ret: oneshot::Sender<IndexResult<Result<Processed, Failed>>>,
},
/// Search request carrying `query`.
Search {
uuid: Uuid,
query: SearchQuery,
ret: oneshot::Sender<IndexResult<SearchResult>>,
},
/// Request for the index's checked settings.
Settings {
uuid: Uuid,
ret: oneshot::Sender<IndexResult<Settings<Checked>>>,
},
/// Request for a list of documents, parameterised by `offset`, `limit` and an optional
/// attribute selection.
Documents {
uuid: Uuid,
attributes_to_retrieve: Option<Vec<String>>,
offset: usize,
limit: usize,
ret: oneshot::Sender<IndexResult<Vec<Document>>>,
},
/// Request for the single document identified by `doc_id`.
Document {
uuid: Uuid,
attributes_to_retrieve: Option<Vec<String>>,
doc_id: String,
ret: oneshot::Sender<IndexResult<Document>>,
},
/// Index deletion request.
Delete {
uuid: Uuid,
ret: oneshot::Sender<IndexResult<()>>,
},
/// Request for the index's `IndexMeta`.
GetMeta {
uuid: Uuid,
ret: oneshot::Sender<IndexResult<IndexMeta>>,
},
/// Request to apply `index_settings` to the index; replies with the resulting metadata.
UpdateIndex {
uuid: Uuid,
index_settings: IndexSettings,
ret: oneshot::Sender<IndexResult<IndexMeta>>,
},
/// Snapshot request. `path` is presumably the snapshot destination — confirm in the actor.
Snapshot {
uuid: Uuid,
path: PathBuf,
ret: oneshot::Sender<IndexResult<()>>,
},
/// Dump request. `path` is presumably the dump destination — confirm in the actor.
Dump {
uuid: Uuid,
path: PathBuf,
ret: oneshot::Sender<IndexResult<()>>,
},
/// Request for the index's `IndexStats`.
GetStats {
uuid: Uuid,
ret: oneshot::Sender<IndexResult<IndexStats>>,
},
}

View File

@@ -1,166 +0,0 @@
use std::path::PathBuf;
use chrono::{DateTime, Utc};
#[cfg(test)]
use mockall::automock;
use serde::{Deserialize, Serialize};
use uuid::Uuid;
use actor::IndexActor;
pub use actor::CONCURRENT_INDEX_MSG;
pub use handle_impl::IndexActorHandleImpl;
use message::IndexMsg;
use store::{IndexStore, MapIndexStore};
use crate::index::{Checked, Document, Index, SearchQuery, SearchResult, Settings};
use error::Result;
use super::{IndexSettings, IndexStats, updates::status::{Failed, Processed, Processing}};
mod actor;
pub mod error;
mod handle_impl;
mod message;
mod store;
/// Metadata describing an index; (de)serialized with camelCase field names.
#[derive(Debug, Serialize, Deserialize, Clone)]
#[serde(rename_all = "camelCase")]
pub struct IndexMeta {
// Value returned by `Index::created_at`; private, unlike the other two fields.
created_at: DateTime<Utc>,
/// Value returned by `Index::updated_at`.
pub updated_at: DateTime<Utc>,
/// Primary key of the index, if it has one.
pub primary_key: Option<String>,
}
impl IndexMeta {
    /// Reads the metadata of `index` inside a fresh read transaction.
    fn new(index: &Index) -> Result<Self> {
        let rtxn = index.read_txn()?;
        Self::new_txn(index, &rtxn)
    }

    /// Reads the metadata of `index` through the caller-provided read transaction `txn`.
    ///
    /// The creation date, update date and primary key are fetched in that order; the first
    /// failing read aborts and its error is returned.
    fn new_txn(index: &Index, txn: &heed::RoTxn) -> Result<Self> {
        Ok(Self {
            created_at: index.created_at(txn)?,
            updated_at: index.updated_at(txn)?,
            primary_key: index.primary_key(txn)?.map(String::from),
        })
    }
}
/// Asynchronous interface to the index actor.
///
/// Every method targets the index identified by `uuid`. In test builds `automock` is applied
/// to the trait, which provides the `MockIndexActorHandle` used by the test module.
#[async_trait::async_trait]
#[cfg_attr(test, automock)]
pub trait IndexActorHandle {
/// Creates the index, optionally with a primary key, and returns its metadata.
async fn create_index(&self, uuid: Uuid, primary_key: Option<String>) -> Result<IndexMeta>;
/// Processes the update described by `meta`. The outer `Result` is the actor-level outcome,
/// the inner one the update's own `Processed`/`Failed` outcome.
async fn update(
&self,
uuid: Uuid,
meta: Processing,
) -> Result<std::result::Result<Processed, Failed>>;
/// Runs `query` against the index.
async fn search(&self, uuid: Uuid, query: SearchQuery) -> Result<SearchResult>;
/// Returns the checked settings of the index.
async fn settings(&self, uuid: Uuid) -> Result<Settings<Checked>>;
/// Returns documents of the index, parameterised by `offset`, `limit` and an optional
/// attribute selection.
async fn documents(
&self,
uuid: Uuid,
offset: usize,
limit: usize,
attributes_to_retrieve: Option<Vec<String>>,
) -> Result<Vec<Document>>;
/// Returns the document identified by `doc_id`, with an optional attribute selection.
async fn document(
&self,
uuid: Uuid,
doc_id: String,
attributes_to_retrieve: Option<Vec<String>>,
) -> Result<Document>;
/// Deletes the index.
async fn delete(&self, uuid: Uuid) -> Result<()>;
/// Returns the metadata of the index.
async fn get_index_meta(&self, uuid: Uuid) -> Result<IndexMeta>;
/// Applies `index_settings` to the index and returns the resulting metadata.
async fn update_index(&self, uuid: Uuid, index_settings: IndexSettings) -> Result<IndexMeta>;
/// Snapshots the index; `path` is presumably the destination — confirm in the actor.
async fn snapshot(&self, uuid: Uuid, path: PathBuf) -> Result<()>;
/// Dumps the index; `path` is presumably the destination — confirm in the actor.
async fn dump(&self, uuid: Uuid, path: PathBuf) -> Result<()>;
/// Returns the statistics of the index.
async fn get_index_stats(&self, uuid: Uuid) -> Result<IndexStats>;
}
#[cfg(test)]
mod test {
    use std::sync::Arc;

    use super::*;

    /// Useful for passing around an `Arc<MockIndexActorHandle>` in tests.
    ///
    /// Every method forwards to the wrapped mock.
    #[async_trait::async_trait]
    impl IndexActorHandle for Arc<MockIndexActorHandle> {
        async fn create_index(&self, uuid: Uuid, primary_key: Option<String>) -> Result<IndexMeta> {
            self.as_ref().create_index(uuid, primary_key).await
        }

        // `IndexActorHandle::update` takes only `uuid` and `meta`; the impl must declare the
        // same parameters as the trait or the test build fails to compile.
        async fn update(
            &self,
            uuid: Uuid,
            meta: Processing,
        ) -> Result<std::result::Result<Processed, Failed>> {
            self.as_ref().update(uuid, meta).await
        }

        async fn search(&self, uuid: Uuid, query: SearchQuery) -> Result<SearchResult> {
            self.as_ref().search(uuid, query).await
        }

        async fn settings(&self, uuid: Uuid) -> Result<Settings<Checked>> {
            self.as_ref().settings(uuid).await
        }

        async fn documents(
            &self,
            uuid: Uuid,
            offset: usize,
            limit: usize,
            attributes_to_retrieve: Option<Vec<String>>,
        ) -> Result<Vec<Document>> {
            self.as_ref()
                .documents(uuid, offset, limit, attributes_to_retrieve)
                .await
        }

        async fn document(
            &self,
            uuid: Uuid,
            doc_id: String,
            attributes_to_retrieve: Option<Vec<String>>,
        ) -> Result<Document> {
            self.as_ref()
                .document(uuid, doc_id, attributes_to_retrieve)
                .await
        }

        async fn delete(&self, uuid: Uuid) -> Result<()> {
            self.as_ref().delete(uuid).await
        }

        async fn get_index_meta(&self, uuid: Uuid) -> Result<IndexMeta> {
            self.as_ref().get_index_meta(uuid).await
        }

        async fn update_index(
            &self,
            uuid: Uuid,
            index_settings: IndexSettings,
        ) -> Result<IndexMeta> {
            self.as_ref().update_index(uuid, index_settings).await
        }

        async fn snapshot(&self, uuid: Uuid, path: PathBuf) -> Result<()> {
            self.as_ref().snapshot(uuid, path).await
        }

        async fn dump(&self, uuid: Uuid, path: PathBuf) -> Result<()> {
            self.as_ref().dump(uuid, path).await
        }

        async fn get_index_stats(&self, uuid: Uuid) -> Result<IndexStats> {
            self.as_ref().get_index_stats(uuid).await
        }
    }
}

View File

@@ -1,3 +1,5 @@
use std::fmt;
use meilisearch_error::{Code, ErrorCode};
use crate::{error::MilliError, index::error::IndexError};
@@ -20,6 +22,20 @@ pub enum IndexActorError {
Milli(#[from] milli::Error),
}
impl<T> From<tokio::sync::mpsc::error::SendError<T>> for IndexActorError
where T: Send + Sync + 'static + fmt::Debug
{
fn from(other: tokio::sync::mpsc::error::SendError<T>) -> Self {
Self::Internal(Box::new(other))
}
}
impl From<tokio::sync::oneshot::error::RecvError> for IndexActorError {
fn from(other: tokio::sync::oneshot::error::RecvError) -> Self {
Self::Internal(Box::new(other))
}
}
macro_rules! internal_error {
($($other:path), *) => {
$(

View File

@@ -0,0 +1,212 @@
use std::path::PathBuf;
use tokio::sync::{mpsc, oneshot};
use uuid::Uuid;
use super::error::Result;
use crate::index::{Checked, Document, SearchQuery, SearchResult, Settings};
use crate::index_controller::updates::status::{Failed, Processed, Processing};
use crate::index_controller::{IndexSettings, IndexStats};
use super::IndexMeta;
/// Messages understood by the index actor.
///
/// Every variant names the target index through `uuid` and carries a `ret` oneshot sender on
/// which the outcome of the request is sent back.
// NOTE(review): the lint is silenced presumably because variant sizes differ a lot — confirm
// before boxing anything.
#[allow(clippy::large_enum_variant)]
#[derive(Debug)]
pub enum IndexMsg {
/// Index creation request, with an optional primary key.
CreateIndex {
uuid: Uuid,
primary_key: Option<String>,
ret: oneshot::Sender<Result<IndexMeta>>,
},
/// Request to process the update described by `meta`; the reply nests the update's own
/// `Processed`/`Failed` outcome inside the actor-level result.
Update {
uuid: Uuid,
meta: Processing,
ret: oneshot::Sender<Result<std::result::Result<Processed, Failed>>>,
},
/// Search request carrying `query`.
Search {
uuid: Uuid,
query: SearchQuery,
ret: oneshot::Sender<Result<SearchResult>>,
},
/// Request for the index's checked settings.
Settings {
uuid: Uuid,
ret: oneshot::Sender<Result<Settings<Checked>>>,
},
/// Request for a list of documents, parameterised by `offset`, `limit` and an optional
/// attribute selection.
Documents {
uuid: Uuid,
attributes_to_retrieve: Option<Vec<String>>,
offset: usize,
limit: usize,
ret: oneshot::Sender<Result<Vec<Document>>>,
},
/// Request for the single document identified by `doc_id`.
Document {
uuid: Uuid,
attributes_to_retrieve: Option<Vec<String>>,
doc_id: String,
ret: oneshot::Sender<Result<Document>>,
},
/// Index deletion request.
Delete {
uuid: Uuid,
ret: oneshot::Sender<Result<()>>,
},
/// Request for the index's `IndexMeta`.
GetMeta {
uuid: Uuid,
ret: oneshot::Sender<Result<IndexMeta>>,
},
/// Request to apply `index_settings` to the index; replies with the resulting metadata.
UpdateIndex {
uuid: Uuid,
index_settings: IndexSettings,
ret: oneshot::Sender<Result<IndexMeta>>,
},
/// Snapshot request. `path` is presumably the snapshot destination — confirm in the actor.
Snapshot {
uuid: Uuid,
path: PathBuf,
ret: oneshot::Sender<Result<()>>,
},
/// Dump request. `path` is presumably the dump destination — confirm in the actor.
Dump {
uuid: Uuid,
path: PathBuf,
ret: oneshot::Sender<Result<()>>,
},
/// Request for the index's `IndexStats`.
GetStats {
uuid: Uuid,
ret: oneshot::Sender<Result<IndexStats>>,
},
}
/// Request helpers: each one builds the matching message, sends it on `sender` and waits for
/// the reply.
///
/// # Errors
///
/// Every helper fails if the message cannot be sent or if the reply channel is dropped before
/// an answer arrives (both are converted into the module's error type by `?`); otherwise it
/// returns whatever result was sent back on the reply channel.
impl IndexMsg {
    /// Sends the message produced by `build` and waits for its reply.
    ///
    /// `build` is handed the sending half of a fresh oneshot channel and must embed it in the
    /// message it returns. All public helpers below go through this function so that the
    /// send/receive error handling lives in one place.
    async fn request<T, F>(sender: &mpsc::Sender<Self>, build: F) -> Result<T>
    where
        F: FnOnce(oneshot::Sender<Result<T>>) -> Self,
    {
        let (ret, rcv) = oneshot::channel();
        sender.send(build(ret)).await?;
        // The received value is already a `Result<T>`; `?` only unwraps the channel error.
        rcv.await?
    }

    /// Sends a `Search` message carrying `query` for index `uuid`.
    pub async fn search(
        sender: &mpsc::Sender<Self>,
        uuid: Uuid,
        query: SearchQuery,
    ) -> Result<SearchResult> {
        Self::request(sender, move |ret| Self::Search { ret, uuid, query }).await
    }

    /// Sends an `UpdateIndex` message carrying `index_settings` for index `uuid`.
    pub async fn update_index(
        sender: &mpsc::Sender<Self>,
        uuid: Uuid,
        index_settings: IndexSettings,
    ) -> Result<IndexMeta> {
        Self::request(sender, move |ret| Self::UpdateIndex {
            ret,
            uuid,
            index_settings,
        })
        .await
    }

    /// Sends a `CreateIndex` message for index `uuid`, with an optional primary key.
    pub async fn create_index(
        sender: &mpsc::Sender<Self>,
        uuid: Uuid,
        primary_key: Option<String>,
    ) -> Result<IndexMeta> {
        Self::request(sender, move |ret| Self::CreateIndex {
            ret,
            uuid,
            primary_key,
        })
        .await
    }

    /// Sends a `Delete` message for index `uuid`.
    pub async fn delete(sender: &mpsc::Sender<Self>, uuid: Uuid) -> Result<()> {
        Self::request(sender, move |ret| Self::Delete { ret, uuid }).await
    }

    /// Sends a `GetMeta` message for index `uuid`.
    pub async fn index_meta(sender: &mpsc::Sender<Self>, uuid: Uuid) -> Result<IndexMeta> {
        Self::request(sender, move |ret| Self::GetMeta { ret, uuid }).await
    }

    /// Sends a `GetStats` message for index `uuid`.
    pub async fn index_stats(sender: &mpsc::Sender<Self>, uuid: Uuid) -> Result<IndexStats> {
        Self::request(sender, move |ret| Self::GetStats { ret, uuid }).await
    }

    /// Sends a `Settings` message for index `uuid`.
    pub async fn settings(sender: &mpsc::Sender<Self>, uuid: Uuid) -> Result<Settings<Checked>> {
        Self::request(sender, move |ret| Self::Settings { ret, uuid }).await
    }

    /// Sends a `Documents` message for index `uuid` with the given `offset`, `limit` and
    /// optional attribute selection.
    pub async fn documents(
        sender: &mpsc::Sender<Self>,
        uuid: Uuid,
        offset: usize,
        limit: usize,
        attributes_to_retrieve: Option<Vec<String>>,
    ) -> Result<Vec<Document>> {
        Self::request(sender, move |ret| Self::Documents {
            ret,
            uuid,
            attributes_to_retrieve,
            offset,
            limit,
        })
        .await
    }

    /// Sends a `Document` message for document `doc_id` of index `uuid`.
    ///
    /// Note the parameter order: `attributes_to_retrieve` comes before `doc_id`.
    pub async fn document(
        sender: &mpsc::Sender<Self>,
        uuid: Uuid,
        attributes_to_retrieve: Option<Vec<String>>,
        doc_id: String,
    ) -> Result<Document> {
        Self::request(sender, move |ret| Self::Document {
            ret,
            uuid,
            attributes_to_retrieve,
            doc_id,
        })
        .await
    }

    /// Sends an `Update` message carrying `meta` for index `uuid`.
    ///
    /// The outer `Result` covers channel and actor errors; the inner one is the update's own
    /// `Processed`/`Failed` outcome.
    pub async fn update(
        sender: &mpsc::Sender<Self>,
        uuid: Uuid,
        meta: Processing,
    ) -> Result<std::result::Result<Processed, Failed>> {
        Self::request(sender, move |ret| Self::Update { ret, uuid, meta }).await
    }

    /// Sends a `Snapshot` message for index `uuid` with `path`.
    pub async fn snapshot(sender: &mpsc::Sender<Self>, uuid: Uuid, path: PathBuf) -> Result<()> {
        Self::request(sender, move |ret| Self::Snapshot { uuid, path, ret }).await
    }

    /// Sends a `Dump` message for index `uuid` with `path`.
    pub async fn dump(sender: &mpsc::Sender<Self>, uuid: Uuid, path: PathBuf) -> Result<()> {
        Self::request(sender, move |ret| Self::Dump { uuid, ret, path }).await
    }
}

View File

@@ -1,4 +1,4 @@
use std::path::PathBuf;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use async_stream::stream;
@@ -8,22 +8,74 @@ use log::debug;
use milli::update::UpdateBuilder;
use tokio::task::spawn_blocking;
use tokio::{fs, sync::mpsc};
use uuid::Uuid;
use crate::index::{
update_handler::UpdateHandler, Checked, Document, SearchQuery, SearchResult, Settings,
};
use crate::index_controller::{
get_arc_ownership_blocking, IndexStats,
};
use crate::index::update_handler::UpdateHandler;
use crate::index_controller::updates::status::{Failed, Processed, Processing};
use crate::index_controller::{get_arc_ownership_blocking, IndexStats};
use crate::options::IndexerOpts;
use super::error::{IndexActorError, Result};
use super::{IndexMeta, IndexMsg, IndexSettings, IndexStore};
pub const CONCURRENT_INDEX_MSG: usize = 10;
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use uuid::Uuid;
pub use message::IndexMsg;
use crate::index::{Checked, Document, Index, SearchQuery, SearchResult, Settings};
use error::Result;
use self::error::IndexActorError;
use self::store::{IndexStore, MapIndexStore};
use super::IndexSettings;
pub mod error;
mod message;
mod store;
/// Sending half of the index actor's message channel, as returned by `create_indexes_handler`.
pub type IndexHandlerSender = mpsc::Sender<IndexMsg>;
/// Builds the index actor, spawns it as a tokio task and returns the sender used to reach it.
///
/// The actor is given the receiving half of a channel bounded to 100 messages and a
/// `MapIndexStore` created from `db_path` and `index_size`.
///
/// # Errors
///
/// Returns the error from `IndexActor::new` if the actor cannot be built.
pub fn create_indexes_handler(
    db_path: impl AsRef<Path>,
    index_size: usize,
    indexer_options: &IndexerOpts,
) -> anyhow::Result<IndexHandlerSender> {
    let (handle, inbox) = mpsc::channel(100);
    let actor = IndexActor::new(
        inbox,
        MapIndexStore::new(&db_path, index_size),
        indexer_options,
    )?;
    tokio::task::spawn(actor.run());
    Ok(handle)
}
/// Metadata describing an index; (de)serialized with camelCase field names.
#[derive(Debug, Serialize, Deserialize, Clone)]
#[serde(rename_all = "camelCase")]
pub struct IndexMeta {
// Value returned by `Index::created_at`; private, unlike the other two fields.
created_at: DateTime<Utc>,
/// Value returned by `Index::updated_at`.
pub updated_at: DateTime<Utc>,
/// Primary key of the index, if it has one.
pub primary_key: Option<String>,
}
impl IndexMeta {
    /// Reads the metadata of `index` inside a fresh read transaction.
    fn new(index: &Index) -> Result<Self> {
        let rtxn = index.read_txn()?;
        Self::new_txn(index, &rtxn)
    }

    /// Reads the metadata of `index` through the caller-provided read transaction `txn`.
    ///
    /// The creation date, update date and primary key are fetched in that order; the first
    /// failing read aborts and its error is returned.
    fn new_txn(index: &Index, txn: &heed::RoTxn) -> Result<Self> {
        Ok(Self {
            created_at: index.created_at(txn)?,
            updated_at: index.updated_at(txn)?,
            primary_key: index.primary_key(txn)?.map(String::from),
        })
    }
}
pub struct IndexActor<S> {
receiver: Option<mpsc::Receiver<IndexMsg>>,
update_handler: Arc<UpdateHandler>,
@@ -31,15 +83,15 @@ pub struct IndexActor<S> {
}
impl<S> IndexActor<S>
where S: IndexStore + Sync + Send,
where
S: IndexStore + Sync + Send,
{
pub fn new(
receiver: mpsc::Receiver<IndexMsg>,
store: S,
options: &IndexerOpts,
) -> anyhow::Result<Self> {
let update_handler = UpdateHandler::new(options)?;
let update_handler = Arc::new(update_handler);
let update_handler = Arc::new(UpdateHandler::new(options)?);
let receiver = Some(receiver);
Ok(Self {
@@ -82,11 +134,7 @@ where S: IndexStore + Sync + Send,
} => {
let _ = ret.send(self.handle_create_index(uuid, primary_key).await);
}
Update {
ret,
meta,
uuid,
} => {
Update { ret, meta, uuid } => {
let _ = ret.send(self.handle_update(uuid, meta).await);
}
Search { ret, query, uuid } => {
@@ -350,3 +398,86 @@ where S: IndexStore + Sync + Send,
.await?
}
}
// NOTE(review): this module relies on `IndexActorHandle` and `MockIndexActorHandle` being
// reachable through `use super::*`; neither definition is visible in this module — confirm
// they still exist, otherwise this test module should be removed.
#[cfg(test)]
mod test {
    use std::sync::Arc;

    use super::*;

    /// Useful for passing around an `Arc<MockIndexActorHandle>` in tests.
    ///
    /// Every method forwards to the wrapped mock.
    #[async_trait::async_trait]
    impl IndexActorHandle for Arc<MockIndexActorHandle> {
        async fn create_index(&self, uuid: Uuid, primary_key: Option<String>) -> Result<IndexMeta> {
            self.as_ref().create_index(uuid, primary_key).await
        }

        // An update is described by `uuid` and `meta` only (see `IndexMsg::Update` and the
        // actor's `handle_update(uuid, meta)`); there is no separate `data` file argument.
        async fn update(
            &self,
            uuid: Uuid,
            meta: Processing,
        ) -> Result<std::result::Result<Processed, Failed>> {
            self.as_ref().update(uuid, meta).await
        }

        async fn search(&self, uuid: Uuid, query: SearchQuery) -> Result<SearchResult> {
            self.as_ref().search(uuid, query).await
        }

        async fn settings(&self, uuid: Uuid) -> Result<Settings<Checked>> {
            self.as_ref().settings(uuid).await
        }

        async fn documents(
            &self,
            uuid: Uuid,
            offset: usize,
            limit: usize,
            attributes_to_retrieve: Option<Vec<String>>,
        ) -> Result<Vec<Document>> {
            self.as_ref()
                .documents(uuid, offset, limit, attributes_to_retrieve)
                .await
        }

        async fn document(
            &self,
            uuid: Uuid,
            doc_id: String,
            attributes_to_retrieve: Option<Vec<String>>,
        ) -> Result<Document> {
            self.as_ref()
                .document(uuid, doc_id, attributes_to_retrieve)
                .await
        }

        async fn delete(&self, uuid: Uuid) -> Result<()> {
            self.as_ref().delete(uuid).await
        }

        async fn get_index_meta(&self, uuid: Uuid) -> Result<IndexMeta> {
            self.as_ref().get_index_meta(uuid).await
        }

        async fn update_index(
            &self,
            uuid: Uuid,
            index_settings: IndexSettings,
        ) -> Result<IndexMeta> {
            self.as_ref().update_index(uuid, index_settings).await
        }

        async fn snapshot(&self, uuid: Uuid, path: PathBuf) -> Result<()> {
            self.as_ref().snapshot(uuid, path).await
        }

        async fn dump(&self, uuid: Uuid, path: PathBuf) -> Result<()> {
            self.as_ref().dump(uuid, path).await
        }

        async fn get_index_stats(&self, uuid: Uuid) -> Result<IndexStats> {
            self.as_ref().get_index_stats(uuid).await
        }
    }
}

View File

@@ -8,36 +8,38 @@ use bytes::Bytes;
use chrono::{DateTime, Utc};
use futures::Stream;
use log::info;
use milli::FieldDistribution;
use milli::update::IndexDocumentsMethod;
use milli::FieldDistribution;
use serde::{Deserialize, Serialize};
use tokio::time::sleep;
use uuid::Uuid;
use dump_actor::DumpActorHandle;
pub use dump_actor::{DumpInfo, DumpStatus};
use index_actor::IndexActorHandle;
use snapshot::load_snapshot;
use uuid_resolver::error::UuidResolverError;
use crate::options::IndexerOpts;
use crate::index::{Checked, Document, SearchQuery, SearchResult, Settings};
use crate::options::IndexerOpts;
use error::Result;
use self::dump_actor::load_dump;
use self::updates::UpdateMsg;
use self::indexes::IndexMsg;
use self::updates::status::UpdateStatus;
use self::updates::UpdateMsg;
use self::uuid_resolver::UuidResolverMsg;
mod dump_actor;
pub mod error;
pub mod index_actor;
pub mod indexes;
mod snapshot;
pub mod update_file_store;
pub mod updates;
mod uuid_resolver;
pub mod update_file_store;
pub type Payload = Box<dyn Stream<Item = std::result::Result<Bytes, PayloadError>> + Send + Sync + 'static + Unpin>;
pub type Payload = Box<
dyn Stream<Item = std::result::Result<Bytes, PayloadError>> + Send + Sync + 'static + Unpin,
>;
#[derive(Debug, Serialize, Deserialize, Clone)]
#[serde(rename_all = "camelCase")]
@@ -47,7 +49,7 @@ pub struct IndexMetadata {
pub uid: String,
name: String,
#[serde(flatten)]
pub meta: index_actor::IndexMeta,
pub meta: indexes::IndexMeta,
}
#[derive(Clone, Debug)]
@@ -72,16 +74,16 @@ pub struct IndexStats {
#[derive(Clone)]
pub struct IndexController {
uuid_resolver: uuid_resolver::UuidResolverSender,
index_handle: index_actor::IndexActorHandleImpl,
index_handle: indexes::IndexHandlerSender,
update_handle: updates::UpdateSender,
dump_handle: dump_actor::DumpActorHandleImpl,
}
#[derive(Debug)]
pub enum DocumentAdditionFormat {
Json,
}
#[derive(Serialize, Debug)]
#[serde(rename_all = "camelCase")]
pub struct Stats {
@@ -90,13 +92,16 @@ pub struct Stats {
pub indexes: BTreeMap<String, IndexStats>,
}
#[derive(derivative::Derivative)]
#[derivative(Debug)]
pub enum Update {
DocumentAddition {
#[derivative(Debug="ignore")]
payload: Payload,
primary_key: Option<String>,
method: IndexDocumentsMethod,
format: DocumentAdditionFormat,
}
},
}
#[derive(Default, Debug)]
@@ -112,9 +117,17 @@ pub struct IndexControllerBuilder {
}
impl IndexControllerBuilder {
pub fn build(self, db_path: impl AsRef<Path>, indexer_options: IndexerOpts) -> anyhow::Result<IndexController> {
let index_size = self.max_index_size.ok_or_else(|| anyhow::anyhow!("Missing index size"))?;
let update_store_size = self.max_index_size.ok_or_else(|| anyhow::anyhow!("Missing update database size"))?;
pub fn build(
self,
db_path: impl AsRef<Path>,
indexer_options: IndexerOpts,
) -> anyhow::Result<IndexController> {
let index_size = self
.max_index_size
.ok_or_else(|| anyhow::anyhow!("Missing index size"))?;
let update_store_size = self
.max_index_size
.ok_or_else(|| anyhow::anyhow!("Missing update database size"))?;
if let Some(ref path) = self.import_snapshot {
info!("Loading from snapshot {:?}", path);
@@ -137,18 +150,15 @@ impl IndexControllerBuilder {
std::fs::create_dir_all(db_path.as_ref())?;
let uuid_resolver = uuid_resolver::create_uuid_resolver(&db_path)?;
let index_handle =
index_actor::IndexActorHandleImpl::new(&db_path, index_size, &indexer_options)?;
let index_handle = indexes::create_indexes_handler(&db_path, index_size, &indexer_options)?;
#[allow(unreachable_code)]
let update_handle = updates::create_update_handler(
todo!(),
&db_path,
update_store_size,
)?;
let update_handle = updates::create_update_handler(index_handle.clone(), &db_path, update_store_size)?;
let dump_handle = dump_actor::DumpActorHandleImpl::new(
&self.dump_dst.ok_or_else(|| anyhow::anyhow!("Missing dump directory path"))?,
&self
.dump_dst
.ok_or_else(|| anyhow::anyhow!("Missing dump directory path"))?,
uuid_resolver.clone(),
update_handle.clone(),
index_size,
@@ -197,7 +207,10 @@ impl IndexControllerBuilder {
}
/// Set the index controller builder's ignore snapshot if db exists.
pub fn set_ignore_snapshot_if_db_exists(&mut self, ignore_snapshot_if_db_exists: bool) -> &mut Self {
pub fn set_ignore_snapshot_if_db_exists(
&mut self,
ignore_snapshot_if_db_exists: bool,
) -> &mut Self {
self.ignore_snapshot_if_db_exists = ignore_snapshot_if_db_exists;
self
}
@@ -238,12 +251,12 @@ impl IndexController {
Ok(uuid) => {
let update_result = UpdateMsg::update(&self.update_handle, uuid, update).await?;
Ok(update_result)
},
}
Err(UuidResolverError::UnexistingIndex(name)) => {
let uuid = Uuid::new_v4();
let update_result = UpdateMsg::update(&self.update_handle, uuid, update).await?;
// ignore if index creation fails now, since it may already have been created
let _ = self.index_handle.create_index(uuid, None).await;
let _ = IndexMsg::create_index(&self.index_handle, uuid, None).await?;
UuidResolverMsg::insert(&self.uuid_resolver, uuid, name).await?;
Ok(update_result)
@@ -393,7 +406,7 @@ impl IndexController {
let uuids = UuidResolverMsg::list(&self.uuid_resolver).await?;
let mut ret = Vec::new();
for (uid, uuid) in uuids {
let meta = self.index_handle.get_index_meta(uuid).await?;
let meta = IndexMsg::index_meta(&self.index_handle, uuid).await?;
let meta = IndexMetadata {
uuid,
name: uid.clone(),
@@ -408,7 +421,7 @@ impl IndexController {
pub async fn settings(&self, uid: String) -> Result<Settings<Checked>> {
let uuid = UuidResolverMsg::get(&self.uuid_resolver, uid).await?;
let settings = self.index_handle.settings(uuid).await?;
let settings = IndexMsg::settings(&self.index_handle, uuid).await?;
Ok(settings)
}
@@ -420,9 +433,13 @@ impl IndexController {
attributes_to_retrieve: Option<Vec<String>>,
) -> Result<Vec<Document>> {
let uuid = UuidResolverMsg::get(&self.uuid_resolver, uid).await?;
let documents = self
.index_handle
.documents(uuid, offset, limit, attributes_to_retrieve)
let documents = IndexMsg::documents(
&self.index_handle,
uuid,
offset,
limit,
attributes_to_retrieve,
)
.await?;
Ok(documents)
}
@@ -434,10 +451,7 @@ impl IndexController {
attributes_to_retrieve: Option<Vec<String>>,
) -> Result<Document> {
let uuid = UuidResolverMsg::get(&self.uuid_resolver, uid).await?;
let document = self
.index_handle
.document(uuid, doc_id, attributes_to_retrieve)
.await?;
let document = IndexMsg::document(&self.index_handle, uuid, attributes_to_retrieve, doc_id).await?;
Ok(document)
}
@@ -451,7 +465,7 @@ impl IndexController {
}
let uuid = UuidResolverMsg::get(&self.uuid_resolver, uid.clone()).await?;
let meta = self.index_handle.update_index(uuid, index_settings).await?;
let meta = IndexMsg::update_index(&self.index_handle, uuid, index_settings).await?;
let meta = IndexMetadata {
uuid,
name: uid.clone(),
@@ -463,13 +477,13 @@ impl IndexController {
pub async fn search(&self, uid: String, query: SearchQuery) -> Result<SearchResult> {
let uuid = UuidResolverMsg::get(&self.uuid_resolver, uid).await?;
let result = self.index_handle.search(uuid, query).await?;
let result = IndexMsg::search(&self.index_handle, uuid, query).await?;
Ok(result)
}
pub async fn get_index(&self, uid: String) -> Result<IndexMetadata> {
let uuid = UuidResolverMsg::get(&self.uuid_resolver, uid.clone()).await?;
let meta = self.index_handle.get_index_meta(uuid).await?;
let meta = IndexMsg::index_meta(&self.index_handle, uuid).await?;
let meta = IndexMetadata {
uuid,
name: uid.clone(),
@@ -487,7 +501,7 @@ impl IndexController {
pub async fn get_index_stats(&self, uid: String) -> Result<IndexStats> {
let uuid = UuidResolverMsg::get(&self.uuid_resolver, uid).await?;
let update_infos = UpdateMsg::get_info(&self.update_handle).await?;
let mut stats = self.index_handle.get_index_stats(uuid).await?;
let mut stats = IndexMsg::index_stats(&self.index_handle, uuid).await?;
// Check if the currently indexing update is from our index.
stats.is_indexing = Some(Some(uuid) == update_infos.processing);
Ok(stats)
@@ -500,7 +514,7 @@ impl IndexController {
let mut indexes = BTreeMap::new();
for index in self.list_indexes().await? {
let mut index_stats = self.index_handle.get_index_stats(index.uuid).await?;
let mut index_stats = IndexMsg::index_stats(&self.index_handle, index.uuid).await?;
database_size += index_stats.size;
last_update = last_update.map_or(Some(index.meta.updated_at), |last| {

View File

@@ -1,8 +1,9 @@
use std::fmt;
use std::error::Error;
use meilisearch_error::{Code, ErrorCode};
use crate::index_controller::index_actor::error::IndexActorError;
use crate::index_controller::indexes::error::IndexActorError;
pub type Result<T> = std::result::Result<T, UpdateActorError>;
@@ -25,15 +26,17 @@ pub enum UpdateActorError {
PayloadError(#[from] actix_web::error::PayloadError),
}
impl<T> From<tokio::sync::mpsc::error::SendError<T>> for UpdateActorError {
fn from(_: tokio::sync::mpsc::error::SendError<T>) -> Self {
Self::FatalUpdateStoreError
impl<T> From<tokio::sync::mpsc::error::SendError<T>> for UpdateActorError
where T: Sync + Send + 'static + fmt::Debug
{
fn from(other: tokio::sync::mpsc::error::SendError<T>) -> Self {
Self::Internal(Box::new(other))
}
}
impl From<tokio::sync::oneshot::error::RecvError> for UpdateActorError {
fn from(_: tokio::sync::oneshot::error::RecvError) -> Self {
Self::FatalUpdateStoreError
fn from(other: tokio::sync::oneshot::error::RecvError) -> Self {
Self::Internal(Box::new(other))
}
}

View File

@@ -7,6 +7,7 @@ use uuid::Uuid;
use super::error::Result;
use super::{Update, UpdateStatus, UpdateStoreInfo};
#[derive(Debug)]
pub enum UpdateMsg {
Update {
uuid: Uuid,

View File

@@ -27,19 +27,19 @@ use self::store::{UpdateStore, UpdateStoreInfo};
use crate::index_controller::update_file_store::UpdateFileStore;
use status::UpdateStatus;
use super::indexes::IndexHandlerSender;
use super::{DocumentAdditionFormat, Payload, Update};
pub type UpdateSender = mpsc::Sender<UpdateMsg>;
type IndexSender = mpsc::Sender<()>;
pub fn create_update_handler(
index_sender: IndexSender,
index_sender: IndexHandlerSender,
db_path: impl AsRef<Path>,
update_store_size: usize,
) -> anyhow::Result<UpdateSender> {
let path = db_path.as_ref().to_owned();
let (sender, receiver) = mpsc::channel(100);
let actor = UpdateHandler::new(update_store_size, receiver, path, index_sender)?;
let actor = UpdateLoop::new(update_store_size, receiver, path, index_sender)?;
tokio::task::spawn_local(actor.run());
@@ -96,20 +96,20 @@ impl<S: Stream<Item = std::result::Result<Bytes, PayloadError>> + Unpin> io::Rea
}
}
pub struct UpdateHandler {
pub struct UpdateLoop {
store: Arc<UpdateStore>,
inbox: Option<mpsc::Receiver<UpdateMsg>>,
update_file_store: UpdateFileStore,
index_handle: IndexSender,
index_handle: IndexHandlerSender,
must_exit: Arc<AtomicBool>,
}
impl UpdateHandler {
impl UpdateLoop {
pub fn new(
update_db_size: usize,
inbox: mpsc::Receiver<UpdateMsg>,
path: impl AsRef<Path>,
index_handle: IndexSender,
index_handle: IndexHandlerSender,
) -> anyhow::Result<Self> {
let path = path.as_ref().to_owned();
std::fs::create_dir_all(&path)?;

View File

@@ -10,7 +10,7 @@ use serde::{Deserialize, Serialize};
use uuid::Uuid;
use super::{Result, State, UpdateStore};
use crate::index_controller::{updates::{IndexSender, status::UpdateStatus}};
use crate::index_controller::{indexes::{IndexHandlerSender, IndexMsg}, updates::{status::UpdateStatus}};
#[derive(Serialize, Deserialize)]
struct UpdateEntry {
@@ -23,7 +23,7 @@ impl UpdateStore {
&self,
uuids: &HashSet<Uuid>,
path: PathBuf,
handle: IndexSender,
handle: IndexHandlerSender,
) -> Result<()> {
let state_lock = self.state.write();
state_lock.swap(State::Dumping);
@@ -172,12 +172,11 @@ impl UpdateStore {
async fn dump_indexes(
uuids: &HashSet<Uuid>,
handle: IndexSender,
handle: IndexHandlerSender,
path: impl AsRef<Path>,
) -> Result<()> {
for uuid in uuids {
//handle.dump(*uuid, path.as_ref().to_owned()).await?;
todo!()
IndexMsg::dump(&handle, *uuid, path.as_ref().to_owned()).await?;
}
Ok(())

View File

@@ -30,14 +30,16 @@ use super::RegisterUpdate;
use super::error::Result;
use super::status::{Enqueued, Processing};
use crate::EnvSizer;
use crate::index_controller::indexes::{CONCURRENT_INDEX_MSG, IndexHandlerSender, IndexMsg};
use crate::index_controller::update_files_path;
use crate::index_controller::{index_actor::CONCURRENT_INDEX_MSG, updates::*};
use crate::index_controller::updates::*;
#[allow(clippy::upper_case_acronyms)]
type BEU64 = U64<heed::byteorder::BE>;
const UPDATE_DIR: &str = "update_files";
#[derive(Debug)]
pub struct UpdateStoreInfo {
/// Size of the update store in bytes.
pub size: u64,
@@ -146,7 +148,7 @@ impl UpdateStore {
pub fn open(
options: EnvOpenOptions,
path: impl AsRef<Path>,
index_handle: IndexSender,
index_handle: IndexHandlerSender,
must_exit: Arc<AtomicBool>,
) -> anyhow::Result<Arc<Self>> {
let (update_store, mut notification_receiver) = Self::new(options, path)?;
@@ -284,7 +286,7 @@ impl UpdateStore {
/// Executes the user-provided function on the next pending update (the one with the lowest id).
/// This is asynchronous as it lets the user process the update with a read-only txn and
/// only writes the result meta to the processed-meta store *after* it has been processed.
fn process_pending_update(&self, index_handle: IndexSender) -> Result<Option<()>> {
fn process_pending_update(&self, index_handle: IndexHandlerSender) -> Result<Option<()>> {
// Create a read transaction to be able to retrieve the pending update in order.
let rtxn = self.env.read_txn()?;
let first_meta = self.pending_queue.first(&rtxn)?;
@@ -314,7 +316,7 @@ impl UpdateStore {
fn perform_update(
&self,
processing: Processing,
index_handle: IndexSender,
index_handle: IndexHandlerSender,
index_uuid: Uuid,
global_id: u64,
) -> Result<Option<()>> {
@@ -322,7 +324,7 @@ impl UpdateStore {
let handle = Handle::current();
let update_id = processing.id();
let result =
match handle.block_on(/*index_handle.update(index_uuid, processing.clone())*/ todo!()) {
match handle.block_on(IndexMsg::update(&index_handle, index_uuid, processing.clone())) {
Ok(result) => result,
Err(e) => Err(processing.fail(e)),
};
@@ -484,7 +486,7 @@ impl UpdateStore {
&self,
uuids: &HashSet<Uuid>,
path: impl AsRef<Path>,
handle: IndexSender,
handle: IndexHandlerSender,
) -> Result<()> {
let state_lock = self.state.write();
state_lock.swap(State::Snapshoting);
@@ -525,7 +527,7 @@ impl UpdateStore {
// Perform the snapshot of each index concurrently. Only use a third of the capacity of
// the index actor at a time, so as not to put too much pressure on it.
let mut stream = futures::stream::iter(uuids.iter())
.map(move |uuid| todo!() /*handle.snapshot(*uuid, path.clone())*/)
.map(move |uuid| IndexMsg::snapshot(handle,*uuid, path.clone()))
.buffer_unordered(CONCURRENT_INDEX_MSG / 3);
Handle::current().block_on(async {