Compare commits

..

12 Commits

16 changed files with 562 additions and 236 deletions

317
Cargo.lock generated
View File

@ -149,7 +149,7 @@ dependencies = [
"impl-more",
"pin-project-lite",
"tokio",
"tokio-rustls 0.24.1",
"tokio-rustls",
"tokio-util",
"tracing",
"webpki-roots 0.25.3",
@ -262,6 +262,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011"
dependencies = [
"cfg-if",
"const-random",
"getrandom",
"once_cell",
"version_check",
@ -429,9 +430,9 @@ dependencies = [
[[package]]
name = "async-trait"
version = "0.1.80"
version = "0.1.77"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c6fa2087f2753a7da8cc1c0dbfcf89579dd57458e36769de5ac750b4671737ca"
checksum = "c980ee35e870bd1a4d2c8294d4c04d0499e67bca1e4b5cefcc693c2fa00caea9"
dependencies = [
"proc-macro2",
"quote",
@ -512,7 +513,7 @@ dependencies = [
"mimalloc",
"rand",
"rand_chacha",
"reqwest 0.11.23",
"reqwest",
"roaring",
"serde_json",
]
@ -1049,6 +1050,26 @@ dependencies = [
"windows-sys 0.45.0",
]
[[package]]
name = "const-random"
version = "0.1.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "87e00182fe74b066627d63b85fd550ac2998d4b0bd86bfed477a0ae4c7c71359"
dependencies = [
"const-random-macro",
]
[[package]]
name = "const-random-macro"
version = "0.1.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f9d839f2a20b0aee515dc581a6172f2321f96cab76c1a38a4c584a194955390e"
dependencies = [
"getrandom",
"once_cell",
"tiny-keccak",
]
[[package]]
name = "constant_time_eq"
version = "0.1.5"
@ -2368,29 +2389,6 @@ dependencies = [
"pin-project-lite",
]
[[package]]
name = "http-body"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1cac85db508abc24a2e48553ba12a996e87244a0395ce011e62b37158745d643"
dependencies = [
"bytes",
"http 1.0.0",
]
[[package]]
name = "http-body-util"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0475f8b2ac86659c21b64320d5d653f9efe42acd2a4e560073ec61a155a34f1d"
dependencies = [
"bytes",
"futures-core",
"http 1.0.0",
"http-body 1.0.0",
"pin-project-lite",
]
[[package]]
name = "httparse"
version = "1.8.0"
@ -2421,7 +2419,7 @@ dependencies = [
"futures-util",
"h2",
"http 0.2.11",
"http-body 0.4.5",
"http-body",
"httparse",
"httpdate",
"itoa",
@ -2433,25 +2431,6 @@ dependencies = [
"want",
]
[[package]]
name = "hyper"
version = "1.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fe575dd17d0862a9a33781c8c4696a55c320909004a67a00fb286ba8b1bc496d"
dependencies = [
"bytes",
"futures-channel",
"futures-util",
"http 1.0.0",
"http-body 1.0.0",
"httparse",
"itoa",
"pin-project-lite",
"smallvec",
"tokio",
"want",
]
[[package]]
name = "hyper-rustls"
version = "0.24.1"
@ -2460,47 +2439,10 @@ checksum = "8d78e1e73ec14cf7375674f74d7dde185c8206fd9dea6fb6295e8a98098aaa97"
dependencies = [
"futures-util",
"http 0.2.11",
"hyper 0.14.27",
"hyper",
"rustls 0.21.12",
"tokio",
"tokio-rustls 0.24.1",
]
[[package]]
name = "hyper-rustls"
version = "0.26.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a0bea761b46ae2b24eb4aef630d8d1c398157b6fc29e6350ecf090a0b70c952c"
dependencies = [
"futures-util",
"http 1.0.0",
"hyper 1.3.1",
"hyper-util",
"rustls 0.22.4",
"rustls-pki-types",
"tokio",
"tokio-rustls 0.25.0",
"tower-service",
]
[[package]]
name = "hyper-util"
version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ca38ef113da30126bbff9cd1705f9273e15d45498615d138b0c20279ac7a76aa"
dependencies = [
"bytes",
"futures-channel",
"futures-util",
"http 1.0.0",
"http-body 1.0.0",
"hyper 1.3.1",
"pin-project-lite",
"socket2 0.5.5",
"tokio",
"tower",
"tower-service",
"tracing",
"tokio-rustls",
]
[[package]]
@ -3426,10 +3368,9 @@ dependencies = [
"rand",
"rayon",
"regex",
"reqwest 0.11.23",
"reqwest 0.12.4",
"reqwest",
"rustls 0.21.12",
"rustls-pemfile 1.0.4",
"rustls-pemfile",
"segment",
"serde",
"serde_json",
@ -3595,6 +3536,7 @@ dependencies = [
"rand",
"rand_pcg",
"rayon",
"rhai",
"roaring",
"rstar",
"serde",
@ -4467,9 +4409,9 @@ dependencies = [
"futures-util",
"h2",
"http 0.2.11",
"http-body 0.4.5",
"hyper 0.14.27",
"hyper-rustls 0.24.1",
"http-body",
"hyper",
"hyper-rustls",
"ipnet",
"js-sys",
"log",
@ -4478,13 +4420,13 @@ dependencies = [
"percent-encoding",
"pin-project-lite",
"rustls 0.21.12",
"rustls-pemfile 1.0.4",
"rustls-pemfile",
"serde",
"serde_json",
"serde_urlencoded",
"system-configuration",
"tokio",
"tokio-rustls 0.24.1",
"tokio-rustls",
"tokio-util",
"tower-service",
"url",
@ -4493,48 +4435,7 @@ dependencies = [
"wasm-streams",
"web-sys",
"webpki-roots 0.25.3",
"winreg 0.50.0",
]
[[package]]
name = "reqwest"
version = "0.12.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "566cafdd92868e0939d3fb961bd0dc25fcfaaed179291093b3d43e6b3150ea10"
dependencies = [
"base64 0.22.1",
"bytes",
"futures-core",
"futures-util",
"http 1.0.0",
"http-body 1.0.0",
"http-body-util",
"hyper 1.3.1",
"hyper-rustls 0.26.0",
"hyper-util",
"ipnet",
"js-sys",
"log",
"mime",
"once_cell",
"percent-encoding",
"pin-project-lite",
"rustls 0.22.4",
"rustls-pemfile 2.1.2",
"rustls-pki-types",
"serde",
"serde_json",
"serde_urlencoded",
"sync_wrapper",
"tokio",
"tokio-rustls 0.25.0",
"tower-service",
"url",
"wasm-bindgen",
"wasm-bindgen-futures",
"web-sys",
"webpki-roots 0.26.1",
"winreg 0.52.0",
"winreg",
]
[[package]]
@ -4543,6 +4444,35 @@ version = "0.1.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8c31b5c4033f8fdde8700e4657be2c497e7288f01515be52168c631e2e4d4086"
[[package]]
name = "rhai"
version = "1.18.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a7d88770120601ba1e548bb6bc2a05019e54ff01b51479e38e64ec3b59d4759"
dependencies = [
"ahash",
"bitflags 2.5.0",
"instant",
"num-traits",
"once_cell",
"rhai_codegen",
"serde",
"smallvec",
"smartstring",
"thin-vec",
]
[[package]]
name = "rhai_codegen"
version = "2.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "59aecf17969c04b9c0c5d21f6bc9da9fec9dd4980e64d1871443a476589d8c86"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.60",
]
[[package]]
name = "ring"
version = "0.17.8"
@ -4665,16 +4595,6 @@ dependencies = [
"base64 0.21.7",
]
[[package]]
name = "rustls-pemfile"
version = "2.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "29993a25686778eb88d4189742cd713c9bce943bc54251a33509dc63cbacf73d"
dependencies = [
"base64 0.22.1",
"rustls-pki-types",
]
[[package]]
name = "rustls-pki-types"
version = "1.4.1"
@ -4751,12 +4671,12 @@ dependencies = [
[[package]]
name = "segment"
version = "0.2.4"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5bdca318192c89bb31bffa2ef8e9e9898bc80f15a78db2fdd41cd051f1b41d01"
checksum = "12485833e00457a6bbba60397d3f19362751a0caefe27f6755fff1a2be4fd601"
dependencies = [
"async-trait",
"reqwest 0.12.4",
"reqwest",
"serde",
"serde_json",
"thiserror",
@ -4780,9 +4700,9 @@ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4"
[[package]]
name = "serde"
version = "1.0.201"
version = "1.0.198"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "780f1cebed1629e4753a1a38a3c72d30b97ec044f0aef68cb26650a3c5cf363c"
checksum = "9846a40c979031340571da2545a4e5b7c4163bdae79b301d5f86d03979451fcc"
dependencies = [
"serde_derive",
]
@ -4798,9 +4718,9 @@ dependencies = [
[[package]]
name = "serde_derive"
version = "1.0.201"
version = "1.0.198"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c5e405930b9796f1c00bee880d03fc7e0bb4b9a11afc776885ffe84320da2865"
checksum = "e88edab869b01783ba905e7d0153f9fc1a6505a96e4ad3018011eedb838566d9"
dependencies = [
"proc-macro2",
"quote",
@ -4966,6 +4886,9 @@ name = "smallvec"
version = "1.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2593d31f82ead8df961d8bd23a64c2ccf2eb5dd34b0a34bfb4dd54011c72009e"
dependencies = [
"serde",
]
[[package]]
name = "smartstring"
@ -4974,6 +4897,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3fb72c633efbaa2dd666986505016c32c3044395ceaf881518399d2f4127ee29"
dependencies = [
"autocfg",
"serde",
"static_assertions",
"version_check",
]
@ -5109,12 +5033,6 @@ dependencies = [
"unicode-ident",
]
[[package]]
name = "sync_wrapper"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2047c6ded9c721764247e62cd3b03c09ffc529b2ba5b10ec482ae507a4a70160"
[[package]]
name = "synchronoise"
version = "1.0.1"
@ -5227,19 +5145,28 @@ dependencies = [
]
[[package]]
name = "thiserror"
version = "1.0.60"
name = "thin-vec"
version = "0.2.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "579e9083ca58dd9dcf91a9923bb9054071b9ebbd800b342194c9feb0ee89fc18"
checksum = "a38c90d48152c236a3ab59271da4f4ae63d678c5d7ad6b7714d7cb9760be5e4b"
dependencies = [
"serde",
]
[[package]]
name = "thiserror"
version = "1.0.58"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "03468839009160513471e86a034bb2c5c0e4baae3b43f79ffc55c4a5427b3297"
dependencies = [
"thiserror-impl",
]
[[package]]
name = "thiserror-impl"
version = "1.0.60"
version = "1.0.58"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e2470041c06ec3ac1ab38d0356a6119054dedaea53e12fbefc0de730a1c08524"
checksum = "c61f3ba182994efc43764a46c018c347bc492c79f024e705f46567b418f6d4f7"
dependencies = [
"proc-macro2",
"quote",
@ -5273,9 +5200,9 @@ dependencies = [
[[package]]
name = "time"
version = "0.3.36"
version = "0.3.34"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5dfd88e563464686c916c7e46e623e520ddc6d79fa6641390f2e3fa86e83e885"
checksum = "c8248b6521bb14bc45b4067159b9b6ad792e2d6d754d6c41fb50e29fefe38749"
dependencies = [
"deranged",
"itoa",
@ -5296,14 +5223,23 @@ checksum = "ef927ca75afb808a4d64dd374f00a2adf8d0fcff8e7b184af886c3c87ec4a3f3"
[[package]]
name = "time-macros"
version = "0.2.18"
version = "0.2.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3f252a68540fde3a3877aeea552b832b40ab9a69e318efd078774a01ddee1ccf"
checksum = "7ba3a3ef41e6672a2f0f001392bb5dcd3ff0a9992d618ca761a11c3121547774"
dependencies = [
"num-conv",
"time-core",
]
[[package]]
name = "tiny-keccak"
version = "2.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237"
dependencies = [
"crunchy",
]
[[package]]
name = "tinytemplate"
version = "1.2.1"
@ -5399,17 +5335,6 @@ dependencies = [
"tokio",
]
[[package]]
name = "tokio-rustls"
version = "0.25.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "775e0c0f0adb3a2f22a00c4745d728b479985fc15ee7ca6a2608388c5569860f"
dependencies = [
"rustls 0.22.4",
"rustls-pki-types",
"tokio",
]
[[package]]
name = "tokio-stream"
version = "0.1.14"
@ -5469,28 +5394,6 @@ dependencies = [
"winnow",
]
[[package]]
name = "tower"
version = "0.4.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b8fa9be0de6cf49e536ce1851f987bd21a43b771b09473c3549a6c853db37c1c"
dependencies = [
"futures-core",
"futures-util",
"pin-project",
"pin-project-lite",
"tokio",
"tower-layer",
"tower-service",
"tracing",
]
[[package]]
name = "tower-layer"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c20c8dbed6283a09604c3e69b4b7eeb54e298b8a600d4d5ecb5ad39de609f1d0"
[[package]]
name = "tower-service"
version = "0.3.2"
@ -6239,16 +6142,6 @@ dependencies = [
"windows-sys 0.48.0",
]
[[package]]
name = "winreg"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a277a57398d4bfa075df44f501a17cfdf8542d224f0d36095a2adc7aee4ef0a5"
dependencies = [
"cfg-if",
"windows-sys 0.48.0",
]
[[package]]
name = "xattr"
version = "1.0.1"
@ -6268,7 +6161,7 @@ dependencies = [
"clap",
"futures-core",
"futures-util",
"reqwest 0.11.23",
"reqwest",
"serde",
"serde_json",
"sha2",

View File

@ -166,6 +166,7 @@ impl From<KindWithContent> for KindDump {
documents_count,
allow_index_creation,
},
KindWithContent::DocumentEdition { .. } => todo!(),
KindWithContent::DocumentDeletion { documents_ids, .. } => {
KindDump::DocumentDeletion { documents_ids }
}

View File

@ -24,6 +24,7 @@ enum AutobatchKind {
allow_index_creation: bool,
primary_key: Option<String>,
},
DocumentEdition,
DocumentDeletion,
DocumentDeletionByFilter,
DocumentClear,
@ -63,6 +64,7 @@ impl From<KindWithContent> for AutobatchKind {
primary_key,
..
} => AutobatchKind::DocumentImport { method, allow_index_creation, primary_key },
KindWithContent::DocumentEdition { .. } => AutobatchKind::DocumentEdition,
KindWithContent::DocumentDeletion { .. } => AutobatchKind::DocumentDeletion,
KindWithContent::DocumentClear { .. } => AutobatchKind::DocumentClear,
KindWithContent::DocumentDeletionByFilter { .. } => {
@ -98,6 +100,9 @@ pub enum BatchKind {
primary_key: Option<String>,
operation_ids: Vec<TaskId>,
},
DocumentEdition {
id: TaskId,
},
DocumentDeletion {
deletion_ids: Vec<TaskId>,
},
@ -199,6 +204,7 @@ impl BatchKind {
}),
allow_index_creation,
),
K::DocumentEdition => (Break(BatchKind::DocumentEdition { id: task_id }), false),
K::DocumentDeletion => {
(Continue(BatchKind::DocumentDeletion { deletion_ids: vec![task_id] }), false)
}
@ -222,7 +228,7 @@ impl BatchKind {
match (self, kind) {
// We don't batch any of these operations
(this, K::IndexCreation | K::IndexUpdate | K::IndexSwap | K::DocumentDeletionByFilter) => Break(this),
(this, K::IndexCreation | K::IndexUpdate | K::IndexSwap | K::DocumentEdition | K::DocumentDeletionByFilter) => Break(this),
// We must not batch tasks that don't have the same index creation rights if the index doesn't already exists.
(this, kind) if !index_already_exists && this.allow_index_creation() == Some(false) && kind.allow_index_creation() == Some(true) => {
Break(this)
@ -519,6 +525,7 @@ impl BatchKind {
| BatchKind::IndexDeletion { .. }
| BatchKind::IndexUpdate { .. }
| BatchKind::IndexSwap { .. }
| BatchKind::DocumentEdition { .. }
| BatchKind::DocumentDeletionByFilter { .. },
_,
) => {

View File

@ -31,7 +31,7 @@ use meilisearch_types::milli::heed::CompactionOption;
use meilisearch_types::milli::update::{
IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig, Settings as MilliSettings,
};
use meilisearch_types::milli::{self, Filter};
use meilisearch_types::milli::{self, Filter, Object};
use meilisearch_types::settings::{apply_settings_to_builder, Settings, Unchecked};
use meilisearch_types::tasks::{Details, IndexSwap, Kind, KindWithContent, Status, Task};
use meilisearch_types::{compression, Index, VERSION_FILE_NAME};
@ -103,6 +103,10 @@ pub(crate) enum IndexOperation {
operations: Vec<DocumentOperation>,
tasks: Vec<Task>,
},
DocumentEdition {
index_uid: String,
task: Task,
},
IndexDocumentDeletionByFilter {
index_uid: String,
task: Task,
@ -161,7 +165,8 @@ impl Batch {
| IndexOperation::DocumentClear { tasks, .. } => {
RoaringBitmap::from_iter(tasks.iter().map(|task| task.uid))
}
IndexOperation::IndexDocumentDeletionByFilter { task, .. } => {
IndexOperation::DocumentEdition { task, .. }
| IndexOperation::IndexDocumentDeletionByFilter { task, .. } => {
RoaringBitmap::from_sorted_iter(std::iter::once(task.uid)).unwrap()
}
IndexOperation::SettingsAndDocumentOperation {
@ -225,6 +230,7 @@ impl IndexOperation {
pub fn index_uid(&self) -> &str {
match self {
IndexOperation::DocumentOperation { index_uid, .. }
| IndexOperation::DocumentEdition { index_uid, .. }
| IndexOperation::IndexDocumentDeletionByFilter { index_uid, .. }
| IndexOperation::DocumentClear { index_uid, .. }
| IndexOperation::Settings { index_uid, .. }
@ -240,6 +246,9 @@ impl fmt::Display for IndexOperation {
IndexOperation::DocumentOperation { .. } => {
f.write_str("IndexOperation::DocumentOperation")
}
IndexOperation::DocumentEdition { .. } => {
f.write_str("IndexOperation::DocumentEdition")
}
IndexOperation::IndexDocumentDeletionByFilter { .. } => {
f.write_str("IndexOperation::IndexDocumentDeletionByFilter")
}
@ -292,6 +301,21 @@ impl IndexScheduler {
_ => unreachable!(),
}
}
BatchKind::DocumentEdition { id } => {
let task = self.get_task(rtxn, id)?.ok_or(Error::CorruptedTaskQueue)?;
match &task.kind {
KindWithContent::DocumentEdition { index_uid, .. } => {
Ok(Some(Batch::IndexOperation {
op: IndexOperation::DocumentEdition {
index_uid: index_uid.clone(),
task,
},
must_create_index: false,
}))
}
_ => unreachable!(),
}
}
BatchKind::DocumentOperation { method, operation_ids, .. } => {
let tasks = self.get_existing_tasks(rtxn, operation_ids)?;
let primary_key = tasks
@ -1334,6 +1358,64 @@ impl IndexScheduler {
Ok(tasks)
}
IndexOperation::DocumentEdition { mut task, .. } => {
let (filter, context, function) =
if let KindWithContent::DocumentEdition {
filter_expr, context, function, ..
} = &task.kind
{
(filter_expr, context, function)
} else {
unreachable!()
};
let result_count = edit_documents_by_function(
index_wtxn,
filter,
context.clone(),
function,
self.index_mapper.indexer_config(),
self.must_stop_processing.clone(),
index,
);
let (original_filter, context, function) = if let Some(Details::DocumentEdition {
original_filter,
context,
function,
..
}) = task.details
{
(original_filter, context, function)
} else {
// In the case of a `documentDeleteByFilter` the details MUST be set
unreachable!();
};
match result_count {
Ok((deleted_documents, edited_documents)) => {
task.status = Status::Succeeded;
task.details = Some(Details::DocumentEdition {
original_filter,
context,
function,
deleted_documents: Some(deleted_documents),
edited_documents: Some(edited_documents),
});
}
Err(e) => {
task.status = Status::Failed;
task.details = Some(Details::DocumentEdition {
original_filter,
context,
function,
deleted_documents: Some(0),
edited_documents: Some(0),
});
task.error = Some(e.into());
}
}
Ok(vec![task])
}
IndexOperation::IndexDocumentDeletionByFilter { mut task, index_uid: _ } => {
let filter =
if let KindWithContent::DocumentDeletionByFilter { filter_expr, .. } =
@ -1622,3 +1704,44 @@ fn delete_document_by_filter<'a>(
0
})
}
fn edit_documents_by_function<'a>(
wtxn: &mut RwTxn<'a>,
filter: &Option<serde_json::Value>,
context: Option<Object>,
code: &str,
indexer_config: &IndexerConfig,
must_stop_processing: MustStopProcessing,
index: &'a Index,
) -> Result<(u64, u64)> {
let candidates = match filter.as_ref().map(Filter::from_json) {
Some(Ok(Some(filter))) => filter.evaluate(wtxn, index).map_err(|err| match err {
milli::Error::UserError(milli::UserError::InvalidFilter(_)) => {
Error::from(err).with_custom_error_code(Code::InvalidDocumentFilter)
}
e => e.into(),
})?,
None | Some(Ok(None)) => index.documents_ids(wtxn)?,
Some(Err(e)) => return Err(e.into()),
};
let config = IndexDocumentsConfig {
update_method: IndexDocumentsMethod::ReplaceDocuments,
..Default::default()
};
let mut builder = milli::update::IndexDocuments::new(
wtxn,
index,
indexer_config,
config,
|indexing_step| tracing::debug!(update = ?indexing_step),
|| must_stop_processing.get(),
)?;
let (new_builder, count) = builder.edit_documents(&candidates, context, code)?;
builder = new_builder;
let _ = builder.execute()?;
Ok(count.unwrap())
}

View File

@ -178,6 +178,17 @@ fn snapshot_details(d: &Details) -> String {
} => {
format!("{{ received_documents: {received_documents}, indexed_documents: {indexed_documents:?} }}")
}
Details::DocumentEdition {
deleted_documents,
edited_documents,
original_filter,
context,
function,
} => {
format!(
"{{ deleted_documents: {deleted_documents:?}, edited_documents: {edited_documents:?}, context: {context:?}, function: {function:?}, original_filter: {original_filter:?} }}"
)
}
Details::SettingsUpdate { settings } => {
format!("{{ settings: {settings:?} }}")
}

View File

@ -4749,6 +4749,7 @@ mod tests {
"types": {
"documentAdditionOrUpdate": 0,
"documentDeletion": 0,
"documentEdition": 0,
"dumpCreation": 0,
"indexCreation": 3,
"indexDeletion": 0,
@ -4780,6 +4781,7 @@ mod tests {
"types": {
"documentAdditionOrUpdate": 0,
"documentDeletion": 0,
"documentEdition": 0,
"dumpCreation": 0,
"indexCreation": 3,
"indexDeletion": 0,
@ -4818,6 +4820,7 @@ mod tests {
"types": {
"documentAdditionOrUpdate": 0,
"documentDeletion": 0,
"documentEdition": 0,
"dumpCreation": 0,
"indexCreation": 3,
"indexDeletion": 0,
@ -4857,6 +4860,7 @@ mod tests {
"types": {
"documentAdditionOrUpdate": 0,
"documentDeletion": 0,
"documentEdition": 0,
"dumpCreation": 0,
"indexCreation": 3,
"indexDeletion": 0,

View File

@ -238,6 +238,7 @@ pub fn swap_index_uid_in_task(task: &mut Task, swap: (&str, &str)) {
let mut index_uids = vec![];
match &mut task.kind {
K::DocumentAdditionOrUpdate { index_uid, .. } => index_uids.push(index_uid),
K::DocumentEdition { index_uid, .. } => index_uids.push(index_uid),
K::DocumentDeletion { index_uid, .. } => index_uids.push(index_uid),
K::DocumentDeletionByFilter { index_uid, .. } => index_uids.push(index_uid),
K::DocumentClear { index_uid } => index_uids.push(index_uid),
@ -408,7 +409,26 @@ impl IndexScheduler {
match status {
Status::Succeeded => assert!(indexed_documents <= received_documents),
Status::Failed | Status::Canceled => assert_eq!(indexed_documents, 0),
status => panic!("DocumentAddition can't have an indexed_document set if it's {}", status),
status => panic!("DocumentAddition can't have an indexed_documents set if it's {}", status),
}
}
None => {
assert!(matches!(status, Status::Enqueued | Status::Processing))
}
}
}
Details::DocumentEdition { edited_documents, .. } => {
assert_eq!(kind.as_kind(), Kind::DocumentEdition);
match edited_documents {
Some(edited_documents) => {
assert!(matches!(
status,
Status::Succeeded | Status::Failed | Status::Canceled
));
match status {
Status::Succeeded => (),
Status::Failed | Status::Canceled => assert_eq!(edited_documents, 0),
status => panic!("DocumentEdition can't have an edited_documents set if it's {}", status),
}
}
None => {

View File

@ -1,3 +1,4 @@
use milli::Object;
use serde::Serialize;
use time::{Duration, OffsetDateTime};
@ -54,6 +55,8 @@ pub struct DetailsView {
#[serde(skip_serializing_if = "Option::is_none")]
pub indexed_documents: Option<Option<u64>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub edited_documents: Option<Option<u64>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub primary_key: Option<Option<String>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub provided_ids: Option<usize>,
@ -70,6 +73,10 @@ pub struct DetailsView {
#[serde(skip_serializing_if = "Option::is_none")]
pub dump_uid: Option<Option<String>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub context: Option<Option<Object>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub function: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
#[serde(flatten)]
pub settings: Option<Box<Settings<Unchecked>>>,
#[serde(skip_serializing_if = "Option::is_none")]
@ -86,6 +93,20 @@ impl From<Details> for DetailsView {
..DetailsView::default()
}
}
Details::DocumentEdition {
deleted_documents,
edited_documents,
original_filter,
context,
function,
} => DetailsView {
deleted_documents: Some(deleted_documents),
edited_documents: Some(edited_documents),
original_filter: Some(original_filter),
context: Some(context),
function: Some(function),
..DetailsView::default()
},
Details::SettingsUpdate { mut settings } => {
settings.hide_secrets();
DetailsView { settings: Some(settings), ..DetailsView::default() }

View File

@ -5,6 +5,7 @@ use std::str::FromStr;
use enum_iterator::Sequence;
use milli::update::IndexDocumentsMethod;
use milli::Object;
use roaring::RoaringBitmap;
use serde::{Deserialize, Serialize, Serializer};
use time::{Duration, OffsetDateTime};
@ -48,6 +49,7 @@ impl Task {
| TaskDeletion { .. }
| IndexSwap { .. } => None,
DocumentAdditionOrUpdate { index_uid, .. }
| DocumentEdition { index_uid, .. }
| DocumentDeletion { index_uid, .. }
| DocumentDeletionByFilter { index_uid, .. }
| DocumentClear { index_uid }
@ -67,7 +69,8 @@ impl Task {
pub fn content_uuid(&self) -> Option<Uuid> {
match self.kind {
KindWithContent::DocumentAdditionOrUpdate { content_file, .. } => Some(content_file),
KindWithContent::DocumentDeletion { .. }
KindWithContent::DocumentEdition { .. }
| KindWithContent::DocumentDeletion { .. }
| KindWithContent::DocumentDeletionByFilter { .. }
| KindWithContent::DocumentClear { .. }
| KindWithContent::SettingsUpdate { .. }
@ -94,6 +97,12 @@ pub enum KindWithContent {
documents_count: u64,
allow_index_creation: bool,
},
DocumentEdition {
index_uid: String,
filter_expr: Option<serde_json::Value>,
context: Option<milli::Object>,
function: String,
},
DocumentDeletion {
index_uid: String,
documents_ids: Vec<String>,
@ -150,6 +159,7 @@ impl KindWithContent {
pub fn as_kind(&self) -> Kind {
match self {
KindWithContent::DocumentAdditionOrUpdate { .. } => Kind::DocumentAdditionOrUpdate,
KindWithContent::DocumentEdition { .. } => Kind::DocumentEdition,
KindWithContent::DocumentDeletion { .. } => Kind::DocumentDeletion,
KindWithContent::DocumentDeletionByFilter { .. } => Kind::DocumentDeletion,
KindWithContent::DocumentClear { .. } => Kind::DocumentDeletion,
@ -174,6 +184,7 @@ impl KindWithContent {
| TaskCancelation { .. }
| TaskDeletion { .. } => vec![],
DocumentAdditionOrUpdate { index_uid, .. }
| DocumentEdition { index_uid, .. }
| DocumentDeletion { index_uid, .. }
| DocumentDeletionByFilter { index_uid, .. }
| DocumentClear { index_uid }
@ -202,6 +213,15 @@ impl KindWithContent {
indexed_documents: None,
})
}
KindWithContent::DocumentEdition { index_uid: _, filter_expr, context, function } => {
Some(Details::DocumentEdition {
deleted_documents: None,
edited_documents: None,
original_filter: filter_expr.as_ref().map(|v| v.to_string()),
context: context.clone(),
function: function.clone(),
})
}
KindWithContent::DocumentDeletion { index_uid: _, documents_ids } => {
Some(Details::DocumentDeletion {
provided_ids: documents_ids.len(),
@ -250,6 +270,15 @@ impl KindWithContent {
indexed_documents: Some(0),
})
}
KindWithContent::DocumentEdition { index_uid: _, filter_expr, context, function } => {
Some(Details::DocumentEdition {
deleted_documents: Some(0),
edited_documents: Some(0),
original_filter: filter_expr.as_ref().map(|v| v.to_string()),
context: context.clone(),
function: function.clone(),
})
}
KindWithContent::DocumentDeletion { index_uid: _, documents_ids } => {
Some(Details::DocumentDeletion {
provided_ids: documents_ids.len(),
@ -301,6 +330,7 @@ impl From<&KindWithContent> for Option<Details> {
indexed_documents: None,
})
}
KindWithContent::DocumentEdition { .. } => None,
KindWithContent::DocumentDeletion { .. } => None,
KindWithContent::DocumentDeletionByFilter { .. } => None,
KindWithContent::DocumentClear { .. } => None,
@ -394,6 +424,7 @@ impl std::error::Error for ParseTaskStatusError {}
#[serde(rename_all = "camelCase")]
pub enum Kind {
DocumentAdditionOrUpdate,
DocumentEdition,
DocumentDeletion,
SettingsUpdate,
IndexCreation,
@ -410,6 +441,7 @@ impl Kind {
pub fn related_to_one_index(&self) -> bool {
match self {
Kind::DocumentAdditionOrUpdate
| Kind::DocumentEdition
| Kind::DocumentDeletion
| Kind::SettingsUpdate
| Kind::IndexCreation
@ -427,6 +459,7 @@ impl Display for Kind {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Kind::DocumentAdditionOrUpdate => write!(f, "documentAdditionOrUpdate"),
Kind::DocumentEdition => write!(f, "documentEdition"),
Kind::DocumentDeletion => write!(f, "documentDeletion"),
Kind::SettingsUpdate => write!(f, "settingsUpdate"),
Kind::IndexCreation => write!(f, "indexCreation"),
@ -454,6 +487,8 @@ impl FromStr for Kind {
Ok(Kind::IndexDeletion)
} else if kind.eq_ignore_ascii_case("documentAdditionOrUpdate") {
Ok(Kind::DocumentAdditionOrUpdate)
} else if kind.eq_ignore_ascii_case("documentEdition") {
Ok(Kind::DocumentEdition)
} else if kind.eq_ignore_ascii_case("documentDeletion") {
Ok(Kind::DocumentDeletion)
} else if kind.eq_ignore_ascii_case("settingsUpdate") {
@ -495,16 +530,50 @@ impl std::error::Error for ParseTaskKindError {}
#[derive(Debug, PartialEq, Eq, Clone, Serialize, Deserialize)]
pub enum Details {
DocumentAdditionOrUpdate { received_documents: u64, indexed_documents: Option<u64> },
SettingsUpdate { settings: Box<Settings<Unchecked>> },
IndexInfo { primary_key: Option<String> },
DocumentDeletion { provided_ids: usize, deleted_documents: Option<u64> },
DocumentDeletionByFilter { original_filter: String, deleted_documents: Option<u64> },
ClearAll { deleted_documents: Option<u64> },
TaskCancelation { matched_tasks: u64, canceled_tasks: Option<u64>, original_filter: String },
TaskDeletion { matched_tasks: u64, deleted_tasks: Option<u64>, original_filter: String },
Dump { dump_uid: Option<String> },
IndexSwap { swaps: Vec<IndexSwap> },
DocumentAdditionOrUpdate {
received_documents: u64,
indexed_documents: Option<u64>,
},
DocumentEdition {
deleted_documents: Option<u64>,
edited_documents: Option<u64>,
original_filter: Option<String>,
context: Option<Object>,
function: String,
},
SettingsUpdate {
settings: Box<Settings<Unchecked>>,
},
IndexInfo {
primary_key: Option<String>,
},
DocumentDeletion {
provided_ids: usize,
deleted_documents: Option<u64>,
},
DocumentDeletionByFilter {
original_filter: String,
deleted_documents: Option<u64>,
},
ClearAll {
deleted_documents: Option<u64>,
},
TaskCancelation {
matched_tasks: u64,
canceled_tasks: Option<u64>,
original_filter: String,
},
TaskDeletion {
matched_tasks: u64,
deleted_tasks: Option<u64>,
original_filter: String,
},
Dump {
dump_uid: Option<String>,
},
IndexSwap {
swaps: Vec<IndexSwap>,
},
}
impl Details {
@ -514,6 +583,7 @@ impl Details {
Self::DocumentAdditionOrUpdate { indexed_documents, .. } => {
*indexed_documents = Some(0)
}
Self::DocumentEdition { edited_documents, .. } => *edited_documents = Some(0),
Self::DocumentDeletion { deleted_documents, .. } => *deleted_documents = Some(0),
Self::DocumentDeletionByFilter { deleted_documents, .. } => {
*deleted_documents = Some(0)

View File

@ -71,13 +71,13 @@ puffin = { version = "0.16.0", features = ["serialization"] }
rand = "0.8.5"
rayon = "1.8.0"
regex = "1.10.2"
reqwest = { version = "0.12.4", features = [
reqwest = { version = "0.11.23", features = [
"rustls-tls",
"json",
], default-features = false }
rustls = "0.21.12"
rustls-pemfile = "1.0.2"
segment = { version = "0.2.4", optional = true }
segment = { version = "0.2.3", optional = true }
serde = { version = "1.0.195", features = ["derive"] }
serde_json = { version = "1.0.111", features = ["preserve_order"] }
sha2 = "0.10.8"

View File

@ -81,6 +81,7 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
web::resource("/delete-batch").route(web::post().to(SeqHandler(delete_documents_batch))),
)
.service(web::resource("/delete").route(web::post().to(SeqHandler(delete_documents_by_filter))))
.service(web::resource("/edit").route(web::post().to(SeqHandler(edit_documents_by_function))))
.service(web::resource("/fetch").route(web::post().to(SeqHandler(documents_by_query_post))))
.service(
web::resource("/{document_id}")
@ -553,6 +554,66 @@ pub async fn delete_documents_by_filter(
Ok(HttpResponse::Accepted().json(task))
}
#[derive(Debug, Deserr)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
pub struct DocumentEditionByFunction {
#[deserr(default, error = DeserrJsonError<InvalidDocumentFilter>)]
filter: Option<Value>,
#[deserr(default, error = DeserrJsonError<InvalidDocumentFilter>)]
context: Option<Value>,
#[deserr(error = DeserrJsonError<InvalidDocumentFilter>, missing_field_error = DeserrJsonError::missing_document_filter)]
function: String,
}
pub async fn edit_documents_by_function(
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_ADD }>, Data<IndexScheduler>>,
index_uid: web::Path<String>,
body: AwebJson<DocumentEditionByFunction, DeserrJsonError>,
req: HttpRequest,
opt: web::Data<Opt>,
_analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
debug!(parameters = ?body, "Edit documents by function");
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let index_uid = index_uid.into_inner();
let DocumentEditionByFunction { filter, context, function } = body.into_inner();
// analytics.delete_documents(DocumentDeletionKind::PerFilter, &req);
let engine = milli::rhai::Engine::new();
if let Err(e) = engine.compile(&function) {
return Err(ResponseError::from_msg(e.to_string(), Code::BadRequest));
}
if let Some(ref filter) = filter {
// we ensure the filter is well formed before enqueuing it
|| -> Result<_, ResponseError> {
Ok(crate::search::parse_filter(filter)?.ok_or(MeilisearchHttpError::EmptyFilter)?)
}()
// and whatever was the error, the error code should always be an InvalidDocumentFilter
.map_err(|err| ResponseError::from_msg(err.message, Code::InvalidDocumentFilter))?;
}
let task = KindWithContent::DocumentEdition {
index_uid,
filter_expr: filter,
context: context.map(|v| match v {
serde_json::Value::Object(m) => m,
_ => panic!("The context must be an Object"),
}),
function,
};
let uid = get_task_id(&req, &opt)?;
let dry_run = is_dry_run(&req, &opt)?;
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
.await??
.into();
debug!(returns = ?task, "Delete documents by filter");
Ok(HttpResponse::Accepted().json(task))
}
pub async fn clear_all_documents(
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_DELETE }>, Data<IndexScheduler>>,
index_uid: web::Path<String>,

View File

@ -591,7 +591,7 @@ mod tests {
let err = deserr_query_params::<TaskDeletionOrCancelationQuery>(params).unwrap_err();
snapshot!(meili_snap::json_string!(err), @r###"
{
"message": "Invalid value in parameter `types`: `createIndex` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`.",
"message": "Invalid value in parameter `types`: `createIndex` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentEdition`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`.",
"code": "invalid_task_types",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_task_types"

View File

@ -97,7 +97,7 @@ async fn task_bad_types() {
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value in parameter `types`: `doggo` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`.",
"message": "Invalid value in parameter `types`: `doggo` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentEdition`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`.",
"code": "invalid_task_types",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_task_types"
@ -108,7 +108,7 @@ async fn task_bad_types() {
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value in parameter `types`: `doggo` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`.",
"message": "Invalid value in parameter `types`: `doggo` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentEdition`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`.",
"code": "invalid_task_types",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_task_types"
@ -119,7 +119,7 @@ async fn task_bad_types() {
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value in parameter `types`: `doggo` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`.",
"message": "Invalid value in parameter `types`: `doggo` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentEdition`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`.",
"code": "invalid_task_types",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_task_types"

View File

@ -87,6 +87,7 @@ rand = "0.8.5"
tracing = "0.1.40"
ureq = { version = "2.9.7", features = ["json"] }
url = "2.5.0"
rhai = { version = "1.18.0", features = ["serde", "no_module", "no_custom_syntax"] }
[dev-dependencies]
mimalloc = { version = "0.1.39", default-features = false }

View File

@ -44,7 +44,7 @@ pub use search::new::{
};
use serde_json::Value;
pub use thread_pool_no_abort::{PanicCatched, ThreadPoolNoAbort, ThreadPoolNoAbortBuilder};
pub use {charabia as tokenizer, heed};
pub use {charabia as tokenizer, heed, rhai};
pub use self::asc_desc::{AscDesc, AscDescError, Member, SortError};
pub use self::criterion::{default_criteria, Criterion, CriterionError};

View File

@ -15,6 +15,7 @@ use grenad::{Merger, MergerBuilder};
use heed::types::Str;
use heed::Database;
use rand::SeedableRng;
use rhai::{Dynamic, Engine, OptimizationLevel, Scope};
use roaring::RoaringBitmap;
use serde::{Deserialize, Serialize};
use slice_group_by::GroupBy;
@ -31,7 +32,7 @@ pub use self::helpers::{
};
use self::helpers::{grenad_obkv_into_chunks, GrenadParameters};
pub use self::transform::{Transform, TransformOutput};
use crate::documents::{obkv_to_object, DocumentsBatchReader};
use crate::documents::{obkv_to_object, DocumentsBatchBuilder, DocumentsBatchReader};
use crate::error::{Error, InternalError, UserError};
use crate::thread_pool_no_abort::ThreadPoolNoAbortBuilder;
pub use crate::update::index_documents::helpers::CursorClonableMmap;
@ -39,7 +40,7 @@ use crate::update::{
IndexerConfig, UpdateIndexingStep, WordPrefixDocids, WordPrefixIntegerDocids, WordsPrefixesFst,
};
use crate::vector::EmbeddingConfigs;
use crate::{CboRoaringBitmapCodec, Index, Result};
use crate::{all_obkv_to_json, CboRoaringBitmapCodec, FieldsIdsMap, Index, Object, Result};
static MERGED_DATABASE_COUNT: usize = 7;
static PREFIX_DATABASE_COUNT: usize = 4;
@ -173,6 +174,119 @@ where
Ok((self, Ok(indexed_documents)))
}
#[tracing::instrument(level = "trace", skip_all, target = "indexing::documents")]
pub fn edit_documents(
self,
documents: &RoaringBitmap,
context: Option<Object>,
code: &str,
) -> Result<(Self, StdResult<(u64, u64), UserError>)> {
// Early return when there is no document to add
if documents.is_empty() {
return Ok((self, Ok((0, 0))));
}
/// Transform every field of a raw obkv store into a Rhai Map.
pub fn all_obkv_to_rhaimap(
obkv: obkv::KvReaderU16,
fields_ids_map: &FieldsIdsMap,
) -> Result<rhai::Map> {
let all_keys = obkv.iter().map(|(k, _v)| k).collect::<Vec<_>>();
all_keys
.iter()
.copied()
.flat_map(|id| obkv.get(id).map(|value| (id, value)))
.map(|(id, value)| {
let name = fields_ids_map.name(id).ok_or(
crate::error::FieldIdMapMissingEntry::FieldId {
field_id: id,
process: "allobkv_to_rhaimap",
},
)?;
let value = serde_json::from_slice(value)
.map_err(crate::error::InternalError::SerdeJson)?;
Ok((name.into(), value))
})
.collect()
}
fn rhaimap_to_object(map: rhai::Map) -> Object {
let mut output = Object::new();
for (key, value) in map {
let value = serde_json::to_value(&value).unwrap();
output.insert(key.into(), value);
}
output
}
let mut engine = Engine::new();
engine.set_optimization_level(OptimizationLevel::Full);
//It is an arbitrary value. We need to let users define this in the settings.
engine.set_max_operations(1_000_000);
let ast = engine.compile(code).unwrap();
let fields_ids_map = self.index.fields_ids_map(self.wtxn)?;
let primary_key = self.index.primary_key(self.wtxn)?.unwrap();
let primary_key_id = fields_ids_map.id(primary_key).unwrap();
let mut documents_batch_builder = tempfile::tempfile().map(DocumentsBatchBuilder::new)?;
let mut documents_to_remove = RoaringBitmap::new();
let context: Dynamic = match context {
Some(context) => serde_json::from_value(context.into()).unwrap(),
None => Dynamic::from(()),
};
for docid in documents {
let (document, document_object, document_id) =
match self.index.documents.get(self.wtxn, &docid)? {
Some(obkv) => {
let document_id_bytes = obkv.get(primary_key_id).unwrap();
let document_id: serde_json::Value =
serde_json::from_slice(document_id_bytes).unwrap();
let document = all_obkv_to_rhaimap(obkv, &fields_ids_map)?;
let document_object = all_obkv_to_json(obkv, &fields_ids_map)?;
(document, document_object, document_id)
}
None => panic!("documents must exist"),
};
let mut scope = Scope::new();
scope.push_constant_dynamic("context", context.clone());
scope.push("doc", document);
let _ = engine.eval_ast_with_scope::<Dynamic>(&mut scope, &ast).unwrap();
let new_document = match scope.remove::<Dynamic>("doc") {
// If the "doc" variable has been removed from the scope
// or set to (), we effectively delete the document.
Some(doc) if doc.is_unit() => {
documents_to_remove.push(docid);
continue;
}
None => unreachable!(),
Some(document) => match document.try_cast() {
Some(document) => rhaimap_to_object(document),
None => panic!("Why is \"doc\" no longer a Map?"),
},
};
if document_object != new_document {
assert_eq!(
Some(&document_id),
new_document.get(primary_key),
"you cannot change the document id when editing documents"
);
documents_batch_builder.append_json_object(&new_document)?;
}
}
let file = documents_batch_builder.into_inner()?;
let reader = DocumentsBatchReader::from_reader(file)?;
let (this, removed) = self.remove_documents_from_db_no_batch(&documents_to_remove)?;
let (this, result) = this.add_documents(reader)?;
Ok((this, result.map(|added| (removed, added))))
}
pub fn with_embedders(mut self, embedders: EmbeddingConfigs) -> Self {
self.embedders = embedders;
self