mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-11-23 21:26:02 +00:00
Compare commits
34 Commits
prototype-
...
prototype-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
685663af3c | ||
|
|
14de657d36 | ||
|
|
9a36c090bf | ||
|
|
3aca010b42 | ||
|
|
62c11ce3f3 | ||
|
|
f358538f4f | ||
|
|
9068857ba1 | ||
|
|
d241157084 | ||
|
|
69f73b1d74 | ||
|
|
202794f620 | ||
|
|
38cbd54604 | ||
|
|
3877e0043c | ||
|
|
f95398420b | ||
|
|
53905c1362 | ||
|
|
113aac8815 | ||
|
|
4502af5aed | ||
|
|
06af68aa07 | ||
|
|
6d378c6397 | ||
|
|
ec0c0cf779 | ||
|
|
851694e323 | ||
|
|
ea92c64fdc | ||
|
|
dc36f681be | ||
|
|
48f1987a8d | ||
|
|
9f79ce82af | ||
|
|
5f18a9b2ee | ||
|
|
7f8a1ac0be | ||
|
|
1a67163ee8 | ||
|
|
38141de68d | ||
|
|
7a98b80687 | ||
|
|
229a12c8e6 | ||
|
|
2fdfe79400 | ||
|
|
9184b12a26 | ||
|
|
742378d8e1 | ||
|
|
6dcd739a8b |
21
Cargo.lock
generated
21
Cargo.lock
generated
@@ -453,9 +453,9 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50"
|
||||
|
||||
[[package]]
|
||||
name = "arroy"
|
||||
version = "0.6.3"
|
||||
version = "0.6.4-nested-rtxns"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8578a72223dfa13dfd9fc144d15260d134361789ebdea9b16e85a511edc73c7d"
|
||||
checksum = "fb8b6b34d9d83e3b837cb7f72a439dbd2293b102393c084af5e5b097212e1532"
|
||||
dependencies = [
|
||||
"bytemuck",
|
||||
"byteorder",
|
||||
@@ -1075,9 +1075,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "cellulite"
|
||||
version = "0.3.0"
|
||||
version = "0.3.1-nested-rtxns-2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "71a41aa2cd021bc3f23d97cc1e645848ca8c279fc757d1570ba7fe7ddc021290"
|
||||
checksum = "f22d721963ead1a144f10cb8b53dc9469e760723b069123c7c7bc675c7354270"
|
||||
dependencies = [
|
||||
"crossbeam",
|
||||
"geo",
|
||||
@@ -2758,9 +2758,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "hannoy"
|
||||
version = "0.0.8"
|
||||
version = "0.0.9-nested-rtxns"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0dba13a271c49a119a97862ebf0a74131d879832868400d9fcd937b790058fdd"
|
||||
checksum = "cc5a945b92b063e677d658cfcc7cb6dec2502fe44631f017084938f14d6ce30e"
|
||||
dependencies = [
|
||||
"bytemuck",
|
||||
"byteorder",
|
||||
@@ -2838,9 +2838,9 @@ checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
|
||||
|
||||
[[package]]
|
||||
name = "heed"
|
||||
version = "0.22.0"
|
||||
version = "0.22.1-nested-rtxns"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6a56c94661ddfb51aa9cdfbf102cfcc340aa69267f95ebccc4af08d7c530d393"
|
||||
checksum = "0ff115ba5712b1f1fc7617b195f5c2f139e29c397ff79da040cd19db75ccc240"
|
||||
dependencies = [
|
||||
"bitflags 2.9.4",
|
||||
"byteorder",
|
||||
@@ -2850,7 +2850,6 @@ dependencies = [
|
||||
"lmdb-master-sys",
|
||||
"once_cell",
|
||||
"page_size",
|
||||
"serde",
|
||||
"synchronoise",
|
||||
"url",
|
||||
]
|
||||
@@ -3889,9 +3888,9 @@ checksum = "241eaef5fd12c88705a01fc1066c48c4b36e0dd4377dcdc7ec3942cea7a69956"
|
||||
|
||||
[[package]]
|
||||
name = "lmdb-master-sys"
|
||||
version = "0.2.5"
|
||||
version = "0.2.6-nested-rtxns"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "864808e0b19fb6dd3b70ba94ee671b82fce17554cf80aeb0a155c65bb08027df"
|
||||
checksum = "f4ff85130e3c994b36877045fbbb138d521dea7197bfc19dc3d5d95101a8e20a"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"doxygen-rs",
|
||||
|
||||
@@ -158,6 +158,9 @@ pub enum KindDump {
|
||||
UpgradeDatabase {
|
||||
from: (u32, u32, u32),
|
||||
},
|
||||
IndexCompaction {
|
||||
index_uid: String,
|
||||
},
|
||||
}
|
||||
|
||||
impl From<Task> for TaskDump {
|
||||
@@ -240,6 +243,9 @@ impl From<KindWithContent> for KindDump {
|
||||
KindWithContent::UpgradeDatabase { from: version } => {
|
||||
KindDump::UpgradeDatabase { from: version }
|
||||
}
|
||||
KindWithContent::IndexCompaction { index_uid } => {
|
||||
KindDump::IndexCompaction { index_uid }
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -234,6 +234,9 @@ impl<'a> Dump<'a> {
|
||||
}
|
||||
}
|
||||
KindDump::UpgradeDatabase { from } => KindWithContent::UpgradeDatabase { from },
|
||||
KindDump::IndexCompaction { index_uid } => {
|
||||
KindWithContent::IndexCompaction { index_uid }
|
||||
}
|
||||
},
|
||||
};
|
||||
|
||||
|
||||
@@ -341,6 +341,26 @@ impl IndexMapper {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Closes the specified index.
|
||||
///
|
||||
/// This operation involves closing the underlying environment and so can take a long time to complete.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// - If the Index corresponding to the passed name is concurrently being deleted/resized or cannot be found in the
|
||||
/// in memory hash map.
|
||||
pub fn close_index(&self, rtxn: &RoTxn, name: &str) -> Result<()> {
|
||||
let uuid = self
|
||||
.index_mapping
|
||||
.get(rtxn, name)?
|
||||
.ok_or_else(|| Error::IndexNotFound(name.to_string()))?;
|
||||
|
||||
// We remove the index from the in-memory index map.
|
||||
self.index_map.write().unwrap().close_for_resize(&uuid, self.enable_mdb_writemap, 0);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Return an index, may open it if it wasn't already opened.
|
||||
pub fn index(&self, rtxn: &RoTxn, name: &str) -> Result<Index> {
|
||||
if let Some((current_name, current_index)) =
|
||||
|
||||
@@ -317,6 +317,9 @@ fn snapshot_details(d: &Details) -> String {
|
||||
Details::UpgradeDatabase { from, to } => {
|
||||
format!("{{ from: {from:?}, to: {to:?} }}")
|
||||
}
|
||||
Details::IndexCompaction { index_uid, pre_compaction_size, post_compaction_size } => {
|
||||
format!("{{ index_uid: {index_uid:?}, pre_compaction_size: {pre_compaction_size:?}, post_compaction_size: {post_compaction_size:?} }}")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -138,6 +138,17 @@ make_enum_progress! {
|
||||
}
|
||||
}
|
||||
|
||||
make_enum_progress! {
|
||||
pub enum IndexCompaction {
|
||||
RetrieveTheIndex,
|
||||
CreateTemporaryFile,
|
||||
CopyAndCompactTheIndex,
|
||||
PersistTheCompactedIndex,
|
||||
CloseTheIndex,
|
||||
ReopenTheIndex,
|
||||
}
|
||||
}
|
||||
|
||||
make_enum_progress! {
|
||||
pub enum InnerSwappingTwoIndexes {
|
||||
RetrieveTheTasks,
|
||||
|
||||
@@ -25,6 +25,7 @@ enum AutobatchKind {
|
||||
IndexDeletion,
|
||||
IndexUpdate,
|
||||
IndexSwap,
|
||||
IndexCompaction,
|
||||
}
|
||||
|
||||
impl AutobatchKind {
|
||||
@@ -68,6 +69,7 @@ impl From<KindWithContent> for AutobatchKind {
|
||||
KindWithContent::IndexCreation { .. } => AutobatchKind::IndexCreation,
|
||||
KindWithContent::IndexUpdate { .. } => AutobatchKind::IndexUpdate,
|
||||
KindWithContent::IndexSwap { .. } => AutobatchKind::IndexSwap,
|
||||
KindWithContent::IndexCompaction { .. } => AutobatchKind::IndexCompaction,
|
||||
KindWithContent::TaskCancelation { .. }
|
||||
| KindWithContent::TaskDeletion { .. }
|
||||
| KindWithContent::DumpCreation { .. }
|
||||
@@ -118,6 +120,9 @@ pub enum BatchKind {
|
||||
IndexSwap {
|
||||
id: TaskId,
|
||||
},
|
||||
IndexCompaction {
|
||||
id: TaskId,
|
||||
},
|
||||
}
|
||||
|
||||
impl BatchKind {
|
||||
@@ -183,6 +188,13 @@ impl BatchKind {
|
||||
)),
|
||||
false,
|
||||
),
|
||||
K::IndexCompaction => (
|
||||
Break((
|
||||
BatchKind::IndexCompaction { id: task_id },
|
||||
BatchStopReason::TaskCannotBeBatched { kind, id: task_id },
|
||||
)),
|
||||
false,
|
||||
),
|
||||
K::DocumentClear => (Continue(BatchKind::DocumentClear { ids: vec![task_id] }), false),
|
||||
K::DocumentImport { allow_index_creation, primary_key: pk }
|
||||
if primary_key.is_none() || pk.is_none() || primary_key == pk.as_deref() =>
|
||||
@@ -287,8 +299,10 @@ impl BatchKind {
|
||||
};
|
||||
|
||||
match (self, autobatch_kind) {
|
||||
// We don't batch any of these operations
|
||||
(this, K::IndexCreation | K::IndexUpdate | K::IndexSwap | K::DocumentEdition) => Break((this, BatchStopReason::TaskCannotBeBatched { kind, id })),
|
||||
// We don't batch any of these operations
|
||||
(this, K::IndexCreation | K::IndexUpdate | K::IndexSwap | K::DocumentEdition | K::IndexCompaction) => {
|
||||
Break((this, BatchStopReason::TaskCannotBeBatched { kind, id }))
|
||||
},
|
||||
// We must not batch tasks that don't have the same index creation rights if the index doesn't already exists.
|
||||
(this, kind) if !index_already_exists && this.allow_index_creation() == Some(false) && kind.allow_index_creation() == Some(true) => {
|
||||
Break((this, BatchStopReason::IndexCreationMismatch { id }))
|
||||
@@ -483,6 +497,7 @@ impl BatchKind {
|
||||
| BatchKind::IndexDeletion { .. }
|
||||
| BatchKind::IndexUpdate { .. }
|
||||
| BatchKind::IndexSwap { .. }
|
||||
| BatchKind::IndexCompaction { .. }
|
||||
| BatchKind::DocumentEdition { .. },
|
||||
_,
|
||||
) => {
|
||||
|
||||
@@ -55,6 +55,10 @@ pub(crate) enum Batch {
|
||||
UpgradeDatabase {
|
||||
tasks: Vec<Task>,
|
||||
},
|
||||
IndexCompaction {
|
||||
index_uid: String,
|
||||
task: Task,
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
@@ -110,7 +114,8 @@ impl Batch {
|
||||
| Batch::Dump(task)
|
||||
| Batch::IndexCreation { task, .. }
|
||||
| Batch::Export { task }
|
||||
| Batch::IndexUpdate { task, .. } => {
|
||||
| Batch::IndexUpdate { task, .. }
|
||||
| Batch::IndexCompaction { task, .. } => {
|
||||
RoaringBitmap::from_sorted_iter(std::iter::once(task.uid)).unwrap()
|
||||
}
|
||||
Batch::SnapshotCreation(tasks)
|
||||
@@ -155,7 +160,8 @@ impl Batch {
|
||||
IndexOperation { op, .. } => Some(op.index_uid()),
|
||||
IndexCreation { index_uid, .. }
|
||||
| IndexUpdate { index_uid, .. }
|
||||
| IndexDeletion { index_uid, .. } => Some(index_uid),
|
||||
| IndexDeletion { index_uid, .. }
|
||||
| IndexCompaction { index_uid, .. } => Some(index_uid),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -175,6 +181,7 @@ impl fmt::Display for Batch {
|
||||
Batch::IndexUpdate { .. } => f.write_str("IndexUpdate")?,
|
||||
Batch::IndexDeletion { .. } => f.write_str("IndexDeletion")?,
|
||||
Batch::IndexSwap { .. } => f.write_str("IndexSwap")?,
|
||||
Batch::IndexCompaction { .. } => f.write_str("IndexCompaction")?,
|
||||
Batch::Export { .. } => f.write_str("Export")?,
|
||||
Batch::UpgradeDatabase { .. } => f.write_str("UpgradeDatabase")?,
|
||||
};
|
||||
@@ -430,6 +437,12 @@ impl IndexScheduler {
|
||||
current_batch.processing(Some(&mut task));
|
||||
Ok(Some(Batch::IndexSwap { task }))
|
||||
}
|
||||
BatchKind::IndexCompaction { id } => {
|
||||
let mut task =
|
||||
self.queue.tasks.get_task(rtxn, id)?.ok_or(Error::CorruptedTaskQueue)?;
|
||||
current_batch.processing(Some(&mut task));
|
||||
Ok(Some(Batch::IndexCompaction { index_uid, task }))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1,22 +1,26 @@
|
||||
use std::collections::{BTreeSet, HashMap, HashSet};
|
||||
use std::io::{Seek, SeekFrom};
|
||||
use std::panic::{catch_unwind, AssertUnwindSafe};
|
||||
use std::sync::atomic::Ordering;
|
||||
|
||||
use byte_unit::Byte;
|
||||
use meilisearch_types::batches::{BatchEnqueuedAt, BatchId};
|
||||
use meilisearch_types::heed::{RoTxn, RwTxn};
|
||||
use meilisearch_types::milli::heed::CompactionOption;
|
||||
use meilisearch_types::milli::progress::{Progress, VariableNameStep};
|
||||
use meilisearch_types::milli::{self, ChannelCongestion};
|
||||
use meilisearch_types::tasks::{Details, IndexSwap, Kind, KindWithContent, Status, Task};
|
||||
use meilisearch_types::versioning::{VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH};
|
||||
use milli::update::Settings as MilliSettings;
|
||||
use roaring::RoaringBitmap;
|
||||
use tempfile::PersistError;
|
||||
use time::OffsetDateTime;
|
||||
|
||||
use super::create_batch::Batch;
|
||||
use crate::processing::{
|
||||
AtomicBatchStep, AtomicTaskStep, CreateIndexProgress, DeleteIndexProgress, FinalizingIndexStep,
|
||||
InnerSwappingTwoIndexes, SwappingTheIndexes, TaskCancelationProgress, TaskDeletionProgress,
|
||||
UpdateIndexProgress,
|
||||
IndexCompaction, InnerSwappingTwoIndexes, SwappingTheIndexes, TaskCancelationProgress,
|
||||
TaskDeletionProgress, UpdateIndexProgress,
|
||||
};
|
||||
use crate::utils::{
|
||||
self, remove_n_tasks_datetime_earlier_than, remove_task_datetime, swap_index_uid_in_task,
|
||||
@@ -418,6 +422,47 @@ impl IndexScheduler {
|
||||
task.status = Status::Succeeded;
|
||||
Ok((vec![task], ProcessBatchInfo::default()))
|
||||
}
|
||||
Batch::IndexCompaction { index_uid: _, mut task } => {
|
||||
let KindWithContent::IndexCompaction { index_uid } = &task.kind else {
|
||||
unreachable!()
|
||||
};
|
||||
|
||||
let rtxn = self.env.read_txn()?;
|
||||
let ret = catch_unwind(AssertUnwindSafe(|| {
|
||||
self.apply_compaction(&rtxn, &progress, index_uid)
|
||||
}));
|
||||
|
||||
let (pre_size, post_size) = match ret {
|
||||
Ok(Ok(stats)) => stats,
|
||||
Ok(Err(Error::AbortedTask)) => return Err(Error::AbortedTask),
|
||||
Ok(Err(e)) => return Err(e),
|
||||
Err(e) => {
|
||||
let msg = match e.downcast_ref::<&'static str>() {
|
||||
Some(s) => *s,
|
||||
None => match e.downcast_ref::<String>() {
|
||||
Some(s) => &s[..],
|
||||
None => "Box<dyn Any>",
|
||||
},
|
||||
};
|
||||
return Err(Error::Export(Box::new(Error::ProcessBatchPanicked(
|
||||
msg.to_string(),
|
||||
))));
|
||||
}
|
||||
};
|
||||
|
||||
task.status = Status::Succeeded;
|
||||
if let Some(Details::IndexCompaction {
|
||||
index_uid: _,
|
||||
pre_compaction_size,
|
||||
post_compaction_size,
|
||||
}) = task.details.as_mut()
|
||||
{
|
||||
*pre_compaction_size = Some(Byte::from_u64(pre_size));
|
||||
*post_compaction_size = Some(Byte::from_u64(post_size));
|
||||
}
|
||||
|
||||
Ok((vec![task], ProcessBatchInfo::default()))
|
||||
}
|
||||
Batch::Export { mut task } => {
|
||||
let KindWithContent::Export { url, api_key, payload_size, indexes } = &task.kind
|
||||
else {
|
||||
@@ -493,6 +538,91 @@ impl IndexScheduler {
|
||||
}
|
||||
}
|
||||
|
||||
fn apply_compaction(
|
||||
&self,
|
||||
rtxn: &RoTxn,
|
||||
progress: &Progress,
|
||||
index_uid: &str,
|
||||
) -> Result<(u64, u64)> {
|
||||
// 1. Verify that the index exists
|
||||
if !self.index_mapper.index_exists(rtxn, index_uid)? {
|
||||
return Err(Error::IndexNotFound(index_uid.to_owned()));
|
||||
}
|
||||
|
||||
// 2. We retrieve the index and create a temporary file in the index directory
|
||||
progress.update_progress(IndexCompaction::RetrieveTheIndex);
|
||||
let index = self.index_mapper.index(rtxn, index_uid)?;
|
||||
|
||||
// the index operation can take a long time, so save this handle to make it available to the search for the duration of the tick
|
||||
self.index_mapper
|
||||
.set_currently_updating_index(Some((index_uid.to_string(), index.clone())));
|
||||
|
||||
progress.update_progress(IndexCompaction::CreateTemporaryFile);
|
||||
let pre_size = std::fs::metadata(index.path().join("data.mdb"))?.len();
|
||||
let mut file = tempfile::Builder::new()
|
||||
.suffix("data.")
|
||||
.prefix(".mdb.cpy")
|
||||
.tempfile_in(index.path())?;
|
||||
|
||||
// 3. We copy the index data to the temporary file
|
||||
progress.update_progress(IndexCompaction::CopyAndCompactTheIndex);
|
||||
index
|
||||
.copy_to_file(file.as_file_mut(), CompactionOption::Enabled)
|
||||
.map_err(|error| Error::Milli { error, index_uid: Some(index_uid.to_string()) })?;
|
||||
// ...and reset the file position as specified in the documentation
|
||||
file.seek(SeekFrom::Start(0))?;
|
||||
|
||||
// 4. We replace the index data file with the temporary file
|
||||
progress.update_progress(IndexCompaction::PersistTheCompactedIndex);
|
||||
match file.persist(index.path().join("data.mdb")) {
|
||||
Ok(file) => file.sync_all()?,
|
||||
// TODO see if we have a _resource busy_ error and probably handle this by:
|
||||
// 1. closing the index, 2. replacing and 3. reopening it
|
||||
Err(PersistError { error, file: _ }) => return Err(Error::IoError(error)),
|
||||
};
|
||||
|
||||
// 5. Prepare to close the index
|
||||
progress.update_progress(IndexCompaction::CloseTheIndex);
|
||||
|
||||
// unmark that the index is the processing one so we don't keep a handle to it, preventing its closing
|
||||
self.index_mapper.set_currently_updating_index(None);
|
||||
|
||||
self.index_mapper.close_index(rtxn, index_uid)?;
|
||||
drop(index);
|
||||
|
||||
progress.update_progress(IndexCompaction::ReopenTheIndex);
|
||||
// 6. Reopen the index
|
||||
// The index will use the compacted data file when being reopened
|
||||
let index = self.index_mapper.index(rtxn, index_uid)?;
|
||||
|
||||
// if the update processed successfully, we're going to store the new
|
||||
// stats of the index. Since the tasks have already been processed and
|
||||
// this is a non-critical operation. If it fails, we should not fail
|
||||
// the entire batch.
|
||||
let res = || -> Result<_> {
|
||||
let mut wtxn = self.env.write_txn()?;
|
||||
let index_rtxn = index.read_txn()?;
|
||||
let stats = crate::index_mapper::IndexStats::new(&index, &index_rtxn)
|
||||
.map_err(|e| Error::from_milli(e, Some(index_uid.to_string())))?;
|
||||
self.index_mapper.store_stats_of(&mut wtxn, index_uid, &stats)?;
|
||||
wtxn.commit()?;
|
||||
Ok(stats.database_size)
|
||||
}();
|
||||
|
||||
let post_size = match res {
|
||||
Ok(post_size) => post_size,
|
||||
Err(e) => {
|
||||
tracing::error!(
|
||||
error = &e as &dyn std::error::Error,
|
||||
"Could not write the stats of the index"
|
||||
);
|
||||
0
|
||||
}
|
||||
};
|
||||
|
||||
Ok((pre_size, post_size))
|
||||
}
|
||||
|
||||
/// Swap the index `lhs` with the index `rhs`.
|
||||
fn apply_index_swap(
|
||||
&self,
|
||||
|
||||
@@ -722,7 +722,7 @@ fn basic_get_stats() {
|
||||
let kind = index_creation_task("whalo", "fish");
|
||||
let _task = index_scheduler.register(kind, None, false).unwrap();
|
||||
|
||||
snapshot!(json_string!(index_scheduler.get_stats().unwrap()), @r#"
|
||||
snapshot!(json_string!(index_scheduler.get_stats().unwrap()), @r###"
|
||||
{
|
||||
"indexes": {
|
||||
"catto": 1,
|
||||
@@ -742,6 +742,7 @@ fn basic_get_stats() {
|
||||
"documentEdition": 0,
|
||||
"dumpCreation": 0,
|
||||
"export": 0,
|
||||
"indexCompaction": 0,
|
||||
"indexCreation": 3,
|
||||
"indexDeletion": 0,
|
||||
"indexSwap": 0,
|
||||
@@ -753,10 +754,10 @@ fn basic_get_stats() {
|
||||
"upgradeDatabase": 0
|
||||
}
|
||||
}
|
||||
"#);
|
||||
"###);
|
||||
|
||||
handle.advance_till([Start, BatchCreated]);
|
||||
snapshot!(json_string!(index_scheduler.get_stats().unwrap()), @r#"
|
||||
snapshot!(json_string!(index_scheduler.get_stats().unwrap()), @r###"
|
||||
{
|
||||
"indexes": {
|
||||
"catto": 1,
|
||||
@@ -776,6 +777,7 @@ fn basic_get_stats() {
|
||||
"documentEdition": 0,
|
||||
"dumpCreation": 0,
|
||||
"export": 0,
|
||||
"indexCompaction": 0,
|
||||
"indexCreation": 3,
|
||||
"indexDeletion": 0,
|
||||
"indexSwap": 0,
|
||||
@@ -787,7 +789,7 @@ fn basic_get_stats() {
|
||||
"upgradeDatabase": 0
|
||||
}
|
||||
}
|
||||
"#);
|
||||
"###);
|
||||
|
||||
handle.advance_till([
|
||||
InsideProcessBatch,
|
||||
@@ -797,7 +799,7 @@ fn basic_get_stats() {
|
||||
Start,
|
||||
BatchCreated,
|
||||
]);
|
||||
snapshot!(json_string!(index_scheduler.get_stats().unwrap()), @r#"
|
||||
snapshot!(json_string!(index_scheduler.get_stats().unwrap()), @r###"
|
||||
{
|
||||
"indexes": {
|
||||
"catto": 1,
|
||||
@@ -817,6 +819,7 @@ fn basic_get_stats() {
|
||||
"documentEdition": 0,
|
||||
"dumpCreation": 0,
|
||||
"export": 0,
|
||||
"indexCompaction": 0,
|
||||
"indexCreation": 3,
|
||||
"indexDeletion": 0,
|
||||
"indexSwap": 0,
|
||||
@@ -828,7 +831,7 @@ fn basic_get_stats() {
|
||||
"upgradeDatabase": 0
|
||||
}
|
||||
}
|
||||
"#);
|
||||
"###);
|
||||
|
||||
// now we make one more batch, the started_at field of the new tasks will be past `second_start_time`
|
||||
handle.advance_till([
|
||||
@@ -839,7 +842,7 @@ fn basic_get_stats() {
|
||||
Start,
|
||||
BatchCreated,
|
||||
]);
|
||||
snapshot!(json_string!(index_scheduler.get_stats().unwrap()), @r#"
|
||||
snapshot!(json_string!(index_scheduler.get_stats().unwrap()), @r###"
|
||||
{
|
||||
"indexes": {
|
||||
"catto": 1,
|
||||
@@ -859,6 +862,7 @@ fn basic_get_stats() {
|
||||
"documentEdition": 0,
|
||||
"dumpCreation": 0,
|
||||
"export": 0,
|
||||
"indexCompaction": 0,
|
||||
"indexCreation": 3,
|
||||
"indexDeletion": 0,
|
||||
"indexSwap": 0,
|
||||
@@ -870,7 +874,7 @@ fn basic_get_stats() {
|
||||
"upgradeDatabase": 0
|
||||
}
|
||||
}
|
||||
"#);
|
||||
"###);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
||||
@@ -256,14 +256,15 @@ pub fn swap_index_uid_in_task(task: &mut Task, swap: (&str, &str)) {
|
||||
use KindWithContent as K;
|
||||
let mut index_uids = vec![];
|
||||
match &mut task.kind {
|
||||
K::DocumentAdditionOrUpdate { index_uid, .. } => index_uids.push(index_uid),
|
||||
K::DocumentEdition { index_uid, .. } => index_uids.push(index_uid),
|
||||
K::DocumentDeletion { index_uid, .. } => index_uids.push(index_uid),
|
||||
K::DocumentDeletionByFilter { index_uid, .. } => index_uids.push(index_uid),
|
||||
K::DocumentClear { index_uid } => index_uids.push(index_uid),
|
||||
K::SettingsUpdate { index_uid, .. } => index_uids.push(index_uid),
|
||||
K::IndexDeletion { index_uid } => index_uids.push(index_uid),
|
||||
K::IndexCreation { index_uid, .. } => index_uids.push(index_uid),
|
||||
K::DocumentAdditionOrUpdate { index_uid, .. }
|
||||
| K::DocumentEdition { index_uid, .. }
|
||||
| K::DocumentDeletion { index_uid, .. }
|
||||
| K::DocumentDeletionByFilter { index_uid, .. }
|
||||
| K::DocumentClear { index_uid }
|
||||
| K::SettingsUpdate { index_uid, .. }
|
||||
| K::IndexDeletion { index_uid }
|
||||
| K::IndexCreation { index_uid, .. }
|
||||
| K::IndexCompaction { index_uid, .. } => index_uids.push(index_uid),
|
||||
K::IndexUpdate { index_uid, new_index_uid, .. } => {
|
||||
index_uids.push(index_uid);
|
||||
if let Some(new_uid) = new_index_uid {
|
||||
@@ -618,6 +619,13 @@ impl crate::IndexScheduler {
|
||||
Details::UpgradeDatabase { from: _, to: _ } => {
|
||||
assert_eq!(kind.as_kind(), Kind::UpgradeDatabase);
|
||||
}
|
||||
Details::IndexCompaction {
|
||||
index_uid: _,
|
||||
pre_compaction_size: _,
|
||||
post_compaction_size: _,
|
||||
} => {
|
||||
assert_eq!(kind.as_kind(), Kind::IndexCompaction);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -109,6 +109,7 @@ impl HeedAuthStore {
|
||||
Action::IndexesGet,
|
||||
Action::IndexesUpdate,
|
||||
Action::IndexesSwap,
|
||||
Action::IndexesCompact,
|
||||
]
|
||||
.iter(),
|
||||
);
|
||||
|
||||
@@ -380,6 +380,9 @@ pub enum Action {
|
||||
#[serde(rename = "webhooks.*")]
|
||||
#[deserr(rename = "webhooks.*")]
|
||||
WebhooksAll,
|
||||
#[serde(rename = "indexes.compact")]
|
||||
#[deserr(rename = "indexes.compact")]
|
||||
IndexesCompact,
|
||||
}
|
||||
|
||||
impl Action {
|
||||
@@ -398,6 +401,7 @@ impl Action {
|
||||
INDEXES_UPDATE => Some(Self::IndexesUpdate),
|
||||
INDEXES_DELETE => Some(Self::IndexesDelete),
|
||||
INDEXES_SWAP => Some(Self::IndexesSwap),
|
||||
INDEXES_COMPACT => Some(Self::IndexesCompact),
|
||||
TASKS_ALL => Some(Self::TasksAll),
|
||||
TASKS_CANCEL => Some(Self::TasksCancel),
|
||||
TASKS_DELETE => Some(Self::TasksDelete),
|
||||
@@ -462,6 +466,7 @@ impl Action {
|
||||
IndexesUpdate => false,
|
||||
IndexesDelete => false,
|
||||
IndexesSwap => false,
|
||||
IndexesCompact => false,
|
||||
TasksCancel => false,
|
||||
TasksDelete => false,
|
||||
TasksGet => true,
|
||||
@@ -513,6 +518,7 @@ pub mod actions {
|
||||
pub const INDEXES_UPDATE: u8 = IndexesUpdate.repr();
|
||||
pub const INDEXES_DELETE: u8 = IndexesDelete.repr();
|
||||
pub const INDEXES_SWAP: u8 = IndexesSwap.repr();
|
||||
pub const INDEXES_COMPACT: u8 = IndexesCompact.repr();
|
||||
pub const TASKS_ALL: u8 = TasksAll.repr();
|
||||
pub const TASKS_CANCEL: u8 = TasksCancel.repr();
|
||||
pub const TASKS_DELETE: u8 = TasksDelete.repr();
|
||||
@@ -614,6 +620,7 @@ pub(crate) mod test {
|
||||
assert!(WebhooksDelete.repr() == 47 && WEBHOOKS_DELETE == 47);
|
||||
assert!(WebhooksCreate.repr() == 48 && WEBHOOKS_CREATE == 48);
|
||||
assert!(WebhooksAll.repr() == 49 && WEBHOOKS_ALL == 49);
|
||||
assert!(IndexesCompact.repr() == 50 && INDEXES_COMPACT == 50);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
||||
@@ -142,6 +142,11 @@ pub struct DetailsView {
|
||||
pub old_index_uid: Option<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub new_index_uid: Option<String>,
|
||||
// index compaction
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub pre_compaction_size: Option<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub post_compaction_size: Option<String>,
|
||||
}
|
||||
|
||||
impl DetailsView {
|
||||
@@ -314,6 +319,24 @@ impl DetailsView {
|
||||
// We should never be able to batch multiple renames at the same time.
|
||||
(Some(left), Some(_right)) => Some(left),
|
||||
},
|
||||
pre_compaction_size: match (
|
||||
self.pre_compaction_size.clone(),
|
||||
other.pre_compaction_size.clone(),
|
||||
) {
|
||||
(None, None) => None,
|
||||
(None, Some(size)) | (Some(size), None) => Some(size),
|
||||
// We should never be able to batch multiple compactions at the same time.
|
||||
(Some(left), Some(_right)) => Some(left),
|
||||
},
|
||||
post_compaction_size: match (
|
||||
self.post_compaction_size.clone(),
|
||||
other.post_compaction_size.clone(),
|
||||
) {
|
||||
(None, None) => None,
|
||||
(None, Some(size)) | (Some(size), None) => Some(size),
|
||||
// We should never be able to batch multiple compactions at the same time.
|
||||
(Some(left), Some(_right)) => Some(left),
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -415,6 +438,15 @@ impl From<Details> for DetailsView {
|
||||
upgrade_to: Some(format!("v{}.{}.{}", to.0, to.1, to.2)),
|
||||
..Default::default()
|
||||
},
|
||||
Details::IndexCompaction { pre_compaction_size, post_compaction_size, .. } => {
|
||||
DetailsView {
|
||||
pre_compaction_size: pre_compaction_size
|
||||
.map(|size| size.get_appropriate_unit(UnitType::Both).to_string()),
|
||||
post_compaction_size: post_compaction_size
|
||||
.map(|size| size.get_appropriate_unit(UnitType::Both).to_string()),
|
||||
..Default::default()
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -67,7 +67,8 @@ impl Task {
|
||||
| SettingsUpdate { index_uid, .. }
|
||||
| IndexCreation { index_uid, .. }
|
||||
| IndexUpdate { index_uid, .. }
|
||||
| IndexDeletion { index_uid } => Some(index_uid),
|
||||
| IndexDeletion { index_uid }
|
||||
| IndexCompaction { index_uid } => Some(index_uid),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -94,7 +95,8 @@ impl Task {
|
||||
| KindWithContent::DumpCreation { .. }
|
||||
| KindWithContent::SnapshotCreation
|
||||
| KindWithContent::Export { .. }
|
||||
| KindWithContent::UpgradeDatabase { .. } => None,
|
||||
| KindWithContent::UpgradeDatabase { .. }
|
||||
| KindWithContent::IndexCompaction { .. } => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -170,6 +172,9 @@ pub enum KindWithContent {
|
||||
UpgradeDatabase {
|
||||
from: (u32, u32, u32),
|
||||
},
|
||||
IndexCompaction {
|
||||
index_uid: String,
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, ToSchema)]
|
||||
@@ -206,6 +211,7 @@ impl KindWithContent {
|
||||
KindWithContent::SnapshotCreation => Kind::SnapshotCreation,
|
||||
KindWithContent::Export { .. } => Kind::Export,
|
||||
KindWithContent::UpgradeDatabase { .. } => Kind::UpgradeDatabase,
|
||||
KindWithContent::IndexCompaction { .. } => Kind::IndexCompaction,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -226,7 +232,8 @@ impl KindWithContent {
|
||||
| DocumentClear { index_uid }
|
||||
| SettingsUpdate { index_uid, .. }
|
||||
| IndexCreation { index_uid, .. }
|
||||
| IndexDeletion { index_uid } => vec![index_uid],
|
||||
| IndexDeletion { index_uid }
|
||||
| IndexCompaction { index_uid } => vec![index_uid],
|
||||
IndexUpdate { index_uid, new_index_uid, .. } => {
|
||||
let mut indexes = vec![index_uid.as_str()];
|
||||
if let Some(new_uid) = new_index_uid {
|
||||
@@ -325,6 +332,11 @@ impl KindWithContent {
|
||||
versioning::VERSION_PATCH,
|
||||
),
|
||||
}),
|
||||
KindWithContent::IndexCompaction { index_uid } => Some(Details::IndexCompaction {
|
||||
index_uid: index_uid.clone(),
|
||||
pre_compaction_size: None,
|
||||
post_compaction_size: None,
|
||||
}),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -407,6 +419,11 @@ impl KindWithContent {
|
||||
versioning::VERSION_PATCH,
|
||||
),
|
||||
}),
|
||||
KindWithContent::IndexCompaction { index_uid } => Some(Details::IndexCompaction {
|
||||
index_uid: index_uid.clone(),
|
||||
pre_compaction_size: None,
|
||||
post_compaction_size: None,
|
||||
}),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -469,6 +486,11 @@ impl From<&KindWithContent> for Option<Details> {
|
||||
versioning::VERSION_PATCH,
|
||||
),
|
||||
}),
|
||||
KindWithContent::IndexCompaction { index_uid } => Some(Details::IndexCompaction {
|
||||
index_uid: index_uid.clone(),
|
||||
pre_compaction_size: None,
|
||||
post_compaction_size: None,
|
||||
}),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -579,6 +601,7 @@ pub enum Kind {
|
||||
SnapshotCreation,
|
||||
Export,
|
||||
UpgradeDatabase,
|
||||
IndexCompaction,
|
||||
}
|
||||
|
||||
impl Kind {
|
||||
@@ -590,7 +613,8 @@ impl Kind {
|
||||
| Kind::SettingsUpdate
|
||||
| Kind::IndexCreation
|
||||
| Kind::IndexDeletion
|
||||
| Kind::IndexUpdate => true,
|
||||
| Kind::IndexUpdate
|
||||
| Kind::IndexCompaction => true,
|
||||
Kind::IndexSwap
|
||||
| Kind::TaskCancelation
|
||||
| Kind::TaskDeletion
|
||||
@@ -618,6 +642,7 @@ impl Display for Kind {
|
||||
Kind::SnapshotCreation => write!(f, "snapshotCreation"),
|
||||
Kind::Export => write!(f, "export"),
|
||||
Kind::UpgradeDatabase => write!(f, "upgradeDatabase"),
|
||||
Kind::IndexCompaction => write!(f, "indexCompaction"),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -653,6 +678,8 @@ impl FromStr for Kind {
|
||||
Ok(Kind::Export)
|
||||
} else if kind.eq_ignore_ascii_case("upgradeDatabase") {
|
||||
Ok(Kind::UpgradeDatabase)
|
||||
} else if kind.eq_ignore_ascii_case("indexCompaction") {
|
||||
Ok(Kind::IndexCompaction)
|
||||
} else {
|
||||
Err(ParseTaskKindError(kind.to_owned()))
|
||||
}
|
||||
@@ -738,6 +765,11 @@ pub enum Details {
|
||||
from: (u32, u32, u32),
|
||||
to: (u32, u32, u32),
|
||||
},
|
||||
IndexCompaction {
|
||||
index_uid: String,
|
||||
pre_compaction_size: Option<Byte>,
|
||||
post_compaction_size: Option<Byte>,
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Clone, Serialize, Deserialize, ToSchema)]
|
||||
@@ -800,6 +832,10 @@ impl Details {
|
||||
Self::ClearAll { deleted_documents } => *deleted_documents = Some(0),
|
||||
Self::TaskCancelation { canceled_tasks, .. } => *canceled_tasks = Some(0),
|
||||
Self::TaskDeletion { deleted_tasks, .. } => *deleted_tasks = Some(0),
|
||||
Self::IndexCompaction { pre_compaction_size, post_compaction_size, .. } => {
|
||||
*pre_compaction_size = None;
|
||||
*post_compaction_size = None;
|
||||
}
|
||||
Self::SettingsUpdate { .. }
|
||||
| Self::IndexInfo { .. }
|
||||
| Self::Dump { .. }
|
||||
|
||||
@@ -205,6 +205,8 @@ struct Infos {
|
||||
experimental_no_snapshot_compaction: bool,
|
||||
experimental_no_edition_2024_for_dumps: bool,
|
||||
experimental_no_edition_2024_for_settings: bool,
|
||||
experimental_no_edition_2024_for_prefix_post_processing: bool,
|
||||
experimental_no_edition_2024_for_facet_post_processing: bool,
|
||||
experimental_vector_store_setting: bool,
|
||||
gpu_enabled: bool,
|
||||
db_path: bool,
|
||||
@@ -296,6 +298,8 @@ impl Infos {
|
||||
skip_index_budget: _,
|
||||
experimental_no_edition_2024_for_settings,
|
||||
experimental_no_edition_2024_for_dumps,
|
||||
experimental_no_edition_2024_for_prefix_post_processing,
|
||||
experimental_no_edition_2024_for_facet_post_processing,
|
||||
} = indexer_options;
|
||||
|
||||
let RuntimeTogglableFeatures {
|
||||
@@ -365,6 +369,8 @@ impl Infos {
|
||||
ssl_resumption,
|
||||
ssl_tickets,
|
||||
experimental_no_edition_2024_for_settings,
|
||||
experimental_no_edition_2024_for_prefix_post_processing,
|
||||
experimental_no_edition_2024_for_facet_post_processing,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -55,6 +55,10 @@ const MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE: &str = "MEILI_EXPERIMENTAL_ENABLE_LO
|
||||
const MEILI_EXPERIMENTAL_CONTAINS_FILTER: &str = "MEILI_EXPERIMENTAL_CONTAINS_FILTER";
|
||||
const MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_SETTINGS: &str =
|
||||
"MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_SETTINGS";
|
||||
const MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_FACET_POST_PROCESSING: &str =
|
||||
"MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_FACET_POST_PROCESSING";
|
||||
const MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_PREFIX_POST_PROCESSING: &str =
|
||||
"MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_PREFIX_POST_PROCESSING";
|
||||
const MEILI_EXPERIMENTAL_ENABLE_METRICS: &str = "MEILI_EXPERIMENTAL_ENABLE_METRICS";
|
||||
const MEILI_EXPERIMENTAL_SEARCH_QUEUE_SIZE: &str = "MEILI_EXPERIMENTAL_SEARCH_QUEUE_SIZE";
|
||||
const MEILI_EXPERIMENTAL_DROP_SEARCH_AFTER: &str = "MEILI_EXPERIMENTAL_DROP_SEARCH_AFTER";
|
||||
@@ -772,6 +776,22 @@ pub struct IndexerOpts {
|
||||
#[clap(long, env = MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_DUMPS)]
|
||||
#[serde(default)]
|
||||
pub experimental_no_edition_2024_for_dumps: bool,
|
||||
|
||||
/// Experimental no edition 2024 to compute prefixes. For more information,
|
||||
/// see: <https://github.com/orgs/meilisearch/discussions/862>
|
||||
///
|
||||
/// Enables the experimental no edition 2024 to compute prefixes.
|
||||
#[clap(long, env = MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_PREFIX_POST_PROCESSING)]
|
||||
#[serde(default)]
|
||||
pub experimental_no_edition_2024_for_prefix_post_processing: bool,
|
||||
|
||||
/// Experimental no edition 2024 to compute facets. For more information,
|
||||
/// see: <https://github.com/orgs/meilisearch/discussions/862>
|
||||
///
|
||||
/// Enables the experimental no edition 2024 to compute facets.
|
||||
#[clap(long, env = MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_FACET_POST_PROCESSING)]
|
||||
#[serde(default)]
|
||||
pub experimental_no_edition_2024_for_facet_post_processing: bool,
|
||||
}
|
||||
|
||||
impl IndexerOpts {
|
||||
@@ -783,6 +803,8 @@ impl IndexerOpts {
|
||||
skip_index_budget: _,
|
||||
experimental_no_edition_2024_for_settings,
|
||||
experimental_no_edition_2024_for_dumps,
|
||||
experimental_no_edition_2024_for_prefix_post_processing,
|
||||
experimental_no_edition_2024_for_facet_post_processing,
|
||||
} = self;
|
||||
if let Some(max_indexing_memory) = max_indexing_memory.0 {
|
||||
export_to_env_if_not_present(
|
||||
@@ -808,6 +830,18 @@ impl IndexerOpts {
|
||||
experimental_no_edition_2024_for_dumps.to_string(),
|
||||
);
|
||||
}
|
||||
if experimental_no_edition_2024_for_prefix_post_processing {
|
||||
export_to_env_if_not_present(
|
||||
MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_PREFIX_POST_PROCESSING,
|
||||
experimental_no_edition_2024_for_prefix_post_processing.to_string(),
|
||||
);
|
||||
}
|
||||
if experimental_no_edition_2024_for_facet_post_processing {
|
||||
export_to_env_if_not_present(
|
||||
MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_FACET_POST_PROCESSING,
|
||||
experimental_no_edition_2024_for_facet_post_processing.to_string(),
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -833,6 +867,10 @@ impl TryFrom<&IndexerOpts> for IndexerConfig {
|
||||
chunk_compression_level: Default::default(),
|
||||
documents_chunk_size: Default::default(),
|
||||
max_nb_chunks: Default::default(),
|
||||
experimental_no_edition_2024_for_prefix_post_processing: other
|
||||
.experimental_no_edition_2024_for_prefix_post_processing,
|
||||
experimental_no_edition_2024_for_facet_post_processing: other
|
||||
.experimental_no_edition_2024_for_facet_post_processing,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
84
crates/meilisearch/src/routes/indexes/compact.rs
Normal file
84
crates/meilisearch/src/routes/indexes/compact.rs
Normal file
@@ -0,0 +1,84 @@
|
||||
use actix_web::web::{self, Data};
|
||||
use actix_web::{HttpRequest, HttpResponse};
|
||||
use index_scheduler::IndexScheduler;
|
||||
use meilisearch_types::error::ResponseError;
|
||||
use meilisearch_types::index_uid::IndexUid;
|
||||
use meilisearch_types::keys::actions;
|
||||
use meilisearch_types::tasks::KindWithContent;
|
||||
use tracing::debug;
|
||||
use utoipa::OpenApi;
|
||||
|
||||
use super::ActionPolicy;
|
||||
use crate::analytics::Analytics;
|
||||
use crate::extractors::authentication::GuardedData;
|
||||
use crate::extractors::sequential_extractor::SeqHandler;
|
||||
use crate::routes::SummarizedTaskView;
|
||||
|
||||
#[derive(OpenApi)]
|
||||
#[openapi(
|
||||
paths(compact),
|
||||
tags(
|
||||
(
|
||||
name = "Compact an index",
|
||||
description = "The /compact route uses compacts the database to reorganize and make it smaller and more efficient.",
|
||||
external_docs(url = "https://www.meilisearch.com/docs/reference/api/compact"),
|
||||
),
|
||||
),
|
||||
)]
|
||||
pub struct CompactApi;
|
||||
|
||||
pub fn configure(cfg: &mut web::ServiceConfig) {
|
||||
cfg.service(web::resource("").route(web::post().to(SeqHandler(compact))));
|
||||
}
|
||||
|
||||
/// Compact an index
|
||||
#[utoipa::path(
|
||||
post,
|
||||
path = "{indexUid}/compact",
|
||||
tag = "Compact an index",
|
||||
security(("Bearer" = ["search", "*"])),
|
||||
params(("indexUid" = String, Path, example = "movies", description = "Index Unique Identifier", nullable = false)),
|
||||
responses(
|
||||
(status = ACCEPTED, description = "Task successfully enqueued", body = SummarizedTaskView, content_type = "application/json", example = json!(
|
||||
{
|
||||
"taskUid": 147,
|
||||
"indexUid": null,
|
||||
"status": "enqueued",
|
||||
"type": "documentDeletion",
|
||||
"enqueuedAt": "2024-08-08T17:05:55.791772Z"
|
||||
}
|
||||
)),
|
||||
(status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!(
|
||||
{
|
||||
"message": "The Authorization header is missing. It must use the bearer authorization method.",
|
||||
"code": "missing_authorization_header",
|
||||
"type": "auth",
|
||||
"link": "https://docs.meilisearch.com/errors#missing_authorization_header"
|
||||
}
|
||||
)),
|
||||
)
|
||||
)]
|
||||
pub async fn compact(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::INDEXES_COMPACT }>, Data<IndexScheduler>>,
|
||||
index_uid: web::Path<String>,
|
||||
req: HttpRequest,
|
||||
analytics: web::Data<Analytics>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
|
||||
|
||||
analytics.publish(IndexCompacted::default(), &req);
|
||||
|
||||
let task = KindWithContent::IndexCompaction { index_uid: index_uid.to_string() };
|
||||
let task =
|
||||
match tokio::task::spawn_blocking(move || index_scheduler.register(task, None, false))
|
||||
.await?
|
||||
{
|
||||
Ok(task) => task,
|
||||
Err(e) => return Err(e.into()),
|
||||
};
|
||||
|
||||
debug!(returns = ?task, "Compact the {index_uid} index");
|
||||
Ok(HttpResponse::Accepted().json(SummarizedTaskView::from(task)))
|
||||
}
|
||||
|
||||
crate::empty_analytics!(IndexCompacted, "Index Compacted");
|
||||
@@ -28,6 +28,7 @@ use crate::extractors::sequential_extractor::SeqHandler;
|
||||
use crate::routes::is_dry_run;
|
||||
use crate::Opt;
|
||||
|
||||
pub mod compact;
|
||||
pub mod documents;
|
||||
mod enterprise_edition;
|
||||
pub mod facet_search;
|
||||
@@ -49,8 +50,9 @@ pub use enterprise_edition::proxy::{PROXY_ORIGIN_REMOTE_HEADER, PROXY_ORIGIN_TAS
|
||||
(path = "/", api = facet_search::FacetSearchApi),
|
||||
(path = "/", api = similar::SimilarApi),
|
||||
(path = "/", api = settings::SettingsApi),
|
||||
(path = "/", api = compact::CompactApi),
|
||||
),
|
||||
paths(list_indexes, create_index, get_index, update_index, delete_index, get_index_stats),
|
||||
paths(list_indexes, create_index, get_index, update_index, delete_index, get_index_stats, compact::compact),
|
||||
tags(
|
||||
(
|
||||
name = "Indexes",
|
||||
@@ -80,7 +82,8 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
|
||||
.service(web::scope("/search").configure(search::configure))
|
||||
.service(web::scope("/facet-search").configure(facet_search::configure))
|
||||
.service(web::scope("/similar").configure(similar::configure))
|
||||
.service(web::scope("/settings").configure(settings::configure)),
|
||||
.service(web::scope("/settings").configure(settings::configure))
|
||||
.service(web::scope("/compact").configure(compact::configure)),
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
@@ -226,14 +226,14 @@ mod tests {
|
||||
{
|
||||
let params = "types=createIndex";
|
||||
let err = deserr_query_params::<TaskDeletionOrCancelationQuery>(params).unwrap_err();
|
||||
snapshot!(meili_snap::json_string!(err), @r#"
|
||||
snapshot!(meili_snap::json_string!(err), @r###"
|
||||
{
|
||||
"message": "Invalid value in parameter `types`: `createIndex` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentEdition`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`, `export`, `upgradeDatabase`.",
|
||||
"message": "Invalid value in parameter `types`: `createIndex` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentEdition`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`, `export`, `upgradeDatabase`, `indexCompaction`.",
|
||||
"code": "invalid_task_types",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_task_types"
|
||||
}
|
||||
"#);
|
||||
"###);
|
||||
}
|
||||
}
|
||||
#[test]
|
||||
|
||||
@@ -419,14 +419,14 @@ async fn error_add_api_key_invalid_parameters_actions() {
|
||||
let (response, code) = server.add_api_key(content).await;
|
||||
|
||||
meili_snap::snapshot!(code, @"400 Bad Request");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response, { ".createdAt" => "[ignored]", ".updatedAt" => "[ignored]" }), @r#"
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response, { ".createdAt" => "[ignored]", ".updatedAt" => "[ignored]" }), @r###"
|
||||
{
|
||||
"message": "Unknown value `doc.add` at `.actions[0]`: expected one of `*`, `search`, `documents.*`, `documents.add`, `documents.get`, `documents.delete`, `indexes.*`, `indexes.create`, `indexes.get`, `indexes.update`, `indexes.delete`, `indexes.swap`, `tasks.*`, `tasks.cancel`, `tasks.delete`, `tasks.get`, `settings.*`, `settings.get`, `settings.update`, `stats.*`, `stats.get`, `metrics.*`, `metrics.get`, `dumps.*`, `dumps.create`, `snapshots.*`, `snapshots.create`, `version`, `keys.create`, `keys.get`, `keys.update`, `keys.delete`, `experimental.get`, `experimental.update`, `export`, `network.get`, `network.update`, `chatCompletions`, `chats.*`, `chats.get`, `chats.delete`, `chatsSettings.*`, `chatsSettings.get`, `chatsSettings.update`, `*.get`, `webhooks.get`, `webhooks.update`, `webhooks.delete`, `webhooks.create`, `webhooks.*`",
|
||||
"message": "Unknown value `doc.add` at `.actions[0]`: expected one of `*`, `search`, `documents.*`, `documents.add`, `documents.get`, `documents.delete`, `indexes.*`, `indexes.create`, `indexes.get`, `indexes.update`, `indexes.delete`, `indexes.swap`, `tasks.*`, `tasks.cancel`, `tasks.delete`, `tasks.get`, `settings.*`, `settings.get`, `settings.update`, `stats.*`, `stats.get`, `metrics.*`, `metrics.get`, `dumps.*`, `dumps.create`, `snapshots.*`, `snapshots.create`, `version`, `keys.create`, `keys.get`, `keys.update`, `keys.delete`, `experimental.get`, `experimental.update`, `export`, `network.get`, `network.update`, `chatCompletions`, `chats.*`, `chats.get`, `chats.delete`, `chatsSettings.*`, `chatsSettings.get`, `chatsSettings.update`, `*.get`, `webhooks.get`, `webhooks.update`, `webhooks.delete`, `webhooks.create`, `webhooks.*`, `indexes.compact`",
|
||||
"code": "invalid_api_key_actions",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_api_key_actions"
|
||||
}
|
||||
"#);
|
||||
"###);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
|
||||
@@ -91,14 +91,14 @@ async fn create_api_key_bad_actions() {
|
||||
// can't parse
|
||||
let (response, code) = server.add_api_key(json!({ "actions": ["doggo"] })).await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(json_string!(response), @r#"
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"message": "Unknown value `doggo` at `.actions[0]`: expected one of `*`, `search`, `documents.*`, `documents.add`, `documents.get`, `documents.delete`, `indexes.*`, `indexes.create`, `indexes.get`, `indexes.update`, `indexes.delete`, `indexes.swap`, `tasks.*`, `tasks.cancel`, `tasks.delete`, `tasks.get`, `settings.*`, `settings.get`, `settings.update`, `stats.*`, `stats.get`, `metrics.*`, `metrics.get`, `dumps.*`, `dumps.create`, `snapshots.*`, `snapshots.create`, `version`, `keys.create`, `keys.get`, `keys.update`, `keys.delete`, `experimental.get`, `experimental.update`, `export`, `network.get`, `network.update`, `chatCompletions`, `chats.*`, `chats.get`, `chats.delete`, `chatsSettings.*`, `chatsSettings.get`, `chatsSettings.update`, `*.get`, `webhooks.get`, `webhooks.update`, `webhooks.delete`, `webhooks.create`, `webhooks.*`",
|
||||
"message": "Unknown value `doggo` at `.actions[0]`: expected one of `*`, `search`, `documents.*`, `documents.add`, `documents.get`, `documents.delete`, `indexes.*`, `indexes.create`, `indexes.get`, `indexes.update`, `indexes.delete`, `indexes.swap`, `tasks.*`, `tasks.cancel`, `tasks.delete`, `tasks.get`, `settings.*`, `settings.get`, `settings.update`, `stats.*`, `stats.get`, `metrics.*`, `metrics.get`, `dumps.*`, `dumps.create`, `snapshots.*`, `snapshots.create`, `version`, `keys.create`, `keys.get`, `keys.update`, `keys.delete`, `experimental.get`, `experimental.update`, `export`, `network.get`, `network.update`, `chatCompletions`, `chats.*`, `chats.get`, `chats.delete`, `chatsSettings.*`, `chatsSettings.get`, `chatsSettings.update`, `*.get`, `webhooks.get`, `webhooks.update`, `webhooks.delete`, `webhooks.create`, `webhooks.*`, `indexes.compact`",
|
||||
"code": "invalid_api_key_actions",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_api_key_actions"
|
||||
}
|
||||
"#);
|
||||
"###);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
|
||||
@@ -40,14 +40,14 @@ async fn batch_bad_types() {
|
||||
|
||||
let (response, code) = server.batches_filter("types=doggo").await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(json_string!(response), @r#"
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"message": "Invalid value in parameter `types`: `doggo` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentEdition`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`, `export`, `upgradeDatabase`.",
|
||||
"message": "Invalid value in parameter `types`: `doggo` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentEdition`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`, `export`, `upgradeDatabase`, `indexCompaction`.",
|
||||
"code": "invalid_task_types",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_task_types"
|
||||
}
|
||||
"#);
|
||||
"###);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
|
||||
@@ -490,6 +490,8 @@ pub fn default_settings(dir: impl AsRef<Path>) -> Opt {
|
||||
max_indexing_threads: MaxThreads::from_str("2").unwrap(),
|
||||
experimental_no_edition_2024_for_settings: false,
|
||||
experimental_no_edition_2024_for_dumps: false,
|
||||
experimental_no_edition_2024_for_prefix_post_processing: false,
|
||||
experimental_no_edition_2024_for_facet_post_processing: false,
|
||||
},
|
||||
experimental_enable_metrics: false,
|
||||
..Parser::parse_from(None as Option<&str>)
|
||||
|
||||
@@ -97,7 +97,7 @@ async fn task_bad_types() {
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(json_string!(response), @r#"
|
||||
{
|
||||
"message": "Invalid value in parameter `types`: `doggo` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentEdition`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`, `export`, `upgradeDatabase`.",
|
||||
"message": "Invalid value in parameter `types`: `doggo` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentEdition`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`, `export`, `upgradeDatabase`, `indexCompaction`.",
|
||||
"code": "invalid_task_types",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_task_types"
|
||||
@@ -108,7 +108,7 @@ async fn task_bad_types() {
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(json_string!(response), @r#"
|
||||
{
|
||||
"message": "Invalid value in parameter `types`: `doggo` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentEdition`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`, `export`, `upgradeDatabase`.",
|
||||
"message": "Invalid value in parameter `types`: `doggo` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentEdition`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`, `export`, `upgradeDatabase`, `indexCompaction`.",
|
||||
"code": "invalid_task_types",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_task_types"
|
||||
@@ -119,7 +119,7 @@ async fn task_bad_types() {
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(json_string!(response), @r#"
|
||||
{
|
||||
"message": "Invalid value in parameter `types`: `doggo` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentEdition`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`, `export`, `upgradeDatabase`.",
|
||||
"message": "Invalid value in parameter `types`: `doggo` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentEdition`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`, `export`, `upgradeDatabase`, `indexCompaction`.",
|
||||
"code": "invalid_task_types",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_task_types"
|
||||
|
||||
@@ -126,7 +126,7 @@ enum Command {
|
||||
/// before running the copy and compaction. This way the current indexation must finish before
|
||||
/// the compaction operation can start. Once the compaction is done, the big index is replaced
|
||||
/// by the compacted one and the mutable transaction is released.
|
||||
CompactIndex { index_name: String },
|
||||
IndexCompaction { index_name: String },
|
||||
|
||||
/// Uses the hair dryer the dedicate pages hot in cache
|
||||
///
|
||||
@@ -165,7 +165,7 @@ fn main() -> anyhow::Result<()> {
|
||||
let target_version = parse_version(&target_version).context("While parsing `--target-version`. Make sure `--target-version` is in the format MAJOR.MINOR.PATCH")?;
|
||||
OfflineUpgrade { db_path, current_version: detected_version, target_version }.upgrade()
|
||||
}
|
||||
Command::CompactIndex { index_name } => compact_index(db_path, &index_name),
|
||||
Command::IndexCompaction { index_name } => compact_index(db_path, &index_name),
|
||||
Command::HairDryer { index_name, index_part } => {
|
||||
hair_dryer(db_path, &index_name, &index_part)
|
||||
}
|
||||
|
||||
@@ -19,7 +19,7 @@ bstr = "1.12.0"
|
||||
bytemuck = { version = "1.23.1", features = ["extern_crate_alloc"] }
|
||||
byteorder = "1.5.0"
|
||||
charabia = { version = "0.9.7", default-features = false }
|
||||
cellulite = "0.3.0"
|
||||
cellulite = "0.3.1-nested-rtxns-2"
|
||||
concat-arrays = "0.1.2"
|
||||
convert_case = "0.8.0"
|
||||
crossbeam-channel = "0.5.15"
|
||||
@@ -34,7 +34,7 @@ grenad = { version = "0.5.0", default-features = false, features = [
|
||||
"rayon",
|
||||
"tempfile",
|
||||
] }
|
||||
heed = { version = "0.22.0", default-features = false, features = [
|
||||
heed = { version = "0.22.1-nested-rtxns", default-features = false, features = [
|
||||
"serde-json",
|
||||
"serde-bincode",
|
||||
] }
|
||||
@@ -89,8 +89,8 @@ rhai = { version = "1.22.2", features = [
|
||||
"no_time",
|
||||
"sync",
|
||||
] }
|
||||
arroy = "0.6.3"
|
||||
hannoy = { version = "0.0.8", features = ["arroy"] }
|
||||
arroy = "0.6.4-nested-rtxns"
|
||||
hannoy = { version = "0.0.9-nested-rtxns", features = ["arroy"] }
|
||||
rand = "0.8.5"
|
||||
tracing = "0.1.41"
|
||||
ureq = { version = "2.12.1", features = ["json"] }
|
||||
|
||||
@@ -21,7 +21,7 @@ mod vector_sort;
|
||||
#[cfg(test)]
|
||||
mod tests;
|
||||
|
||||
use std::collections::HashSet;
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::ops::AddAssign;
|
||||
use std::time::Duration;
|
||||
|
||||
@@ -64,6 +64,12 @@ use crate::{
|
||||
UserError, Weight,
|
||||
};
|
||||
|
||||
/// Cache for synonyms to avoid repeated database access
|
||||
#[derive(Default)]
|
||||
pub struct SynonymCache {
|
||||
pub cache: Option<HashMap<Vec<String>, Vec<Vec<String>>>>,
|
||||
}
|
||||
|
||||
/// A structure used throughout the execution of a search query.
|
||||
pub struct SearchContext<'ctx> {
|
||||
pub index: &'ctx Index,
|
||||
@@ -73,6 +79,7 @@ pub struct SearchContext<'ctx> {
|
||||
pub phrase_interner: DedupInterner<Phrase>,
|
||||
pub term_interner: Interner<QueryTerm>,
|
||||
pub phrase_docids: PhraseDocIdsCache,
|
||||
pub synonym_cache: SynonymCache,
|
||||
pub restricted_fids: Option<RestrictedFids>,
|
||||
pub prefix_search: PrefixSearch,
|
||||
pub vector_store_stats: Option<VectorStoreStats>,
|
||||
@@ -103,6 +110,7 @@ impl<'ctx> SearchContext<'ctx> {
|
||||
phrase_interner: <_>::default(),
|
||||
term_interner: <_>::default(),
|
||||
phrase_docids: <_>::default(),
|
||||
synonym_cache: <_>::default(),
|
||||
restricted_fids: None,
|
||||
prefix_search,
|
||||
vector_store_stats: None,
|
||||
@@ -113,6 +121,17 @@ impl<'ctx> SearchContext<'ctx> {
|
||||
self.prefix_search != PrefixSearch::Disabled
|
||||
}
|
||||
|
||||
/// Get synonyms with caching to avoid repeated database access
|
||||
pub fn get_synonyms(&mut self) -> Result<&HashMap<Vec<String>, Vec<Vec<String>>>> {
|
||||
match self.synonym_cache.cache {
|
||||
Some(ref synonyms) => Ok(synonyms),
|
||||
None => {
|
||||
let synonyms = self.index.synonyms(self.txn)?;
|
||||
Ok(self.synonym_cache.cache.insert(synonyms))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn attributes_to_search_on(
|
||||
&mut self,
|
||||
attributes_to_search_on: &'ctx [String],
|
||||
|
||||
@@ -214,7 +214,7 @@ pub fn partially_initialized_term_from_word(
|
||||
if is_prefix && use_prefix_db.is_none() {
|
||||
find_zero_typo_prefix_derivations(ctx, word_interned, &mut prefix_of)?;
|
||||
}
|
||||
let synonyms = ctx.index.synonyms(ctx.txn)?;
|
||||
let synonyms = ctx.get_synonyms()?;
|
||||
let mut synonym_word_count = 0;
|
||||
let synonyms = synonyms
|
||||
.get(&vec![word.to_owned()])
|
||||
|
||||
@@ -258,7 +258,7 @@ pub fn make_ngram(
|
||||
partially_initialized_term_from_word(ctx, &ngram_str, max_nbr_typos, is_prefix, true)?;
|
||||
|
||||
// Now add the synonyms
|
||||
let index_synonyms = ctx.index.synonyms(ctx.txn)?;
|
||||
let index_synonyms = ctx.get_synonyms()?;
|
||||
|
||||
term.zero_typo.synonyms.extend(
|
||||
index_synonyms.get(&words).cloned().unwrap_or_default().into_iter().map(|words| {
|
||||
|
||||
@@ -101,6 +101,8 @@ pub struct GrenadParameters {
|
||||
pub chunk_compression_level: Option<u32>,
|
||||
pub max_memory: Option<usize>,
|
||||
pub max_nb_chunks: Option<usize>,
|
||||
pub experimental_no_edition_2024_for_prefix_post_processing: bool,
|
||||
pub experimental_no_edition_2024_for_facet_post_processing: bool,
|
||||
}
|
||||
|
||||
impl Default for GrenadParameters {
|
||||
@@ -110,6 +112,8 @@ impl Default for GrenadParameters {
|
||||
chunk_compression_level: None,
|
||||
max_memory: None,
|
||||
max_nb_chunks: None,
|
||||
experimental_no_edition_2024_for_prefix_post_processing: false,
|
||||
experimental_no_edition_2024_for_facet_post_processing: false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -254,6 +254,12 @@ where
|
||||
chunk_compression_level: self.indexer_config.chunk_compression_level,
|
||||
max_memory: self.indexer_config.max_memory,
|
||||
max_nb_chunks: self.indexer_config.max_nb_chunks, // default value, may be chosen.
|
||||
experimental_no_edition_2024_for_prefix_post_processing: self
|
||||
.indexer_config
|
||||
.experimental_no_edition_2024_for_prefix_post_processing,
|
||||
experimental_no_edition_2024_for_facet_post_processing: self
|
||||
.indexer_config
|
||||
.experimental_no_edition_2024_for_facet_post_processing,
|
||||
};
|
||||
let documents_chunk_size = match self.indexer_config.documents_chunk_size {
|
||||
Some(chunk_size) => chunk_size,
|
||||
|
||||
@@ -983,6 +983,12 @@ impl<'a, 'i> Transform<'a, 'i> {
|
||||
chunk_compression_level: self.indexer_settings.chunk_compression_level,
|
||||
max_memory: self.indexer_settings.max_memory,
|
||||
max_nb_chunks: self.indexer_settings.max_nb_chunks, // default value, may be chosen.
|
||||
experimental_no_edition_2024_for_prefix_post_processing: self
|
||||
.indexer_settings
|
||||
.experimental_no_edition_2024_for_prefix_post_processing,
|
||||
experimental_no_edition_2024_for_facet_post_processing: self
|
||||
.indexer_settings
|
||||
.experimental_no_edition_2024_for_facet_post_processing,
|
||||
};
|
||||
|
||||
// Once we have written all the documents, we merge everything into a Reader.
|
||||
|
||||
@@ -18,6 +18,8 @@ pub struct IndexerConfig {
|
||||
pub skip_index_budget: bool,
|
||||
pub experimental_no_edition_2024_for_settings: bool,
|
||||
pub experimental_no_edition_2024_for_dumps: bool,
|
||||
pub experimental_no_edition_2024_for_prefix_post_processing: bool,
|
||||
pub experimental_no_edition_2024_for_facet_post_processing: bool,
|
||||
}
|
||||
|
||||
impl IndexerConfig {
|
||||
@@ -27,6 +29,10 @@ impl IndexerConfig {
|
||||
chunk_compression_level: self.chunk_compression_level,
|
||||
max_memory: self.max_memory,
|
||||
max_nb_chunks: self.max_nb_chunks,
|
||||
experimental_no_edition_2024_for_prefix_post_processing: self
|
||||
.experimental_no_edition_2024_for_prefix_post_processing,
|
||||
experimental_no_edition_2024_for_facet_post_processing: self
|
||||
.experimental_no_edition_2024_for_facet_post_processing,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -68,6 +74,8 @@ impl Default for IndexerConfig {
|
||||
skip_index_budget: false,
|
||||
experimental_no_edition_2024_for_settings: false,
|
||||
experimental_no_edition_2024_for_dumps: false,
|
||||
experimental_no_edition_2024_for_prefix_post_processing: false,
|
||||
experimental_no_edition_2024_for_facet_post_processing: false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -180,12 +180,15 @@ where
|
||||
})
|
||||
.unwrap()?;
|
||||
|
||||
post_processing::post_process(
|
||||
indexing_context,
|
||||
wtxn,
|
||||
global_fields_ids_map,
|
||||
facet_field_ids_delta,
|
||||
)?;
|
||||
pool.install(|| {
|
||||
post_processing::post_process(
|
||||
indexing_context,
|
||||
wtxn,
|
||||
global_fields_ids_map,
|
||||
facet_field_ids_delta,
|
||||
)
|
||||
})
|
||||
.unwrap()?;
|
||||
|
||||
indexing_context.progress.update_progress(IndexingStep::BuildingGeoJson);
|
||||
index.cellulite.build(
|
||||
|
||||
@@ -0,0 +1,164 @@
|
||||
use std::fs::File;
|
||||
use std::io::BufReader;
|
||||
use std::{iter, mem};
|
||||
|
||||
use grenad::CompressionType;
|
||||
use heed::types::{Bytes, LazyDecode};
|
||||
use heed::{Database, RwTxn};
|
||||
use rayon::prelude::*;
|
||||
use roaring::MultiOps;
|
||||
use tempfile::tempfile;
|
||||
|
||||
use crate::facet::FacetType;
|
||||
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec};
|
||||
use crate::heed_codec::BytesRefCodec;
|
||||
use crate::update::facet::{FACET_GROUP_SIZE, FACET_MIN_LEVEL_SIZE};
|
||||
use crate::update::{create_writer, writer_into_reader};
|
||||
use crate::{CboRoaringBitmapCodec, FieldId, Index};
|
||||
|
||||
/// Generate the facet level based on the level 0.
|
||||
///
|
||||
/// The function will generate all the group levels from
|
||||
/// the group 1 to the level n until the number of group
|
||||
/// is smaller than the minimum required size.
|
||||
pub fn generate_facet_levels(
|
||||
index: &Index,
|
||||
wtxn: &mut RwTxn,
|
||||
field_id: FieldId,
|
||||
facet_type: FacetType,
|
||||
) -> crate::Result<()> {
|
||||
let db = match facet_type {
|
||||
FacetType::String => index
|
||||
.facet_id_string_docids
|
||||
.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>()
|
||||
.lazily_decode_data(),
|
||||
FacetType::Number => index
|
||||
.facet_id_f64_docids
|
||||
.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>()
|
||||
.lazily_decode_data(),
|
||||
};
|
||||
|
||||
clear_levels(db, wtxn, field_id)?;
|
||||
|
||||
let mut base_level: u8 = 0;
|
||||
// That's a do-while loop
|
||||
while {
|
||||
let mut level_size = 0;
|
||||
let level = base_level.checked_add(1).unwrap();
|
||||
for reader in compute_level(index, wtxn, db, field_id, base_level)? {
|
||||
let mut cursor = reader.into_cursor()?;
|
||||
while let Some((left_bound, facet_group_value)) = cursor.move_on_next()? {
|
||||
level_size += 1;
|
||||
let key = FacetGroupKey { field_id, level, left_bound };
|
||||
debug_assert!(
|
||||
db.get(wtxn, &key).transpose().is_none(),
|
||||
"entry must not be there and must have already been deleted: {key:?}"
|
||||
);
|
||||
db.remap_data_type::<Bytes>().put(wtxn, &key, facet_group_value)?;
|
||||
}
|
||||
}
|
||||
|
||||
base_level = level;
|
||||
|
||||
// If the next level will have the minimum required groups, continue.
|
||||
(level_size / FACET_GROUP_SIZE as usize) >= FACET_MIN_LEVEL_SIZE as usize
|
||||
} {}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Compute the groups of facets from the provided base level
|
||||
/// and write the content into different grenad files.
|
||||
fn compute_level(
|
||||
index: &Index,
|
||||
wtxn: &heed::RwTxn,
|
||||
db: Database<FacetGroupKeyCodec<BytesRefCodec>, LazyDecode<FacetGroupValueCodec>>,
|
||||
field_id: FieldId,
|
||||
base_level: u8,
|
||||
) -> Result<Vec<grenad::Reader<BufReader<File>>>, crate::Error> {
|
||||
let thread_count = rayon::current_num_threads();
|
||||
let rtxns = iter::repeat_with(|| index.env.nested_read_txn(wtxn))
|
||||
.take(thread_count)
|
||||
.collect::<heed::Result<Vec<_>>>()?;
|
||||
|
||||
let range = {
|
||||
// Based on the first possible value for the base level up to
|
||||
// the first possible value for the next level *excluded*.
|
||||
let left = FacetGroupKey::<&[u8]> { field_id, level: base_level, left_bound: &[] };
|
||||
let right = FacetGroupKey::<&[u8]> {
|
||||
field_id,
|
||||
level: base_level.checked_add(1).unwrap(),
|
||||
left_bound: &[],
|
||||
};
|
||||
left..right
|
||||
};
|
||||
|
||||
rtxns
|
||||
.into_par_iter()
|
||||
.enumerate()
|
||||
.map(|(thread_id, rtxn)| {
|
||||
let mut writer = tempfile().map(|f| create_writer(CompressionType::None, None, f))?;
|
||||
|
||||
let mut left_bound = None;
|
||||
let mut group_docids = Vec::new();
|
||||
let mut ser_buffer = Vec::new();
|
||||
for (i, result) in db.range(&rtxn, &range)?.enumerate() {
|
||||
let (key, lazy_value) = result?;
|
||||
|
||||
let start_of_group = i % FACET_GROUP_SIZE as usize == 0;
|
||||
let group_index = i / FACET_GROUP_SIZE as usize;
|
||||
let group_for_thread = group_index % thread_count == thread_id;
|
||||
|
||||
if group_for_thread {
|
||||
if start_of_group {
|
||||
if let Some(left_bound) = left_bound.take() {
|
||||
// We store the bitmaps in a Vec this way we can use
|
||||
// the MultiOps operations that tends to be more efficient
|
||||
// for unions. The Vec is empty after the operation.
|
||||
//
|
||||
// We also don't forget to store the group size corresponding
|
||||
// to the number of entries merged in this group.
|
||||
ser_buffer.clear();
|
||||
let group_len: u8 = group_docids.len().try_into().unwrap();
|
||||
ser_buffer.push(group_len);
|
||||
let group_docids = mem::take(&mut group_docids);
|
||||
let docids = group_docids.into_iter().union();
|
||||
CboRoaringBitmapCodec::serialize_into_vec(&docids, &mut ser_buffer);
|
||||
writer.insert(left_bound, &ser_buffer)?;
|
||||
}
|
||||
left_bound = Some(key.left_bound);
|
||||
}
|
||||
|
||||
// Lazily decode the bitmaps we are interested in.
|
||||
let value = lazy_value.decode().map_err(heed::Error::Decoding)?;
|
||||
group_docids.push(value.bitmap);
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(left_bound) = left_bound.take() {
|
||||
ser_buffer.clear();
|
||||
// We don't forget to store the group size corresponding
|
||||
// to the number of entries merged in this group.
|
||||
let group_len: u8 = group_docids.len().try_into().unwrap();
|
||||
ser_buffer.push(group_len);
|
||||
let group_docids = group_docids.into_iter().union();
|
||||
CboRoaringBitmapCodec::serialize_into_vec(&group_docids, &mut ser_buffer);
|
||||
writer.insert(left_bound, &ser_buffer)?;
|
||||
}
|
||||
|
||||
writer_into_reader(writer)
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Clears all the levels and only keeps the level 0 of the specified field id.
|
||||
fn clear_levels(
|
||||
db: Database<FacetGroupKeyCodec<BytesRefCodec>, LazyDecode<FacetGroupValueCodec>>,
|
||||
wtxn: &mut RwTxn<'_>,
|
||||
field_id: FieldId,
|
||||
) -> heed::Result<()> {
|
||||
let left = FacetGroupKey::<&[u8]> { field_id, level: 1, left_bound: &[] };
|
||||
let right = FacetGroupKey::<&[u8]> { field_id, level: u8::MAX, left_bound: &[] };
|
||||
let range = left..=right;
|
||||
db.delete_range(wtxn, &range).map(drop)
|
||||
}
|
||||
@@ -1,5 +1,6 @@
|
||||
use std::cmp::Ordering;
|
||||
|
||||
use facet_bulk::generate_facet_levels;
|
||||
use heed::types::{Bytes, DecodeIgnore, Str};
|
||||
use heed::RwTxn;
|
||||
use itertools::{merge_join_by, EitherOrBoth};
|
||||
@@ -23,6 +24,8 @@ use crate::update::new::FacetFieldIdsDelta;
|
||||
use crate::update::{FacetsUpdateBulk, GrenadParameters};
|
||||
use crate::{GlobalFieldsIdsMap, Index, Result};
|
||||
|
||||
mod facet_bulk;
|
||||
|
||||
pub(super) fn post_process<MSP>(
|
||||
indexing_context: IndexingContext<MSP>,
|
||||
wtxn: &mut RwTxn<'_>,
|
||||
@@ -39,6 +42,7 @@ where
|
||||
wtxn,
|
||||
facet_field_ids_delta,
|
||||
&mut global_fields_ids_map,
|
||||
indexing_context.grenad_parameters,
|
||||
indexing_context.progress,
|
||||
)?;
|
||||
compute_facet_search_database(index, wtxn, global_fields_ids_map, indexing_context.progress)?;
|
||||
@@ -216,6 +220,7 @@ fn compute_facet_level_database(
|
||||
wtxn: &mut RwTxn,
|
||||
mut facet_field_ids_delta: FacetFieldIdsDelta,
|
||||
global_fields_ids_map: &mut GlobalFieldsIdsMap,
|
||||
grenad_parameters: &GrenadParameters,
|
||||
progress: &Progress,
|
||||
) -> Result<()> {
|
||||
let rtxn = index.read_txn()?;
|
||||
@@ -239,9 +244,14 @@ fn compute_facet_level_database(
|
||||
match delta {
|
||||
FacetFieldIdDelta::Bulk => {
|
||||
progress.update_progress(PostProcessingFacets::StringsBulk);
|
||||
tracing::debug!(%fid, "bulk string facet processing");
|
||||
FacetsUpdateBulk::new_not_updating_level_0(index, vec![fid], FacetType::String)
|
||||
.execute(wtxn)?
|
||||
if grenad_parameters.experimental_no_edition_2024_for_facet_post_processing {
|
||||
tracing::debug!(%fid, "bulk string facet processing");
|
||||
FacetsUpdateBulk::new_not_updating_level_0(index, vec![fid], FacetType::String)
|
||||
.execute(wtxn)?
|
||||
} else {
|
||||
tracing::debug!(%fid, "bulk string facet processing in parallel");
|
||||
generate_facet_levels(index, wtxn, fid, FacetType::String)?
|
||||
}
|
||||
}
|
||||
FacetFieldIdDelta::Incremental(delta_data) => {
|
||||
progress.update_progress(PostProcessingFacets::StringsIncremental);
|
||||
@@ -1,11 +1,12 @@
|
||||
use std::cell::RefCell;
|
||||
use std::collections::BTreeSet;
|
||||
use std::io::{BufReader, BufWriter, Read, Seek, Write};
|
||||
use std::iter;
|
||||
|
||||
use hashbrown::HashMap;
|
||||
use heed::types::Bytes;
|
||||
use heed::types::{Bytes, DecodeIgnore};
|
||||
use heed::{BytesDecode, Database, Error, RoTxn, RwTxn};
|
||||
use rayon::iter::{IntoParallelIterator, ParallelIterator as _};
|
||||
use rayon::iter::{IndexedParallelIterator as _, IntoParallelIterator, ParallelIterator as _};
|
||||
use roaring::MultiOps;
|
||||
use tempfile::spooled_tempfile;
|
||||
use thread_local::ThreadLocal;
|
||||
@@ -151,22 +152,29 @@ impl<'a, 'rtxn> FrozenPrefixBitmaps<'a, 'rtxn> {
|
||||
|
||||
unsafe impl Sync for FrozenPrefixBitmaps<'_, '_> {}
|
||||
|
||||
struct WordPrefixIntegerDocids {
|
||||
struct WordPrefixIntegerDocids<'i> {
|
||||
index: &'i Index,
|
||||
database: Database<Bytes, CboRoaringBitmapCodec>,
|
||||
prefix_database: Database<Bytes, CboRoaringBitmapCodec>,
|
||||
max_memory_by_thread: Option<usize>,
|
||||
/// Do not use an experimental LMDB feature to read uncommitted data in parallel.
|
||||
no_experimental_post_processing: bool,
|
||||
}
|
||||
|
||||
impl WordPrefixIntegerDocids {
|
||||
impl<'i> WordPrefixIntegerDocids<'i> {
|
||||
fn new(
|
||||
index: &'i Index,
|
||||
database: Database<Bytes, CboRoaringBitmapCodec>,
|
||||
prefix_database: Database<Bytes, CboRoaringBitmapCodec>,
|
||||
grenad_parameters: &GrenadParameters,
|
||||
) -> WordPrefixIntegerDocids {
|
||||
grenad_parameters: &'_ GrenadParameters,
|
||||
) -> WordPrefixIntegerDocids<'i> {
|
||||
WordPrefixIntegerDocids {
|
||||
index,
|
||||
database,
|
||||
prefix_database,
|
||||
max_memory_by_thread: grenad_parameters.max_memory_by_thread(),
|
||||
no_experimental_post_processing: grenad_parameters
|
||||
.experimental_no_edition_2024_for_prefix_post_processing,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -177,7 +185,131 @@ impl WordPrefixIntegerDocids {
|
||||
prefix_to_delete: &BTreeSet<Prefix>,
|
||||
) -> Result<()> {
|
||||
delete_prefixes(wtxn, &self.prefix_database, prefix_to_delete)?;
|
||||
self.recompute_modified_prefixes(wtxn, prefix_to_compute)
|
||||
if self.no_experimental_post_processing {
|
||||
self.recompute_modified_prefixes(wtxn, prefix_to_compute)
|
||||
} else {
|
||||
self.recompute_modified_prefixes_no_frozen(wtxn, prefix_to_compute)
|
||||
}
|
||||
}
|
||||
|
||||
/// Computes the same as `recompute_modified_prefixes`.
|
||||
///
|
||||
/// ...but without aggregating the prefixes mmap pointers into a static HashMap
|
||||
/// beforehand and rather use an experimental LMDB feature to read the subset
|
||||
/// of prefixes in parallel from the uncommitted transaction.
|
||||
#[tracing::instrument(level = "trace", skip_all, target = "indexing::prefix")]
|
||||
fn recompute_modified_prefixes_no_frozen(
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
prefixes: &BTreeSet<Prefix>,
|
||||
) -> Result<()> {
|
||||
let thread_count = rayon::current_num_threads();
|
||||
let rtxns = iter::repeat_with(|| self.index.env.nested_read_txn(wtxn))
|
||||
.take(thread_count)
|
||||
.collect::<heed::Result<Vec<_>>>()?;
|
||||
|
||||
let outputs = rtxns
|
||||
.into_par_iter()
|
||||
.enumerate()
|
||||
.map(|(thread_id, rtxn)| {
|
||||
// `indexes` represent offsets at which prefixes computations were stored in the `file`.
|
||||
let mut indexes = Vec::new();
|
||||
let mut file = BufWriter::new(spooled_tempfile(
|
||||
self.max_memory_by_thread.unwrap_or(usize::MAX),
|
||||
));
|
||||
|
||||
let mut buffer = Vec::new();
|
||||
for (prefix_index, prefix) in prefixes.iter().enumerate() {
|
||||
// Is prefix for another thread?
|
||||
if prefix_index % thread_count != thread_id {
|
||||
continue;
|
||||
}
|
||||
|
||||
let mut bitmap_bytes_at_positions = HashMap::new();
|
||||
for result in self
|
||||
.database
|
||||
.prefix_iter(&rtxn, prefix.as_bytes())?
|
||||
.remap_types::<StrBEU16Codec, Bytes>()
|
||||
{
|
||||
let ((_word, pos), bitmap_bytes) = result?;
|
||||
bitmap_bytes_at_positions
|
||||
.entry(pos)
|
||||
.or_insert_with(Vec::new)
|
||||
.push(bitmap_bytes);
|
||||
}
|
||||
|
||||
// We track positions with no corresponding bitmap bytes,
|
||||
// these means that the prefix no longer exists in the database
|
||||
// and must, therefore, be removed from the index.
|
||||
for result in self
|
||||
.prefix_database
|
||||
.prefix_iter(&rtxn, prefix.as_bytes())?
|
||||
.remap_types::<StrBEU16Codec, DecodeIgnore>()
|
||||
{
|
||||
let ((_word, pos), ()) = result?;
|
||||
// They are represented by an empty set of bitmaps.
|
||||
bitmap_bytes_at_positions.entry(pos).or_insert_with(Vec::new);
|
||||
}
|
||||
|
||||
for (pos, bitmaps_bytes) in bitmap_bytes_at_positions {
|
||||
if bitmaps_bytes.is_empty() {
|
||||
indexes.push(PrefixIntegerEntry {
|
||||
prefix,
|
||||
pos,
|
||||
serialized_length: None,
|
||||
});
|
||||
} else {
|
||||
let output = bitmaps_bytes
|
||||
.into_iter()
|
||||
.map(CboRoaringBitmapCodec::deserialize_from)
|
||||
.union()?;
|
||||
buffer.clear();
|
||||
CboRoaringBitmapCodec::serialize_into_vec(&output, &mut buffer);
|
||||
indexes.push(PrefixIntegerEntry {
|
||||
prefix,
|
||||
pos,
|
||||
serialized_length: Some(buffer.len()),
|
||||
});
|
||||
file.write_all(&buffer)?;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok((indexes, file))
|
||||
})
|
||||
.collect::<Result<Vec<_>>>()?;
|
||||
|
||||
// We iterate over all the collected and serialized bitmaps through
|
||||
// the files and entries to eventually put them in the final database.
|
||||
let mut key_buffer = Vec::new();
|
||||
let mut buffer = Vec::new();
|
||||
for (index, file) in outputs {
|
||||
let mut file = file.into_inner().map_err(|e| e.into_error())?;
|
||||
file.rewind()?;
|
||||
let mut file = BufReader::new(file);
|
||||
for PrefixIntegerEntry { prefix, pos, serialized_length } in index {
|
||||
key_buffer.clear();
|
||||
key_buffer.extend_from_slice(prefix.as_bytes());
|
||||
key_buffer.push(0);
|
||||
key_buffer.extend_from_slice(&pos.to_be_bytes());
|
||||
match serialized_length {
|
||||
Some(serialized_length) => {
|
||||
buffer.resize(serialized_length, 0);
|
||||
file.read_exact(&mut buffer)?;
|
||||
self.prefix_database.remap_data_type::<Bytes>().put(
|
||||
wtxn,
|
||||
&key_buffer,
|
||||
&buffer,
|
||||
)?;
|
||||
}
|
||||
None => {
|
||||
self.prefix_database.delete(wtxn, &key_buffer)?;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tracing::instrument(level = "trace", skip_all, target = "indexing::prefix")]
|
||||
@@ -262,7 +394,7 @@ impl WordPrefixIntegerDocids {
|
||||
}
|
||||
}
|
||||
|
||||
/// Represents a prefix and the lenght the bitmap takes on disk.
|
||||
/// Represents a prefix and the length the bitmap takes on disk.
|
||||
struct PrefixIntegerEntry<'a> {
|
||||
prefix: &'a str,
|
||||
pos: u16,
|
||||
@@ -363,6 +495,7 @@ pub fn compute_word_prefix_fid_docids(
|
||||
grenad_parameters: &GrenadParameters,
|
||||
) -> Result<()> {
|
||||
WordPrefixIntegerDocids::new(
|
||||
index,
|
||||
index.word_fid_docids.remap_key_type(),
|
||||
index.word_prefix_fid_docids.remap_key_type(),
|
||||
grenad_parameters,
|
||||
@@ -379,6 +512,7 @@ pub fn compute_word_prefix_position_docids(
|
||||
grenad_parameters: &GrenadParameters,
|
||||
) -> Result<()> {
|
||||
WordPrefixIntegerDocids::new(
|
||||
index,
|
||||
index.word_position_docids.remap_key_type(),
|
||||
index.word_prefix_position_docids.remap_key_type(),
|
||||
grenad_parameters,
|
||||
|
||||
Reference in New Issue
Block a user