mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-12-03 11:15:35 +00:00
Compare commits
60 Commits
v1.12.0-rc
...
try-merge-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
14a980e54e | ||
|
|
cbc453c6d1 | ||
|
|
2fb065b9fb | ||
|
|
07f42e8057 | ||
|
|
71f59749dc | ||
|
|
3b0b9967f6 | ||
|
|
123b54a178 | ||
|
|
f5dd8dfc3e | ||
|
|
bcfed70888 | ||
|
|
503ef3bbc9 | ||
|
|
08f2c696b0 | ||
|
|
b75f1f4c17 | ||
|
|
95ed079761 | ||
|
|
4a082683df | ||
|
|
26be5e0733 | ||
|
|
bd5110a2fe | ||
|
|
fa8b9acdf6 | ||
|
|
2b74d1824b | ||
|
|
c77b00d3ac | ||
|
|
c77073efcc | ||
|
|
1537323eb9 | ||
|
|
a0a3b55700 | ||
|
|
214b51de87 | ||
|
|
95975944d7 | ||
|
|
9a9383643f | ||
|
|
cac355bfa7 | ||
|
|
9020a50df8 | ||
|
|
52843123d4 | ||
|
|
6298db5bea | ||
|
|
a003a0934a | ||
|
|
3a11e39c01 | ||
|
|
5f896b1050 | ||
|
|
d0c4e6da6b | ||
|
|
2da5584bb5 | ||
|
|
b7eb802ae6 | ||
|
|
2e32d0474c | ||
|
|
cb99ac6f7e | ||
|
|
be411435f5 | ||
|
|
29ef164530 | ||
|
|
739c52a3cd | ||
|
|
7a2af06b1e | ||
|
|
cb0c3a5aad | ||
|
|
8388698993 | ||
|
|
cbcf6c9ba3 | ||
|
|
bf742d81cf | ||
|
|
7458f0386c | ||
|
|
fc1df5793c | ||
|
|
3ded069042 | ||
|
|
261d2ceb06 | ||
|
|
1a17e2e572 | ||
|
|
5b8cd68abe | ||
|
|
5ce9acb0b9 | ||
|
|
953a82ca04 | ||
|
|
54341c2e80 | ||
|
|
96831ed9bb | ||
|
|
0459b1a242 | ||
|
|
8ecb726683 | ||
|
|
297e72e262 | ||
|
|
0ad2f57a92 | ||
|
|
71d53f413f |
2
Cargo.lock
generated
2
Cargo.lock
generated
@@ -492,7 +492,7 @@ checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6"
|
||||
[[package]]
|
||||
name = "bbqueue"
|
||||
version = "0.5.1"
|
||||
source = "git+https://github.com/kerollmops/bbqueue#cbb87cc707b5af415ef203bdaf2443e06ba0d6d4"
|
||||
source = "git+https://github.com/meilisearch/bbqueue#cbb87cc707b5af415ef203bdaf2443e06ba0d6d4"
|
||||
|
||||
[[package]]
|
||||
name = "benchmarks"
|
||||
|
||||
@@ -29,7 +29,6 @@ use bumpalo::collections::CollectIn;
|
||||
use bumpalo::Bump;
|
||||
use dump::IndexMetadata;
|
||||
use meilisearch_types::batches::BatchId;
|
||||
use meilisearch_types::error::Code;
|
||||
use meilisearch_types::heed::{RoTxn, RwTxn};
|
||||
use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader, PrimaryKey};
|
||||
use meilisearch_types::milli::heed::CompactionOption;
|
||||
@@ -497,7 +496,6 @@ impl IndexScheduler {
|
||||
// 5. We make a batch from the unprioritised tasks. Start by taking the next enqueued task.
|
||||
let task_id = if let Some(task_id) = enqueued.min() { task_id } else { return Ok(None) };
|
||||
let mut task = self.get_task(rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?;
|
||||
current_batch.processing(Some(&mut task));
|
||||
|
||||
// If the task is not associated with any index, verify that it is an index swap and
|
||||
// create the batch directly. Otherwise, get the index name associated with the task
|
||||
@@ -507,6 +505,7 @@ impl IndexScheduler {
|
||||
index_name
|
||||
} else {
|
||||
assert!(matches!(&task.kind, KindWithContent::IndexSwap { swaps } if swaps.is_empty()));
|
||||
current_batch.processing(Some(&mut task));
|
||||
return Ok(Some((Batch::IndexSwap { task }, current_batch)));
|
||||
};
|
||||
|
||||
@@ -689,7 +688,9 @@ impl IndexScheduler {
|
||||
let index = self.index_mapper.index(&rtxn, name)?;
|
||||
let dst = temp_snapshot_dir.path().join("indexes").join(uuid.to_string());
|
||||
fs::create_dir_all(&dst)?;
|
||||
index.copy_to_file(dst.join("data.mdb"), CompactionOption::Enabled)?;
|
||||
index
|
||||
.copy_to_file(dst.join("data.mdb"), CompactionOption::Enabled)
|
||||
.map_err(|e| Error::from_milli(e, Some(name.to_string())))?;
|
||||
}
|
||||
|
||||
drop(rtxn);
|
||||
@@ -791,16 +792,19 @@ impl IndexScheduler {
|
||||
let content_file = self.file_store.get_update(content_file)?;
|
||||
|
||||
let reader = DocumentsBatchReader::from_reader(content_file)
|
||||
.map_err(milli::Error::from)?;
|
||||
.map_err(|e| Error::from_milli(e.into(), None))?;
|
||||
|
||||
let (mut cursor, documents_batch_index) =
|
||||
reader.into_cursor_and_fields_index();
|
||||
|
||||
while let Some(doc) =
|
||||
cursor.next_document().map_err(milli::Error::from)?
|
||||
while let Some(doc) = cursor
|
||||
.next_document()
|
||||
.map_err(|e| Error::from_milli(e.into(), None))?
|
||||
{
|
||||
dump_content_file
|
||||
.push_document(&obkv_to_object(doc, &documents_batch_index)?)?;
|
||||
dump_content_file.push_document(
|
||||
&obkv_to_object(doc, &documents_batch_index)
|
||||
.map_err(|e| Error::from_milli(e, None))?,
|
||||
)?;
|
||||
}
|
||||
dump_content_file.flush()?;
|
||||
}
|
||||
@@ -814,27 +818,41 @@ impl IndexScheduler {
|
||||
let metadata = IndexMetadata {
|
||||
uid: uid.to_owned(),
|
||||
primary_key: index.primary_key(&rtxn)?.map(String::from),
|
||||
created_at: index.created_at(&rtxn)?,
|
||||
updated_at: index.updated_at(&rtxn)?,
|
||||
created_at: index
|
||||
.created_at(&rtxn)
|
||||
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?,
|
||||
updated_at: index
|
||||
.updated_at(&rtxn)
|
||||
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?,
|
||||
};
|
||||
let mut index_dumper = dump.create_index(uid, &metadata)?;
|
||||
|
||||
let fields_ids_map = index.fields_ids_map(&rtxn)?;
|
||||
let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
|
||||
let embedding_configs = index.embedding_configs(&rtxn)?;
|
||||
let embedding_configs = index
|
||||
.embedding_configs(&rtxn)
|
||||
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
|
||||
|
||||
let documents = index
|
||||
.all_documents(&rtxn)
|
||||
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
|
||||
// 3.1. Dump the documents
|
||||
for ret in index.all_documents(&rtxn)? {
|
||||
for ret in documents {
|
||||
if self.must_stop_processing.get() {
|
||||
return Err(Error::AbortedTask);
|
||||
}
|
||||
|
||||
let (id, doc) = ret?;
|
||||
let (id, doc) =
|
||||
ret.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
|
||||
|
||||
let mut document = milli::obkv_to_json(&all_fields, &fields_ids_map, doc)?;
|
||||
let mut document =
|
||||
milli::obkv_to_json(&all_fields, &fields_ids_map, doc)
|
||||
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
|
||||
|
||||
'inject_vectors: {
|
||||
let embeddings = index.embeddings(&rtxn, id)?;
|
||||
let embeddings = index
|
||||
.embeddings(&rtxn, id)
|
||||
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
|
||||
|
||||
if embeddings.is_empty() {
|
||||
break 'inject_vectors;
|
||||
@@ -845,7 +863,7 @@ impl IndexScheduler {
|
||||
.or_insert(serde_json::Value::Object(Default::default()));
|
||||
|
||||
let serde_json::Value::Object(vectors) = vectors else {
|
||||
return Err(milli::Error::UserError(
|
||||
let user_err = milli::Error::UserError(
|
||||
milli::UserError::InvalidVectorsMapType {
|
||||
document_id: {
|
||||
if let Ok(Some(Ok(index))) = index
|
||||
@@ -859,8 +877,9 @@ impl IndexScheduler {
|
||||
},
|
||||
value: vectors.clone(),
|
||||
},
|
||||
)
|
||||
.into());
|
||||
);
|
||||
|
||||
return Err(Error::from_milli(user_err, Some(uid.to_string())));
|
||||
};
|
||||
|
||||
for (embedder_name, embeddings) in embeddings {
|
||||
@@ -890,7 +909,8 @@ impl IndexScheduler {
|
||||
index,
|
||||
&rtxn,
|
||||
meilisearch_types::settings::SecretPolicy::RevealSecrets,
|
||||
)?;
|
||||
)
|
||||
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
|
||||
index_dumper.settings(&settings)?;
|
||||
Ok(())
|
||||
})?;
|
||||
@@ -946,7 +966,8 @@ impl IndexScheduler {
|
||||
// the entire batch.
|
||||
let res = || -> Result<()> {
|
||||
let index_rtxn = index.read_txn()?;
|
||||
let stats = crate::index_mapper::IndexStats::new(&index, &index_rtxn)?;
|
||||
let stats = crate::index_mapper::IndexStats::new(&index, &index_rtxn)
|
||||
.map_err(|e| Error::from_milli(e, Some(index_uid.to_string())))?;
|
||||
let mut wtxn = self.env.write_txn()?;
|
||||
self.index_mapper.store_stats_of(&mut wtxn, &index_uid, &stats)?;
|
||||
wtxn.commit()?;
|
||||
@@ -988,10 +1009,12 @@ impl IndexScheduler {
|
||||
);
|
||||
builder.set_primary_key(primary_key);
|
||||
let must_stop_processing = self.must_stop_processing.clone();
|
||||
builder.execute(
|
||||
|indexing_step| tracing::debug!(update = ?indexing_step),
|
||||
|| must_stop_processing.get(),
|
||||
)?;
|
||||
builder
|
||||
.execute(
|
||||
|indexing_step| tracing::debug!(update = ?indexing_step),
|
||||
|| must_stop_processing.get(),
|
||||
)
|
||||
.map_err(|e| Error::from_milli(e, Some(index_uid.to_string())))?;
|
||||
index_wtxn.commit()?;
|
||||
}
|
||||
|
||||
@@ -1008,7 +1031,8 @@ impl IndexScheduler {
|
||||
let res = || -> Result<()> {
|
||||
let mut wtxn = self.env.write_txn()?;
|
||||
let index_rtxn = index.read_txn()?;
|
||||
let stats = crate::index_mapper::IndexStats::new(&index, &index_rtxn)?;
|
||||
let stats = crate::index_mapper::IndexStats::new(&index, &index_rtxn)
|
||||
.map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?;
|
||||
self.index_mapper.store_stats_of(&mut wtxn, &index_uid, &stats)?;
|
||||
wtxn.commit()?;
|
||||
Ok(())
|
||||
@@ -1031,7 +1055,9 @@ impl IndexScheduler {
|
||||
let number_of_documents = || -> Result<u64> {
|
||||
let index = self.index_mapper.index(&wtxn, &index_uid)?;
|
||||
let index_rtxn = index.read_txn()?;
|
||||
Ok(index.number_of_documents(&index_rtxn)?)
|
||||
index
|
||||
.number_of_documents(&index_rtxn)
|
||||
.map_err(|e| Error::from_milli(e, Some(index_uid.to_string())))
|
||||
}()
|
||||
.unwrap_or_default();
|
||||
|
||||
@@ -1188,8 +1214,10 @@ impl IndexScheduler {
|
||||
};
|
||||
|
||||
match operation {
|
||||
IndexOperation::DocumentClear { mut tasks, .. } => {
|
||||
let count = milli::update::ClearDocuments::new(index_wtxn, index).execute()?;
|
||||
IndexOperation::DocumentClear { index_uid, mut tasks } => {
|
||||
let count = milli::update::ClearDocuments::new(index_wtxn, index)
|
||||
.execute()
|
||||
.map_err(|e| Error::from_milli(e, Some(index_uid)))?;
|
||||
|
||||
let mut first_clear_found = false;
|
||||
for task in &mut tasks {
|
||||
@@ -1209,7 +1237,7 @@ impl IndexScheduler {
|
||||
Ok(tasks)
|
||||
}
|
||||
IndexOperation::DocumentOperation {
|
||||
index_uid: _,
|
||||
index_uid,
|
||||
primary_key,
|
||||
method,
|
||||
operations,
|
||||
@@ -1235,13 +1263,17 @@ impl IndexScheduler {
|
||||
|
||||
let mut content_files_iter = content_files.iter();
|
||||
let mut indexer = indexer::DocumentOperation::new(method);
|
||||
let embedders = index.embedding_configs(index_wtxn)?;
|
||||
let embedders = self.embedders(embedders)?;
|
||||
let embedders = index
|
||||
.embedding_configs(index_wtxn)
|
||||
.map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?;
|
||||
let embedders = self.embedders(index_uid.clone(), embedders)?;
|
||||
for operation in operations {
|
||||
match operation {
|
||||
DocumentOperation::Add(_content_uuid) => {
|
||||
let mmap = content_files_iter.next().unwrap();
|
||||
indexer.add_documents(mmap)?;
|
||||
indexer
|
||||
.add_documents(mmap)
|
||||
.map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?;
|
||||
}
|
||||
DocumentOperation::Delete(document_ids) => {
|
||||
let document_ids: bumpalo::collections::vec::Vec<_> = document_ids
|
||||
@@ -1266,15 +1298,17 @@ impl IndexScheduler {
|
||||
}
|
||||
};
|
||||
|
||||
let (document_changes, operation_stats, primary_key) = indexer.into_changes(
|
||||
&indexer_alloc,
|
||||
index,
|
||||
&rtxn,
|
||||
primary_key.as_deref(),
|
||||
&mut new_fields_ids_map,
|
||||
&|| must_stop_processing.get(),
|
||||
&send_progress,
|
||||
)?;
|
||||
let (document_changes, operation_stats, primary_key) = indexer
|
||||
.into_changes(
|
||||
&indexer_alloc,
|
||||
index,
|
||||
&rtxn,
|
||||
primary_key.as_deref(),
|
||||
&mut new_fields_ids_map,
|
||||
&|| must_stop_processing.get(),
|
||||
&send_progress,
|
||||
)
|
||||
.map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?;
|
||||
|
||||
let mut addition = 0;
|
||||
for (stats, task) in operation_stats.into_iter().zip(&mut tasks) {
|
||||
@@ -1321,14 +1355,15 @@ impl IndexScheduler {
|
||||
embedders,
|
||||
&|| must_stop_processing.get(),
|
||||
&send_progress,
|
||||
)?;
|
||||
)
|
||||
.map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?;
|
||||
|
||||
tracing::info!(indexing_result = ?addition, processed_in = ?started_processing_at.elapsed(), "document indexing done");
|
||||
}
|
||||
|
||||
Ok(tasks)
|
||||
}
|
||||
IndexOperation::DocumentEdition { mut task, .. } => {
|
||||
IndexOperation::DocumentEdition { index_uid, mut task } => {
|
||||
let (filter, code) = if let KindWithContent::DocumentEdition {
|
||||
filter_expr,
|
||||
context: _,
|
||||
@@ -1342,16 +1377,11 @@ impl IndexScheduler {
|
||||
};
|
||||
|
||||
let candidates = match filter.as_ref().map(Filter::from_json) {
|
||||
Some(Ok(Some(filter))) => {
|
||||
filter.evaluate(index_wtxn, index).map_err(|err| match err {
|
||||
milli::Error::UserError(milli::UserError::InvalidFilter(_)) => {
|
||||
Error::from(err).with_custom_error_code(Code::InvalidDocumentFilter)
|
||||
}
|
||||
e => e.into(),
|
||||
})?
|
||||
}
|
||||
Some(Ok(Some(filter))) => filter
|
||||
.evaluate(index_wtxn, index)
|
||||
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?,
|
||||
None | Some(Ok(None)) => index.documents_ids(index_wtxn)?,
|
||||
Some(Err(e)) => return Err(e.into()),
|
||||
Some(Err(e)) => return Err(Error::from_milli(e, Some(index_uid.clone()))),
|
||||
};
|
||||
|
||||
let (original_filter, context, function) = if let Some(Details::DocumentEdition {
|
||||
@@ -1386,8 +1416,9 @@ impl IndexScheduler {
|
||||
// candidates not empty => index not empty => a primary key is set
|
||||
let primary_key = index.primary_key(&rtxn)?.unwrap();
|
||||
|
||||
let primary_key = PrimaryKey::new_or_insert(primary_key, &mut new_fields_ids_map)
|
||||
.map_err(milli::Error::from)?;
|
||||
let primary_key =
|
||||
PrimaryKey::new_or_insert(primary_key, &mut new_fields_ids_map)
|
||||
.map_err(|err| Error::from_milli(err.into(), Some(index_uid.clone())))?;
|
||||
|
||||
let result_count = Ok((candidates.len(), candidates.len())) as Result<_>;
|
||||
|
||||
@@ -1406,11 +1437,17 @@ impl IndexScheduler {
|
||||
};
|
||||
|
||||
let indexer = UpdateByFunction::new(candidates, context.clone(), code.clone());
|
||||
let document_changes =
|
||||
pool.install(|| indexer.into_changes(&primary_key)).unwrap()?;
|
||||
|
||||
let embedders = index.embedding_configs(index_wtxn)?;
|
||||
let embedders = self.embedders(embedders)?;
|
||||
let document_changes = pool
|
||||
.install(|| {
|
||||
indexer
|
||||
.into_changes(&primary_key)
|
||||
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))
|
||||
})
|
||||
.unwrap()?;
|
||||
let embedders = index
|
||||
.embedding_configs(index_wtxn)
|
||||
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?;
|
||||
let embedders = self.embedders(index_uid.clone(), embedders)?;
|
||||
|
||||
indexer::index(
|
||||
index_wtxn,
|
||||
@@ -1424,7 +1461,8 @@ impl IndexScheduler {
|
||||
embedders,
|
||||
&|| must_stop_processing.get(),
|
||||
&send_progress,
|
||||
)?;
|
||||
)
|
||||
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?;
|
||||
|
||||
// tracing::info!(indexing_result = ?addition, processed_in = ?started_processing_at.elapsed(), "document indexing done");
|
||||
}
|
||||
@@ -1455,7 +1493,7 @@ impl IndexScheduler {
|
||||
|
||||
Ok(vec![task])
|
||||
}
|
||||
IndexOperation::DocumentDeletion { mut tasks, index_uid: _ } => {
|
||||
IndexOperation::DocumentDeletion { mut tasks, index_uid } => {
|
||||
let mut to_delete = RoaringBitmap::new();
|
||||
let external_documents_ids = index.external_documents_ids();
|
||||
|
||||
@@ -1476,35 +1514,23 @@ impl IndexScheduler {
|
||||
deleted_documents: Some(will_be_removed),
|
||||
});
|
||||
}
|
||||
KindWithContent::DocumentDeletionByFilter { index_uid: _, filter_expr } => {
|
||||
KindWithContent::DocumentDeletionByFilter { index_uid, filter_expr } => {
|
||||
let before = to_delete.len();
|
||||
let filter = match Filter::from_json(filter_expr) {
|
||||
Ok(filter) => filter,
|
||||
Err(err) => {
|
||||
// theorically, this should be catched by deserr before reaching the index-scheduler and cannot happens
|
||||
task.status = Status::Failed;
|
||||
task.error = match err {
|
||||
milli::Error::UserError(
|
||||
milli::UserError::InvalidFilterExpression { .. },
|
||||
) => Some(
|
||||
Error::from(err)
|
||||
.with_custom_error_code(Code::InvalidDocumentFilter)
|
||||
.into(),
|
||||
),
|
||||
e => Some(e.into()),
|
||||
};
|
||||
task.error = Some(
|
||||
Error::from_milli(err, Some(index_uid.clone())).into(),
|
||||
);
|
||||
None
|
||||
}
|
||||
};
|
||||
if let Some(filter) = filter {
|
||||
let candidates =
|
||||
filter.evaluate(index_wtxn, index).map_err(|err| match err {
|
||||
milli::Error::UserError(
|
||||
milli::UserError::InvalidFilter(_),
|
||||
) => Error::from(err)
|
||||
.with_custom_error_code(Code::InvalidDocumentFilter),
|
||||
e => e.into(),
|
||||
});
|
||||
let candidates = filter
|
||||
.evaluate(index_wtxn, index)
|
||||
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())));
|
||||
match candidates {
|
||||
Ok(candidates) => to_delete |= candidates,
|
||||
Err(err) => {
|
||||
@@ -1540,8 +1566,9 @@ impl IndexScheduler {
|
||||
// to_delete not empty => index not empty => primary key set
|
||||
let primary_key = index.primary_key(&rtxn)?.unwrap();
|
||||
|
||||
let primary_key = PrimaryKey::new_or_insert(primary_key, &mut new_fields_ids_map)
|
||||
.map_err(milli::Error::from)?;
|
||||
let primary_key =
|
||||
PrimaryKey::new_or_insert(primary_key, &mut new_fields_ids_map)
|
||||
.map_err(|err| Error::from_milli(err.into(), Some(index_uid.clone())))?;
|
||||
|
||||
if !tasks.iter().all(|res| res.error.is_some()) {
|
||||
let local_pool;
|
||||
@@ -1560,8 +1587,10 @@ impl IndexScheduler {
|
||||
let mut indexer = indexer::DocumentDeletion::new();
|
||||
indexer.delete_documents_by_docids(to_delete);
|
||||
let document_changes = indexer.into_changes(&indexer_alloc, primary_key);
|
||||
let embedders = index.embedding_configs(index_wtxn)?;
|
||||
let embedders = self.embedders(embedders)?;
|
||||
let embedders = index
|
||||
.embedding_configs(index_wtxn)
|
||||
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?;
|
||||
let embedders = self.embedders(index_uid.clone(), embedders)?;
|
||||
|
||||
indexer::index(
|
||||
index_wtxn,
|
||||
@@ -1575,14 +1604,15 @@ impl IndexScheduler {
|
||||
embedders,
|
||||
&|| must_stop_processing.get(),
|
||||
&send_progress,
|
||||
)?;
|
||||
)
|
||||
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?;
|
||||
|
||||
// tracing::info!(indexing_result = ?addition, processed_in = ?started_processing_at.elapsed(), "document indexing done");
|
||||
}
|
||||
|
||||
Ok(tasks)
|
||||
}
|
||||
IndexOperation::Settings { index_uid: _, settings, mut tasks } => {
|
||||
IndexOperation::Settings { index_uid, settings, mut tasks } => {
|
||||
let indexer_config = self.index_mapper.indexer_config();
|
||||
let mut builder = milli::update::Settings::new(index_wtxn, index, indexer_config);
|
||||
|
||||
@@ -1596,10 +1626,12 @@ impl IndexScheduler {
|
||||
task.status = Status::Succeeded;
|
||||
}
|
||||
|
||||
builder.execute(
|
||||
|indexing_step| tracing::debug!(update = ?indexing_step),
|
||||
|| must_stop_processing.get(),
|
||||
)?;
|
||||
builder
|
||||
.execute(
|
||||
|indexing_step| tracing::debug!(update = ?indexing_step),
|
||||
|| must_stop_processing.get(),
|
||||
)
|
||||
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?;
|
||||
|
||||
Ok(tasks)
|
||||
}
|
||||
|
||||
@@ -1,13 +1,12 @@
|
||||
use std::fmt::Display;
|
||||
|
||||
use crate::TaskId;
|
||||
use meilisearch_types::batches::BatchId;
|
||||
use meilisearch_types::error::{Code, ErrorCode};
|
||||
use meilisearch_types::tasks::{Kind, Status};
|
||||
use meilisearch_types::{heed, milli};
|
||||
use thiserror::Error;
|
||||
|
||||
use crate::TaskId;
|
||||
|
||||
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
|
||||
pub enum DateField {
|
||||
BeforeEnqueuedAt,
|
||||
@@ -122,8 +121,11 @@ pub enum Error {
|
||||
Dump(#[from] dump::Error),
|
||||
#[error(transparent)]
|
||||
Heed(#[from] heed::Error),
|
||||
#[error(transparent)]
|
||||
Milli(#[from] milli::Error),
|
||||
#[error("{}", match .index_uid {
|
||||
Some(uid) if !uid.is_empty() => format!("Index `{}`: {error}", uid),
|
||||
_ => format!("{error}")
|
||||
})]
|
||||
Milli { error: milli::Error, index_uid: Option<String> },
|
||||
#[error("An unexpected crash occurred when processing the task.")]
|
||||
ProcessBatchPanicked,
|
||||
#[error(transparent)]
|
||||
@@ -190,7 +192,7 @@ impl Error {
|
||||
| Error::AbortedTask
|
||||
| Error::Dump(_)
|
||||
| Error::Heed(_)
|
||||
| Error::Milli(_)
|
||||
| Error::Milli { .. }
|
||||
| Error::ProcessBatchPanicked
|
||||
| Error::FileStore(_)
|
||||
| Error::IoError(_)
|
||||
@@ -209,6 +211,20 @@ impl Error {
|
||||
pub fn with_custom_error_code(self, code: Code) -> Self {
|
||||
Self::WithCustomErrorCode(code, Box::new(self))
|
||||
}
|
||||
|
||||
pub fn from_milli(err: milli::Error, index_uid: Option<String>) -> Self {
|
||||
match err {
|
||||
milli::Error::UserError(milli::UserError::InvalidFilter(_)) => {
|
||||
Self::Milli { error: err, index_uid }
|
||||
.with_custom_error_code(Code::InvalidDocumentFilter)
|
||||
}
|
||||
milli::Error::UserError(milli::UserError::InvalidFilterExpression { .. }) => {
|
||||
Self::Milli { error: err, index_uid }
|
||||
.with_custom_error_code(Code::InvalidDocumentFilter)
|
||||
}
|
||||
_ => Self::Milli { error: err, index_uid },
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ErrorCode for Error {
|
||||
@@ -236,7 +252,7 @@ impl ErrorCode for Error {
|
||||
// TODO: not sure of the Code to use
|
||||
Error::NoSpaceLeftInTaskQueue => Code::NoSpaceLeftOnDevice,
|
||||
Error::Dump(e) => e.error_code(),
|
||||
Error::Milli(e) => e.error_code(),
|
||||
Error::Milli { error, .. } => error.error_code(),
|
||||
Error::ProcessBatchPanicked => Code::Internal,
|
||||
Error::Heed(e) => e.error_code(),
|
||||
Error::HeedTransaction(e) => e.error_code(),
|
||||
|
||||
@@ -3,14 +3,13 @@ use std::path::Path;
|
||||
use std::time::Duration;
|
||||
|
||||
use meilisearch_types::heed::{EnvClosingEvent, EnvFlags, EnvOpenOptions};
|
||||
use meilisearch_types::milli::Index;
|
||||
use meilisearch_types::milli::{Index, Result};
|
||||
use time::OffsetDateTime;
|
||||
use uuid::Uuid;
|
||||
|
||||
use super::IndexStatus::{self, Available, BeingDeleted, Closing, Missing};
|
||||
use crate::clamp_to_page_size;
|
||||
use crate::lru::{InsertionOutcome, LruMap};
|
||||
use crate::{clamp_to_page_size, Result};
|
||||
|
||||
/// Keep an internally consistent view of the open indexes in memory.
|
||||
///
|
||||
/// This view is made of an LRU cache that will evict the least frequently used indexes when new indexes are opened.
|
||||
|
||||
@@ -3,8 +3,13 @@ use std::sync::{Arc, RwLock};
|
||||
use std::time::Duration;
|
||||
use std::{fs, thread};
|
||||
|
||||
use self::index_map::IndexMap;
|
||||
use self::IndexStatus::{Available, BeingDeleted, Closing, Missing};
|
||||
use crate::uuid_codec::UuidCodec;
|
||||
use crate::{Error, Result};
|
||||
use meilisearch_types::heed::types::{SerdeJson, Str};
|
||||
use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn};
|
||||
use meilisearch_types::milli;
|
||||
use meilisearch_types::milli::update::IndexerConfig;
|
||||
use meilisearch_types::milli::{FieldDistribution, Index};
|
||||
use serde::{Deserialize, Serialize};
|
||||
@@ -12,11 +17,6 @@ use time::OffsetDateTime;
|
||||
use tracing::error;
|
||||
use uuid::Uuid;
|
||||
|
||||
use self::index_map::IndexMap;
|
||||
use self::IndexStatus::{Available, BeingDeleted, Closing, Missing};
|
||||
use crate::uuid_codec::UuidCodec;
|
||||
use crate::{Error, Result};
|
||||
|
||||
mod index_map;
|
||||
|
||||
const INDEX_MAPPING: &str = "index-mapping";
|
||||
@@ -121,7 +121,7 @@ impl IndexStats {
|
||||
/// # Parameters
|
||||
///
|
||||
/// - rtxn: a RO transaction for the index, obtained from `Index::read_txn()`.
|
||||
pub fn new(index: &Index, rtxn: &RoTxn) -> Result<Self> {
|
||||
pub fn new(index: &Index, rtxn: &RoTxn) -> milli::Result<Self> {
|
||||
Ok(IndexStats {
|
||||
number_of_documents: index.number_of_documents(rtxn)?,
|
||||
database_size: index.on_disk_size()?,
|
||||
@@ -183,13 +183,18 @@ impl IndexMapper {
|
||||
// Error if the UUIDv4 somehow already exists in the map, since it should be fresh.
|
||||
// This is very unlikely to happen in practice.
|
||||
// TODO: it would be better to lazily create the index. But we need an Index::open function for milli.
|
||||
let index = self.index_map.write().unwrap().create(
|
||||
&uuid,
|
||||
&index_path,
|
||||
date,
|
||||
self.enable_mdb_writemap,
|
||||
self.index_base_map_size,
|
||||
)?;
|
||||
let index = self
|
||||
.index_map
|
||||
.write()
|
||||
.unwrap()
|
||||
.create(
|
||||
&uuid,
|
||||
&index_path,
|
||||
date,
|
||||
self.enable_mdb_writemap,
|
||||
self.index_base_map_size,
|
||||
)
|
||||
.map_err(|e| Error::from_milli(e, Some(uuid.to_string())))?;
|
||||
|
||||
wtxn.commit()?;
|
||||
|
||||
@@ -357,7 +362,9 @@ impl IndexMapper {
|
||||
};
|
||||
let index_path = self.base_path.join(uuid.to_string());
|
||||
// take the lock to reopen the environment.
|
||||
reopen.reopen(&mut self.index_map.write().unwrap(), &index_path)?;
|
||||
reopen
|
||||
.reopen(&mut self.index_map.write().unwrap(), &index_path)
|
||||
.map_err(|e| Error::from_milli(e, Some(uuid.to_string())))?;
|
||||
continue;
|
||||
}
|
||||
BeingDeleted => return Err(Error::IndexNotFound(name.to_string())),
|
||||
@@ -372,13 +379,15 @@ impl IndexMapper {
|
||||
Missing => {
|
||||
let index_path = self.base_path.join(uuid.to_string());
|
||||
|
||||
break index_map.create(
|
||||
&uuid,
|
||||
&index_path,
|
||||
None,
|
||||
self.enable_mdb_writemap,
|
||||
self.index_base_map_size,
|
||||
)?;
|
||||
break index_map
|
||||
.create(
|
||||
&uuid,
|
||||
&index_path,
|
||||
None,
|
||||
self.enable_mdb_writemap,
|
||||
self.index_base_map_size,
|
||||
)
|
||||
.map_err(|e| Error::from_milli(e, Some(uuid.to_string())))?;
|
||||
}
|
||||
Available(index) => break index,
|
||||
Closing(_) => {
|
||||
@@ -460,6 +469,7 @@ impl IndexMapper {
|
||||
let index = self.index(rtxn, index_uid)?;
|
||||
let index_rtxn = index.read_txn()?;
|
||||
IndexStats::new(&index, &index_rtxn)
|
||||
.map_err(|e| Error::from_milli(e, Some(uuid.to_string())))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1440,7 +1440,7 @@ impl IndexScheduler {
|
||||
|
||||
// if the task doesn't delete anything and 50% of the task queue is full, we must refuse to enqueue the incomming task
|
||||
if !matches!(&kind, KindWithContent::TaskDeletion { tasks, .. } if !tasks.is_empty())
|
||||
&& (self.env.non_free_pages_size()? * 100) / self.env.info().map_size as u64 > 50
|
||||
&& (self.env.non_free_pages_size()? * 100) / self.env.info().map_size as u64 > 40
|
||||
{
|
||||
return Err(Error::NoSpaceLeftInTaskQueue);
|
||||
}
|
||||
@@ -1678,9 +1678,10 @@ impl IndexScheduler {
|
||||
tracing::info!("A batch of tasks was successfully completed with {success} successful tasks and {failure} failed tasks.");
|
||||
}
|
||||
// If we have an abortion error we must stop the tick here and re-schedule tasks.
|
||||
Err(Error::Milli(milli::Error::InternalError(
|
||||
milli::InternalError::AbortedIndexation,
|
||||
)))
|
||||
Err(Error::Milli {
|
||||
error: milli::Error::InternalError(milli::InternalError::AbortedIndexation),
|
||||
..
|
||||
})
|
||||
| Err(Error::AbortedTask) => {
|
||||
#[cfg(test)]
|
||||
self.breakpoint(Breakpoint::AbortedIndexation);
|
||||
@@ -1699,9 +1700,10 @@ impl IndexScheduler {
|
||||
// 2. close the associated environment
|
||||
// 3. resize it
|
||||
// 4. re-schedule tasks
|
||||
Err(Error::Milli(milli::Error::UserError(
|
||||
milli::UserError::MaxDatabaseSizeReached,
|
||||
))) if index_uid.is_some() => {
|
||||
Err(Error::Milli {
|
||||
error: milli::Error::UserError(milli::UserError::MaxDatabaseSizeReached),
|
||||
..
|
||||
}) if index_uid.is_some() => {
|
||||
// fixme: add index_uid to match to avoid the unwrap
|
||||
let index_uid = index_uid.unwrap();
|
||||
// fixme: handle error more gracefully? not sure when this could happen
|
||||
@@ -1943,6 +1945,7 @@ impl IndexScheduler {
|
||||
// TODO: consider using a type alias or a struct embedder/template
|
||||
pub fn embedders(
|
||||
&self,
|
||||
index_uid: String,
|
||||
embedding_configs: Vec<IndexEmbeddingConfig>,
|
||||
) -> Result<EmbeddingConfigs> {
|
||||
let res: Result<_> = embedding_configs
|
||||
@@ -1953,8 +1956,12 @@ impl IndexScheduler {
|
||||
config: milli::vector::EmbeddingConfig { embedder_options, prompt, quantized },
|
||||
..
|
||||
}| {
|
||||
let prompt =
|
||||
Arc::new(prompt.try_into().map_err(meilisearch_types::milli::Error::from)?);
|
||||
let prompt = Arc::new(
|
||||
prompt
|
||||
.try_into()
|
||||
.map_err(meilisearch_types::milli::Error::from)
|
||||
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?,
|
||||
);
|
||||
// optimistically return existing embedder
|
||||
{
|
||||
let embedders = self.embedders.read().unwrap();
|
||||
@@ -1970,7 +1977,9 @@ impl IndexScheduler {
|
||||
let embedder = Arc::new(
|
||||
Embedder::new(embedder_options.clone())
|
||||
.map_err(meilisearch_types::milli::vector::Error::from)
|
||||
.map_err(meilisearch_types::milli::Error::from)?,
|
||||
.map_err(|err| {
|
||||
Error::from_milli(err.into(), Some(index_uid.clone()))
|
||||
})?,
|
||||
);
|
||||
{
|
||||
let mut embedders = self.embedders.write().unwrap();
|
||||
@@ -4319,10 +4328,35 @@ mod tests {
|
||||
let proc = index_scheduler.processing_tasks.read().unwrap().clone();
|
||||
|
||||
let query = Query { statuses: Some(vec![Status::Processing]), ..Default::default() };
|
||||
let (batches, _) = index_scheduler
|
||||
.get_batch_ids_from_authorized_indexes(&rtxn, &proc, &query, &AuthFilter::default())
|
||||
let (mut batches, _) = index_scheduler
|
||||
.get_batches_from_authorized_indexes(query.clone(), &AuthFilter::default())
|
||||
.unwrap();
|
||||
snapshot!(snapshot_bitmap(&batches), @"[0,]"); // only the processing batch in the first tick
|
||||
assert_eq!(batches.len(), 1);
|
||||
batches[0].started_at = OffsetDateTime::UNIX_EPOCH;
|
||||
// Insta cannot snapshot our batches because the batch stats contains an enum as key: https://github.com/mitsuhiko/insta/issues/689
|
||||
let batch = serde_json::to_string_pretty(&batches[0]).unwrap();
|
||||
snapshot!(batch, @r#"
|
||||
{
|
||||
"uid": 0,
|
||||
"details": {
|
||||
"primaryKey": "mouse"
|
||||
},
|
||||
"stats": {
|
||||
"totalNbTasks": 1,
|
||||
"status": {
|
||||
"processing": 1
|
||||
},
|
||||
"types": {
|
||||
"indexCreation": 1
|
||||
},
|
||||
"indexUids": {
|
||||
"catto": 1
|
||||
}
|
||||
},
|
||||
"startedAt": "1970-01-01T00:00:00Z",
|
||||
"finishedAt": null
|
||||
}
|
||||
"#);
|
||||
|
||||
let query = Query { statuses: Some(vec![Status::Enqueued]), ..Default::default() };
|
||||
let (batches, _) = index_scheduler
|
||||
@@ -6146,7 +6180,7 @@ mod tests {
|
||||
insta::assert_json_snapshot!(simple_hf_config.embedder_options);
|
||||
let simple_hf_name = name.clone();
|
||||
|
||||
let configs = index_scheduler.embedders(configs).unwrap();
|
||||
let configs = index_scheduler.embedders("doggos".to_string(), configs).unwrap();
|
||||
let (hf_embedder, _, _) = configs.get(&simple_hf_name).unwrap();
|
||||
let beagle_embed =
|
||||
hf_embedder.embed_one(S("Intel the beagle best doggo"), None).unwrap();
|
||||
|
||||
@@ -5,7 +5,7 @@ snapshot_kind: text
|
||||
### Autobatching Enabled = true
|
||||
### Processing batch Some(1):
|
||||
[1,]
|
||||
{uid: 1, details: {"receivedDocuments":2,"indexedDocuments":null}, stats: {"totalNbTasks":2,"status":{"enqueued":2},"types":{"documentAdditionOrUpdate":2},"indexUids":{"beavero":2}}, }
|
||||
{uid: 1, details: {"receivedDocuments":1,"indexedDocuments":null}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"beavero":1}}, }
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, batch_uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
|
||||
|
||||
@@ -5,7 +5,7 @@ snapshot_kind: text
|
||||
### Autobatching Enabled = true
|
||||
### Processing batch Some(1):
|
||||
[1,]
|
||||
{uid: 1, details: {"receivedDocuments":2,"indexedDocuments":null}, stats: {"totalNbTasks":2,"status":{"enqueued":2},"types":{"documentAdditionOrUpdate":2},"indexUids":{"beavero":2}}, }
|
||||
{uid: 1, details: {"receivedDocuments":1,"indexedDocuments":null}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"beavero":1}}, }
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, batch_uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
|
||||
|
||||
@@ -5,7 +5,7 @@ snapshot_kind: text
|
||||
### Autobatching Enabled = true
|
||||
### Processing batch Some(0):
|
||||
[0,]
|
||||
{uid: 0, details: {"dumpUid":null}, stats: {"totalNbTasks":1,"status":{"enqueued":1},"types":{"dumpCreation":1},"indexUids":{}}, }
|
||||
{uid: 0, details: {"dumpUid":null}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"dumpCreation":1},"indexUids":{}}, }
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, status: enqueued, details: { dump_uid: None }, kind: DumpCreation { keys: [], instance_uid: None }}
|
||||
|
||||
@@ -5,7 +5,7 @@ snapshot_kind: text
|
||||
### Autobatching Enabled = true
|
||||
### Processing batch Some(0):
|
||||
[0,]
|
||||
{uid: 0, details: {"receivedDocuments":2,"indexedDocuments":null}, stats: {"totalNbTasks":2,"status":{"enqueued":2},"types":{"documentAdditionOrUpdate":2},"indexUids":{"catto":2}}, }
|
||||
{uid: 0, details: {"receivedDocuments":1,"indexedDocuments":null}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"catto":1}}, }
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
|
||||
|
||||
@@ -5,7 +5,7 @@ snapshot_kind: text
|
||||
### Autobatching Enabled = true
|
||||
### Processing batch Some(0):
|
||||
[0,]
|
||||
{uid: 0, details: {"receivedDocuments":2,"indexedDocuments":null}, stats: {"totalNbTasks":2,"status":{"enqueued":2},"types":{"documentAdditionOrUpdate":2},"indexUids":{"catto":2}}, }
|
||||
{uid: 0, details: {"receivedDocuments":1,"indexedDocuments":null}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"catto":1}}, }
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
|
||||
|
||||
@@ -5,7 +5,7 @@ snapshot_kind: text
|
||||
### Autobatching Enabled = true
|
||||
### Processing batch Some(0):
|
||||
[0,]
|
||||
{uid: 0, details: {"receivedDocuments":2,"indexedDocuments":null}, stats: {"totalNbTasks":2,"status":{"enqueued":2},"types":{"documentAdditionOrUpdate":2},"indexUids":{"catto":2}}, }
|
||||
{uid: 0, details: {"receivedDocuments":1,"indexedDocuments":null}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"catto":1}}, }
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
|
||||
|
||||
@@ -5,7 +5,7 @@ snapshot_kind: text
|
||||
### Autobatching Enabled = true
|
||||
### Processing batch Some(0):
|
||||
[0,]
|
||||
{uid: 0, details: {"receivedDocuments":2,"indexedDocuments":null}, stats: {"totalNbTasks":2,"status":{"enqueued":2},"types":{"documentAdditionOrUpdate":2},"indexUids":{"doggos":2}}, }
|
||||
{uid: 0, details: {"receivedDocuments":1,"indexedDocuments":null}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, }
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
|
||||
|
||||
@@ -5,7 +5,7 @@ snapshot_kind: text
|
||||
### Autobatching Enabled = true
|
||||
### Processing batch Some(0):
|
||||
[0,]
|
||||
{uid: 0, details: {"receivedDocuments":2,"indexedDocuments":null}, stats: {"totalNbTasks":2,"status":{"enqueued":2},"types":{"documentAdditionOrUpdate":2},"indexUids":{"doggos":2}}, }
|
||||
{uid: 0, details: {"receivedDocuments":1,"indexedDocuments":null}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, }
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
|
||||
|
||||
@@ -9,8 +9,8 @@ source: crates/index-scheduler/src/lib.rs
|
||||
0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
||||
1 {uid: 1, batch_uid: 1, status: succeeded, details: { received_documents: 3, indexed_documents: Some(3) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }}
|
||||
2 {uid: 2, batch_uid: 2, status: succeeded, details: { received_document_ids: 1, deleted_documents: Some(1) }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1"] }}
|
||||
3 {uid: 3, batch_uid: 2, status: failed, error: ResponseError { code: 200, message: "Invalid type for filter subexpression: expected: String, Array, found: true.", error_code: "invalid_document_filter", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#invalid_document_filter" }, details: { original_filter: true, deleted_documents: Some(0) }, kind: DocumentDeletionByFilter { index_uid: "doggos", filter_expr: Bool(true) }}
|
||||
4 {uid: 4, batch_uid: 2, status: failed, error: ResponseError { code: 200, message: "Attribute `id` is not filterable. Available filterable attributes are: `catto`.\n1:3 id = 2", error_code: "invalid_document_filter", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#invalid_document_filter" }, details: { original_filter: "id = 2", deleted_documents: Some(0) }, kind: DocumentDeletionByFilter { index_uid: "doggos", filter_expr: String("id = 2") }}
|
||||
3 {uid: 3, batch_uid: 2, status: failed, error: ResponseError { code: 200, message: "Index `doggos`: Invalid type for filter subexpression: expected: String, Array, found: true.", error_code: "invalid_document_filter", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#invalid_document_filter" }, details: { original_filter: true, deleted_documents: Some(0) }, kind: DocumentDeletionByFilter { index_uid: "doggos", filter_expr: Bool(true) }}
|
||||
4 {uid: 4, batch_uid: 2, status: failed, error: ResponseError { code: 200, message: "Index `doggos`: Attribute `id` is not filterable. Available filterable attributes are: `catto`.\n1:3 id = 2", error_code: "invalid_document_filter", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#invalid_document_filter" }, details: { original_filter: "id = 2", deleted_documents: Some(0) }, kind: DocumentDeletionByFilter { index_uid: "doggos", filter_expr: String("id = 2") }}
|
||||
5 {uid: 5, batch_uid: 2, status: succeeded, details: { original_filter: "catto EXISTS", deleted_documents: Some(1) }, kind: DocumentDeletionByFilter { index_uid: "doggos", filter_expr: String("catto EXISTS") }}
|
||||
----------------------------------------------------------------------
|
||||
### Status:
|
||||
|
||||
@@ -5,7 +5,7 @@ snapshot_kind: text
|
||||
### Autobatching Enabled = true
|
||||
### Processing batch Some(0):
|
||||
[0,]
|
||||
{uid: 0, details: {"receivedDocuments":2,"indexedDocuments":null}, stats: {"totalNbTasks":2,"status":{"enqueued":2},"types":{"documentAdditionOrUpdate":2},"indexUids":{"doggos":2}}, }
|
||||
{uid: 0, details: {"receivedDocuments":1,"indexedDocuments":null}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, }
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
|
||||
|
||||
@@ -5,7 +5,7 @@ snapshot_kind: text
|
||||
### Autobatching Enabled = true
|
||||
### Processing batch Some(0):
|
||||
[0,]
|
||||
{uid: 0, details: {"receivedDocuments":2,"indexedDocuments":null}, stats: {"totalNbTasks":2,"status":{"enqueued":2},"types":{"documentAdditionOrUpdate":2},"indexUids":{"doggos":2}}, }
|
||||
{uid: 0, details: {"receivedDocuments":1,"indexedDocuments":null}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, }
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
|
||||
|
||||
@@ -5,7 +5,7 @@ snapshot_kind: text
|
||||
### Autobatching Enabled = true
|
||||
### Processing batch Some(0):
|
||||
[0,]
|
||||
{uid: 0, details: {"primaryKey":"id"}, stats: {"totalNbTasks":2,"status":{"enqueued":2},"types":{"indexCreation":2},"indexUids":{"index_a":2}}, }
|
||||
{uid: 0, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"indexCreation":1},"indexUids":{"index_a":1}}, }
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, status: enqueued, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "index_a", primary_key: Some("id") }}
|
||||
|
||||
@@ -5,7 +5,7 @@ snapshot_kind: text
|
||||
### Autobatching Enabled = true
|
||||
### Processing batch Some(0):
|
||||
[0,]
|
||||
{uid: 0, details: {"primaryKey":"id"}, stats: {"totalNbTasks":2,"status":{"enqueued":2},"types":{"indexCreation":2},"indexUids":{"index_a":2}}, }
|
||||
{uid: 0, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"indexCreation":1},"indexUids":{"index_a":1}}, }
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, status: enqueued, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "index_a", primary_key: Some("id") }}
|
||||
|
||||
@@ -5,7 +5,7 @@ snapshot_kind: text
|
||||
### Autobatching Enabled = true
|
||||
### Processing batch Some(0):
|
||||
[0,]
|
||||
{uid: 0, details: {"primaryKey":"id"}, stats: {"totalNbTasks":2,"status":{"enqueued":2},"types":{"indexCreation":2},"indexUids":{"index_a":2}}, }
|
||||
{uid: 0, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"indexCreation":1},"indexUids":{"index_a":1}}, }
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, status: enqueued, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "index_a", primary_key: Some("id") }}
|
||||
|
||||
@@ -5,7 +5,7 @@ snapshot_kind: text
|
||||
### Autobatching Enabled = true
|
||||
### Processing batch Some(1):
|
||||
[1,]
|
||||
{uid: 1, details: {"primaryKey":"sheep"}, stats: {"totalNbTasks":2,"status":{"enqueued":2},"types":{"indexCreation":2},"indexUids":{"doggo":2}}, }
|
||||
{uid: 1, details: {"primaryKey":"sheep"}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"indexCreation":1},"indexUids":{"doggo":1}}, }
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, batch_uid: 0, status: succeeded, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
|
||||
|
||||
@@ -5,7 +5,7 @@ snapshot_kind: text
|
||||
### Autobatching Enabled = true
|
||||
### Processing batch Some(0):
|
||||
[3,]
|
||||
{uid: 0, details: {"matchedTasks":2,"deletedTasks":null,"originalFilter":"test_query"}, stats: {"totalNbTasks":1,"status":{"enqueued":1},"types":{"taskDeletion":1},"indexUids":{}}, }
|
||||
{uid: 0, details: {"matchedTasks":2,"deletedTasks":null,"originalFilter":"test_query"}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"taskDeletion":1},"indexUids":{}}, }
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
|
||||
|
||||
@@ -67,7 +67,7 @@ impl ProcessingBatch {
|
||||
task.batch_uid = Some(self.uid);
|
||||
// We don't store the statuses in the map since they're all enqueued but we must
|
||||
// still store them in the stats since that can be displayed.
|
||||
*self.stats.status.entry(task.status).or_default() += 1;
|
||||
*self.stats.status.entry(Status::Processing).or_default() += 1;
|
||||
|
||||
self.kinds.insert(task.kind.as_kind());
|
||||
*self.stats.types.entry(task.kind.as_kind()).or_default() += 1;
|
||||
|
||||
@@ -279,6 +279,7 @@ InvalidSearchPage , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSearchQ , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidFacetSearchQuery , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidFacetSearchName , InvalidRequest , BAD_REQUEST ;
|
||||
FacetSearchDisabled , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSearchVector , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSearchShowMatchesPosition , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSearchShowRankingScore , InvalidRequest , BAD_REQUEST ;
|
||||
|
||||
@@ -4,6 +4,7 @@ use byte_unit::{Byte, UnitType};
|
||||
use meilisearch_types::document_formats::{DocumentFormatError, PayloadType};
|
||||
use meilisearch_types::error::{Code, ErrorCode, ResponseError};
|
||||
use meilisearch_types::index_uid::{IndexUid, IndexUidFormatError};
|
||||
use meilisearch_types::milli;
|
||||
use meilisearch_types::milli::OrderBy;
|
||||
use serde_json::Value;
|
||||
use tokio::task::JoinError;
|
||||
@@ -62,8 +63,11 @@ pub enum MeilisearchHttpError {
|
||||
HeedError(#[from] meilisearch_types::heed::Error),
|
||||
#[error(transparent)]
|
||||
IndexScheduler(#[from] index_scheduler::Error),
|
||||
#[error(transparent)]
|
||||
Milli(#[from] meilisearch_types::milli::Error),
|
||||
#[error("{}", match .index_name {
|
||||
Some(name) if !name.is_empty() => format!("Index `{}`: {error}", name),
|
||||
_ => format!("{error}")
|
||||
})]
|
||||
Milli { error: milli::Error, index_name: Option<String> },
|
||||
#[error(transparent)]
|
||||
Payload(#[from] PayloadError),
|
||||
#[error(transparent)]
|
||||
@@ -76,6 +80,12 @@ pub enum MeilisearchHttpError {
|
||||
MissingSearchHybrid,
|
||||
}
|
||||
|
||||
impl MeilisearchHttpError {
|
||||
pub(crate) fn from_milli(error: milli::Error, index_name: Option<String>) -> Self {
|
||||
Self::Milli { error, index_name }
|
||||
}
|
||||
}
|
||||
|
||||
impl ErrorCode for MeilisearchHttpError {
|
||||
fn error_code(&self) -> Code {
|
||||
match self {
|
||||
@@ -95,7 +105,7 @@ impl ErrorCode for MeilisearchHttpError {
|
||||
MeilisearchHttpError::SerdeJson(_) => Code::Internal,
|
||||
MeilisearchHttpError::HeedError(_) => Code::Internal,
|
||||
MeilisearchHttpError::IndexScheduler(e) => e.error_code(),
|
||||
MeilisearchHttpError::Milli(e) => e.error_code(),
|
||||
MeilisearchHttpError::Milli { error, .. } => error.error_code(),
|
||||
MeilisearchHttpError::Payload(e) => e.error_code(),
|
||||
MeilisearchHttpError::FileStore(_) => Code::Internal,
|
||||
MeilisearchHttpError::DocumentFormat(e) => e.error_code(),
|
||||
|
||||
@@ -395,6 +395,7 @@ fn import_dump(
|
||||
for index_reader in dump_reader.indexes()? {
|
||||
let mut index_reader = index_reader?;
|
||||
let metadata = index_reader.metadata();
|
||||
let uid = metadata.uid.clone();
|
||||
tracing::info!("Importing index `{}`.", metadata.uid);
|
||||
|
||||
let date = Some((metadata.created_at, metadata.updated_at));
|
||||
@@ -432,7 +433,7 @@ fn import_dump(
|
||||
let reader = DocumentsBatchReader::from_reader(reader)?;
|
||||
|
||||
let embedder_configs = index.embedding_configs(&wtxn)?;
|
||||
let embedders = index_scheduler.embedders(embedder_configs)?;
|
||||
let embedders = index_scheduler.embedders(uid, embedder_configs)?;
|
||||
|
||||
let builder = milli::update::IndexDocuments::new(
|
||||
&mut wtxn,
|
||||
|
||||
@@ -185,7 +185,8 @@ pub async fn search(
|
||||
|
||||
let index = index_scheduler.index(&index_uid)?;
|
||||
let features = index_scheduler.features();
|
||||
let search_kind = search_kind(&search_query, &index_scheduler, &index, features)?;
|
||||
let search_kind =
|
||||
search_kind(&search_query, &index_scheduler, index_uid.to_string(), &index, features)?;
|
||||
let permit = search_queue.try_get_search_permit().await?;
|
||||
let search_result = tokio::task::spawn_blocking(move || {
|
||||
perform_facet_search(
|
||||
|
||||
@@ -5,7 +5,7 @@ use actix_web::web::Data;
|
||||
use actix_web::{web, HttpRequest, HttpResponse};
|
||||
use deserr::actix_web::{AwebJson, AwebQueryParameter};
|
||||
use deserr::{DeserializeError, Deserr, ValuePointerRef};
|
||||
use index_scheduler::IndexScheduler;
|
||||
use index_scheduler::{Error, IndexScheduler};
|
||||
use meilisearch_types::deserr::query_params::Param;
|
||||
use meilisearch_types::deserr::{immutable_field_error, DeserrJsonError, DeserrQueryParamError};
|
||||
use meilisearch_types::error::deserr_codes::*;
|
||||
@@ -107,7 +107,10 @@ pub async fn list_indexes(
|
||||
if !filters.is_index_authorized(uid) {
|
||||
return Ok(None);
|
||||
}
|
||||
Ok(Some(IndexView::new(uid.to_string(), index)?))
|
||||
Ok(Some(
|
||||
IndexView::new(uid.to_string(), index)
|
||||
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?,
|
||||
))
|
||||
})?;
|
||||
// Won't cause to open all indexes because IndexView doesn't keep the `Index` opened.
|
||||
let indexes: Vec<IndexView> = indexes.into_iter().flatten().collect();
|
||||
|
||||
@@ -243,11 +243,19 @@ pub async fn search_with_url_query(
|
||||
let index = index_scheduler.index(&index_uid)?;
|
||||
let features = index_scheduler.features();
|
||||
|
||||
let search_kind = search_kind(&query, index_scheduler.get_ref(), &index, features)?;
|
||||
let search_kind =
|
||||
search_kind(&query, index_scheduler.get_ref(), index_uid.to_string(), &index, features)?;
|
||||
let retrieve_vector = RetrieveVectors::new(query.retrieve_vectors, features)?;
|
||||
let permit = search_queue.try_get_search_permit().await?;
|
||||
let search_result = tokio::task::spawn_blocking(move || {
|
||||
perform_search(&index, query, search_kind, retrieve_vector, index_scheduler.features())
|
||||
perform_search(
|
||||
index_uid.to_string(),
|
||||
&index,
|
||||
query,
|
||||
search_kind,
|
||||
retrieve_vector,
|
||||
index_scheduler.features(),
|
||||
)
|
||||
})
|
||||
.await;
|
||||
permit.drop().await;
|
||||
@@ -287,12 +295,20 @@ pub async fn search_with_post(
|
||||
|
||||
let features = index_scheduler.features();
|
||||
|
||||
let search_kind = search_kind(&query, index_scheduler.get_ref(), &index, features)?;
|
||||
let search_kind =
|
||||
search_kind(&query, index_scheduler.get_ref(), index_uid.to_string(), &index, features)?;
|
||||
let retrieve_vectors = RetrieveVectors::new(query.retrieve_vectors, features)?;
|
||||
|
||||
let permit = search_queue.try_get_search_permit().await?;
|
||||
let search_result = tokio::task::spawn_blocking(move || {
|
||||
perform_search(&index, query, search_kind, retrieve_vectors, index_scheduler.features())
|
||||
perform_search(
|
||||
index_uid.to_string(),
|
||||
&index,
|
||||
query,
|
||||
search_kind,
|
||||
retrieve_vectors,
|
||||
index_scheduler.features(),
|
||||
)
|
||||
})
|
||||
.await;
|
||||
permit.drop().await;
|
||||
@@ -314,6 +330,7 @@ pub async fn search_with_post(
|
||||
pub fn search_kind(
|
||||
query: &SearchQuery,
|
||||
index_scheduler: &IndexScheduler,
|
||||
index_uid: String,
|
||||
index: &milli::Index,
|
||||
features: RoFeatures,
|
||||
) -> Result<SearchKind, ResponseError> {
|
||||
@@ -332,7 +349,7 @@ pub fn search_kind(
|
||||
(None, _, None) => Ok(SearchKind::KeywordOnly),
|
||||
// hybrid.semantic_ratio == 1.0 => vector
|
||||
(_, Some(HybridQuery { semantic_ratio, embedder }), v) if **semantic_ratio == 1.0 => {
|
||||
SearchKind::semantic(index_scheduler, index, embedder, v.map(|v| v.len()))
|
||||
SearchKind::semantic(index_scheduler, index_uid, index, embedder, v.map(|v| v.len()))
|
||||
}
|
||||
// hybrid.semantic_ratio == 0.0 => keyword
|
||||
(_, Some(HybridQuery { semantic_ratio, embedder: _ }), _) if **semantic_ratio == 0.0 => {
|
||||
@@ -340,13 +357,14 @@ pub fn search_kind(
|
||||
}
|
||||
// no query, hybrid, vector => semantic
|
||||
(None, Some(HybridQuery { semantic_ratio: _, embedder }), Some(v)) => {
|
||||
SearchKind::semantic(index_scheduler, index, embedder, Some(v.len()))
|
||||
SearchKind::semantic(index_scheduler, index_uid, index, embedder, Some(v.len()))
|
||||
}
|
||||
// query, no hybrid, no vector => keyword
|
||||
(Some(_), None, None) => Ok(SearchKind::KeywordOnly),
|
||||
// query, hybrid, maybe vector => hybrid
|
||||
(Some(_), Some(HybridQuery { semantic_ratio, embedder }), v) => SearchKind::hybrid(
|
||||
index_scheduler,
|
||||
index_uid,
|
||||
index,
|
||||
embedder,
|
||||
**semantic_ratio,
|
||||
|
||||
@@ -103,8 +103,13 @@ async fn similar(
|
||||
|
||||
let index = index_scheduler.index(&index_uid)?;
|
||||
|
||||
let (embedder_name, embedder, quantized) =
|
||||
SearchKind::embedder(&index_scheduler, &index, &query.embedder, None)?;
|
||||
let (embedder_name, embedder, quantized) = SearchKind::embedder(
|
||||
&index_scheduler,
|
||||
index_uid.to_string(),
|
||||
&index,
|
||||
&query.embedder,
|
||||
None,
|
||||
)?;
|
||||
|
||||
tokio::task::spawn_blocking(move || {
|
||||
perform_similar(
|
||||
|
||||
@@ -125,14 +125,28 @@ pub async fn multi_search_with_post(
|
||||
})
|
||||
.with_index(query_index)?;
|
||||
|
||||
let search_kind =
|
||||
search_kind(&query, index_scheduler.get_ref(), &index, features)
|
||||
.with_index(query_index)?;
|
||||
let index_uid_str = index_uid.to_string();
|
||||
|
||||
let search_kind = search_kind(
|
||||
&query,
|
||||
index_scheduler.get_ref(),
|
||||
index_uid_str.clone(),
|
||||
&index,
|
||||
features,
|
||||
)
|
||||
.with_index(query_index)?;
|
||||
let retrieve_vector = RetrieveVectors::new(query.retrieve_vectors, features)
|
||||
.with_index(query_index)?;
|
||||
|
||||
let search_result = tokio::task::spawn_blocking(move || {
|
||||
perform_search(&index, query, search_kind, retrieve_vector, features)
|
||||
perform_search(
|
||||
index_uid_str.clone(),
|
||||
&index,
|
||||
query,
|
||||
search_kind,
|
||||
retrieve_vector,
|
||||
features,
|
||||
)
|
||||
})
|
||||
.await
|
||||
.with_index(query_index)?;
|
||||
|
||||
@@ -560,7 +560,8 @@ pub fn perform_federated_search(
|
||||
// use an immediately invoked lambda to capture the result without returning from the function
|
||||
|
||||
let res: Result<(), ResponseError> = (|| {
|
||||
let search_kind = search_kind(&query, index_scheduler, &index, features)?;
|
||||
let search_kind =
|
||||
search_kind(&query, index_scheduler, index_uid.to_string(), &index, features)?;
|
||||
|
||||
let canonicalization_kind = match (&search_kind, &query.q) {
|
||||
(SearchKind::SemanticOnly { .. }, _) => {
|
||||
@@ -636,7 +637,8 @@ pub fn perform_federated_search(
|
||||
search.offset(0);
|
||||
search.limit(required_hit_count);
|
||||
|
||||
let (result, _semantic_hit_count) = super::search_from_kind(search_kind, search)?;
|
||||
let (result, _semantic_hit_count) =
|
||||
super::search_from_kind(index_uid.to_string(), search_kind, search)?;
|
||||
let format = AttributesFormat {
|
||||
attributes_to_retrieve: query.attributes_to_retrieve,
|
||||
retrieve_vectors,
|
||||
@@ -670,7 +672,10 @@ pub fn perform_federated_search(
|
||||
|
||||
let formatter_builder = HitMaker::formatter_builder(matching_words, tokenizer);
|
||||
|
||||
let hit_maker = HitMaker::new(&index, &rtxn, format, formatter_builder)?;
|
||||
let hit_maker =
|
||||
HitMaker::new(&index, &rtxn, format, formatter_builder).map_err(|e| {
|
||||
MeilisearchHttpError::from_milli(e, Some(index_uid.to_string()))
|
||||
})?;
|
||||
|
||||
results_by_query.push(SearchResultByQuery {
|
||||
federation_options,
|
||||
|
||||
@@ -19,7 +19,9 @@ use meilisearch_types::locales::Locale;
|
||||
use meilisearch_types::milli::score_details::{ScoreDetails, ScoringStrategy};
|
||||
use meilisearch_types::milli::vector::parsed_vectors::ExplicitVectors;
|
||||
use meilisearch_types::milli::vector::Embedder;
|
||||
use meilisearch_types::milli::{FacetValueHit, OrderBy, SearchForFacetValues, TimeBudget};
|
||||
use meilisearch_types::milli::{
|
||||
FacetValueHit, InternalError, OrderBy, SearchForFacetValues, TimeBudget,
|
||||
};
|
||||
use meilisearch_types::settings::DEFAULT_PAGINATION_MAX_TOTAL_HITS;
|
||||
use meilisearch_types::{milli, Document};
|
||||
use milli::tokenizer::{Language, TokenizerBuilder};
|
||||
@@ -281,35 +283,38 @@ pub enum SearchKind {
|
||||
impl SearchKind {
|
||||
pub(crate) fn semantic(
|
||||
index_scheduler: &index_scheduler::IndexScheduler,
|
||||
index_uid: String,
|
||||
index: &Index,
|
||||
embedder_name: &str,
|
||||
vector_len: Option<usize>,
|
||||
) -> Result<Self, ResponseError> {
|
||||
let (embedder_name, embedder, quantized) =
|
||||
Self::embedder(index_scheduler, index, embedder_name, vector_len)?;
|
||||
Self::embedder(index_scheduler, index_uid, index, embedder_name, vector_len)?;
|
||||
Ok(Self::SemanticOnly { embedder_name, embedder, quantized })
|
||||
}
|
||||
|
||||
pub(crate) fn hybrid(
|
||||
index_scheduler: &index_scheduler::IndexScheduler,
|
||||
index_uid: String,
|
||||
index: &Index,
|
||||
embedder_name: &str,
|
||||
semantic_ratio: f32,
|
||||
vector_len: Option<usize>,
|
||||
) -> Result<Self, ResponseError> {
|
||||
let (embedder_name, embedder, quantized) =
|
||||
Self::embedder(index_scheduler, index, embedder_name, vector_len)?;
|
||||
Self::embedder(index_scheduler, index_uid, index, embedder_name, vector_len)?;
|
||||
Ok(Self::Hybrid { embedder_name, embedder, quantized, semantic_ratio })
|
||||
}
|
||||
|
||||
pub(crate) fn embedder(
|
||||
index_scheduler: &index_scheduler::IndexScheduler,
|
||||
index_uid: String,
|
||||
index: &Index,
|
||||
embedder_name: &str,
|
||||
vector_len: Option<usize>,
|
||||
) -> Result<(String, Arc<Embedder>, bool), ResponseError> {
|
||||
let embedder_configs = index.embedding_configs(&index.read_txn()?)?;
|
||||
let embedders = index_scheduler.embedders(embedder_configs)?;
|
||||
let embedders = index_scheduler.embedders(index_uid, embedder_configs)?;
|
||||
|
||||
let (embedder, _, quantized) = embedders
|
||||
.get(embedder_name)
|
||||
@@ -890,6 +895,7 @@ fn prepare_search<'t>(
|
||||
}
|
||||
|
||||
pub fn perform_search(
|
||||
index_uid: String,
|
||||
index: &Index,
|
||||
query: SearchQuery,
|
||||
search_kind: SearchKind,
|
||||
@@ -916,7 +922,7 @@ pub fn perform_search(
|
||||
used_negative_operator,
|
||||
},
|
||||
semantic_hit_count,
|
||||
) = search_from_kind(search_kind, search)?;
|
||||
) = search_from_kind(index_uid, search_kind, search)?;
|
||||
|
||||
let SearchQuery {
|
||||
q,
|
||||
@@ -1069,17 +1075,27 @@ fn compute_facet_distribution_stats<S: AsRef<str>>(
|
||||
}
|
||||
|
||||
pub fn search_from_kind(
|
||||
index_uid: String,
|
||||
search_kind: SearchKind,
|
||||
search: milli::Search<'_>,
|
||||
) -> Result<(milli::SearchResult, Option<u32>), MeilisearchHttpError> {
|
||||
let (milli_result, semantic_hit_count) = match &search_kind {
|
||||
SearchKind::KeywordOnly => (search.execute()?, None),
|
||||
SearchKind::KeywordOnly => {
|
||||
let results = search
|
||||
.execute()
|
||||
.map_err(|e| MeilisearchHttpError::from_milli(e, Some(index_uid.to_string())))?;
|
||||
(results, None)
|
||||
}
|
||||
SearchKind::SemanticOnly { .. } => {
|
||||
let results = search.execute()?;
|
||||
let results = search
|
||||
.execute()
|
||||
.map_err(|e| MeilisearchHttpError::from_milli(e, Some(index_uid.to_string())))?;
|
||||
let semantic_hit_count = results.document_scores.len() as u32;
|
||||
(results, Some(semantic_hit_count))
|
||||
}
|
||||
SearchKind::Hybrid { semantic_ratio, .. } => search.execute_hybrid(*semantic_ratio)?,
|
||||
SearchKind::Hybrid { semantic_ratio, .. } => search
|
||||
.execute_hybrid(*semantic_ratio)
|
||||
.map_err(|e| MeilisearchHttpError::from_milli(e, Some(index_uid)))?,
|
||||
};
|
||||
Ok((milli_result, semantic_hit_count))
|
||||
}
|
||||
@@ -1181,7 +1197,7 @@ impl<'a> HitMaker<'a> {
|
||||
rtxn: &'a RoTxn<'a>,
|
||||
format: AttributesFormat,
|
||||
mut formatter_builder: MatcherBuilder<'a>,
|
||||
) -> Result<Self, MeilisearchHttpError> {
|
||||
) -> milli::Result<Self> {
|
||||
formatter_builder.crop_marker(format.crop_marker);
|
||||
formatter_builder.highlight_prefix(format.highlight_pre_tag);
|
||||
formatter_builder.highlight_suffix(format.highlight_post_tag);
|
||||
@@ -1276,11 +1292,7 @@ impl<'a> HitMaker<'a> {
|
||||
})
|
||||
}
|
||||
|
||||
pub fn make_hit(
|
||||
&self,
|
||||
id: u32,
|
||||
score: &[ScoreDetails],
|
||||
) -> Result<SearchHit, MeilisearchHttpError> {
|
||||
pub fn make_hit(&self, id: u32, score: &[ScoreDetails]) -> milli::Result<SearchHit> {
|
||||
let (_, obkv) =
|
||||
self.index.iter_documents(self.rtxn, std::iter::once(id))?.next().unwrap()?;
|
||||
|
||||
@@ -1323,7 +1335,10 @@ impl<'a> HitMaker<'a> {
|
||||
.is_some_and(|conf| conf.user_provided.contains(id));
|
||||
let embeddings =
|
||||
ExplicitVectors { embeddings: Some(vector.into()), regenerate: !user_provided };
|
||||
vectors.insert(name, serde_json::to_value(embeddings)?);
|
||||
vectors.insert(
|
||||
name,
|
||||
serde_json::to_value(embeddings).map_err(InternalError::SerdeJson)?,
|
||||
);
|
||||
}
|
||||
document.insert("_vectors".into(), vectors.into());
|
||||
}
|
||||
@@ -1369,7 +1384,7 @@ fn make_hits<'a>(
|
||||
format: AttributesFormat,
|
||||
matching_words: milli::MatchingWords,
|
||||
documents_ids_scores: impl Iterator<Item = (u32, &'a Vec<ScoreDetails>)> + 'a,
|
||||
) -> Result<Vec<SearchHit>, MeilisearchHttpError> {
|
||||
) -> milli::Result<Vec<SearchHit>> {
|
||||
let mut documents = Vec::new();
|
||||
|
||||
let dictionary = index.dictionary(rtxn)?;
|
||||
@@ -1407,6 +1422,13 @@ pub fn perform_facet_search(
|
||||
None => TimeBudget::default(),
|
||||
};
|
||||
|
||||
if !index.facet_search(&rtxn)? {
|
||||
return Err(ResponseError::from_msg(
|
||||
"The facet search is disabled for this index".to_string(),
|
||||
Code::FacetSearchDisabled,
|
||||
));
|
||||
}
|
||||
|
||||
// In the faceted search context, we want to use the intersection between the locales provided by the user
|
||||
// and the locales of the facet string.
|
||||
// If the facet string is not localized, we **ignore** the locales provided by the user because the facet data has no locale.
|
||||
@@ -1690,12 +1712,12 @@ fn make_document(
|
||||
displayed_attributes: &BTreeSet<FieldId>,
|
||||
field_ids_map: &FieldsIdsMap,
|
||||
obkv: &obkv::KvReaderU16,
|
||||
) -> Result<Document, MeilisearchHttpError> {
|
||||
) -> milli::Result<Document> {
|
||||
let mut document = serde_json::Map::new();
|
||||
|
||||
// recreate the original json
|
||||
for (key, value) in obkv.iter() {
|
||||
let value = serde_json::from_slice(value)?;
|
||||
let value = serde_json::from_slice(value).map_err(InternalError::SerdeJson)?;
|
||||
let key = field_ids_map.name(key).expect("Missing field name").to_string();
|
||||
|
||||
document.insert(key, value);
|
||||
@@ -1720,7 +1742,7 @@ fn format_fields(
|
||||
displayable_ids: &BTreeSet<FieldId>,
|
||||
locales: Option<&[Language]>,
|
||||
localized_attributes: &[LocalizedAttributesRule],
|
||||
) -> Result<(Option<MatchesPosition>, Document), MeilisearchHttpError> {
|
||||
) -> milli::Result<(Option<MatchesPosition>, Document)> {
|
||||
let mut matches_position = compute_matches.then(BTreeMap::new);
|
||||
let mut document = document.clone();
|
||||
|
||||
@@ -1898,7 +1920,7 @@ fn parse_filter_array(arr: &[Value]) -> Result<Option<Filter>, MeilisearchHttpEr
|
||||
}
|
||||
}
|
||||
|
||||
Ok(Filter::from_array(ands)?)
|
||||
Filter::from_array(ands).map_err(|e| MeilisearchHttpError::from_milli(e, None))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
||||
@@ -52,6 +52,25 @@ impl Value {
|
||||
}
|
||||
self
|
||||
}
|
||||
|
||||
/// Return `true` if the `status` field is set to `failed`.
|
||||
/// Panic if the `status` field doesn't exists.
|
||||
#[track_caller]
|
||||
pub fn is_fail(&self) -> bool {
|
||||
if !self["status"].is_string() {
|
||||
panic!("Called `is_fail` on {}", serde_json::to_string_pretty(&self.0).unwrap());
|
||||
}
|
||||
self["status"] == serde_json::Value::String(String::from("failed"))
|
||||
}
|
||||
|
||||
// Panic if the json doesn't contain the `status` field set to "succeeded"
|
||||
#[track_caller]
|
||||
pub fn failed(&self) -> &Self {
|
||||
if !self.is_fail() {
|
||||
panic!("Called failed on {}", serde_json::to_string_pretty(&self.0).unwrap());
|
||||
}
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
impl From<serde_json::Value> for Value {
|
||||
|
||||
@@ -1681,7 +1681,7 @@ async fn add_documents_invalid_geo_field() {
|
||||
"indexedDocuments": 0
|
||||
},
|
||||
"error": {
|
||||
"message": "The `_geo` field in the document with the id: `\"11\"` is not an object. Was expecting an object with the `_geo.lat` and `_geo.lng` fields but instead got `\"foobar\"`.",
|
||||
"message": "Index `test`: The `_geo` field in the document with the id: `\"11\"` is not an object. Was expecting an object with the `_geo.lat` and `_geo.lng` fields but instead got `\"foobar\"`.",
|
||||
"code": "invalid_document_geo_field",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"
|
||||
@@ -1719,7 +1719,7 @@ async fn add_documents_invalid_geo_field() {
|
||||
"indexedDocuments": 0
|
||||
},
|
||||
"error": {
|
||||
"message": "Could not find latitude nor longitude in the document with the id: `\"11\"`. Was expecting `_geo.lat` and `_geo.lng` fields.",
|
||||
"message": "Index `test`: Could not find latitude nor longitude in the document with the id: `\"11\"`. Was expecting `_geo.lat` and `_geo.lng` fields.",
|
||||
"code": "invalid_document_geo_field",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"
|
||||
@@ -1757,7 +1757,7 @@ async fn add_documents_invalid_geo_field() {
|
||||
"indexedDocuments": 0
|
||||
},
|
||||
"error": {
|
||||
"message": "Could not find latitude nor longitude in the document with the id: `\"11\"`. Was expecting `_geo.lat` and `_geo.lng` fields.",
|
||||
"message": "Index `test`: Could not find latitude nor longitude in the document with the id: `\"11\"`. Was expecting `_geo.lat` and `_geo.lng` fields.",
|
||||
"code": "invalid_document_geo_field",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"
|
||||
@@ -1795,7 +1795,7 @@ async fn add_documents_invalid_geo_field() {
|
||||
"indexedDocuments": 0
|
||||
},
|
||||
"error": {
|
||||
"message": "Could not find longitude in the document with the id: `\"11\"`. Was expecting a `_geo.lng` field.",
|
||||
"message": "Index `test`: Could not find longitude in the document with the id: `\"11\"`. Was expecting a `_geo.lng` field.",
|
||||
"code": "invalid_document_geo_field",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"
|
||||
@@ -1833,7 +1833,7 @@ async fn add_documents_invalid_geo_field() {
|
||||
"indexedDocuments": 0
|
||||
},
|
||||
"error": {
|
||||
"message": "Could not find latitude in the document with the id: `\"11\"`. Was expecting a `_geo.lat` field.",
|
||||
"message": "Index `test`: Could not find latitude in the document with the id: `\"11\"`. Was expecting a `_geo.lat` field.",
|
||||
"code": "invalid_document_geo_field",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"
|
||||
@@ -1871,7 +1871,7 @@ async fn add_documents_invalid_geo_field() {
|
||||
"indexedDocuments": 0
|
||||
},
|
||||
"error": {
|
||||
"message": "Could not find longitude in the document with the id: `\"11\"`. Was expecting a `_geo.lng` field.",
|
||||
"message": "Index `test`: Could not find longitude in the document with the id: `\"11\"`. Was expecting a `_geo.lng` field.",
|
||||
"code": "invalid_document_geo_field",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"
|
||||
@@ -1909,7 +1909,7 @@ async fn add_documents_invalid_geo_field() {
|
||||
"indexedDocuments": 0
|
||||
},
|
||||
"error": {
|
||||
"message": "Could not find latitude in the document with the id: `\"11\"`. Was expecting a `_geo.lat` field.",
|
||||
"message": "Index `test`: Could not find latitude in the document with the id: `\"11\"`. Was expecting a `_geo.lat` field.",
|
||||
"code": "invalid_document_geo_field",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"
|
||||
@@ -1947,7 +1947,7 @@ async fn add_documents_invalid_geo_field() {
|
||||
"indexedDocuments": 0
|
||||
},
|
||||
"error": {
|
||||
"message": "Could not parse latitude nor longitude in the document with the id: `\"11\"`. Was expecting finite numbers but instead got `false` and `true`.",
|
||||
"message": "Index `test`: Could not parse latitude nor longitude in the document with the id: `\"11\"`. Was expecting finite numbers but instead got `false` and `true`.",
|
||||
"code": "invalid_document_geo_field",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"
|
||||
@@ -1985,7 +1985,7 @@ async fn add_documents_invalid_geo_field() {
|
||||
"indexedDocuments": 0
|
||||
},
|
||||
"error": {
|
||||
"message": "Could not find longitude in the document with the id: `\"11\"`. Was expecting a `_geo.lng` field.",
|
||||
"message": "Index `test`: Could not find longitude in the document with the id: `\"11\"`. Was expecting a `_geo.lng` field.",
|
||||
"code": "invalid_document_geo_field",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"
|
||||
@@ -2023,7 +2023,7 @@ async fn add_documents_invalid_geo_field() {
|
||||
"indexedDocuments": 0
|
||||
},
|
||||
"error": {
|
||||
"message": "Could not find latitude in the document with the id: `\"11\"`. Was expecting a `_geo.lat` field.",
|
||||
"message": "Index `test`: Could not find latitude in the document with the id: `\"11\"`. Was expecting a `_geo.lat` field.",
|
||||
"code": "invalid_document_geo_field",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"
|
||||
@@ -2061,7 +2061,7 @@ async fn add_documents_invalid_geo_field() {
|
||||
"indexedDocuments": 0
|
||||
},
|
||||
"error": {
|
||||
"message": "Could not parse latitude nor longitude in the document with the id: `\"11\"`. Was expecting finite numbers but instead got `\"doggo\"` and `\"doggo\"`.",
|
||||
"message": "Index `test`: Could not parse latitude nor longitude in the document with the id: `\"11\"`. Was expecting finite numbers but instead got `\"doggo\"` and `\"doggo\"`.",
|
||||
"code": "invalid_document_geo_field",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"
|
||||
@@ -2099,7 +2099,7 @@ async fn add_documents_invalid_geo_field() {
|
||||
"indexedDocuments": 0
|
||||
},
|
||||
"error": {
|
||||
"message": "The `_geo` field in the document with the id: `\"11\"` contains the following unexpected fields: `{\"doggo\":\"are the best\"}`.",
|
||||
"message": "Index `test`: The `_geo` field in the document with the id: `\"11\"` contains the following unexpected fields: `{\"doggo\":\"are the best\"}`.",
|
||||
"code": "invalid_document_geo_field",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"
|
||||
@@ -2138,7 +2138,7 @@ async fn add_documents_invalid_geo_field() {
|
||||
"indexedDocuments": 0
|
||||
},
|
||||
"error": {
|
||||
"message": "Could not parse longitude in the document with the id: `\"12\"`. Was expecting a finite number but instead got `null`.",
|
||||
"message": "Index `test`: Could not parse longitude in the document with the id: `\"12\"`. Was expecting a finite number but instead got `null`.",
|
||||
"code": "invalid_document_geo_field",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"
|
||||
@@ -2175,7 +2175,7 @@ async fn add_documents_invalid_geo_field() {
|
||||
"indexedDocuments": 0
|
||||
},
|
||||
"error": {
|
||||
"message": "Could not parse latitude in the document with the id: `\"12\"`. Was expecting a finite number but instead got `null`.",
|
||||
"message": "Index `test`: Could not parse latitude in the document with the id: `\"12\"`. Was expecting a finite number but instead got `null`.",
|
||||
"code": "invalid_document_geo_field",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"
|
||||
@@ -2212,7 +2212,7 @@ async fn add_documents_invalid_geo_field() {
|
||||
"indexedDocuments": 0
|
||||
},
|
||||
"error": {
|
||||
"message": "Could not parse latitude nor longitude in the document with the id: `\"13\"`. Was expecting finite numbers but instead got `null` and `null`.",
|
||||
"message": "Index `test`: Could not parse latitude nor longitude in the document with the id: `\"13\"`. Was expecting finite numbers but instead got `null` and `null`.",
|
||||
"code": "invalid_document_geo_field",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"
|
||||
@@ -2279,7 +2279,7 @@ async fn add_invalid_geo_and_then_settings() {
|
||||
]
|
||||
},
|
||||
"error": {
|
||||
"message": "Could not parse latitude in the document with the id: `\"11\"`. Was expecting a finite number but instead got `null`.",
|
||||
"message": "Index `test`: Could not parse latitude in the document with the id: `\"11\"`. Was expecting a finite number but instead got `null`.",
|
||||
"code": "invalid_document_geo_field",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"
|
||||
|
||||
@@ -604,7 +604,7 @@ async fn delete_document_by_filter() {
|
||||
"originalFilter": "\"doggo = bernese\""
|
||||
},
|
||||
"error": {
|
||||
"message": "Attribute `doggo` is not filterable. This index does not have configured filterable attributes.\n1:6 doggo = bernese",
|
||||
"message": "Index `EMPTY_INDEX`: Attribute `doggo` is not filterable. This index does not have configured filterable attributes.\n1:6 doggo = bernese",
|
||||
"code": "invalid_document_filter",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_document_filter"
|
||||
@@ -636,7 +636,7 @@ async fn delete_document_by_filter() {
|
||||
"originalFilter": "\"catto = jorts\""
|
||||
},
|
||||
"error": {
|
||||
"message": "Attribute `catto` is not filterable. Available filterable attributes are: `id`, `title`.\n1:6 catto = jorts",
|
||||
"message": "Index `SHARED_DOCUMENTS`: Attribute `catto` is not filterable. Available filterable attributes are: `id`, `title`.\n1:6 catto = jorts",
|
||||
"code": "invalid_document_filter",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_document_filter"
|
||||
|
||||
@@ -95,7 +95,7 @@ async fn error_update_existing_primary_key() {
|
||||
let response = index.wait_task(2).await;
|
||||
|
||||
let expected_response = json!({
|
||||
"message": "Index already has a primary key: `id`.",
|
||||
"message": "Index `test`: Index already has a primary key: `id`.",
|
||||
"code": "index_primary_key_already_exists",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#index_primary_key_already_exists"
|
||||
|
||||
@@ -711,7 +711,7 @@ async fn filter_invalid_attribute_array() {
|
||||
index.wait_task(task.uid()).await;
|
||||
|
||||
let expected_response = json!({
|
||||
"message": "Attribute `many` is not filterable. Available filterable attributes are: `title`.\n1:5 many = Glass",
|
||||
"message": format!("Index `{}`: Attribute `many` is not filterable. Available filterable attributes are: `title`.\n1:5 many = Glass", index.uid),
|
||||
"code": "invalid_search_filter",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
|
||||
@@ -733,7 +733,7 @@ async fn filter_invalid_attribute_string() {
|
||||
index.wait_task(task.uid()).await;
|
||||
|
||||
let expected_response = json!({
|
||||
"message": "Attribute `many` is not filterable. Available filterable attributes are: `title`.\n1:5 many = Glass",
|
||||
"message": format!("Index `{}`: Attribute `many` is not filterable. Available filterable attributes are: `title`.\n1:5 many = Glass", index.uid),
|
||||
"code": "invalid_search_filter",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
|
||||
@@ -940,7 +940,7 @@ async fn sort_unsortable_attribute() {
|
||||
index.wait_task(response.uid()).await.succeeded();
|
||||
|
||||
let expected_response = json!({
|
||||
"message": "Attribute `title` is not sortable. Available sortable attributes are: `id`.",
|
||||
"message": format!("Index `{}`: Attribute `title` is not sortable. Available sortable attributes are: `id`.", index.uid),
|
||||
"code": "invalid_search_sort",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_search_sort"
|
||||
@@ -998,7 +998,7 @@ async fn sort_unset_ranking_rule() {
|
||||
index.wait_task(response.uid()).await.succeeded();
|
||||
|
||||
let expected_response = json!({
|
||||
"message": "You must specify where `sort` is listed in the rankingRules setting to use the sort parameter at search time.",
|
||||
"message": format!("Index `{}`: You must specify where `sort` is listed in the rankingRules setting to use the sort parameter at search time.", index.uid),
|
||||
"code": "invalid_search_sort",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_search_sort"
|
||||
@@ -1024,19 +1024,18 @@ async fn search_on_unknown_field() {
|
||||
index.update_settings_searchable_attributes(json!(["id", "title"])).await;
|
||||
index.wait_task(response.uid()).await.succeeded();
|
||||
|
||||
let expected_response = json!({
|
||||
"message": format!("Index `{}`: Attribute `unknown` is not searchable. Available searchable attributes are: `id, title`.", index.uid),
|
||||
"code": "invalid_search_attributes_to_search_on",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_search_attributes_to_search_on"
|
||||
});
|
||||
index
|
||||
.search(
|
||||
json!({"q": "Captain Marvel", "attributesToSearchOn": ["unknown"]}),
|
||||
|response, code| {
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"message": "Attribute `unknown` is not searchable. Available searchable attributes are: `id, title`.",
|
||||
"code": "invalid_search_attributes_to_search_on",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_search_attributes_to_search_on"
|
||||
}
|
||||
"###);
|
||||
assert_eq!(response, expected_response);
|
||||
assert_eq!(code, 400);
|
||||
},
|
||||
)
|
||||
.await;
|
||||
@@ -1050,19 +1049,18 @@ async fn search_on_unknown_field_plus_joker() {
|
||||
index.update_settings_searchable_attributes(json!(["id", "title"])).await;
|
||||
index.wait_task(response.uid()).await.succeeded();
|
||||
|
||||
let expected_response = json!({
|
||||
"message": format!("Index `{}`: Attribute `unknown` is not searchable. Available searchable attributes are: `id, title`.", index.uid),
|
||||
"code": "invalid_search_attributes_to_search_on",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_search_attributes_to_search_on"
|
||||
});
|
||||
index
|
||||
.search(
|
||||
json!({"q": "Captain Marvel", "attributesToSearchOn": ["*", "unknown"]}),
|
||||
|response, code| {
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"message": "Attribute `unknown` is not searchable. Available searchable attributes are: `id, title`.",
|
||||
"code": "invalid_search_attributes_to_search_on",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_search_attributes_to_search_on"
|
||||
}
|
||||
"###);
|
||||
assert_eq!(response, expected_response);
|
||||
assert_eq!(code, 400);
|
||||
},
|
||||
)
|
||||
.await;
|
||||
@@ -1071,15 +1069,8 @@ async fn search_on_unknown_field_plus_joker() {
|
||||
.search(
|
||||
json!({"q": "Captain Marvel", "attributesToSearchOn": ["unknown", "*"]}),
|
||||
|response, code| {
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"message": "Attribute `unknown` is not searchable. Available searchable attributes are: `id, title`.",
|
||||
"code": "invalid_search_attributes_to_search_on",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_search_attributes_to_search_on"
|
||||
}
|
||||
"###);
|
||||
assert_eq!(response, expected_response);
|
||||
assert_eq!(code, 400);
|
||||
},
|
||||
)
|
||||
.await;
|
||||
@@ -1092,47 +1083,44 @@ async fn distinct_at_search_time() {
|
||||
let (task, _) = index.create(None).await;
|
||||
index.wait_task(task.uid()).await.succeeded();
|
||||
|
||||
let expected_response = json!({
|
||||
"message": format!("Index `{}`: Attribute `doggo.truc` is not filterable and thus, cannot be used as distinct attribute. This index does not have configured filterable attributes.", index.uid),
|
||||
"code": "invalid_search_distinct",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_search_distinct"
|
||||
});
|
||||
let (response, code) =
|
||||
index.search_post(json!({"page": 0, "hitsPerPage": 2, "distinct": "doggo.truc"})).await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
"message": "Attribute `doggo.truc` is not filterable and thus, cannot be used as distinct attribute. This index does not have configured filterable attributes.",
|
||||
"code": "invalid_search_distinct",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_search_distinct"
|
||||
}
|
||||
"###);
|
||||
assert_eq!(response, expected_response);
|
||||
assert_eq!(code, 400);
|
||||
|
||||
let (task, _) = index.update_settings_filterable_attributes(json!(["color", "machin"])).await;
|
||||
index.wait_task(task.uid()).await;
|
||||
|
||||
let expected_response = json!({
|
||||
"message": format!("Index `{}`: Attribute `doggo.truc` is not filterable and thus, cannot be used as distinct attribute. Available filterable attributes are: `color, machin`.", index.uid),
|
||||
"code": "invalid_search_distinct",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_search_distinct"
|
||||
});
|
||||
let (response, code) =
|
||||
index.search_post(json!({"page": 0, "hitsPerPage": 2, "distinct": "doggo.truc"})).await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
"message": "Attribute `doggo.truc` is not filterable and thus, cannot be used as distinct attribute. Available filterable attributes are: `color, machin`.",
|
||||
"code": "invalid_search_distinct",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_search_distinct"
|
||||
}
|
||||
"###);
|
||||
assert_eq!(response, expected_response);
|
||||
assert_eq!(code, 400);
|
||||
|
||||
let (task, _) = index.update_settings_displayed_attributes(json!(["color"])).await;
|
||||
index.wait_task(task.uid()).await;
|
||||
|
||||
let expected_response = json!({
|
||||
"message": format!("Index `{}`: Attribute `doggo.truc` is not filterable and thus, cannot be used as distinct attribute. Available filterable attributes are: `color, <..hidden-attributes>`.", index.uid),
|
||||
"code": "invalid_search_distinct",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_search_distinct"
|
||||
});
|
||||
let (response, code) =
|
||||
index.search_post(json!({"page": 0, "hitsPerPage": 2, "distinct": "doggo.truc"})).await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
"message": "Attribute `doggo.truc` is not filterable and thus, cannot be used as distinct attribute. Available filterable attributes are: `color, <..hidden-attributes>`.",
|
||||
"code": "invalid_search_distinct",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_search_distinct"
|
||||
}
|
||||
"###);
|
||||
assert_eq!(response, expected_response);
|
||||
assert_eq!(code, 400);
|
||||
|
||||
let (response, code) =
|
||||
index.search_post(json!({"page": 0, "hitsPerPage": 2, "distinct": true})).await;
|
||||
|
||||
@@ -221,8 +221,15 @@ async fn add_documents_and_deactivate_facet_search() {
|
||||
let (response, code) =
|
||||
index.facet_search(json!({"facetName": "genres", "facetQuery": "a"})).await;
|
||||
|
||||
assert_eq!(code, 200, "{}", response);
|
||||
assert_eq!(dbg!(response)["facetHits"].as_array().unwrap().len(), 0);
|
||||
assert_eq!(code, 400, "{}", response);
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
"message": "The facet search is disabled for this index",
|
||||
"code": "facet_search_disabled",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#facet_search_disabled"
|
||||
}
|
||||
"###);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
@@ -245,8 +252,15 @@ async fn deactivate_facet_search_and_add_documents() {
|
||||
let (response, code) =
|
||||
index.facet_search(json!({"facetName": "genres", "facetQuery": "a"})).await;
|
||||
|
||||
assert_eq!(code, 200, "{}", response);
|
||||
assert_eq!(dbg!(response)["facetHits"].as_array().unwrap().len(), 0);
|
||||
assert_eq!(code, 400, "{}", response);
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
"message": "The facet search is disabled for this index",
|
||||
"code": "facet_search_disabled",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#facet_search_disabled"
|
||||
}
|
||||
"###);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
|
||||
@@ -1070,7 +1070,7 @@ async fn federation_one_query_error() {
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"message": "Inside `.queries[1]`: Attribute `title` is not filterable. This index does not have configured filterable attributes.\n1:6 title = toto",
|
||||
"message": "Inside `.queries[1]`: Index `nested`: Attribute `title` is not filterable. This index does not have configured filterable attributes.\n1:6 title = toto",
|
||||
"code": "invalid_search_filter",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
|
||||
@@ -1102,7 +1102,7 @@ async fn federation_one_query_sort_error() {
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"message": "Inside `.queries[1]`: Attribute `doggos` is not sortable. This index does not have configured sortable attributes.",
|
||||
"message": "Inside `.queries[1]`: Index `nested`: Attribute `doggos` is not sortable. This index does not have configured sortable attributes.",
|
||||
"code": "invalid_search_sort",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_search_sort"
|
||||
@@ -1166,7 +1166,7 @@ async fn federation_multiple_query_errors() {
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"message": "Inside `.queries[0]`: Attribute `title` is not filterable. This index does not have configured filterable attributes.\n1:6 title = toto",
|
||||
"message": "Inside `.queries[0]`: Index `test`: Attribute `title` is not filterable. This index does not have configured filterable attributes.\n1:6 title = toto",
|
||||
"code": "invalid_search_filter",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
|
||||
@@ -1198,7 +1198,7 @@ async fn federation_multiple_query_sort_errors() {
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"message": "Inside `.queries[0]`: Attribute `title` is not sortable. This index does not have configured sortable attributes.",
|
||||
"message": "Inside `.queries[0]`: Index `test`: Attribute `title` is not sortable. This index does not have configured sortable attributes.",
|
||||
"code": "invalid_search_sort",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_search_sort"
|
||||
@@ -1231,7 +1231,7 @@ async fn federation_multiple_query_errors_interleaved() {
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"message": "Inside `.queries[1]`: Attribute `doggos` is not filterable. This index does not have configured filterable attributes.\n1:7 doggos IN [intel, kefir]",
|
||||
"message": "Inside `.queries[1]`: Index `nested`: Attribute `doggos` is not filterable. This index does not have configured filterable attributes.\n1:7 doggos IN [intel, kefir]",
|
||||
"code": "invalid_search_filter",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
|
||||
@@ -1264,7 +1264,7 @@ async fn federation_multiple_query_sort_errors_interleaved() {
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"message": "Inside `.queries[1]`: Attribute `doggos` is not sortable. This index does not have configured sortable attributes.",
|
||||
"message": "Inside `.queries[1]`: Index `nested`: Attribute `doggos` is not sortable. This index does not have configured sortable attributes.",
|
||||
"code": "invalid_search_sort",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_search_sort"
|
||||
|
||||
@@ -129,11 +129,11 @@ async fn perform_on_demand_snapshot() {
|
||||
|
||||
index.load_test_set().await;
|
||||
|
||||
server.index("doggo").create(Some("bone")).await;
|
||||
index.wait_task(2).await;
|
||||
let (task, _) = server.index("doggo").create(Some("bone")).await;
|
||||
index.wait_task(task.uid()).await.succeeded();
|
||||
|
||||
server.index("doggo").create(Some("bone")).await;
|
||||
index.wait_task(2).await;
|
||||
let (task, _) = server.index("doggo").create(Some("bone")).await;
|
||||
index.wait_task(task.uid()).await.failed();
|
||||
|
||||
let (task, code) = server.create_snapshot().await;
|
||||
snapshot!(code, @"202 Accepted");
|
||||
|
||||
@@ -448,7 +448,7 @@ async fn test_summarized_delete_documents_by_filter() {
|
||||
"originalFilter": "\"doggo = bernese\""
|
||||
},
|
||||
"error": {
|
||||
"message": "Attribute `doggo` is not filterable. This index does not have configured filterable attributes.\n1:6 doggo = bernese",
|
||||
"message": "Index `test`: Attribute `doggo` is not filterable. This index does not have configured filterable attributes.\n1:6 doggo = bernese",
|
||||
"code": "invalid_document_filter",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_document_filter"
|
||||
|
||||
@@ -318,7 +318,7 @@ async fn try_to_disable_binary_quantization() {
|
||||
}
|
||||
},
|
||||
"error": {
|
||||
"message": "`.embedders.manual.binaryQuantized`: Cannot disable the binary quantization.\n - Note: Binary quantization is a lossy operation that cannot be reverted.\n - Hint: Add a new embedder that is non-quantized and regenerate the vectors.",
|
||||
"message": "Index `doggo`: `.embedders.manual.binaryQuantized`: Cannot disable the binary quantization.\n - Note: Binary quantization is a lossy operation that cannot be reverted.\n - Hint: Add a new embedder that is non-quantized and regenerate the vectors.",
|
||||
"code": "invalid_settings_embedders",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_settings_embedders"
|
||||
|
||||
@@ -250,7 +250,7 @@ async fn user_provided_embeddings_error() {
|
||||
"indexedDocuments": 0
|
||||
},
|
||||
"error": {
|
||||
"message": "Bad embedder configuration in the document with id: `0`. Missing field `._vectors.manual.regenerate`\n - note: `._vectors.manual` must be an array of floats, an array of arrays of floats, or an object with field `regenerate`",
|
||||
"message": "Index `doggo`: Bad embedder configuration in the document with id: `0`. Missing field `._vectors.manual.regenerate`\n - note: `._vectors.manual` must be an array of floats, an array of arrays of floats, or an object with field `regenerate`",
|
||||
"code": "invalid_vectors_type",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_vectors_type"
|
||||
@@ -280,7 +280,7 @@ async fn user_provided_embeddings_error() {
|
||||
"indexedDocuments": 0
|
||||
},
|
||||
"error": {
|
||||
"message": "Bad embedder configuration in the document with id: `0`. Missing field `._vectors.manual.regenerate`\n - note: `._vectors.manual` must be an array of floats, an array of arrays of floats, or an object with field `regenerate`",
|
||||
"message": "Index `doggo`: Bad embedder configuration in the document with id: `0`. Missing field `._vectors.manual.regenerate`\n - note: `._vectors.manual` must be an array of floats, an array of arrays of floats, or an object with field `regenerate`",
|
||||
"code": "invalid_vectors_type",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_vectors_type"
|
||||
@@ -311,7 +311,7 @@ async fn user_provided_embeddings_error() {
|
||||
"indexedDocuments": 0
|
||||
},
|
||||
"error": {
|
||||
"message": "Bad embedder configuration in the document with id: `0`. Could not parse `._vectors.manual.regenerate`: invalid type: string \"yes please\", expected a boolean at line 1 column 26",
|
||||
"message": "Index `doggo`: Bad embedder configuration in the document with id: `0`. Could not parse `._vectors.manual.regenerate`: invalid type: string \"yes please\", expected a boolean at line 1 column 26",
|
||||
"code": "invalid_vectors_type",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_vectors_type"
|
||||
@@ -340,7 +340,7 @@ async fn user_provided_embeddings_error() {
|
||||
"indexedDocuments": 0
|
||||
},
|
||||
"error": {
|
||||
"message": "Bad embedder configuration in the document with id: `0`. Invalid value type at `._vectors.manual.embeddings`: expected null or an array, but found a boolean: `true`",
|
||||
"message": "Index `doggo`: Bad embedder configuration in the document with id: `0`. Invalid value type at `._vectors.manual.embeddings`: expected null or an array, but found a boolean: `true`",
|
||||
"code": "invalid_vectors_type",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_vectors_type"
|
||||
@@ -369,7 +369,7 @@ async fn user_provided_embeddings_error() {
|
||||
"indexedDocuments": 0
|
||||
},
|
||||
"error": {
|
||||
"message": "Bad embedder configuration in the document with id: `0`. Invalid value type at `._vectors.manual.embeddings[0]`: expected a number or an array, but found a boolean: `true`",
|
||||
"message": "Index `doggo`: Bad embedder configuration in the document with id: `0`. Invalid value type at `._vectors.manual.embeddings[0]`: expected a number or an array, but found a boolean: `true`",
|
||||
"code": "invalid_vectors_type",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_vectors_type"
|
||||
@@ -398,7 +398,7 @@ async fn user_provided_embeddings_error() {
|
||||
"indexedDocuments": 0
|
||||
},
|
||||
"error": {
|
||||
"message": "Bad embedder configuration in the document with id: `0`. Invalid value type at `._vectors.manual.embeddings[0][0]`: expected a number, but found a boolean: `true`",
|
||||
"message": "Index `doggo`: Bad embedder configuration in the document with id: `0`. Invalid value type at `._vectors.manual.embeddings[0][0]`: expected a number, but found a boolean: `true`",
|
||||
"code": "invalid_vectors_type",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_vectors_type"
|
||||
@@ -440,7 +440,7 @@ async fn user_provided_embeddings_error() {
|
||||
"indexedDocuments": 0
|
||||
},
|
||||
"error": {
|
||||
"message": "Bad embedder configuration in the document with id: `0`. Invalid value type at `._vectors.manual.embeddings[1]`: expected a number, but found an array: `[0.2,0.3]`",
|
||||
"message": "Index `doggo`: Bad embedder configuration in the document with id: `0`. Invalid value type at `._vectors.manual.embeddings[1]`: expected a number, but found an array: `[0.2,0.3]`",
|
||||
"code": "invalid_vectors_type",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_vectors_type"
|
||||
@@ -469,7 +469,7 @@ async fn user_provided_embeddings_error() {
|
||||
"indexedDocuments": 0
|
||||
},
|
||||
"error": {
|
||||
"message": "Bad embedder configuration in the document with id: `0`. Invalid value type at `._vectors.manual.embeddings[1]`: expected an array, but found a number: `0.3`",
|
||||
"message": "Index `doggo`: Bad embedder configuration in the document with id: `0`. Invalid value type at `._vectors.manual.embeddings[1]`: expected an array, but found a number: `0.3`",
|
||||
"code": "invalid_vectors_type",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_vectors_type"
|
||||
@@ -498,7 +498,7 @@ async fn user_provided_embeddings_error() {
|
||||
"indexedDocuments": 0
|
||||
},
|
||||
"error": {
|
||||
"message": "Bad embedder configuration in the document with id: `0`. Invalid value type at `._vectors.manual.embeddings[0][1]`: expected a number, but found a boolean: `true`",
|
||||
"message": "Index `doggo`: Bad embedder configuration in the document with id: `0`. Invalid value type at `._vectors.manual.embeddings[0][1]`: expected a number, but found a boolean: `true`",
|
||||
"code": "invalid_vectors_type",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_vectors_type"
|
||||
@@ -539,7 +539,7 @@ async fn user_provided_vectors_error() {
|
||||
"indexedDocuments": 0
|
||||
},
|
||||
"error": {
|
||||
"message": "While embedding documents for embedder `manual`: no vectors provided for document `40` and at least 4 other document(s)\n- Note: `manual` has `source: userProvided`, so documents must provide embeddings as an array in `_vectors.manual`.\n- Hint: opt-out for a document with `_vectors.manual: null`",
|
||||
"message": "Index `doggo`: While embedding documents for embedder `manual`: no vectors provided for document `40` and at least 4 other document(s)\n- Note: `manual` has `source: userProvided`, so documents must provide embeddings as an array in `_vectors.manual`.\n- Hint: opt-out for a document with `_vectors.manual: null`",
|
||||
"code": "vector_embedding_error",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#vector_embedding_error"
|
||||
@@ -569,7 +569,7 @@ async fn user_provided_vectors_error() {
|
||||
"indexedDocuments": 0
|
||||
},
|
||||
"error": {
|
||||
"message": "While embedding documents for embedder `manual`: no vectors provided for document `42`\n- Note: `manual` has `source: userProvided`, so documents must provide embeddings as an array in `_vectors.manual`.\n- Hint: try replacing `_vector` by `_vectors` in 1 document(s).",
|
||||
"message": "Index `doggo`: While embedding documents for embedder `manual`: no vectors provided for document `42`\n- Note: `manual` has `source: userProvided`, so documents must provide embeddings as an array in `_vectors.manual`.\n- Hint: try replacing `_vector` by `_vectors` in 1 document(s).",
|
||||
"code": "vector_embedding_error",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#vector_embedding_error"
|
||||
@@ -599,7 +599,7 @@ async fn user_provided_vectors_error() {
|
||||
"indexedDocuments": 0
|
||||
},
|
||||
"error": {
|
||||
"message": "While embedding documents for embedder `manual`: no vectors provided for document `42`\n- Note: `manual` has `source: userProvided`, so documents must provide embeddings as an array in `_vectors.manual`.\n- Hint: try replacing `_vectors.manaul` by `_vectors.manual` in 1 document(s).",
|
||||
"message": "Index `doggo`: While embedding documents for embedder `manual`: no vectors provided for document `42`\n- Note: `manual` has `source: userProvided`, so documents must provide embeddings as an array in `_vectors.manual`.\n- Hint: try replacing `_vectors.manaul` by `_vectors.manual` in 1 document(s).",
|
||||
"code": "vector_embedding_error",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#vector_embedding_error"
|
||||
|
||||
@@ -713,7 +713,7 @@ async fn bad_api_key() {
|
||||
}
|
||||
},
|
||||
"error": {
|
||||
"message": "While embedding documents for embedder `default`: user error: could not authenticate against OpenAI server\n - server replied with `{\"error\":{\"message\":\"Incorrect API key provided: Bearer doggo. You can find your API key at https://platform.openai.com/account/api-keys.\",\"type\":\"invalid_request_error\",\"param\":null,\"code\":\"invalid_api_key\"}}`\n - Hint: Check the `apiKey` parameter in the embedder configuration, and the `MEILI_OPENAI_API_KEY` and `OPENAI_API_KEY` environment variables",
|
||||
"message": "Index `doggo`: While embedding documents for embedder `default`: user error: could not authenticate against OpenAI server\n - server replied with `{\"error\":{\"message\":\"Incorrect API key provided: Bearer doggo. You can find your API key at https://platform.openai.com/account/api-keys.\",\"type\":\"invalid_request_error\",\"param\":null,\"code\":\"invalid_api_key\"}}`\n - Hint: Check the `apiKey` parameter in the embedder configuration, and the `MEILI_OPENAI_API_KEY` and `OPENAI_API_KEY` environment variables",
|
||||
"code": "vector_embedding_error",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#vector_embedding_error"
|
||||
@@ -757,7 +757,7 @@ async fn bad_api_key() {
|
||||
}
|
||||
},
|
||||
"error": {
|
||||
"message": "While embedding documents for embedder `default`: user error: could not authenticate against OpenAI server\n - server replied with `{\"error\":{\"message\":\"You didn't provide an API key. You need to provide your API key in an Authorization header using Bearer auth (i.e. Authorization: Bearer YOUR_KEY), or as the password field (with blank username) if you're accessing the API from your browser and are prompted for a username and password. You can obtain an API key from https://platform.openai.com/account/api-keys.\",\"type\":\"invalid_request_error\",\"param\":null,\"code\":null}}`\n - Hint: Check the `apiKey` parameter in the embedder configuration, and the `MEILI_OPENAI_API_KEY` and `OPENAI_API_KEY` environment variables",
|
||||
"message": "Index `doggo`: While embedding documents for embedder `default`: user error: could not authenticate against OpenAI server\n - server replied with `{\"error\":{\"message\":\"You didn't provide an API key. You need to provide your API key in an Authorization header using Bearer auth (i.e. Authorization: Bearer YOUR_KEY), or as the password field (with blank username) if you're accessing the API from your browser and are prompted for a username and password. You can obtain an API key from https://platform.openai.com/account/api-keys.\",\"type\":\"invalid_request_error\",\"param\":null,\"code\":null}}`\n - Hint: Check the `apiKey` parameter in the embedder configuration, and the `MEILI_OPENAI_API_KEY` and `OPENAI_API_KEY` environment variables",
|
||||
"code": "vector_embedding_error",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#vector_embedding_error"
|
||||
|
||||
@@ -985,7 +985,7 @@ async fn bad_settings() {
|
||||
}
|
||||
},
|
||||
"error": {
|
||||
"message": "Error while generating embeddings: runtime error: could not determine model dimensions:\n - test embedding failed with runtime error: error extracting embeddings from the response:\n - in `response`, while extracting a single \"{{embedding}}\", expected `response` to be an array of numbers, but failed to parse server response:\n - invalid type: map, expected a sequence",
|
||||
"message": "Index `doggo`: Error while generating embeddings: runtime error: could not determine model dimensions:\n - test embedding failed with runtime error: error extracting embeddings from the response:\n - in `response`, while extracting a single \"{{embedding}}\", expected `response` to be an array of numbers, but failed to parse server response:\n - invalid type: map, expected a sequence",
|
||||
"code": "vector_embedding_error",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#vector_embedding_error"
|
||||
@@ -1025,7 +1025,7 @@ async fn bad_settings() {
|
||||
"indexedDocuments": 0
|
||||
},
|
||||
"error": {
|
||||
"message": "While embedding documents for embedder `rest`: runtime error: was expecting embeddings of dimension `2`, got embeddings of dimensions `3`",
|
||||
"message": "Index `doggo`: While embedding documents for embedder `rest`: runtime error: was expecting embeddings of dimension `2`, got embeddings of dimensions `3`",
|
||||
"code": "vector_embedding_error",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#vector_embedding_error"
|
||||
@@ -1178,7 +1178,7 @@ async fn server_returns_bad_request() {
|
||||
}
|
||||
},
|
||||
"error": {
|
||||
"message": "Error while generating embeddings: runtime error: could not determine model dimensions:\n - test embedding failed with user error: sent a bad request to embedding server\n - Hint: check that the `request` in the embedder configuration matches the remote server's API\n - server replied with `{\"error\":\"Invalid request: invalid type: string \\\"test\\\", expected struct MultipleRequest at line 1 column 6\"}`",
|
||||
"message": "Index `doggo`: Error while generating embeddings: runtime error: could not determine model dimensions:\n - test embedding failed with user error: sent a bad request to embedding server\n - Hint: check that the `request` in the embedder configuration matches the remote server's API\n - server replied with `{\"error\":\"Invalid request: invalid type: string \\\"test\\\", expected struct MultipleRequest at line 1 column 6\"}`",
|
||||
"code": "vector_embedding_error",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#vector_embedding_error"
|
||||
@@ -1247,7 +1247,7 @@ async fn server_returns_bad_request() {
|
||||
"indexedDocuments": 0
|
||||
},
|
||||
"error": {
|
||||
"message": "While embedding documents for embedder `rest`: user error: sent a bad request to embedding server\n - Hint: check that the `request` in the embedder configuration matches the remote server's API\n - server replied with `{\"error\":\"Invalid request: invalid type: string \\\"name: kefir\\\\n\\\", expected struct MultipleRequest at line 1 column 15\"}`",
|
||||
"message": "Index `doggo`: While embedding documents for embedder `rest`: user error: sent a bad request to embedding server\n - Hint: check that the `request` in the embedder configuration matches the remote server's API\n - server replied with `{\"error\":\"Invalid request: invalid type: string \\\"name: kefir\\\\n\\\", expected struct MultipleRequest at line 1 column 15\"}`",
|
||||
"code": "vector_embedding_error",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#vector_embedding_error"
|
||||
@@ -1306,7 +1306,7 @@ async fn server_returns_bad_response() {
|
||||
}
|
||||
},
|
||||
"error": {
|
||||
"message": "Error while generating embeddings: runtime error: could not determine model dimensions:\n - test embedding failed with runtime error: error extracting embeddings from the response:\n - in `response`, while extracting the array of \"{{embedding}}\"s, configuration expects `response` to be an array with at least 1 item(s) but server sent an object with 1 field(s)",
|
||||
"message": "Index `doggo`: Error while generating embeddings: runtime error: could not determine model dimensions:\n - test embedding failed with runtime error: error extracting embeddings from the response:\n - in `response`, while extracting the array of \"{{embedding}}\"s, configuration expects `response` to be an array with at least 1 item(s) but server sent an object with 1 field(s)",
|
||||
"code": "vector_embedding_error",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#vector_embedding_error"
|
||||
@@ -1362,7 +1362,7 @@ async fn server_returns_bad_response() {
|
||||
}
|
||||
},
|
||||
"error": {
|
||||
"message": "Error while generating embeddings: runtime error: could not determine model dimensions:\n - test embedding failed with runtime error: error extracting embeddings from the response:\n - in `response`, while extracting item #0 from the array of \"{{embedding}}\"s, expected `response` to be an array of numbers, but failed to parse server response:\n - invalid type: map, expected a sequence",
|
||||
"message": "Index `doggo`: Error while generating embeddings: runtime error: could not determine model dimensions:\n - test embedding failed with runtime error: error extracting embeddings from the response:\n - in `response`, while extracting item #0 from the array of \"{{embedding}}\"s, expected `response` to be an array of numbers, but failed to parse server response:\n - invalid type: map, expected a sequence",
|
||||
"code": "vector_embedding_error",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#vector_embedding_error"
|
||||
@@ -1414,7 +1414,7 @@ async fn server_returns_bad_response() {
|
||||
}
|
||||
},
|
||||
"error": {
|
||||
"message": "Error while generating embeddings: runtime error: could not determine model dimensions:\n - test embedding failed with runtime error: error extracting embeddings from the response:\n - in `response.output`, while extracting a single \"{{embedding}}\", expected `output` to be an array of numbers, but failed to parse server response:\n - invalid type: map, expected f32",
|
||||
"message": "Index `doggo`: Error while generating embeddings: runtime error: could not determine model dimensions:\n - test embedding failed with runtime error: error extracting embeddings from the response:\n - in `response.output`, while extracting a single \"{{embedding}}\", expected `output` to be an array of numbers, but failed to parse server response:\n - invalid type: map, expected f32",
|
||||
"code": "vector_embedding_error",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#vector_embedding_error"
|
||||
@@ -1478,7 +1478,7 @@ async fn server_returns_bad_response() {
|
||||
}
|
||||
},
|
||||
"error": {
|
||||
"message": "Error while generating embeddings: runtime error: could not determine model dimensions:\n - test embedding failed with runtime error: error extracting embeddings from the response:\n - in `response.embedding`, while extracting item #0 from the array of \"{{embedding}}\"s, configuration expects `embedding` to be an object with key `data` but server sent an array of size 3",
|
||||
"message": "Index `doggo`: Error while generating embeddings: runtime error: could not determine model dimensions:\n - test embedding failed with runtime error: error extracting embeddings from the response:\n - in `response.embedding`, while extracting item #0 from the array of \"{{embedding}}\"s, configuration expects `embedding` to be an object with key `data` but server sent an array of size 3",
|
||||
"code": "vector_embedding_error",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#vector_embedding_error"
|
||||
@@ -1542,7 +1542,7 @@ async fn server_returns_bad_response() {
|
||||
}
|
||||
},
|
||||
"error": {
|
||||
"message": "Error while generating embeddings: runtime error: could not determine model dimensions:\n - test embedding failed with runtime error: error extracting embeddings from the response:\n - in `response.output[0]`, while extracting a single \"{{embedding}}\", configuration expects key \"embeddings\", which is missing in response\n - Hint: item #0 inside `output` has key `embedding`, did you mean `response.output[0].embedding` in embedder configuration?",
|
||||
"message": "Index `doggo`: Error while generating embeddings: runtime error: could not determine model dimensions:\n - test embedding failed with runtime error: error extracting embeddings from the response:\n - in `response.output[0]`, while extracting a single \"{{embedding}}\", configuration expects key \"embeddings\", which is missing in response\n - Hint: item #0 inside `output` has key `embedding`, did you mean `response.output[0].embedding` in embedder configuration?",
|
||||
"code": "vector_embedding_error",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#vector_embedding_error"
|
||||
@@ -1908,7 +1908,7 @@ async fn server_custom_header() {
|
||||
}
|
||||
},
|
||||
"error": {
|
||||
"message": "Error while generating embeddings: runtime error: could not determine model dimensions:\n - test embedding failed with user error: could not authenticate against embedding server\n - server replied with `{\"error\":\"missing header 'my-nonstandard-auth'\"}`\n - Hint: Check the `apiKey` parameter in the embedder configuration",
|
||||
"message": "Index `doggo`: Error while generating embeddings: runtime error: could not determine model dimensions:\n - test embedding failed with user error: could not authenticate against embedding server\n - server replied with `{\"error\":\"missing header 'my-nonstandard-auth'\"}`\n - Hint: Check the `apiKey` parameter in the embedder configuration",
|
||||
"code": "vector_embedding_error",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#vector_embedding_error"
|
||||
@@ -1951,7 +1951,7 @@ async fn server_custom_header() {
|
||||
}
|
||||
},
|
||||
"error": {
|
||||
"message": "Error while generating embeddings: runtime error: could not determine model dimensions:\n - test embedding failed with user error: could not authenticate against embedding server\n - server replied with `{\"error\":\"thou shall not pass, Balrog\"}`\n - Hint: Check the `apiKey` parameter in the embedder configuration",
|
||||
"message": "Index `doggo`: Error while generating embeddings: runtime error: could not determine model dimensions:\n - test embedding failed with user error: could not authenticate against embedding server\n - server replied with `{\"error\":\"thou shall not pass, Balrog\"}`\n - Hint: Check the `apiKey` parameter in the embedder configuration",
|
||||
"code": "vector_embedding_error",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#vector_embedding_error"
|
||||
@@ -2099,7 +2099,7 @@ async fn searchable_reindex() {
|
||||
]
|
||||
},
|
||||
"error": {
|
||||
"message": "While embedding documents for embedder `rest`: error: received unexpected HTTP 404 from embedding server\n - server replied with `{\"error\":\"text not found\",\"text\":\"breed: patou\\n\"}`",
|
||||
"message": "Index `doggo`: While embedding documents for embedder `rest`: error: received unexpected HTTP 404 from embedding server\n - server replied with `{\"error\":\"text not found\",\"text\":\"breed: patou\\n\"}`",
|
||||
"code": "vector_embedding_error",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#vector_embedding_error"
|
||||
|
||||
@@ -98,7 +98,7 @@ allocator-api2 = "0.2.18"
|
||||
rustc-hash = "2.0.0"
|
||||
uell = "0.1.0"
|
||||
enum-iterator = "2.1.0"
|
||||
bbqueue = { git = "https://github.com/kerollmops/bbqueue" }
|
||||
bbqueue = { git = "https://github.com/meilisearch/bbqueue" }
|
||||
flume = { version = "0.11.1", default-features = false }
|
||||
|
||||
[dev-dependencies]
|
||||
|
||||
@@ -7,7 +7,7 @@ use std::num::NonZeroU16;
|
||||
use std::ops::Range;
|
||||
use std::time::Duration;
|
||||
|
||||
use bbqueue::framed::{FrameGrantR, FrameGrantW, FrameProducer};
|
||||
use bbqueue::framed::{FrameGrantR, FrameProducer};
|
||||
use bbqueue::BBBuffer;
|
||||
use bytemuck::{checked, CheckedBitPattern, NoUninit};
|
||||
use flume::{RecvTimeoutError, SendError};
|
||||
@@ -21,11 +21,15 @@ use super::ref_cell_ext::RefCellExt;
|
||||
use super::thread_local::{FullySend, ThreadLocal};
|
||||
use super::StdResult;
|
||||
use crate::heed_codec::facet::{FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec};
|
||||
use crate::heed_codec::StrBEU16Codec;
|
||||
use crate::index::db_name;
|
||||
use crate::index::main_key::{GEO_FACETED_DOCUMENTS_IDS_KEY, GEO_RTREE_KEY};
|
||||
use crate::update::new::KvReaderFieldId;
|
||||
use crate::vector::Embedding;
|
||||
use crate::{CboRoaringBitmapCodec, DocumentId, Error, Index, InternalError};
|
||||
use crate::{
|
||||
CboRoaringBitmapCodec, DocumentId, Error, FieldIdWordCountCodec, Index, InternalError,
|
||||
U8StrStrCodec,
|
||||
};
|
||||
|
||||
/// Creates a tuple of senders/receiver to be used by
|
||||
/// the extractors and the writer loop.
|
||||
@@ -403,6 +407,32 @@ impl Database {
|
||||
Database::FieldIdDocidFacetF64s => db_name::FIELD_ID_DOCID_FACET_F64S,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn stringify_key(&self, key: &[u8]) -> String {
|
||||
use heed::types::*;
|
||||
|
||||
match self {
|
||||
Database::WordDocids => format!("{:?}", Str::bytes_decode(key).unwrap()),
|
||||
Database::WordFidDocids => format!("{:?}", StrBEU16Codec::bytes_decode(key).unwrap()),
|
||||
Database::WordPositionDocids => {
|
||||
format!("{:?}", StrBEU16Codec::bytes_decode(key).unwrap())
|
||||
}
|
||||
Database::WordPairProximityDocids => {
|
||||
format!("{:?}", U8StrStrCodec::bytes_decode(key).unwrap())
|
||||
}
|
||||
Database::ExactWordDocids => format!("{:?}", Str::bytes_decode(key).unwrap()),
|
||||
Database::FidWordCountDocids => {
|
||||
format!("{:?}", FieldIdWordCountCodec::bytes_decode(key).unwrap())
|
||||
}
|
||||
Database::FieldIdDocidFacetStrings => {
|
||||
format!("{:?}", FieldDocIdFacetStringCodec::bytes_decode(key).unwrap())
|
||||
}
|
||||
Database::FieldIdDocidFacetF64s => {
|
||||
format!("{:?}", FieldDocIdFacetF64Codec::bytes_decode(key).unwrap())
|
||||
}
|
||||
d => unimplemented!("stringify_key for {:?}", d),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<FacetKind> for Database {
|
||||
@@ -454,14 +484,10 @@ impl<'b> ExtractorBbqueueSender<'b> {
|
||||
}
|
||||
|
||||
// Spin loop to have a frame the size we requested.
|
||||
let mut grant = reserve_grant(&mut producer, total_length, &self.sender)?;
|
||||
payload_header.serialize_into(&mut grant);
|
||||
|
||||
// We only send a wake up message when the channel is empty
|
||||
// so that we don't fill the channel with too many WakeUps.
|
||||
if self.sender.is_empty() {
|
||||
self.sender.send(ReceiverAction::WakeUp).unwrap();
|
||||
}
|
||||
reserve_and_write_grant(&mut producer, total_length, &self.sender, |grant| {
|
||||
payload_header.serialize_into(grant);
|
||||
Ok(())
|
||||
})?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -500,24 +526,20 @@ impl<'b> ExtractorBbqueueSender<'b> {
|
||||
}
|
||||
|
||||
// Spin loop to have a frame the size we requested.
|
||||
let mut grant = reserve_grant(&mut producer, total_length, &self.sender)?;
|
||||
reserve_and_write_grant(&mut producer, total_length, &self.sender, |grant| {
|
||||
let header_size = payload_header.header_size();
|
||||
let (header_bytes, remaining) = grant.split_at_mut(header_size);
|
||||
payload_header.serialize_into(header_bytes);
|
||||
|
||||
let header_size = payload_header.header_size();
|
||||
let (header_bytes, remaining) = grant.split_at_mut(header_size);
|
||||
payload_header.serialize_into(header_bytes);
|
||||
|
||||
if dimensions != 0 {
|
||||
let output_iter = remaining.chunks_exact_mut(dimensions * mem::size_of::<f32>());
|
||||
for (embedding, output) in embeddings.iter().zip(output_iter) {
|
||||
output.copy_from_slice(bytemuck::cast_slice(embedding));
|
||||
if dimensions != 0 {
|
||||
let output_iter = remaining.chunks_exact_mut(dimensions * mem::size_of::<f32>());
|
||||
for (embedding, output) in embeddings.iter().zip(output_iter) {
|
||||
output.copy_from_slice(bytemuck::cast_slice(embedding));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// We only send a wake up message when the channel is empty
|
||||
// so that we don't fill the channel with too many WakeUps.
|
||||
if self.sender.is_empty() {
|
||||
self.sender.send(ReceiverAction::WakeUp).unwrap();
|
||||
}
|
||||
Ok(())
|
||||
})?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -575,19 +597,13 @@ impl<'b> ExtractorBbqueueSender<'b> {
|
||||
}
|
||||
|
||||
// Spin loop to have a frame the size we requested.
|
||||
let mut grant = reserve_grant(&mut producer, total_length, &self.sender)?;
|
||||
|
||||
let header_size = payload_header.header_size();
|
||||
let (header_bytes, remaining) = grant.split_at_mut(header_size);
|
||||
payload_header.serialize_into(header_bytes);
|
||||
let (key_buffer, value_buffer) = remaining.split_at_mut(key_length.get() as usize);
|
||||
key_value_writer(key_buffer, value_buffer)?;
|
||||
|
||||
// We only send a wake up message when the channel is empty
|
||||
// so that we don't fill the channel with too many WakeUps.
|
||||
if self.sender.is_empty() {
|
||||
self.sender.send(ReceiverAction::WakeUp).unwrap();
|
||||
}
|
||||
reserve_and_write_grant(&mut producer, total_length, &self.sender, |grant| {
|
||||
let header_size = payload_header.header_size();
|
||||
let (header_bytes, remaining) = grant.split_at_mut(header_size);
|
||||
payload_header.serialize_into(header_bytes);
|
||||
let (key_buffer, value_buffer) = remaining.split_at_mut(key_length.get() as usize);
|
||||
key_value_writer(key_buffer, value_buffer)
|
||||
})?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -629,37 +645,44 @@ impl<'b> ExtractorBbqueueSender<'b> {
|
||||
}
|
||||
|
||||
// Spin loop to have a frame the size we requested.
|
||||
let mut grant = reserve_grant(&mut producer, total_length, &self.sender)?;
|
||||
|
||||
let header_size = payload_header.header_size();
|
||||
let (header_bytes, remaining) = grant.split_at_mut(header_size);
|
||||
payload_header.serialize_into(header_bytes);
|
||||
key_writer(remaining)?;
|
||||
|
||||
// We only send a wake up message when the channel is empty
|
||||
// so that we don't fill the channel with too many WakeUps.
|
||||
if self.sender.is_empty() {
|
||||
self.sender.send(ReceiverAction::WakeUp).unwrap();
|
||||
}
|
||||
reserve_and_write_grant(&mut producer, total_length, &self.sender, |grant| {
|
||||
let header_size = payload_header.header_size();
|
||||
let (header_bytes, remaining) = grant.split_at_mut(header_size);
|
||||
payload_header.serialize_into(header_bytes);
|
||||
key_writer(remaining)
|
||||
})?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// Try to reserve a frame grant of `total_length` by spin looping
|
||||
/// on the BBQueue buffer and panics if the receiver has been disconnected.
|
||||
fn reserve_grant<'b>(
|
||||
producer: &mut FrameProducer<'b>,
|
||||
/// Try to reserve a frame grant of `total_length` by spin
|
||||
/// looping on the BBQueue buffer, panics if the receiver
|
||||
/// has been disconnected or send a WakeUp message if necessary.
|
||||
fn reserve_and_write_grant<F>(
|
||||
producer: &mut FrameProducer,
|
||||
total_length: usize,
|
||||
sender: &flume::Sender<ReceiverAction>,
|
||||
) -> crate::Result<FrameGrantW<'b>> {
|
||||
f: F,
|
||||
) -> crate::Result<()>
|
||||
where
|
||||
F: FnOnce(&mut [u8]) -> crate::Result<()>,
|
||||
{
|
||||
loop {
|
||||
for _ in 0..10_000 {
|
||||
match producer.grant(total_length) {
|
||||
Ok(mut grant) => {
|
||||
// We could commit only the used memory.
|
||||
grant.to_commit(total_length);
|
||||
return Ok(grant);
|
||||
f(&mut grant)?;
|
||||
grant.commit(total_length);
|
||||
|
||||
// We only send a wake up message when the channel is empty
|
||||
// so that we don't fill the channel with too many WakeUps.
|
||||
if sender.is_empty() {
|
||||
sender.send(ReceiverAction::WakeUp).unwrap();
|
||||
}
|
||||
|
||||
return Ok(());
|
||||
}
|
||||
Err(bbqueue::Error::InsufficientSize) => continue,
|
||||
Err(e) => unreachable!("{e:?}"),
|
||||
@@ -668,6 +691,11 @@ fn reserve_grant<'b>(
|
||||
if sender.is_disconnected() {
|
||||
return Err(Error::InternalError(InternalError::AbortedIndexation));
|
||||
}
|
||||
|
||||
// We prefer to yield and allow the writing thread
|
||||
// to do its job, especially beneficial when there
|
||||
// is only one CPU core available.
|
||||
std::thread::yield_now();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1,7 +1,10 @@
|
||||
use bumpalo::Bump;
|
||||
use heed::RoTxn;
|
||||
|
||||
use super::document::{DocumentFromDb, DocumentFromVersions, MergedDocument, Versions};
|
||||
use super::document::{
|
||||
Document as _, DocumentFromDb, DocumentFromVersions, MergedDocument, Versions,
|
||||
};
|
||||
use super::extract::perm_json_p;
|
||||
use super::vector_document::{
|
||||
MergedVectorDocument, VectorDocumentFromDb, VectorDocumentFromVersions,
|
||||
};
|
||||
@@ -164,6 +167,80 @@ impl<'doc> Update<'doc> {
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns whether the updated version of the document is different from the current version for the passed subset of fields.
|
||||
///
|
||||
/// `true` if at least one top-level-field that is a exactly a member of field or a parent of a member of field changed.
|
||||
/// Otherwise `false`.
|
||||
pub fn has_changed_for_fields<'t, Mapper: FieldIdMapper>(
|
||||
&self,
|
||||
fields: Option<&[&str]>,
|
||||
rtxn: &'t RoTxn,
|
||||
index: &'t Index,
|
||||
mapper: &'t Mapper,
|
||||
) -> Result<bool> {
|
||||
let mut changed = false;
|
||||
let mut cached_current = None;
|
||||
let mut updated_selected_field_count = 0;
|
||||
|
||||
for entry in self.updated().iter_top_level_fields() {
|
||||
let (key, updated_value) = entry?;
|
||||
|
||||
if perm_json_p::select_field(key, fields, &[]) == perm_json_p::Selection::Skip {
|
||||
continue;
|
||||
}
|
||||
|
||||
updated_selected_field_count += 1;
|
||||
let current = match cached_current {
|
||||
Some(current) => current,
|
||||
None => self.current(rtxn, index, mapper)?,
|
||||
};
|
||||
let current_value = current.top_level_field(key)?;
|
||||
let Some(current_value) = current_value else {
|
||||
changed = true;
|
||||
break;
|
||||
};
|
||||
|
||||
if current_value.get() != updated_value.get() {
|
||||
changed = true;
|
||||
break;
|
||||
}
|
||||
cached_current = Some(current);
|
||||
}
|
||||
|
||||
if !self.has_deletion {
|
||||
// no field deletion, so fields that don't appear in `updated` cannot have changed
|
||||
return Ok(changed);
|
||||
}
|
||||
|
||||
if changed {
|
||||
return Ok(true);
|
||||
}
|
||||
|
||||
// we saw all updated fields, and set `changed` if any field wasn't in `current`.
|
||||
// so if there are as many fields in `current` as in `updated`, then nothing changed.
|
||||
// If there is any more fields in `current`, then they are missing in `updated`.
|
||||
let has_deleted_fields = {
|
||||
let current = match cached_current {
|
||||
Some(current) => current,
|
||||
None => self.current(rtxn, index, mapper)?,
|
||||
};
|
||||
|
||||
let mut current_selected_field_count = 0;
|
||||
for entry in current.iter_top_level_fields() {
|
||||
let (key, _) = entry?;
|
||||
|
||||
if perm_json_p::select_field(key, fields, &[]) == perm_json_p::Selection::Skip {
|
||||
continue;
|
||||
}
|
||||
current_selected_field_count += 1;
|
||||
}
|
||||
|
||||
current_selected_field_count != updated_selected_field_count
|
||||
};
|
||||
|
||||
Ok(has_deleted_fields)
|
||||
}
|
||||
|
||||
pub fn updated_vectors(
|
||||
&self,
|
||||
doc_alloc: &'doc Bump,
|
||||
|
||||
@@ -466,12 +466,13 @@ pub fn transpose_and_freeze_caches<'a, 'extractor>(
|
||||
Ok(bucket_caches)
|
||||
}
|
||||
|
||||
/// Merges the caches that must be all associated to the same bucket.
|
||||
/// Merges the caches that must be all associated to the same bucket
|
||||
/// but make sure to sort the different buckets before performing the merges.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// - If the bucket IDs in these frozen caches are not exactly the same.
|
||||
pub fn merge_caches<F>(frozen: Vec<FrozenCache>, mut f: F) -> Result<()>
|
||||
pub fn merge_caches_sorted<F>(frozen: Vec<FrozenCache>, mut f: F) -> Result<()>
|
||||
where
|
||||
F: for<'a> FnMut(&'a [u8], DelAddRoaringBitmap) -> Result<()>,
|
||||
{
|
||||
@@ -543,12 +544,12 @@ where
|
||||
|
||||
// Then manage the content on the HashMap entries that weren't taken (mem::take).
|
||||
while let Some(mut map) = maps.pop() {
|
||||
for (key, bbbul) in map.iter_mut() {
|
||||
// Make sure we don't try to work with entries already managed by the spilled
|
||||
if bbbul.is_empty() {
|
||||
continue;
|
||||
}
|
||||
// Make sure we don't try to work with entries already managed by the spilled
|
||||
let mut ordered_entries: Vec<_> =
|
||||
map.iter_mut().filter(|(_, bbbul)| !bbbul.is_empty()).collect();
|
||||
ordered_entries.sort_unstable_by_key(|(key, _)| *key);
|
||||
|
||||
for (key, bbbul) in ordered_entries {
|
||||
let mut output = DelAddRoaringBitmap::empty();
|
||||
output.union_and_clear_bbbul(bbbul);
|
||||
|
||||
|
||||
@@ -97,6 +97,15 @@ impl FacetedDocidsExtractor {
|
||||
},
|
||||
),
|
||||
DocumentChange::Update(inner) => {
|
||||
if !inner.has_changed_for_fields(
|
||||
Some(attributes_to_extract),
|
||||
rtxn,
|
||||
index,
|
||||
context.db_fields_ids_map,
|
||||
)? {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
extract_document_facets(
|
||||
attributes_to_extract,
|
||||
inner.current(rtxn, index, context.db_fields_ids_map)?,
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
use std::cell::RefCell;
|
||||
use std::fs::File;
|
||||
use std::io::{self, BufReader, BufWriter, ErrorKind, Read, Write as _};
|
||||
use std::io::{self, BufReader, BufWriter, ErrorKind, Read, Seek as _, Write as _};
|
||||
use std::{iter, mem, result};
|
||||
|
||||
use bumpalo::Bump;
|
||||
@@ -97,30 +97,34 @@ pub struct FrozenGeoExtractorData<'extractor> {
|
||||
impl<'extractor> FrozenGeoExtractorData<'extractor> {
|
||||
pub fn iter_and_clear_removed(
|
||||
&mut self,
|
||||
) -> impl IntoIterator<Item = io::Result<ExtractedGeoPoint>> + '_ {
|
||||
mem::take(&mut self.removed)
|
||||
) -> io::Result<impl IntoIterator<Item = io::Result<ExtractedGeoPoint>> + '_> {
|
||||
Ok(mem::take(&mut self.removed)
|
||||
.iter()
|
||||
.copied()
|
||||
.map(Ok)
|
||||
.chain(iterator_over_spilled_geopoints(&mut self.spilled_removed))
|
||||
.chain(iterator_over_spilled_geopoints(&mut self.spilled_removed)?))
|
||||
}
|
||||
|
||||
pub fn iter_and_clear_inserted(
|
||||
&mut self,
|
||||
) -> impl IntoIterator<Item = io::Result<ExtractedGeoPoint>> + '_ {
|
||||
mem::take(&mut self.inserted)
|
||||
) -> io::Result<impl IntoIterator<Item = io::Result<ExtractedGeoPoint>> + '_> {
|
||||
Ok(mem::take(&mut self.inserted)
|
||||
.iter()
|
||||
.copied()
|
||||
.map(Ok)
|
||||
.chain(iterator_over_spilled_geopoints(&mut self.spilled_inserted))
|
||||
.chain(iterator_over_spilled_geopoints(&mut self.spilled_inserted)?))
|
||||
}
|
||||
}
|
||||
|
||||
fn iterator_over_spilled_geopoints(
|
||||
spilled: &mut Option<BufReader<File>>,
|
||||
) -> impl IntoIterator<Item = io::Result<ExtractedGeoPoint>> + '_ {
|
||||
) -> io::Result<impl IntoIterator<Item = io::Result<ExtractedGeoPoint>> + '_> {
|
||||
let mut spilled = spilled.take();
|
||||
iter::from_fn(move || match &mut spilled {
|
||||
if let Some(spilled) = &mut spilled {
|
||||
spilled.rewind()?;
|
||||
}
|
||||
|
||||
Ok(iter::from_fn(move || match &mut spilled {
|
||||
Some(file) => {
|
||||
let geopoint_bytes = &mut [0u8; mem::size_of::<ExtractedGeoPoint>()];
|
||||
match file.read_exact(geopoint_bytes) {
|
||||
@@ -130,7 +134,7 @@ fn iterator_over_spilled_geopoints(
|
||||
}
|
||||
}
|
||||
None => None,
|
||||
})
|
||||
}))
|
||||
}
|
||||
|
||||
impl<'extractor> Extractor<'extractor> for GeoExtractor {
|
||||
@@ -157,7 +161,9 @@ impl<'extractor> Extractor<'extractor> for GeoExtractor {
|
||||
let mut data_ref = context.data.borrow_mut_or_yield();
|
||||
|
||||
for change in changes {
|
||||
if max_memory.map_or(false, |mm| context.extractor_alloc.allocated_bytes() >= mm) {
|
||||
if data_ref.spilled_removed.is_none()
|
||||
&& max_memory.map_or(false, |mm| context.extractor_alloc.allocated_bytes() >= mm)
|
||||
{
|
||||
// We must spill as we allocated too much memory
|
||||
data_ref.spilled_removed = tempfile::tempfile().map(BufWriter::new).map(Some)?;
|
||||
data_ref.spilled_inserted = tempfile::tempfile().map(BufWriter::new).map(Some)?;
|
||||
|
||||
@@ -6,7 +6,9 @@ mod searchable;
|
||||
mod vectors;
|
||||
|
||||
use bumpalo::Bump;
|
||||
pub use cache::{merge_caches, transpose_and_freeze_caches, BalancedCaches, DelAddRoaringBitmap};
|
||||
pub use cache::{
|
||||
merge_caches_sorted, transpose_and_freeze_caches, BalancedCaches, DelAddRoaringBitmap,
|
||||
};
|
||||
pub use documents::*;
|
||||
pub use faceted::*;
|
||||
pub use geo::*;
|
||||
|
||||
@@ -28,7 +28,7 @@ pub struct WordDocidsBalancedCaches<'extractor> {
|
||||
exact_word_docids: BalancedCaches<'extractor>,
|
||||
word_position_docids: BalancedCaches<'extractor>,
|
||||
fid_word_count_docids: BalancedCaches<'extractor>,
|
||||
fid_word_count: HashMap<FieldId, (usize, usize)>,
|
||||
fid_word_count: HashMap<FieldId, (Option<usize>, Option<usize>)>,
|
||||
current_docid: Option<DocumentId>,
|
||||
}
|
||||
|
||||
@@ -85,8 +85,8 @@ impl<'extractor> WordDocidsBalancedCaches<'extractor> {
|
||||
|
||||
self.fid_word_count
|
||||
.entry(field_id)
|
||||
.and_modify(|(_current_count, new_count)| *new_count += 1)
|
||||
.or_insert((0, 1));
|
||||
.and_modify(|(_current_count, new_count)| *new_count.get_or_insert(0) += 1)
|
||||
.or_insert((None, Some(1)));
|
||||
self.current_docid = Some(docid);
|
||||
|
||||
Ok(())
|
||||
@@ -130,8 +130,8 @@ impl<'extractor> WordDocidsBalancedCaches<'extractor> {
|
||||
|
||||
self.fid_word_count
|
||||
.entry(field_id)
|
||||
.and_modify(|(current_count, _new_count)| *current_count += 1)
|
||||
.or_insert((1, 0));
|
||||
.and_modify(|(current_count, _new_count)| *current_count.get_or_insert(0) += 1)
|
||||
.or_insert((Some(1), None));
|
||||
|
||||
self.current_docid = Some(docid);
|
||||
|
||||
@@ -141,14 +141,18 @@ impl<'extractor> WordDocidsBalancedCaches<'extractor> {
|
||||
fn flush_fid_word_count(&mut self, buffer: &mut BumpVec<u8>) -> Result<()> {
|
||||
for (fid, (current_count, new_count)) in self.fid_word_count.drain() {
|
||||
if current_count != new_count {
|
||||
if current_count <= MAX_COUNTED_WORDS {
|
||||
if let Some(current_count) =
|
||||
current_count.filter(|current_count| *current_count <= MAX_COUNTED_WORDS)
|
||||
{
|
||||
buffer.clear();
|
||||
buffer.extend_from_slice(&fid.to_be_bytes());
|
||||
buffer.push(current_count as u8);
|
||||
self.fid_word_count_docids
|
||||
.insert_del_u32(buffer, self.current_docid.unwrap())?;
|
||||
}
|
||||
if new_count <= MAX_COUNTED_WORDS {
|
||||
if let Some(new_count) =
|
||||
new_count.filter(|new_count| *new_count <= MAX_COUNTED_WORDS)
|
||||
{
|
||||
buffer.clear();
|
||||
buffer.extend_from_slice(&fid.to_be_bytes());
|
||||
buffer.push(new_count as u8);
|
||||
@@ -351,6 +355,15 @@ impl WordDocidsExtractors {
|
||||
)?;
|
||||
}
|
||||
DocumentChange::Update(inner) => {
|
||||
if !inner.has_changed_for_fields(
|
||||
document_tokenizer.attribute_to_extract,
|
||||
&context.rtxn,
|
||||
context.index,
|
||||
context.db_fields_ids_map,
|
||||
)? {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let mut token_fn = |fname: &str, fid, pos, word: &str| {
|
||||
cached_sorter.insert_del_u32(
|
||||
fid,
|
||||
|
||||
@@ -70,6 +70,15 @@ impl SearchableExtractor for WordPairProximityDocidsExtractor {
|
||||
)?;
|
||||
}
|
||||
DocumentChange::Update(inner) => {
|
||||
if !inner.has_changed_for_fields(
|
||||
document_tokenizer.attribute_to_extract,
|
||||
rtxn,
|
||||
index,
|
||||
context.db_fields_ids_map,
|
||||
)? {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let document = inner.current(rtxn, index, context.db_fields_ids_map)?;
|
||||
process_document_tokens(
|
||||
document,
|
||||
|
||||
@@ -80,15 +80,26 @@ where
|
||||
let mut bbbuffers = Vec::new();
|
||||
let finished_extraction = AtomicBool::new(false);
|
||||
|
||||
// We reduce the actual memory used to 5%. The reason we do this here and not in Meilisearch
|
||||
// is because we still use the old indexer for the settings and it is highly impacted by the
|
||||
// max memory. So we keep the changes here and will remove these changes once we use the new
|
||||
// indexer to also index settings. Related to #5125 and #5141.
|
||||
let grenad_parameters = GrenadParameters {
|
||||
max_memory: grenad_parameters.max_memory.map(|mm| mm * 5 / 100),
|
||||
..grenad_parameters
|
||||
};
|
||||
|
||||
// We compute and remove the allocated BBQueues buffers capacity from the indexing memory.
|
||||
let minimum_capacity = 50 * 1024 * 1024 * pool.current_num_threads(); // 50 MiB
|
||||
let (grenad_parameters, total_bbbuffer_capacity) = grenad_parameters.max_memory.map_or(
|
||||
(grenad_parameters, 2 * minimum_capacity), // 100 MiB by thread by default
|
||||
|max_memory| {
|
||||
// 2% of the indexing memory
|
||||
let total_bbbuffer_capacity = (max_memory / 100 / 2).min(minimum_capacity);
|
||||
let total_bbbuffer_capacity = (max_memory / 100 / 2).max(minimum_capacity);
|
||||
let new_grenad_parameters = GrenadParameters {
|
||||
max_memory: Some(max_memory - total_bbbuffer_capacity),
|
||||
max_memory: Some(
|
||||
max_memory.saturating_sub(total_bbbuffer_capacity).max(100 * 1024 * 1024),
|
||||
),
|
||||
..grenad_parameters
|
||||
};
|
||||
(new_grenad_parameters, total_bbbuffer_capacity)
|
||||
|
||||
@@ -9,8 +9,8 @@ use roaring::RoaringBitmap;
|
||||
|
||||
use super::channel::*;
|
||||
use super::extract::{
|
||||
merge_caches, transpose_and_freeze_caches, BalancedCaches, DelAddRoaringBitmap, FacetKind,
|
||||
GeoExtractorData,
|
||||
merge_caches_sorted, transpose_and_freeze_caches, BalancedCaches, DelAddRoaringBitmap,
|
||||
FacetKind, GeoExtractorData,
|
||||
};
|
||||
use crate::{CboRoaringBitmapCodec, FieldId, GeoPoint, Index, InternalError, Result};
|
||||
|
||||
@@ -34,7 +34,7 @@ where
|
||||
}
|
||||
|
||||
let mut frozen = data.into_inner().freeze()?;
|
||||
for result in frozen.iter_and_clear_removed() {
|
||||
for result in frozen.iter_and_clear_removed()? {
|
||||
let extracted_geo_point = result?;
|
||||
let removed = rtree.remove(&GeoPoint::from(extracted_geo_point));
|
||||
debug_assert!(removed.is_some());
|
||||
@@ -42,7 +42,7 @@ where
|
||||
debug_assert!(removed);
|
||||
}
|
||||
|
||||
for result in frozen.iter_and_clear_inserted() {
|
||||
for result in frozen.iter_and_clear_inserted()? {
|
||||
let extracted_geo_point = result?;
|
||||
rtree.insert(GeoPoint::from(extracted_geo_point));
|
||||
let inserted = faceted.insert(extracted_geo_point.docid);
|
||||
@@ -78,18 +78,37 @@ where
|
||||
if must_stop_processing() {
|
||||
return Err(InternalError::AbortedIndexation.into());
|
||||
}
|
||||
merge_caches(frozen, |key, DelAddRoaringBitmap { del, add }| {
|
||||
merge_caches_sorted(frozen, |key, DelAddRoaringBitmap { del, add }| {
|
||||
let current = database.get(&rtxn, key)?;
|
||||
match merge_cbo_bitmaps(current, del, add)? {
|
||||
Operation::Write(bitmap) => {
|
||||
|
||||
if let (Some(del), Some(current)) = (&del, ¤t) {
|
||||
let current = CboRoaringBitmapCodec::deserialize_from(current).unwrap();
|
||||
let diff = del - ¤t;
|
||||
let external_ids = index.external_id_of(&rtxn, &diff).unwrap().into_iter().map(|id| id.unwrap()).collect::<Vec<_>>();
|
||||
if !del.is_subset(¤t) {
|
||||
eprintln!(
|
||||
"======================== {:?}: {} -> c: {:?} d: {:?} a: {:?} extra: {:?} extra_external_ids: {:?}",
|
||||
D::DATABASE,
|
||||
D::DATABASE.stringify_key(key),
|
||||
¤t,
|
||||
del,
|
||||
add,
|
||||
diff,
|
||||
external_ids
|
||||
);
|
||||
}
|
||||
}
|
||||
match merge_cbo_bitmaps(current, del, add) {
|
||||
Ok(Operation::Write(bitmap)) => {
|
||||
docids_sender.write(key, &bitmap)?;
|
||||
Ok(())
|
||||
}
|
||||
Operation::Delete => {
|
||||
Ok(Operation::Delete) => {
|
||||
docids_sender.delete(key)?;
|
||||
Ok(())
|
||||
}
|
||||
Operation::Ignore => Ok(()),
|
||||
Ok(Operation::Ignore) => Ok(()),
|
||||
Err(e) => Err(e),
|
||||
}
|
||||
})
|
||||
})
|
||||
@@ -107,7 +126,7 @@ pub fn merge_and_send_facet_docids<'extractor>(
|
||||
.map(|frozen| {
|
||||
let mut facet_field_ids_delta = FacetFieldIdsDelta::default();
|
||||
let rtxn = index.read_txn()?;
|
||||
merge_caches(frozen, |key, DelAddRoaringBitmap { del, add }| {
|
||||
merge_caches_sorted(frozen, |key, DelAddRoaringBitmap { del, add }| {
|
||||
let current = database.get_cbo_roaring_bytes_value(&rtxn, key)?;
|
||||
match merge_cbo_bitmaps(current, del, add)? {
|
||||
Operation::Write(bitmap) => {
|
||||
@@ -234,17 +253,46 @@ fn merge_cbo_bitmaps(
|
||||
(None, Some(_del), Some(add)) => Ok(Operation::Write(add)),
|
||||
(Some(_current), None, None) => Ok(Operation::Ignore), // but it's strange
|
||||
(Some(current), None, Some(add)) => Ok(Operation::Write(current | add)),
|
||||
(Some(current), Some(del), add) => {
|
||||
(Some(current), Some(mut del), add) => {
|
||||
debug_assert!(
|
||||
del.is_subset(¤t),
|
||||
"del is not a subset of current, which must be impossible."
|
||||
);
|
||||
let output = match add {
|
||||
Some(add) => (¤t - del) | add,
|
||||
None => ¤t - del,
|
||||
Some(add) => {
|
||||
del -= &add;
|
||||
|
||||
if del.is_empty() {
|
||||
if add.is_subset(¤t) {
|
||||
// no changes, no allocation
|
||||
None
|
||||
} else {
|
||||
// addition
|
||||
Some(current | add)
|
||||
}
|
||||
} else {
|
||||
if add.is_subset(¤t) {
|
||||
// deletion only, no union
|
||||
Some(current - del)
|
||||
} else {
|
||||
// deletion and addition
|
||||
Some((current - del) | add)
|
||||
}
|
||||
}
|
||||
}
|
||||
// deletion only, no union
|
||||
None => Some(current - del),
|
||||
};
|
||||
if output.is_empty() {
|
||||
Ok(Operation::Delete)
|
||||
} else if current == output {
|
||||
Ok(Operation::Ignore)
|
||||
} else {
|
||||
Ok(Operation::Write(output))
|
||||
|
||||
match output {
|
||||
Some(output) => {
|
||||
if output.is_empty() {
|
||||
Ok(Operation::Delete)
|
||||
} else {
|
||||
Ok(Operation::Write(output))
|
||||
}
|
||||
}
|
||||
None => Ok(Operation::Ignore),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
use std::collections::HashSet;
|
||||
use std::collections::BTreeSet;
|
||||
use std::io::BufWriter;
|
||||
|
||||
use fst::{Set, SetBuilder, Streamer};
|
||||
@@ -75,8 +75,8 @@ pub struct PrefixData {
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct PrefixDelta {
|
||||
pub modified: HashSet<Prefix>,
|
||||
pub deleted: HashSet<Prefix>,
|
||||
pub modified: BTreeSet<Prefix>,
|
||||
pub deleted: BTreeSet<Prefix>,
|
||||
}
|
||||
|
||||
struct PrefixFstBuilder {
|
||||
@@ -86,7 +86,7 @@ struct PrefixFstBuilder {
|
||||
prefix_fst_builders: Vec<SetBuilder<Vec<u8>>>,
|
||||
current_prefix: Vec<Prefix>,
|
||||
current_prefix_count: Vec<usize>,
|
||||
modified_prefixes: HashSet<Prefix>,
|
||||
modified_prefixes: BTreeSet<Prefix>,
|
||||
current_prefix_is_modified: Vec<bool>,
|
||||
}
|
||||
|
||||
@@ -110,7 +110,7 @@ impl PrefixFstBuilder {
|
||||
prefix_fst_builders,
|
||||
current_prefix: vec![Prefix::new(); max_prefix_length],
|
||||
current_prefix_count: vec![0; max_prefix_length],
|
||||
modified_prefixes: HashSet::new(),
|
||||
modified_prefixes: BTreeSet::new(),
|
||||
current_prefix_is_modified: vec![false; max_prefix_length],
|
||||
})
|
||||
}
|
||||
@@ -180,7 +180,7 @@ impl PrefixFstBuilder {
|
||||
let prefix_fst_mmap = unsafe { Mmap::map(&prefix_fst_file)? };
|
||||
let new_prefix_fst = Set::new(&prefix_fst_mmap)?;
|
||||
let old_prefix_fst = index.words_prefixes_fst(rtxn)?;
|
||||
let mut deleted_prefixes = HashSet::new();
|
||||
let mut deleted_prefixes = BTreeSet::new();
|
||||
{
|
||||
let mut deleted_prefixes_stream = old_prefix_fst.op().add(&new_prefix_fst).difference();
|
||||
while let Some(prefix) = deleted_prefixes_stream.next() {
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
use std::cell::RefCell;
|
||||
use std::collections::HashSet;
|
||||
use std::collections::BTreeSet;
|
||||
use std::io::{BufReader, BufWriter, Read, Seek, Write};
|
||||
|
||||
use hashbrown::HashMap;
|
||||
@@ -37,8 +37,8 @@ impl WordPrefixDocids {
|
||||
fn execute(
|
||||
self,
|
||||
wtxn: &mut heed::RwTxn,
|
||||
prefix_to_compute: &HashSet<Prefix>,
|
||||
prefix_to_delete: &HashSet<Prefix>,
|
||||
prefix_to_compute: &BTreeSet<Prefix>,
|
||||
prefix_to_delete: &BTreeSet<Prefix>,
|
||||
) -> Result<()> {
|
||||
delete_prefixes(wtxn, &self.prefix_database, prefix_to_delete)?;
|
||||
self.recompute_modified_prefixes(wtxn, prefix_to_compute)
|
||||
@@ -48,7 +48,7 @@ impl WordPrefixDocids {
|
||||
fn recompute_modified_prefixes(
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
prefixes: &HashSet<Prefix>,
|
||||
prefixes: &BTreeSet<Prefix>,
|
||||
) -> Result<()> {
|
||||
// We fetch the docids associated to the newly added word prefix fst only.
|
||||
// And collect the CboRoaringBitmaps pointers in an HashMap.
|
||||
@@ -127,7 +127,7 @@ impl<'a, 'rtxn> FrozenPrefixBitmaps<'a, 'rtxn> {
|
||||
pub fn from_prefixes(
|
||||
database: Database<Bytes, CboRoaringBitmapCodec>,
|
||||
rtxn: &'rtxn RoTxn,
|
||||
prefixes: &'a HashSet<Prefix>,
|
||||
prefixes: &'a BTreeSet<Prefix>,
|
||||
) -> heed::Result<Self> {
|
||||
let database = database.remap_data_type::<Bytes>();
|
||||
|
||||
@@ -173,8 +173,8 @@ impl WordPrefixIntegerDocids {
|
||||
fn execute(
|
||||
self,
|
||||
wtxn: &mut heed::RwTxn,
|
||||
prefix_to_compute: &HashSet<Prefix>,
|
||||
prefix_to_delete: &HashSet<Prefix>,
|
||||
prefix_to_compute: &BTreeSet<Prefix>,
|
||||
prefix_to_delete: &BTreeSet<Prefix>,
|
||||
) -> Result<()> {
|
||||
delete_prefixes(wtxn, &self.prefix_database, prefix_to_delete)?;
|
||||
self.recompute_modified_prefixes(wtxn, prefix_to_compute)
|
||||
@@ -184,7 +184,7 @@ impl WordPrefixIntegerDocids {
|
||||
fn recompute_modified_prefixes(
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
prefixes: &HashSet<Prefix>,
|
||||
prefixes: &BTreeSet<Prefix>,
|
||||
) -> Result<()> {
|
||||
// We fetch the docids associated to the newly added word prefix fst only.
|
||||
// And collect the CboRoaringBitmaps pointers in an HashMap.
|
||||
@@ -262,7 +262,7 @@ impl<'a, 'rtxn> FrozenPrefixIntegerBitmaps<'a, 'rtxn> {
|
||||
pub fn from_prefixes(
|
||||
database: Database<Bytes, CboRoaringBitmapCodec>,
|
||||
rtxn: &'rtxn RoTxn,
|
||||
prefixes: &'a HashSet<Prefix>,
|
||||
prefixes: &'a BTreeSet<Prefix>,
|
||||
) -> heed::Result<Self> {
|
||||
let database = database.remap_data_type::<Bytes>();
|
||||
|
||||
@@ -291,7 +291,7 @@ unsafe impl<'a, 'rtxn> Sync for FrozenPrefixIntegerBitmaps<'a, 'rtxn> {}
|
||||
fn delete_prefixes(
|
||||
wtxn: &mut RwTxn,
|
||||
prefix_database: &Database<Bytes, CboRoaringBitmapCodec>,
|
||||
prefixes: &HashSet<Prefix>,
|
||||
prefixes: &BTreeSet<Prefix>,
|
||||
) -> Result<()> {
|
||||
// We remove all the entries that are no more required in this word prefix docids database.
|
||||
for prefix in prefixes {
|
||||
@@ -309,8 +309,8 @@ fn delete_prefixes(
|
||||
pub fn compute_word_prefix_docids(
|
||||
wtxn: &mut RwTxn,
|
||||
index: &Index,
|
||||
prefix_to_compute: &HashSet<Prefix>,
|
||||
prefix_to_delete: &HashSet<Prefix>,
|
||||
prefix_to_compute: &BTreeSet<Prefix>,
|
||||
prefix_to_delete: &BTreeSet<Prefix>,
|
||||
grenad_parameters: GrenadParameters,
|
||||
) -> Result<()> {
|
||||
WordPrefixDocids::new(
|
||||
@@ -325,8 +325,8 @@ pub fn compute_word_prefix_docids(
|
||||
pub fn compute_exact_word_prefix_docids(
|
||||
wtxn: &mut RwTxn,
|
||||
index: &Index,
|
||||
prefix_to_compute: &HashSet<Prefix>,
|
||||
prefix_to_delete: &HashSet<Prefix>,
|
||||
prefix_to_compute: &BTreeSet<Prefix>,
|
||||
prefix_to_delete: &BTreeSet<Prefix>,
|
||||
grenad_parameters: GrenadParameters,
|
||||
) -> Result<()> {
|
||||
WordPrefixDocids::new(
|
||||
@@ -341,8 +341,8 @@ pub fn compute_exact_word_prefix_docids(
|
||||
pub fn compute_word_prefix_fid_docids(
|
||||
wtxn: &mut RwTxn,
|
||||
index: &Index,
|
||||
prefix_to_compute: &HashSet<Prefix>,
|
||||
prefix_to_delete: &HashSet<Prefix>,
|
||||
prefix_to_compute: &BTreeSet<Prefix>,
|
||||
prefix_to_delete: &BTreeSet<Prefix>,
|
||||
grenad_parameters: GrenadParameters,
|
||||
) -> Result<()> {
|
||||
WordPrefixIntegerDocids::new(
|
||||
@@ -357,8 +357,8 @@ pub fn compute_word_prefix_fid_docids(
|
||||
pub fn compute_word_prefix_position_docids(
|
||||
wtxn: &mut RwTxn,
|
||||
index: &Index,
|
||||
prefix_to_compute: &HashSet<Prefix>,
|
||||
prefix_to_delete: &HashSet<Prefix>,
|
||||
prefix_to_compute: &BTreeSet<Prefix>,
|
||||
prefix_to_delete: &BTreeSet<Prefix>,
|
||||
grenad_parameters: GrenadParameters,
|
||||
) -> Result<()> {
|
||||
WordPrefixIntegerDocids::new(
|
||||
|
||||
@@ -82,6 +82,10 @@ pub struct BenchDeriveArgs {
|
||||
/// Reason for the benchmark invocation
|
||||
#[arg(short, long)]
|
||||
reason: Option<String>,
|
||||
|
||||
/// The maximum time in seconds we allow for fetching the task queue before timing out.
|
||||
#[arg(long, default_value_t = 60)]
|
||||
tasks_queue_timeout_secs: u64,
|
||||
}
|
||||
|
||||
pub fn run(args: BenchDeriveArgs) -> anyhow::Result<()> {
|
||||
@@ -127,7 +131,7 @@ pub fn run(args: BenchDeriveArgs) -> anyhow::Result<()> {
|
||||
let meili_client = Client::new(
|
||||
Some("http://127.0.0.1:7700".into()),
|
||||
args.master_key.as_deref(),
|
||||
Some(std::time::Duration::from_secs(60)),
|
||||
Some(std::time::Duration::from_secs(args.tasks_queue_timeout_secs)),
|
||||
)?;
|
||||
|
||||
// enter runtime
|
||||
@@ -135,7 +139,7 @@ pub fn run(args: BenchDeriveArgs) -> anyhow::Result<()> {
|
||||
rt.block_on(async {
|
||||
dashboard_client.send_machine_info(&env).await?;
|
||||
|
||||
let commit_message = build_info.commit_msg.context("missing commit message")?.split('\n').next().unwrap();
|
||||
let commit_message = build_info.commit_msg.unwrap_or_default().split('\n').next().unwrap();
|
||||
let max_workloads = args.workload_file.len();
|
||||
let reason: Option<&str> = args.reason.as_deref();
|
||||
let invocation_uuid = dashboard_client.create_invocation(build_info.clone(), commit_message, env, max_workloads, reason).await?;
|
||||
|
||||
@@ -16,6 +16,7 @@ struct ListFeaturesDeriveArgs {
|
||||
#[command(author, version, about, long_about)]
|
||||
#[command(name = "cargo xtask")]
|
||||
#[command(bin_name = "cargo xtask")]
|
||||
#[allow(clippy::large_enum_variant)] // please, that's enough...
|
||||
enum Command {
|
||||
ListFeatures(ListFeaturesDeriveArgs),
|
||||
Bench(BenchDeriveArgs),
|
||||
|
||||
105
workloads/hackernews-add-new-documents.json
Normal file
105
workloads/hackernews-add-new-documents.json
Normal file
@@ -0,0 +1,105 @@
|
||||
{
|
||||
"name": "hackernews.add_new_documents",
|
||||
"run_count": 3,
|
||||
"extra_cli_args": [],
|
||||
"assets": {
|
||||
"hackernews-01.ndjson": {
|
||||
"local_location": null,
|
||||
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/01.ndjson",
|
||||
"sha256": "cd3627b86c064d865b6754848ed0e73ef1d8142752a25e5f0765c3a1296dd3ae"
|
||||
},
|
||||
"hackernews-02.ndjson": {
|
||||
"local_location": null,
|
||||
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/02.ndjson",
|
||||
"sha256": "5d533b83bcf992201dace88b4d0c0be8b4df5225c6c4b763582d986844bcc23b"
|
||||
},
|
||||
"hackernews-03.ndjson": {
|
||||
"local_location": null,
|
||||
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/03.ndjson",
|
||||
"sha256": "f5f351a0d04a8a83643ace12cafa2b7ec8ca8cb7d46fd268e5126492a6c66f2a"
|
||||
},
|
||||
"hackernews-04.ndjson": {
|
||||
"local_location": null,
|
||||
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/04.ndjson",
|
||||
"sha256": "ac1915ee7ce53a6718548c255a6cc59969784b2570745dc5b739f714beda291a"
|
||||
},
|
||||
"hackernews-05.ndjson": {
|
||||
"local_location": null,
|
||||
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/05.ndjson",
|
||||
"sha256": "be31d5632602f798e62d1c10c83bdfda2b4deaa068477eacde05fdd247572b82"
|
||||
}
|
||||
},
|
||||
"precommands": [
|
||||
{
|
||||
"route": "indexes/movies/settings",
|
||||
"method": "PATCH",
|
||||
"body": {
|
||||
"inline": {
|
||||
"displayedAttributes": [
|
||||
"title",
|
||||
"by",
|
||||
"score",
|
||||
"time",
|
||||
"text"
|
||||
],
|
||||
"searchableAttributes": [
|
||||
"title",
|
||||
"text"
|
||||
],
|
||||
"filterableAttributes": [
|
||||
"by",
|
||||
"kids",
|
||||
"parent"
|
||||
],
|
||||
"sortableAttributes": [
|
||||
"score",
|
||||
"time"
|
||||
]
|
||||
}
|
||||
},
|
||||
"synchronous": "WaitForTask"
|
||||
},
|
||||
{
|
||||
"route": "indexes/movies/documents",
|
||||
"method": "POST",
|
||||
"body": {
|
||||
"asset": "hackernews-01.ndjson"
|
||||
},
|
||||
"synchronous": "WaitForResponse"
|
||||
},
|
||||
{
|
||||
"route": "indexes/movies/documents",
|
||||
"method": "POST",
|
||||
"body": {
|
||||
"asset": "hackernews-02.ndjson"
|
||||
},
|
||||
"synchronous": "WaitForResponse"
|
||||
},
|
||||
{
|
||||
"route": "indexes/movies/documents",
|
||||
"method": "POST",
|
||||
"body": {
|
||||
"asset": "hackernews-03.ndjson"
|
||||
},
|
||||
"synchronous": "WaitForResponse"
|
||||
},
|
||||
{
|
||||
"route": "indexes/movies/documents",
|
||||
"method": "POST",
|
||||
"body": {
|
||||
"asset": "hackernews-04.ndjson"
|
||||
},
|
||||
"synchronous": "WaitForTask"
|
||||
}
|
||||
],
|
||||
"commands": [
|
||||
{
|
||||
"route": "indexes/movies/documents",
|
||||
"method": "POST",
|
||||
"body": {
|
||||
"asset": "hackernews-05.ndjson"
|
||||
},
|
||||
"synchronous": "WaitForTask"
|
||||
}
|
||||
]
|
||||
}
|
||||
111
workloads/hackernews-modify-facet-numbers.json
Normal file
111
workloads/hackernews-modify-facet-numbers.json
Normal file
@@ -0,0 +1,111 @@
|
||||
{
|
||||
"name": "hackernews.modify_facet_numbers",
|
||||
"run_count": 3,
|
||||
"extra_cli_args": [],
|
||||
"assets": {
|
||||
"hackernews-01.ndjson": {
|
||||
"local_location": null,
|
||||
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/01.ndjson",
|
||||
"sha256": "cd3627b86c064d865b6754848ed0e73ef1d8142752a25e5f0765c3a1296dd3ae"
|
||||
},
|
||||
"hackernews-02.ndjson": {
|
||||
"local_location": null,
|
||||
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/02.ndjson",
|
||||
"sha256": "5d533b83bcf992201dace88b4d0c0be8b4df5225c6c4b763582d986844bcc23b"
|
||||
},
|
||||
"hackernews-03.ndjson": {
|
||||
"local_location": null,
|
||||
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/03.ndjson",
|
||||
"sha256": "f5f351a0d04a8a83643ace12cafa2b7ec8ca8cb7d46fd268e5126492a6c66f2a"
|
||||
},
|
||||
"hackernews-04.ndjson": {
|
||||
"local_location": null,
|
||||
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/04.ndjson",
|
||||
"sha256": "ac1915ee7ce53a6718548c255a6cc59969784b2570745dc5b739f714beda291a"
|
||||
},
|
||||
"hackernews-05.ndjson": {
|
||||
"local_location": null,
|
||||
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/05.ndjson",
|
||||
"sha256": "be31d5632602f798e62d1c10c83bdfda2b4deaa068477eacde05fdd247572b82"
|
||||
},
|
||||
"hackernews-02-modified-filters.ndjson": {
|
||||
"local_location": null,
|
||||
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/02-modified-filters.ndjson",
|
||||
"sha256": "7272cbfd41110d32d7fe168424a0000f07589bfe40f664652b34f4f20aaf3802"
|
||||
}
|
||||
},
|
||||
"precommands": [
|
||||
{
|
||||
"route": "indexes/movies/settings",
|
||||
"method": "PATCH",
|
||||
"body": {
|
||||
"inline": {
|
||||
"displayedAttributes": [
|
||||
"title",
|
||||
"by",
|
||||
"score",
|
||||
"time",
|
||||
"text"
|
||||
],
|
||||
"searchableAttributes": [
|
||||
"title",
|
||||
"text"
|
||||
],
|
||||
"filterableAttributes": [
|
||||
"by",
|
||||
"kids",
|
||||
"parent"
|
||||
],
|
||||
"sortableAttributes": [
|
||||
"score",
|
||||
"time"
|
||||
]
|
||||
}
|
||||
},
|
||||
"synchronous": "WaitForTask"
|
||||
},
|
||||
{
|
||||
"route": "indexes/movies/documents",
|
||||
"method": "POST",
|
||||
"body": {
|
||||
"asset": "hackernews-01.ndjson"
|
||||
},
|
||||
"synchronous": "WaitForResponse"
|
||||
},
|
||||
{
|
||||
"route": "indexes/movies/documents",
|
||||
"method": "POST",
|
||||
"body": {
|
||||
"asset": "hackernews-02.ndjson"
|
||||
},
|
||||
"synchronous": "WaitForResponse"
|
||||
},
|
||||
{
|
||||
"route": "indexes/movies/documents",
|
||||
"method": "POST",
|
||||
"body": {
|
||||
"asset": "hackernews-03.ndjson"
|
||||
},
|
||||
"synchronous": "WaitForResponse"
|
||||
},
|
||||
{
|
||||
"route": "indexes/movies/documents",
|
||||
"method": "POST",
|
||||
"body": {
|
||||
"asset": "hackernews-04.ndjson"
|
||||
},
|
||||
"synchronous": "WaitForTask"
|
||||
}
|
||||
],
|
||||
"commands": [
|
||||
{
|
||||
"route": "indexes/movies/documents",
|
||||
"method": "POST",
|
||||
"body": {
|
||||
"asset": "hackernews-02-modified-filters.ndjson"
|
||||
},
|
||||
"synchronous": "WaitForTask"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
111
workloads/hackernews-modify-facet-strings.json
Normal file
111
workloads/hackernews-modify-facet-strings.json
Normal file
@@ -0,0 +1,111 @@
|
||||
{
|
||||
"name": "hackernews.modify_facet_strings",
|
||||
"run_count": 3,
|
||||
"extra_cli_args": [],
|
||||
"assets": {
|
||||
"hackernews-01.ndjson": {
|
||||
"local_location": null,
|
||||
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/01.ndjson",
|
||||
"sha256": "cd3627b86c064d865b6754848ed0e73ef1d8142752a25e5f0765c3a1296dd3ae"
|
||||
},
|
||||
"hackernews-02.ndjson": {
|
||||
"local_location": null,
|
||||
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/02.ndjson",
|
||||
"sha256": "5d533b83bcf992201dace88b4d0c0be8b4df5225c6c4b763582d986844bcc23b"
|
||||
},
|
||||
"hackernews-03.ndjson": {
|
||||
"local_location": null,
|
||||
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/03.ndjson",
|
||||
"sha256": "f5f351a0d04a8a83643ace12cafa2b7ec8ca8cb7d46fd268e5126492a6c66f2a"
|
||||
},
|
||||
"hackernews-04.ndjson": {
|
||||
"local_location": null,
|
||||
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/04.ndjson",
|
||||
"sha256": "ac1915ee7ce53a6718548c255a6cc59969784b2570745dc5b739f714beda291a"
|
||||
},
|
||||
"hackernews-05.ndjson": {
|
||||
"local_location": null,
|
||||
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/05.ndjson",
|
||||
"sha256": "be31d5632602f798e62d1c10c83bdfda2b4deaa068477eacde05fdd247572b82"
|
||||
},
|
||||
"hackernews-01-modified-filters.ndjson": {
|
||||
"local_location": null,
|
||||
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/01-modified-filters.ndjson",
|
||||
"sha256": "b80c245ce1b1df80b9b38800f677f3bd11947ebc62716fb108269d50e796c35c"
|
||||
}
|
||||
},
|
||||
"precommands": [
|
||||
{
|
||||
"route": "indexes/movies/settings",
|
||||
"method": "PATCH",
|
||||
"body": {
|
||||
"inline": {
|
||||
"displayedAttributes": [
|
||||
"title",
|
||||
"by",
|
||||
"score",
|
||||
"time",
|
||||
"text"
|
||||
],
|
||||
"searchableAttributes": [
|
||||
"title",
|
||||
"text"
|
||||
],
|
||||
"filterableAttributes": [
|
||||
"by",
|
||||
"kids",
|
||||
"parent"
|
||||
],
|
||||
"sortableAttributes": [
|
||||
"score",
|
||||
"time"
|
||||
]
|
||||
}
|
||||
},
|
||||
"synchronous": "WaitForTask"
|
||||
},
|
||||
{
|
||||
"route": "indexes/movies/documents",
|
||||
"method": "POST",
|
||||
"body": {
|
||||
"asset": "hackernews-01.ndjson"
|
||||
},
|
||||
"synchronous": "WaitForResponse"
|
||||
},
|
||||
{
|
||||
"route": "indexes/movies/documents",
|
||||
"method": "POST",
|
||||
"body": {
|
||||
"asset": "hackernews-02.ndjson"
|
||||
},
|
||||
"synchronous": "WaitForResponse"
|
||||
},
|
||||
{
|
||||
"route": "indexes/movies/documents",
|
||||
"method": "POST",
|
||||
"body": {
|
||||
"asset": "hackernews-03.ndjson"
|
||||
},
|
||||
"synchronous": "WaitForResponse"
|
||||
},
|
||||
{
|
||||
"route": "indexes/movies/documents",
|
||||
"method": "POST",
|
||||
"body": {
|
||||
"asset": "hackernews-04.ndjson"
|
||||
},
|
||||
"synchronous": "WaitForTask"
|
||||
}
|
||||
],
|
||||
"commands": [
|
||||
{
|
||||
"route": "indexes/movies/documents",
|
||||
"method": "POST",
|
||||
"body": {
|
||||
"asset": "hackernews-01-modified-filters.ndjson"
|
||||
},
|
||||
"synchronous": "WaitForTask"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
123
workloads/hackernews-modify-searchables.json
Normal file
123
workloads/hackernews-modify-searchables.json
Normal file
@@ -0,0 +1,123 @@
|
||||
{
|
||||
"name": "hackernews.modify_searchables",
|
||||
"run_count": 3,
|
||||
"extra_cli_args": [],
|
||||
"assets": {
|
||||
"hackernews-01.ndjson": {
|
||||
"local_location": null,
|
||||
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/01.ndjson",
|
||||
"sha256": "cd3627b86c064d865b6754848ed0e73ef1d8142752a25e5f0765c3a1296dd3ae"
|
||||
},
|
||||
"hackernews-02.ndjson": {
|
||||
"local_location": null,
|
||||
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/02.ndjson",
|
||||
"sha256": "5d533b83bcf992201dace88b4d0c0be8b4df5225c6c4b763582d986844bcc23b"
|
||||
},
|
||||
"hackernews-03.ndjson": {
|
||||
"local_location": null,
|
||||
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/03.ndjson",
|
||||
"sha256": "f5f351a0d04a8a83643ace12cafa2b7ec8ca8cb7d46fd268e5126492a6c66f2a"
|
||||
},
|
||||
"hackernews-04.ndjson": {
|
||||
"local_location": null,
|
||||
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/04.ndjson",
|
||||
"sha256": "ac1915ee7ce53a6718548c255a6cc59969784b2570745dc5b739f714beda291a"
|
||||
},
|
||||
"hackernews-05.ndjson": {
|
||||
"local_location": null,
|
||||
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/05.ndjson",
|
||||
"sha256": "be31d5632602f798e62d1c10c83bdfda2b4deaa068477eacde05fdd247572b82"
|
||||
},
|
||||
"hackernews-01-modified-searchables.ndjson": {
|
||||
"local_location": null,
|
||||
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/01-modified-searchables.ndjson",
|
||||
"sha256": "e5c08710c6af70031ac7212e0ba242c72ef29c8d4e1fce66c789544641452a7c"
|
||||
},
|
||||
"hackernews-02-modified-searchables.ndjson": {
|
||||
"local_location": null,
|
||||
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/02-modified-searchables.ndjson",
|
||||
"sha256": "098b029851117087b1e26ccb7ac408eda9bba54c3008213a2880d6fab607346e"
|
||||
}
|
||||
},
|
||||
"precommands": [
|
||||
{
|
||||
"route": "indexes/movies/settings",
|
||||
"method": "PATCH",
|
||||
"body": {
|
||||
"inline": {
|
||||
"displayedAttributes": [
|
||||
"title",
|
||||
"by",
|
||||
"score",
|
||||
"time",
|
||||
"text"
|
||||
],
|
||||
"searchableAttributes": [
|
||||
"title",
|
||||
"text"
|
||||
],
|
||||
"filterableAttributes": [
|
||||
"by",
|
||||
"kids",
|
||||
"parent"
|
||||
],
|
||||
"sortableAttributes": [
|
||||
"score",
|
||||
"time"
|
||||
]
|
||||
}
|
||||
},
|
||||
"synchronous": "WaitForTask"
|
||||
},
|
||||
{
|
||||
"route": "indexes/movies/documents",
|
||||
"method": "POST",
|
||||
"body": {
|
||||
"asset": "hackernews-01.ndjson"
|
||||
},
|
||||
"synchronous": "WaitForResponse"
|
||||
},
|
||||
{
|
||||
"route": "indexes/movies/documents",
|
||||
"method": "POST",
|
||||
"body": {
|
||||
"asset": "hackernews-02.ndjson"
|
||||
},
|
||||
"synchronous": "WaitForResponse"
|
||||
},
|
||||
{
|
||||
"route": "indexes/movies/documents",
|
||||
"method": "POST",
|
||||
"body": {
|
||||
"asset": "hackernews-03.ndjson"
|
||||
},
|
||||
"synchronous": "WaitForResponse"
|
||||
},
|
||||
{
|
||||
"route": "indexes/movies/documents",
|
||||
"method": "POST",
|
||||
"body": {
|
||||
"asset": "hackernews-04.ndjson"
|
||||
},
|
||||
"synchronous": "WaitForTask"
|
||||
}
|
||||
],
|
||||
"commands": [
|
||||
{
|
||||
"route": "indexes/movies/documents",
|
||||
"method": "POST",
|
||||
"body": {
|
||||
"asset": "hackernews-01-modified-searchables.ndjson"
|
||||
},
|
||||
"synchronous": "WaitForTask"
|
||||
},
|
||||
{
|
||||
"route": "indexes/movies/documents",
|
||||
"method": "POST",
|
||||
"body": {
|
||||
"asset": "hackernews-02-modified-searchables.ndjson"
|
||||
},
|
||||
"synchronous": "WaitForTask"
|
||||
}
|
||||
]
|
||||
}
|
||||
Reference in New Issue
Block a user