Compare commits

..

1 Commits

32 changed files with 336 additions and 546 deletions

View File

@ -29,6 +29,7 @@ use bumpalo::collections::CollectIn;
use bumpalo::Bump;
use dump::IndexMetadata;
use meilisearch_types::batches::BatchId;
use meilisearch_types::error::Code;
use meilisearch_types::heed::{RoTxn, RwTxn};
use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader, PrimaryKey};
use meilisearch_types::milli::heed::CompactionOption;
@ -688,9 +689,7 @@ impl IndexScheduler {
let index = self.index_mapper.index(&rtxn, name)?;
let dst = temp_snapshot_dir.path().join("indexes").join(uuid.to_string());
fs::create_dir_all(&dst)?;
index
.copy_to_file(dst.join("data.mdb"), CompactionOption::Enabled)
.map_err(|e| Error::from_milli(e, Some(name.to_string())))?;
index.copy_to_file(dst.join("data.mdb"), CompactionOption::Enabled)?;
}
drop(rtxn);
@ -792,19 +791,16 @@ impl IndexScheduler {
let content_file = self.file_store.get_update(content_file)?;
let reader = DocumentsBatchReader::from_reader(content_file)
.map_err(|e| Error::from_milli(e.into(), None))?;
.map_err(milli::Error::from)?;
let (mut cursor, documents_batch_index) =
reader.into_cursor_and_fields_index();
while let Some(doc) = cursor
.next_document()
.map_err(|e| Error::from_milli(e.into(), None))?
while let Some(doc) =
cursor.next_document().map_err(milli::Error::from)?
{
dump_content_file.push_document(
&obkv_to_object(doc, &documents_batch_index)
.map_err(|e| Error::from_milli(e, None))?,
)?;
dump_content_file
.push_document(&obkv_to_object(doc, &documents_batch_index)?)?;
}
dump_content_file.flush()?;
}
@ -818,41 +814,27 @@ impl IndexScheduler {
let metadata = IndexMetadata {
uid: uid.to_owned(),
primary_key: index.primary_key(&rtxn)?.map(String::from),
created_at: index
.created_at(&rtxn)
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?,
updated_at: index
.updated_at(&rtxn)
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?,
created_at: index.created_at(&rtxn)?,
updated_at: index.updated_at(&rtxn)?,
};
let mut index_dumper = dump.create_index(uid, &metadata)?;
let fields_ids_map = index.fields_ids_map(&rtxn)?;
let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
let embedding_configs = index
.embedding_configs(&rtxn)
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
let embedding_configs = index.embedding_configs(&rtxn)?;
let documents = index
.all_documents(&rtxn)
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
// 3.1. Dump the documents
for ret in documents {
for ret in index.all_documents(&rtxn)? {
if self.must_stop_processing.get() {
return Err(Error::AbortedTask);
}
let (id, doc) =
ret.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
let (id, doc) = ret?;
let mut document =
milli::obkv_to_json(&all_fields, &fields_ids_map, doc)
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
let mut document = milli::obkv_to_json(&all_fields, &fields_ids_map, doc)?;
'inject_vectors: {
let embeddings = index
.embeddings(&rtxn, id)
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
let embeddings = index.embeddings(&rtxn, id)?;
if embeddings.is_empty() {
break 'inject_vectors;
@ -863,7 +845,7 @@ impl IndexScheduler {
.or_insert(serde_json::Value::Object(Default::default()));
let serde_json::Value::Object(vectors) = vectors else {
let user_err = milli::Error::UserError(
return Err(milli::Error::UserError(
milli::UserError::InvalidVectorsMapType {
document_id: {
if let Ok(Some(Ok(index))) = index
@ -877,9 +859,8 @@ impl IndexScheduler {
},
value: vectors.clone(),
},
);
return Err(Error::from_milli(user_err, Some(uid.to_string())));
)
.into());
};
for (embedder_name, embeddings) in embeddings {
@ -909,8 +890,7 @@ impl IndexScheduler {
index,
&rtxn,
meilisearch_types::settings::SecretPolicy::RevealSecrets,
)
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
)?;
index_dumper.settings(&settings)?;
Ok(())
})?;
@ -966,8 +946,7 @@ impl IndexScheduler {
// the entire batch.
let res = || -> Result<()> {
let index_rtxn = index.read_txn()?;
let stats = crate::index_mapper::IndexStats::new(&index, &index_rtxn)
.map_err(|e| Error::from_milli(e, Some(index_uid.to_string())))?;
let stats = crate::index_mapper::IndexStats::new(&index, &index_rtxn)?;
let mut wtxn = self.env.write_txn()?;
self.index_mapper.store_stats_of(&mut wtxn, &index_uid, &stats)?;
wtxn.commit()?;
@ -1009,12 +988,10 @@ impl IndexScheduler {
);
builder.set_primary_key(primary_key);
let must_stop_processing = self.must_stop_processing.clone();
builder
.execute(
builder.execute(
|indexing_step| tracing::debug!(update = ?indexing_step),
|| must_stop_processing.get(),
)
.map_err(|e| Error::from_milli(e, Some(index_uid.to_string())))?;
)?;
index_wtxn.commit()?;
}
@ -1031,8 +1008,7 @@ impl IndexScheduler {
let res = || -> Result<()> {
let mut wtxn = self.env.write_txn()?;
let index_rtxn = index.read_txn()?;
let stats = crate::index_mapper::IndexStats::new(&index, &index_rtxn)
.map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?;
let stats = crate::index_mapper::IndexStats::new(&index, &index_rtxn)?;
self.index_mapper.store_stats_of(&mut wtxn, &index_uid, &stats)?;
wtxn.commit()?;
Ok(())
@ -1055,9 +1031,7 @@ impl IndexScheduler {
let number_of_documents = || -> Result<u64> {
let index = self.index_mapper.index(&wtxn, &index_uid)?;
let index_rtxn = index.read_txn()?;
index
.number_of_documents(&index_rtxn)
.map_err(|e| Error::from_milli(e, Some(index_uid.to_string())))
Ok(index.number_of_documents(&index_rtxn)?)
}()
.unwrap_or_default();
@ -1214,10 +1188,8 @@ impl IndexScheduler {
};
match operation {
IndexOperation::DocumentClear { index_uid, mut tasks } => {
let count = milli::update::ClearDocuments::new(index_wtxn, index)
.execute()
.map_err(|e| Error::from_milli(e, Some(index_uid)))?;
IndexOperation::DocumentClear { mut tasks, .. } => {
let count = milli::update::ClearDocuments::new(index_wtxn, index).execute()?;
let mut first_clear_found = false;
for task in &mut tasks {
@ -1237,7 +1209,7 @@ impl IndexScheduler {
Ok(tasks)
}
IndexOperation::DocumentOperation {
index_uid,
index_uid: _,
primary_key,
method,
operations,
@ -1263,17 +1235,13 @@ impl IndexScheduler {
let mut content_files_iter = content_files.iter();
let mut indexer = indexer::DocumentOperation::new(method);
let embedders = index
.embedding_configs(index_wtxn)
.map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?;
let embedders = self.embedders(index_uid.clone(), embedders)?;
let embedders = index.embedding_configs(index_wtxn)?;
let embedders = self.embedders(embedders)?;
for operation in operations {
match operation {
DocumentOperation::Add(_content_uuid) => {
let mmap = content_files_iter.next().unwrap();
indexer
.add_documents(mmap)
.map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?;
indexer.add_documents(mmap)?;
}
DocumentOperation::Delete(document_ids) => {
let document_ids: bumpalo::collections::vec::Vec<_> = document_ids
@ -1298,8 +1266,7 @@ impl IndexScheduler {
}
};
let (document_changes, operation_stats, primary_key) = indexer
.into_changes(
let (document_changes, operation_stats, primary_key) = indexer.into_changes(
&indexer_alloc,
index,
&rtxn,
@ -1307,8 +1274,7 @@ impl IndexScheduler {
&mut new_fields_ids_map,
&|| must_stop_processing.get(),
&send_progress,
)
.map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?;
)?;
let mut addition = 0;
for (stats, task) in operation_stats.into_iter().zip(&mut tasks) {
@ -1355,15 +1321,14 @@ impl IndexScheduler {
embedders,
&|| must_stop_processing.get(),
&send_progress,
)
.map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?;
)?;
tracing::info!(indexing_result = ?addition, processed_in = ?started_processing_at.elapsed(), "document indexing done");
}
Ok(tasks)
}
IndexOperation::DocumentEdition { index_uid, mut task } => {
IndexOperation::DocumentEdition { mut task, .. } => {
let (filter, code) = if let KindWithContent::DocumentEdition {
filter_expr,
context: _,
@ -1377,11 +1342,16 @@ impl IndexScheduler {
};
let candidates = match filter.as_ref().map(Filter::from_json) {
Some(Ok(Some(filter))) => filter
.evaluate(index_wtxn, index)
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?,
Some(Ok(Some(filter))) => {
filter.evaluate(index_wtxn, index).map_err(|err| match err {
milli::Error::UserError(milli::UserError::InvalidFilter(_)) => {
Error::from(err).with_custom_error_code(Code::InvalidDocumentFilter)
}
e => e.into(),
})?
}
None | Some(Ok(None)) => index.documents_ids(index_wtxn)?,
Some(Err(e)) => return Err(Error::from_milli(e, Some(index_uid.clone()))),
Some(Err(e)) => return Err(e.into()),
};
let (original_filter, context, function) = if let Some(Details::DocumentEdition {
@ -1416,9 +1386,8 @@ impl IndexScheduler {
// candidates not empty => index not empty => a primary key is set
let primary_key = index.primary_key(&rtxn)?.unwrap();
let primary_key =
PrimaryKey::new_or_insert(primary_key, &mut new_fields_ids_map)
.map_err(|err| Error::from_milli(err.into(), Some(index_uid.clone())))?;
let primary_key = PrimaryKey::new_or_insert(primary_key, &mut new_fields_ids_map)
.map_err(milli::Error::from)?;
let result_count = Ok((candidates.len(), candidates.len())) as Result<_>;
@ -1437,17 +1406,11 @@ impl IndexScheduler {
};
let indexer = UpdateByFunction::new(candidates, context.clone(), code.clone());
let document_changes = pool
.install(|| {
indexer
.into_changes(&primary_key)
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))
})
.unwrap()?;
let embedders = index
.embedding_configs(index_wtxn)
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?;
let embedders = self.embedders(index_uid.clone(), embedders)?;
let document_changes =
pool.install(|| indexer.into_changes(&primary_key)).unwrap()?;
let embedders = index.embedding_configs(index_wtxn)?;
let embedders = self.embedders(embedders)?;
indexer::index(
index_wtxn,
@ -1461,8 +1424,7 @@ impl IndexScheduler {
embedders,
&|| must_stop_processing.get(),
&send_progress,
)
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?;
)?;
// tracing::info!(indexing_result = ?addition, processed_in = ?started_processing_at.elapsed(), "document indexing done");
}
@ -1493,7 +1455,7 @@ impl IndexScheduler {
Ok(vec![task])
}
IndexOperation::DocumentDeletion { mut tasks, index_uid } => {
IndexOperation::DocumentDeletion { mut tasks, index_uid: _ } => {
let mut to_delete = RoaringBitmap::new();
let external_documents_ids = index.external_documents_ids();
@ -1514,23 +1476,35 @@ impl IndexScheduler {
deleted_documents: Some(will_be_removed),
});
}
KindWithContent::DocumentDeletionByFilter { index_uid, filter_expr } => {
KindWithContent::DocumentDeletionByFilter { index_uid: _, filter_expr } => {
let before = to_delete.len();
let filter = match Filter::from_json(filter_expr) {
Ok(filter) => filter,
Err(err) => {
// theoretically, this should be caught by deserr before reaching the index-scheduler and cannot happen
task.status = Status::Failed;
task.error = Some(
Error::from_milli(err, Some(index_uid.clone())).into(),
);
task.error = match err {
milli::Error::UserError(
milli::UserError::InvalidFilterExpression { .. },
) => Some(
Error::from(err)
.with_custom_error_code(Code::InvalidDocumentFilter)
.into(),
),
e => Some(e.into()),
};
None
}
};
if let Some(filter) = filter {
let candidates = filter
.evaluate(index_wtxn, index)
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())));
let candidates =
filter.evaluate(index_wtxn, index).map_err(|err| match err {
milli::Error::UserError(
milli::UserError::InvalidFilter(_),
) => Error::from(err)
.with_custom_error_code(Code::InvalidDocumentFilter),
e => e.into(),
});
match candidates {
Ok(candidates) => to_delete |= candidates,
Err(err) => {
@ -1566,9 +1540,8 @@ impl IndexScheduler {
// to_delete not empty => index not empty => primary key set
let primary_key = index.primary_key(&rtxn)?.unwrap();
let primary_key =
PrimaryKey::new_or_insert(primary_key, &mut new_fields_ids_map)
.map_err(|err| Error::from_milli(err.into(), Some(index_uid.clone())))?;
let primary_key = PrimaryKey::new_or_insert(primary_key, &mut new_fields_ids_map)
.map_err(milli::Error::from)?;
if !tasks.iter().all(|res| res.error.is_some()) {
let local_pool;
@ -1587,10 +1560,8 @@ impl IndexScheduler {
let mut indexer = indexer::DocumentDeletion::new();
indexer.delete_documents_by_docids(to_delete);
let document_changes = indexer.into_changes(&indexer_alloc, primary_key);
let embedders = index
.embedding_configs(index_wtxn)
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?;
let embedders = self.embedders(index_uid.clone(), embedders)?;
let embedders = index.embedding_configs(index_wtxn)?;
let embedders = self.embedders(embedders)?;
indexer::index(
index_wtxn,
@ -1604,15 +1575,14 @@ impl IndexScheduler {
embedders,
&|| must_stop_processing.get(),
&send_progress,
)
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?;
)?;
// tracing::info!(indexing_result = ?addition, processed_in = ?started_processing_at.elapsed(), "document indexing done");
}
Ok(tasks)
}
IndexOperation::Settings { index_uid, settings, mut tasks } => {
IndexOperation::Settings { index_uid: _, settings, mut tasks } => {
let indexer_config = self.index_mapper.indexer_config();
let mut builder = milli::update::Settings::new(index_wtxn, index, indexer_config);
@ -1626,12 +1596,10 @@ impl IndexScheduler {
task.status = Status::Succeeded;
}
builder
.execute(
builder.execute(
|indexing_step| tracing::debug!(update = ?indexing_step),
|| must_stop_processing.get(),
)
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?;
)?;
Ok(tasks)
}

View File

@ -1,12 +1,13 @@
use std::fmt::Display;
use crate::TaskId;
use meilisearch_types::batches::BatchId;
use meilisearch_types::error::{Code, ErrorCode};
use meilisearch_types::tasks::{Kind, Status};
use meilisearch_types::{heed, milli};
use thiserror::Error;
use crate::TaskId;
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum DateField {
BeforeEnqueuedAt,
@ -121,11 +122,8 @@ pub enum Error {
Dump(#[from] dump::Error),
#[error(transparent)]
Heed(#[from] heed::Error),
#[error("{}", match .index_uid {
Some(uid) if !uid.is_empty() => format!("Index `{}`: {error}", uid),
_ => format!("{error}")
})]
Milli { error: milli::Error, index_uid: Option<String> },
#[error(transparent)]
Milli(#[from] milli::Error),
#[error("An unexpected crash occurred when processing the task.")]
ProcessBatchPanicked,
#[error(transparent)]
@ -192,7 +190,7 @@ impl Error {
| Error::AbortedTask
| Error::Dump(_)
| Error::Heed(_)
| Error::Milli { .. }
| Error::Milli(_)
| Error::ProcessBatchPanicked
| Error::FileStore(_)
| Error::IoError(_)
@ -211,20 +209,6 @@ impl Error {
/// Wraps this error in the `WithCustomErrorCode` variant, pairing it with `code`.
///
/// The original error is consumed, boxed, and kept as the inner value of the
/// returned variant.
pub fn with_custom_error_code(self, code: Code) -> Self {
Self::WithCustomErrorCode(code, Box::new(self))
}
/// Builds an `Error::Milli` from a milli error and an optional index uid.
///
/// `InvalidFilter` and `InvalidFilterExpression` user errors are additionally
/// wrapped through `with_custom_error_code(Code::InvalidDocumentFilter)`;
/// every other milli error is returned as a plain `Self::Milli`.
pub fn from_milli(err: milli::Error, index_uid: Option<String>) -> Self {
match err {
milli::Error::UserError(milli::UserError::InvalidFilter(_)) => {
Self::Milli { error: err, index_uid }
.with_custom_error_code(Code::InvalidDocumentFilter)
}
milli::Error::UserError(milli::UserError::InvalidFilterExpression { .. }) => {
Self::Milli { error: err, index_uid }
.with_custom_error_code(Code::InvalidDocumentFilter)
}
_ => Self::Milli { error: err, index_uid },
}
}
}
impl ErrorCode for Error {
@ -252,7 +236,7 @@ impl ErrorCode for Error {
// TODO: not sure of the Code to use
Error::NoSpaceLeftInTaskQueue => Code::NoSpaceLeftOnDevice,
Error::Dump(e) => e.error_code(),
Error::Milli { error, .. } => error.error_code(),
Error::Milli(e) => e.error_code(),
Error::ProcessBatchPanicked => Code::Internal,
Error::Heed(e) => e.error_code(),
Error::HeedTransaction(e) => e.error_code(),

View File

@ -3,13 +3,14 @@ use std::path::Path;
use std::time::Duration;
use meilisearch_types::heed::{EnvClosingEvent, EnvFlags, EnvOpenOptions};
use meilisearch_types::milli::{Index, Result};
use meilisearch_types::milli::Index;
use time::OffsetDateTime;
use uuid::Uuid;
use super::IndexStatus::{self, Available, BeingDeleted, Closing, Missing};
use crate::clamp_to_page_size;
use crate::lru::{InsertionOutcome, LruMap};
use crate::{clamp_to_page_size, Result};
/// Keep an internally consistent view of the open indexes in memory.
///
/// This view is made of an LRU cache that will evict the least recently used indexes when new indexes are opened.

View File

@ -3,13 +3,8 @@ use std::sync::{Arc, RwLock};
use std::time::Duration;
use std::{fs, thread};
use self::index_map::IndexMap;
use self::IndexStatus::{Available, BeingDeleted, Closing, Missing};
use crate::uuid_codec::UuidCodec;
use crate::{Error, Result};
use meilisearch_types::heed::types::{SerdeJson, Str};
use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn};
use meilisearch_types::milli;
use meilisearch_types::milli::update::IndexerConfig;
use meilisearch_types::milli::{FieldDistribution, Index};
use serde::{Deserialize, Serialize};
@ -17,6 +12,11 @@ use time::OffsetDateTime;
use tracing::error;
use uuid::Uuid;
use self::index_map::IndexMap;
use self::IndexStatus::{Available, BeingDeleted, Closing, Missing};
use crate::uuid_codec::UuidCodec;
use crate::{Error, Result};
mod index_map;
const INDEX_MAPPING: &str = "index-mapping";
@ -121,7 +121,7 @@ impl IndexStats {
/// # Parameters
///
/// - rtxn: a RO transaction for the index, obtained from `Index::read_txn()`.
pub fn new(index: &Index, rtxn: &RoTxn) -> milli::Result<Self> {
pub fn new(index: &Index, rtxn: &RoTxn) -> Result<Self> {
Ok(IndexStats {
number_of_documents: index.number_of_documents(rtxn)?,
database_size: index.on_disk_size()?,
@ -183,18 +183,13 @@ impl IndexMapper {
// Error if the UUIDv4 somehow already exists in the map, since it should be fresh.
// This is very unlikely to happen in practice.
// TODO: it would be better to lazily create the index. But we need an Index::open function for milli.
let index = self
.index_map
.write()
.unwrap()
.create(
let index = self.index_map.write().unwrap().create(
&uuid,
&index_path,
date,
self.enable_mdb_writemap,
self.index_base_map_size,
)
.map_err(|e| Error::from_milli(e, Some(uuid.to_string())))?;
)?;
wtxn.commit()?;
@ -362,9 +357,7 @@ impl IndexMapper {
};
let index_path = self.base_path.join(uuid.to_string());
// take the lock to reopen the environment.
reopen
.reopen(&mut self.index_map.write().unwrap(), &index_path)
.map_err(|e| Error::from_milli(e, Some(uuid.to_string())))?;
reopen.reopen(&mut self.index_map.write().unwrap(), &index_path)?;
continue;
}
BeingDeleted => return Err(Error::IndexNotFound(name.to_string())),
@ -379,15 +372,13 @@ impl IndexMapper {
Missing => {
let index_path = self.base_path.join(uuid.to_string());
break index_map
.create(
break index_map.create(
&uuid,
&index_path,
None,
self.enable_mdb_writemap,
self.index_base_map_size,
)
.map_err(|e| Error::from_milli(e, Some(uuid.to_string())))?;
)?;
}
Available(index) => break index,
Closing(_) => {
@ -469,7 +460,6 @@ impl IndexMapper {
let index = self.index(rtxn, index_uid)?;
let index_rtxn = index.read_txn()?;
IndexStats::new(&index, &index_rtxn)
.map_err(|e| Error::from_milli(e, Some(uuid.to_string())))
}
}
}

View File

@ -1678,10 +1678,9 @@ impl IndexScheduler {
tracing::info!("A batch of tasks was successfully completed with {success} successful tasks and {failure} failed tasks.");
}
// If we have an abortion error we must stop the tick here and re-schedule tasks.
Err(Error::Milli {
error: milli::Error::InternalError(milli::InternalError::AbortedIndexation),
..
})
Err(Error::Milli(milli::Error::InternalError(
milli::InternalError::AbortedIndexation,
)))
| Err(Error::AbortedTask) => {
#[cfg(test)]
self.breakpoint(Breakpoint::AbortedIndexation);
@ -1700,10 +1699,9 @@ impl IndexScheduler {
// 2. close the associated environment
// 3. resize it
// 4. re-schedule tasks
Err(Error::Milli {
error: milli::Error::UserError(milli::UserError::MaxDatabaseSizeReached),
..
}) if index_uid.is_some() => {
Err(Error::Milli(milli::Error::UserError(
milli::UserError::MaxDatabaseSizeReached,
))) if index_uid.is_some() => {
// fixme: add index_uid to match to avoid the unwrap
let index_uid = index_uid.unwrap();
// fixme: handle error more gracefully? not sure when this could happen
@ -1945,7 +1943,6 @@ impl IndexScheduler {
// TODO: consider using a type alias or a struct embedder/template
pub fn embedders(
&self,
index_uid: String,
embedding_configs: Vec<IndexEmbeddingConfig>,
) -> Result<EmbeddingConfigs> {
let res: Result<_> = embedding_configs
@ -1956,12 +1953,8 @@ impl IndexScheduler {
config: milli::vector::EmbeddingConfig { embedder_options, prompt, quantized },
..
}| {
let prompt = Arc::new(
prompt
.try_into()
.map_err(meilisearch_types::milli::Error::from)
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?,
);
let prompt =
Arc::new(prompt.try_into().map_err(meilisearch_types::milli::Error::from)?);
// optimistically return existing embedder
{
let embedders = self.embedders.read().unwrap();
@ -1977,9 +1970,7 @@ impl IndexScheduler {
let embedder = Arc::new(
Embedder::new(embedder_options.clone())
.map_err(meilisearch_types::milli::vector::Error::from)
.map_err(|err| {
Error::from_milli(err.into(), Some(index_uid.clone()))
})?,
.map_err(meilisearch_types::milli::Error::from)?,
);
{
let mut embedders = self.embedders.write().unwrap();
@ -6180,7 +6171,7 @@ mod tests {
insta::assert_json_snapshot!(simple_hf_config.embedder_options);
let simple_hf_name = name.clone();
let configs = index_scheduler.embedders("doggos".to_string(), configs).unwrap();
let configs = index_scheduler.embedders(configs).unwrap();
let (hf_embedder, _, _) = configs.get(&simple_hf_name).unwrap();
let beagle_embed =
hf_embedder.embed_one(S("Intel the beagle best doggo"), None).unwrap();

View File

@ -9,8 +9,8 @@ source: crates/index-scheduler/src/lib.rs
0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
1 {uid: 1, batch_uid: 1, status: succeeded, details: { received_documents: 3, indexed_documents: Some(3) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }}
2 {uid: 2, batch_uid: 2, status: succeeded, details: { received_document_ids: 1, deleted_documents: Some(1) }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1"] }}
3 {uid: 3, batch_uid: 2, status: failed, error: ResponseError { code: 200, message: "Index `doggos`: Invalid type for filter subexpression: expected: String, Array, found: true.", error_code: "invalid_document_filter", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#invalid_document_filter" }, details: { original_filter: true, deleted_documents: Some(0) }, kind: DocumentDeletionByFilter { index_uid: "doggos", filter_expr: Bool(true) }}
4 {uid: 4, batch_uid: 2, status: failed, error: ResponseError { code: 200, message: "Index `doggos`: Attribute `id` is not filterable. Available filterable attributes are: `catto`.\n1:3 id = 2", error_code: "invalid_document_filter", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#invalid_document_filter" }, details: { original_filter: "id = 2", deleted_documents: Some(0) }, kind: DocumentDeletionByFilter { index_uid: "doggos", filter_expr: String("id = 2") }}
3 {uid: 3, batch_uid: 2, status: failed, error: ResponseError { code: 200, message: "Invalid type for filter subexpression: expected: String, Array, found: true.", error_code: "invalid_document_filter", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#invalid_document_filter" }, details: { original_filter: true, deleted_documents: Some(0) }, kind: DocumentDeletionByFilter { index_uid: "doggos", filter_expr: Bool(true) }}
4 {uid: 4, batch_uid: 2, status: failed, error: ResponseError { code: 200, message: "Attribute `id` is not filterable. Available filterable attributes are: `catto`.\n1:3 id = 2", error_code: "invalid_document_filter", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#invalid_document_filter" }, details: { original_filter: "id = 2", deleted_documents: Some(0) }, kind: DocumentDeletionByFilter { index_uid: "doggos", filter_expr: String("id = 2") }}
5 {uid: 5, batch_uid: 2, status: succeeded, details: { original_filter: "catto EXISTS", deleted_documents: Some(1) }, kind: DocumentDeletionByFilter { index_uid: "doggos", filter_expr: String("catto EXISTS") }}
----------------------------------------------------------------------
### Status:

View File

@ -4,7 +4,6 @@ use byte_unit::{Byte, UnitType};
use meilisearch_types::document_formats::{DocumentFormatError, PayloadType};
use meilisearch_types::error::{Code, ErrorCode, ResponseError};
use meilisearch_types::index_uid::{IndexUid, IndexUidFormatError};
use meilisearch_types::milli;
use meilisearch_types::milli::OrderBy;
use serde_json::Value;
use tokio::task::JoinError;
@ -63,11 +62,8 @@ pub enum MeilisearchHttpError {
HeedError(#[from] meilisearch_types::heed::Error),
#[error(transparent)]
IndexScheduler(#[from] index_scheduler::Error),
#[error("{}", match .index_name {
Some(name) if !name.is_empty() => format!("Index `{}`: {error}", name),
_ => format!("{error}")
})]
Milli { error: milli::Error, index_name: Option<String> },
#[error(transparent)]
Milli(#[from] meilisearch_types::milli::Error),
#[error(transparent)]
Payload(#[from] PayloadError),
#[error(transparent)]
@ -80,12 +76,6 @@ pub enum MeilisearchHttpError {
MissingSearchHybrid,
}
impl MeilisearchHttpError {
pub(crate) fn from_milli(error: milli::Error, index_name: Option<String>) -> Self {
Self::Milli { error, index_name }
}
}
impl ErrorCode for MeilisearchHttpError {
fn error_code(&self) -> Code {
match self {
@ -105,7 +95,7 @@ impl ErrorCode for MeilisearchHttpError {
MeilisearchHttpError::SerdeJson(_) => Code::Internal,
MeilisearchHttpError::HeedError(_) => Code::Internal,
MeilisearchHttpError::IndexScheduler(e) => e.error_code(),
MeilisearchHttpError::Milli { error, .. } => error.error_code(),
MeilisearchHttpError::Milli(e) => e.error_code(),
MeilisearchHttpError::Payload(e) => e.error_code(),
MeilisearchHttpError::FileStore(_) => Code::Internal,
MeilisearchHttpError::DocumentFormat(e) => e.error_code(),

View File

@ -395,7 +395,6 @@ fn import_dump(
for index_reader in dump_reader.indexes()? {
let mut index_reader = index_reader?;
let metadata = index_reader.metadata();
let uid = metadata.uid.clone();
tracing::info!("Importing index `{}`.", metadata.uid);
let date = Some((metadata.created_at, metadata.updated_at));
@ -433,7 +432,7 @@ fn import_dump(
let reader = DocumentsBatchReader::from_reader(reader)?;
let embedder_configs = index.embedding_configs(&wtxn)?;
let embedders = index_scheduler.embedders(uid, embedder_configs)?;
let embedders = index_scheduler.embedders(embedder_configs)?;
let builder = milli::update::IndexDocuments::new(
&mut wtxn,

View File

@ -654,8 +654,9 @@ impl Opt {
#[derive(Debug, Default, Clone, Parser, Deserialize)]
pub struct IndexerOpts {
/// Sets the maximum amount of RAM Meilisearch can use when indexing. By default, Meilisearch
/// uses no more than two thirds of available memory.
/// Specifies the maximum resident memory that Meilisearch can use for indexing.
/// By default, Meilisearch limits the RAM usage to 5% of the total available memory.
/// Note that the underlying store utilizes memory-mapping and makes use of the rest.
#[clap(long, env = MEILI_MAX_INDEXING_MEMORY, default_value_t)]
#[serde(default)]
pub max_indexing_memory: MaxMemory,
@ -714,7 +715,7 @@ impl TryFrom<&IndexerOpts> for IndexerConfig {
}
}
/// A type used to detect the max memory available and use 2/3 of it.
/// A type used to detect the max resident memory available and use 5% of it.
#[derive(Debug, Clone, Copy, Deserialize, Serialize)]
pub struct MaxMemory(Option<Byte>);
@ -728,7 +729,7 @@ impl FromStr for MaxMemory {
impl Default for MaxMemory {
fn default() -> MaxMemory {
MaxMemory(total_memory_bytes().map(|bytes| bytes * 2 / 3).map(Byte::from_u64))
MaxMemory(total_memory_bytes().map(|bytes| bytes * 5 / 100).map(Byte::from_u64))
}
}

View File

@ -185,8 +185,7 @@ pub async fn search(
let index = index_scheduler.index(&index_uid)?;
let features = index_scheduler.features();
let search_kind =
search_kind(&search_query, &index_scheduler, index_uid.to_string(), &index, features)?;
let search_kind = search_kind(&search_query, &index_scheduler, &index, features)?;
let permit = search_queue.try_get_search_permit().await?;
let search_result = tokio::task::spawn_blocking(move || {
perform_facet_search(

View File

@ -5,7 +5,7 @@ use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse};
use deserr::actix_web::{AwebJson, AwebQueryParameter};
use deserr::{DeserializeError, Deserr, ValuePointerRef};
use index_scheduler::{Error, IndexScheduler};
use index_scheduler::IndexScheduler;
use meilisearch_types::deserr::query_params::Param;
use meilisearch_types::deserr::{immutable_field_error, DeserrJsonError, DeserrQueryParamError};
use meilisearch_types::error::deserr_codes::*;
@ -107,10 +107,7 @@ pub async fn list_indexes(
if !filters.is_index_authorized(uid) {
return Ok(None);
}
Ok(Some(
IndexView::new(uid.to_string(), index)
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?,
))
Ok(Some(IndexView::new(uid.to_string(), index)?))
})?;
// Won't cause all indexes to be opened because IndexView doesn't keep the `Index` open.
let indexes: Vec<IndexView> = indexes.into_iter().flatten().collect();

View File

@ -243,19 +243,11 @@ pub async fn search_with_url_query(
let index = index_scheduler.index(&index_uid)?;
let features = index_scheduler.features();
let search_kind =
search_kind(&query, index_scheduler.get_ref(), index_uid.to_string(), &index, features)?;
let search_kind = search_kind(&query, index_scheduler.get_ref(), &index, features)?;
let retrieve_vector = RetrieveVectors::new(query.retrieve_vectors, features)?;
let permit = search_queue.try_get_search_permit().await?;
let search_result = tokio::task::spawn_blocking(move || {
perform_search(
index_uid.to_string(),
&index,
query,
search_kind,
retrieve_vector,
index_scheduler.features(),
)
perform_search(&index, query, search_kind, retrieve_vector, index_scheduler.features())
})
.await;
permit.drop().await;
@ -295,20 +287,12 @@ pub async fn search_with_post(
let features = index_scheduler.features();
let search_kind =
search_kind(&query, index_scheduler.get_ref(), index_uid.to_string(), &index, features)?;
let search_kind = search_kind(&query, index_scheduler.get_ref(), &index, features)?;
let retrieve_vectors = RetrieveVectors::new(query.retrieve_vectors, features)?;
let permit = search_queue.try_get_search_permit().await?;
let search_result = tokio::task::spawn_blocking(move || {
perform_search(
index_uid.to_string(),
&index,
query,
search_kind,
retrieve_vectors,
index_scheduler.features(),
)
perform_search(&index, query, search_kind, retrieve_vectors, index_scheduler.features())
})
.await;
permit.drop().await;
@ -330,7 +314,6 @@ pub async fn search_with_post(
pub fn search_kind(
query: &SearchQuery,
index_scheduler: &IndexScheduler,
index_uid: String,
index: &milli::Index,
features: RoFeatures,
) -> Result<SearchKind, ResponseError> {
@ -349,7 +332,7 @@ pub fn search_kind(
(None, _, None) => Ok(SearchKind::KeywordOnly),
// hybrid.semantic_ratio == 1.0 => vector
(_, Some(HybridQuery { semantic_ratio, embedder }), v) if **semantic_ratio == 1.0 => {
SearchKind::semantic(index_scheduler, index_uid, index, embedder, v.map(|v| v.len()))
SearchKind::semantic(index_scheduler, index, embedder, v.map(|v| v.len()))
}
// hybrid.semantic_ratio == 0.0 => keyword
(_, Some(HybridQuery { semantic_ratio, embedder: _ }), _) if **semantic_ratio == 0.0 => {
@ -357,14 +340,13 @@ pub fn search_kind(
}
// no query, hybrid, vector => semantic
(None, Some(HybridQuery { semantic_ratio: _, embedder }), Some(v)) => {
SearchKind::semantic(index_scheduler, index_uid, index, embedder, Some(v.len()))
SearchKind::semantic(index_scheduler, index, embedder, Some(v.len()))
}
// query, no hybrid, no vector => keyword
(Some(_), None, None) => Ok(SearchKind::KeywordOnly),
// query, hybrid, maybe vector => hybrid
(Some(_), Some(HybridQuery { semantic_ratio, embedder }), v) => SearchKind::hybrid(
index_scheduler,
index_uid,
index,
embedder,
**semantic_ratio,

View File

@ -103,13 +103,8 @@ async fn similar(
let index = index_scheduler.index(&index_uid)?;
let (embedder_name, embedder, quantized) = SearchKind::embedder(
&index_scheduler,
index_uid.to_string(),
&index,
&query.embedder,
None,
)?;
let (embedder_name, embedder, quantized) =
SearchKind::embedder(&index_scheduler, &index, &query.embedder, None)?;
tokio::task::spawn_blocking(move || {
perform_similar(

View File

@ -125,28 +125,14 @@ pub async fn multi_search_with_post(
})
.with_index(query_index)?;
let index_uid_str = index_uid.to_string();
let search_kind = search_kind(
&query,
index_scheduler.get_ref(),
index_uid_str.clone(),
&index,
features,
)
let search_kind =
search_kind(&query, index_scheduler.get_ref(), &index, features)
.with_index(query_index)?;
let retrieve_vector = RetrieveVectors::new(query.retrieve_vectors, features)
.with_index(query_index)?;
let search_result = tokio::task::spawn_blocking(move || {
perform_search(
index_uid_str.clone(),
&index,
query,
search_kind,
retrieve_vector,
features,
)
perform_search(&index, query, search_kind, retrieve_vector, features)
})
.await
.with_index(query_index)?;

View File

@ -560,8 +560,7 @@ pub fn perform_federated_search(
// use an immediately invoked lambda to capture the result without returning from the function
let res: Result<(), ResponseError> = (|| {
let search_kind =
search_kind(&query, index_scheduler, index_uid.to_string(), &index, features)?;
let search_kind = search_kind(&query, index_scheduler, &index, features)?;
let canonicalization_kind = match (&search_kind, &query.q) {
(SearchKind::SemanticOnly { .. }, _) => {
@ -637,8 +636,7 @@ pub fn perform_federated_search(
search.offset(0);
search.limit(required_hit_count);
let (result, _semantic_hit_count) =
super::search_from_kind(index_uid.to_string(), search_kind, search)?;
let (result, _semantic_hit_count) = super::search_from_kind(search_kind, search)?;
let format = AttributesFormat {
attributes_to_retrieve: query.attributes_to_retrieve,
retrieve_vectors,
@ -672,10 +670,7 @@ pub fn perform_federated_search(
let formatter_builder = HitMaker::formatter_builder(matching_words, tokenizer);
let hit_maker =
HitMaker::new(&index, &rtxn, format, formatter_builder).map_err(|e| {
MeilisearchHttpError::from_milli(e, Some(index_uid.to_string()))
})?;
let hit_maker = HitMaker::new(&index, &rtxn, format, formatter_builder)?;
results_by_query.push(SearchResultByQuery {
federation_options,

View File

@ -19,9 +19,7 @@ use meilisearch_types::locales::Locale;
use meilisearch_types::milli::score_details::{ScoreDetails, ScoringStrategy};
use meilisearch_types::milli::vector::parsed_vectors::ExplicitVectors;
use meilisearch_types::milli::vector::Embedder;
use meilisearch_types::milli::{
FacetValueHit, InternalError, OrderBy, SearchForFacetValues, TimeBudget,
};
use meilisearch_types::milli::{FacetValueHit, OrderBy, SearchForFacetValues, TimeBudget};
use meilisearch_types::settings::DEFAULT_PAGINATION_MAX_TOTAL_HITS;
use meilisearch_types::{milli, Document};
use milli::tokenizer::{Language, TokenizerBuilder};
@ -283,38 +281,35 @@ pub enum SearchKind {
impl SearchKind {
/// Builds a `SearchKind::SemanticOnly` from the embedder resolved by `Self::embedder`.
///
/// `embedder_name` and `vector_len` are forwarded to `Self::embedder` untouched; the
/// `(embedder_name, embedder, quantized)` triple it returns is stored as-is in the variant.
///
/// # Errors
/// Returns whatever `ResponseError` `Self::embedder` returns (propagated with `?`).
///
/// NOTE(review): this listing interleaves both sides of a diff without +/- markers. The
/// `index_uid: String` parameter and the `Self::embedder` call that forwards `index_uid`
/// appear to be the removed side — confirm against the actual file.
pub(crate) fn semantic(
index_scheduler: &index_scheduler::IndexScheduler,
index_uid: String,
index: &Index,
embedder_name: &str,
vector_len: Option<usize>,
) -> Result<Self, ResponseError> {
let (embedder_name, embedder, quantized) =
Self::embedder(index_scheduler, index_uid, index, embedder_name, vector_len)?;
Self::embedder(index_scheduler, index, embedder_name, vector_len)?;
Ok(Self::SemanticOnly { embedder_name, embedder, quantized })
}
/// Builds a `SearchKind::Hybrid` from the embedder resolved by `Self::embedder`.
///
/// `semantic_ratio` is stored in the variant exactly as received; `embedder_name` and
/// `vector_len` are only forwarded to `Self::embedder`.
///
/// # Errors
/// Returns whatever `ResponseError` `Self::embedder` returns (propagated with `?`).
///
/// NOTE(review): this listing interleaves both sides of a diff without +/- markers. The
/// `index_uid: String` parameter and the `Self::embedder` call that forwards `index_uid`
/// appear to be the removed side — confirm against the actual file.
pub(crate) fn hybrid(
index_scheduler: &index_scheduler::IndexScheduler,
index_uid: String,
index: &Index,
embedder_name: &str,
semantic_ratio: f32,
vector_len: Option<usize>,
) -> Result<Self, ResponseError> {
let (embedder_name, embedder, quantized) =
Self::embedder(index_scheduler, index_uid, index, embedder_name, vector_len)?;
Self::embedder(index_scheduler, index, embedder_name, vector_len)?;
Ok(Self::Hybrid { embedder_name, embedder, quantized, semantic_ratio })
}
pub(crate) fn embedder(
index_scheduler: &index_scheduler::IndexScheduler,
index_uid: String,
index: &Index,
embedder_name: &str,
vector_len: Option<usize>,
) -> Result<(String, Arc<Embedder>, bool), ResponseError> {
let embedder_configs = index.embedding_configs(&index.read_txn()?)?;
let embedders = index_scheduler.embedders(index_uid, embedder_configs)?;
let embedders = index_scheduler.embedders(embedder_configs)?;
let (embedder, _, quantized) = embedders
.get(embedder_name)
@ -895,7 +890,6 @@ fn prepare_search<'t>(
}
pub fn perform_search(
index_uid: String,
index: &Index,
query: SearchQuery,
search_kind: SearchKind,
@ -922,7 +916,7 @@ pub fn perform_search(
used_negative_operator,
},
semantic_hit_count,
) = search_from_kind(index_uid, search_kind, search)?;
) = search_from_kind(search_kind, search)?;
let SearchQuery {
q,
@ -1075,27 +1069,17 @@ fn compute_facet_distribution_stats<S: AsRef<str>>(
}
/// Runs `search` in the mode selected by `search_kind` and returns the milli search result
/// together with an optional semantic hit count.
///
/// The second element of the returned tuple is:
/// - `None` for `SearchKind::KeywordOnly`;
/// - `Some(results.document_scores.len() as u32)` for `SearchKind::SemanticOnly`;
/// - whatever `execute_hybrid` returns as its second tuple element for `SearchKind::Hybrid`.
///
/// # Errors
/// Any error returned by `execute` / `execute_hybrid` is converted into a
/// `MeilisearchHttpError` and returned.
///
/// NOTE(review): this listing interleaves both sides of a diff without +/- markers. The
/// `index_uid: String` parameter and the arms that call
/// `MeilisearchHttpError::from_milli(e, Some(index_uid…))` appear to be the removed side,
/// and the arms that use a bare `?` the added side — confirm against the actual file.
pub fn search_from_kind(
index_uid: String,
search_kind: SearchKind,
search: milli::Search<'_>,
) -> Result<(milli::SearchResult, Option<u32>), MeilisearchHttpError> {
let (milli_result, semantic_hit_count) = match &search_kind {
SearchKind::KeywordOnly => {
let results = search
.execute()
.map_err(|e| MeilisearchHttpError::from_milli(e, Some(index_uid.to_string())))?;
(results, None)
}
SearchKind::KeywordOnly => (search.execute()?, None),
SearchKind::SemanticOnly { .. } => {
let results = search
.execute()
.map_err(|e| MeilisearchHttpError::from_milli(e, Some(index_uid.to_string())))?;
let results = search.execute()?;
// The semantic hit count is taken from the length of `document_scores`.
let semantic_hit_count = results.document_scores.len() as u32;
(results, Some(semantic_hit_count))
}
// `execute_hybrid` already yields the `(result, semantic_hit_count)` pair.
SearchKind::Hybrid { semantic_ratio, .. } => search
.execute_hybrid(*semantic_ratio)
.map_err(|e| MeilisearchHttpError::from_milli(e, Some(index_uid)))?,
SearchKind::Hybrid { semantic_ratio, .. } => search.execute_hybrid(*semantic_ratio)?,
};
Ok((milli_result, semantic_hit_count))
}
@ -1197,7 +1181,7 @@ impl<'a> HitMaker<'a> {
rtxn: &'a RoTxn<'a>,
format: AttributesFormat,
mut formatter_builder: MatcherBuilder<'a>,
) -> milli::Result<Self> {
) -> Result<Self, MeilisearchHttpError> {
formatter_builder.crop_marker(format.crop_marker);
formatter_builder.highlight_prefix(format.highlight_pre_tag);
formatter_builder.highlight_suffix(format.highlight_post_tag);
@ -1292,7 +1276,11 @@ impl<'a> HitMaker<'a> {
})
}
pub fn make_hit(&self, id: u32, score: &[ScoreDetails]) -> milli::Result<SearchHit> {
pub fn make_hit(
&self,
id: u32,
score: &[ScoreDetails],
) -> Result<SearchHit, MeilisearchHttpError> {
let (_, obkv) =
self.index.iter_documents(self.rtxn, std::iter::once(id))?.next().unwrap()?;
@ -1335,10 +1323,7 @@ impl<'a> HitMaker<'a> {
.is_some_and(|conf| conf.user_provided.contains(id));
let embeddings =
ExplicitVectors { embeddings: Some(vector.into()), regenerate: !user_provided };
vectors.insert(
name,
serde_json::to_value(embeddings).map_err(InternalError::SerdeJson)?,
);
vectors.insert(name, serde_json::to_value(embeddings)?);
}
document.insert("_vectors".into(), vectors.into());
}
@ -1384,7 +1369,7 @@ fn make_hits<'a>(
format: AttributesFormat,
matching_words: milli::MatchingWords,
documents_ids_scores: impl Iterator<Item = (u32, &'a Vec<ScoreDetails>)> + 'a,
) -> milli::Result<Vec<SearchHit>> {
) -> Result<Vec<SearchHit>, MeilisearchHttpError> {
let mut documents = Vec::new();
let dictionary = index.dictionary(rtxn)?;
@ -1712,12 +1697,12 @@ fn make_document(
displayed_attributes: &BTreeSet<FieldId>,
field_ids_map: &FieldsIdsMap,
obkv: &obkv::KvReaderU16,
) -> milli::Result<Document> {
) -> Result<Document, MeilisearchHttpError> {
let mut document = serde_json::Map::new();
// recreate the original json
for (key, value) in obkv.iter() {
let value = serde_json::from_slice(value).map_err(InternalError::SerdeJson)?;
let value = serde_json::from_slice(value)?;
let key = field_ids_map.name(key).expect("Missing field name").to_string();
document.insert(key, value);
@ -1742,7 +1727,7 @@ fn format_fields(
displayable_ids: &BTreeSet<FieldId>,
locales: Option<&[Language]>,
localized_attributes: &[LocalizedAttributesRule],
) -> milli::Result<(Option<MatchesPosition>, Document)> {
) -> Result<(Option<MatchesPosition>, Document), MeilisearchHttpError> {
let mut matches_position = compute_matches.then(BTreeMap::new);
let mut document = document.clone();
@ -1920,7 +1905,7 @@ fn parse_filter_array(arr: &[Value]) -> Result<Option<Filter>, MeilisearchHttpEr
}
}
Filter::from_array(ands).map_err(|e| MeilisearchHttpError::from_milli(e, None))
Ok(Filter::from_array(ands)?)
}
#[cfg(test)]

View File

@ -1681,7 +1681,7 @@ async fn add_documents_invalid_geo_field() {
"indexedDocuments": 0
},
"error": {
"message": "Index `test`: The `_geo` field in the document with the id: `\"11\"` is not an object. Was expecting an object with the `_geo.lat` and `_geo.lng` fields but instead got `\"foobar\"`.",
"message": "The `_geo` field in the document with the id: `\"11\"` is not an object. Was expecting an object with the `_geo.lat` and `_geo.lng` fields but instead got `\"foobar\"`.",
"code": "invalid_document_geo_field",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"
@ -1719,7 +1719,7 @@ async fn add_documents_invalid_geo_field() {
"indexedDocuments": 0
},
"error": {
"message": "Index `test`: Could not find latitude nor longitude in the document with the id: `\"11\"`. Was expecting `_geo.lat` and `_geo.lng` fields.",
"message": "Could not find latitude nor longitude in the document with the id: `\"11\"`. Was expecting `_geo.lat` and `_geo.lng` fields.",
"code": "invalid_document_geo_field",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"
@ -1757,7 +1757,7 @@ async fn add_documents_invalid_geo_field() {
"indexedDocuments": 0
},
"error": {
"message": "Index `test`: Could not find latitude nor longitude in the document with the id: `\"11\"`. Was expecting `_geo.lat` and `_geo.lng` fields.",
"message": "Could not find latitude nor longitude in the document with the id: `\"11\"`. Was expecting `_geo.lat` and `_geo.lng` fields.",
"code": "invalid_document_geo_field",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"
@ -1795,7 +1795,7 @@ async fn add_documents_invalid_geo_field() {
"indexedDocuments": 0
},
"error": {
"message": "Index `test`: Could not find longitude in the document with the id: `\"11\"`. Was expecting a `_geo.lng` field.",
"message": "Could not find longitude in the document with the id: `\"11\"`. Was expecting a `_geo.lng` field.",
"code": "invalid_document_geo_field",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"
@ -1833,7 +1833,7 @@ async fn add_documents_invalid_geo_field() {
"indexedDocuments": 0
},
"error": {
"message": "Index `test`: Could not find latitude in the document with the id: `\"11\"`. Was expecting a `_geo.lat` field.",
"message": "Could not find latitude in the document with the id: `\"11\"`. Was expecting a `_geo.lat` field.",
"code": "invalid_document_geo_field",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"
@ -1871,7 +1871,7 @@ async fn add_documents_invalid_geo_field() {
"indexedDocuments": 0
},
"error": {
"message": "Index `test`: Could not find longitude in the document with the id: `\"11\"`. Was expecting a `_geo.lng` field.",
"message": "Could not find longitude in the document with the id: `\"11\"`. Was expecting a `_geo.lng` field.",
"code": "invalid_document_geo_field",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"
@ -1909,7 +1909,7 @@ async fn add_documents_invalid_geo_field() {
"indexedDocuments": 0
},
"error": {
"message": "Index `test`: Could not find latitude in the document with the id: `\"11\"`. Was expecting a `_geo.lat` field.",
"message": "Could not find latitude in the document with the id: `\"11\"`. Was expecting a `_geo.lat` field.",
"code": "invalid_document_geo_field",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"
@ -1947,7 +1947,7 @@ async fn add_documents_invalid_geo_field() {
"indexedDocuments": 0
},
"error": {
"message": "Index `test`: Could not parse latitude nor longitude in the document with the id: `\"11\"`. Was expecting finite numbers but instead got `false` and `true`.",
"message": "Could not parse latitude nor longitude in the document with the id: `\"11\"`. Was expecting finite numbers but instead got `false` and `true`.",
"code": "invalid_document_geo_field",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"
@ -1985,7 +1985,7 @@ async fn add_documents_invalid_geo_field() {
"indexedDocuments": 0
},
"error": {
"message": "Index `test`: Could not find longitude in the document with the id: `\"11\"`. Was expecting a `_geo.lng` field.",
"message": "Could not find longitude in the document with the id: `\"11\"`. Was expecting a `_geo.lng` field.",
"code": "invalid_document_geo_field",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"
@ -2023,7 +2023,7 @@ async fn add_documents_invalid_geo_field() {
"indexedDocuments": 0
},
"error": {
"message": "Index `test`: Could not find latitude in the document with the id: `\"11\"`. Was expecting a `_geo.lat` field.",
"message": "Could not find latitude in the document with the id: `\"11\"`. Was expecting a `_geo.lat` field.",
"code": "invalid_document_geo_field",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"
@ -2061,7 +2061,7 @@ async fn add_documents_invalid_geo_field() {
"indexedDocuments": 0
},
"error": {
"message": "Index `test`: Could not parse latitude nor longitude in the document with the id: `\"11\"`. Was expecting finite numbers but instead got `\"doggo\"` and `\"doggo\"`.",
"message": "Could not parse latitude nor longitude in the document with the id: `\"11\"`. Was expecting finite numbers but instead got `\"doggo\"` and `\"doggo\"`.",
"code": "invalid_document_geo_field",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"
@ -2099,7 +2099,7 @@ async fn add_documents_invalid_geo_field() {
"indexedDocuments": 0
},
"error": {
"message": "Index `test`: The `_geo` field in the document with the id: `\"11\"` contains the following unexpected fields: `{\"doggo\":\"are the best\"}`.",
"message": "The `_geo` field in the document with the id: `\"11\"` contains the following unexpected fields: `{\"doggo\":\"are the best\"}`.",
"code": "invalid_document_geo_field",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"
@ -2138,7 +2138,7 @@ async fn add_documents_invalid_geo_field() {
"indexedDocuments": 0
},
"error": {
"message": "Index `test`: Could not parse longitude in the document with the id: `\"12\"`. Was expecting a finite number but instead got `null`.",
"message": "Could not parse longitude in the document with the id: `\"12\"`. Was expecting a finite number but instead got `null`.",
"code": "invalid_document_geo_field",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"
@ -2175,7 +2175,7 @@ async fn add_documents_invalid_geo_field() {
"indexedDocuments": 0
},
"error": {
"message": "Index `test`: Could not parse latitude in the document with the id: `\"12\"`. Was expecting a finite number but instead got `null`.",
"message": "Could not parse latitude in the document with the id: `\"12\"`. Was expecting a finite number but instead got `null`.",
"code": "invalid_document_geo_field",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"
@ -2212,7 +2212,7 @@ async fn add_documents_invalid_geo_field() {
"indexedDocuments": 0
},
"error": {
"message": "Index `test`: Could not parse latitude nor longitude in the document with the id: `\"13\"`. Was expecting finite numbers but instead got `null` and `null`.",
"message": "Could not parse latitude nor longitude in the document with the id: `\"13\"`. Was expecting finite numbers but instead got `null` and `null`.",
"code": "invalid_document_geo_field",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"
@ -2279,7 +2279,7 @@ async fn add_invalid_geo_and_then_settings() {
]
},
"error": {
"message": "Index `test`: Could not parse latitude in the document with the id: `\"11\"`. Was expecting a finite number but instead got `null`.",
"message": "Could not parse latitude in the document with the id: `\"11\"`. Was expecting a finite number but instead got `null`.",
"code": "invalid_document_geo_field",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"

View File

@ -604,7 +604,7 @@ async fn delete_document_by_filter() {
"originalFilter": "\"doggo = bernese\""
},
"error": {
"message": "Index `EMPTY_INDEX`: Attribute `doggo` is not filterable. This index does not have configured filterable attributes.\n1:6 doggo = bernese",
"message": "Attribute `doggo` is not filterable. This index does not have configured filterable attributes.\n1:6 doggo = bernese",
"code": "invalid_document_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_filter"
@ -636,7 +636,7 @@ async fn delete_document_by_filter() {
"originalFilter": "\"catto = jorts\""
},
"error": {
"message": "Index `SHARED_DOCUMENTS`: Attribute `catto` is not filterable. Available filterable attributes are: `id`, `title`.\n1:6 catto = jorts",
"message": "Attribute `catto` is not filterable. Available filterable attributes are: `id`, `title`.\n1:6 catto = jorts",
"code": "invalid_document_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_filter"

View File

@ -95,7 +95,7 @@ async fn error_update_existing_primary_key() {
let response = index.wait_task(2).await;
let expected_response = json!({
"message": "Index `test`: Index already has a primary key: `id`.",
"message": "Index already has a primary key: `id`.",
"code": "index_primary_key_already_exists",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#index_primary_key_already_exists"

View File

@ -711,7 +711,7 @@ async fn filter_invalid_attribute_array() {
index.wait_task(task.uid()).await;
let expected_response = json!({
"message": format!("Index `{}`: Attribute `many` is not filterable. Available filterable attributes are: `title`.\n1:5 many = Glass", index.uid),
"message": "Attribute `many` is not filterable. Available filterable attributes are: `title`.\n1:5 many = Glass",
"code": "invalid_search_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
@ -733,7 +733,7 @@ async fn filter_invalid_attribute_string() {
index.wait_task(task.uid()).await;
let expected_response = json!({
"message": format!("Index `{}`: Attribute `many` is not filterable. Available filterable attributes are: `title`.\n1:5 many = Glass", index.uid),
"message": "Attribute `many` is not filterable. Available filterable attributes are: `title`.\n1:5 many = Glass",
"code": "invalid_search_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
@ -940,7 +940,7 @@ async fn sort_unsortable_attribute() {
index.wait_task(response.uid()).await.succeeded();
let expected_response = json!({
"message": format!("Index `{}`: Attribute `title` is not sortable. Available sortable attributes are: `id`.", index.uid),
"message": "Attribute `title` is not sortable. Available sortable attributes are: `id`.",
"code": "invalid_search_sort",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_sort"
@ -998,7 +998,7 @@ async fn sort_unset_ranking_rule() {
index.wait_task(response.uid()).await.succeeded();
let expected_response = json!({
"message": format!("Index `{}`: You must specify where `sort` is listed in the rankingRules setting to use the sort parameter at search time.", index.uid),
"message": "You must specify where `sort` is listed in the rankingRules setting to use the sort parameter at search time.",
"code": "invalid_search_sort",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_sort"
@ -1024,18 +1024,19 @@ async fn search_on_unknown_field() {
index.update_settings_searchable_attributes(json!(["id", "title"])).await;
index.wait_task(response.uid()).await.succeeded();
let expected_response = json!({
"message": format!("Index `{}`: Attribute `unknown` is not searchable. Available searchable attributes are: `id, title`.", index.uid),
"code": "invalid_search_attributes_to_search_on",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_attributes_to_search_on"
});
index
.search(
json!({"q": "Captain Marvel", "attributesToSearchOn": ["unknown"]}),
|response, code| {
assert_eq!(response, expected_response);
assert_eq!(code, 400);
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Attribute `unknown` is not searchable. Available searchable attributes are: `id, title`.",
"code": "invalid_search_attributes_to_search_on",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_attributes_to_search_on"
}
"###);
},
)
.await;
@ -1049,18 +1050,19 @@ async fn search_on_unknown_field_plus_joker() {
index.update_settings_searchable_attributes(json!(["id", "title"])).await;
index.wait_task(response.uid()).await.succeeded();
let expected_response = json!({
"message": format!("Index `{}`: Attribute `unknown` is not searchable. Available searchable attributes are: `id, title`.", index.uid),
"code": "invalid_search_attributes_to_search_on",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_attributes_to_search_on"
});
index
.search(
json!({"q": "Captain Marvel", "attributesToSearchOn": ["*", "unknown"]}),
|response, code| {
assert_eq!(response, expected_response);
assert_eq!(code, 400);
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Attribute `unknown` is not searchable. Available searchable attributes are: `id, title`.",
"code": "invalid_search_attributes_to_search_on",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_attributes_to_search_on"
}
"###);
},
)
.await;
@ -1069,8 +1071,15 @@ async fn search_on_unknown_field_plus_joker() {
.search(
json!({"q": "Captain Marvel", "attributesToSearchOn": ["unknown", "*"]}),
|response, code| {
assert_eq!(response, expected_response);
assert_eq!(code, 400);
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Attribute `unknown` is not searchable. Available searchable attributes are: `id, title`.",
"code": "invalid_search_attributes_to_search_on",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_attributes_to_search_on"
}
"###);
},
)
.await;
@ -1083,44 +1092,47 @@ async fn distinct_at_search_time() {
let (task, _) = index.create(None).await;
index.wait_task(task.uid()).await.succeeded();
let expected_response = json!({
"message": format!("Index `{}`: Attribute `doggo.truc` is not filterable and thus, cannot be used as distinct attribute. This index does not have configured filterable attributes.", index.uid),
let (response, code) =
index.search_post(json!({"page": 0, "hitsPerPage": 2, "distinct": "doggo.truc"})).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Attribute `doggo.truc` is not filterable and thus, cannot be used as distinct attribute. This index does not have configured filterable attributes.",
"code": "invalid_search_distinct",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_distinct"
});
let (response, code) =
index.search_post(json!({"page": 0, "hitsPerPage": 2, "distinct": "doggo.truc"})).await;
assert_eq!(response, expected_response);
assert_eq!(code, 400);
}
"###);
let (task, _) = index.update_settings_filterable_attributes(json!(["color", "machin"])).await;
index.wait_task(task.uid()).await;
let expected_response = json!({
"message": format!("Index `{}`: Attribute `doggo.truc` is not filterable and thus, cannot be used as distinct attribute. Available filterable attributes are: `color, machin`.", index.uid),
let (response, code) =
index.search_post(json!({"page": 0, "hitsPerPage": 2, "distinct": "doggo.truc"})).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Attribute `doggo.truc` is not filterable and thus, cannot be used as distinct attribute. Available filterable attributes are: `color, machin`.",
"code": "invalid_search_distinct",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_distinct"
});
let (response, code) =
index.search_post(json!({"page": 0, "hitsPerPage": 2, "distinct": "doggo.truc"})).await;
assert_eq!(response, expected_response);
assert_eq!(code, 400);
}
"###);
let (task, _) = index.update_settings_displayed_attributes(json!(["color"])).await;
index.wait_task(task.uid()).await;
let expected_response = json!({
"message": format!("Index `{}`: Attribute `doggo.truc` is not filterable and thus, cannot be used as distinct attribute. Available filterable attributes are: `color, <..hidden-attributes>`.", index.uid),
let (response, code) =
index.search_post(json!({"page": 0, "hitsPerPage": 2, "distinct": "doggo.truc"})).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Attribute `doggo.truc` is not filterable and thus, cannot be used as distinct attribute. Available filterable attributes are: `color, <..hidden-attributes>`.",
"code": "invalid_search_distinct",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_distinct"
});
let (response, code) =
index.search_post(json!({"page": 0, "hitsPerPage": 2, "distinct": "doggo.truc"})).await;
assert_eq!(response, expected_response);
assert_eq!(code, 400);
}
"###);
let (response, code) =
index.search_post(json!({"page": 0, "hitsPerPage": 2, "distinct": true})).await;

View File

@ -1070,7 +1070,7 @@ async fn federation_one_query_error() {
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Inside `.queries[1]`: Index `nested`: Attribute `title` is not filterable. This index does not have configured filterable attributes.\n1:6 title = toto",
"message": "Inside `.queries[1]`: Attribute `title` is not filterable. This index does not have configured filterable attributes.\n1:6 title = toto",
"code": "invalid_search_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
@ -1102,7 +1102,7 @@ async fn federation_one_query_sort_error() {
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Inside `.queries[1]`: Index `nested`: Attribute `doggos` is not sortable. This index does not have configured sortable attributes.",
"message": "Inside `.queries[1]`: Attribute `doggos` is not sortable. This index does not have configured sortable attributes.",
"code": "invalid_search_sort",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_sort"
@ -1166,7 +1166,7 @@ async fn federation_multiple_query_errors() {
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Inside `.queries[0]`: Index `test`: Attribute `title` is not filterable. This index does not have configured filterable attributes.\n1:6 title = toto",
"message": "Inside `.queries[0]`: Attribute `title` is not filterable. This index does not have configured filterable attributes.\n1:6 title = toto",
"code": "invalid_search_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
@ -1198,7 +1198,7 @@ async fn federation_multiple_query_sort_errors() {
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Inside `.queries[0]`: Index `test`: Attribute `title` is not sortable. This index does not have configured sortable attributes.",
"message": "Inside `.queries[0]`: Attribute `title` is not sortable. This index does not have configured sortable attributes.",
"code": "invalid_search_sort",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_sort"
@ -1231,7 +1231,7 @@ async fn federation_multiple_query_errors_interleaved() {
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Inside `.queries[1]`: Index `nested`: Attribute `doggos` is not filterable. This index does not have configured filterable attributes.\n1:7 doggos IN [intel, kefir]",
"message": "Inside `.queries[1]`: Attribute `doggos` is not filterable. This index does not have configured filterable attributes.\n1:7 doggos IN [intel, kefir]",
"code": "invalid_search_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
@ -1264,7 +1264,7 @@ async fn federation_multiple_query_sort_errors_interleaved() {
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Inside `.queries[1]`: Index `nested`: Attribute `doggos` is not sortable. This index does not have configured sortable attributes.",
"message": "Inside `.queries[1]`: Attribute `doggos` is not sortable. This index does not have configured sortable attributes.",
"code": "invalid_search_sort",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_sort"

View File

@ -448,7 +448,7 @@ async fn test_summarized_delete_documents_by_filter() {
"originalFilter": "\"doggo = bernese\""
},
"error": {
"message": "Index `test`: Attribute `doggo` is not filterable. This index does not have configured filterable attributes.\n1:6 doggo = bernese",
"message": "Attribute `doggo` is not filterable. This index does not have configured filterable attributes.\n1:6 doggo = bernese",
"code": "invalid_document_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_filter"

View File

@ -318,7 +318,7 @@ async fn try_to_disable_binary_quantization() {
}
},
"error": {
"message": "Index `doggo`: `.embedders.manual.binaryQuantized`: Cannot disable the binary quantization.\n - Note: Binary quantization is a lossy operation that cannot be reverted.\n - Hint: Add a new embedder that is non-quantized and regenerate the vectors.",
"message": "`.embedders.manual.binaryQuantized`: Cannot disable the binary quantization.\n - Note: Binary quantization is a lossy operation that cannot be reverted.\n - Hint: Add a new embedder that is non-quantized and regenerate the vectors.",
"code": "invalid_settings_embedders",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_settings_embedders"

View File

@ -250,7 +250,7 @@ async fn user_provided_embeddings_error() {
"indexedDocuments": 0
},
"error": {
"message": "Index `doggo`: Bad embedder configuration in the document with id: `0`. Missing field `._vectors.manual.regenerate`\n - note: `._vectors.manual` must be an array of floats, an array of arrays of floats, or an object with field `regenerate`",
"message": "Bad embedder configuration in the document with id: `0`. Missing field `._vectors.manual.regenerate`\n - note: `._vectors.manual` must be an array of floats, an array of arrays of floats, or an object with field `regenerate`",
"code": "invalid_vectors_type",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_vectors_type"
@ -280,7 +280,7 @@ async fn user_provided_embeddings_error() {
"indexedDocuments": 0
},
"error": {
"message": "Index `doggo`: Bad embedder configuration in the document with id: `0`. Missing field `._vectors.manual.regenerate`\n - note: `._vectors.manual` must be an array of floats, an array of arrays of floats, or an object with field `regenerate`",
"message": "Bad embedder configuration in the document with id: `0`. Missing field `._vectors.manual.regenerate`\n - note: `._vectors.manual` must be an array of floats, an array of arrays of floats, or an object with field `regenerate`",
"code": "invalid_vectors_type",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_vectors_type"
@ -311,7 +311,7 @@ async fn user_provided_embeddings_error() {
"indexedDocuments": 0
},
"error": {
"message": "Index `doggo`: Bad embedder configuration in the document with id: `0`. Could not parse `._vectors.manual.regenerate`: invalid type: string \"yes please\", expected a boolean at line 1 column 26",
"message": "Bad embedder configuration in the document with id: `0`. Could not parse `._vectors.manual.regenerate`: invalid type: string \"yes please\", expected a boolean at line 1 column 26",
"code": "invalid_vectors_type",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_vectors_type"
@ -340,7 +340,7 @@ async fn user_provided_embeddings_error() {
"indexedDocuments": 0
},
"error": {
"message": "Index `doggo`: Bad embedder configuration in the document with id: `0`. Invalid value type at `._vectors.manual.embeddings`: expected null or an array, but found a boolean: `true`",
"message": "Bad embedder configuration in the document with id: `0`. Invalid value type at `._vectors.manual.embeddings`: expected null or an array, but found a boolean: `true`",
"code": "invalid_vectors_type",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_vectors_type"
@ -369,7 +369,7 @@ async fn user_provided_embeddings_error() {
"indexedDocuments": 0
},
"error": {
"message": "Index `doggo`: Bad embedder configuration in the document with id: `0`. Invalid value type at `._vectors.manual.embeddings[0]`: expected a number or an array, but found a boolean: `true`",
"message": "Bad embedder configuration in the document with id: `0`. Invalid value type at `._vectors.manual.embeddings[0]`: expected a number or an array, but found a boolean: `true`",
"code": "invalid_vectors_type",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_vectors_type"
@ -398,7 +398,7 @@ async fn user_provided_embeddings_error() {
"indexedDocuments": 0
},
"error": {
"message": "Index `doggo`: Bad embedder configuration in the document with id: `0`. Invalid value type at `._vectors.manual.embeddings[0][0]`: expected a number, but found a boolean: `true`",
"message": "Bad embedder configuration in the document with id: `0`. Invalid value type at `._vectors.manual.embeddings[0][0]`: expected a number, but found a boolean: `true`",
"code": "invalid_vectors_type",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_vectors_type"
@ -440,7 +440,7 @@ async fn user_provided_embeddings_error() {
"indexedDocuments": 0
},
"error": {
"message": "Index `doggo`: Bad embedder configuration in the document with id: `0`. Invalid value type at `._vectors.manual.embeddings[1]`: expected a number, but found an array: `[0.2,0.3]`",
"message": "Bad embedder configuration in the document with id: `0`. Invalid value type at `._vectors.manual.embeddings[1]`: expected a number, but found an array: `[0.2,0.3]`",
"code": "invalid_vectors_type",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_vectors_type"
@ -469,7 +469,7 @@ async fn user_provided_embeddings_error() {
"indexedDocuments": 0
},
"error": {
"message": "Index `doggo`: Bad embedder configuration in the document with id: `0`. Invalid value type at `._vectors.manual.embeddings[1]`: expected an array, but found a number: `0.3`",
"message": "Bad embedder configuration in the document with id: `0`. Invalid value type at `._vectors.manual.embeddings[1]`: expected an array, but found a number: `0.3`",
"code": "invalid_vectors_type",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_vectors_type"
@ -498,7 +498,7 @@ async fn user_provided_embeddings_error() {
"indexedDocuments": 0
},
"error": {
"message": "Index `doggo`: Bad embedder configuration in the document with id: `0`. Invalid value type at `._vectors.manual.embeddings[0][1]`: expected a number, but found a boolean: `true`",
"message": "Bad embedder configuration in the document with id: `0`. Invalid value type at `._vectors.manual.embeddings[0][1]`: expected a number, but found a boolean: `true`",
"code": "invalid_vectors_type",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_vectors_type"
@ -539,7 +539,7 @@ async fn user_provided_vectors_error() {
"indexedDocuments": 0
},
"error": {
"message": "Index `doggo`: While embedding documents for embedder `manual`: no vectors provided for document `40` and at least 4 other document(s)\n- Note: `manual` has `source: userProvided`, so documents must provide embeddings as an array in `_vectors.manual`.\n- Hint: opt-out for a document with `_vectors.manual: null`",
"message": "While embedding documents for embedder `manual`: no vectors provided for document `40` and at least 4 other document(s)\n- Note: `manual` has `source: userProvided`, so documents must provide embeddings as an array in `_vectors.manual`.\n- Hint: opt-out for a document with `_vectors.manual: null`",
"code": "vector_embedding_error",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#vector_embedding_error"
@ -569,7 +569,7 @@ async fn user_provided_vectors_error() {
"indexedDocuments": 0
},
"error": {
"message": "Index `doggo`: While embedding documents for embedder `manual`: no vectors provided for document `42`\n- Note: `manual` has `source: userProvided`, so documents must provide embeddings as an array in `_vectors.manual`.\n- Hint: try replacing `_vector` by `_vectors` in 1 document(s).",
"message": "While embedding documents for embedder `manual`: no vectors provided for document `42`\n- Note: `manual` has `source: userProvided`, so documents must provide embeddings as an array in `_vectors.manual`.\n- Hint: try replacing `_vector` by `_vectors` in 1 document(s).",
"code": "vector_embedding_error",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#vector_embedding_error"
@ -599,7 +599,7 @@ async fn user_provided_vectors_error() {
"indexedDocuments": 0
},
"error": {
"message": "Index `doggo`: While embedding documents for embedder `manual`: no vectors provided for document `42`\n- Note: `manual` has `source: userProvided`, so documents must provide embeddings as an array in `_vectors.manual`.\n- Hint: try replacing `_vectors.manaul` by `_vectors.manual` in 1 document(s).",
"message": "While embedding documents for embedder `manual`: no vectors provided for document `42`\n- Note: `manual` has `source: userProvided`, so documents must provide embeddings as an array in `_vectors.manual`.\n- Hint: try replacing `_vectors.manaul` by `_vectors.manual` in 1 document(s).",
"code": "vector_embedding_error",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#vector_embedding_error"

View File

@ -713,7 +713,7 @@ async fn bad_api_key() {
}
},
"error": {
"message": "Index `doggo`: While embedding documents for embedder `default`: user error: could not authenticate against OpenAI server\n - server replied with `{\"error\":{\"message\":\"Incorrect API key provided: Bearer doggo. You can find your API key at https://platform.openai.com/account/api-keys.\",\"type\":\"invalid_request_error\",\"param\":null,\"code\":\"invalid_api_key\"}}`\n - Hint: Check the `apiKey` parameter in the embedder configuration, and the `MEILI_OPENAI_API_KEY` and `OPENAI_API_KEY` environment variables",
"message": "While embedding documents for embedder `default`: user error: could not authenticate against OpenAI server\n - server replied with `{\"error\":{\"message\":\"Incorrect API key provided: Bearer doggo. You can find your API key at https://platform.openai.com/account/api-keys.\",\"type\":\"invalid_request_error\",\"param\":null,\"code\":\"invalid_api_key\"}}`\n - Hint: Check the `apiKey` parameter in the embedder configuration, and the `MEILI_OPENAI_API_KEY` and `OPENAI_API_KEY` environment variables",
"code": "vector_embedding_error",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#vector_embedding_error"
@ -757,7 +757,7 @@ async fn bad_api_key() {
}
},
"error": {
"message": "Index `doggo`: While embedding documents for embedder `default`: user error: could not authenticate against OpenAI server\n - server replied with `{\"error\":{\"message\":\"You didn't provide an API key. You need to provide your API key in an Authorization header using Bearer auth (i.e. Authorization: Bearer YOUR_KEY), or as the password field (with blank username) if you're accessing the API from your browser and are prompted for a username and password. You can obtain an API key from https://platform.openai.com/account/api-keys.\",\"type\":\"invalid_request_error\",\"param\":null,\"code\":null}}`\n - Hint: Check the `apiKey` parameter in the embedder configuration, and the `MEILI_OPENAI_API_KEY` and `OPENAI_API_KEY` environment variables",
"message": "While embedding documents for embedder `default`: user error: could not authenticate against OpenAI server\n - server replied with `{\"error\":{\"message\":\"You didn't provide an API key. You need to provide your API key in an Authorization header using Bearer auth (i.e. Authorization: Bearer YOUR_KEY), or as the password field (with blank username) if you're accessing the API from your browser and are prompted for a username and password. You can obtain an API key from https://platform.openai.com/account/api-keys.\",\"type\":\"invalid_request_error\",\"param\":null,\"code\":null}}`\n - Hint: Check the `apiKey` parameter in the embedder configuration, and the `MEILI_OPENAI_API_KEY` and `OPENAI_API_KEY` environment variables",
"code": "vector_embedding_error",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#vector_embedding_error"

View File

@ -985,7 +985,7 @@ async fn bad_settings() {
}
},
"error": {
"message": "Index `doggo`: Error while generating embeddings: runtime error: could not determine model dimensions:\n - test embedding failed with runtime error: error extracting embeddings from the response:\n - in `response`, while extracting a single \"{{embedding}}\", expected `response` to be an array of numbers, but failed to parse server response:\n - invalid type: map, expected a sequence",
"message": "Error while generating embeddings: runtime error: could not determine model dimensions:\n - test embedding failed with runtime error: error extracting embeddings from the response:\n - in `response`, while extracting a single \"{{embedding}}\", expected `response` to be an array of numbers, but failed to parse server response:\n - invalid type: map, expected a sequence",
"code": "vector_embedding_error",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#vector_embedding_error"
@ -1025,7 +1025,7 @@ async fn bad_settings() {
"indexedDocuments": 0
},
"error": {
"message": "Index `doggo`: While embedding documents for embedder `rest`: runtime error: was expecting embeddings of dimension `2`, got embeddings of dimensions `3`",
"message": "While embedding documents for embedder `rest`: runtime error: was expecting embeddings of dimension `2`, got embeddings of dimensions `3`",
"code": "vector_embedding_error",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#vector_embedding_error"
@ -1178,7 +1178,7 @@ async fn server_returns_bad_request() {
}
},
"error": {
"message": "Index `doggo`: Error while generating embeddings: runtime error: could not determine model dimensions:\n - test embedding failed with user error: sent a bad request to embedding server\n - Hint: check that the `request` in the embedder configuration matches the remote server's API\n - server replied with `{\"error\":\"Invalid request: invalid type: string \\\"test\\\", expected struct MultipleRequest at line 1 column 6\"}`",
"message": "Error while generating embeddings: runtime error: could not determine model dimensions:\n - test embedding failed with user error: sent a bad request to embedding server\n - Hint: check that the `request` in the embedder configuration matches the remote server's API\n - server replied with `{\"error\":\"Invalid request: invalid type: string \\\"test\\\", expected struct MultipleRequest at line 1 column 6\"}`",
"code": "vector_embedding_error",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#vector_embedding_error"
@ -1247,7 +1247,7 @@ async fn server_returns_bad_request() {
"indexedDocuments": 0
},
"error": {
"message": "Index `doggo`: While embedding documents for embedder `rest`: user error: sent a bad request to embedding server\n - Hint: check that the `request` in the embedder configuration matches the remote server's API\n - server replied with `{\"error\":\"Invalid request: invalid type: string \\\"name: kefir\\\\n\\\", expected struct MultipleRequest at line 1 column 15\"}`",
"message": "While embedding documents for embedder `rest`: user error: sent a bad request to embedding server\n - Hint: check that the `request` in the embedder configuration matches the remote server's API\n - server replied with `{\"error\":\"Invalid request: invalid type: string \\\"name: kefir\\\\n\\\", expected struct MultipleRequest at line 1 column 15\"}`",
"code": "vector_embedding_error",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#vector_embedding_error"
@ -1306,7 +1306,7 @@ async fn server_returns_bad_response() {
}
},
"error": {
"message": "Index `doggo`: Error while generating embeddings: runtime error: could not determine model dimensions:\n - test embedding failed with runtime error: error extracting embeddings from the response:\n - in `response`, while extracting the array of \"{{embedding}}\"s, configuration expects `response` to be an array with at least 1 item(s) but server sent an object with 1 field(s)",
"message": "Error while generating embeddings: runtime error: could not determine model dimensions:\n - test embedding failed with runtime error: error extracting embeddings from the response:\n - in `response`, while extracting the array of \"{{embedding}}\"s, configuration expects `response` to be an array with at least 1 item(s) but server sent an object with 1 field(s)",
"code": "vector_embedding_error",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#vector_embedding_error"
@ -1362,7 +1362,7 @@ async fn server_returns_bad_response() {
}
},
"error": {
"message": "Index `doggo`: Error while generating embeddings: runtime error: could not determine model dimensions:\n - test embedding failed with runtime error: error extracting embeddings from the response:\n - in `response`, while extracting item #0 from the array of \"{{embedding}}\"s, expected `response` to be an array of numbers, but failed to parse server response:\n - invalid type: map, expected a sequence",
"message": "Error while generating embeddings: runtime error: could not determine model dimensions:\n - test embedding failed with runtime error: error extracting embeddings from the response:\n - in `response`, while extracting item #0 from the array of \"{{embedding}}\"s, expected `response` to be an array of numbers, but failed to parse server response:\n - invalid type: map, expected a sequence",
"code": "vector_embedding_error",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#vector_embedding_error"
@ -1414,7 +1414,7 @@ async fn server_returns_bad_response() {
}
},
"error": {
"message": "Index `doggo`: Error while generating embeddings: runtime error: could not determine model dimensions:\n - test embedding failed with runtime error: error extracting embeddings from the response:\n - in `response.output`, while extracting a single \"{{embedding}}\", expected `output` to be an array of numbers, but failed to parse server response:\n - invalid type: map, expected f32",
"message": "Error while generating embeddings: runtime error: could not determine model dimensions:\n - test embedding failed with runtime error: error extracting embeddings from the response:\n - in `response.output`, while extracting a single \"{{embedding}}\", expected `output` to be an array of numbers, but failed to parse server response:\n - invalid type: map, expected f32",
"code": "vector_embedding_error",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#vector_embedding_error"
@ -1478,7 +1478,7 @@ async fn server_returns_bad_response() {
}
},
"error": {
"message": "Index `doggo`: Error while generating embeddings: runtime error: could not determine model dimensions:\n - test embedding failed with runtime error: error extracting embeddings from the response:\n - in `response.embedding`, while extracting item #0 from the array of \"{{embedding}}\"s, configuration expects `embedding` to be an object with key `data` but server sent an array of size 3",
"message": "Error while generating embeddings: runtime error: could not determine model dimensions:\n - test embedding failed with runtime error: error extracting embeddings from the response:\n - in `response.embedding`, while extracting item #0 from the array of \"{{embedding}}\"s, configuration expects `embedding` to be an object with key `data` but server sent an array of size 3",
"code": "vector_embedding_error",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#vector_embedding_error"
@ -1542,7 +1542,7 @@ async fn server_returns_bad_response() {
}
},
"error": {
"message": "Index `doggo`: Error while generating embeddings: runtime error: could not determine model dimensions:\n - test embedding failed with runtime error: error extracting embeddings from the response:\n - in `response.output[0]`, while extracting a single \"{{embedding}}\", configuration expects key \"embeddings\", which is missing in response\n - Hint: item #0 inside `output` has key `embedding`, did you mean `response.output[0].embedding` in embedder configuration?",
"message": "Error while generating embeddings: runtime error: could not determine model dimensions:\n - test embedding failed with runtime error: error extracting embeddings from the response:\n - in `response.output[0]`, while extracting a single \"{{embedding}}\", configuration expects key \"embeddings\", which is missing in response\n - Hint: item #0 inside `output` has key `embedding`, did you mean `response.output[0].embedding` in embedder configuration?",
"code": "vector_embedding_error",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#vector_embedding_error"
@ -1908,7 +1908,7 @@ async fn server_custom_header() {
}
},
"error": {
"message": "Index `doggo`: Error while generating embeddings: runtime error: could not determine model dimensions:\n - test embedding failed with user error: could not authenticate against embedding server\n - server replied with `{\"error\":\"missing header 'my-nonstandard-auth'\"}`\n - Hint: Check the `apiKey` parameter in the embedder configuration",
"message": "Error while generating embeddings: runtime error: could not determine model dimensions:\n - test embedding failed with user error: could not authenticate against embedding server\n - server replied with `{\"error\":\"missing header 'my-nonstandard-auth'\"}`\n - Hint: Check the `apiKey` parameter in the embedder configuration",
"code": "vector_embedding_error",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#vector_embedding_error"
@ -1951,7 +1951,7 @@ async fn server_custom_header() {
}
},
"error": {
"message": "Index `doggo`: Error while generating embeddings: runtime error: could not determine model dimensions:\n - test embedding failed with user error: could not authenticate against embedding server\n - server replied with `{\"error\":\"thou shall not pass, Balrog\"}`\n - Hint: Check the `apiKey` parameter in the embedder configuration",
"message": "Error while generating embeddings: runtime error: could not determine model dimensions:\n - test embedding failed with user error: could not authenticate against embedding server\n - server replied with `{\"error\":\"thou shall not pass, Balrog\"}`\n - Hint: Check the `apiKey` parameter in the embedder configuration",
"code": "vector_embedding_error",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#vector_embedding_error"
@ -2099,7 +2099,7 @@ async fn searchable_reindex() {
]
},
"error": {
"message": "Index `doggo`: While embedding documents for embedder `rest`: error: received unexpected HTTP 404 from embedding server\n - server replied with `{\"error\":\"text not found\",\"text\":\"breed: patou\\n\"}`",
"message": "While embedding documents for embedder `rest`: error: received unexpected HTTP 404 from embedding server\n - server replied with `{\"error\":\"text not found\",\"text\":\"breed: patou\\n\"}`",
"code": "vector_embedding_error",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#vector_embedding_error"

View File

@ -21,15 +21,11 @@ use super::ref_cell_ext::RefCellExt;
use super::thread_local::{FullySend, ThreadLocal};
use super::StdResult;
use crate::heed_codec::facet::{FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec};
use crate::heed_codec::StrBEU16Codec;
use crate::index::db_name;
use crate::index::main_key::{GEO_FACETED_DOCUMENTS_IDS_KEY, GEO_RTREE_KEY};
use crate::update::new::KvReaderFieldId;
use crate::vector::Embedding;
use crate::{
CboRoaringBitmapCodec, DocumentId, Error, FieldIdWordCountCodec, Index, InternalError,
U8StrStrCodec,
};
use crate::{CboRoaringBitmapCodec, DocumentId, Error, Index, InternalError};
/// Creates a tuple of senders/receiver to be used by
/// the extractors and the writer loop.
@ -407,32 +403,6 @@ impl Database {
Database::FieldIdDocidFacetF64s => db_name::FIELD_ID_DOCID_FACET_F64S,
}
}
/// Renders a raw key of this database as a human-readable string.
///
/// The key bytes are decoded with the codec paired with the database variant
/// in the `match` below, and the decoded value is formatted with `{:?}`.
///
/// # Panics
///
/// Panics if `key` cannot be decoded by the selected codec (every decode
/// result is unwrapped), and hits `unimplemented!` for any variant that has
/// no arm of its own.
pub fn stringify_key(&self, key: &[u8]) -> String {
// Glob import; presumably the source of `Str` used below — confirm.
use heed::types::*;
match self {
Database::WordDocids => format!("{:?}", Str::bytes_decode(key).unwrap()),
Database::WordFidDocids => format!("{:?}", StrBEU16Codec::bytes_decode(key).unwrap()),
Database::WordPositionDocids => {
format!("{:?}", StrBEU16Codec::bytes_decode(key).unwrap())
}
Database::WordPairProximityDocids => {
format!("{:?}", U8StrStrCodec::bytes_decode(key).unwrap())
}
Database::ExactWordDocids => format!("{:?}", Str::bytes_decode(key).unwrap()),
Database::FidWordCountDocids => {
format!("{:?}", FieldIdWordCountCodec::bytes_decode(key).unwrap())
}
Database::FieldIdDocidFacetStrings => {
format!("{:?}", FieldDocIdFacetStringCodec::bytes_decode(key).unwrap())
}
Database::FieldIdDocidFacetF64s => {
format!("{:?}", FieldDocIdFacetF64Codec::bytes_decode(key).unwrap())
}
// Every variant without a dedicated arm above ends up here and panics.
d => unimplemented!("stringify_key for {:?}", d),
}
}
}
impl From<FacetKind> for Database {

View File

@ -1,5 +1,6 @@
use std::collections::{BTreeMap, BTreeSet};
use either::Either;
use heed::RoTxn;
use raw_collections::RawMap;
use serde_json::value::RawValue;
@ -209,11 +210,13 @@ impl<'d, 'doc: 'd, 't: 'd, Mapper: FieldIdMapper> Document<'d>
for MergedDocument<'d, 'doc, 't, Mapper>
{
fn iter_top_level_fields(&self) -> impl Iterator<Item = Result<(&'d str, &'d RawValue)>> {
match &self.db {
Some(db) => {
let mut new_doc_it = self.new_doc.iter_top_level_fields();
let mut db_it = self.db.iter().flat_map(|db| db.iter_top_level_fields());
let mut db_it = db.iter_top_level_fields();
let mut seen_fields = BTreeSet::new();
std::iter::from_fn(move || {
Either::Left(std::iter::from_fn(move || {
if let Some(next) = new_doc_it.next() {
if let Ok((name, _)) = next {
seen_fields.insert(name);
@ -231,7 +234,10 @@ impl<'d, 'doc: 'd, 't: 'd, Mapper: FieldIdMapper> Document<'d>
Err(err) => return Some(Err(err)),
}
}
})
}))
}
None => Either::Right(self.new_doc.iter_top_level_fields()),
}
}
fn vectors_field(&self) -> Result<Option<&'d RawValue>> {

View File

@ -28,7 +28,7 @@ pub struct WordDocidsBalancedCaches<'extractor> {
exact_word_docids: BalancedCaches<'extractor>,
word_position_docids: BalancedCaches<'extractor>,
fid_word_count_docids: BalancedCaches<'extractor>,
fid_word_count: HashMap<FieldId, (Option<usize>, Option<usize>)>,
fid_word_count: HashMap<FieldId, (usize, usize)>,
current_docid: Option<DocumentId>,
}
@ -85,8 +85,8 @@ impl<'extractor> WordDocidsBalancedCaches<'extractor> {
self.fid_word_count
.entry(field_id)
.and_modify(|(_current_count, new_count)| *new_count.get_or_insert(0) += 1)
.or_insert((None, Some(1)));
.and_modify(|(_current_count, new_count)| *new_count += 1)
.or_insert((0, 1));
self.current_docid = Some(docid);
Ok(())
@ -130,8 +130,8 @@ impl<'extractor> WordDocidsBalancedCaches<'extractor> {
self.fid_word_count
.entry(field_id)
.and_modify(|(current_count, _new_count)| *current_count.get_or_insert(0) += 1)
.or_insert((Some(1), None));
.and_modify(|(current_count, _new_count)| *current_count += 1)
.or_insert((1, 0));
self.current_docid = Some(docid);
@ -141,18 +141,14 @@ impl<'extractor> WordDocidsBalancedCaches<'extractor> {
fn flush_fid_word_count(&mut self, buffer: &mut BumpVec<u8>) -> Result<()> {
for (fid, (current_count, new_count)) in self.fid_word_count.drain() {
if current_count != new_count {
if let Some(current_count) =
current_count.filter(|current_count| *current_count <= MAX_COUNTED_WORDS)
{
if current_count <= MAX_COUNTED_WORDS {
buffer.clear();
buffer.extend_from_slice(&fid.to_be_bytes());
buffer.push(current_count as u8);
self.fid_word_count_docids
.insert_del_u32(buffer, self.current_docid.unwrap())?;
}
if let Some(new_count) =
new_count.filter(|new_count| *new_count <= MAX_COUNTED_WORDS)
{
if new_count <= MAX_COUNTED_WORDS {
buffer.clear();
buffer.extend_from_slice(&fid.to_be_bytes());
buffer.push(new_count as u8);

View File

@ -80,15 +80,6 @@ where
let mut bbbuffers = Vec::new();
let finished_extraction = AtomicBool::new(false);
// We reduce the actual memory used to 5%. The reason we do this here and not in Meilisearch
// is because we still use the old indexer for the settings and it is highly impacted by the
// max memory. So we keep the changes here and will remove these changes once we use the new
// indexer to also index settings. Related to #5125 and #5141.
let grenad_parameters = GrenadParameters {
max_memory: grenad_parameters.max_memory.map(|mm| mm * 5 / 100),
..grenad_parameters
};
// We compute and remove the allocated BBQueues buffers capacity from the indexing memory.
let minimum_capacity = 50 * 1024 * 1024 * pool.current_num_threads(); // 50 MiB
let (grenad_parameters, total_bbbuffer_capacity) = grenad_parameters.max_memory.map_or(

View File

@ -80,35 +80,16 @@ where
}
merge_caches_sorted(frozen, |key, DelAddRoaringBitmap { del, add }| {
let current = database.get(&rtxn, key)?;
if let (Some(del), Some(current)) = (&del, &current) {
let current = CboRoaringBitmapCodec::deserialize_from(current).unwrap();
let diff = del - &current;
let external_ids = index.external_id_of(&rtxn, &diff).unwrap().into_iter().map(|id| id.unwrap()).collect::<Vec<_>>();
if !del.is_subset(&current) {
eprintln!(
"======================== {:?}: {} -> c: {:?} d: {:?} a: {:?} extra: {:?} extra_external_ids: {:?}",
D::DATABASE,
D::DATABASE.stringify_key(key),
&current,
del,
add,
diff,
external_ids
);
}
}
match merge_cbo_bitmaps(current, del, add) {
Ok(Operation::Write(bitmap)) => {
match merge_cbo_bitmaps(current, del, add)? {
Operation::Write(bitmap) => {
docids_sender.write(key, &bitmap)?;
Ok(())
}
Ok(Operation::Delete) => {
Operation::Delete => {
docids_sender.delete(key)?;
Ok(())
}
Ok(Operation::Ignore) => Ok(()),
Err(e) => Err(e),
Operation::Ignore => Ok(()),
}
})
})
@ -253,47 +234,18 @@ fn merge_cbo_bitmaps(
(None, Some(_del), Some(add)) => Ok(Operation::Write(add)),
(Some(_current), None, None) => Ok(Operation::Ignore), // but it's strange
(Some(current), None, Some(add)) => Ok(Operation::Write(current | add)),
(Some(current), Some(mut del), add) => {
debug_assert!(
del.is_subset(&current),
"del is not a subset of current, which must be impossible."
);
(Some(current), Some(del), add) => {
let output = match add {
Some(add) => {
del -= &add;
if del.is_empty() {
if add.is_subset(&current) {
// no changes, no allocation
None
} else {
// addition
Some(current | add)
}
} else {
if add.is_subset(&current) {
// deletion only, no union
Some(current - del)
} else {
// deletion and addition
Some((current - del) | add)
}
}
}
// deletion only, no union
None => Some(current - del),
Some(add) => (&current - del) | add,
None => &current - del,
};
match output {
Some(output) => {
if output.is_empty() {
Ok(Operation::Delete)
} else if current == output {
Ok(Operation::Ignore)
} else {
Ok(Operation::Write(output))
}
}
None => Ok(Operation::Ignore),
}
}
}
}

View File

@ -139,7 +139,7 @@ pub fn run(args: BenchDeriveArgs) -> anyhow::Result<()> {
rt.block_on(async {
dashboard_client.send_machine_info(&env).await?;
let commit_message = build_info.commit_msg.unwrap_or_default().split('\n').next().unwrap();
let commit_message = build_info.commit_msg.context("missing commit message")?.split('\n').next().unwrap();
let max_workloads = args.workload_file.len();
let reason: Option<&str> = args.reason.as_deref();
let invocation_uuid = dashboard_client.create_invocation(build_info.clone(), commit_message, env, max_workloads, reason).await?;