Compare commits

..

1 Commit

Author    SHA1        Message                                              Date
YoEight   6ce8a5726a  Fix empty index crashing when searching attributes  2025-12-18 08:26:59 -05:00
13 changed files with 103 additions and 62 deletions

View File

@@ -15,7 +15,7 @@ env:
 jobs:
   test-linux:
-    name: Tests on ${{ matrix.runner }} ${{ matrix.features }}
+    name: Tests on Ubuntu
     runs-on: ${{ matrix.runner }}
     strategy:
       matrix:

Cargo.lock (generated)
View File

@@ -2698,9 +2698,9 @@ dependencies = [
 [[package]]
 name = "hannoy"
-version = "0.1.2-nested-rtxns"
+version = "0.1.0-nested-rtxns"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "533c952127a7e73448f26af313ac7b98012516561e48e953781cd6b30e573436"
+checksum = "be82bf3f2108ddc8885e3d306fcd7f4692066bfe26065ca8b42ba417f3c26dd1"
 dependencies = [
  "bytemuck",
  "byteorder",

View File

@@ -502,11 +502,13 @@ impl Queue {
             *before_finished_at,
         )?;
-        batches = if query.reverse.unwrap_or_default() {
-            batches.into_iter().take(*limit).collect()
-        } else {
-            batches.into_iter().rev().take(*limit).collect()
-        };
+        if let Some(limit) = limit {
+            batches = if query.reverse.unwrap_or_default() {
+                batches.into_iter().take(*limit as usize).collect()
+            } else {
+                batches.into_iter().rev().take(*limit as usize).collect()
+            };
+        }
         Ok(batches)
     }
@@ -600,8 +602,11 @@ impl Queue {
             Box::new(batches.into_iter().rev()) as Box<dyn Iterator<Item = u32>>
         };
-        let batches =
-            self.batches.get_existing_batches(rtxn, batches.take(query.limit), processing)?;
+        let batches = self.batches.get_existing_batches(
+            rtxn,
+            batches.take(query.limit.unwrap_or(u32::MAX) as usize),
+            processing,
+        )?;
         Ok((batches, total))
     }
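
A standalone sketch (not part of the diff) of the `unwrap_or(u32::MAX)` idiom used above: batch and task ids are `u32`, so `take(u32::MAX as usize)` can never truncate, and `None` behaves as "no limit".

    fn main() {
        let ids: Vec<u32> = (0..5).collect();
        // `None` maps to take(u32::MAX as usize), which passes every id through.
        let none: Option<u32> = None;
        let all: Vec<u32> = ids.iter().copied().take(none.unwrap_or(u32::MAX) as usize).collect();
        assert_eq!(all, ids);
        // `Some(2)` truncates as expected.
        let two: Vec<u32> = ids.iter().copied().take(Some(2u32).unwrap_or(u32::MAX) as usize).collect();
        assert_eq!(two, vec![0, 1]);
    }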

View File

@@ -31,21 +31,21 @@ fn query_batches_from_and_limit() {
     let proc = index_scheduler.processing_tasks.read().unwrap().clone();
     let rtxn = index_scheduler.env.read_txn().unwrap();
-    let query = Query { limit: 0, ..Default::default() };
+    let query = Query { limit: Some(0), ..Default::default() };
     let (batches, _) = index_scheduler
         .queue
         .get_batch_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &proc)
         .unwrap();
     snapshot!(snapshot_bitmap(&batches), @"[]");
-    let query = Query { limit: 1, ..Default::default() };
+    let query = Query { limit: Some(1), ..Default::default() };
     let (batches, _) = index_scheduler
         .queue
         .get_batch_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &proc)
         .unwrap();
     snapshot!(snapshot_bitmap(&batches), @"[2,]");
-    let query = Query { limit: 2, ..Default::default() };
+    let query = Query { limit: Some(2), ..Default::default() };
     let (batches, _) = index_scheduler
         .queue
         .get_batch_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &proc)
@@ -66,14 +66,14 @@ fn query_batches_from_and_limit() {
         .unwrap();
     snapshot!(snapshot_bitmap(&batches), @"[0,1,2,]");
-    let query = Query { from: Some(1), limit: 1, ..Default::default() };
+    let query = Query { from: Some(1), limit: Some(1), ..Default::default() };
     let (batches, _) = index_scheduler
         .queue
         .get_batch_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &proc)
         .unwrap();
     snapshot!(snapshot_bitmap(&batches), @"[1,]");
-    let query = Query { from: Some(1), limit: 2, ..Default::default() };
+    let query = Query { from: Some(1), limit: Some(2), ..Default::default() };
     let (batches, _) = index_scheduler
         .queue
         .get_batch_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &proc)

View File

@@ -32,9 +32,6 @@ use crate::{Error, IndexSchedulerOptions, Result, TaskId};
 /// The number of database used by queue itself
 const NUMBER_OF_DATABASES: u32 = 1;
-/// The default limit for pagination
-const DEFAULT_LIMIT: usize = 20;
 /// Database const names for the `IndexScheduler`.
 mod db_name {
     pub const BATCH_TO_TASKS_MAPPING: &str = "batch-to-tasks-mapping";
@@ -44,11 +41,11 @@ mod db_name {
 ///
 /// An empty/default query (where each field is set to `None`) matches all tasks.
 /// Each non-null field restricts the set of tasks further.
-#[derive(Debug, Clone, PartialEq, Eq)]
+#[derive(Default, Debug, Clone, PartialEq, Eq)]
 pub struct Query {
-    /// The maximum number of tasks to be matched. Defaults to 20.
-    pub limit: usize,
-    /// The minimum [task id](`meilisearch_types::tasks::Task::uid`) to be matched. Defaults to 0.
+    /// The maximum number of tasks to be matched
+    pub limit: Option<u32>,
+    /// The minimum [task id](`meilisearch_types::tasks::Task::uid`) to be matched
     pub from: Option<u32>,
     /// The order used to return the tasks. By default the newest tasks are returned first and the boolean is `false`.
     pub reverse: Option<bool>,
@@ -87,29 +84,32 @@ pub struct Query {
     pub after_finished_at: Option<OffsetDateTime>,
 }
-impl Default for Query {
-    fn default() -> Self {
-        Self {
-            limit: DEFAULT_LIMIT,
-            from: Default::default(),
-            reverse: Default::default(),
-            uids: Default::default(),
-            batch_uids: Default::default(),
-            statuses: Default::default(),
-            types: Default::default(),
-            index_uids: Default::default(),
-            canceled_by: Default::default(),
-            before_enqueued_at: Default::default(),
-            after_enqueued_at: Default::default(),
-            before_started_at: Default::default(),
-            after_started_at: Default::default(),
-            before_finished_at: Default::default(),
-            after_finished_at: Default::default(),
-        }
-    }
-}
 impl Query {
+    /// Return `true` if every field of the query is set to `None`, such that the query
+    /// matches all tasks.
+    pub fn is_empty(&self) -> bool {
+        matches!(
+            self,
+            Query {
+                limit: None,
+                from: None,
+                reverse: None,
+                uids: None,
+                batch_uids: None,
+                statuses: None,
+                types: None,
+                index_uids: None,
+                canceled_by: None,
+                before_enqueued_at: None,
+                after_enqueued_at: None,
+                before_started_at: None,
+                after_started_at: None,
+                before_finished_at: None,
+                after_finished_at: None,
+            }
+        )
+    }
     /// Add an [index id](meilisearch_types::tasks::Task::index_uid) to the list of permitted indexes.
     pub fn with_index(self, index_uid: String) -> Self {
         let mut index_vec = self.index_uids.unwrap_or_default();
@@ -120,7 +120,7 @@ impl Query {
     // Removes the `from` and `limit` restrictions from the query.
     // Useful to get the total number of tasks matching a filter.
     pub fn without_limits(self) -> Self {
-        Query { limit: usize::MAX, from: None, ..self }
+        Query { limit: None, from: None, ..self }
     }
 }
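
A standalone sketch of why the hand-written `Default` impl can be dropped (the two-field `Query` here is a stand-in, not the crate's real struct): once `limit` is `Option<u32>`, the all-`None` value is both the derived default and the "matches all tasks" query that `is_empty` detects.

    #[derive(Default, Debug, PartialEq, Eq)]
    struct Query {
        limit: Option<u32>,
        from: Option<u32>,
    }

    impl Query {
        // Mirrors the matches!-based emptiness check in the diff above.
        fn is_empty(&self) -> bool {
            matches!(self, Query { limit: None, from: None })
        }
    }

    fn main() {
        // The derived Default yields all-None fields, so no DEFAULT_LIMIT constant is needed.
        assert!(Query::default().is_empty());
        assert!(!Query { limit: Some(1), ..Default::default() }.is_empty());
    }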

View File

@@ -487,11 +487,13 @@ impl Queue {
             *before_finished_at,
         )?;
-        tasks = if query.reverse.unwrap_or_default() {
-            tasks.into_iter().take(*limit).collect()
-        } else {
-            tasks.into_iter().rev().take(*limit).collect()
-        };
+        if let Some(limit) = limit {
+            tasks = if query.reverse.unwrap_or_default() {
+                tasks.into_iter().take(*limit as usize).collect()
+            } else {
+                tasks.into_iter().rev().take(*limit as usize).collect()
+            };
+        }
         Ok(tasks)
     }
@@ -549,7 +551,9 @@ impl Queue {
         } else {
             Box::new(tasks.into_iter().rev()) as Box<dyn Iterator<Item = u32>>
         };
-        let tasks = self.tasks.get_existing_tasks(rtxn, tasks.take(query.limit))?;
+        let tasks = self
+            .tasks
+            .get_existing_tasks(rtxn, tasks.take(query.limit.unwrap_or(u32::MAX) as usize))?;
         let ProcessingTasks { batch, processing, progress: _ } = processing_tasks;

View File

@@ -28,21 +28,21 @@ fn query_tasks_from_and_limit() {
     let rtxn = index_scheduler.env.read_txn().unwrap();
     let processing = index_scheduler.processing_tasks.read().unwrap();
-    let query = Query { limit: 0, ..Default::default() };
+    let query = Query { limit: Some(0), ..Default::default() };
     let (tasks, _) = index_scheduler
         .queue
         .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &processing)
         .unwrap();
     snapshot!(snapshot_bitmap(&tasks), @"[]");
-    let query = Query { limit: 1, ..Default::default() };
+    let query = Query { limit: Some(1), ..Default::default() };
     let (tasks, _) = index_scheduler
         .queue
         .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &processing)
         .unwrap();
     snapshot!(snapshot_bitmap(&tasks), @"[2,]");
-    let query = Query { limit: 2, ..Default::default() };
+    let query = Query { limit: Some(2), ..Default::default() };
     let (tasks, _) = index_scheduler
         .queue
         .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &processing)
@@ -63,14 +63,14 @@ fn query_tasks_from_and_limit() {
         .unwrap();
     snapshot!(snapshot_bitmap(&tasks), @"[0,1,2,]");
-    let query = Query { from: Some(1), limit: 1, ..Default::default() };
+    let query = Query { from: Some(1), limit: Some(1), ..Default::default() };
     let (tasks, _) = index_scheduler
         .queue
         .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &processing)
         .unwrap();
     snapshot!(snapshot_bitmap(&tasks), @"[1,]");
-    let query = Query { from: Some(1), limit: 2, ..Default::default() };
+    let query = Query { from: Some(1), limit: Some(2), ..Default::default() };
    let (tasks, _) = index_scheduler
        .queue
        .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &processing)

View File

@@ -185,7 +185,7 @@ pub async fn get_metrics(
             // Fetch the finished batches...
             &Query {
                 statuses: Some(vec![Status::Succeeded, Status::Failed]),
-                limit: 1,
+                limit: Some(1),
                 ..Query::default()
             },
             auth_filters,
@@ -214,7 +214,7 @@ pub async fn get_metrics(
     let task_queue_latency_seconds = index_scheduler
         .get_tasks_from_authorized_indexes(
             &Query {
-                limit: 1,
+                limit: Some(1),
                 reverse: Some(true),
                 statuses: Some(vec![Status::Enqueued, Status::Processing]),
                 ..Query::default()

View File

@@ -126,7 +126,7 @@ pub struct TasksFilterQuery {
 impl TasksFilterQuery {
     pub(crate) fn into_query(self) -> Query {
         Query {
-            limit: self.limit.0 as usize,
+            limit: Some(self.limit.0),
             from: self.from.as_deref().copied(),
             reverse: self.reverse.as_deref().copied(),
             batch_uids: self.batch_uids.merge_star_and_none(),
@@ -225,8 +225,7 @@ pub struct TaskDeletionOrCancelationQuery {
 impl TaskDeletionOrCancelationQuery {
     fn into_query(self) -> Query {
         Query {
-            // We want to delete all tasks that match the given filters
-            limit: usize::MAX,
+            limit: None,
             from: None,
             reverse: None,
             batch_uids: self.batch_uids.merge_star_and_none(),

View File

@@ -91,7 +91,7 @@ rhai = { version = "1.23.6", features = [
     "sync",
 ] }
 arroy = "0.6.4-nested-rtxns"
-hannoy = { version = "0.1.2-nested-rtxns", features = ["arroy"] }
+hannoy = { version = "0.1.0-nested-rtxns", features = ["arroy"] }
 rand = "0.8.5"
 tracing = "0.1.41"
 ureq = { version = "2.12.1", features = ["json"] }

View File

@@ -178,6 +178,12 @@ impl<'ctx> SearchContext<'ctx> {
                 None if user_defined_searchable.is_none() => continue,
                 // The field is not searchable => User error
                 None => {
+                    if let Some(defined_searchable) = &user_defined_searchable {
+                        if defined_searchable.iter().any(|s| s == field_name) {
+                            continue;
+                        }
+                    }
                     let (valid_fields, hidden_fields) = self.index.remove_hidden_fields(
                         self.txn,
                         searchable_fields_weights.iter().map(|(name, _, _)| name),
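
A standalone sketch of the control flow the added guard creates (hypothetical names, not the crate's real `SearchContext`): an unknown field that the user explicitly listed as searchable is now skipped instead of being reported as a user error, which is exactly the situation on an empty index where no field has been assigned a weight yet.

    // `weight` stands in for the per-field searchable-weight lookup; `None` means the
    // field id map (empty on a fresh index) does not know the field.
    fn resolve(field: &str, weight: Option<u16>, user_defined: Option<&[String]>) -> Result<Option<u16>, String> {
        match weight {
            Some(w) => Ok(Some(w)),
            // No user-defined searchable list at all: silently skip the field.
            None if user_defined.is_none() => Ok(None),
            None => {
                // The fix: a field the user declared searchable but that is absent
                // from the index is skipped rather than rejected.
                if let Some(defined) = user_defined {
                    if defined.iter().any(|s| s.as_str() == field) {
                        return Ok(None);
                    }
                }
                Err(format!("`{field}` is not a searchable attribute"))
            }
        }
    }

    fn main() {
        let defined = vec!["title".to_string()];
        // Empty index: "title" has no weight yet but is user-declared, so it is skipped.
        assert_eq!(resolve("title", None, Some(defined.as_slice())), Ok(None));
        // A field that was never declared still errors.
        assert!(resolve("body", None, Some(defined.as_slice())).is_err());
    }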

View File

@@ -0,0 +1,26 @@
+use crate::index::tests::TempIndex;
+use crate::Search;
+
+fn create_empty_index() -> TempIndex {
+    let index = TempIndex::new();
+    index
+        .update_settings(|s| {
+            s.set_primary_key("id".to_string());
+            s.set_searchable_fields(vec!["name".to_string(), "title".to_string()]);
+        })
+        .unwrap();
+    index
+}
+
+#[test]
+fn test_attribute_search_on_empty_index() {
+    let index = create_empty_index();
+    let txn = index.read_txn().unwrap();
+
+    let mut search = Search::new(&txn, &index);
+    let attrs = ["title".to_string()];
+    search.searchable_attributes(&attrs);
+    search.query("doc");
+    search.execute().unwrap();
+}

View File

@@ -16,6 +16,7 @@ pub mod stop_words;
 pub mod typo;
 pub mod typo_proximity;
 pub mod words_tms;
+mod attribute_update;
 fn collect_field_values(
     index: &crate::Index,