Compare commits

...

18 Commits

Author SHA1 Message Date
1cf14e765f Change implementation of MergedDocuments::iter_top_level_fields 2024-12-09 09:38:21 +01:00
4a082683df Merge #5131
Some checks failed
Test suite / Tests on ${{ matrix.os }} (windows-2022) (push) Failing after 21s
Test suite / Tests on ubuntu-20.04 (push) Failing after 10s
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Run tests in debug (push) Failing after 10s
Test suite / Run Rustfmt (push) Successful in 1m25s
Test suite / Run Clippy (push) Successful in 5m54s
Test suite / Tests on ${{ matrix.os }} (macos-13) (push) Has been cancelled
5131: Ignore documents whose selected fields didn't change r=dureuill a=dureuill

Attempts to improve the new indexer performance by ignoring documents whose selected fields didn't change:

- Add `Update::has_changed_for_fields` function
- Ignore documents whose searchable attributes didn't change for word docids and word pair proximity extraction
- Ignore documents whose faceted attributes didn't change for facet extraction

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-12-05 16:04:16 +00:00
26be5e0733 Merge #5123
5123: Fix batch details r=dureuill a=irevoire

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/5079
Fixes https://github.com/meilisearch/meilisearch/issues/5112

## What does this PR do?
- Make the processing tasks actually processing in the stats of the batch instead of enqueued
- Stop counting one extra task for all non-prioritized batches in the stats
- Add a test

Co-authored-by: Tamo <tamo@meilisearch.com>
2024-12-05 15:21:55 +00:00
bd5110a2fe Fix clippy warnings 2024-12-05 16:13:07 +01:00
fa8b9acdf6 Ignore documents that didn't change in facets 2024-12-05 16:12:52 +01:00
2b74d1824b Ignore documents that didn't change any field in word pair proximity 2024-12-05 15:56:22 +01:00
c77b00d3ac Don't extract word docids when no searchable changed 2024-12-05 15:51:58 +01:00
c77073efcc Update::has_changed_for_fields 2024-12-05 15:50:12 +01:00
1537323eb9 Merge #5119
5119: Settings opt out error msg r=Kerollmops a=ManyTheFish

# Pull Request

## Related issue
PRD: https://meilisearch.notion.site/API-usage-Settings-to-opt-out-indexing-features-fff4b06b651f8108ade3f858aeb16b14?pvs=4
## What does this PR do?

Add a new error code and message when the user tries a facet search on an index where the facet search is disabled:
```json
{
  "message": "The facet search is disabled for this index",
  "code": "facet_search_disabled",
  "type": "invalid_request",
  "link": "https://docs.meilisearch.com/errors#invalid_facet_search_disabled"
}
 ```


Co-authored-by: ManyTheFish <many@meilisearch.com>
2024-12-05 13:51:11 +00:00
a0a3b55700 Change error code 2024-12-05 14:48:29 +01:00
214b51de87 try to fix the snapshot on demand flaky test 2024-12-05 14:45:54 +01:00
95975944d7 fix the dumps missing the empty swap index tasks 2024-12-05 14:23:38 +01:00
7a2af06b1e update the impacted snapshots 2024-12-04 15:52:24 +01:00
cb0c3a5aad stop adding one enqueued tasks to all unprioritized batches 2024-12-04 15:48:28 +01:00
cbcf6c9ba3 make the processing tasks as processing in a batch 2024-12-04 14:48:48 +01:00
bf742d81cf add a test 2024-12-04 14:47:02 +01:00
fc1df5793c fix tests 2024-12-04 14:35:20 +01:00
953a82ca04 Add new error message 2024-12-04 11:15:29 +01:00
28 changed files with 224 additions and 48 deletions

View File

@ -497,7 +497,6 @@ impl IndexScheduler {
// 5. We make a batch from the unprioritised tasks. Start by taking the next enqueued task.
let task_id = if let Some(task_id) = enqueued.min() { task_id } else { return Ok(None) };
let mut task = self.get_task(rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?;
current_batch.processing(Some(&mut task));
// If the task is not associated with any index, verify that it is an index swap and
// create the batch directly. Otherwise, get the index name associated with the task
@ -507,6 +506,7 @@ impl IndexScheduler {
index_name
} else {
assert!(matches!(&task.kind, KindWithContent::IndexSwap { swaps } if swaps.is_empty()));
current_batch.processing(Some(&mut task));
return Ok(Some((Batch::IndexSwap { task }, current_batch)));
};

View File

@ -4319,10 +4319,35 @@ mod tests {
let proc = index_scheduler.processing_tasks.read().unwrap().clone();
let query = Query { statuses: Some(vec![Status::Processing]), ..Default::default() };
let (batches, _) = index_scheduler
.get_batch_ids_from_authorized_indexes(&rtxn, &proc, &query, &AuthFilter::default())
let (mut batches, _) = index_scheduler
.get_batches_from_authorized_indexes(query.clone(), &AuthFilter::default())
.unwrap();
snapshot!(snapshot_bitmap(&batches), @"[0,]"); // only the processing batch in the first tick
assert_eq!(batches.len(), 1);
batches[0].started_at = OffsetDateTime::UNIX_EPOCH;
// Insta cannot snapshot our batches because the batch stats contains an enum as key: https://github.com/mitsuhiko/insta/issues/689
let batch = serde_json::to_string_pretty(&batches[0]).unwrap();
snapshot!(batch, @r#"
{
"uid": 0,
"details": {
"primaryKey": "mouse"
},
"stats": {
"totalNbTasks": 1,
"status": {
"processing": 1
},
"types": {
"indexCreation": 1
},
"indexUids": {
"catto": 1
}
},
"startedAt": "1970-01-01T00:00:00Z",
"finishedAt": null
}
"#);
let query = Query { statuses: Some(vec![Status::Enqueued]), ..Default::default() };
let (batches, _) = index_scheduler

View File

@ -5,7 +5,7 @@ snapshot_kind: text
### Autobatching Enabled = true
### Processing batch Some(1):
[1,]
{uid: 1, details: {"receivedDocuments":2,"indexedDocuments":null}, stats: {"totalNbTasks":2,"status":{"enqueued":2},"types":{"documentAdditionOrUpdate":2},"indexUids":{"beavero":2}}, }
{uid: 1, details: {"receivedDocuments":1,"indexedDocuments":null}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"beavero":1}}, }
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}

View File

@ -5,7 +5,7 @@ snapshot_kind: text
### Autobatching Enabled = true
### Processing batch Some(1):
[1,]
{uid: 1, details: {"receivedDocuments":2,"indexedDocuments":null}, stats: {"totalNbTasks":2,"status":{"enqueued":2},"types":{"documentAdditionOrUpdate":2},"indexUids":{"beavero":2}}, }
{uid: 1, details: {"receivedDocuments":1,"indexedDocuments":null}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"beavero":1}}, }
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}

View File

@ -5,7 +5,7 @@ snapshot_kind: text
### Autobatching Enabled = true
### Processing batch Some(0):
[0,]
{uid: 0, details: {"dumpUid":null}, stats: {"totalNbTasks":1,"status":{"enqueued":1},"types":{"dumpCreation":1},"indexUids":{}}, }
{uid: 0, details: {"dumpUid":null}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"dumpCreation":1},"indexUids":{}}, }
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { dump_uid: None }, kind: DumpCreation { keys: [], instance_uid: None }}

View File

@ -5,7 +5,7 @@ snapshot_kind: text
### Autobatching Enabled = true
### Processing batch Some(0):
[0,]
{uid: 0, details: {"receivedDocuments":2,"indexedDocuments":null}, stats: {"totalNbTasks":2,"status":{"enqueued":2},"types":{"documentAdditionOrUpdate":2},"indexUids":{"catto":2}}, }
{uid: 0, details: {"receivedDocuments":1,"indexedDocuments":null}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"catto":1}}, }
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}

View File

@ -5,7 +5,7 @@ snapshot_kind: text
### Autobatching Enabled = true
### Processing batch Some(0):
[0,]
{uid: 0, details: {"receivedDocuments":2,"indexedDocuments":null}, stats: {"totalNbTasks":2,"status":{"enqueued":2},"types":{"documentAdditionOrUpdate":2},"indexUids":{"catto":2}}, }
{uid: 0, details: {"receivedDocuments":1,"indexedDocuments":null}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"catto":1}}, }
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}

View File

@ -5,7 +5,7 @@ snapshot_kind: text
### Autobatching Enabled = true
### Processing batch Some(0):
[0,]
{uid: 0, details: {"receivedDocuments":2,"indexedDocuments":null}, stats: {"totalNbTasks":2,"status":{"enqueued":2},"types":{"documentAdditionOrUpdate":2},"indexUids":{"catto":2}}, }
{uid: 0, details: {"receivedDocuments":1,"indexedDocuments":null}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"catto":1}}, }
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}

View File

@ -5,7 +5,7 @@ snapshot_kind: text
### Autobatching Enabled = true
### Processing batch Some(0):
[0,]
{uid: 0, details: {"receivedDocuments":2,"indexedDocuments":null}, stats: {"totalNbTasks":2,"status":{"enqueued":2},"types":{"documentAdditionOrUpdate":2},"indexUids":{"doggos":2}}, }
{uid: 0, details: {"receivedDocuments":1,"indexedDocuments":null}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, }
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}

View File

@ -5,7 +5,7 @@ snapshot_kind: text
### Autobatching Enabled = true
### Processing batch Some(0):
[0,]
{uid: 0, details: {"receivedDocuments":2,"indexedDocuments":null}, stats: {"totalNbTasks":2,"status":{"enqueued":2},"types":{"documentAdditionOrUpdate":2},"indexUids":{"doggos":2}}, }
{uid: 0, details: {"receivedDocuments":1,"indexedDocuments":null}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, }
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}

View File

@ -5,7 +5,7 @@ snapshot_kind: text
### Autobatching Enabled = true
### Processing batch Some(0):
[0,]
{uid: 0, details: {"receivedDocuments":2,"indexedDocuments":null}, stats: {"totalNbTasks":2,"status":{"enqueued":2},"types":{"documentAdditionOrUpdate":2},"indexUids":{"doggos":2}}, }
{uid: 0, details: {"receivedDocuments":1,"indexedDocuments":null}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, }
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}

View File

@ -5,7 +5,7 @@ snapshot_kind: text
### Autobatching Enabled = true
### Processing batch Some(0):
[0,]
{uid: 0, details: {"receivedDocuments":2,"indexedDocuments":null}, stats: {"totalNbTasks":2,"status":{"enqueued":2},"types":{"documentAdditionOrUpdate":2},"indexUids":{"doggos":2}}, }
{uid: 0, details: {"receivedDocuments":1,"indexedDocuments":null}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, }
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}

View File

@ -5,7 +5,7 @@ snapshot_kind: text
### Autobatching Enabled = true
### Processing batch Some(0):
[0,]
{uid: 0, details: {"primaryKey":"id"}, stats: {"totalNbTasks":2,"status":{"enqueued":2},"types":{"indexCreation":2},"indexUids":{"index_a":2}}, }
{uid: 0, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"indexCreation":1},"indexUids":{"index_a":1}}, }
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "index_a", primary_key: Some("id") }}

View File

@ -5,7 +5,7 @@ snapshot_kind: text
### Autobatching Enabled = true
### Processing batch Some(0):
[0,]
{uid: 0, details: {"primaryKey":"id"}, stats: {"totalNbTasks":2,"status":{"enqueued":2},"types":{"indexCreation":2},"indexUids":{"index_a":2}}, }
{uid: 0, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"indexCreation":1},"indexUids":{"index_a":1}}, }
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "index_a", primary_key: Some("id") }}

View File

@ -5,7 +5,7 @@ snapshot_kind: text
### Autobatching Enabled = true
### Processing batch Some(0):
[0,]
{uid: 0, details: {"primaryKey":"id"}, stats: {"totalNbTasks":2,"status":{"enqueued":2},"types":{"indexCreation":2},"indexUids":{"index_a":2}}, }
{uid: 0, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"indexCreation":1},"indexUids":{"index_a":1}}, }
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "index_a", primary_key: Some("id") }}

View File

@ -5,7 +5,7 @@ snapshot_kind: text
### Autobatching Enabled = true
### Processing batch Some(1):
[1,]
{uid: 1, details: {"primaryKey":"sheep"}, stats: {"totalNbTasks":2,"status":{"enqueued":2},"types":{"indexCreation":2},"indexUids":{"doggo":2}}, }
{uid: 1, details: {"primaryKey":"sheep"}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"indexCreation":1},"indexUids":{"doggo":1}}, }
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: succeeded, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}

View File

@ -5,7 +5,7 @@ snapshot_kind: text
### Autobatching Enabled = true
### Processing batch Some(0):
[3,]
{uid: 0, details: {"matchedTasks":2,"deletedTasks":null,"originalFilter":"test_query"}, stats: {"totalNbTasks":1,"status":{"enqueued":1},"types":{"taskDeletion":1},"indexUids":{}}, }
{uid: 0, details: {"matchedTasks":2,"deletedTasks":null,"originalFilter":"test_query"}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"taskDeletion":1},"indexUids":{}}, }
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}

View File

@ -67,7 +67,7 @@ impl ProcessingBatch {
task.batch_uid = Some(self.uid);
// We don't store the statuses in the map since they're all enqueued but we must
// still store them in the stats since that can be displayed.
*self.stats.status.entry(task.status).or_default() += 1;
*self.stats.status.entry(Status::Processing).or_default() += 1;
self.kinds.insert(task.kind.as_kind());
*self.stats.types.entry(task.kind.as_kind()).or_default() += 1;

View File

@ -279,6 +279,7 @@ InvalidSearchPage , InvalidRequest , BAD_REQUEST ;
InvalidSearchQ , InvalidRequest , BAD_REQUEST ;
InvalidFacetSearchQuery , InvalidRequest , BAD_REQUEST ;
InvalidFacetSearchName , InvalidRequest , BAD_REQUEST ;
FacetSearchDisabled , InvalidRequest , BAD_REQUEST ;
InvalidSearchVector , InvalidRequest , BAD_REQUEST ;
InvalidSearchShowMatchesPosition , InvalidRequest , BAD_REQUEST ;
InvalidSearchShowRankingScore , InvalidRequest , BAD_REQUEST ;

View File

@ -1407,6 +1407,13 @@ pub fn perform_facet_search(
None => TimeBudget::default(),
};
if !index.facet_search(&rtxn)? {
return Err(ResponseError::from_msg(
"The facet search is disabled for this index".to_string(),
Code::FacetSearchDisabled,
));
}
// In the faceted search context, we want to use the intersection between the locales provided by the user
// and the locales of the facet string.
// If the facet string is not localized, we **ignore** the locales provided by the user because the facet data has no locale.

View File

@ -52,6 +52,25 @@ impl Value {
}
self
}
/// Return `true` if the `status` field is set to `failed`.
/// Panic if the `status` field doesn't exists.
#[track_caller]
pub fn is_fail(&self) -> bool {
if !self["status"].is_string() {
panic!("Called `is_fail` on {}", serde_json::to_string_pretty(&self.0).unwrap());
}
self["status"] == serde_json::Value::String(String::from("failed"))
}
// Panic if the json doesn't contain the `status` field set to "succeeded"
#[track_caller]
pub fn failed(&self) -> &Self {
if !self.is_fail() {
panic!("Called failed on {}", serde_json::to_string_pretty(&self.0).unwrap());
}
self
}
}
impl From<serde_json::Value> for Value {

View File

@ -221,8 +221,15 @@ async fn add_documents_and_deactivate_facet_search() {
let (response, code) =
index.facet_search(json!({"facetName": "genres", "facetQuery": "a"})).await;
assert_eq!(code, 200, "{}", response);
assert_eq!(dbg!(response)["facetHits"].as_array().unwrap().len(), 0);
assert_eq!(code, 400, "{}", response);
snapshot!(response, @r###"
{
"message": "The facet search is disabled for this index",
"code": "facet_search_disabled",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#facet_search_disabled"
}
"###);
}
#[actix_rt::test]
@ -245,8 +252,15 @@ async fn deactivate_facet_search_and_add_documents() {
let (response, code) =
index.facet_search(json!({"facetName": "genres", "facetQuery": "a"})).await;
assert_eq!(code, 200, "{}", response);
assert_eq!(dbg!(response)["facetHits"].as_array().unwrap().len(), 0);
assert_eq!(code, 400, "{}", response);
snapshot!(response, @r###"
{
"message": "The facet search is disabled for this index",
"code": "facet_search_disabled",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#facet_search_disabled"
}
"###);
}
#[actix_rt::test]

View File

@ -129,11 +129,11 @@ async fn perform_on_demand_snapshot() {
index.load_test_set().await;
server.index("doggo").create(Some("bone")).await;
index.wait_task(2).await;
let (task, _) = server.index("doggo").create(Some("bone")).await;
index.wait_task(task.uid()).await.succeeded();
server.index("doggo").create(Some("bone")).await;
index.wait_task(2).await;
let (task, _) = server.index("doggo").create(Some("bone")).await;
index.wait_task(task.uid()).await.failed();
let (task, code) = server.create_snapshot().await;
snapshot!(code, @"202 Accepted");

View File

@ -1,5 +1,6 @@
use std::collections::{BTreeMap, BTreeSet};
use either::Either;
use heed::RoTxn;
use raw_collections::RawMap;
use serde_json::value::RawValue;
@ -209,11 +210,13 @@ impl<'d, 'doc: 'd, 't: 'd, Mapper: FieldIdMapper> Document<'d>
for MergedDocument<'d, 'doc, 't, Mapper>
{
fn iter_top_level_fields(&self) -> impl Iterator<Item = Result<(&'d str, &'d RawValue)>> {
match &self.db {
Some(db) => {
let mut new_doc_it = self.new_doc.iter_top_level_fields();
let mut db_it = self.db.iter().flat_map(|db| db.iter_top_level_fields());
let mut db_it = db.iter_top_level_fields();
let mut seen_fields = BTreeSet::new();
std::iter::from_fn(move || {
Either::Left(std::iter::from_fn(move || {
if let Some(next) = new_doc_it.next() {
if let Ok((name, _)) = next {
seen_fields.insert(name);
@ -231,7 +234,10 @@ impl<'d, 'doc: 'd, 't: 'd, Mapper: FieldIdMapper> Document<'d>
Err(err) => return Some(Err(err)),
}
}
})
}))
}
None => Either::Right(self.new_doc.iter_top_level_fields()),
}
}
fn vectors_field(&self) -> Result<Option<&'d RawValue>> {

View File

@ -1,7 +1,10 @@
use bumpalo::Bump;
use heed::RoTxn;
use super::document::{DocumentFromDb, DocumentFromVersions, MergedDocument, Versions};
use super::document::{
Document as _, DocumentFromDb, DocumentFromVersions, MergedDocument, Versions,
};
use super::extract::perm_json_p;
use super::vector_document::{
MergedVectorDocument, VectorDocumentFromDb, VectorDocumentFromVersions,
};
@ -164,6 +167,80 @@ impl<'doc> Update<'doc> {
}
}
/// Returns whether the updated version of the document is different from the current version for the passed subset of fields.
///
/// `true` if at least one top-level-field that is a exactly a member of field or a parent of a member of field changed.
/// Otherwise `false`.
pub fn has_changed_for_fields<'t, Mapper: FieldIdMapper>(
&self,
fields: Option<&[&str]>,
rtxn: &'t RoTxn,
index: &'t Index,
mapper: &'t Mapper,
) -> Result<bool> {
let mut changed = false;
let mut cached_current = None;
let mut updated_selected_field_count = 0;
for entry in self.updated().iter_top_level_fields() {
let (key, updated_value) = entry?;
if perm_json_p::select_field(key, fields, &[]) == perm_json_p::Selection::Skip {
continue;
}
updated_selected_field_count += 1;
let current = match cached_current {
Some(current) => current,
None => self.current(rtxn, index, mapper)?,
};
let current_value = current.top_level_field(key)?;
let Some(current_value) = current_value else {
changed = true;
break;
};
if current_value.get() != updated_value.get() {
changed = true;
break;
}
cached_current = Some(current);
}
if !self.has_deletion {
// no field deletion, so fields that don't appear in `updated` cannot have changed
return Ok(changed);
}
if changed {
return Ok(true);
}
// we saw all updated fields, and set `changed` if any field wasn't in `current`.
// so if there are as many fields in `current` as in `updated`, then nothing changed.
// If there is any more fields in `current`, then they are missing in `updated`.
let has_deleted_fields = {
let current = match cached_current {
Some(current) => current,
None => self.current(rtxn, index, mapper)?,
};
let mut current_selected_field_count = 0;
for entry in current.iter_top_level_fields() {
let (key, _) = entry?;
if perm_json_p::select_field(key, fields, &[]) == perm_json_p::Selection::Skip {
continue;
}
current_selected_field_count += 1;
}
current_selected_field_count != updated_selected_field_count
};
Ok(has_deleted_fields)
}
pub fn updated_vectors(
&self,
doc_alloc: &'doc Bump,

View File

@ -97,6 +97,15 @@ impl FacetedDocidsExtractor {
},
),
DocumentChange::Update(inner) => {
if !inner.has_changed_for_fields(
Some(attributes_to_extract),
rtxn,
index,
context.db_fields_ids_map,
)? {
return Ok(());
}
extract_document_facets(
attributes_to_extract,
inner.current(rtxn, index, context.db_fields_ids_map)?,

View File

@ -351,6 +351,15 @@ impl WordDocidsExtractors {
)?;
}
DocumentChange::Update(inner) => {
if !inner.has_changed_for_fields(
document_tokenizer.attribute_to_extract,
&context.rtxn,
context.index,
context.db_fields_ids_map,
)? {
return Ok(());
}
let mut token_fn = |fname: &str, fid, pos, word: &str| {
cached_sorter.insert_del_u32(
fid,

View File

@ -70,6 +70,15 @@ impl SearchableExtractor for WordPairProximityDocidsExtractor {
)?;
}
DocumentChange::Update(inner) => {
if !inner.has_changed_for_fields(
document_tokenizer.attribute_to_extract,
rtxn,
index,
context.db_fields_ids_map,
)? {
return Ok(());
}
let document = inner.current(rtxn, index, context.db_fields_ids_map)?;
process_document_tokens(
document,