1176: fix race condition in document addition r=Kerollmops a=MarinPostma

As described in #1160, there was a race condition when updating settings and adding documents simultaneously. This was due to the schema being updated and the document addition being processed in two different transactions. This PR moves the schema update logic for the primary key into the same transaction as the document addition, while keeping the input checks for the validity of the primary key in the HTTP route, so as not to break error reporting for the document addition route.

close #1160.

Co-authored-by: mpostma <postma.marin@protonmail.com>
Co-authored-by: marin <postma.marin@protonmail.com>
This commit is contained in:
bors[bot]
2021-02-02 09:26:32 +00:00
committed by GitHub
5 changed files with 98 additions and 63 deletions

View File

@ -23,6 +23,8 @@ pub struct DocumentsAddition<D> {
updates_store: store::Updates,
updates_results_store: store::UpdatesResults,
updates_notifier: UpdateEventsEmitter,
// The primary key explicitly set by the user for this update, if any
primary_key: Option<String>,
documents: Vec<D>,
is_partial: bool,
}
@ -39,6 +41,7 @@ impl<D> DocumentsAddition<D> {
updates_notifier,
documents: Vec::new(),
is_partial: false,
primary_key: None,
}
}
@ -53,9 +56,14 @@ impl<D> DocumentsAddition<D> {
updates_notifier,
documents: Vec::new(),
is_partial: true,
primary_key: None,
}
}
/// Records `primary_key` as the primary key explicitly requested for this
/// update, overwriting any value set by an earlier call.
pub fn set_primary_key(&mut self, primary_key: String) {
self.primary_key = Some(primary_key);
}
/// Appends `document` to the documents queued in this update.
pub fn update_document(&mut self, document: D) {
self.documents.push(document);
}
@ -71,6 +79,7 @@ impl<D> DocumentsAddition<D> {
self.updates_results_store,
self.documents,
self.is_partial,
self.primary_key,
)?;
Ok(update_id)
}
@ -88,6 +97,7 @@ pub fn push_documents_addition<D: serde::Serialize>(
updates_results_store: store::UpdatesResults,
addition: Vec<D>,
is_partial: bool,
primary_key: Option<String>,
) -> MResult<u64> {
let mut values = Vec::with_capacity(addition.len());
for add in addition {
@ -99,9 +109,9 @@ pub fn push_documents_addition<D: serde::Serialize>(
let last_update_id = next_update_id(writer, updates_store, updates_results_store)?;
let update = if is_partial {
Update::documents_partial(values)
Update::documents_partial(primary_key, values)
} else {
Update::documents_addition(values)
Update::documents_addition(primary_key, values)
};
updates_store.put_update(writer, last_update_id, &update)?;
@ -149,7 +159,8 @@ pub fn apply_addition(
writer: &mut heed::RwTxn<MainT>,
index: &store::Index,
new_documents: Vec<IndexMap<String, Value>>,
partial: bool
partial: bool,
primary_key: Option<String>,
) -> MResult<()>
{
let mut schema = match index.main.schema(writer)? {
@ -162,7 +173,14 @@ pub fn apply_addition(
let internal_docids = index.main.internal_docids(writer)?;
let mut available_ids = DiscoverIds::new(&internal_docids);
let primary_key = schema.primary_key().ok_or(Error::MissingPrimaryKey)?;
let primary_key = match schema.primary_key() {
Some(primary_key) => primary_key.to_string(),
None => {
let name = primary_key.ok_or(Error::MissingPrimaryKey)?;
schema.set_primary_key(&name)?;
name
}
};
// 1. store documents ids for future deletion
let mut documents_additions = HashMap::new();
@ -275,16 +293,18 @@ pub fn apply_documents_partial_addition(
writer: &mut heed::RwTxn<MainT>,
index: &store::Index,
new_documents: Vec<IndexMap<String, Value>>,
primary_key: Option<String>,
) -> MResult<()> {
apply_addition(writer, index, new_documents, true)
apply_addition(writer, index, new_documents, true, primary_key)
}
pub fn apply_documents_addition(
writer: &mut heed::RwTxn<MainT>,
index: &store::Index,
new_documents: Vec<IndexMap<String, Value>>,
primary_key: Option<String>,
) -> MResult<()> {
apply_addition(writer, index, new_documents, false)
apply_addition(writer, index, new_documents, false, primary_key)
}
pub fn reindex_all_documents(writer: &mut heed::RwTxn<MainT>, index: &store::Index) -> MResult<()> {

View File

@ -52,16 +52,16 @@ impl Update {
}
}
fn documents_addition(documents: Vec<IndexMap<String, Value>>) -> Update {
fn documents_addition(primary_key: Option<String>, documents: Vec<IndexMap<String, Value>>) -> Update {
Update {
data: UpdateData::DocumentsAddition(documents),
data: UpdateData::DocumentsAddition{ documents, primary_key },
enqueued_at: Utc::now(),
}
}
fn documents_partial(documents: Vec<IndexMap<String, Value>>) -> Update {
fn documents_partial(primary_key: Option<String>, documents: Vec<IndexMap<String, Value>>) -> Update {
Update {
data: UpdateData::DocumentsPartial(documents),
data: UpdateData::DocumentsPartial{ documents, primary_key },
enqueued_at: Utc::now(),
}
}
@ -85,8 +85,15 @@ impl Update {
pub enum UpdateData {
ClearAll,
Customs(Vec<u8>),
DocumentsAddition(Vec<IndexMap<String, Value>>),
DocumentsPartial(Vec<IndexMap<String, Value>>),
// Both document variants carry the optional primary key alongside the documents
DocumentsAddition {
primary_key: Option<String>,
documents: Vec<IndexMap<String, Value>>
},
DocumentsPartial {
primary_key: Option<String>,
documents: Vec<IndexMap<String, Value>>,
},
DocumentsDeletion(Vec<String>),
Settings(Box<SettingsUpdate>)
}
@ -96,11 +103,11 @@ impl UpdateData {
match self {
UpdateData::ClearAll => UpdateType::ClearAll,
UpdateData::Customs(_) => UpdateType::Customs,
UpdateData::DocumentsAddition(addition) => UpdateType::DocumentsAddition {
number: addition.len(),
UpdateData::DocumentsAddition{ documents, .. } => UpdateType::DocumentsAddition {
number: documents.len(),
},
UpdateData::DocumentsPartial(addition) => UpdateType::DocumentsPartial {
number: addition.len(),
UpdateData::DocumentsPartial{ documents, .. } => UpdateType::DocumentsPartial {
number: documents.len(),
},
UpdateData::DocumentsDeletion(deletion) => UpdateType::DocumentsDeletion {
number: deletion.len(),
@ -239,25 +246,25 @@ pub fn update_task(
(update_type, result, start.elapsed())
}
UpdateData::DocumentsAddition(documents) => {
UpdateData::DocumentsAddition { documents, primary_key } => {
let start = Instant::now();
let update_type = UpdateType::DocumentsAddition {
number: documents.len(),
};
let result = apply_documents_addition(writer, index, documents);
let result = apply_documents_addition(writer, index, documents, primary_key);
(update_type, result, start.elapsed())
}
UpdateData::DocumentsPartial(documents) => {
UpdateData::DocumentsPartial{ documents, primary_key } => {
let start = Instant::now();
let update_type = UpdateType::DocumentsPartial {
number: documents.len(),
};
let result = apply_documents_partial_addition(writer, index, documents);
let result = apply_documents_partial_addition(writer, index, documents, primary_key);
(update_type, result, start.elapsed())
}