293: Make sure that the relevancy is not impacted by other settings r=Kerollmops a=Kerollmops

Fix https://github.com/meilisearch/meilisearch/issues/1505.

fix https://github.com/meilisearch/MeiliSearch/issues/1529

Co-authored-by: Kerollmops <clement@meilisearch.com>
This commit is contained in:
bors[bot]
2021-07-27 16:04:52 +00:00
committed by GitHub
8 changed files with 182 additions and 164 deletions

View File

@ -8,7 +8,7 @@ use roaring::RoaringBitmap;
use serde_json::Value;
use super::ClearDocuments;
use crate::error::{FieldIdMapMissingEntry, InternalError, UserError};
use crate::error::{InternalError, UserError};
use crate::heed_codec::facet::FacetStringLevelZeroValueCodec;
use crate::heed_codec::CboRoaringBitmapCodec;
use crate::index::{db_name, main_key};
@ -82,11 +82,13 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
key: Some(main_key::PRIMARY_KEY_KEY),
}
})?;
let id_field =
fields_ids_map.id(primary_key).ok_or_else(|| FieldIdMapMissingEntry::FieldName {
field_name: primary_key.to_string(),
process: "DeleteDocuments::execute",
})?;
// If we can't find the id of the primary key it means that the database
// is empty and it should be safe to return that we deleted 0 documents.
let id_field = match fields_ids_map.id(primary_key) {
Some(field) => field,
None => return Ok(0),
};
let Index {
env: _env,

View File

@ -391,6 +391,10 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
documents_file,
} = output;
// The fields_ids_map is put back to the store now so the rest of the transaction sees an
// up to date field map.
self.index.put_fields_ids_map(self.wtxn, &fields_ids_map)?;
// We delete the documents that this document addition replaces. This way we are
// able to simply insert all the documents even if they already exist in the database.
if !replaced_documents_ids.is_empty() {
@ -596,9 +600,6 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
debug!("Writing using the write method: {:?}", write_method);
// We write the fields ids map into the main database
self.index.put_fields_ids_map(self.wtxn, &fields_ids_map)?;
// We write the field distribution into the main database
self.index.put_field_distribution(self.wtxn, &field_distribution)?;

View File

@ -235,15 +235,9 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
fn update_displayed(&mut self) -> Result<bool> {
match self.displayed_fields {
Setting::Set(ref fields) => {
let mut fields_ids_map = self.index.fields_ids_map(self.wtxn)?;
// fields are deduplicated, only the first occurrence is taken into account
let names: Vec<_> = fields.iter().unique().map(String::as_str).collect();
for name in names.iter() {
fields_ids_map.insert(name).ok_or(UserError::AttributeLimitReached)?;
}
self.index.put_displayed_fields(self.wtxn, &names)?;
self.index.put_fields_ids_map(self.wtxn, &fields_ids_map)?;
}
Setting::Reset => {
self.index.delete_displayed_fields(self.wtxn)?;
@ -256,11 +250,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
fn update_distinct_field(&mut self) -> Result<bool> {
match self.distinct_field {
Setting::Set(ref attr) => {
let mut fields_ids_map = self.index.fields_ids_map(self.wtxn)?;
fields_ids_map.insert(attr).ok_or(UserError::AttributeLimitReached)?;
self.index.put_distinct_field(self.wtxn, &attr)?;
self.index.put_fields_ids_map(self.wtxn, &fields_ids_map)?;
}
Setting::Reset => {
self.index.delete_distinct_field(self.wtxn)?;
@ -388,14 +378,11 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
fn update_filterable(&mut self) -> Result<()> {
match self.filterable_fields {
Setting::Set(ref fields) => {
let mut fields_ids_map = self.index.fields_ids_map(self.wtxn)?;
let mut new_facets = HashSet::new();
for name in fields {
fields_ids_map.insert(name).ok_or(UserError::AttributeLimitReached)?;
new_facets.insert(name.clone());
}
self.index.put_filterable_fields(self.wtxn, &new_facets)?;
self.index.put_fields_ids_map(self.wtxn, &fields_ids_map)?;
}
Setting::Reset => {
self.index.delete_filterable_fields(self.wtxn)?;
@ -408,17 +395,12 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
fn update_criteria(&mut self) -> Result<()> {
match self.criteria {
Setting::Set(ref fields) => {
let mut fields_ids_map = self.index.fields_ids_map(self.wtxn)?;
let mut new_criteria = Vec::new();
for name in fields {
let criterion: Criterion = name.parse()?;
if let Some(name) = criterion.field_name() {
fields_ids_map.insert(name).ok_or(UserError::AttributeLimitReached)?;
}
new_criteria.push(criterion);
}
self.index.put_criteria(self.wtxn, &new_criteria)?;
self.index.put_fields_ids_map(self.wtxn, &fields_ids_map)?;
}
Setting::Reset => {
self.index.delete_criteria(self.wtxn)?;
@ -692,7 +674,8 @@ mod tests {
let count = index
.facet_id_f64_docids
.remap_key_type::<ByteSlice>()
.prefix_iter(&rtxn, &[0, 0])
// The faceted field id is 2u16
.prefix_iter(&rtxn, &[0, 2, 0])
.unwrap()
.count();
assert_eq!(count, 3);
@ -718,7 +701,7 @@ mod tests {
let count = index
.facet_id_f64_docids
.remap_key_type::<ByteSlice>()
.prefix_iter(&rtxn, &[0, 0])
.prefix_iter(&rtxn, &[0, 2, 0])
.unwrap()
.count();
assert_eq!(count, 4);
@ -1066,4 +1049,53 @@ mod tests {
builder.execute(|_, _| ()).unwrap();
wtxn.commit().unwrap();
}
#[test]
fn setting_impact_relevancy() {
let path = tempfile::tempdir().unwrap();
let mut options = EnvOpenOptions::new();
options.map_size(10 * 1024 * 1024); // 10 MB
let index = Index::new(options, &path).unwrap();
// Set the genres setting
let mut wtxn = index.write_txn().unwrap();
let mut builder = Settings::new(&mut wtxn, &index, 0);
builder.set_filterable_fields(hashset! { S("genres") });
builder.execute(|_, _| ()).unwrap();
let content = &br#"[
{
"id": 11,
"title": "Star Wars",
"overview":
"Princess Leia is captured and held hostage by the evil Imperial forces in their effort to take over the galactic Empire. Venturesome Luke Skywalker and dashing captain Han Solo team together with the loveable robot duo R2-D2 and C-3PO to rescue the beautiful princess and restore peace and justice in the Empire.",
"genres": ["Adventure", "Action", "Science Fiction"],
"poster": "https://image.tmdb.org/t/p/w500/6FfCtAuVAW8XJjZ7eWeLibRLWTw.jpg",
"release_date": 233366400
},
{
"id": 30,
"title": "Magnetic Rose",
"overview": "",
"genres": ["Animation", "Science Fiction"],
"poster": "https://image.tmdb.org/t/p/w500/gSuHDeWemA1menrwfMRChnSmMVN.jpg",
"release_date": 819676800
}
]"#[..];
let mut builder = IndexDocuments::new(&mut wtxn, &index, 1);
builder.update_format(UpdateFormat::Json);
builder.execute(content, |_, _| ()).unwrap();
wtxn.commit().unwrap();
// We now try to reset the primary key
let rtxn = index.read_txn().unwrap();
let SearchResult { documents_ids, .. } = index.search(&rtxn).query("S").execute().unwrap();
let first_id = documents_ids[0];
let documents = index.documents(&rtxn, documents_ids).unwrap();
let (_, content) = documents.iter().find(|(id, _)| *id == first_id).unwrap();
let fid = index.fields_ids_map(&rtxn).unwrap().id("title").unwrap();
let line = std::str::from_utf8(content.get(fid).unwrap()).unwrap();
assert_eq!(line, r#""Star Wars""#);
}
}