mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-07-27 08:41:00 +00:00
Merge #3834
3834: Define searchable fields at runtime r=Kerollmops a=ManyTheFish ## Summary This feature allows the end-user to search in one or multiple attributes using the search parameter `attributesToSearchOn`: ```json { "q": "Captain Marvel", "attributesToSearchOn": ["title"] } ``` This feature acts like a filter, forcing Meilisearch to only return the documents containing the requested words in the attributes-to-search-on. Note that, with the matching strategy `last`, Meilisearch will only ensure that the first word is in the attributes-to-search-on, but the retrieved documents will be ordered taking into account the words contained in the attributes-to-search-on. ## Trying the prototype A dedicated docker image has been released for this feature: #### last prototype version: ```bash docker pull getmeili/meilisearch:prototype-define-searchable-fields-at-search-time-1 ``` #### other prototype versions: ```bash docker pull getmeili/meilisearch:prototype-define-searchable-fields-at-search-time-0 ``` ## Technical Detail The attributes-to-search-on list is given to the search context; the search context then uses the `fid_word_docids` database, restricted to the allowed field ids, instead of the global `word_docids` database. The same applies to the prefix databases. The database cache is updated with the merged values, meaning that the union of the field-id-database values is only made if the requested key is missing from the cache. ### Relevancy limits Almost all ranking rules behave as expected when ordering the documents. Only `proximity` could misorder documents if all the searched words are in the restricted attribute but a better proximity is found in an ignored attribute in a document that should be ranked lower. I put below a failing test showing it: ```rust #[actix_rt::test] async fn proximity_ranking_rule_order() { let server = Server::new().await; let index = index_with_documents( &server, &json!([ { "title": "Captain super mega cool. 
A Marvel story", // Perfect distance between words in an ignored attribute "desc": "Captain Marvel", "id": "1", }, { "title": "Captain America from Marvel", "desc": "a Shazam ersatz", "id": "2", }]), ) .await; // Document 2 should appear before document 1. index .search(json!({"q": "Captain Marvel", "attributesToSearchOn": ["title"], "attributesToRetrieve": ["id"]}), |response, code| { assert_eq!(code, 200, "{}", response); assert_eq!( response["hits"], json!([ {"id": "2"}, {"id": "1"}, ]) ); }) .await; } ``` Fixing this would force us to create `fid_word_pair_proximity_docids` and `fid_word_prefix_pair_proximity_docids` databases, which may multiply the keys of `word_pair_proximity_docids` and `word_prefix_pair_proximity_docids` by the number of attributes in the searchable_attributes list. If we think we should fix this test, I suggest doing it in another PR. ## Related Fixes #3772 Co-authored-by: Tamo <tamo@meilisearch.com> Co-authored-by: ManyTheFish <many@meilisearch.com>
This commit is contained in:
@ -20,7 +20,7 @@ mod sort;
|
||||
#[cfg(test)]
|
||||
mod tests;
|
||||
|
||||
use std::collections::HashSet;
|
||||
use std::collections::{BTreeSet, HashSet};
|
||||
|
||||
use bucket_sort::{bucket_sort, BucketSortOutput};
|
||||
use charabia::TokenizerBuilder;
|
||||
@ -46,6 +46,7 @@ use self::geo_sort::GeoSort;
|
||||
pub use self::geo_sort::Strategy as GeoSortStrategy;
|
||||
use self::graph_based_ranking_rule::Words;
|
||||
use self::interner::Interned;
|
||||
use crate::error::FieldIdMapMissingEntry;
|
||||
use crate::score_details::{ScoreDetails, ScoringStrategy};
|
||||
use crate::search::new::distinct::apply_distinct_rule;
|
||||
use crate::{
|
||||
@ -62,6 +63,7 @@ pub struct SearchContext<'ctx> {
|
||||
pub phrase_interner: DedupInterner<Phrase>,
|
||||
pub term_interner: Interner<QueryTerm>,
|
||||
pub phrase_docids: PhraseDocIdsCache,
|
||||
pub restricted_fids: Option<Vec<u16>>,
|
||||
}
|
||||
|
||||
impl<'ctx> SearchContext<'ctx> {
|
||||
@ -74,8 +76,66 @@ impl<'ctx> SearchContext<'ctx> {
|
||||
phrase_interner: <_>::default(),
|
||||
term_interner: <_>::default(),
|
||||
phrase_docids: <_>::default(),
|
||||
restricted_fids: None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn searchable_attributes(&mut self, searchable_attributes: &'ctx [String]) -> Result<()> {
|
||||
let fids_map = self.index.fields_ids_map(self.txn)?;
|
||||
let searchable_names = self.index.searchable_fields(self.txn)?;
|
||||
|
||||
let mut restricted_fids = Vec::new();
|
||||
for field_name in searchable_attributes {
|
||||
let searchable_contains_name =
|
||||
searchable_names.as_ref().map(|sn| sn.iter().any(|name| name == field_name));
|
||||
let fid = match (fids_map.id(field_name), searchable_contains_name) {
|
||||
// The Field id exist and the field is searchable
|
||||
(Some(fid), Some(true)) | (Some(fid), None) => fid,
|
||||
// The field is searchable but the Field id doesn't exist => Internal Error
|
||||
(None, Some(true)) => {
|
||||
return Err(FieldIdMapMissingEntry::FieldName {
|
||||
field_name: field_name.to_string(),
|
||||
process: "search",
|
||||
}
|
||||
.into())
|
||||
}
|
||||
// The field is not searchable => User error
|
||||
_otherwise => {
|
||||
let mut valid_fields: BTreeSet<_> =
|
||||
fids_map.names().map(String::from).collect();
|
||||
|
||||
// Filter by the searchable names
|
||||
if let Some(sn) = searchable_names {
|
||||
let searchable_names = sn.iter().map(|s| s.to_string()).collect();
|
||||
valid_fields = &valid_fields & &searchable_names;
|
||||
}
|
||||
|
||||
let searchable_count = valid_fields.len();
|
||||
|
||||
// Remove hidden fields
|
||||
if let Some(dn) = self.index.displayed_fields(self.txn)? {
|
||||
let displayable_names = dn.iter().map(|s| s.to_string()).collect();
|
||||
valid_fields = &valid_fields & &displayable_names;
|
||||
}
|
||||
|
||||
let hidden_fields = searchable_count > valid_fields.len();
|
||||
let field = field_name.to_string();
|
||||
return Err(UserError::InvalidSearchableAttribute {
|
||||
field,
|
||||
valid_fields,
|
||||
hidden_fields,
|
||||
}
|
||||
.into());
|
||||
}
|
||||
};
|
||||
|
||||
restricted_fids.push(fid);
|
||||
}
|
||||
|
||||
self.restricted_fids = Some(restricted_fids);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, PartialEq, PartialOrd, Ord, Eq)]
|
||||
|
Reference in New Issue
Block a user