mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-30 23:46:28 +00:00 
			
		
		
		
	Merge #3834
3834: Define searchable fields at runtime r=Kerollmops a=ManyTheFish
## Summary
This feature allows the end-user to search in one or multiple attributes using the search parameter `attributesToSearchOn`:
```json
{
  "q": "Captain Marvel",
  "attributesToSearchOn": ["title"]
}
```
This feature acts like a filter, forcing Meilisearch to only return the documents containing the requested words in the attributes-to-search-on. Note that, with the matching strategy `last`, Meilisearch will only ensure that the first word is in the attributes-to-search-on; however, the retrieved documents will be ordered taking into account the words contained in the attributes-to-search-on.
## Trying the prototype
A dedicated docker image has been released for this feature:
#### Latest prototype version:
```bash
docker pull getmeili/meilisearch:prototype-define-searchable-fields-at-search-time-1
```
#### Other prototype versions:
```bash
docker pull getmeili/meilisearch:prototype-define-searchable-fields-at-search-time-0
```
## Technical Detail
The attributes-to-search-on list is given to the search context; the search context then reads the `word_fid_docids` database, using only the allowed field ids, instead of the global `word_docids` database. The same applies to the prefix databases.
The database cache is updated with the merged values, meaning that the union of the field-id-database values is only made if the requested key is missing from the cache.
### Relevancy limits
Almost all ranking rules behave as expected when ordering the documents.
Only `proximity` could misorder documents: this happens when all the searched words are in the restricted attribute, but a better proximity is found in an ignored attribute of a document that should be ranked lower. Below is a failing test that demonstrates it:
```rust
#[actix_rt::test]
async fn proximity_ranking_rule_order() {
    let server = Server::new().await;
    let index = index_with_documents(
        &server,
        &json!([
        {
            "title": "Captain super mega cool. A Marvel story",
            // Perfect distance between words in an ignored attribute
            "desc": "Captain Marvel",
            "id": "1",
        },
        {
            "title": "Captain America from Marvel",
            "desc": "a Shazam ersatz",
            "id": "2",
        }]),
    )
    .await;
    // Document 2 should appear before document 1.
    index
        .search(json!({"q": "Captain Marvel", "attributesToSearchOn": ["title"], "attributesToRetrieve": ["id"]}), |response, code| {
            assert_eq!(code, 200, "{}", response);
            assert_eq!(
                response["hits"],
                json!([
                    {"id": "2"},
                    {"id": "1"},
                ])
            );
        })
        .await;
}
```
Fixing this would force us to create `fid_word_pair_proximity_docids` and `fid_word_prefix_pair_proximity_docids` databases, which may multiply the number of keys of `word_pair_proximity_docids` and `word_prefix_pair_proximity_docids` by the number of attributes in the searchable_attributes list. If we think we should fix this test, I suggest doing it in another PR.
## Related
Fixes #3772
Co-authored-by: Tamo <tamo@meilisearch.com>
Co-authored-by: ManyTheFish <many@meilisearch.com>
			
			
This commit is contained in:
		| @@ -128,6 +128,16 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco | ||||
|         } | ||||
|     )] | ||||
|     InvalidSortableAttribute { field: String, valid_fields: BTreeSet<String> }, | ||||
|     #[error("Attribute `{}` is not searchable. Available searchable attributes are: `{}{}`.", | ||||
|         .field, | ||||
|         .valid_fields.iter().map(AsRef::as_ref).collect::<Vec<&str>>().join(", "), | ||||
|         .hidden_fields.then_some(", <..hidden-attributes>").unwrap_or(""), | ||||
|     )] | ||||
|     InvalidSearchableAttribute { | ||||
|         field: String, | ||||
|         valid_fields: BTreeSet<String>, | ||||
|         hidden_fields: bool, | ||||
|     }, | ||||
|     #[error("{}", HeedError::BadOpenOptions)] | ||||
|     InvalidLmdbOpenOptions, | ||||
|     #[error("You must specify where `sort` is listed in the rankingRules setting to use the sort parameter at search time.")] | ||||
|   | ||||
| @@ -23,3 +23,9 @@ pub use self::roaring_bitmap_length::{ | ||||
| pub use self::script_language_codec::ScriptLanguageCodec; | ||||
| pub use self::str_beu32_codec::{StrBEU16Codec, StrBEU32Codec}; | ||||
| pub use self::str_str_u8_codec::{U8StrStrCodec, UncheckedU8StrStrCodec}; | ||||
|  | ||||
| pub trait BytesDecodeOwned { | ||||
|     type DItem; | ||||
|  | ||||
|     fn bytes_decode_owned(bytes: &[u8]) -> Option<Self::DItem>; | ||||
| } | ||||
|   | ||||
| @@ -2,8 +2,11 @@ use std::borrow::Cow; | ||||
| use std::convert::TryInto; | ||||
| use std::mem::size_of; | ||||
|  | ||||
| use heed::BytesDecode; | ||||
| use roaring::RoaringBitmap; | ||||
|  | ||||
| use crate::heed_codec::BytesDecodeOwned; | ||||
|  | ||||
| pub struct BoRoaringBitmapCodec; | ||||
|  | ||||
| impl BoRoaringBitmapCodec { | ||||
| @@ -13,7 +16,7 @@ impl BoRoaringBitmapCodec { | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl heed::BytesDecode<'_> for BoRoaringBitmapCodec { | ||||
| impl BytesDecode<'_> for BoRoaringBitmapCodec { | ||||
|     type DItem = RoaringBitmap; | ||||
|  | ||||
|     fn bytes_decode(bytes: &[u8]) -> Option<Self::DItem> { | ||||
| @@ -28,6 +31,14 @@ impl heed::BytesDecode<'_> for BoRoaringBitmapCodec { | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl BytesDecodeOwned for BoRoaringBitmapCodec { | ||||
|     type DItem = RoaringBitmap; | ||||
|  | ||||
|     fn bytes_decode_owned(bytes: &[u8]) -> Option<Self::DItem> { | ||||
|         Self::bytes_decode(bytes) | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl heed::BytesEncode<'_> for BoRoaringBitmapCodec { | ||||
|     type EItem = RoaringBitmap; | ||||
|  | ||||
|   | ||||
| @@ -5,6 +5,8 @@ use std::mem::size_of; | ||||
| use byteorder::{NativeEndian, ReadBytesExt, WriteBytesExt}; | ||||
| use roaring::RoaringBitmap; | ||||
|  | ||||
| use crate::heed_codec::BytesDecodeOwned; | ||||
|  | ||||
| /// This is the limit where using a byteorder became less size efficient | ||||
| /// than using a direct roaring encoding, it is also the point where we are able | ||||
| /// to determine the encoding used only by using the array of bytes length. | ||||
| @@ -103,6 +105,14 @@ impl heed::BytesDecode<'_> for CboRoaringBitmapCodec { | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl BytesDecodeOwned for CboRoaringBitmapCodec { | ||||
|     type DItem = RoaringBitmap; | ||||
|  | ||||
|     fn bytes_decode_owned(bytes: &[u8]) -> Option<Self::DItem> { | ||||
|         Self::deserialize_from(bytes).ok() | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl heed::BytesEncode<'_> for CboRoaringBitmapCodec { | ||||
|     type EItem = RoaringBitmap; | ||||
|  | ||||
|   | ||||
| @@ -2,6 +2,8 @@ use std::borrow::Cow; | ||||
|  | ||||
| use roaring::RoaringBitmap; | ||||
|  | ||||
| use crate::heed_codec::BytesDecodeOwned; | ||||
|  | ||||
| pub struct RoaringBitmapCodec; | ||||
|  | ||||
| impl heed::BytesDecode<'_> for RoaringBitmapCodec { | ||||
| @@ -12,6 +14,14 @@ impl heed::BytesDecode<'_> for RoaringBitmapCodec { | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl BytesDecodeOwned for RoaringBitmapCodec { | ||||
|     type DItem = RoaringBitmap; | ||||
|  | ||||
|     fn bytes_decode_owned(bytes: &[u8]) -> Option<Self::DItem> { | ||||
|         RoaringBitmap::deserialize_from(bytes).ok() | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl heed::BytesEncode<'_> for RoaringBitmapCodec { | ||||
|     type EItem = RoaringBitmap; | ||||
|  | ||||
|   | ||||
| @@ -1,11 +1,23 @@ | ||||
| use std::mem; | ||||
|  | ||||
| use heed::BytesDecode; | ||||
|  | ||||
| use crate::heed_codec::BytesDecodeOwned; | ||||
|  | ||||
| pub struct BoRoaringBitmapLenCodec; | ||||
|  | ||||
| impl heed::BytesDecode<'_> for BoRoaringBitmapLenCodec { | ||||
| impl BytesDecode<'_> for BoRoaringBitmapLenCodec { | ||||
|     type DItem = u64; | ||||
|  | ||||
|     fn bytes_decode(bytes: &[u8]) -> Option<Self::DItem> { | ||||
|         Some((bytes.len() / mem::size_of::<u32>()) as u64) | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl BytesDecodeOwned for BoRoaringBitmapLenCodec { | ||||
|     type DItem = u64; | ||||
|  | ||||
|     fn bytes_decode_owned(bytes: &[u8]) -> Option<Self::DItem> { | ||||
|         Self::bytes_decode(bytes) | ||||
|     } | ||||
| } | ||||
|   | ||||
| @@ -1,11 +1,14 @@ | ||||
| use std::mem; | ||||
|  | ||||
| use heed::BytesDecode; | ||||
|  | ||||
| use super::{BoRoaringBitmapLenCodec, RoaringBitmapLenCodec}; | ||||
| use crate::heed_codec::roaring_bitmap::cbo_roaring_bitmap_codec::THRESHOLD; | ||||
| use crate::heed_codec::BytesDecodeOwned; | ||||
|  | ||||
| pub struct CboRoaringBitmapLenCodec; | ||||
|  | ||||
| impl heed::BytesDecode<'_> for CboRoaringBitmapLenCodec { | ||||
| impl BytesDecode<'_> for CboRoaringBitmapLenCodec { | ||||
|     type DItem = u64; | ||||
|  | ||||
|     fn bytes_decode(bytes: &[u8]) -> Option<Self::DItem> { | ||||
| @@ -20,3 +23,11 @@ impl heed::BytesDecode<'_> for CboRoaringBitmapLenCodec { | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl BytesDecodeOwned for CboRoaringBitmapLenCodec { | ||||
|     type DItem = u64; | ||||
|  | ||||
|     fn bytes_decode_owned(bytes: &[u8]) -> Option<Self::DItem> { | ||||
|         Self::bytes_decode(bytes) | ||||
|     } | ||||
| } | ||||
|   | ||||
| @@ -3,6 +3,8 @@ use std::mem; | ||||
|  | ||||
| use byteorder::{LittleEndian, ReadBytesExt}; | ||||
|  | ||||
| use crate::heed_codec::BytesDecodeOwned; | ||||
|  | ||||
| const SERIAL_COOKIE_NO_RUNCONTAINER: u32 = 12346; | ||||
| const SERIAL_COOKIE: u16 = 12347; | ||||
|  | ||||
| @@ -59,6 +61,14 @@ impl heed::BytesDecode<'_> for RoaringBitmapLenCodec { | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl BytesDecodeOwned for RoaringBitmapLenCodec { | ||||
|     type DItem = u64; | ||||
|  | ||||
|     fn bytes_decode_owned(bytes: &[u8]) -> Option<Self::DItem> { | ||||
|         RoaringBitmapLenCodec::deserialize_from_slice(bytes).ok() | ||||
|     } | ||||
| } | ||||
|  | ||||
| #[cfg(test)] | ||||
| mod tests { | ||||
|     use heed::BytesEncode; | ||||
|   | ||||
| @@ -29,6 +29,7 @@ pub struct Search<'a> { | ||||
|     offset: usize, | ||||
|     limit: usize, | ||||
|     sort_criteria: Option<Vec<AscDesc>>, | ||||
|     searchable_attributes: Option<&'a [String]>, | ||||
|     geo_strategy: new::GeoSortStrategy, | ||||
|     terms_matching_strategy: TermsMatchingStrategy, | ||||
|     scoring_strategy: ScoringStrategy, | ||||
| @@ -47,6 +48,7 @@ impl<'a> Search<'a> { | ||||
|             offset: 0, | ||||
|             limit: 20, | ||||
|             sort_criteria: None, | ||||
|             searchable_attributes: None, | ||||
|             geo_strategy: new::GeoSortStrategy::default(), | ||||
|             terms_matching_strategy: TermsMatchingStrategy::default(), | ||||
|             scoring_strategy: Default::default(), | ||||
| @@ -82,6 +84,11 @@ impl<'a> Search<'a> { | ||||
|         self | ||||
|     } | ||||
|  | ||||
|     pub fn searchable_attributes(&mut self, searchable: &'a [String]) -> &mut Search<'a> { | ||||
|         self.searchable_attributes = Some(searchable); | ||||
|         self | ||||
|     } | ||||
|  | ||||
|     pub fn terms_matching_strategy(&mut self, value: TermsMatchingStrategy) -> &mut Search<'a> { | ||||
|         self.terms_matching_strategy = value; | ||||
|         self | ||||
| @@ -117,6 +124,11 @@ impl<'a> Search<'a> { | ||||
|  | ||||
|     pub fn execute(&self) -> Result<SearchResult> { | ||||
|         let mut ctx = SearchContext::new(self.index, self.rtxn); | ||||
|  | ||||
|         if let Some(searchable_attributes) = self.searchable_attributes { | ||||
|             ctx.searchable_attributes(searchable_attributes)?; | ||||
|         } | ||||
|  | ||||
|         let PartialSearchResult { located_query_terms, candidates, documents_ids, document_scores } = | ||||
|             execute_search( | ||||
|                 &mut ctx, | ||||
| @@ -154,6 +166,7 @@ impl fmt::Debug for Search<'_> { | ||||
|             offset, | ||||
|             limit, | ||||
|             sort_criteria, | ||||
|             searchable_attributes, | ||||
|             geo_strategy: _, | ||||
|             terms_matching_strategy, | ||||
|             scoring_strategy, | ||||
| @@ -169,6 +182,7 @@ impl fmt::Debug for Search<'_> { | ||||
|             .field("offset", offset) | ||||
|             .field("limit", limit) | ||||
|             .field("sort_criteria", sort_criteria) | ||||
|             .field("searchable_attributes", searchable_attributes) | ||||
|             .field("terms_matching_strategy", terms_matching_strategy) | ||||
|             .field("scoring_strategy", scoring_strategy) | ||||
|             .field("exhaustive_number_hits", exhaustive_number_hits) | ||||
|   | ||||
| @@ -4,12 +4,13 @@ use std::hash::Hash; | ||||
|  | ||||
| use fxhash::FxHashMap; | ||||
| use heed::types::ByteSlice; | ||||
| use heed::{BytesDecode, BytesEncode, Database, RoTxn}; | ||||
| use heed::{BytesEncode, Database, RoTxn}; | ||||
| use roaring::RoaringBitmap; | ||||
|  | ||||
| use super::interner::Interned; | ||||
| use super::Word; | ||||
| use crate::heed_codec::StrBEU16Codec; | ||||
| use crate::heed_codec::{BytesDecodeOwned, StrBEU16Codec}; | ||||
| use crate::update::{merge_cbo_roaring_bitmaps, MergeFn}; | ||||
| use crate::{ | ||||
|     CboRoaringBitmapCodec, CboRoaringBitmapLenCodec, Result, RoaringBitmapCodec, SearchContext, | ||||
| }; | ||||
| @@ -22,50 +23,104 @@ use crate::{ | ||||
| #[derive(Default)] | ||||
| pub struct DatabaseCache<'ctx> { | ||||
|     pub word_pair_proximity_docids: | ||||
|         FxHashMap<(u8, Interned<String>, Interned<String>), Option<&'ctx [u8]>>, | ||||
|         FxHashMap<(u8, Interned<String>, Interned<String>), Option<Cow<'ctx, [u8]>>>, | ||||
|     pub word_prefix_pair_proximity_docids: | ||||
|         FxHashMap<(u8, Interned<String>, Interned<String>), Option<&'ctx [u8]>>, | ||||
|         FxHashMap<(u8, Interned<String>, Interned<String>), Option<Cow<'ctx, [u8]>>>, | ||||
|     pub prefix_word_pair_proximity_docids: | ||||
|         FxHashMap<(u8, Interned<String>, Interned<String>), Option<&'ctx [u8]>>, | ||||
|     pub word_docids: FxHashMap<Interned<String>, Option<&'ctx [u8]>>, | ||||
|     pub exact_word_docids: FxHashMap<Interned<String>, Option<&'ctx [u8]>>, | ||||
|     pub word_prefix_docids: FxHashMap<Interned<String>, Option<&'ctx [u8]>>, | ||||
|     pub exact_word_prefix_docids: FxHashMap<Interned<String>, Option<&'ctx [u8]>>, | ||||
|         FxHashMap<(u8, Interned<String>, Interned<String>), Option<Cow<'ctx, [u8]>>>, | ||||
|     pub word_docids: FxHashMap<Interned<String>, Option<Cow<'ctx, [u8]>>>, | ||||
|     pub exact_word_docids: FxHashMap<Interned<String>, Option<Cow<'ctx, [u8]>>>, | ||||
|     pub word_prefix_docids: FxHashMap<Interned<String>, Option<Cow<'ctx, [u8]>>>, | ||||
|     pub exact_word_prefix_docids: FxHashMap<Interned<String>, Option<Cow<'ctx, [u8]>>>, | ||||
|  | ||||
|     pub words_fst: Option<fst::Set<Cow<'ctx, [u8]>>>, | ||||
|     pub word_position_docids: FxHashMap<(Interned<String>, u16), Option<&'ctx [u8]>>, | ||||
|     pub word_prefix_position_docids: FxHashMap<(Interned<String>, u16), Option<&'ctx [u8]>>, | ||||
|     pub word_position_docids: FxHashMap<(Interned<String>, u16), Option<Cow<'ctx, [u8]>>>, | ||||
|     pub word_prefix_position_docids: FxHashMap<(Interned<String>, u16), Option<Cow<'ctx, [u8]>>>, | ||||
|     pub word_positions: FxHashMap<Interned<String>, Vec<u16>>, | ||||
|     pub word_prefix_positions: FxHashMap<Interned<String>, Vec<u16>>, | ||||
|  | ||||
|     pub word_fid_docids: FxHashMap<(Interned<String>, u16), Option<&'ctx [u8]>>, | ||||
|     pub word_prefix_fid_docids: FxHashMap<(Interned<String>, u16), Option<&'ctx [u8]>>, | ||||
|     pub word_fid_docids: FxHashMap<(Interned<String>, u16), Option<Cow<'ctx, [u8]>>>, | ||||
|     pub word_prefix_fid_docids: FxHashMap<(Interned<String>, u16), Option<Cow<'ctx, [u8]>>>, | ||||
|     pub word_fids: FxHashMap<Interned<String>, Vec<u16>>, | ||||
|     pub word_prefix_fids: FxHashMap<Interned<String>, Vec<u16>>, | ||||
| } | ||||
| impl<'ctx> DatabaseCache<'ctx> { | ||||
|     fn get_value<'v, K1, KC>( | ||||
|     fn get_value<'v, K1, KC, DC>( | ||||
|         txn: &'ctx RoTxn, | ||||
|         cache_key: K1, | ||||
|         db_key: &'v KC::EItem, | ||||
|         cache: &mut FxHashMap<K1, Option<&'ctx [u8]>>, | ||||
|         cache: &mut FxHashMap<K1, Option<Cow<'ctx, [u8]>>>, | ||||
|         db: Database<KC, ByteSlice>, | ||||
|     ) -> Result<Option<&'ctx [u8]>> | ||||
|     ) -> Result<Option<DC::DItem>> | ||||
|     where | ||||
|         K1: Copy + Eq + Hash, | ||||
|         KC: BytesEncode<'v>, | ||||
|         DC: BytesDecodeOwned, | ||||
|     { | ||||
|         let bitmap_ptr = match cache.entry(cache_key) { | ||||
|             Entry::Occupied(bitmap_ptr) => *bitmap_ptr.get(), | ||||
|             Entry::Vacant(entry) => { | ||||
|                 let bitmap_ptr = db.get(txn, db_key)?; | ||||
|                 entry.insert(bitmap_ptr); | ||||
|                 bitmap_ptr | ||||
|         if let Entry::Vacant(entry) = cache.entry(cache_key) { | ||||
|             let bitmap_ptr = db.get(txn, db_key)?.map(Cow::Borrowed); | ||||
|             entry.insert(bitmap_ptr); | ||||
|         } | ||||
|  | ||||
|         match cache.get(&cache_key).unwrap() { | ||||
|             Some(Cow::Borrowed(bytes)) => { | ||||
|                 DC::bytes_decode_owned(bytes).ok_or(heed::Error::Decoding.into()).map(Some) | ||||
|             } | ||||
|         }; | ||||
|         Ok(bitmap_ptr) | ||||
|             Some(Cow::Owned(bytes)) => { | ||||
|                 DC::bytes_decode_owned(bytes).ok_or(heed::Error::Decoding.into()).map(Some) | ||||
|             } | ||||
|             None => Ok(None), | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     fn get_value_from_keys<'v, K1, KC, DC>( | ||||
|         txn: &'ctx RoTxn, | ||||
|         cache_key: K1, | ||||
|         db_keys: &'v [KC::EItem], | ||||
|         cache: &mut FxHashMap<K1, Option<Cow<'ctx, [u8]>>>, | ||||
|         db: Database<KC, ByteSlice>, | ||||
|         merger: MergeFn, | ||||
|     ) -> Result<Option<DC::DItem>> | ||||
|     where | ||||
|         K1: Copy + Eq + Hash, | ||||
|         KC: BytesEncode<'v>, | ||||
|         DC: BytesDecodeOwned, | ||||
|         KC::EItem: Sized, | ||||
|     { | ||||
|         if let Entry::Vacant(entry) = cache.entry(cache_key) { | ||||
|             let bitmap_ptr: Option<Cow<'ctx, [u8]>> = match db_keys { | ||||
|                 [] => None, | ||||
|                 [key] => db.get(txn, key)?.map(Cow::Borrowed), | ||||
|                 keys => { | ||||
|                     let bitmaps = keys | ||||
|                         .iter() | ||||
|                         .filter_map(|key| db.get(txn, key).transpose()) | ||||
|                         .map(|v| v.map(Cow::Borrowed)) | ||||
|                         .collect::<std::result::Result<Vec<Cow<[u8]>>, _>>()?; | ||||
|  | ||||
|                     if bitmaps.is_empty() { | ||||
|                         None | ||||
|                     } else { | ||||
|                         Some(merger(&[], &bitmaps[..])?) | ||||
|                     } | ||||
|                 } | ||||
|             }; | ||||
|  | ||||
|             entry.insert(bitmap_ptr); | ||||
|         } | ||||
|  | ||||
|         match cache.get(&cache_key).unwrap() { | ||||
|             Some(Cow::Borrowed(bytes)) => { | ||||
|                 DC::bytes_decode_owned(bytes).ok_or(heed::Error::Decoding.into()).map(Some) | ||||
|             } | ||||
|             Some(Cow::Owned(bytes)) => { | ||||
|                 DC::bytes_decode_owned(bytes).ok_or(heed::Error::Decoding.into()).map(Some) | ||||
|             } | ||||
|             None => Ok(None), | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl<'ctx> SearchContext<'ctx> { | ||||
|     pub fn get_words_fst(&mut self) -> Result<fst::Set<Cow<'ctx, [u8]>>> { | ||||
|         if let Some(fst) = self.db_cache.words_fst.clone() { | ||||
| @@ -99,30 +154,41 @@ impl<'ctx> SearchContext<'ctx> { | ||||
|  | ||||
|     /// Retrieve or insert the given value in the `word_docids` database. | ||||
|     fn get_db_word_docids(&mut self, word: Interned<String>) -> Result<Option<RoaringBitmap>> { | ||||
|         DatabaseCache::get_value( | ||||
|             self.txn, | ||||
|             word, | ||||
|             self.word_interner.get(word).as_str(), | ||||
|             &mut self.db_cache.word_docids, | ||||
|             self.index.word_docids.remap_data_type::<ByteSlice>(), | ||||
|         )? | ||||
|         .map(|bytes| RoaringBitmapCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding.into())) | ||||
|         .transpose() | ||||
|         match &self.restricted_fids { | ||||
|             Some(restricted_fids) => { | ||||
|                 let interned = self.word_interner.get(word).as_str(); | ||||
|                 let keys: Vec<_> = restricted_fids.iter().map(|fid| (interned, *fid)).collect(); | ||||
|  | ||||
|                 DatabaseCache::get_value_from_keys::<_, _, CboRoaringBitmapCodec>( | ||||
|                     self.txn, | ||||
|                     word, | ||||
|                     &keys[..], | ||||
|                     &mut self.db_cache.word_docids, | ||||
|                     self.index.word_fid_docids.remap_data_type::<ByteSlice>(), | ||||
|                     merge_cbo_roaring_bitmaps, | ||||
|                 ) | ||||
|             } | ||||
|             None => DatabaseCache::get_value::<_, _, RoaringBitmapCodec>( | ||||
|                 self.txn, | ||||
|                 word, | ||||
|                 self.word_interner.get(word).as_str(), | ||||
|                 &mut self.db_cache.word_docids, | ||||
|                 self.index.word_docids.remap_data_type::<ByteSlice>(), | ||||
|             ), | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     fn get_db_exact_word_docids( | ||||
|         &mut self, | ||||
|         word: Interned<String>, | ||||
|     ) -> Result<Option<RoaringBitmap>> { | ||||
|         DatabaseCache::get_value( | ||||
|         DatabaseCache::get_value::<_, _, RoaringBitmapCodec>( | ||||
|             self.txn, | ||||
|             word, | ||||
|             self.word_interner.get(word).as_str(), | ||||
|             &mut self.db_cache.exact_word_docids, | ||||
|             self.index.exact_word_docids.remap_data_type::<ByteSlice>(), | ||||
|         )? | ||||
|         .map(|bytes| RoaringBitmapCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding.into())) | ||||
|         .transpose() | ||||
|         ) | ||||
|     } | ||||
|  | ||||
|     pub fn word_prefix_docids(&mut self, prefix: Word) -> Result<Option<RoaringBitmap>> { | ||||
| @@ -150,30 +216,41 @@ impl<'ctx> SearchContext<'ctx> { | ||||
|         &mut self, | ||||
|         prefix: Interned<String>, | ||||
|     ) -> Result<Option<RoaringBitmap>> { | ||||
|         DatabaseCache::get_value( | ||||
|             self.txn, | ||||
|             prefix, | ||||
|             self.word_interner.get(prefix).as_str(), | ||||
|             &mut self.db_cache.word_prefix_docids, | ||||
|             self.index.word_prefix_docids.remap_data_type::<ByteSlice>(), | ||||
|         )? | ||||
|         .map(|bytes| RoaringBitmapCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding.into())) | ||||
|         .transpose() | ||||
|         match &self.restricted_fids { | ||||
|             Some(restricted_fids) => { | ||||
|                 let interned = self.word_interner.get(prefix).as_str(); | ||||
|                 let keys: Vec<_> = restricted_fids.iter().map(|fid| (interned, *fid)).collect(); | ||||
|  | ||||
|                 DatabaseCache::get_value_from_keys::<_, _, CboRoaringBitmapCodec>( | ||||
|                     self.txn, | ||||
|                     prefix, | ||||
|                     &keys[..], | ||||
|                     &mut self.db_cache.word_prefix_docids, | ||||
|                     self.index.word_prefix_fid_docids.remap_data_type::<ByteSlice>(), | ||||
|                     merge_cbo_roaring_bitmaps, | ||||
|                 ) | ||||
|             } | ||||
|             None => DatabaseCache::get_value::<_, _, RoaringBitmapCodec>( | ||||
|                 self.txn, | ||||
|                 prefix, | ||||
|                 self.word_interner.get(prefix).as_str(), | ||||
|                 &mut self.db_cache.word_prefix_docids, | ||||
|                 self.index.word_prefix_docids.remap_data_type::<ByteSlice>(), | ||||
|             ), | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     fn get_db_exact_word_prefix_docids( | ||||
|         &mut self, | ||||
|         prefix: Interned<String>, | ||||
|     ) -> Result<Option<RoaringBitmap>> { | ||||
|         DatabaseCache::get_value( | ||||
|         DatabaseCache::get_value::<_, _, RoaringBitmapCodec>( | ||||
|             self.txn, | ||||
|             prefix, | ||||
|             self.word_interner.get(prefix).as_str(), | ||||
|             &mut self.db_cache.exact_word_prefix_docids, | ||||
|             self.index.exact_word_prefix_docids.remap_data_type::<ByteSlice>(), | ||||
|         )? | ||||
|         .map(|bytes| RoaringBitmapCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding.into())) | ||||
|         .transpose() | ||||
|         ) | ||||
|     } | ||||
|  | ||||
|     pub fn get_db_word_pair_proximity_docids( | ||||
| @@ -182,7 +259,7 @@ impl<'ctx> SearchContext<'ctx> { | ||||
|         word2: Interned<String>, | ||||
|         proximity: u8, | ||||
|     ) -> Result<Option<RoaringBitmap>> { | ||||
|         DatabaseCache::get_value( | ||||
|         DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>( | ||||
|             self.txn, | ||||
|             (proximity, word1, word2), | ||||
|             &( | ||||
| @@ -192,9 +269,7 @@ impl<'ctx> SearchContext<'ctx> { | ||||
|             ), | ||||
|             &mut self.db_cache.word_pair_proximity_docids, | ||||
|             self.index.word_pair_proximity_docids.remap_data_type::<ByteSlice>(), | ||||
|         )? | ||||
|         .map(|bytes| CboRoaringBitmapCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding.into())) | ||||
|         .transpose() | ||||
|         ) | ||||
|     } | ||||
|  | ||||
|     pub fn get_db_word_pair_proximity_docids_len( | ||||
| @@ -203,7 +278,7 @@ impl<'ctx> SearchContext<'ctx> { | ||||
|         word2: Interned<String>, | ||||
|         proximity: u8, | ||||
|     ) -> Result<Option<u64>> { | ||||
|         DatabaseCache::get_value( | ||||
|         DatabaseCache::get_value::<_, _, CboRoaringBitmapLenCodec>( | ||||
|             self.txn, | ||||
|             (proximity, word1, word2), | ||||
|             &( | ||||
| @@ -213,11 +288,7 @@ impl<'ctx> SearchContext<'ctx> { | ||||
|             ), | ||||
|             &mut self.db_cache.word_pair_proximity_docids, | ||||
|             self.index.word_pair_proximity_docids.remap_data_type::<ByteSlice>(), | ||||
|         )? | ||||
|         .map(|bytes| { | ||||
|             CboRoaringBitmapLenCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding.into()) | ||||
|         }) | ||||
|         .transpose() | ||||
|         ) | ||||
|     } | ||||
|  | ||||
|     pub fn get_db_word_prefix_pair_proximity_docids( | ||||
| @@ -226,7 +297,7 @@ impl<'ctx> SearchContext<'ctx> { | ||||
|         prefix2: Interned<String>, | ||||
|         proximity: u8, | ||||
|     ) -> Result<Option<RoaringBitmap>> { | ||||
|         DatabaseCache::get_value( | ||||
|         DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>( | ||||
|             self.txn, | ||||
|             (proximity, word1, prefix2), | ||||
|             &( | ||||
| @@ -236,9 +307,7 @@ impl<'ctx> SearchContext<'ctx> { | ||||
|             ), | ||||
|             &mut self.db_cache.word_prefix_pair_proximity_docids, | ||||
|             self.index.word_prefix_pair_proximity_docids.remap_data_type::<ByteSlice>(), | ||||
|         )? | ||||
|         .map(|bytes| CboRoaringBitmapCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding.into())) | ||||
|         .transpose() | ||||
|         ) | ||||
|     } | ||||
|     pub fn get_db_prefix_word_pair_proximity_docids( | ||||
|         &mut self, | ||||
| @@ -246,7 +315,7 @@ impl<'ctx> SearchContext<'ctx> { | ||||
|         right: Interned<String>, | ||||
|         proximity: u8, | ||||
|     ) -> Result<Option<RoaringBitmap>> { | ||||
|         DatabaseCache::get_value( | ||||
|         DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>( | ||||
|             self.txn, | ||||
|             (proximity, left_prefix, right), | ||||
|             &( | ||||
| @@ -256,9 +325,7 @@ impl<'ctx> SearchContext<'ctx> { | ||||
|             ), | ||||
|             &mut self.db_cache.prefix_word_pair_proximity_docids, | ||||
|             self.index.prefix_word_pair_proximity_docids.remap_data_type::<ByteSlice>(), | ||||
|         )? | ||||
|         .map(|bytes| CboRoaringBitmapCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding.into())) | ||||
|         .transpose() | ||||
|         ) | ||||
|     } | ||||
|  | ||||
|     pub fn get_db_word_fid_docids( | ||||
| @@ -266,15 +333,18 @@ impl<'ctx> SearchContext<'ctx> { | ||||
|         word: Interned<String>, | ||||
|         fid: u16, | ||||
|     ) -> Result<Option<RoaringBitmap>> { | ||||
|         DatabaseCache::get_value( | ||||
|         // if the requested fid isn't in the restricted list, return None. | ||||
|         if self.restricted_fids.as_ref().map_or(false, |fids| !fids.contains(&fid)) { | ||||
|             return Ok(None); | ||||
|         } | ||||
|  | ||||
|         DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>( | ||||
|             self.txn, | ||||
|             (word, fid), | ||||
|             &(self.word_interner.get(word).as_str(), fid), | ||||
|             &mut self.db_cache.word_fid_docids, | ||||
|             self.index.word_fid_docids.remap_data_type::<ByteSlice>(), | ||||
|         )? | ||||
|         .map(|bytes| CboRoaringBitmapCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding.into())) | ||||
|         .transpose() | ||||
|         ) | ||||
|     } | ||||
|  | ||||
|     pub fn get_db_word_prefix_fid_docids( | ||||
| @@ -282,15 +352,18 @@ impl<'ctx> SearchContext<'ctx> { | ||||
|         word_prefix: Interned<String>, | ||||
|         fid: u16, | ||||
|     ) -> Result<Option<RoaringBitmap>> { | ||||
|         DatabaseCache::get_value( | ||||
|         // if the requested fid isn't in the restricted list, return None. | ||||
|         if self.restricted_fids.as_ref().map_or(false, |fids| !fids.contains(&fid)) { | ||||
|             return Ok(None); | ||||
|         } | ||||
|  | ||||
|         DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>( | ||||
|             self.txn, | ||||
|             (word_prefix, fid), | ||||
|             &(self.word_interner.get(word_prefix).as_str(), fid), | ||||
|             &mut self.db_cache.word_prefix_fid_docids, | ||||
|             self.index.word_prefix_fid_docids.remap_data_type::<ByteSlice>(), | ||||
|         )? | ||||
|         .map(|bytes| CboRoaringBitmapCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding.into())) | ||||
|         .transpose() | ||||
|         ) | ||||
|     } | ||||
|  | ||||
|     pub fn get_db_word_fids(&mut self, word: Interned<String>) -> Result<Vec<u16>> { | ||||
| @@ -309,7 +382,7 @@ impl<'ctx> SearchContext<'ctx> { | ||||
|                 for result in remap_key_type { | ||||
|                     let ((_, fid), value) = result?; | ||||
|                     // filling other caches to avoid searching for them again | ||||
|                     self.db_cache.word_fid_docids.insert((word, fid), Some(value)); | ||||
|                     self.db_cache.word_fid_docids.insert((word, fid), Some(Cow::Borrowed(value))); | ||||
|                     fids.push(fid); | ||||
|                 } | ||||
|                 entry.insert(fids.clone()); | ||||
| @@ -335,7 +408,9 @@ impl<'ctx> SearchContext<'ctx> { | ||||
|                 for result in remap_key_type { | ||||
|                     let ((_, fid), value) = result?; | ||||
|                     // filling other caches to avoid searching for them again | ||||
|                     self.db_cache.word_prefix_fid_docids.insert((word_prefix, fid), Some(value)); | ||||
|                     self.db_cache | ||||
|                         .word_prefix_fid_docids | ||||
|                         .insert((word_prefix, fid), Some(Cow::Borrowed(value))); | ||||
|                     fids.push(fid); | ||||
|                 } | ||||
|                 entry.insert(fids.clone()); | ||||
| @@ -350,15 +425,13 @@ impl<'ctx> SearchContext<'ctx> { | ||||
|         word: Interned<String>, | ||||
|         position: u16, | ||||
|     ) -> Result<Option<RoaringBitmap>> { | ||||
|         DatabaseCache::get_value( | ||||
|         DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>( | ||||
|             self.txn, | ||||
|             (word, position), | ||||
|             &(self.word_interner.get(word).as_str(), position), | ||||
|             &mut self.db_cache.word_position_docids, | ||||
|             self.index.word_position_docids.remap_data_type::<ByteSlice>(), | ||||
|         )? | ||||
|         .map(|bytes| CboRoaringBitmapCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding.into())) | ||||
|         .transpose() | ||||
|         ) | ||||
|     } | ||||
|  | ||||
|     pub fn get_db_word_prefix_position_docids( | ||||
| @@ -366,15 +439,13 @@ impl<'ctx> SearchContext<'ctx> { | ||||
|         word_prefix: Interned<String>, | ||||
|         position: u16, | ||||
|     ) -> Result<Option<RoaringBitmap>> { | ||||
|         DatabaseCache::get_value( | ||||
|         DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>( | ||||
|             self.txn, | ||||
|             (word_prefix, position), | ||||
|             &(self.word_interner.get(word_prefix).as_str(), position), | ||||
|             &mut self.db_cache.word_prefix_position_docids, | ||||
|             self.index.word_prefix_position_docids.remap_data_type::<ByteSlice>(), | ||||
|         )? | ||||
|         .map(|bytes| CboRoaringBitmapCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding.into())) | ||||
|         .transpose() | ||||
|         ) | ||||
|     } | ||||
|  | ||||
|     pub fn get_db_word_positions(&mut self, word: Interned<String>) -> Result<Vec<u16>> { | ||||
| @@ -393,7 +464,9 @@ impl<'ctx> SearchContext<'ctx> { | ||||
|                 for result in remap_key_type { | ||||
|                     let ((_, position), value) = result?; | ||||
|                     // filling other caches to avoid searching for them again | ||||
|                     self.db_cache.word_position_docids.insert((word, position), Some(value)); | ||||
|                     self.db_cache | ||||
|                         .word_position_docids | ||||
|                         .insert((word, position), Some(Cow::Borrowed(value))); | ||||
|                     positions.push(position); | ||||
|                 } | ||||
|                 entry.insert(positions.clone()); | ||||
| @@ -424,7 +497,7 @@ impl<'ctx> SearchContext<'ctx> { | ||||
|                     // filling other caches to avoid searching for them again | ||||
|                     self.db_cache | ||||
|                         .word_prefix_position_docids | ||||
|                         .insert((word_prefix, position), Some(value)); | ||||
|                         .insert((word_prefix, position), Some(Cow::Borrowed(value))); | ||||
|                     positions.push(position); | ||||
|                 } | ||||
|                 entry.insert(positions.clone()); | ||||
|   | ||||
| @@ -20,7 +20,7 @@ mod sort; | ||||
| #[cfg(test)] | ||||
| mod tests; | ||||
|  | ||||
| use std::collections::HashSet; | ||||
| use std::collections::{BTreeSet, HashSet}; | ||||
|  | ||||
| use bucket_sort::{bucket_sort, BucketSortOutput}; | ||||
| use charabia::TokenizerBuilder; | ||||
| @@ -46,6 +46,7 @@ use self::geo_sort::GeoSort; | ||||
| pub use self::geo_sort::Strategy as GeoSortStrategy; | ||||
| use self::graph_based_ranking_rule::Words; | ||||
| use self::interner::Interned; | ||||
| use crate::error::FieldIdMapMissingEntry; | ||||
| use crate::score_details::{ScoreDetails, ScoringStrategy}; | ||||
| use crate::search::new::distinct::apply_distinct_rule; | ||||
| use crate::{ | ||||
| @@ -62,6 +63,7 @@ pub struct SearchContext<'ctx> { | ||||
|     pub phrase_interner: DedupInterner<Phrase>, | ||||
|     pub term_interner: Interner<QueryTerm>, | ||||
|     pub phrase_docids: PhraseDocIdsCache, | ||||
|     pub restricted_fids: Option<Vec<u16>>, | ||||
| } | ||||
|  | ||||
| impl<'ctx> SearchContext<'ctx> { | ||||
| @@ -74,8 +76,66 @@ impl<'ctx> SearchContext<'ctx> { | ||||
|             phrase_interner: <_>::default(), | ||||
|             term_interner: <_>::default(), | ||||
|             phrase_docids: <_>::default(), | ||||
|             restricted_fids: None, | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     pub fn searchable_attributes(&mut self, searchable_attributes: &'ctx [String]) -> Result<()> { | ||||
|         let fids_map = self.index.fields_ids_map(self.txn)?; | ||||
|         let searchable_names = self.index.searchable_fields(self.txn)?; | ||||
|  | ||||
|         let mut restricted_fids = Vec::new(); | ||||
|         for field_name in searchable_attributes { | ||||
|             let searchable_contains_name = | ||||
|                 searchable_names.as_ref().map(|sn| sn.iter().any(|name| name == field_name)); | ||||
|             let fid = match (fids_map.id(field_name), searchable_contains_name) { | ||||
|                 // The Field id exist and the field is searchable | ||||
|                 (Some(fid), Some(true)) | (Some(fid), None) => fid, | ||||
|                 // The field is searchable but the Field id doesn't exist => Internal Error | ||||
|                 (None, Some(true)) => { | ||||
|                     return Err(FieldIdMapMissingEntry::FieldName { | ||||
|                         field_name: field_name.to_string(), | ||||
|                         process: "search", | ||||
|                     } | ||||
|                     .into()) | ||||
|                 } | ||||
|                 // The field is not searchable => User error | ||||
|                 _otherwise => { | ||||
|                     let mut valid_fields: BTreeSet<_> = | ||||
|                         fids_map.names().map(String::from).collect(); | ||||
|  | ||||
|                     // Filter by the searchable names | ||||
|                     if let Some(sn) = searchable_names { | ||||
|                         let searchable_names = sn.iter().map(|s| s.to_string()).collect(); | ||||
|                         valid_fields = &valid_fields & &searchable_names; | ||||
|                     } | ||||
|  | ||||
|                     let searchable_count = valid_fields.len(); | ||||
|  | ||||
|                     // Remove hidden fields | ||||
|                     if let Some(dn) = self.index.displayed_fields(self.txn)? { | ||||
|                         let displayable_names = dn.iter().map(|s| s.to_string()).collect(); | ||||
|                         valid_fields = &valid_fields & &displayable_names; | ||||
|                     } | ||||
|  | ||||
|                     let hidden_fields = searchable_count > valid_fields.len(); | ||||
|                     let field = field_name.to_string(); | ||||
|                     return Err(UserError::InvalidSearchableAttribute { | ||||
|                         field, | ||||
|                         valid_fields, | ||||
|                         hidden_fields, | ||||
|                     } | ||||
|                     .into()); | ||||
|                 } | ||||
|             }; | ||||
|  | ||||
|             restricted_fids.push(fid); | ||||
|         } | ||||
|  | ||||
|         self.restricted_fids = Some(restricted_fids); | ||||
|  | ||||
|         Ok(()) | ||||
|     } | ||||
| } | ||||
|  | ||||
| #[derive(Clone, Copy, PartialEq, PartialOrd, Ord, Eq)] | ||||
|   | ||||
| @@ -4,7 +4,8 @@ pub use self::delete_documents::{DeleteDocuments, DeletionStrategy, DocumentDele | ||||
| pub use self::facet::bulk::FacetsUpdateBulk; | ||||
| pub use self::facet::incremental::FacetsUpdateIncrementalInner; | ||||
| pub use self::index_documents::{ | ||||
|     DocumentAdditionResult, DocumentId, IndexDocuments, IndexDocumentsConfig, IndexDocumentsMethod, | ||||
|     merge_cbo_roaring_bitmaps, merge_roaring_bitmaps, DocumentAdditionResult, DocumentId, | ||||
|     IndexDocuments, IndexDocumentsConfig, IndexDocumentsMethod, MergeFn, | ||||
| }; | ||||
| pub use self::indexer_config::IndexerConfig; | ||||
| pub use self::prefix_word_pairs::{ | ||||
|   | ||||
		Reference in New Issue
	
	Block a user