mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-07-27 08:41:00 +00:00
never store the _vectors as searchable or faceted fields
This commit is contained in:
@ -23,6 +23,7 @@ use crate::heed_codec::{
|
||||
};
|
||||
use crate::order_by_map::OrderByMap;
|
||||
use crate::proximity::ProximityPrecision;
|
||||
use crate::vector::parsed_vectors::RESERVED_VECTORS_FIELD_NAME;
|
||||
use crate::vector::{Embedding, EmbeddingConfig};
|
||||
use crate::{
|
||||
default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds,
|
||||
@ -644,6 +645,7 @@ impl Index {
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
user_fields: &[&str],
|
||||
non_searchable_fields_ids: &[FieldId],
|
||||
fields_ids_map: &FieldsIdsMap,
|
||||
) -> Result<()> {
|
||||
// We can write the user defined searchable fields as-is.
|
||||
@ -662,6 +664,7 @@ impl Index {
|
||||
for (weight, user_field) in user_fields.iter().enumerate() {
|
||||
if crate::is_faceted_by(field_from_map, user_field)
|
||||
&& !real_fields.contains(&field_from_map)
|
||||
&& !non_searchable_fields_ids.contains(&id)
|
||||
{
|
||||
real_fields.push(field_from_map);
|
||||
|
||||
@ -708,6 +711,7 @@ impl Index {
|
||||
Ok(self
|
||||
.fields_ids_map(rtxn)?
|
||||
.names()
|
||||
.filter(|name| !crate::is_faceted_by(name, RESERVED_VECTORS_FIELD_NAME))
|
||||
.map(|field| Cow::Owned(field.to_string()))
|
||||
.collect())
|
||||
})
|
||||
@ -1669,15 +1673,17 @@ pub(crate) mod tests {
|
||||
|
||||
use big_s::S;
|
||||
use heed::{EnvOpenOptions, RwTxn};
|
||||
use maplit::hashset;
|
||||
use maplit::{btreemap, hashset};
|
||||
use tempfile::TempDir;
|
||||
|
||||
use crate::documents::DocumentsBatchReader;
|
||||
use crate::error::{Error, InternalError};
|
||||
use crate::index::{DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS};
|
||||
use crate::update::{
|
||||
self, IndexDocuments, IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig, Settings,
|
||||
self, IndexDocuments, IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig, Setting,
|
||||
Settings,
|
||||
};
|
||||
use crate::vector::settings::{EmbedderSource, EmbeddingSettings};
|
||||
use crate::{db_snap, obkv_to_json, Filter, Index, Search, SearchResult};
|
||||
|
||||
pub(crate) struct TempIndex {
|
||||
@ -2783,4 +2789,95 @@ pub(crate) mod tests {
|
||||
]
|
||||
"###);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn vectors_are_never_indexed_as_searchable_or_filterable() {
|
||||
let index = TempIndex::new();
|
||||
|
||||
index
|
||||
.add_documents(documents!([
|
||||
{ "id": 0, "_vectors": { "doggo": [2345] } },
|
||||
{ "id": 1, "_vectors": { "doggo": [6789] } },
|
||||
]))
|
||||
.unwrap();
|
||||
|
||||
db_snap!(index, fields_ids_map, @r###"
|
||||
0 id |
|
||||
1 _vectors |
|
||||
2 _vectors.doggo |
|
||||
"###);
|
||||
db_snap!(index, searchable_fields, @r###"["id"]"###);
|
||||
db_snap!(index, fieldids_weights_map, @r###"
|
||||
fid weight
|
||||
0 0 |
|
||||
"###);
|
||||
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
let mut search = index.search(&rtxn);
|
||||
let results = search.query("2345").execute().unwrap();
|
||||
assert!(results.candidates.is_empty());
|
||||
drop(rtxn);
|
||||
|
||||
index
|
||||
.update_settings(|settings| {
|
||||
settings.set_searchable_fields(vec![S("_vectors"), S("_vectors.doggo")]);
|
||||
settings.set_filterable_fields(hashset![S("_vectors"), S("_vectors.doggo")]);
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
db_snap!(index, fields_ids_map, @r###"
|
||||
0 id |
|
||||
1 _vectors |
|
||||
2 _vectors.doggo |
|
||||
"###);
|
||||
db_snap!(index, searchable_fields, @"[]");
|
||||
db_snap!(index, fieldids_weights_map, @r###"
|
||||
fid weight
|
||||
"###);
|
||||
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
let mut search = index.search(&rtxn);
|
||||
let results = search.query("2345").execute().unwrap();
|
||||
assert!(results.candidates.is_empty());
|
||||
|
||||
let mut search = index.search(&rtxn);
|
||||
let results = search
|
||||
.filter(Filter::from_str("_vectors.doggo = 6789").unwrap().unwrap())
|
||||
.execute()
|
||||
.unwrap();
|
||||
assert!(results.candidates.is_empty());
|
||||
|
||||
index
|
||||
.update_settings(|settings| {
|
||||
settings.set_embedder_settings(btreemap! {
|
||||
S("doggo") => Setting::Set(EmbeddingSettings {
|
||||
dimensions: Setting::Set(1),
|
||||
source: Setting::Set(EmbedderSource::UserProvided),
|
||||
..EmbeddingSettings::default()}),
|
||||
});
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
db_snap!(index, fields_ids_map, @r###"
|
||||
0 id |
|
||||
1 _vectors |
|
||||
2 _vectors.doggo |
|
||||
"###);
|
||||
db_snap!(index, searchable_fields, @"[]");
|
||||
db_snap!(index, fieldids_weights_map, @r###"
|
||||
fid weight
|
||||
"###);
|
||||
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
let mut search = index.search(&rtxn);
|
||||
let results = search.query("2345").execute().unwrap();
|
||||
assert!(results.candidates.is_empty());
|
||||
|
||||
let mut search = index.search(&rtxn);
|
||||
let results = search
|
||||
.filter(Filter::from_str("_vectors.doggo = 6789").unwrap().unwrap())
|
||||
.execute()
|
||||
.unwrap();
|
||||
assert!(results.candidates.is_empty());
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user