mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-25 13:06:27 +00:00 
			
		
		
		
	Faceted fields settings must specify the facet type
This commit is contained in:
		| @@ -193,7 +193,7 @@ impl Index { | ||||
|  | ||||
|     /// Writes the facet fields ids associated with their facet type or `None` if | ||||
|     /// the facet type is currently unknown. | ||||
|     pub fn put_faceted_fields(&self, wtxn: &mut RwTxn, fields_types: &HashMap<u8, Option<FacetType>>) -> heed::Result<()> { | ||||
|     pub fn put_faceted_fields(&self, wtxn: &mut RwTxn, fields_types: &HashMap<u8, FacetType>) -> heed::Result<()> { | ||||
|         self.main.put::<_, Str, SerdeJson<_>>(wtxn, FACETED_FIELDS_KEY, fields_types) | ||||
|     } | ||||
|  | ||||
| @@ -203,7 +203,7 @@ impl Index { | ||||
|     } | ||||
|  | ||||
|     /// Returns the facet fields ids associated with their facet type. | ||||
|     pub fn faceted_fields(&self, wtxn: &RoTxn) -> heed::Result<HashMap<u8, Option<FacetType>>> { | ||||
|     pub fn faceted_fields(&self, wtxn: &RoTxn) -> heed::Result<HashMap<u8, FacetType>> { | ||||
|         Ok(self.main.get::<_, Str, SerdeJson<_>>(wtxn, FACETED_FIELDS_KEY)?.unwrap_or_default()) | ||||
|     } | ||||
|  | ||||
|   | ||||
| @@ -61,9 +61,9 @@ pub fn obkv_to_json( | ||||
| } | ||||
|  | ||||
| /// Transform a JSON value into a string that can be indexed. | ||||
| pub fn json_to_string(value: Value) -> Option<String> { | ||||
| pub fn json_to_string(value: &Value) -> Option<String> { | ||||
|  | ||||
|     fn inner(value: Value, output: &mut String) -> bool { | ||||
|     fn inner(value: &Value, output: &mut String) -> bool { | ||||
|         use std::fmt::Write; | ||||
|         match value { | ||||
|             Value::Null => false, | ||||
| @@ -122,7 +122,7 @@ mod tests { | ||||
|             "not_there": null, | ||||
|         }); | ||||
|  | ||||
|         let string = json_to_string(value).unwrap(); | ||||
|         let string = json_to_string(&value).unwrap(); | ||||
|         assert_eq!(string, "name: John Doe. age: 43. "); | ||||
|     } | ||||
|  | ||||
| @@ -136,7 +136,7 @@ mod tests { | ||||
|             null, | ||||
|         ]); | ||||
|  | ||||
|         let string = json_to_string(value).unwrap(); | ||||
|         let string = json_to_string(&value).unwrap(); | ||||
|         // We don't care about having two point (.) after the other as | ||||
|         // the distance of hard separators is clamped to 8 anyway. | ||||
|         assert_eq!(string, "name: John Doe. . 43. hello. I. am. fine. . "); | ||||
|   | ||||
| @@ -329,6 +329,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> { | ||||
|             WordDocids, | ||||
|         } | ||||
|  | ||||
|         let faceted_fields = self.index.faceted_fields(self.wtxn)?; | ||||
|         let searchable_fields: HashSet<_> = match self.index.searchable_fields(self.wtxn)? { | ||||
|             Some(fields) => fields.iter().copied().collect(), | ||||
|             None => fields_ids_map.iter().map(|(id, _name)| id).collect(), | ||||
| @@ -362,6 +363,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> { | ||||
|                 .map(|(i, documents)| { | ||||
|                     let store = Store::new( | ||||
|                         searchable_fields.clone(), | ||||
|                         faceted_fields.clone(), | ||||
|                         linked_hash_map_size, | ||||
|                         max_nb_chunks, | ||||
|                         max_memory_by_job, | ||||
|   | ||||
| @@ -14,6 +14,7 @@ use grenad::{Reader, FileFuse, Writer, Sorter, CompressionType}; | ||||
| use roaring::RoaringBitmap; | ||||
| use tempfile::tempfile; | ||||
|  | ||||
| use crate::facet::FacetType; | ||||
| use crate::heed_codec::{BoRoaringBitmapCodec, CboRoaringBitmapCodec}; | ||||
| use crate::tokenizer::{simple_tokenizer, only_token}; | ||||
| use crate::update::UpdateIndexingStep; | ||||
| @@ -39,6 +40,7 @@ pub struct Readers { | ||||
| pub struct Store { | ||||
|     // Indexing parameters | ||||
|     searchable_fields: HashSet<u8>, | ||||
|     faceted_fields: HashMap<u8, FacetType>, | ||||
|     // Caches | ||||
|     word_docids: LinkedHashMap<SmallVec32<u8>, RoaringBitmap>, | ||||
|     word_docids_limit: usize, | ||||
| @@ -60,6 +62,7 @@ pub struct Store { | ||||
| impl Store { | ||||
|     pub fn new( | ||||
|         searchable_fields: HashSet<u8>, | ||||
|         faceted_fields: HashMap<u8, FacetType>, | ||||
|         linked_hash_map_size: Option<usize>, | ||||
|         max_nb_chunks: Option<usize>, | ||||
|         max_memory: Option<usize>, | ||||
| @@ -107,6 +110,7 @@ impl Store { | ||||
|         Ok(Store { | ||||
|             // Indexing parameters. | ||||
|             searchable_fields, | ||||
|             faceted_fields, | ||||
|             // Caches | ||||
|             word_docids: LinkedHashMap::with_capacity(linked_hash_map_size), | ||||
|             word_docids_limit: linked_hash_map_size, | ||||
| @@ -320,21 +324,26 @@ impl Store { | ||||
|                 } | ||||
|  | ||||
|                 for (attr, content) in document.iter() { | ||||
|                     if !self.searchable_fields.contains(&attr) { | ||||
|                         continue; | ||||
|                     } | ||||
|                     if self.faceted_fields.contains_key(&attr) || self.searchable_fields.contains(&attr) { | ||||
|                         let value = serde_json::from_slice(content)?; | ||||
|  | ||||
|                     let value = serde_json::from_slice(content)?; | ||||
|                     let content = match json_to_string(value) { | ||||
|                         Some(content) => content, | ||||
|                         None => continue, | ||||
|                     }; | ||||
|                         if let Some(ftype) = self.faceted_fields.get(&attr) { | ||||
|                             todo!("parse facet field value") | ||||
|                         } | ||||
|  | ||||
|                     let tokens = simple_tokenizer(&content).filter_map(only_token); | ||||
|                     for (pos, token) in tokens.enumerate().take(MAX_POSITION) { | ||||
|                         let word = token.to_lowercase(); | ||||
|                         let position = (attr as usize * MAX_POSITION + pos) as u32; | ||||
|                         words_positions.entry(word).or_insert_with(SmallVec32::new).push(position); | ||||
|                         if self.searchable_fields.contains(&attr) { | ||||
|                             let content = match json_to_string(&value) { | ||||
|                                 Some(content) => content, | ||||
|                                 None => continue, | ||||
|                             }; | ||||
|  | ||||
|                             let tokens = simple_tokenizer(&content).filter_map(only_token); | ||||
|                             for (pos, token) in tokens.enumerate().take(MAX_POSITION) { | ||||
|                                 let word = token.to_lowercase(); | ||||
|                                 let position = (attr as usize * MAX_POSITION + pos) as u32; | ||||
|                                 words_positions.entry(word).or_insert_with(SmallVec32::new).push(position); | ||||
|                             } | ||||
|                         } | ||||
|                     } | ||||
|                 } | ||||
|  | ||||
|   | ||||
| @@ -1,11 +1,13 @@ | ||||
| use std::collections::HashMap; | ||||
| use std::str::FromStr; | ||||
|  | ||||
| use anyhow::Context; | ||||
| use anyhow::{ensure, Context}; | ||||
| use grenad::CompressionType; | ||||
| use rayon::ThreadPool; | ||||
|  | ||||
| use crate::update::index_documents::{Transform, IndexDocumentsMethod}; | ||||
| use crate::update::{ClearDocuments, IndexDocuments, UpdateIndexingStep}; | ||||
| use crate::facet::FacetType; | ||||
| use crate::{Index, FieldsIdsMap}; | ||||
|  | ||||
| pub struct Settings<'a, 't, 'u, 'i> { | ||||
| @@ -24,7 +26,7 @@ pub struct Settings<'a, 't, 'u, 'i> { | ||||
|     // however if it is `Some(None)` it means that the user forced a reset of the setting. | ||||
|     searchable_fields: Option<Option<Vec<String>>>, | ||||
|     displayed_fields: Option<Option<Vec<String>>>, | ||||
|     faceted_fields: Option<Vec<String>>, | ||||
|     faceted_fields: Option<HashMap<String, String>>, | ||||
| } | ||||
|  | ||||
| impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { | ||||
| @@ -62,25 +64,29 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { | ||||
|         self.displayed_fields = Some(Some(names)); | ||||
|     } | ||||
|  | ||||
|     pub fn set_faceted_fields(&mut self, names: Vec<String>) { | ||||
|         self.faceted_fields = Some(names); | ||||
|     pub fn set_faceted_fields(&mut self, names_facet_types: HashMap<String, String>) { | ||||
|         self.faceted_fields = Some(names_facet_types); | ||||
|     } | ||||
|  | ||||
|     pub fn execute<F>(self, progress_callback: F) -> anyhow::Result<()> | ||||
|     where | ||||
|         F: Fn(UpdateIndexingStep) + Sync | ||||
|     { | ||||
|         if let Some(fields_names) = self.faceted_fields { | ||||
|         if let Some(fields_names_facet_types) = self.faceted_fields { | ||||
|             let current_faceted_fields = self.index.faceted_fields(self.wtxn)?; | ||||
|             let current_fields_ids_map = self.index.fields_ids_map(self.wtxn)?; | ||||
|  | ||||
|             let mut fields_ids_map = current_fields_ids_map.clone(); | ||||
|             let mut faceted_fields = HashMap::new(); | ||||
|             for name in fields_names { | ||||
|             for (name, sftype) in fields_names_facet_types { | ||||
|                 let ftype = FacetType::from_str(&sftype).with_context(|| format!("parsing facet type {:?}", sftype))?; | ||||
|                 let id = fields_ids_map.insert(&name).context("field id limit reached")?; | ||||
|                 match current_faceted_fields.get(&id) { | ||||
|                     Some(ftype) => faceted_fields.insert(id, ftype.clone()), | ||||
|                     None => faceted_fields.insert(id, None), | ||||
|                     Some(pftype) => { | ||||
|                         ensure!(ftype == *pftype, "{} facet type changed from {} to {}", name, ftype, pftype); | ||||
|                         faceted_fields.insert(id, ftype) | ||||
|                     }, | ||||
|                     None => faceted_fields.insert(id, ftype), | ||||
|                 }; | ||||
|             } | ||||
|  | ||||
|   | ||||
		Reference in New Issue
	
	Block a user