mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-31 07:56:28 +00:00 
			
		
		
		
	Make sure that the indexing Store only indexes searchable fields
This commit is contained in:
		| @@ -1,4 +1,5 @@ | ||||
| use std::borrow::Cow; | ||||
| use std::collections::HashSet; | ||||
| use std::fs::File; | ||||
| use std::io::{self, Seek, SeekFrom}; | ||||
| use std::sync::mpsc::sync_channel; | ||||
| @@ -327,6 +328,11 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> { | ||||
|             WordsPairsProximitiesDocids, | ||||
|         } | ||||
|  | ||||
|         let searchable_fields: HashSet<_> = match self.index.searchable_fields(self.wtxn)? { | ||||
|             Some(fields) => fields.iter().copied().collect(), | ||||
|             None => fields_ids_map.iter().map(|(id, _name)| id).collect(), | ||||
|         }; | ||||
|  | ||||
|         let linked_hash_map_size = self.linked_hash_map_size; | ||||
|         let max_nb_chunks = self.max_nb_chunks; | ||||
|         let max_memory = self.max_memory; | ||||
| @@ -354,6 +360,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> { | ||||
|                 .enumerate() | ||||
|                 .map(|(i, documents)| { | ||||
|                     let store = Store::new( | ||||
|                         searchable_fields.clone(), | ||||
|                         linked_hash_map_size, | ||||
|                         max_nb_chunks, | ||||
|                         max_memory_by_job, | ||||
|   | ||||
| @@ -1,5 +1,5 @@ | ||||
| use std::borrow::Cow; | ||||
| use std::collections::{BTreeMap, HashMap}; | ||||
| use std::collections::{BTreeMap, HashMap, HashSet}; | ||||
| use std::convert::{TryFrom, TryInto}; | ||||
| use std::fs::File; | ||||
| use std::iter::FromIterator; | ||||
| @@ -37,6 +37,9 @@ pub struct Readers { | ||||
| } | ||||
|  | ||||
| pub struct Store { | ||||
|     // Indexing parameters | ||||
|     searchable_fields: HashSet<u8>, | ||||
|     // Caches | ||||
|     word_docids: LinkedHashMap<SmallVec32<u8>, RoaringBitmap>, | ||||
|     word_docids_limit: usize, | ||||
|     words_pairs_proximities_docids: LinkedHashMap<(SmallVec32<u8>, SmallVec32<u8>, u8), RoaringBitmap>, | ||||
| @@ -56,6 +59,7 @@ pub struct Store { | ||||
|  | ||||
| impl Store { | ||||
|     pub fn new( | ||||
|         searchable_fields: HashSet<u8>, | ||||
|         linked_hash_map_size: Option<usize>, | ||||
|         max_nb_chunks: Option<usize>, | ||||
|         max_memory: Option<usize>, | ||||
| @@ -101,18 +105,22 @@ impl Store { | ||||
|         })?; | ||||
|  | ||||
|         Ok(Store { | ||||
|             // Indexing parameters. | ||||
|             searchable_fields, | ||||
|             // Caches | ||||
|             word_docids: LinkedHashMap::with_capacity(linked_hash_map_size), | ||||
|             word_docids_limit: linked_hash_map_size, | ||||
|             words_pairs_proximities_docids: LinkedHashMap::with_capacity(linked_hash_map_size), | ||||
|             words_pairs_proximities_docids_limit: linked_hash_map_size, | ||||
|             // MTBL parameters | ||||
|             chunk_compression_type, | ||||
|             chunk_compression_level, | ||||
|             chunk_fusing_shrink_size, | ||||
|  | ||||
|             // MTBL sorters | ||||
|             main_sorter, | ||||
|             word_docids_sorter, | ||||
|             words_pairs_proximities_docids_sorter, | ||||
|  | ||||
|             // MTBL writers | ||||
|             docid_word_positions_writer, | ||||
|             documents_writer, | ||||
|         }) | ||||
| @@ -309,23 +317,25 @@ impl Store { | ||||
|                 } | ||||
|  | ||||
|                 for (attr, content) in document.iter() { | ||||
|                     use serde_json::Value; | ||||
|                     let content: Cow<str> = match serde_json::from_slice(content) { | ||||
|                         Ok(string) => string, | ||||
|                         Err(_) => match serde_json::from_slice(content)? { | ||||
|                             Value::Null => continue, | ||||
|                             Value::Bool(boolean) => Cow::Owned(boolean.to_string()), | ||||
|                             Value::Number(number) => Cow::Owned(number.to_string()), | ||||
|                             Value::String(string) => Cow::Owned(string), | ||||
|                             Value::Array(_array) => continue, | ||||
|                             Value::Object(_object) => continue, | ||||
|                         } | ||||
|                     }; | ||||
|                     if self.searchable_fields.contains(&attr) { | ||||
|                         use serde_json::Value; | ||||
|                         let content: Cow<str> = match serde_json::from_slice(content) { | ||||
|                             Ok(string) => string, | ||||
|                             Err(_) => match serde_json::from_slice(content)? { | ||||
|                                 Value::Null => continue, | ||||
|                                 Value::Bool(boolean) => Cow::Owned(boolean.to_string()), | ||||
|                                 Value::Number(number) => Cow::Owned(number.to_string()), | ||||
|                                 Value::String(string) => Cow::Owned(string), | ||||
|                                 Value::Array(_array) => continue, | ||||
|                                 Value::Object(_object) => continue, | ||||
|                             } | ||||
|                         }; | ||||
|  | ||||
|                     for (pos, token) in simple_tokenizer(&content).filter_map(only_token).enumerate().take(MAX_POSITION) { | ||||
|                         let word = token.to_lowercase(); | ||||
|                         let position = (attr as usize * MAX_POSITION + pos) as u32; | ||||
|                         words_positions.entry(word).or_insert_with(SmallVec32::new).push(position); | ||||
|                         for (pos, token) in simple_tokenizer(&content).filter_map(only_token).enumerate().take(MAX_POSITION) { | ||||
|                             let word = token.to_lowercase(); | ||||
|                             let position = (attr as usize * MAX_POSITION + pos) as u32; | ||||
|                             words_positions.entry(word).or_insert_with(SmallVec32::new).push(position); | ||||
|                         } | ||||
|                     } | ||||
|                 } | ||||
|  | ||||
|   | ||||
| @@ -42,6 +42,14 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     pub fn reset_searchable_fields(&mut self) { | ||||
|         self.searchable_fields = Some(None); | ||||
|     } | ||||
|  | ||||
|     pub fn set_searchable_fields(&mut self, names: Vec<String>) { | ||||
|         self.searchable_fields = Some(Some(names)); | ||||
|     } | ||||
|  | ||||
|     pub fn reset_displayed_fields(&mut self) { | ||||
|         self.displayed_fields = Some(None); | ||||
|     } | ||||
| @@ -56,7 +64,6 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { | ||||
|     { | ||||
|         // Check that the searchable attributes have been specified. | ||||
|         if let Some(value) = self.searchable_fields { | ||||
|             let current_searchable_fields = self.index.searchable_fields(self.wtxn)?; | ||||
|             let current_displayed_fields = self.index.displayed_fields(self.wtxn)?; | ||||
|             let current_fields_ids_map = self.index.fields_ids_map(self.wtxn)?; | ||||
|  | ||||
| @@ -93,7 +100,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { | ||||
|                 }, | ||||
|                 None => ( | ||||
|                     current_fields_ids_map.clone(), | ||||
|                     current_searchable_fields.map(ToOwned::to_owned), | ||||
|                     None, | ||||
|                     current_displayed_fields.map(ToOwned::to_owned), | ||||
|                 ), | ||||
|             }; | ||||
|   | ||||
		Reference in New Issue
	
	Block a user