mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-25 13:06:27 +00:00 
			
		
		
		
	Merge pull request #67 from meilisearch/fix-settings
Fix displayed and searchable attributes
This commit is contained in:
		
							
								
								
									
										481
									
								
								Cargo.lock
									
									
									
										generated
									
									
									
								
							
							
						
						
									
										481
									
								
								Cargo.lock
									
									
									
										generated
									
									
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										632
									
								
								http-ui/Cargo.lock
									
									
									
										generated
									
									
									
								
							
							
						
						
									
										632
									
								
								http-ui/Cargo.lock
									
									
									
										generated
									
									
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							| @@ -1,4 +1,3 @@ | ||||
| use std::borrow::Cow; | ||||
| use std::collections::{HashMap, HashSet}; | ||||
| use std::fmt::Display; | ||||
| use std::fs::{File, create_dir_all}; | ||||
| @@ -654,13 +653,13 @@ async fn main() -> anyhow::Result<()> { | ||||
|  | ||||
|             let mut documents = Vec::new(); | ||||
|             let fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); | ||||
|             let displayed_fields = match index.displayed_fields(&rtxn).unwrap() { | ||||
|                 Some(fields) => Cow::Borrowed(fields), | ||||
|                 None => Cow::Owned(fields_ids_map.iter().map(|(id, _)| id).collect()), | ||||
|             let displayed_fields = match index.displayed_fields_ids(&rtxn).unwrap() { | ||||
|                 Some(fields) => fields, | ||||
|                 None => fields_ids_map.iter().map(|(id, _)| id).collect(), | ||||
|             }; | ||||
|             let attributes_to_highlight = match index.searchable_fields(&rtxn).unwrap() { | ||||
|                 Some(fields) => fields.iter().flat_map(|id| fields_ids_map.name(*id)).map(ToOwned::to_owned).collect(), | ||||
|                 None => fields_ids_map.iter().map(|(_, name)| name).map(ToOwned::to_owned).collect(), | ||||
|                 Some(fields) => fields.into_iter().map(String::from).collect(), | ||||
|                 None => fields_ids_map.iter().map(|(_, name)| name).map(String::from).collect(), | ||||
|             }; | ||||
|  | ||||
|             let stop_words = fst::Set::default(); | ||||
| @@ -690,9 +689,9 @@ async fn main() -> anyhow::Result<()> { | ||||
|  | ||||
|             let external_documents_ids = index.external_documents_ids(&rtxn).unwrap(); | ||||
|             let fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); | ||||
|             let displayed_fields = match index.displayed_fields(&rtxn).unwrap() { | ||||
|                 Some(fields) => Cow::Borrowed(fields), | ||||
|                 None => Cow::Owned(fields_ids_map.iter().map(|(id, _)| id).collect()), | ||||
|             let displayed_fields = match index.displayed_fields_ids(&rtxn).unwrap() { | ||||
|                 Some(fields) => fields, | ||||
|                 None => fields_ids_map.iter().map(|(id, _)| id).collect(), | ||||
|             }; | ||||
|  | ||||
|             match external_documents_ids.get(&id) { | ||||
|   | ||||
| @@ -1,10 +1,12 @@ | ||||
| use crate::{FieldsIdsMap, FieldId}; | ||||
| use std::collections::HashMap; | ||||
|  | ||||
| use anyhow::{Context, bail}; | ||||
| use regex::Regex; | ||||
| use serde::{Serialize, Deserialize}; | ||||
|  | ||||
| #[derive(Debug, Serialize, Deserialize, Copy, Clone, PartialEq, Eq)] | ||||
| use crate::facet::FacetType; | ||||
|  | ||||
| #[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)] | ||||
| pub enum Criterion { | ||||
|     /// Sorted by increasing number of typos. | ||||
|     Typo, | ||||
| @@ -21,13 +23,13 @@ pub enum Criterion { | ||||
|     /// Sorted by the similarity of the matched words with the query words. | ||||
|     Exactness, | ||||
|     /// Sorted by the increasing value of the field specified. | ||||
|     Asc(FieldId), | ||||
|     Asc(String), | ||||
|     /// Sorted by the decreasing value of the field specified. | ||||
|     Desc(FieldId), | ||||
|     Desc(String), | ||||
| } | ||||
|  | ||||
| impl Criterion { | ||||
|     pub fn from_str(fields_ids_map: &mut FieldsIdsMap, txt: &str) -> anyhow::Result<Criterion> { | ||||
|     pub fn from_str(faceted_attributes: &HashMap<String, FacetType>, txt: &str) -> anyhow::Result<Criterion> { | ||||
|         match txt { | ||||
|             "typo" => Ok(Criterion::Typo), | ||||
|             "words" => Ok(Criterion::Words), | ||||
| @@ -40,22 +42,15 @@ impl Criterion { | ||||
|                 let caps = re.captures(text).with_context(|| format!("unknown criterion name: {}", text))?; | ||||
|                 let order = caps.get(1).unwrap().as_str(); | ||||
|                 let field_name = caps.get(2).unwrap().as_str(); | ||||
|                 let field_id = fields_ids_map.insert(field_name).context("field id limit reached")?; | ||||
|                 faceted_attributes.get(field_name).with_context(|| format!("Can't use {:?} as a criterion as it isn't a faceted field.", field_name))?; | ||||
|                 match order { | ||||
|                     "asc" => Ok(Criterion::Asc(field_id)), | ||||
|                     "desc" => Ok(Criterion::Desc(field_id)), | ||||
|                     "asc" => Ok(Criterion::Asc(field_name.to_string())), | ||||
|                     "desc" => Ok(Criterion::Desc(field_name.to_string())), | ||||
|                     otherwise => bail!("unknown criterion name: {}", otherwise), | ||||
|                 } | ||||
|             }, | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     pub fn field_id(&self) -> Option<FieldId> { | ||||
|         match *self { | ||||
|             Criterion::Asc(fid) | Criterion::Desc(fid) => Some(fid), | ||||
|             _ => None, | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| pub fn default_criteria() -> Vec<Criterion> { | ||||
|   | ||||
							
								
								
									
										89
									
								
								src/index.rs
									
									
									
									
									
								
							
							
						
						
									
										89
									
								
								src/index.rs
									
									
									
									
									
								
							| @@ -112,8 +112,8 @@ impl Index { | ||||
|     /* primary key */ | ||||
|  | ||||
|     /// Writes the documents primary key, this is the field name that is used to store the id. | ||||
|     pub fn put_primary_key(&self, wtxn: &mut RwTxn, primary_key: FieldId) -> heed::Result<()> { | ||||
|         self.main.put::<_, Str, OwnedType<FieldId>>(wtxn, PRIMARY_KEY_KEY, &primary_key) | ||||
|     pub fn put_primary_key(&self, wtxn: &mut RwTxn, primary_key: &str) -> heed::Result<()> { | ||||
|         self.main.put::<_, Str, Str>(wtxn, PRIMARY_KEY_KEY, &primary_key) | ||||
|     } | ||||
|  | ||||
|     /// Deletes the primary key of the documents, this can be done to reset indexes settings. | ||||
| @@ -122,8 +122,8 @@ impl Index { | ||||
|     } | ||||
|  | ||||
|     /// Returns the documents primary key, `None` if it hasn't been defined. | ||||
|     pub fn primary_key(&self, rtxn: &RoTxn) -> heed::Result<Option<FieldId>> { | ||||
|         self.main.get::<_, Str, OwnedType<FieldId>>(rtxn, PRIMARY_KEY_KEY) | ||||
|     pub fn primary_key<'t>(&self, rtxn: &'t RoTxn) -> heed::Result<Option<&'t str>> { | ||||
|         self.main.get::<_, Str, Str>(rtxn, PRIMARY_KEY_KEY) | ||||
|     } | ||||
|  | ||||
|     /* external documents ids */ | ||||
| @@ -175,10 +175,10 @@ impl Index { | ||||
|  | ||||
|     /* displayed fields */ | ||||
|  | ||||
|     /// Writes the fields ids that must be displayed in the defined order. | ||||
|     /// Writes the fields that must be displayed in the defined order. | ||||
|     /// There must not be any duplicate field id. | ||||
|     pub fn put_displayed_fields(&self, wtxn: &mut RwTxn, fields: &[FieldId]) -> heed::Result<()> { | ||||
|         self.main.put::<_, Str, ByteSlice>(wtxn, DISPLAYED_FIELDS_KEY, fields) | ||||
|     pub fn put_displayed_fields(&self, wtxn: &mut RwTxn, fields: &[&str]) -> heed::Result<()> { | ||||
|         self.main.put::<_, Str, SerdeBincode<&[&str]>>(wtxn, DISPLAYED_FIELDS_KEY, &fields) | ||||
|     } | ||||
|  | ||||
|     /// Deletes the displayed fields ids, this will make the engine display | ||||
| @@ -187,18 +187,27 @@ impl Index { | ||||
|         self.main.delete::<_, Str>(wtxn, DISPLAYED_FIELDS_KEY) | ||||
|     } | ||||
|  | ||||
|     /// Returns the displayed fields ids in the order they must be returned. If it returns | ||||
|     /// `None` it means that all the attributes are displayed in the order of the `FieldsIdsMap`. | ||||
|     pub fn displayed_fields<'t>(&self, rtxn: &'t RoTxn) -> heed::Result<Option<&'t [FieldId]>> { | ||||
|         self.main.get::<_, Str, ByteSlice>(rtxn, DISPLAYED_FIELDS_KEY) | ||||
|     /// Returns the displayed fields in the order they were set by the user. If it returns | ||||
|     /// `None` it means that all the attributes are set as displayed in the order of the `FieldsIdsMap`. | ||||
|     pub fn displayed_fields<'t>(&self, rtxn: &'t RoTxn) -> heed::Result<Option<Vec<&'t str>>> { | ||||
|         self.main.get::<_, Str, SerdeBincode<Vec<&'t str>>>(rtxn, DISPLAYED_FIELDS_KEY) | ||||
|     } | ||||
|  | ||||
|     pub fn displayed_fields_ids(&self, rtxn: &RoTxn) -> heed::Result<Option<Vec<FieldId>>> { | ||||
|         let fields_ids_map = self.fields_ids_map(rtxn)?; | ||||
|         let ids = self.displayed_fields(rtxn)? | ||||
|             .map(|fields| fields | ||||
|                 .into_iter() | ||||
|                 .map(|name| fields_ids_map.id(name).expect("Field not found")) | ||||
|                 .collect::<Vec<_>>()); | ||||
|         Ok(ids) | ||||
|     } | ||||
|  | ||||
|     /* searchable fields */ | ||||
|  | ||||
|     /// Writes the searchable fields, when this list is specified, only these are indexed. | ||||
|     pub fn put_searchable_fields(&self, wtxn: &mut RwTxn, fields: &[FieldId]) -> heed::Result<()> { | ||||
|         assert!(fields.windows(2).all(|win| win[0] < win[1])); // is sorted | ||||
|         self.main.put::<_, Str, ByteSlice>(wtxn, SEARCHABLE_FIELDS_KEY, fields) | ||||
|     pub fn put_searchable_fields(&self, wtxn: &mut RwTxn, fields: &[&str]) -> heed::Result<()> { | ||||
|         self.main.put::<_, Str, SerdeBincode<&[&str]>>(wtxn, SEARCHABLE_FIELDS_KEY, &fields) | ||||
|     } | ||||
|  | ||||
|     /// Deletes the searchable fields, when no fields are specified, all fields are indexed. | ||||
| @@ -206,17 +215,36 @@ impl Index { | ||||
|         self.main.delete::<_, Str>(wtxn, SEARCHABLE_FIELDS_KEY) | ||||
|     } | ||||
|  | ||||
|     /// Returns the searchable fields ids, those are the fields that are indexed, | ||||
|     /// Returns the searchable fields, those are the fields that are indexed, | ||||
|     /// if the searchable fields aren't there it means that **all** the fields are indexed. | ||||
|     pub fn searchable_fields<'t>(&self, rtxn: &'t RoTxn) -> heed::Result<Option<&'t [FieldId]>> { | ||||
|         self.main.get::<_, Str, ByteSlice>(rtxn, SEARCHABLE_FIELDS_KEY) | ||||
|     pub fn searchable_fields<'t>(&self, rtxn: &'t RoTxn) -> heed::Result<Option<Vec<&'t str>>> { | ||||
|         self.main.get::<_, Str, SerdeBincode<Vec<&'t str>>>(rtxn, SEARCHABLE_FIELDS_KEY) | ||||
|     } | ||||
|  | ||||
|     /// Identical to `searchable_fields`, but returns the ids instead. | ||||
|     pub fn searchable_fields_ids(&self, rtxn: &RoTxn) -> heed::Result<Option<Vec<FieldId>>> { | ||||
|         match self.searchable_fields(rtxn)? { | ||||
|             Some(names) => { | ||||
|                 let fields_map = self.fields_ids_map(rtxn)?; | ||||
|                 let mut ids = Vec::new(); | ||||
|                 for name in names { | ||||
|                     let id = fields_map | ||||
|                         .id(name) | ||||
|                         .ok_or_else(|| format!("field id map must contain {:?}", name)) | ||||
|                         .expect("corrupted data: "); | ||||
|                     ids.push(id); | ||||
|                 } | ||||
|                 Ok(Some(ids)) | ||||
|             } | ||||
|             None => Ok(None), | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     /* faceted fields */ | ||||
|  | ||||
|     /// Writes the facet fields ids associated with their facet type or `None` if | ||||
|     /// Writes the facet fields associated with their facet type or `None` if | ||||
|     /// the facet type is currently unknown. | ||||
|     pub fn put_faceted_fields(&self, wtxn: &mut RwTxn, fields_types: &HashMap<FieldId, FacetType>) -> heed::Result<()> { | ||||
|     pub fn put_faceted_fields(&self, wtxn: &mut RwTxn, fields_types: &HashMap<String, FacetType>) -> heed::Result<()> { | ||||
|         self.main.put::<_, Str, SerdeJson<_>>(wtxn, FACETED_FIELDS_KEY, fields_types) | ||||
|     } | ||||
|  | ||||
| @@ -225,9 +253,26 @@ impl Index { | ||||
|         self.main.delete::<_, Str>(wtxn, FACETED_FIELDS_KEY) | ||||
|     } | ||||
|  | ||||
|     /// Returns the facet fields ids associated with their facet type. | ||||
|     pub fn faceted_fields(&self, wtxn: &RoTxn) -> heed::Result<HashMap<FieldId, FacetType>> { | ||||
|         Ok(self.main.get::<_, Str, SerdeJson<_>>(wtxn, FACETED_FIELDS_KEY)?.unwrap_or_default()) | ||||
|     /// Returns the facet fields names associated with their facet type. | ||||
|     pub fn faceted_fields(&self, rtxn: &RoTxn) -> heed::Result<HashMap<String, FacetType>> { | ||||
|         Ok(self.main.get::<_, Str, SerdeJson<_>>(rtxn, FACETED_FIELDS_KEY)?.unwrap_or_default()) | ||||
|     } | ||||
|  | ||||
|     /// Same as `faceted_fields`, but returns ids instead. | ||||
|     pub fn faceted_fields_ids(&self, rtxn: &RoTxn) -> heed::Result<HashMap<FieldId, FacetType>> { | ||||
|         let faceted_fields = self.faceted_fields(rtxn)?; | ||||
|         let fields_ids_map = self.fields_ids_map(rtxn)?; | ||||
|         let faceted_fields = faceted_fields | ||||
|             .iter() | ||||
|             .map(|(k, v)| { | ||||
|                 let kid = fields_ids_map | ||||
|                     .id(k) | ||||
|                     .ok_or_else(|| format!("{:?} should be present in the field id map", k)) | ||||
|                     .expect("corrupted data: "); | ||||
|                 (kid, *v) | ||||
|             }) | ||||
|             .collect(); | ||||
|         Ok(faceted_fields) | ||||
|     } | ||||
|  | ||||
|     /* faceted documents ids */ | ||||
|   | ||||
| @@ -148,7 +148,7 @@ impl FacetCondition { | ||||
|     ) -> anyhow::Result<FacetCondition> | ||||
|     { | ||||
|         let fields_ids_map = index.fields_ids_map(rtxn)?; | ||||
|         let faceted_fields = index.faceted_fields(rtxn)?; | ||||
|         let faceted_fields = index.faceted_fields_ids(rtxn)?; | ||||
|         let lexed = FilterParser::parse(Rule::prgm, expression)?; | ||||
|         FacetCondition::from_pairs(&fields_ids_map, &faceted_fields, lexed) | ||||
|     } | ||||
| @@ -552,15 +552,15 @@ mod tests { | ||||
|         // Test that the facet condition is correctly generated. | ||||
|         let rtxn = index.read_txn().unwrap(); | ||||
|         let condition = FacetCondition::from_str(&rtxn, &index, "channel = ponce").unwrap(); | ||||
|         let expected = OperatorString(1, FacetStringOperator::equal("Ponce")); | ||||
|         let expected = OperatorString(0, FacetStringOperator::equal("Ponce")); | ||||
|         assert_eq!(condition, expected); | ||||
|  | ||||
|         let condition = FacetCondition::from_str(&rtxn, &index, "channel != ponce").unwrap(); | ||||
|         let expected = OperatorString(1, FacetStringOperator::not_equal("ponce")); | ||||
|         let expected = OperatorString(0, FacetStringOperator::not_equal("ponce")); | ||||
|         assert_eq!(condition, expected); | ||||
|  | ||||
|         let condition = FacetCondition::from_str(&rtxn, &index, "NOT channel = ponce").unwrap(); | ||||
|         let expected = OperatorString(1, FacetStringOperator::not_equal("ponce")); | ||||
|         let expected = OperatorString(0, FacetStringOperator::not_equal("ponce")); | ||||
|         assert_eq!(condition, expected); | ||||
|     } | ||||
|  | ||||
| @@ -581,13 +581,13 @@ mod tests { | ||||
|         // Test that the facet condition is correctly generated. | ||||
|         let rtxn = index.read_txn().unwrap(); | ||||
|         let condition = FacetCondition::from_str(&rtxn, &index, "timestamp 22 TO 44").unwrap(); | ||||
|         let expected = OperatorI64(1, Between(22, 44)); | ||||
|         let expected = OperatorI64(0, Between(22, 44)); | ||||
|         assert_eq!(condition, expected); | ||||
|  | ||||
|         let condition = FacetCondition::from_str(&rtxn, &index, "NOT timestamp 22 TO 44").unwrap(); | ||||
|         let expected = Or( | ||||
|             Box::new(OperatorI64(1, LowerThan(22))), | ||||
|             Box::new(OperatorI64(1, GreaterThan(44))), | ||||
|             Box::new(OperatorI64(0, LowerThan(22))), | ||||
|             Box::new(OperatorI64(0, GreaterThan(44))), | ||||
|         ); | ||||
|         assert_eq!(condition, expected); | ||||
|     } | ||||
|   | ||||
| @@ -285,9 +285,13 @@ impl<'a> Search<'a> { | ||||
|                 } | ||||
|             }).next(); | ||||
|             match result { | ||||
|                 Some((fid, is_ascending)) => { | ||||
|                     let faceted_fields = self.index.faceted_fields(self.rtxn)?; | ||||
|                     let ftype = *faceted_fields.get(&fid).context("unknown field id")?; | ||||
|                 Some((attr_name, is_ascending)) => { | ||||
|                     let field_id_map = self.index.fields_ids_map(self.rtxn)?; | ||||
|                     let fid = field_id_map.id(&attr_name).with_context(|| format!("unknown field: {:?}", attr_name))?; | ||||
|                     let faceted_fields = self.index.faceted_fields_ids(self.rtxn)?; | ||||
|                     let ftype = *faceted_fields.get(&fid) | ||||
|                         .with_context(|| format!("{:?} not found in the faceted fields.", attr_name)) | ||||
|                         .expect("corrupted data: "); | ||||
|                     Some((fid, ftype, is_ascending)) | ||||
|                 }, | ||||
|                 None => None, | ||||
|   | ||||
| @@ -342,7 +342,7 @@ fn biggest_value_sizes(index: &Index, rtxn: &heed::RoTxn, limit: usize) -> anyho | ||||
|             if heap.len() > limit { heap.pop(); } | ||||
|         } | ||||
|  | ||||
|         let faceted_fields = index.faceted_fields(rtxn)?; | ||||
|         let faceted_fields = index.faceted_fields_ids(rtxn)?; | ||||
|         let fields_ids_map = index.fields_ids_map(rtxn)?; | ||||
|         for (field_id, field_type) in faceted_fields { | ||||
|             let facet_name = fields_ids_map.name(field_id).unwrap(); | ||||
| @@ -413,7 +413,7 @@ fn words_docids(index: &Index, rtxn: &heed::RoTxn, debug: bool, words: Vec<Strin | ||||
|  | ||||
| fn facet_values_docids(index: &Index, rtxn: &heed::RoTxn, debug: bool, field_name: String) -> anyhow::Result<()> { | ||||
|     let fields_ids_map = index.fields_ids_map(&rtxn)?; | ||||
|     let faceted_fields = index.faceted_fields(&rtxn)?; | ||||
|     let faceted_fields = index.faceted_fields_ids(&rtxn)?; | ||||
|  | ||||
|     let field_id = fields_ids_map.id(&field_name) | ||||
|         .with_context(|| format!("field {} not found", field_name))?; | ||||
| @@ -451,7 +451,7 @@ fn facet_values_docids(index: &Index, rtxn: &heed::RoTxn, debug: bool, field_nam | ||||
|  | ||||
| fn facet_stats(index: &Index, rtxn: &heed::RoTxn, field_name: String) -> anyhow::Result<()> { | ||||
|     let fields_ids_map = index.fields_ids_map(&rtxn)?; | ||||
|     let faceted_fields = index.faceted_fields(&rtxn)?; | ||||
|     let faceted_fields = index.faceted_fields_ids(&rtxn)?; | ||||
|  | ||||
|     let field_id = fields_ids_map.id(&field_name) | ||||
|         .with_context(|| format!("field {} not found", field_name))?; | ||||
|   | ||||
| @@ -1,4 +1,3 @@ | ||||
| use std::borrow::Cow; | ||||
| use std::io::{self, BufRead, Write}; | ||||
| use std::iter::once; | ||||
| use std::path::PathBuf; | ||||
| @@ -47,9 +46,9 @@ pub fn run(opt: Opt) -> anyhow::Result<()> { | ||||
|     let index = Index::new(options, &opt.database)?; | ||||
|     let rtxn = index.read_txn()?; | ||||
|     let fields_ids_map = index.fields_ids_map(&rtxn)?; | ||||
|     let displayed_fields = match index.displayed_fields(&rtxn)? { | ||||
|         Some(fields) => Cow::Borrowed(fields), | ||||
|         None => Cow::Owned(fields_ids_map.iter().map(|(id, _)| id).collect()), | ||||
|     let displayed_fields = match index.displayed_fields_ids(&rtxn)? { | ||||
|         Some(fields) => fields, | ||||
|         None => fields_ids_map.iter().map(|(id, _)| id).collect(), | ||||
|     }; | ||||
|  | ||||
|     let stdin = io::stdin(); | ||||
|   | ||||
| @@ -25,7 +25,7 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> { | ||||
|  | ||||
|         // We retrieve the number of documents ids that we are deleting. | ||||
|         let number_of_documents = self.index.number_of_documents(self.wtxn)?; | ||||
|         let faceted_fields = self.index.faceted_fields(self.wtxn)?; | ||||
|         let faceted_fields = self.index.faceted_fields_ids(self.wtxn)?; | ||||
|  | ||||
|         // We clean some of the main engine datastructures. | ||||
|         self.index.put_words_fst(self.wtxn, &fst::Set::default())?; | ||||
|   | ||||
| @@ -188,7 +188,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { | ||||
|         drop(iter); | ||||
|  | ||||
|         // Remove the documents ids from the faceted documents ids. | ||||
|         let faceted_fields = self.index.faceted_fields(self.wtxn)?; | ||||
|         let faceted_fields = self.index.faceted_fields_ids(self.wtxn)?; | ||||
|         for (field_id, facet_type) in faceted_fields { | ||||
|             let mut docids = self.index.faceted_documents_ids(self.wtxn, field_id)?; | ||||
|             docids.difference_with(&self.documents_ids); | ||||
|   | ||||
| @@ -51,7 +51,7 @@ impl<'t, 'u, 'i> Facets<'t, 'u, 'i> { | ||||
|  | ||||
|     pub fn execute(self) -> anyhow::Result<()> { | ||||
|         // We get the faceted fields to be able to create the facet levels. | ||||
|         let faceted_fields = self.index.faceted_fields(self.wtxn)?; | ||||
|         let faceted_fields = self.index.faceted_fields_ids(self.wtxn)?; | ||||
|  | ||||
|         debug!("Computing and writing the facet values levels docids into LMDB on disk..."); | ||||
|         for (field_id, facet_type) in faceted_fields { | ||||
|   | ||||
| @@ -338,8 +338,8 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> { | ||||
|             FacetLevel0ValuesDocids, | ||||
|         } | ||||
|  | ||||
|         let faceted_fields = self.index.faceted_fields(self.wtxn)?; | ||||
|         let searchable_fields: HashSet<_> = match self.index.searchable_fields(self.wtxn)? { | ||||
|         let faceted_fields = self.index.faceted_fields_ids(self.wtxn)?; | ||||
|         let searchable_fields: HashSet<_> = match self.index.searchable_fields_ids(self.wtxn)? { | ||||
|             Some(fields) => fields.iter().copied().collect(), | ||||
|             None => fields_ids_map.iter().map(|(id, _name)| id).collect(), | ||||
|         }; | ||||
| @@ -485,7 +485,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> { | ||||
|         self.index.put_fields_ids_map(self.wtxn, &fields_ids_map)?; | ||||
|  | ||||
|         // We write the primary key field name into the main database. | ||||
|         self.index.put_primary_key(self.wtxn, primary_key)?; | ||||
|         self.index.put_primary_key(self.wtxn, &primary_key)?; | ||||
|  | ||||
|         // We write the external documents ids into the main database. | ||||
|         self.index.put_external_documents_ids(self.wtxn, &external_documents_ids)?; | ||||
|   | ||||
| @@ -10,13 +10,15 @@ use log::info; | ||||
| use roaring::RoaringBitmap; | ||||
| use serde_json::{Map, Value}; | ||||
|  | ||||
| use crate::{BEU32, MergeFn, Index, FieldId, FieldsIdsMap, ExternalDocumentsIds}; | ||||
| use crate::{Index, BEU32, MergeFn, FieldsIdsMap, ExternalDocumentsIds, FieldId}; | ||||
| use crate::update::{AvailableDocumentsIds, UpdateIndexingStep}; | ||||
| use super::merge_function::merge_two_obkvs; | ||||
| use super::{create_writer, create_sorter, IndexDocumentsMethod}; | ||||
|  | ||||
| const DEFAULT_PRIMARY_KEY_NAME: &str = "id"; | ||||
|  | ||||
| pub struct TransformOutput { | ||||
|     pub primary_key: FieldId, | ||||
|     pub primary_key: String, | ||||
|     pub fields_ids_map: FieldsIdsMap, | ||||
|     pub external_documents_ids: ExternalDocumentsIds<'static>, | ||||
|     pub new_documents_ids: RoaringBitmap, | ||||
| @@ -73,7 +75,6 @@ impl Transform<'_, '_> { | ||||
|     { | ||||
|         let mut fields_ids_map = self.index.fields_ids_map(self.rtxn)?; | ||||
|         let external_documents_ids = self.index.external_documents_ids(self.rtxn).unwrap(); | ||||
|         let primary_key = self.index.primary_key(self.rtxn)?; | ||||
|  | ||||
|         // Deserialize the whole batch of documents in memory. | ||||
|         let mut documents: Peekable<Box<dyn Iterator<Item=serde_json::Result<Map<String, Value>>>>> = if is_stream { | ||||
| @@ -88,27 +89,15 @@ impl Transform<'_, '_> { | ||||
|         }; | ||||
|  | ||||
|         // We extract the primary key from the first document in | ||||
|         // the batch if it hasn't already been defined in the index. | ||||
|         let primary_key = match primary_key { | ||||
|             Some(primary_key) => primary_key, | ||||
|             None => { | ||||
|                 // We ignore a potential error here as we can't early return it now, | ||||
|                 // the peek method gives us only a reference on the next item, | ||||
|                 // we will eventually return it in the iteration just after. | ||||
|                 let first = documents.peek().and_then(|r| r.as_ref().ok()); | ||||
|                 match first.and_then(|doc| doc.keys().find(|k| k.contains("id"))) { | ||||
|                     Some(key) => fields_ids_map.insert(&key).context("field id limit reached")?, | ||||
|                     None => { | ||||
|                         if !self.autogenerate_docids { | ||||
|                             // If there is no primary key in the current document batch, we must | ||||
|                             // return an error and not automatically generate any document id. | ||||
|                             return Err(anyhow!("missing primary key")) | ||||
|                         } | ||||
|                         fields_ids_map.insert("id").context("field id limit reached")? | ||||
|                     }, | ||||
|                 } | ||||
|             }, | ||||
|         }; | ||||
|         // the batch if it hasn't already been defined in the index | ||||
|         let first = documents.peek().and_then(|r| r.as_ref().ok()); | ||||
|         let alternative_name = first.and_then(|doc| doc.keys().find(|k| k.contains(DEFAULT_PRIMARY_KEY_NAME)).cloned()); | ||||
|         let (primary_key_id, primary_key) = compute_primary_key_pair( | ||||
|             self.index.primary_key(self.rtxn)?, | ||||
|             &mut fields_ids_map, | ||||
|             alternative_name, | ||||
|             self.autogenerate_docids | ||||
|         )?; | ||||
|  | ||||
|         if documents.peek().is_none() { | ||||
|             return Ok(TransformOutput { | ||||
| @@ -122,13 +111,6 @@ impl Transform<'_, '_> { | ||||
|             }); | ||||
|         } | ||||
|  | ||||
|         // Get the primary key field name now, this way we will | ||||
|         // be able to get the value in the JSON Map document. | ||||
|         let primary_key_name = fields_ids_map | ||||
|             .name(primary_key) | ||||
|             .expect("found the primary key name") | ||||
|             .to_owned(); | ||||
|  | ||||
|         // We must choose the appropriate merge function for when two or more documents | ||||
|         // with the same user id must be merged or fully replaced in the same batch. | ||||
|         let merge_function = match self.index_documents_method { | ||||
| @@ -170,7 +152,7 @@ impl Transform<'_, '_> { | ||||
|  | ||||
|             // We retrieve the user id from the document based on the primary key name, | ||||
|             // if the document id isn't present we generate a uuid. | ||||
|             let external_id = match document.get(&primary_key_name) { | ||||
|             let external_id = match document.get(&primary_key) { | ||||
|                 Some(value) => match value { | ||||
|                     Value::String(string) => Cow::Borrowed(string.as_str()), | ||||
|                     Value::Number(number) => Cow::Owned(number.to_string()), | ||||
| @@ -196,7 +178,7 @@ impl Transform<'_, '_> { | ||||
|                     serde_json::to_writer(&mut json_buffer, value)?; | ||||
|                     writer.insert(field_id, &json_buffer)?; | ||||
|                 } | ||||
|                 else if field_id == primary_key { | ||||
|                 else if field_id == primary_key_id { | ||||
|                     // We validate the document id [a-zA-Z0-9\-_]. | ||||
|                     let external_id = match validate_document_id(&external_id) { | ||||
|                         Some(valid) => valid, | ||||
| @@ -240,42 +222,37 @@ impl Transform<'_, '_> { | ||||
|  | ||||
|         let mut csv = csv::Reader::from_reader(reader); | ||||
|         let headers = csv.headers()?; | ||||
|         let primary_key = self.index.primary_key(self.rtxn)?; | ||||
|  | ||||
|         // Generate the new fields ids based on the current fields ids and this CSV headers. | ||||
|         let mut fields_ids = Vec::new(); | ||||
|         // Generate the new fields ids based on the current fields ids and this CSV headers. | ||||
|         for (i, header) in headers.iter().enumerate() { | ||||
|             let id = fields_ids_map.insert(header).context("field id limit reached)")?; | ||||
|             fields_ids.push((id, i)); | ||||
|         } | ||||
|  | ||||
|         // Extract the position of the primary key in the current headers, None if not found. | ||||
|         let external_id_pos = match primary_key { | ||||
|         let primary_key_pos = match self.index.primary_key(self.rtxn)? { | ||||
|             Some(primary_key) => { | ||||
|                 // Te primary key have is known so we must find the position in the CSV headers. | ||||
|                 let name = fields_ids_map.name(primary_key).expect("found the primary key name"); | ||||
|                 headers.iter().position(|h| h == name) | ||||
|                // The primary key is known so we must find the position in the CSV headers. | ||||
|                headers.iter().position(|h| h == primary_key) | ||||
|             }, | ||||
|             None => headers.iter().position(|h| h.contains("id")), | ||||
|         }; | ||||
|  | ||||
|         // Returns the field id in the fileds ids map, create an "id" field | ||||
|         // Returns the field id in the fields ids map, create an "id" field | ||||
|         // in case it is not in the current headers. | ||||
|         let primary_key_field_id = match external_id_pos { | ||||
|             Some(pos) => fields_ids_map.id(&headers[pos]).expect("found the primary key"), | ||||
|             None => { | ||||
|                 if !self.autogenerate_docids { | ||||
|                     // If there is no primary key in the current document batch, we must | ||||
|                     // return an error and not automatically generate any document id. | ||||
|                     return Err(anyhow!("missing primary key")) | ||||
|                 } | ||||
|                 let field_id = fields_ids_map.insert("id").context("field id limit reached")?; | ||||
|                 // We make sure to add the primary key field id to the fields ids, | ||||
|                 // this way it is added to the obks. | ||||
|                 fields_ids.push((field_id, usize::max_value())); | ||||
|                 field_id | ||||
|             }, | ||||
|         }; | ||||
|         let alternative_name = primary_key_pos.map(|pos| headers[pos].to_string()); | ||||
|         let (primary_key_id, _) = compute_primary_key_pair( | ||||
|             self.index.primary_key(self.rtxn)?, | ||||
|             &mut fields_ids_map, | ||||
|             alternative_name, | ||||
|             self.autogenerate_docids | ||||
|         )?; | ||||
|  | ||||
|         // The primary key field is not present in the header, so we need to create it. | ||||
|         if primary_key_pos.is_none() { | ||||
|             fields_ids.push((primary_key_id, usize::max_value())); | ||||
|         } | ||||
|  | ||||
|         // We sort the fields ids by the fields ids map id, this way we are sure to iterate over | ||||
|         // the records fields in the fields ids map order and correctly generate the obkv. | ||||
| @@ -310,7 +287,7 @@ impl Transform<'_, '_> { | ||||
|             } | ||||
|  | ||||
|             // We extract the user id if we know where it is or generate a UUID V4 otherwise. | ||||
|             let external_id = match external_id_pos { | ||||
|             let external_id = match primary_key_pos { | ||||
|                 Some(pos) => { | ||||
|                     let external_id = &record[pos]; | ||||
|                     // We validate the document id [a-zA-Z0-9\-_]. | ||||
| @@ -326,7 +303,7 @@ impl Transform<'_, '_> { | ||||
|             // we return the generated document id instead of the record field. | ||||
|             let iter = fields_ids.iter() | ||||
|                 .map(|(fi, i)| { | ||||
|                     let field = if *fi == primary_key_field_id { external_id } else { &record[*i] }; | ||||
|                     let field = if *fi == primary_key_id { external_id } else { &record[*i] }; | ||||
|                     (fi, field) | ||||
|                 }); | ||||
|  | ||||
| @@ -349,9 +326,13 @@ impl Transform<'_, '_> { | ||||
|  | ||||
|         // Now that we have a valid sorter that contains the user id and the obkv we | ||||
|         // give it to the last transforming function which returns the TransformOutput. | ||||
|         let primary_key_name = fields_ids_map | ||||
|             .name(primary_key_id) | ||||
|             .map(String::from) | ||||
|             .expect("Primary key must be present in fields id map"); | ||||
|         self.output_from_sorter( | ||||
|             sorter, | ||||
|             primary_key_field_id, | ||||
|             primary_key_name, | ||||
|             fields_ids_map, | ||||
|             documents_count, | ||||
|             external_documents_ids, | ||||
| @@ -365,7 +346,7 @@ impl Transform<'_, '_> { | ||||
|     fn output_from_sorter<F>( | ||||
|         self, | ||||
|         sorter: grenad::Sorter<MergeFn>, | ||||
|         primary_key: FieldId, | ||||
|         primary_key: String, | ||||
|         fields_ids_map: FieldsIdsMap, | ||||
|         approximate_number_of_documents: usize, | ||||
|         mut external_documents_ids: ExternalDocumentsIds<'_>, | ||||
| @@ -477,11 +458,11 @@ impl Transform<'_, '_> { | ||||
|     // TODO this can be done in parallel by using the rayon `ThreadPool`. | ||||
|     pub fn remap_index_documents( | ||||
|         self, | ||||
|         primary_key: FieldId, | ||||
|         fields_ids_map: FieldsIdsMap, | ||||
|         primary_key: String, | ||||
|         old_fields_ids_map: FieldsIdsMap, | ||||
|         new_fields_ids_map: FieldsIdsMap, | ||||
|     ) -> anyhow::Result<TransformOutput> | ||||
|     { | ||||
|         let current_fields_ids_map = self.index.fields_ids_map(self.rtxn)?; | ||||
|         let external_documents_ids = self.index.external_documents_ids(self.rtxn)?; | ||||
|         let documents_ids = self.index.documents_ids(self.rtxn)?; | ||||
|         let documents_count = documents_ids.len() as usize; | ||||
| @@ -499,8 +480,8 @@ impl Transform<'_, '_> { | ||||
|             let mut obkv_writer = obkv::KvWriter::new(&mut obkv_buffer); | ||||
|  | ||||
|             // We iterate over the new `FieldsIdsMap` ids in order and construct the new obkv. | ||||
|             for (id, name) in fields_ids_map.iter() { | ||||
|                 if let Some(val) = current_fields_ids_map.id(name).and_then(|id| obkv.get(id)) { | ||||
|             for (id, name) in new_fields_ids_map.iter() { | ||||
|                 if let Some(val) = old_fields_ids_map.id(name).and_then(|id| obkv.get(id)) { | ||||
|                     obkv_writer.insert(id, val)?; | ||||
|                 } | ||||
|             } | ||||
| @@ -516,7 +497,7 @@ impl Transform<'_, '_> { | ||||
|  | ||||
|         Ok(TransformOutput { | ||||
|             primary_key, | ||||
|             fields_ids_map, | ||||
|             fields_ids_map: new_fields_ids_map, | ||||
|             external_documents_ids: external_documents_ids.into_static(), | ||||
|             new_documents_ids: documents_ids, | ||||
|             replaced_documents_ids: RoaringBitmap::default(), | ||||
| @@ -526,6 +507,42 @@ impl Transform<'_, '_> { | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// Given an optional primary key and an optional alternative name, returns the (field_id, attr_name) | ||||
| /// for the primary key according to the following rules: | ||||
| /// - if primary_key is `Some`, returns the id and the name, else | ||||
| /// - if alternative_name is Some, adds alternative to the fields_ids_map, and returns the pair, else | ||||
| /// - if autogenerate_docids is true, insert the default id value in the field ids map ("id") and | ||||
| /// returns the pair, else | ||||
| /// - returns an error. | ||||
| fn compute_primary_key_pair( | ||||
|     primary_key: Option<&str>, | ||||
|     fields_ids_map: &mut FieldsIdsMap, | ||||
|     alternative_name: Option<String>, | ||||
|     autogenerate_docids: bool, | ||||
| ) -> anyhow::Result<(FieldId, String)> { | ||||
|     match primary_key { | ||||
|         Some(primary_key) => { | ||||
|             let id = fields_ids_map.id(primary_key).expect("primary key must be present in the fields id map"); | ||||
|             Ok((id, primary_key.to_string())) | ||||
|         } | ||||
|         None => { | ||||
|             let name = match alternative_name { | ||||
|                 Some(key) => key, | ||||
|                 None => { | ||||
|                     if !autogenerate_docids { | ||||
|                         // If there is no primary key in the current document batch, we must | ||||
|                         // return an error and not automatically generate any document id. | ||||
|                         anyhow::bail!("missing primary key") | ||||
|                     } | ||||
|                     DEFAULT_PRIMARY_KEY_NAME.to_string() | ||||
|                 }, | ||||
|             }; | ||||
|             let id = fields_ids_map.insert(&name).context("field id limit reached")?; | ||||
|             Ok((id, name)) | ||||
|         }, | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// Only the last value associated with an id is kept. | ||||
| fn keep_latest_obkv(_key: &[u8], obkvs: &[Cow<[u8]>]) -> anyhow::Result<Vec<u8>> { | ||||
|     obkvs.last().context("no last value").map(|last| last.clone().into_owned()) | ||||
| @@ -552,3 +569,73 @@ fn validate_document_id(document_id: &str) -> Option<&str> { | ||||
|         }) | ||||
|     }) | ||||
| } | ||||
|  | ||||
| #[cfg(test)] | ||||
| mod test { | ||||
|     use super::*; | ||||
|  | ||||
|     mod compute_primary_key { | ||||
|         use super::compute_primary_key_pair; | ||||
|         use super::FieldsIdsMap; | ||||
|  | ||||
|         #[test] | ||||
|         #[should_panic] | ||||
|         fn should_panic_primary_key_not_in_map() { | ||||
|             let mut fields_map = FieldsIdsMap::new(); | ||||
|             let _result = compute_primary_key_pair( | ||||
|                 Some("toto"), | ||||
|                 &mut fields_map, | ||||
|                 None, | ||||
|                 false); | ||||
|         } | ||||
|  | ||||
|         #[test] | ||||
|         fn should_return_primary_key_if_is_some() { | ||||
|             let mut fields_map = FieldsIdsMap::new(); | ||||
|             fields_map.insert("toto").unwrap(); | ||||
|             let result = compute_primary_key_pair( | ||||
|                 Some("toto"), | ||||
|                 &mut fields_map, | ||||
|                 Some("tata".to_string()), | ||||
|                 false); | ||||
|             assert_eq!(result.unwrap(), (0u8, "toto".to_string())); | ||||
|             assert_eq!(fields_map.len(), 1); | ||||
|         } | ||||
|  | ||||
|         #[test] | ||||
|         fn should_return_alternative_if_primary_is_none() { | ||||
|             let mut fields_map = FieldsIdsMap::new(); | ||||
|             let result = compute_primary_key_pair( | ||||
|                 None, | ||||
|                 &mut fields_map, | ||||
|                 Some("tata".to_string()), | ||||
|                 false); | ||||
|             assert_eq!(result.unwrap(), (0u8, "tata".to_string())); | ||||
|             assert_eq!(fields_map.len(), 1); | ||||
|         } | ||||
|  | ||||
|         #[test] | ||||
|         fn should_return_default_if_both_are_none() { | ||||
|             let mut fields_map = FieldsIdsMap::new(); | ||||
|             let result = compute_primary_key_pair( | ||||
|                 None, | ||||
|                 &mut fields_map, | ||||
|                 None, | ||||
|                 true); | ||||
|             assert_eq!(result.unwrap(), (0u8, "id".to_string())); | ||||
|             assert_eq!(fields_map.len(), 1); | ||||
|         } | ||||
|  | ||||
|         #[test] | ||||
|         fn should_return_err_if_both_are_none_and_recompute_is_false(){ | ||||
|             let mut fields_map = FieldsIdsMap::new(); | ||||
|             let result = compute_primary_key_pair( | ||||
|                 None, | ||||
|                 &mut fields_map, | ||||
|                 None, | ||||
|                 false); | ||||
|             assert!(result.is_err()); | ||||
|             assert_eq!(fields_map.len(), 0); | ||||
|         } | ||||
|     } | ||||
| } | ||||
|   | ||||
| @@ -1,14 +1,16 @@ | ||||
| use std::collections::HashMap; | ||||
| use std::str::FromStr; | ||||
|  | ||||
| use anyhow::{ensure, Context}; | ||||
| use anyhow::Context; | ||||
| use grenad::CompressionType; | ||||
| use itertools::Itertools; | ||||
| use rayon::ThreadPool; | ||||
|  | ||||
| use crate::criterion::Criterion; | ||||
| use crate::facet::FacetType; | ||||
| use crate::update::index_documents::{Transform, IndexDocumentsMethod}; | ||||
| use crate::update::{ClearDocuments, IndexDocuments, UpdateIndexingStep}; | ||||
| use crate::facet::FacetType; | ||||
| use crate::{Index, FieldsIdsMap, Criterion}; | ||||
| use crate::{Index, FieldsIdsMap}; | ||||
|  | ||||
| pub struct Settings<'a, 't, 'u, 'i> { | ||||
|     wtxn: &'t mut heed::RwTxn<'i, 'u>, | ||||
| @@ -26,7 +28,7 @@ pub struct Settings<'a, 't, 'u, 'i> { | ||||
|     // however if it is `Some(None)` it means that the user forced a reset of the setting. | ||||
|     searchable_fields: Option<Option<Vec<String>>>, | ||||
|     displayed_fields: Option<Option<Vec<String>>>, | ||||
|     faceted_fields: Option<HashMap<String, String>>, | ||||
|     faceted_fields: Option<Option<HashMap<String, String>>>, | ||||
|     criteria: Option<Option<Vec<String>>>, | ||||
| } | ||||
|  | ||||
| @@ -67,7 +69,11 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { | ||||
|     } | ||||
|  | ||||
|     pub fn set_faceted_fields(&mut self, names_facet_types: HashMap<String, String>) { | ||||
|         self.faceted_fields = Some(names_facet_types); | ||||
|         self.faceted_fields = Some(Some(names_facet_types)); | ||||
|     } | ||||
|  | ||||
|     pub fn reset_faceted_fields(&mut self) { | ||||
|         self.faceted_fields = Some(None); | ||||
|     } | ||||
|  | ||||
|     pub fn reset_criteria(&mut self) { | ||||
| @@ -78,183 +84,188 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { | ||||
|         self.criteria = Some(Some(criteria)); | ||||
|     } | ||||
|  | ||||
|     pub fn execute<F>(self, progress_callback: F) -> anyhow::Result<()> | ||||
|     fn reindex<F>(&mut self, cb: &F, old_fields_ids_map: FieldsIdsMap) -> anyhow::Result<()> | ||||
|     where | ||||
|         F: Fn(UpdateIndexingStep) + Sync | ||||
|         F: Fn(UpdateIndexingStep) + Sync, | ||||
|     { | ||||
|         let mut updated_searchable_fields = None; | ||||
|         let mut updated_faceted_fields = None; | ||||
|         let mut updated_displayed_fields = None; | ||||
|         let mut updated_criteria = None; | ||||
|  | ||||
|         // Construct the new FieldsIdsMap based on the searchable fields order. | ||||
|         let fields_ids_map = self.index.fields_ids_map(self.wtxn)?; | ||||
|         let mut fields_ids_map = match self.searchable_fields { | ||||
|             Some(Some(searchable_fields)) => { | ||||
|                 let mut new_fields_ids_map = FieldsIdsMap::new(); | ||||
|                 let mut new_searchable_fields = Vec::new(); | ||||
|         // if the settings are set before any document update, we don't need to do anything, and | ||||
|         // will set the primary key during the first document addition. | ||||
|         if self.index.number_of_documents(&self.wtxn)? == 0 { | ||||
|             return Ok(()) | ||||
|         } | ||||
|  | ||||
|                 for name in searchable_fields { | ||||
|                     let id = new_fields_ids_map.insert(&name).context("field id limit reached")?; | ||||
|                     new_searchable_fields.push(id); | ||||
|                 } | ||||
|  | ||||
|                 for (_, name) in fields_ids_map.iter() { | ||||
|                     new_fields_ids_map.insert(name).context("field id limit reached")?; | ||||
|                 } | ||||
|  | ||||
|                 updated_searchable_fields = Some(Some(new_searchable_fields)); | ||||
|                 new_fields_ids_map | ||||
|             }, | ||||
|             Some(None) => { | ||||
|                 updated_searchable_fields = Some(None); | ||||
|                 fields_ids_map | ||||
|             }, | ||||
|             None => fields_ids_map, | ||||
|         let transform = Transform { | ||||
|             rtxn: &self.wtxn, | ||||
|             index: self.index, | ||||
|             log_every_n: self.log_every_n, | ||||
|             chunk_compression_type: self.chunk_compression_type, | ||||
|             chunk_compression_level: self.chunk_compression_level, | ||||
|             chunk_fusing_shrink_size: self.chunk_fusing_shrink_size, | ||||
|             max_nb_chunks: self.max_nb_chunks, | ||||
|             max_memory: self.max_memory, | ||||
|             index_documents_method: IndexDocumentsMethod::ReplaceDocuments, | ||||
|             autogenerate_docids: false, | ||||
|         }; | ||||
|  | ||||
|         // We compute or generate the new primary key field id. | ||||
|         // TODO make the primary key settable. | ||||
|         let primary_key = match self.index.primary_key(&self.wtxn)? { | ||||
|             Some(id) => { | ||||
|                 let current_fields_ids_map = self.index.fields_ids_map(self.wtxn)?; | ||||
|                 let name = current_fields_ids_map.name(id).unwrap(); | ||||
|                 fields_ids_map.insert(name).context("field id limit reached")? | ||||
|             }, | ||||
|             None => fields_ids_map.insert("id").context("field id limit reached")?, | ||||
|         }; | ||||
|         // There has already been a document addition, so the primary key should be set by now. | ||||
|         let primary_key = self.index.primary_key(&self.wtxn)?.context("Index must have a primary key")?; | ||||
|  | ||||
|         let current_faceted_fields = self.index.faceted_fields(self.wtxn)?; | ||||
|         if let Some(fields_names_facet_types) = self.faceted_fields { | ||||
|             let mut faceted_fields = HashMap::new(); | ||||
|             for (name, sftype) in fields_names_facet_types { | ||||
|                 let ftype = FacetType::from_str(&sftype).with_context(|| format!("parsing facet type {:?}", sftype))?; | ||||
|                 let id = fields_ids_map.insert(&name).context("field id limit reached")?; | ||||
|                 match current_faceted_fields.get(&id) { | ||||
|                     Some(pftype) => { | ||||
|                         ensure!(ftype == *pftype, "{} facet type changed from {} to {}", name, ftype, pftype); | ||||
|                         faceted_fields.insert(id, ftype) | ||||
|                     }, | ||||
|                     None => faceted_fields.insert(id, ftype), | ||||
|                 }; | ||||
|             } | ||||
|         // We remap the documents fields based on the new `FieldsIdsMap`. | ||||
|         let output = transform.remap_index_documents( | ||||
|             primary_key.to_string(), | ||||
|             old_fields_ids_map, | ||||
|             fields_ids_map.clone())?; | ||||
|  | ||||
|             updated_faceted_fields = Some(faceted_fields); | ||||
|         } | ||||
|  | ||||
|         // Check that the displayed attributes have been specified. | ||||
|         if let Some(value) = self.displayed_fields { | ||||
|             match value { | ||||
|                 Some(names) => { | ||||
|                     let mut new_displayed_fields = Vec::new(); | ||||
|                     for name in names { | ||||
|                         let id = fields_ids_map.insert(&name).context("field id limit reached")?; | ||||
|                         new_displayed_fields.push(id); | ||||
|                     } | ||||
|                     updated_displayed_fields = Some(Some(new_displayed_fields)); | ||||
|                 } | ||||
|                 None => updated_displayed_fields = Some(None), | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         if let Some(criteria) = self.criteria { | ||||
|             match criteria { | ||||
|                 Some(criteria_names) => { | ||||
|                     let mut new_criteria = Vec::new(); | ||||
|                     for name in criteria_names { | ||||
|                         let criterion = Criterion::from_str(&mut fields_ids_map, &name)?; | ||||
|                         if let Some(fid) = criterion.field_id() { | ||||
|                             let name = fields_ids_map.name(fid).unwrap(); | ||||
|                             let faceted_fields = updated_faceted_fields.as_ref().unwrap_or(¤t_faceted_fields); | ||||
|                             ensure!(faceted_fields.contains_key(&fid), "criterion field {} must be faceted", name); | ||||
|                         } | ||||
|                         new_criteria.push(criterion); | ||||
|                     } | ||||
|                     updated_criteria = Some(Some(new_criteria)); | ||||
|                 }, | ||||
|                 None => updated_criteria = Some(None), | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         // If any setting have modified any of the datastructures it means that we need | ||||
|         // to retrieve the documents and then reindex then with the new settings. | ||||
|         if updated_searchable_fields.is_some() || updated_faceted_fields.is_some() { | ||||
|             let transform = Transform { | ||||
|                 rtxn: &self.wtxn, | ||||
|                 index: self.index, | ||||
|                 log_every_n: self.log_every_n, | ||||
|                 chunk_compression_type: self.chunk_compression_type, | ||||
|                 chunk_compression_level: self.chunk_compression_level, | ||||
|                 chunk_fusing_shrink_size: self.chunk_fusing_shrink_size, | ||||
|                 max_nb_chunks: self.max_nb_chunks, | ||||
|                 max_memory: self.max_memory, | ||||
|                 index_documents_method: IndexDocumentsMethod::ReplaceDocuments, | ||||
|                 autogenerate_docids: false, | ||||
|             }; | ||||
|  | ||||
|             // We remap the documents fields based on the new `FieldsIdsMap`. | ||||
|             let output = transform.remap_index_documents(primary_key, fields_ids_map.clone())?; | ||||
|  | ||||
|             // We write the new FieldsIdsMap to the database | ||||
|             // this way next indexing methods will be based on that. | ||||
|             self.index.put_fields_ids_map(self.wtxn, &fields_ids_map)?; | ||||
|  | ||||
|             if let Some(faceted_fields) = updated_faceted_fields { | ||||
|                 // We write the faceted_fields fields into the database here. | ||||
|                 self.index.put_faceted_fields(self.wtxn, &faceted_fields)?; | ||||
|             } | ||||
|  | ||||
|             if let Some(searchable_fields) = updated_searchable_fields { | ||||
|                 // The new searchable fields are also written down to make sure | ||||
|                 // that the IndexDocuments system takes only these ones into account. | ||||
|                 match searchable_fields { | ||||
|                     Some(fields) => self.index.put_searchable_fields(self.wtxn, &fields)?, | ||||
|                     None => self.index.delete_searchable_fields(self.wtxn).map(drop)?, | ||||
|                 } | ||||
|             } | ||||
|  | ||||
|             // We clear the full database (words-fst, documents ids and documents content). | ||||
|             ClearDocuments::new(self.wtxn, self.index).execute()?; | ||||
|  | ||||
|             // We index the generated `TransformOutput` which must contain | ||||
|             // all the documents with fields in the newly defined searchable order. | ||||
|             let mut indexing_builder = IndexDocuments::new(self.wtxn, self.index); | ||||
|             indexing_builder.log_every_n = self.log_every_n; | ||||
|             indexing_builder.max_nb_chunks = self.max_nb_chunks; | ||||
|             indexing_builder.max_memory = self.max_memory; | ||||
|             indexing_builder.linked_hash_map_size = self.linked_hash_map_size; | ||||
|             indexing_builder.chunk_compression_type = self.chunk_compression_type; | ||||
|             indexing_builder.chunk_compression_level = self.chunk_compression_level; | ||||
|             indexing_builder.chunk_fusing_shrink_size = self.chunk_fusing_shrink_size; | ||||
|             indexing_builder.thread_pool = self.thread_pool; | ||||
|             indexing_builder.execute_raw(output, &progress_callback)?; | ||||
|         } | ||||
|  | ||||
|         if let Some(displayed_fields) = updated_displayed_fields { | ||||
|             match displayed_fields { | ||||
|                 Some(fields) => self.index.put_displayed_fields(self.wtxn, &fields)?, | ||||
|                 None => self.index.delete_displayed_fields(self.wtxn).map(drop)?, | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         if let Some(criteria) = updated_criteria { | ||||
|             match criteria { | ||||
|                 Some(criteria) => self.index.put_criteria(self.wtxn, &criteria)?, | ||||
|                 None => self.index.delete_criteria(self.wtxn).map(drop)?, | ||||
|             } | ||||
|         } | ||||
|         // We clear the full database (words-fst, documents ids and documents content). | ||||
|         ClearDocuments::new(self.wtxn, self.index).execute()?; | ||||
|  | ||||
|         // We index the generated `TransformOutput` which must contain | ||||
|         // all the documents with fields in the newly defined searchable order. | ||||
|         let mut indexing_builder = IndexDocuments::new(self.wtxn, self.index); | ||||
|         indexing_builder.log_every_n = self.log_every_n; | ||||
|         indexing_builder.max_nb_chunks = self.max_nb_chunks; | ||||
|         indexing_builder.max_memory = self.max_memory; | ||||
|         indexing_builder.linked_hash_map_size = self.linked_hash_map_size; | ||||
|         indexing_builder.chunk_compression_type = self.chunk_compression_type; | ||||
|         indexing_builder.chunk_compression_level = self.chunk_compression_level; | ||||
|         indexing_builder.chunk_fusing_shrink_size = self.chunk_fusing_shrink_size; | ||||
|         indexing_builder.thread_pool = self.thread_pool; | ||||
|         indexing_builder.execute_raw(output, &cb)?; | ||||
|         Ok(()) | ||||
|     } | ||||
|  | ||||
|     fn update_displayed(&mut self) -> anyhow::Result<bool> { | ||||
|         match self.displayed_fields { | ||||
|             Some(Some(ref fields)) => { | ||||
|                 let mut fields_ids_map = self.index.fields_ids_map(self.wtxn)?; | ||||
|                 // fields are deduplicated, only the first occurrence is taken into account | ||||
|                 let names: Vec<_> = fields | ||||
|                     .iter() | ||||
|                     .unique() | ||||
|                     .map(String::as_str) | ||||
|                     .collect(); | ||||
|  | ||||
|                 for name in names.iter() { | ||||
|                     fields_ids_map | ||||
|                         .insert(name) | ||||
|                         .context("field id limit exceeded")?; | ||||
|                 } | ||||
|                 self.index.put_displayed_fields(self.wtxn, &names)?; | ||||
|                 self.index.put_fields_ids_map(self.wtxn, &fields_ids_map)?; | ||||
|             } | ||||
|             Some(None) => { self.index.delete_displayed_fields(self.wtxn)?; }, | ||||
|             None => return Ok(false), | ||||
|         } | ||||
|         Ok(true) | ||||
|     } | ||||
|  | ||||
|     /// Updates the index's searchable attributes. This causes the field map to be recomputed to | ||||
|     /// reflect the order of the searchable attributes. | ||||
|     fn update_searchable(&mut self) -> anyhow::Result<bool> { | ||||
|         match self.searchable_fields { | ||||
|             Some(Some(ref fields)) => { | ||||
|                 // Every time the searchable attributes are updated, we need to update the | ||||
|                 // ids for any settings that use the facets (displayed_fields, | ||||
|                 // faceted_fields). | ||||
|                 let old_fields_ids_map = self.index.fields_ids_map(self.wtxn)?; | ||||
|  | ||||
|                 let mut new_fields_ids_map = FieldsIdsMap::new(); | ||||
|                 // fields are deduplicated, only the first occurrence is taken into account | ||||
|                 let names = fields | ||||
|                     .iter() | ||||
|                     .unique() | ||||
|                     .map(String::as_str) | ||||
|                     .collect::<Vec<_>>(); | ||||
|  | ||||
|                 // Add all the searchable attributes to the field map, and then add the | ||||
|                 // remaining fields from the old field map to the new one | ||||
|                 for name in names.iter() { | ||||
|                     new_fields_ids_map | ||||
|                         .insert(&name) | ||||
|                         .context("field id limit exceeded")?; | ||||
|                 } | ||||
|  | ||||
|                 for (_, name) in old_fields_ids_map.iter() { | ||||
|                     new_fields_ids_map | ||||
|                         .insert(&name) | ||||
|                         .context("field id limit exceeded")?; | ||||
|                 } | ||||
|  | ||||
|                 self.index.put_searchable_fields(self.wtxn, &names)?; | ||||
|                 self.index.put_fields_ids_map(self.wtxn, &new_fields_ids_map)?; | ||||
|             } | ||||
|             Some(None) => { self.index.delete_searchable_fields(self.wtxn)?; }, | ||||
|             None => return Ok(false), | ||||
|         } | ||||
|         Ok(true) | ||||
|     } | ||||
|  | ||||
|     fn update_facets(&mut self) -> anyhow::Result<bool> { | ||||
|         match self.faceted_fields { | ||||
|             Some(Some(ref fields)) => { | ||||
|                 let mut fields_ids_map = self.index.fields_ids_map(self.wtxn)?; | ||||
|                 let mut new_facets = HashMap::new(); | ||||
|                 for (name, ty) in fields { | ||||
|                     fields_ids_map.insert(name).context("field id limit exceeded")?; | ||||
|                     let ty = FacetType::from_str(&ty)?; | ||||
|                     new_facets.insert(name.clone(), ty); | ||||
|                 } | ||||
|                 self.index.put_faceted_fields(self.wtxn, &new_facets)?; | ||||
|                 self.index.put_fields_ids_map(self.wtxn, &fields_ids_map)?; | ||||
|             } | ||||
|             Some(None) => { self.index.delete_faceted_fields(self.wtxn)?; }, | ||||
|             None => return Ok(false) | ||||
|         } | ||||
|         Ok(true) | ||||
|     } | ||||
|  | ||||
|     fn update_criteria(&mut self) -> anyhow::Result<()> { | ||||
|         match self.criteria { | ||||
|             Some(Some(ref fields)) => { | ||||
|                 let faceted_fields = self.index.faceted_fields(&self.wtxn)?; | ||||
|                 let mut new_criteria = Vec::new(); | ||||
|                 for name in fields { | ||||
|                     let criterion = Criterion::from_str(&faceted_fields, &name)?; | ||||
|                     new_criteria.push(criterion); | ||||
|                 } | ||||
|                 self.index.put_criteria(self.wtxn, &new_criteria)?; | ||||
|             } | ||||
|             Some(None) => { self.index.delete_criteria(self.wtxn)?; } | ||||
|             None => (), | ||||
|         } | ||||
|         Ok(()) | ||||
|     } | ||||
|  | ||||
|     pub fn execute<F>(mut self, progress_callback: F) -> anyhow::Result<()> | ||||
|     where | ||||
|         F: Fn(UpdateIndexingStep) + Sync | ||||
|         { | ||||
|             let old_fields_ids_map = self.index.fields_ids_map(&self.wtxn)?; | ||||
|             self.update_displayed()?; | ||||
|             let facets_updated = self.update_facets()?; | ||||
|             // update_criteria MUST be called after update_facets, since criterion fields must be set | ||||
|             // as facets. | ||||
|             self.update_criteria()?; | ||||
|             let searchable_updated = self.update_searchable()?; | ||||
|  | ||||
|             if facets_updated || searchable_updated { | ||||
|                 self.reindex(&progress_callback, old_fields_ids_map)?; | ||||
|             } | ||||
|             Ok(()) | ||||
|         } | ||||
| } | ||||
|  | ||||
| #[cfg(test)] | ||||
| mod tests { | ||||
|     use super::*; | ||||
|     use crate::update::{IndexDocuments, UpdateFormat}; | ||||
|  | ||||
|     use heed::EnvOpenOptions; | ||||
|     use maplit::hashmap; | ||||
|  | ||||
|     use crate::facet::FacetType; | ||||
|     use crate::update::{IndexDocuments, UpdateFormat}; | ||||
|  | ||||
|     #[test] | ||||
|     fn set_and_reset_searchable_fields() { | ||||
|         let path = tempfile::tempdir().unwrap(); | ||||
| @@ -336,10 +347,8 @@ mod tests { | ||||
|  | ||||
|         // Check that the displayed fields are correctly set to only the "age" field. | ||||
|         let rtxn = index.read_txn().unwrap(); | ||||
|         let fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); | ||||
|         let fields_ids = index.displayed_fields(&rtxn).unwrap(); | ||||
|         let age_id = fields_ids_map.id("age").unwrap(); | ||||
|         assert_eq!(fields_ids, Some(&[age_id][..])); | ||||
|         assert_eq!(fields_ids.unwrap(), (&["age"][..])); | ||||
|         drop(rtxn); | ||||
|  | ||||
|         // We change the searchable fields to be the "name" field only. | ||||
| @@ -351,10 +360,8 @@ mod tests { | ||||
|  | ||||
|         // Check that the displayed fields always contains only the "age" field. | ||||
|         let rtxn = index.read_txn().unwrap(); | ||||
|         let fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); | ||||
|         let fields_ids = index.displayed_fields(&rtxn).unwrap(); | ||||
|         let age_id = fields_ids_map.id("age").unwrap(); | ||||
|         assert_eq!(fields_ids, Some(&[age_id][..])); | ||||
|         assert_eq!(fields_ids.unwrap(), &["age"][..]); | ||||
|         drop(rtxn); | ||||
|     } | ||||
|  | ||||
| @@ -402,10 +409,8 @@ mod tests { | ||||
|  | ||||
|         // Check that the displayed fields are correctly set to only the "age" field. | ||||
|         let rtxn = index.read_txn().unwrap(); | ||||
|         let fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); | ||||
|         let age_field_id = fields_ids_map.id("age").unwrap(); | ||||
|         let fields_ids = index.displayed_fields(&rtxn).unwrap(); | ||||
|         assert_eq!(fields_ids.unwrap(), &[age_field_id][..]); | ||||
|         assert_eq!(fields_ids.unwrap(), &["age"][..]); | ||||
|         drop(rtxn); | ||||
|  | ||||
|         // We reset the fields ids to become `None`, the default value. | ||||
| @@ -445,9 +450,9 @@ mod tests { | ||||
|         // Check that the faceted fields are correctly set. | ||||
|         let rtxn = index.read_txn().unwrap(); | ||||
|         let fields_ids = index.faceted_fields(&rtxn).unwrap(); | ||||
|         assert_eq!(fields_ids, hashmap!{ 1 => FacetType::Integer }); | ||||
|         assert_eq!(fields_ids, hashmap!{ "age".to_string() => FacetType::Integer }); | ||||
|         // Only count the field_id 0 and level 0 facet values. | ||||
|         let count = index.facet_field_id_value_docids.prefix_iter(&rtxn, &[1, 0]).unwrap().count(); | ||||
|         let count = index.facet_field_id_value_docids.prefix_iter(&rtxn, &[0, 0]).unwrap().count(); | ||||
|         assert_eq!(count, 3); | ||||
|         drop(rtxn); | ||||
|  | ||||
| @@ -461,8 +466,49 @@ mod tests { | ||||
|  | ||||
|         let rtxn = index.read_txn().unwrap(); | ||||
|         // Only count the field_id 0 and level 0 facet values. | ||||
|         let count = index.facet_field_id_value_docids.prefix_iter(&rtxn, &[1, 0]).unwrap().count(); | ||||
|         let count = index.facet_field_id_value_docids.prefix_iter(&rtxn, &[0, 0]).unwrap().count(); | ||||
|         assert_eq!(count, 4); | ||||
|         drop(rtxn); | ||||
|     } | ||||
|  | ||||
|     // Checks that setting the searchable fields in a second settings update leaves the | ||||
|     // displayed fields, the (still unset) primary key and the criteria reading back the | ||||
|     // same values as before that update. | ||||
|     #[test] | ||||
|     fn setting_searchable_recomputes_other_settings() { | ||||
|         let path = tempfile::tempdir().unwrap(); | ||||
|         let mut options = EnvOpenOptions::new(); | ||||
|         options.map_size(10 * 1024 * 1024); // 10 MB | ||||
|         let index = Index::new(options, &path).unwrap(); | ||||
|  | ||||
|         // Set all the settings except searchable | ||||
|         let mut wtxn = index.write_txn().unwrap(); | ||||
|         let mut builder = Settings::new(&mut wtxn, &index); | ||||
|         builder.set_displayed_fields(vec!["hello".to_string()]); | ||||
|         builder.set_faceted_fields(hashmap!{ | ||||
|             "age".into() => "integer".into(), | ||||
|             "toto".into() => "integer".into(), | ||||
|         }); | ||||
|         builder.set_criteria(vec!["asc(toto)".to_string()]); | ||||
|         builder.execute(|_| ()).unwrap(); | ||||
|         wtxn.commit().unwrap(); | ||||
|  | ||||
|         // check the output | ||||
|         let rtxn = index.read_txn().unwrap(); | ||||
|         assert_eq!(&["hello"][..], index.displayed_fields(&rtxn).unwrap().unwrap()); | ||||
|         // since no documents have been pushed the primary key is still unset | ||||
|         assert!(index.primary_key(&rtxn).unwrap().is_none()); | ||||
|         assert_eq!(vec![Criterion::Asc("toto".to_string())], index.criteria(&rtxn).unwrap()); | ||||
|         drop(rtxn); | ||||
|  | ||||
|         // We set toto and age as searchable to force reordering of the fields | ||||
|         let mut wtxn = index.write_txn().unwrap(); | ||||
|         let mut builder = Settings::new(&mut wtxn, &index); | ||||
|         builder.set_searchable_fields(vec!["toto".to_string(), "age".to_string()]); | ||||
|         builder.execute(|_| ()).unwrap(); | ||||
|         wtxn.commit().unwrap(); | ||||
|  | ||||
|         // The same three assertions as above must still hold after the searchable update. | ||||
|         let rtxn = index.read_txn().unwrap(); | ||||
|         assert_eq!(&["hello"][..], index.displayed_fields(&rtxn).unwrap().unwrap()); | ||||
|         assert!(index.primary_key(&rtxn).unwrap().is_none()); | ||||
|         assert_eq!(vec![Criterion::Asc("toto".to_string())], index.criteria(&rtxn).unwrap()); | ||||
|         drop(rtxn); | ||||
|     } | ||||
| } | ||||
|   | ||||
		Reference in New Issue
	
	Block a user