mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-24 20:46:27 +00:00 
			
		
		
		
	Merge pull request #67 from meilisearch/fix-settings
Fix displayed and searchable attributes
This commit is contained in:
		
							
								
								
									
										481
									
								
								Cargo.lock
									
									
									
										generated
									
									
									
								
							
							
						
						
									
										481
									
								
								Cargo.lock
									
									
									
										generated
									
									
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										632
									
								
								http-ui/Cargo.lock
									
									
									
										generated
									
									
									
								
							
							
						
						
									
										632
									
								
								http-ui/Cargo.lock
									
									
									
										generated
									
									
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							| @@ -1,4 +1,3 @@ | |||||||
| use std::borrow::Cow; |  | ||||||
| use std::collections::{HashMap, HashSet}; | use std::collections::{HashMap, HashSet}; | ||||||
| use std::fmt::Display; | use std::fmt::Display; | ||||||
| use std::fs::{File, create_dir_all}; | use std::fs::{File, create_dir_all}; | ||||||
| @@ -654,13 +653,13 @@ async fn main() -> anyhow::Result<()> { | |||||||
|  |  | ||||||
|             let mut documents = Vec::new(); |             let mut documents = Vec::new(); | ||||||
|             let fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); |             let fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); | ||||||
|             let displayed_fields = match index.displayed_fields(&rtxn).unwrap() { |             let displayed_fields = match index.displayed_fields_ids(&rtxn).unwrap() { | ||||||
|                 Some(fields) => Cow::Borrowed(fields), |                 Some(fields) => fields, | ||||||
|                 None => Cow::Owned(fields_ids_map.iter().map(|(id, _)| id).collect()), |                 None => fields_ids_map.iter().map(|(id, _)| id).collect(), | ||||||
|             }; |             }; | ||||||
|             let attributes_to_highlight = match index.searchable_fields(&rtxn).unwrap() { |             let attributes_to_highlight = match index.searchable_fields(&rtxn).unwrap() { | ||||||
|                 Some(fields) => fields.iter().flat_map(|id| fields_ids_map.name(*id)).map(ToOwned::to_owned).collect(), |                 Some(fields) => fields.into_iter().map(String::from).collect(), | ||||||
|                 None => fields_ids_map.iter().map(|(_, name)| name).map(ToOwned::to_owned).collect(), |                 None => fields_ids_map.iter().map(|(_, name)| name).map(String::from).collect(), | ||||||
|             }; |             }; | ||||||
|  |  | ||||||
|             let stop_words = fst::Set::default(); |             let stop_words = fst::Set::default(); | ||||||
| @@ -690,9 +689,9 @@ async fn main() -> anyhow::Result<()> { | |||||||
|  |  | ||||||
|             let external_documents_ids = index.external_documents_ids(&rtxn).unwrap(); |             let external_documents_ids = index.external_documents_ids(&rtxn).unwrap(); | ||||||
|             let fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); |             let fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); | ||||||
|             let displayed_fields = match index.displayed_fields(&rtxn).unwrap() { |             let displayed_fields = match index.displayed_fields_ids(&rtxn).unwrap() { | ||||||
|                 Some(fields) => Cow::Borrowed(fields), |                 Some(fields) => fields, | ||||||
|                 None => Cow::Owned(fields_ids_map.iter().map(|(id, _)| id).collect()), |                 None => fields_ids_map.iter().map(|(id, _)| id).collect(), | ||||||
|             }; |             }; | ||||||
|  |  | ||||||
|             match external_documents_ids.get(&id) { |             match external_documents_ids.get(&id) { | ||||||
|   | |||||||
| @@ -1,10 +1,12 @@ | |||||||
| use crate::{FieldsIdsMap, FieldId}; | use std::collections::HashMap; | ||||||
|  |  | ||||||
| use anyhow::{Context, bail}; | use anyhow::{Context, bail}; | ||||||
| use regex::Regex; | use regex::Regex; | ||||||
| use serde::{Serialize, Deserialize}; | use serde::{Serialize, Deserialize}; | ||||||
|  |  | ||||||
| #[derive(Debug, Serialize, Deserialize, Copy, Clone, PartialEq, Eq)] | use crate::facet::FacetType; | ||||||
|  |  | ||||||
|  | #[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)] | ||||||
| pub enum Criterion { | pub enum Criterion { | ||||||
|     /// Sorted by increasing number of typos. |     /// Sorted by increasing number of typos. | ||||||
|     Typo, |     Typo, | ||||||
| @@ -21,13 +23,13 @@ pub enum Criterion { | |||||||
|     /// Sorted by the similarity of the matched words with the query words. |     /// Sorted by the similarity of the matched words with the query words. | ||||||
|     Exactness, |     Exactness, | ||||||
|     /// Sorted by the increasing value of the field specified. |     /// Sorted by the increasing value of the field specified. | ||||||
|     Asc(FieldId), |     Asc(String), | ||||||
|     /// Sorted by the decreasing value of the field specified. |     /// Sorted by the decreasing value of the field specified. | ||||||
|     Desc(FieldId), |     Desc(String), | ||||||
| } | } | ||||||
|  |  | ||||||
| impl Criterion { | impl Criterion { | ||||||
|     pub fn from_str(fields_ids_map: &mut FieldsIdsMap, txt: &str) -> anyhow::Result<Criterion> { |     pub fn from_str(faceted_attributes: &HashMap<String, FacetType>, txt: &str) -> anyhow::Result<Criterion> { | ||||||
|         match txt { |         match txt { | ||||||
|             "typo" => Ok(Criterion::Typo), |             "typo" => Ok(Criterion::Typo), | ||||||
|             "words" => Ok(Criterion::Words), |             "words" => Ok(Criterion::Words), | ||||||
| @@ -40,22 +42,15 @@ impl Criterion { | |||||||
|                 let caps = re.captures(text).with_context(|| format!("unknown criterion name: {}", text))?; |                 let caps = re.captures(text).with_context(|| format!("unknown criterion name: {}", text))?; | ||||||
|                 let order = caps.get(1).unwrap().as_str(); |                 let order = caps.get(1).unwrap().as_str(); | ||||||
|                 let field_name = caps.get(2).unwrap().as_str(); |                 let field_name = caps.get(2).unwrap().as_str(); | ||||||
|                 let field_id = fields_ids_map.insert(field_name).context("field id limit reached")?; |                 faceted_attributes.get(field_name).with_context(|| format!("Can't use {:?} as a criterion as it isn't a faceted field.", field_name))?; | ||||||
|                 match order { |                 match order { | ||||||
|                     "asc" => Ok(Criterion::Asc(field_id)), |                     "asc" => Ok(Criterion::Asc(field_name.to_string())), | ||||||
|                     "desc" => Ok(Criterion::Desc(field_id)), |                     "desc" => Ok(Criterion::Desc(field_name.to_string())), | ||||||
|                     otherwise => bail!("unknown criterion name: {}", otherwise), |                     otherwise => bail!("unknown criterion name: {}", otherwise), | ||||||
|                 } |                 } | ||||||
|             }, |             }, | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     pub fn field_id(&self) -> Option<FieldId> { |  | ||||||
|         match *self { |  | ||||||
|             Criterion::Asc(fid) | Criterion::Desc(fid) => Some(fid), |  | ||||||
|             _ => None, |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
| } | } | ||||||
|  |  | ||||||
| pub fn default_criteria() -> Vec<Criterion> { | pub fn default_criteria() -> Vec<Criterion> { | ||||||
|   | |||||||
							
								
								
									
										89
									
								
								src/index.rs
									
									
									
									
									
								
							
							
						
						
									
										89
									
								
								src/index.rs
									
									
									
									
									
								
							| @@ -112,8 +112,8 @@ impl Index { | |||||||
|     /* primary key */ |     /* primary key */ | ||||||
|  |  | ||||||
|     /// Writes the documents primary key, this is the field name that is used to store the id. |     /// Writes the documents primary key, this is the field name that is used to store the id. | ||||||
|     pub fn put_primary_key(&self, wtxn: &mut RwTxn, primary_key: FieldId) -> heed::Result<()> { |     pub fn put_primary_key(&self, wtxn: &mut RwTxn, primary_key: &str) -> heed::Result<()> { | ||||||
|         self.main.put::<_, Str, OwnedType<FieldId>>(wtxn, PRIMARY_KEY_KEY, &primary_key) |         self.main.put::<_, Str, Str>(wtxn, PRIMARY_KEY_KEY, &primary_key) | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     /// Deletes the primary key of the documents, this can be done to reset indexes settings. |     /// Deletes the primary key of the documents, this can be done to reset indexes settings. | ||||||
| @@ -122,8 +122,8 @@ impl Index { | |||||||
|     } |     } | ||||||
|  |  | ||||||
|     /// Returns the documents primary key, `None` if it hasn't been defined. |     /// Returns the documents primary key, `None` if it hasn't been defined. | ||||||
|     pub fn primary_key(&self, rtxn: &RoTxn) -> heed::Result<Option<FieldId>> { |     pub fn primary_key<'t>(&self, rtxn: &'t RoTxn) -> heed::Result<Option<&'t str>> { | ||||||
|         self.main.get::<_, Str, OwnedType<FieldId>>(rtxn, PRIMARY_KEY_KEY) |         self.main.get::<_, Str, Str>(rtxn, PRIMARY_KEY_KEY) | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     /* external documents ids */ |     /* external documents ids */ | ||||||
| @@ -175,10 +175,10 @@ impl Index { | |||||||
|  |  | ||||||
|     /* displayed fields */ |     /* displayed fields */ | ||||||
|  |  | ||||||
|     /// Writes the fields ids that must be displayed in the defined order. |     /// Writes the fields that must be displayed in the defined order. | ||||||
|     /// There must not be any duplicate field id. |     /// There must not be any duplicate field id. | ||||||
|     pub fn put_displayed_fields(&self, wtxn: &mut RwTxn, fields: &[FieldId]) -> heed::Result<()> { |     pub fn put_displayed_fields(&self, wtxn: &mut RwTxn, fields: &[&str]) -> heed::Result<()> { | ||||||
|         self.main.put::<_, Str, ByteSlice>(wtxn, DISPLAYED_FIELDS_KEY, fields) |         self.main.put::<_, Str, SerdeBincode<&[&str]>>(wtxn, DISPLAYED_FIELDS_KEY, &fields) | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     /// Deletes the displayed fields ids, this will make the engine display |     /// Deletes the displayed fields ids, this will make the engine display | ||||||
| @@ -187,18 +187,27 @@ impl Index { | |||||||
|         self.main.delete::<_, Str>(wtxn, DISPLAYED_FIELDS_KEY) |         self.main.delete::<_, Str>(wtxn, DISPLAYED_FIELDS_KEY) | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     /// Returns the displayed fields ids in the order they must be returned. If it returns |     /// Returns the displayed fields in the order they were set by the user. If it returns | ||||||
|     /// `None` it means that all the attributes are displayed in the order of the `FieldsIdsMap`. |     /// `None` it means that all the attributes are set as displayed in the order of the `FieldsIdsMap`. | ||||||
|     pub fn displayed_fields<'t>(&self, rtxn: &'t RoTxn) -> heed::Result<Option<&'t [FieldId]>> { |     pub fn displayed_fields<'t>(&self, rtxn: &'t RoTxn) -> heed::Result<Option<Vec<&'t str>>> { | ||||||
|         self.main.get::<_, Str, ByteSlice>(rtxn, DISPLAYED_FIELDS_KEY) |         self.main.get::<_, Str, SerdeBincode<Vec<&'t str>>>(rtxn, DISPLAYED_FIELDS_KEY) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     pub fn displayed_fields_ids(&self, rtxn: &RoTxn) -> heed::Result<Option<Vec<FieldId>>> { | ||||||
|  |         let fields_ids_map = self.fields_ids_map(rtxn)?; | ||||||
|  |         let ids = self.displayed_fields(rtxn)? | ||||||
|  |             .map(|fields| fields | ||||||
|  |                 .into_iter() | ||||||
|  |                 .map(|name| fields_ids_map.id(name).expect("Field not found")) | ||||||
|  |                 .collect::<Vec<_>>()); | ||||||
|  |         Ok(ids) | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     /* searchable fields */ |     /* searchable fields */ | ||||||
|  |  | ||||||
|     /// Writes the searchable fields, when this list is specified, only these are indexed. |     /// Writes the searchable fields, when this list is specified, only these are indexed. | ||||||
|     pub fn put_searchable_fields(&self, wtxn: &mut RwTxn, fields: &[FieldId]) -> heed::Result<()> { |     pub fn put_searchable_fields(&self, wtxn: &mut RwTxn, fields: &[&str]) -> heed::Result<()> { | ||||||
|         assert!(fields.windows(2).all(|win| win[0] < win[1])); // is sorted |         self.main.put::<_, Str, SerdeBincode<&[&str]>>(wtxn, SEARCHABLE_FIELDS_KEY, &fields) | ||||||
|         self.main.put::<_, Str, ByteSlice>(wtxn, SEARCHABLE_FIELDS_KEY, fields) |  | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     /// Deletes the searchable fields, when no fields are specified, all fields are indexed. |     /// Deletes the searchable fields, when no fields are specified, all fields are indexed. | ||||||
| @@ -206,17 +215,36 @@ impl Index { | |||||||
|         self.main.delete::<_, Str>(wtxn, SEARCHABLE_FIELDS_KEY) |         self.main.delete::<_, Str>(wtxn, SEARCHABLE_FIELDS_KEY) | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     /// Returns the searchable fields ids, those are the fields that are indexed, |     /// Returns the searchable fields, those are the fields that are indexed, | ||||||
|     /// if the searchable fields aren't there it means that **all** the fields are indexed. |     /// if the searchable fields aren't there it means that **all** the fields are indexed. | ||||||
|     pub fn searchable_fields<'t>(&self, rtxn: &'t RoTxn) -> heed::Result<Option<&'t [FieldId]>> { |     pub fn searchable_fields<'t>(&self, rtxn: &'t RoTxn) -> heed::Result<Option<Vec<&'t str>>> { | ||||||
|         self.main.get::<_, Str, ByteSlice>(rtxn, SEARCHABLE_FIELDS_KEY) |         self.main.get::<_, Str, SerdeBincode<Vec<&'t str>>>(rtxn, SEARCHABLE_FIELDS_KEY) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Identical to `searchable_fields`, but returns the ids instead. | ||||||
|  |     pub fn searchable_fields_ids(&self, rtxn: &RoTxn) -> heed::Result<Option<Vec<FieldId>>> { | ||||||
|  |         match self.searchable_fields(rtxn)? { | ||||||
|  |             Some(names) => { | ||||||
|  |                 let fields_map = self.fields_ids_map(rtxn)?; | ||||||
|  |                 let mut ids = Vec::new(); | ||||||
|  |                 for name in names { | ||||||
|  |                     let id = fields_map | ||||||
|  |                         .id(name) | ||||||
|  |                         .ok_or_else(|| format!("field id map must contain {:?}", name)) | ||||||
|  |                         .expect("corrupted data: "); | ||||||
|  |                     ids.push(id); | ||||||
|  |                 } | ||||||
|  |                 Ok(Some(ids)) | ||||||
|  |             } | ||||||
|  |             None => Ok(None), | ||||||
|  |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     /* faceted fields */ |     /* faceted fields */ | ||||||
|  |  | ||||||
|     /// Writes the facet fields ids associated with their facet type or `None` if |     /// Writes the facet fields associated with their facet type or `None` if | ||||||
|     /// the facet type is currently unknown. |     /// the facet type is currently unknown. | ||||||
|     pub fn put_faceted_fields(&self, wtxn: &mut RwTxn, fields_types: &HashMap<FieldId, FacetType>) -> heed::Result<()> { |     pub fn put_faceted_fields(&self, wtxn: &mut RwTxn, fields_types: &HashMap<String, FacetType>) -> heed::Result<()> { | ||||||
|         self.main.put::<_, Str, SerdeJson<_>>(wtxn, FACETED_FIELDS_KEY, fields_types) |         self.main.put::<_, Str, SerdeJson<_>>(wtxn, FACETED_FIELDS_KEY, fields_types) | ||||||
|     } |     } | ||||||
|  |  | ||||||
| @@ -225,9 +253,26 @@ impl Index { | |||||||
|         self.main.delete::<_, Str>(wtxn, FACETED_FIELDS_KEY) |         self.main.delete::<_, Str>(wtxn, FACETED_FIELDS_KEY) | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     /// Returns the facet fields ids associated with their facet type. |     /// Returns the facet fields names associated with their facet type. | ||||||
|     pub fn faceted_fields(&self, wtxn: &RoTxn) -> heed::Result<HashMap<FieldId, FacetType>> { |     pub fn faceted_fields(&self, rtxn: &RoTxn) -> heed::Result<HashMap<String, FacetType>> { | ||||||
|         Ok(self.main.get::<_, Str, SerdeJson<_>>(wtxn, FACETED_FIELDS_KEY)?.unwrap_or_default()) |         Ok(self.main.get::<_, Str, SerdeJson<_>>(rtxn, FACETED_FIELDS_KEY)?.unwrap_or_default()) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Same as `faceted_fields`, but returns ids instead. | ||||||
|  |     pub fn faceted_fields_ids(&self, rtxn: &RoTxn) -> heed::Result<HashMap<FieldId, FacetType>> { | ||||||
|  |         let faceted_fields = self.faceted_fields(rtxn)?; | ||||||
|  |         let fields_ids_map = self.fields_ids_map(rtxn)?; | ||||||
|  |         let faceted_fields = faceted_fields | ||||||
|  |             .iter() | ||||||
|  |             .map(|(k, v)| { | ||||||
|  |                 let kid = fields_ids_map | ||||||
|  |                     .id(k) | ||||||
|  |                     .ok_or_else(|| format!("{:?} should be present in the field id map", k)) | ||||||
|  |                     .expect("corrupted data: "); | ||||||
|  |                 (kid, *v) | ||||||
|  |             }) | ||||||
|  |             .collect(); | ||||||
|  |         Ok(faceted_fields) | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     /* faceted documents ids */ |     /* faceted documents ids */ | ||||||
|   | |||||||
| @@ -148,7 +148,7 @@ impl FacetCondition { | |||||||
|     ) -> anyhow::Result<FacetCondition> |     ) -> anyhow::Result<FacetCondition> | ||||||
|     { |     { | ||||||
|         let fields_ids_map = index.fields_ids_map(rtxn)?; |         let fields_ids_map = index.fields_ids_map(rtxn)?; | ||||||
|         let faceted_fields = index.faceted_fields(rtxn)?; |         let faceted_fields = index.faceted_fields_ids(rtxn)?; | ||||||
|         let lexed = FilterParser::parse(Rule::prgm, expression)?; |         let lexed = FilterParser::parse(Rule::prgm, expression)?; | ||||||
|         FacetCondition::from_pairs(&fields_ids_map, &faceted_fields, lexed) |         FacetCondition::from_pairs(&fields_ids_map, &faceted_fields, lexed) | ||||||
|     } |     } | ||||||
| @@ -552,15 +552,15 @@ mod tests { | |||||||
|         // Test that the facet condition is correctly generated. |         // Test that the facet condition is correctly generated. | ||||||
|         let rtxn = index.read_txn().unwrap(); |         let rtxn = index.read_txn().unwrap(); | ||||||
|         let condition = FacetCondition::from_str(&rtxn, &index, "channel = ponce").unwrap(); |         let condition = FacetCondition::from_str(&rtxn, &index, "channel = ponce").unwrap(); | ||||||
|         let expected = OperatorString(1, FacetStringOperator::equal("Ponce")); |         let expected = OperatorString(0, FacetStringOperator::equal("Ponce")); | ||||||
|         assert_eq!(condition, expected); |         assert_eq!(condition, expected); | ||||||
|  |  | ||||||
|         let condition = FacetCondition::from_str(&rtxn, &index, "channel != ponce").unwrap(); |         let condition = FacetCondition::from_str(&rtxn, &index, "channel != ponce").unwrap(); | ||||||
|         let expected = OperatorString(1, FacetStringOperator::not_equal("ponce")); |         let expected = OperatorString(0, FacetStringOperator::not_equal("ponce")); | ||||||
|         assert_eq!(condition, expected); |         assert_eq!(condition, expected); | ||||||
|  |  | ||||||
|         let condition = FacetCondition::from_str(&rtxn, &index, "NOT channel = ponce").unwrap(); |         let condition = FacetCondition::from_str(&rtxn, &index, "NOT channel = ponce").unwrap(); | ||||||
|         let expected = OperatorString(1, FacetStringOperator::not_equal("ponce")); |         let expected = OperatorString(0, FacetStringOperator::not_equal("ponce")); | ||||||
|         assert_eq!(condition, expected); |         assert_eq!(condition, expected); | ||||||
|     } |     } | ||||||
|  |  | ||||||
| @@ -581,13 +581,13 @@ mod tests { | |||||||
|         // Test that the facet condition is correctly generated. |         // Test that the facet condition is correctly generated. | ||||||
|         let rtxn = index.read_txn().unwrap(); |         let rtxn = index.read_txn().unwrap(); | ||||||
|         let condition = FacetCondition::from_str(&rtxn, &index, "timestamp 22 TO 44").unwrap(); |         let condition = FacetCondition::from_str(&rtxn, &index, "timestamp 22 TO 44").unwrap(); | ||||||
|         let expected = OperatorI64(1, Between(22, 44)); |         let expected = OperatorI64(0, Between(22, 44)); | ||||||
|         assert_eq!(condition, expected); |         assert_eq!(condition, expected); | ||||||
|  |  | ||||||
|         let condition = FacetCondition::from_str(&rtxn, &index, "NOT timestamp 22 TO 44").unwrap(); |         let condition = FacetCondition::from_str(&rtxn, &index, "NOT timestamp 22 TO 44").unwrap(); | ||||||
|         let expected = Or( |         let expected = Or( | ||||||
|             Box::new(OperatorI64(1, LowerThan(22))), |             Box::new(OperatorI64(0, LowerThan(22))), | ||||||
|             Box::new(OperatorI64(1, GreaterThan(44))), |             Box::new(OperatorI64(0, GreaterThan(44))), | ||||||
|         ); |         ); | ||||||
|         assert_eq!(condition, expected); |         assert_eq!(condition, expected); | ||||||
|     } |     } | ||||||
|   | |||||||
| @@ -285,9 +285,13 @@ impl<'a> Search<'a> { | |||||||
|                 } |                 } | ||||||
|             }).next(); |             }).next(); | ||||||
|             match result { |             match result { | ||||||
|                 Some((fid, is_ascending)) => { |                 Some((attr_name, is_ascending)) => { | ||||||
|                     let faceted_fields = self.index.faceted_fields(self.rtxn)?; |                     let field_id_map = self.index.fields_ids_map(self.rtxn)?; | ||||||
|                     let ftype = *faceted_fields.get(&fid).context("unknown field id")?; |                     let fid = field_id_map.id(&attr_name).with_context(|| format!("unknown field: {:?}", attr_name))?; | ||||||
|  |                     let faceted_fields = self.index.faceted_fields_ids(self.rtxn)?; | ||||||
|  |                     let ftype = *faceted_fields.get(&fid) | ||||||
|  |                         .with_context(|| format!("{:?} not found in the faceted fields.", attr_name)) | ||||||
|  |                         .expect("corrupted data: "); | ||||||
|                     Some((fid, ftype, is_ascending)) |                     Some((fid, ftype, is_ascending)) | ||||||
|                 }, |                 }, | ||||||
|                 None => None, |                 None => None, | ||||||
|   | |||||||
| @@ -342,7 +342,7 @@ fn biggest_value_sizes(index: &Index, rtxn: &heed::RoTxn, limit: usize) -> anyho | |||||||
|             if heap.len() > limit { heap.pop(); } |             if heap.len() > limit { heap.pop(); } | ||||||
|         } |         } | ||||||
|  |  | ||||||
|         let faceted_fields = index.faceted_fields(rtxn)?; |         let faceted_fields = index.faceted_fields_ids(rtxn)?; | ||||||
|         let fields_ids_map = index.fields_ids_map(rtxn)?; |         let fields_ids_map = index.fields_ids_map(rtxn)?; | ||||||
|         for (field_id, field_type) in faceted_fields { |         for (field_id, field_type) in faceted_fields { | ||||||
|             let facet_name = fields_ids_map.name(field_id).unwrap(); |             let facet_name = fields_ids_map.name(field_id).unwrap(); | ||||||
| @@ -413,7 +413,7 @@ fn words_docids(index: &Index, rtxn: &heed::RoTxn, debug: bool, words: Vec<Strin | |||||||
|  |  | ||||||
| fn facet_values_docids(index: &Index, rtxn: &heed::RoTxn, debug: bool, field_name: String) -> anyhow::Result<()> { | fn facet_values_docids(index: &Index, rtxn: &heed::RoTxn, debug: bool, field_name: String) -> anyhow::Result<()> { | ||||||
|     let fields_ids_map = index.fields_ids_map(&rtxn)?; |     let fields_ids_map = index.fields_ids_map(&rtxn)?; | ||||||
|     let faceted_fields = index.faceted_fields(&rtxn)?; |     let faceted_fields = index.faceted_fields_ids(&rtxn)?; | ||||||
|  |  | ||||||
|     let field_id = fields_ids_map.id(&field_name) |     let field_id = fields_ids_map.id(&field_name) | ||||||
|         .with_context(|| format!("field {} not found", field_name))?; |         .with_context(|| format!("field {} not found", field_name))?; | ||||||
| @@ -451,7 +451,7 @@ fn facet_values_docids(index: &Index, rtxn: &heed::RoTxn, debug: bool, field_nam | |||||||
|  |  | ||||||
| fn facet_stats(index: &Index, rtxn: &heed::RoTxn, field_name: String) -> anyhow::Result<()> { | fn facet_stats(index: &Index, rtxn: &heed::RoTxn, field_name: String) -> anyhow::Result<()> { | ||||||
|     let fields_ids_map = index.fields_ids_map(&rtxn)?; |     let fields_ids_map = index.fields_ids_map(&rtxn)?; | ||||||
|     let faceted_fields = index.faceted_fields(&rtxn)?; |     let faceted_fields = index.faceted_fields_ids(&rtxn)?; | ||||||
|  |  | ||||||
|     let field_id = fields_ids_map.id(&field_name) |     let field_id = fields_ids_map.id(&field_name) | ||||||
|         .with_context(|| format!("field {} not found", field_name))?; |         .with_context(|| format!("field {} not found", field_name))?; | ||||||
|   | |||||||
| @@ -1,4 +1,3 @@ | |||||||
| use std::borrow::Cow; |  | ||||||
| use std::io::{self, BufRead, Write}; | use std::io::{self, BufRead, Write}; | ||||||
| use std::iter::once; | use std::iter::once; | ||||||
| use std::path::PathBuf; | use std::path::PathBuf; | ||||||
| @@ -47,9 +46,9 @@ pub fn run(opt: Opt) -> anyhow::Result<()> { | |||||||
|     let index = Index::new(options, &opt.database)?; |     let index = Index::new(options, &opt.database)?; | ||||||
|     let rtxn = index.read_txn()?; |     let rtxn = index.read_txn()?; | ||||||
|     let fields_ids_map = index.fields_ids_map(&rtxn)?; |     let fields_ids_map = index.fields_ids_map(&rtxn)?; | ||||||
|     let displayed_fields = match index.displayed_fields(&rtxn)? { |     let displayed_fields = match index.displayed_fields_ids(&rtxn)? { | ||||||
|         Some(fields) => Cow::Borrowed(fields), |         Some(fields) => fields, | ||||||
|         None => Cow::Owned(fields_ids_map.iter().map(|(id, _)| id).collect()), |         None => fields_ids_map.iter().map(|(id, _)| id).collect(), | ||||||
|     }; |     }; | ||||||
|  |  | ||||||
|     let stdin = io::stdin(); |     let stdin = io::stdin(); | ||||||
|   | |||||||
| @@ -25,7 +25,7 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> { | |||||||
|  |  | ||||||
|         // We retrieve the number of documents ids that we are deleting. |         // We retrieve the number of documents ids that we are deleting. | ||||||
|         let number_of_documents = self.index.number_of_documents(self.wtxn)?; |         let number_of_documents = self.index.number_of_documents(self.wtxn)?; | ||||||
|         let faceted_fields = self.index.faceted_fields(self.wtxn)?; |         let faceted_fields = self.index.faceted_fields_ids(self.wtxn)?; | ||||||
|  |  | ||||||
|         // We clean some of the main engine datastructures. |         // We clean some of the main engine datastructures. | ||||||
|         self.index.put_words_fst(self.wtxn, &fst::Set::default())?; |         self.index.put_words_fst(self.wtxn, &fst::Set::default())?; | ||||||
|   | |||||||
| @@ -188,7 +188,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { | |||||||
|         drop(iter); |         drop(iter); | ||||||
|  |  | ||||||
|         // Remove the documents ids from the faceted documents ids. |         // Remove the documents ids from the faceted documents ids. | ||||||
|         let faceted_fields = self.index.faceted_fields(self.wtxn)?; |         let faceted_fields = self.index.faceted_fields_ids(self.wtxn)?; | ||||||
|         for (field_id, facet_type) in faceted_fields { |         for (field_id, facet_type) in faceted_fields { | ||||||
|             let mut docids = self.index.faceted_documents_ids(self.wtxn, field_id)?; |             let mut docids = self.index.faceted_documents_ids(self.wtxn, field_id)?; | ||||||
|             docids.difference_with(&self.documents_ids); |             docids.difference_with(&self.documents_ids); | ||||||
|   | |||||||
| @@ -51,7 +51,7 @@ impl<'t, 'u, 'i> Facets<'t, 'u, 'i> { | |||||||
|  |  | ||||||
|     pub fn execute(self) -> anyhow::Result<()> { |     pub fn execute(self) -> anyhow::Result<()> { | ||||||
|         // We get the faceted fields to be able to create the facet levels. |         // We get the faceted fields to be able to create the facet levels. | ||||||
|         let faceted_fields = self.index.faceted_fields(self.wtxn)?; |         let faceted_fields = self.index.faceted_fields_ids(self.wtxn)?; | ||||||
|  |  | ||||||
|         debug!("Computing and writing the facet values levels docids into LMDB on disk..."); |         debug!("Computing and writing the facet values levels docids into LMDB on disk..."); | ||||||
|         for (field_id, facet_type) in faceted_fields { |         for (field_id, facet_type) in faceted_fields { | ||||||
|   | |||||||
| @@ -338,8 +338,8 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> { | |||||||
|             FacetLevel0ValuesDocids, |             FacetLevel0ValuesDocids, | ||||||
|         } |         } | ||||||
|  |  | ||||||
|         let faceted_fields = self.index.faceted_fields(self.wtxn)?; |         let faceted_fields = self.index.faceted_fields_ids(self.wtxn)?; | ||||||
|         let searchable_fields: HashSet<_> = match self.index.searchable_fields(self.wtxn)? { |         let searchable_fields: HashSet<_> = match self.index.searchable_fields_ids(self.wtxn)? { | ||||||
|             Some(fields) => fields.iter().copied().collect(), |             Some(fields) => fields.iter().copied().collect(), | ||||||
|             None => fields_ids_map.iter().map(|(id, _name)| id).collect(), |             None => fields_ids_map.iter().map(|(id, _name)| id).collect(), | ||||||
|         }; |         }; | ||||||
| @@ -485,7 +485,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> { | |||||||
|         self.index.put_fields_ids_map(self.wtxn, &fields_ids_map)?; |         self.index.put_fields_ids_map(self.wtxn, &fields_ids_map)?; | ||||||
|  |  | ||||||
|         // We write the primary key field id into the main database |         // We write the primary key field id into the main database | ||||||
|         self.index.put_primary_key(self.wtxn, primary_key)?; |         self.index.put_primary_key(self.wtxn, &primary_key)?; | ||||||
|  |  | ||||||
|         // We write the external documents ids into the main database. |         // We write the external documents ids into the main database. | ||||||
|         self.index.put_external_documents_ids(self.wtxn, &external_documents_ids)?; |         self.index.put_external_documents_ids(self.wtxn, &external_documents_ids)?; | ||||||
|   | |||||||
| @@ -10,13 +10,15 @@ use log::info; | |||||||
| use roaring::RoaringBitmap; | use roaring::RoaringBitmap; | ||||||
| use serde_json::{Map, Value}; | use serde_json::{Map, Value}; | ||||||
|  |  | ||||||
| use crate::{BEU32, MergeFn, Index, FieldId, FieldsIdsMap, ExternalDocumentsIds}; | use crate::{Index, BEU32, MergeFn, FieldsIdsMap, ExternalDocumentsIds, FieldId}; | ||||||
| use crate::update::{AvailableDocumentsIds, UpdateIndexingStep}; | use crate::update::{AvailableDocumentsIds, UpdateIndexingStep}; | ||||||
| use super::merge_function::merge_two_obkvs; | use super::merge_function::merge_two_obkvs; | ||||||
| use super::{create_writer, create_sorter, IndexDocumentsMethod}; | use super::{create_writer, create_sorter, IndexDocumentsMethod}; | ||||||
|  |  | ||||||
|  | const DEFAULT_PRIMARY_KEY_NAME: &str = "id"; | ||||||
|  |  | ||||||
| pub struct TransformOutput { | pub struct TransformOutput { | ||||||
|     pub primary_key: FieldId, |     pub primary_key: String, | ||||||
|     pub fields_ids_map: FieldsIdsMap, |     pub fields_ids_map: FieldsIdsMap, | ||||||
|     pub external_documents_ids: ExternalDocumentsIds<'static>, |     pub external_documents_ids: ExternalDocumentsIds<'static>, | ||||||
|     pub new_documents_ids: RoaringBitmap, |     pub new_documents_ids: RoaringBitmap, | ||||||
| @@ -73,7 +75,6 @@ impl Transform<'_, '_> { | |||||||
|     { |     { | ||||||
|         let mut fields_ids_map = self.index.fields_ids_map(self.rtxn)?; |         let mut fields_ids_map = self.index.fields_ids_map(self.rtxn)?; | ||||||
|         let external_documents_ids = self.index.external_documents_ids(self.rtxn).unwrap(); |         let external_documents_ids = self.index.external_documents_ids(self.rtxn).unwrap(); | ||||||
|         let primary_key = self.index.primary_key(self.rtxn)?; |  | ||||||
|  |  | ||||||
|         // Deserialize the whole batch of documents in memory. |         // Deserialize the whole batch of documents in memory. | ||||||
|         let mut documents: Peekable<Box<dyn Iterator<Item=serde_json::Result<Map<String, Value>>>>> = if is_stream { |         let mut documents: Peekable<Box<dyn Iterator<Item=serde_json::Result<Map<String, Value>>>>> = if is_stream { | ||||||
| @@ -88,27 +89,15 @@ impl Transform<'_, '_> { | |||||||
|         }; |         }; | ||||||
|  |  | ||||||
|         // We extract the primary key from the first document in |         // We extract the primary key from the first document in | ||||||
|         // the batch if it hasn't already been defined in the index. |         // the batch if it hasn't already been defined in the index | ||||||
|         let primary_key = match primary_key { |         let first = documents.peek().and_then(|r| r.as_ref().ok()); | ||||||
|             Some(primary_key) => primary_key, |         let alternative_name = first.and_then(|doc| doc.keys().find(|k| k.contains(DEFAULT_PRIMARY_KEY_NAME)).cloned()); | ||||||
|             None => { |         let (primary_key_id, primary_key) = compute_primary_key_pair( | ||||||
|                 // We ignore a potential error here as we can't early return it now, |             self.index.primary_key(self.rtxn)?, | ||||||
|                 // the peek method gives us only a reference on the next item, |             &mut fields_ids_map, | ||||||
|                 // we will eventually return it in the iteration just after. |             alternative_name, | ||||||
|                 let first = documents.peek().and_then(|r| r.as_ref().ok()); |             self.autogenerate_docids | ||||||
|                 match first.and_then(|doc| doc.keys().find(|k| k.contains("id"))) { |         )?; | ||||||
|                     Some(key) => fields_ids_map.insert(&key).context("field id limit reached")?, |  | ||||||
|                     None => { |  | ||||||
|                         if !self.autogenerate_docids { |  | ||||||
|                             // If there is no primary key in the current document batch, we must |  | ||||||
|                             // return an error and not automatically generate any document id. |  | ||||||
|                             return Err(anyhow!("missing primary key")) |  | ||||||
|                         } |  | ||||||
|                         fields_ids_map.insert("id").context("field id limit reached")? |  | ||||||
|                     }, |  | ||||||
|                 } |  | ||||||
|             }, |  | ||||||
|         }; |  | ||||||
|  |  | ||||||
|         if documents.peek().is_none() { |         if documents.peek().is_none() { | ||||||
|             return Ok(TransformOutput { |             return Ok(TransformOutput { | ||||||
| @@ -122,13 +111,6 @@ impl Transform<'_, '_> { | |||||||
|             }); |             }); | ||||||
|         } |         } | ||||||
|  |  | ||||||
|         // Get the primary key field name now, this way we will |  | ||||||
|         // be able to get the value in the JSON Map document. |  | ||||||
|         let primary_key_name = fields_ids_map |  | ||||||
|             .name(primary_key) |  | ||||||
|             .expect("found the primary key name") |  | ||||||
|             .to_owned(); |  | ||||||
|  |  | ||||||
|         // We must choose the appropriate merge function for when two or more documents |         // We must choose the appropriate merge function for when two or more documents | ||||||
|         // with the same user id must be merged or fully replaced in the same batch. |         // with the same user id must be merged or fully replaced in the same batch. | ||||||
|         let merge_function = match self.index_documents_method { |         let merge_function = match self.index_documents_method { | ||||||
| @@ -170,7 +152,7 @@ impl Transform<'_, '_> { | |||||||
|  |  | ||||||
|             // We retrieve the user id from the document based on the primary key name, |             // We retrieve the user id from the document based on the primary key name, | ||||||
|             // if the document id isn't present we generate a uuid. |             // if the document id isn't present we generate a uuid. | ||||||
|             let external_id = match document.get(&primary_key_name) { |             let external_id = match document.get(&primary_key) { | ||||||
|                 Some(value) => match value { |                 Some(value) => match value { | ||||||
|                     Value::String(string) => Cow::Borrowed(string.as_str()), |                     Value::String(string) => Cow::Borrowed(string.as_str()), | ||||||
|                     Value::Number(number) => Cow::Owned(number.to_string()), |                     Value::Number(number) => Cow::Owned(number.to_string()), | ||||||
| @@ -196,7 +178,7 @@ impl Transform<'_, '_> { | |||||||
|                     serde_json::to_writer(&mut json_buffer, value)?; |                     serde_json::to_writer(&mut json_buffer, value)?; | ||||||
|                     writer.insert(field_id, &json_buffer)?; |                     writer.insert(field_id, &json_buffer)?; | ||||||
|                 } |                 } | ||||||
|                 else if field_id == primary_key { |                 else if field_id == primary_key_id { | ||||||
|                     // We validate the document id [a-zA-Z0-9\-_]. |                     // We validate the document id [a-zA-Z0-9\-_]. | ||||||
|                     let external_id = match validate_document_id(&external_id) { |                     let external_id = match validate_document_id(&external_id) { | ||||||
|                         Some(valid) => valid, |                         Some(valid) => valid, | ||||||
| @@ -240,42 +222,37 @@ impl Transform<'_, '_> { | |||||||
|  |  | ||||||
|         let mut csv = csv::Reader::from_reader(reader); |         let mut csv = csv::Reader::from_reader(reader); | ||||||
|         let headers = csv.headers()?; |         let headers = csv.headers()?; | ||||||
|         let primary_key = self.index.primary_key(self.rtxn)?; |  | ||||||
|  |  | ||||||
|         // Generate the new fields ids based on the current fields ids and this CSV headers. |  | ||||||
|         let mut fields_ids = Vec::new(); |         let mut fields_ids = Vec::new(); | ||||||
|  |         // Generate the new fields ids based on the current fields ids and this CSV headers. | ||||||
|         for (i, header) in headers.iter().enumerate() { |         for (i, header) in headers.iter().enumerate() { | ||||||
|             let id = fields_ids_map.insert(header).context("field id limit reached)")?; |             let id = fields_ids_map.insert(header).context("field id limit reached)")?; | ||||||
|             fields_ids.push((id, i)); |             fields_ids.push((id, i)); | ||||||
|         } |         } | ||||||
|  |  | ||||||
|         // Extract the position of the primary key in the current headers, None if not found. |         // Extract the position of the primary key in the current headers, None if not found. | ||||||
|         let external_id_pos = match primary_key { |         let primary_key_pos = match self.index.primary_key(self.rtxn)? { | ||||||
|             Some(primary_key) => { |             Some(primary_key) => { | ||||||
|                 // Te primary key have is known so we must find the position in the CSV headers. |                // The primary key is known so we must find the position in the CSV headers. | ||||||
|                 let name = fields_ids_map.name(primary_key).expect("found the primary key name"); |                headers.iter().position(|h| h == primary_key) | ||||||
|                 headers.iter().position(|h| h == name) |  | ||||||
|             }, |             }, | ||||||
|             None => headers.iter().position(|h| h.contains("id")), |             None => headers.iter().position(|h| h.contains("id")), | ||||||
|         }; |         }; | ||||||
|  |  | ||||||
|         // Returns the field id in the fileds ids map, create an "id" field |         // Returns the field id in the fields ids map, create an "id" field | ||||||
|         // in case it is not in the current headers. |         // in case it is not in the current headers. | ||||||
|         let primary_key_field_id = match external_id_pos { |         let alternative_name = primary_key_pos.map(|pos| headers[pos].to_string()); | ||||||
|             Some(pos) => fields_ids_map.id(&headers[pos]).expect("found the primary key"), |         let (primary_key_id, _) = compute_primary_key_pair( | ||||||
|             None => { |             self.index.primary_key(self.rtxn)?, | ||||||
|                 if !self.autogenerate_docids { |             &mut fields_ids_map, | ||||||
|                     // If there is no primary key in the current document batch, we must |             alternative_name, | ||||||
|                     // return an error and not automatically generate any document id. |             self.autogenerate_docids | ||||||
|                     return Err(anyhow!("missing primary key")) |         )?; | ||||||
|                 } |  | ||||||
|                 let field_id = fields_ids_map.insert("id").context("field id limit reached")?; |         // The primary key field is not present in the header, so we need to create it. | ||||||
|                 // We make sure to add the primary key field id to the fields ids, |         if primary_key_pos.is_none() { | ||||||
|                 // this way it is added to the obks. |             fields_ids.push((primary_key_id, usize::max_value())); | ||||||
|                 fields_ids.push((field_id, usize::max_value())); |         } | ||||||
|                 field_id |  | ||||||
|             }, |  | ||||||
|         }; |  | ||||||
|  |  | ||||||
|         // We sort the fields ids by the fields ids map id, this way we are sure to iterate over |         // We sort the fields ids by the fields ids map id, this way we are sure to iterate over | ||||||
|         // the records fields in the fields ids map order and correctly generate the obkv. |         // the records fields in the fields ids map order and correctly generate the obkv. | ||||||
| @@ -310,7 +287,7 @@ impl Transform<'_, '_> { | |||||||
|             } |             } | ||||||
|  |  | ||||||
|             // We extract the user id if we know where it is or generate an UUID V4 otherwise. |             // We extract the user id if we know where it is or generate an UUID V4 otherwise. | ||||||
|             let external_id = match external_id_pos { |             let external_id = match primary_key_pos { | ||||||
|                 Some(pos) => { |                 Some(pos) => { | ||||||
|                     let external_id = &record[pos]; |                     let external_id = &record[pos]; | ||||||
|                     // We validate the document id [a-zA-Z0-9\-_]. |                     // We validate the document id [a-zA-Z0-9\-_]. | ||||||
| @@ -326,7 +303,7 @@ impl Transform<'_, '_> { | |||||||
|             // we return the generated document id instead of the record field. |             // we return the generated document id instead of the record field. | ||||||
|             let iter = fields_ids.iter() |             let iter = fields_ids.iter() | ||||||
|                 .map(|(fi, i)| { |                 .map(|(fi, i)| { | ||||||
|                     let field = if *fi == primary_key_field_id { external_id } else { &record[*i] }; |                     let field = if *fi == primary_key_id { external_id } else { &record[*i] }; | ||||||
|                     (fi, field) |                     (fi, field) | ||||||
|                 }); |                 }); | ||||||
|  |  | ||||||
| @@ -349,9 +326,13 @@ impl Transform<'_, '_> { | |||||||
|  |  | ||||||
|         // Now that we have a valid sorter that contains the user id and the obkv we |         // Now that we have a valid sorter that contains the user id and the obkv we | ||||||
|         // give it to the last transforming function which returns the TransformOutput. |         // give it to the last transforming function which returns the TransformOutput. | ||||||
|  |         let primary_key_name = fields_ids_map | ||||||
|  |             .name(primary_key_id) | ||||||
|  |             .map(String::from) | ||||||
|  |             .expect("Primary key must be present in fields id map"); | ||||||
|         self.output_from_sorter( |         self.output_from_sorter( | ||||||
|             sorter, |             sorter, | ||||||
|             primary_key_field_id, |             primary_key_name, | ||||||
|             fields_ids_map, |             fields_ids_map, | ||||||
|             documents_count, |             documents_count, | ||||||
|             external_documents_ids, |             external_documents_ids, | ||||||
| @@ -365,7 +346,7 @@ impl Transform<'_, '_> { | |||||||
|     fn output_from_sorter<F>( |     fn output_from_sorter<F>( | ||||||
|         self, |         self, | ||||||
|         sorter: grenad::Sorter<MergeFn>, |         sorter: grenad::Sorter<MergeFn>, | ||||||
|         primary_key: FieldId, |         primary_key: String, | ||||||
|         fields_ids_map: FieldsIdsMap, |         fields_ids_map: FieldsIdsMap, | ||||||
|         approximate_number_of_documents: usize, |         approximate_number_of_documents: usize, | ||||||
|         mut external_documents_ids: ExternalDocumentsIds<'_>, |         mut external_documents_ids: ExternalDocumentsIds<'_>, | ||||||
| @@ -477,11 +458,11 @@ impl Transform<'_, '_> { | |||||||
|     // TODO this can be done in parallel by using the rayon `ThreadPool`. |     // TODO this can be done in parallel by using the rayon `ThreadPool`. | ||||||
|     pub fn remap_index_documents( |     pub fn remap_index_documents( | ||||||
|         self, |         self, | ||||||
|         primary_key: FieldId, |         primary_key: String, | ||||||
|         fields_ids_map: FieldsIdsMap, |         old_fields_ids_map: FieldsIdsMap, | ||||||
|  |         new_fields_ids_map: FieldsIdsMap, | ||||||
|     ) -> anyhow::Result<TransformOutput> |     ) -> anyhow::Result<TransformOutput> | ||||||
|     { |     { | ||||||
|         let current_fields_ids_map = self.index.fields_ids_map(self.rtxn)?; |  | ||||||
|         let external_documents_ids = self.index.external_documents_ids(self.rtxn)?; |         let external_documents_ids = self.index.external_documents_ids(self.rtxn)?; | ||||||
|         let documents_ids = self.index.documents_ids(self.rtxn)?; |         let documents_ids = self.index.documents_ids(self.rtxn)?; | ||||||
|         let documents_count = documents_ids.len() as usize; |         let documents_count = documents_ids.len() as usize; | ||||||
| @@ -499,8 +480,8 @@ impl Transform<'_, '_> { | |||||||
|             let mut obkv_writer = obkv::KvWriter::new(&mut obkv_buffer); |             let mut obkv_writer = obkv::KvWriter::new(&mut obkv_buffer); | ||||||
|  |  | ||||||
|             // We iterate over the new `FieldsIdsMap` ids in order and construct the new obkv. |             // We iterate over the new `FieldsIdsMap` ids in order and construct the new obkv. | ||||||
|             for (id, name) in fields_ids_map.iter() { |             for (id, name) in new_fields_ids_map.iter() { | ||||||
|                 if let Some(val) = current_fields_ids_map.id(name).and_then(|id| obkv.get(id)) { |                 if let Some(val) = old_fields_ids_map.id(name).and_then(|id| obkv.get(id)) { | ||||||
|                     obkv_writer.insert(id, val)?; |                     obkv_writer.insert(id, val)?; | ||||||
|                 } |                 } | ||||||
|             } |             } | ||||||
| @@ -516,7 +497,7 @@ impl Transform<'_, '_> { | |||||||
|  |  | ||||||
|         Ok(TransformOutput { |         Ok(TransformOutput { | ||||||
|             primary_key, |             primary_key, | ||||||
|             fields_ids_map, |             fields_ids_map: new_fields_ids_map, | ||||||
|             external_documents_ids: external_documents_ids.into_static(), |             external_documents_ids: external_documents_ids.into_static(), | ||||||
|             new_documents_ids: documents_ids, |             new_documents_ids: documents_ids, | ||||||
|             replaced_documents_ids: RoaringBitmap::default(), |             replaced_documents_ids: RoaringBitmap::default(), | ||||||
| @@ -526,6 +507,42 @@ impl Transform<'_, '_> { | |||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
|  | /// Given an optional primary key and an optional alternative name, returns the (field_id, attr_name) | ||||||
|  | /// for the primary key according to the following rules: | ||||||
|  | /// - if primary_key is `Some`, returns the id and the name, else | ||||||
|  | /// - if alternative_name is Some, adds alternative to the fields_ids_map, and returns the pair, else | ||||||
|  | /// - if autogenerate_docids is true, insert the default id value in the field ids map ("id") and | ||||||
|  | /// returns the pair, else | ||||||
|  | /// - returns an error. | ||||||
|  | fn compute_primary_key_pair( | ||||||
|  |     primary_key: Option<&str>, | ||||||
|  |     fields_ids_map: &mut FieldsIdsMap, | ||||||
|  |     alternative_name: Option<String>, | ||||||
|  |     autogenerate_docids: bool, | ||||||
|  | ) -> anyhow::Result<(FieldId, String)> { | ||||||
|  |     match primary_key { | ||||||
|  |         Some(primary_key) => { | ||||||
|  |             let id = fields_ids_map.id(primary_key).expect("primary key must be present in the fields id map"); | ||||||
|  |             Ok((id, primary_key.to_string())) | ||||||
|  |         } | ||||||
|  |         None => { | ||||||
|  |             let name = match alternative_name { | ||||||
|  |                 Some(key) => key, | ||||||
|  |                 None => { | ||||||
|  |                     if !autogenerate_docids { | ||||||
|  |                         // If there is no primary key in the current document batch, we must | ||||||
|  |                         // return an error and not automatically generate any document id. | ||||||
|  |                         anyhow::bail!("missing primary key") | ||||||
|  |                     } | ||||||
|  |                     DEFAULT_PRIMARY_KEY_NAME.to_string() | ||||||
|  |                 }, | ||||||
|  |             }; | ||||||
|  |             let id = fields_ids_map.insert(&name).context("field id limit reached")?; | ||||||
|  |             Ok((id, name)) | ||||||
|  |         }, | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
| /// Only the last value associated with an id is kept. | /// Only the last value associated with an id is kept. | ||||||
| fn keep_latest_obkv(_key: &[u8], obkvs: &[Cow<[u8]>]) -> anyhow::Result<Vec<u8>> { | fn keep_latest_obkv(_key: &[u8], obkvs: &[Cow<[u8]>]) -> anyhow::Result<Vec<u8>> { | ||||||
|     obkvs.last().context("no last value").map(|last| last.clone().into_owned()) |     obkvs.last().context("no last value").map(|last| last.clone().into_owned()) | ||||||
| @@ -552,3 +569,73 @@ fn validate_document_id(document_id: &str) -> Option<&str> { | |||||||
|         }) |         }) | ||||||
|     }) |     }) | ||||||
| } | } | ||||||
|  |  | ||||||
|  | #[cfg(test)] | ||||||
|  | mod test { | ||||||
|  |     use super::*; | ||||||
|  |  | ||||||
|  |     mod compute_primary_key { | ||||||
|  |         use super::compute_primary_key_pair; | ||||||
|  |         use super::FieldsIdsMap; | ||||||
|  |  | ||||||
|  |         #[test] | ||||||
|  |         #[should_panic] | ||||||
|  |         fn should_panic_primary_key_not_in_map() { | ||||||
|  |             let mut fields_map = FieldsIdsMap::new(); | ||||||
|  |             let _result = compute_primary_key_pair( | ||||||
|  |                 Some("toto"), | ||||||
|  |                 &mut fields_map, | ||||||
|  |                 None, | ||||||
|  |                 false); | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         #[test] | ||||||
|  |         fn should_return_primary_key_if_is_some() { | ||||||
|  |             let mut fields_map = FieldsIdsMap::new(); | ||||||
|  |             fields_map.insert("toto").unwrap(); | ||||||
|  |             let result = compute_primary_key_pair( | ||||||
|  |                 Some("toto"), | ||||||
|  |                 &mut fields_map, | ||||||
|  |                 Some("tata".to_string()), | ||||||
|  |                 false); | ||||||
|  |             assert_eq!(result.unwrap(), (0u8, "toto".to_string())); | ||||||
|  |             assert_eq!(fields_map.len(), 1); | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         #[test] | ||||||
|  |         fn should_return_alternative_if_primary_is_none() { | ||||||
|  |             let mut fields_map = FieldsIdsMap::new(); | ||||||
|  |             let result = compute_primary_key_pair( | ||||||
|  |                 None, | ||||||
|  |                 &mut fields_map, | ||||||
|  |                 Some("tata".to_string()), | ||||||
|  |                 false); | ||||||
|  |             assert_eq!(result.unwrap(), (0u8, "tata".to_string())); | ||||||
|  |             assert_eq!(fields_map.len(), 1); | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         #[test] | ||||||
|  |         fn should_return_default_if_both_are_none() { | ||||||
|  |             let mut fields_map = FieldsIdsMap::new(); | ||||||
|  |             let result = compute_primary_key_pair( | ||||||
|  |                 None, | ||||||
|  |                 &mut fields_map, | ||||||
|  |                 None, | ||||||
|  |                 true); | ||||||
|  |             assert_eq!(result.unwrap(), (0u8, "id".to_string())); | ||||||
|  |             assert_eq!(fields_map.len(), 1); | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         #[test] | ||||||
|  |         fn should_return_err_if_both_are_none_and_recompute_is_false(){ | ||||||
|  |             let mut fields_map = FieldsIdsMap::new(); | ||||||
|  |             let result = compute_primary_key_pair( | ||||||
|  |                 None, | ||||||
|  |                 &mut fields_map, | ||||||
|  |                 None, | ||||||
|  |                 false); | ||||||
|  |             assert!(result.is_err()); | ||||||
|  |             assert_eq!(fields_map.len(), 0); | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | } | ||||||
|   | |||||||
| @@ -1,14 +1,16 @@ | |||||||
| use std::collections::HashMap; | use std::collections::HashMap; | ||||||
| use std::str::FromStr; | use std::str::FromStr; | ||||||
|  |  | ||||||
| use anyhow::{ensure, Context}; | use anyhow::Context; | ||||||
| use grenad::CompressionType; | use grenad::CompressionType; | ||||||
|  | use itertools::Itertools; | ||||||
| use rayon::ThreadPool; | use rayon::ThreadPool; | ||||||
|  |  | ||||||
|  | use crate::criterion::Criterion; | ||||||
|  | use crate::facet::FacetType; | ||||||
| use crate::update::index_documents::{Transform, IndexDocumentsMethod}; | use crate::update::index_documents::{Transform, IndexDocumentsMethod}; | ||||||
| use crate::update::{ClearDocuments, IndexDocuments, UpdateIndexingStep}; | use crate::update::{ClearDocuments, IndexDocuments, UpdateIndexingStep}; | ||||||
| use crate::facet::FacetType; | use crate::{Index, FieldsIdsMap}; | ||||||
| use crate::{Index, FieldsIdsMap, Criterion}; |  | ||||||
|  |  | ||||||
| pub struct Settings<'a, 't, 'u, 'i> { | pub struct Settings<'a, 't, 'u, 'i> { | ||||||
|     wtxn: &'t mut heed::RwTxn<'i, 'u>, |     wtxn: &'t mut heed::RwTxn<'i, 'u>, | ||||||
| @@ -26,7 +28,7 @@ pub struct Settings<'a, 't, 'u, 'i> { | |||||||
|     // however if it is `Some(None)` it means that the user forced a reset of the setting. |     // however if it is `Some(None)` it means that the user forced a reset of the setting. | ||||||
|     searchable_fields: Option<Option<Vec<String>>>, |     searchable_fields: Option<Option<Vec<String>>>, | ||||||
|     displayed_fields: Option<Option<Vec<String>>>, |     displayed_fields: Option<Option<Vec<String>>>, | ||||||
|     faceted_fields: Option<HashMap<String, String>>, |     faceted_fields: Option<Option<HashMap<String, String>>>, | ||||||
|     criteria: Option<Option<Vec<String>>>, |     criteria: Option<Option<Vec<String>>>, | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -67,7 +69,11 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { | |||||||
|     } |     } | ||||||
|  |  | ||||||
|     pub fn set_faceted_fields(&mut self, names_facet_types: HashMap<String, String>) { |     pub fn set_faceted_fields(&mut self, names_facet_types: HashMap<String, String>) { | ||||||
|         self.faceted_fields = Some(names_facet_types); |         self.faceted_fields = Some(Some(names_facet_types)); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     pub fn reset_faceted_fields(&mut self) { | ||||||
|  |         self.faceted_fields = Some(None); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     pub fn reset_criteria(&mut self) { |     pub fn reset_criteria(&mut self) { | ||||||
| @@ -78,183 +84,188 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { | |||||||
|         self.criteria = Some(Some(criteria)); |         self.criteria = Some(Some(criteria)); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     pub fn execute<F>(self, progress_callback: F) -> anyhow::Result<()> |     fn reindex<F>(&mut self, cb: &F, old_fields_ids_map: FieldsIdsMap) -> anyhow::Result<()> | ||||||
|     where |     where | ||||||
|         F: Fn(UpdateIndexingStep) + Sync |         F: Fn(UpdateIndexingStep) + Sync, | ||||||
|     { |     { | ||||||
|         let mut updated_searchable_fields = None; |  | ||||||
|         let mut updated_faceted_fields = None; |  | ||||||
|         let mut updated_displayed_fields = None; |  | ||||||
|         let mut updated_criteria = None; |  | ||||||
|  |  | ||||||
|         // Construct the new FieldsIdsMap based on the searchable fields order. |  | ||||||
|         let fields_ids_map = self.index.fields_ids_map(self.wtxn)?; |         let fields_ids_map = self.index.fields_ids_map(self.wtxn)?; | ||||||
|         let mut fields_ids_map = match self.searchable_fields { |         // if the settings are set before any document update, we don't need to do anything, and | ||||||
|             Some(Some(searchable_fields)) => { |         // will set the primary key during the first document addition. | ||||||
|                 let mut new_fields_ids_map = FieldsIdsMap::new(); |         if self.index.number_of_documents(&self.wtxn)? == 0 { | ||||||
|                 let mut new_searchable_fields = Vec::new(); |             return Ok(()) | ||||||
|  |         } | ||||||
|  |  | ||||||
|                 for name in searchable_fields { |         let transform = Transform { | ||||||
|                     let id = new_fields_ids_map.insert(&name).context("field id limit reached")?; |             rtxn: &self.wtxn, | ||||||
|                     new_searchable_fields.push(id); |             index: self.index, | ||||||
|                 } |             log_every_n: self.log_every_n, | ||||||
|  |             chunk_compression_type: self.chunk_compression_type, | ||||||
|                 for (_, name) in fields_ids_map.iter() { |             chunk_compression_level: self.chunk_compression_level, | ||||||
|                     new_fields_ids_map.insert(name).context("field id limit reached")?; |             chunk_fusing_shrink_size: self.chunk_fusing_shrink_size, | ||||||
|                 } |             max_nb_chunks: self.max_nb_chunks, | ||||||
|  |             max_memory: self.max_memory, | ||||||
|                 updated_searchable_fields = Some(Some(new_searchable_fields)); |             index_documents_method: IndexDocumentsMethod::ReplaceDocuments, | ||||||
|                 new_fields_ids_map |             autogenerate_docids: false, | ||||||
|             }, |  | ||||||
|             Some(None) => { |  | ||||||
|                 updated_searchable_fields = Some(None); |  | ||||||
|                 fields_ids_map |  | ||||||
|             }, |  | ||||||
|             None => fields_ids_map, |  | ||||||
|         }; |         }; | ||||||
|  |  | ||||||
|         // We compute or generate the new primary key field id. |         // There already has been a document addition, the primary key should be set by now. | ||||||
|         // TODO make the primary key settable. |         let primary_key = self.index.primary_key(&self.wtxn)?.context("Index must have a primary key")?; | ||||||
|         let primary_key = match self.index.primary_key(&self.wtxn)? { |  | ||||||
|             Some(id) => { |  | ||||||
|                 let current_fields_ids_map = self.index.fields_ids_map(self.wtxn)?; |  | ||||||
|                 let name = current_fields_ids_map.name(id).unwrap(); |  | ||||||
|                 fields_ids_map.insert(name).context("field id limit reached")? |  | ||||||
|             }, |  | ||||||
|             None => fields_ids_map.insert("id").context("field id limit reached")?, |  | ||||||
|         }; |  | ||||||
|  |  | ||||||
|         let current_faceted_fields = self.index.faceted_fields(self.wtxn)?; |         // We remap the documents fields based on the new `FieldsIdsMap`. | ||||||
|         if let Some(fields_names_facet_types) = self.faceted_fields { |         let output = transform.remap_index_documents( | ||||||
|             let mut faceted_fields = HashMap::new(); |             primary_key.to_string(), | ||||||
|             for (name, sftype) in fields_names_facet_types { |             old_fields_ids_map, | ||||||
|                 let ftype = FacetType::from_str(&sftype).with_context(|| format!("parsing facet type {:?}", sftype))?; |             fields_ids_map.clone())?; | ||||||
|                 let id = fields_ids_map.insert(&name).context("field id limit reached")?; |  | ||||||
|                 match current_faceted_fields.get(&id) { |  | ||||||
|                     Some(pftype) => { |  | ||||||
|                         ensure!(ftype == *pftype, "{} facet type changed from {} to {}", name, ftype, pftype); |  | ||||||
|                         faceted_fields.insert(id, ftype) |  | ||||||
|                     }, |  | ||||||
|                     None => faceted_fields.insert(id, ftype), |  | ||||||
|                 }; |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|             updated_faceted_fields = Some(faceted_fields); |         // We clear the full database (words-fst, documents ids and documents content). | ||||||
|         } |         ClearDocuments::new(self.wtxn, self.index).execute()?; | ||||||
|  |  | ||||||
|         // Check that the displayed attributes have been specified. |  | ||||||
|         if let Some(value) = self.displayed_fields { |  | ||||||
|             match value { |  | ||||||
|                 Some(names) => { |  | ||||||
|                     let mut new_displayed_fields = Vec::new(); |  | ||||||
|                     for name in names { |  | ||||||
|                         let id = fields_ids_map.insert(&name).context("field id limit reached")?; |  | ||||||
|                         new_displayed_fields.push(id); |  | ||||||
|                     } |  | ||||||
|                     updated_displayed_fields = Some(Some(new_displayed_fields)); |  | ||||||
|                 } |  | ||||||
|                 None => updated_displayed_fields = Some(None), |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         if let Some(criteria) = self.criteria { |  | ||||||
|             match criteria { |  | ||||||
|                 Some(criteria_names) => { |  | ||||||
|                     let mut new_criteria = Vec::new(); |  | ||||||
|                     for name in criteria_names { |  | ||||||
|                         let criterion = Criterion::from_str(&mut fields_ids_map, &name)?; |  | ||||||
|                         if let Some(fid) = criterion.field_id() { |  | ||||||
|                             let name = fields_ids_map.name(fid).unwrap(); |  | ||||||
|                             let faceted_fields = updated_faceted_fields.as_ref().unwrap_or(¤t_faceted_fields); |  | ||||||
|                             ensure!(faceted_fields.contains_key(&fid), "criterion field {} must be faceted", name); |  | ||||||
|                         } |  | ||||||
|                         new_criteria.push(criterion); |  | ||||||
|                     } |  | ||||||
|                     updated_criteria = Some(Some(new_criteria)); |  | ||||||
|                 }, |  | ||||||
|                 None => updated_criteria = Some(None), |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         // If any setting has modified any of the data structures it means that we need |  | ||||||
|         // to retrieve the documents and then reindex them with the new settings. |  | ||||||
|         if updated_searchable_fields.is_some() || updated_faceted_fields.is_some() { |  | ||||||
|             let transform = Transform { |  | ||||||
|                 rtxn: &self.wtxn, |  | ||||||
|                 index: self.index, |  | ||||||
|                 log_every_n: self.log_every_n, |  | ||||||
|                 chunk_compression_type: self.chunk_compression_type, |  | ||||||
|                 chunk_compression_level: self.chunk_compression_level, |  | ||||||
|                 chunk_fusing_shrink_size: self.chunk_fusing_shrink_size, |  | ||||||
|                 max_nb_chunks: self.max_nb_chunks, |  | ||||||
|                 max_memory: self.max_memory, |  | ||||||
|                 index_documents_method: IndexDocumentsMethod::ReplaceDocuments, |  | ||||||
|                 autogenerate_docids: false, |  | ||||||
|             }; |  | ||||||
|  |  | ||||||
|             // We remap the documents fields based on the new `FieldsIdsMap`. |  | ||||||
|             let output = transform.remap_index_documents(primary_key, fields_ids_map.clone())?; |  | ||||||
|  |  | ||||||
|             // We write the new FieldsIdsMap to the database |  | ||||||
|             // this way next indexing methods will be based on that. |  | ||||||
|             self.index.put_fields_ids_map(self.wtxn, &fields_ids_map)?; |  | ||||||
|  |  | ||||||
|             if let Some(faceted_fields) = updated_faceted_fields { |  | ||||||
|                 // We write the faceted_fields fields into the database here. |  | ||||||
|                 self.index.put_faceted_fields(self.wtxn, &faceted_fields)?; |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|             if let Some(searchable_fields) = updated_searchable_fields { |  | ||||||
|                 // The new searchable fields are also written down to make sure |  | ||||||
|                 // that the IndexDocuments system takes only these ones into account. |  | ||||||
|                 match searchable_fields { |  | ||||||
|                     Some(fields) => self.index.put_searchable_fields(self.wtxn, &fields)?, |  | ||||||
|                     None => self.index.delete_searchable_fields(self.wtxn).map(drop)?, |  | ||||||
|                 } |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|             // We clear the full database (words-fst, documents ids and documents content). |  | ||||||
|             ClearDocuments::new(self.wtxn, self.index).execute()?; |  | ||||||
|  |  | ||||||
|             // We index the generated `TransformOutput` which must contain |  | ||||||
|             // all the documents with fields in the newly defined searchable order. |  | ||||||
|             let mut indexing_builder = IndexDocuments::new(self.wtxn, self.index); |  | ||||||
|             indexing_builder.log_every_n = self.log_every_n; |  | ||||||
|             indexing_builder.max_nb_chunks = self.max_nb_chunks; |  | ||||||
|             indexing_builder.max_memory = self.max_memory; |  | ||||||
|             indexing_builder.linked_hash_map_size = self.linked_hash_map_size; |  | ||||||
|             indexing_builder.chunk_compression_type = self.chunk_compression_type; |  | ||||||
|             indexing_builder.chunk_compression_level = self.chunk_compression_level; |  | ||||||
|             indexing_builder.chunk_fusing_shrink_size = self.chunk_fusing_shrink_size; |  | ||||||
|             indexing_builder.thread_pool = self.thread_pool; |  | ||||||
|             indexing_builder.execute_raw(output, &progress_callback)?; |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         if let Some(displayed_fields) = updated_displayed_fields { |  | ||||||
|             match displayed_fields { |  | ||||||
|                 Some(fields) => self.index.put_displayed_fields(self.wtxn, &fields)?, |  | ||||||
|                 None => self.index.delete_displayed_fields(self.wtxn).map(drop)?, |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         if let Some(criteria) = updated_criteria { |  | ||||||
|             match criteria { |  | ||||||
|                 Some(criteria) => self.index.put_criteria(self.wtxn, &criteria)?, |  | ||||||
|                 None => self.index.delete_criteria(self.wtxn).map(drop)?, |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|  |         // We index the generated `TransformOutput` which must contain | ||||||
|  |         // all the documents with fields in the newly defined searchable order. | ||||||
|  |         let mut indexing_builder = IndexDocuments::new(self.wtxn, self.index); | ||||||
|  |         indexing_builder.log_every_n = self.log_every_n; | ||||||
|  |         indexing_builder.max_nb_chunks = self.max_nb_chunks; | ||||||
|  |         indexing_builder.max_memory = self.max_memory; | ||||||
|  |         indexing_builder.linked_hash_map_size = self.linked_hash_map_size; | ||||||
|  |         indexing_builder.chunk_compression_type = self.chunk_compression_type; | ||||||
|  |         indexing_builder.chunk_compression_level = self.chunk_compression_level; | ||||||
|  |         indexing_builder.chunk_fusing_shrink_size = self.chunk_fusing_shrink_size; | ||||||
|  |         indexing_builder.thread_pool = self.thread_pool; | ||||||
|  |         indexing_builder.execute_raw(output, &cb)?; | ||||||
|         Ok(()) |         Ok(()) | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     fn update_displayed(&mut self) -> anyhow::Result<bool> { | ||||||
|  |         match self.displayed_fields { | ||||||
|  |             Some(Some(ref fields)) => { | ||||||
|  |                 let mut fields_ids_map = self.index.fields_ids_map(self.wtxn)?; | ||||||
|  |                 // fields are deduplicated, only the first occurrence is taken into account | ||||||
|  |                 let names: Vec<_> = fields | ||||||
|  |                     .iter() | ||||||
|  |                     .unique() | ||||||
|  |                     .map(String::as_str) | ||||||
|  |                     .collect(); | ||||||
|  |  | ||||||
|  |                 for name in names.iter() { | ||||||
|  |                     fields_ids_map | ||||||
|  |                         .insert(name) | ||||||
|  |                         .context("field id limit exceeded")?; | ||||||
|  |                 } | ||||||
|  |                 self.index.put_displayed_fields(self.wtxn, &names)?; | ||||||
|  |                 self.index.put_fields_ids_map(self.wtxn, &fields_ids_map)?; | ||||||
|  |             } | ||||||
|  |             Some(None) => { self.index.delete_displayed_fields(self.wtxn)?; }, | ||||||
|  |             None => return Ok(false), | ||||||
|  |         } | ||||||
|  |         Ok(true) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Updates the index's searchable attributes. This causes the field map to be recomputed to | ||||||
|  |     /// reflect the order of the searchable attributes. | ||||||
|  |     fn update_searchable(&mut self) -> anyhow::Result<bool> { | ||||||
|  |         match self.searchable_fields { | ||||||
|  |             Some(Some(ref fields)) => { | ||||||
|  |                 // every time the searchable attributes are updated, we need to update the | ||||||
|  |                 // ids for any settings that uses the facets. (displayed_fields, | ||||||
|  |                 // faceted_fields) | ||||||
|  |                 let old_fields_ids_map = self.index.fields_ids_map(self.wtxn)?; | ||||||
|  |  | ||||||
|  |                 let mut new_fields_ids_map = FieldsIdsMap::new(); | ||||||
|  |                 // fields are deduplicated, only the first occurrence is taken into account | ||||||
|  |                 let names = fields | ||||||
|  |                     .iter() | ||||||
|  |                     .unique() | ||||||
|  |                     .map(String::as_str) | ||||||
|  |                     .collect::<Vec<_>>(); | ||||||
|  |  | ||||||
|  |                 // Add all the searchable attributes to the field map, and then add the | ||||||
|  |                 // remaining fields from the old field map to the new one | ||||||
|  |                 for name in names.iter() { | ||||||
|  |                     new_fields_ids_map | ||||||
|  |                         .insert(&name) | ||||||
|  |                         .context("field id limit exceeded")?; | ||||||
|  |                 } | ||||||
|  |  | ||||||
|  |                 for (_, name) in old_fields_ids_map.iter() { | ||||||
|  |                     new_fields_ids_map | ||||||
|  |                         .insert(&name) | ||||||
|  |                         .context("field id limit exceeded")?; | ||||||
|  |                 } | ||||||
|  |  | ||||||
|  |                 self.index.put_searchable_fields(self.wtxn, &names)?; | ||||||
|  |                 self.index.put_fields_ids_map(self.wtxn, &new_fields_ids_map)?; | ||||||
|  |             } | ||||||
|  |             Some(None) => { self.index.delete_searchable_fields(self.wtxn)?; }, | ||||||
|  |             None => return Ok(false), | ||||||
|  |         } | ||||||
|  |         Ok(true) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     fn update_facets(&mut self) -> anyhow::Result<bool> { | ||||||
|  |         match self.faceted_fields { | ||||||
|  |             Some(Some(ref fields)) => { | ||||||
|  |                 let mut fields_ids_map = self.index.fields_ids_map(self.wtxn)?; | ||||||
|  |                 let mut new_facets = HashMap::new(); | ||||||
|  |                 for (name, ty) in fields { | ||||||
|  |                     fields_ids_map.insert(name).context("field id limit exceeded")?; | ||||||
|  |                     let ty = FacetType::from_str(&ty)?; | ||||||
|  |                     new_facets.insert(name.clone(), ty); | ||||||
|  |                 } | ||||||
|  |                 self.index.put_faceted_fields(self.wtxn, &new_facets)?; | ||||||
|  |                 self.index.put_fields_ids_map(self.wtxn, &fields_ids_map)?; | ||||||
|  |             } | ||||||
|  |             Some(None) => { self.index.delete_faceted_fields(self.wtxn)?; }, | ||||||
|  |             None => return Ok(false) | ||||||
|  |         } | ||||||
|  |         Ok(true) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     fn update_criteria(&mut self) -> anyhow::Result<()> { | ||||||
|  |         match self.criteria { | ||||||
|  |             Some(Some(ref fields)) => { | ||||||
|  |                 let faceted_fields = self.index.faceted_fields(&self.wtxn)?; | ||||||
|  |                 let mut new_criteria = Vec::new(); | ||||||
|  |                 for name in fields { | ||||||
|  |                     let criterion = Criterion::from_str(&faceted_fields, &name)?; | ||||||
|  |                     new_criteria.push(criterion); | ||||||
|  |                 } | ||||||
|  |                 self.index.put_criteria(self.wtxn, &new_criteria)?; | ||||||
|  |             } | ||||||
|  |             Some(None) => { self.index.delete_criteria(self.wtxn)?; } | ||||||
|  |             None => (), | ||||||
|  |         } | ||||||
|  |         Ok(()) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     pub fn execute<F>(mut self, progress_callback: F) -> anyhow::Result<()> | ||||||
|  |     where | ||||||
|  |         F: Fn(UpdateIndexingStep) + Sync | ||||||
|  |         { | ||||||
|  |             let old_fields_ids_map = self.index.fields_ids_map(&self.wtxn)?; | ||||||
|  |             self.update_displayed()?; | ||||||
|  |             let facets_updated = self.update_facets()?; | ||||||
|  |             // update_criteria MUST be called after update_facets, since criterion fields must be set | ||||||
|  |             // as facets. | ||||||
|  |             self.update_criteria()?; | ||||||
|  |             let searchable_updated = self.update_searchable()?; | ||||||
|  |  | ||||||
|  |             if facets_updated || searchable_updated { | ||||||
|  |                 self.reindex(&progress_callback, old_fields_ids_map)?; | ||||||
|  |             } | ||||||
|  |             Ok(()) | ||||||
|  |         } | ||||||
| } | } | ||||||
|  |  | ||||||
| #[cfg(test)] | #[cfg(test)] | ||||||
| mod tests { | mod tests { | ||||||
|     use super::*; |     use super::*; | ||||||
|     use crate::update::{IndexDocuments, UpdateFormat}; |  | ||||||
|     use heed::EnvOpenOptions; |     use heed::EnvOpenOptions; | ||||||
|     use maplit::hashmap; |     use maplit::hashmap; | ||||||
|  |  | ||||||
|  |     use crate::facet::FacetType; | ||||||
|  |     use crate::update::{IndexDocuments, UpdateFormat}; | ||||||
|  |  | ||||||
|     #[test] |     #[test] | ||||||
|     fn set_and_reset_searchable_fields() { |     fn set_and_reset_searchable_fields() { | ||||||
|         let path = tempfile::tempdir().unwrap(); |         let path = tempfile::tempdir().unwrap(); | ||||||
| @@ -336,10 +347,8 @@ mod tests { | |||||||
|  |  | ||||||
|         // Check that the displayed fields are correctly set to only the "age" field. |         // Check that the displayed fields are correctly set to only the "age" field. | ||||||
|         let rtxn = index.read_txn().unwrap(); |         let rtxn = index.read_txn().unwrap(); | ||||||
|         let fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); |  | ||||||
|         let fields_ids = index.displayed_fields(&rtxn).unwrap(); |         let fields_ids = index.displayed_fields(&rtxn).unwrap(); | ||||||
|         let age_id = fields_ids_map.id("age").unwrap(); |         assert_eq!(fields_ids.unwrap(), (&["age"][..])); | ||||||
|         assert_eq!(fields_ids, Some(&[age_id][..])); |  | ||||||
|         drop(rtxn); |         drop(rtxn); | ||||||
|  |  | ||||||
|         // We change the searchable fields to be the "name" field only. |         // We change the searchable fields to be the "name" field only. | ||||||
| @@ -351,10 +360,8 @@ mod tests { | |||||||
|  |  | ||||||
|         // Check that the displayed fields always contains only the "age" field. |         // Check that the displayed fields always contains only the "age" field. | ||||||
|         let rtxn = index.read_txn().unwrap(); |         let rtxn = index.read_txn().unwrap(); | ||||||
|         let fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); |  | ||||||
|         let fields_ids = index.displayed_fields(&rtxn).unwrap(); |         let fields_ids = index.displayed_fields(&rtxn).unwrap(); | ||||||
|         let age_id = fields_ids_map.id("age").unwrap(); |         assert_eq!(fields_ids.unwrap(), &["age"][..]); | ||||||
|         assert_eq!(fields_ids, Some(&[age_id][..])); |  | ||||||
|         drop(rtxn); |         drop(rtxn); | ||||||
|     } |     } | ||||||
|  |  | ||||||
| @@ -402,10 +409,8 @@ mod tests { | |||||||
|  |  | ||||||
|         // Check that the displayed fields are correctly set to only the "age" field. |         // Check that the displayed fields are correctly set to only the "age" field. | ||||||
|         let rtxn = index.read_txn().unwrap(); |         let rtxn = index.read_txn().unwrap(); | ||||||
|         let fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); |  | ||||||
|         let age_field_id = fields_ids_map.id("age").unwrap(); |  | ||||||
|         let fields_ids = index.displayed_fields(&rtxn).unwrap(); |         let fields_ids = index.displayed_fields(&rtxn).unwrap(); | ||||||
|         assert_eq!(fields_ids.unwrap(), &[age_field_id][..]); |         assert_eq!(fields_ids.unwrap(), &["age"][..]); | ||||||
|         drop(rtxn); |         drop(rtxn); | ||||||
|  |  | ||||||
|         // We reset the fields ids to become `None`, the default value. |         // We reset the fields ids to become `None`, the default value. | ||||||
| @@ -445,9 +450,9 @@ mod tests { | |||||||
|         // Check that the faceted fields are correctly set. |         // Check that the faceted fields are correctly set. | ||||||
|         let rtxn = index.read_txn().unwrap(); |         let rtxn = index.read_txn().unwrap(); | ||||||
|         let fields_ids = index.faceted_fields(&rtxn).unwrap(); |         let fields_ids = index.faceted_fields(&rtxn).unwrap(); | ||||||
|         assert_eq!(fields_ids, hashmap!{ 1 => FacetType::Integer }); |         assert_eq!(fields_ids, hashmap!{ "age".to_string() => FacetType::Integer }); | ||||||
|         // Only count the field_id 0 and level 0 facet values. |         // Only count the field_id 0 and level 0 facet values. | ||||||
|         let count = index.facet_field_id_value_docids.prefix_iter(&rtxn, &[1, 0]).unwrap().count(); |         let count = index.facet_field_id_value_docids.prefix_iter(&rtxn, &[0, 0]).unwrap().count(); | ||||||
|         assert_eq!(count, 3); |         assert_eq!(count, 3); | ||||||
|         drop(rtxn); |         drop(rtxn); | ||||||
|  |  | ||||||
| @@ -461,8 +466,49 @@ mod tests { | |||||||
|  |  | ||||||
|         let rtxn = index.read_txn().unwrap(); |         let rtxn = index.read_txn().unwrap(); | ||||||
|         // Only count the field_id 0 and level 0 facet values. |         // Only count the field_id 0 and level 0 facet values. | ||||||
|         let count = index.facet_field_id_value_docids.prefix_iter(&rtxn, &[1, 0]).unwrap().count(); |         let count = index.facet_field_id_value_docids.prefix_iter(&rtxn, &[0, 0]).unwrap().count(); | ||||||
|         assert_eq!(count, 4); |         assert_eq!(count, 4); | ||||||
|         drop(rtxn); |         drop(rtxn); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     #[test] | ||||||
|  |     fn setting_searchable_recomputes_other_settings() { | ||||||
|  |         let path = tempfile::tempdir().unwrap(); | ||||||
|  |         let mut options = EnvOpenOptions::new(); | ||||||
|  |         options.map_size(10 * 1024 * 1024); // 10 MB | ||||||
|  |         let index = Index::new(options, &path).unwrap(); | ||||||
|  |  | ||||||
|  |         // Set all the settings except searchable | ||||||
|  |         let mut wtxn = index.write_txn().unwrap(); | ||||||
|  |         let mut builder = Settings::new(&mut wtxn, &index); | ||||||
|  |         builder.set_displayed_fields(vec!["hello".to_string()]); | ||||||
|  |         builder.set_faceted_fields(hashmap!{ | ||||||
|  |             "age".into() => "integer".into(), | ||||||
|  |             "toto".into() => "integer".into(), | ||||||
|  |         }); | ||||||
|  |         builder.set_criteria(vec!["asc(toto)".to_string()]); | ||||||
|  |         builder.execute(|_| ()).unwrap(); | ||||||
|  |         wtxn.commit().unwrap(); | ||||||
|  |  | ||||||
|  |         // check the output | ||||||
|  |         let rtxn = index.read_txn().unwrap(); | ||||||
|  |         assert_eq!(&["hello"][..], index.displayed_fields(&rtxn).unwrap().unwrap()); | ||||||
|  |         // since no documents have been pushed the primary key is still unset | ||||||
|  |         assert!(index.primary_key(&rtxn).unwrap().is_none()); | ||||||
|  |         assert_eq!(vec![Criterion::Asc("toto".to_string())], index.criteria(&rtxn).unwrap()); | ||||||
|  |         drop(rtxn); | ||||||
|  |  | ||||||
|  |         // We set toto and age as searchable to force reordering of the fields | ||||||
|  |         let mut wtxn = index.write_txn().unwrap(); | ||||||
|  |         let mut builder = Settings::new(&mut wtxn, &index); | ||||||
|  |         builder.set_searchable_fields(vec!["toto".to_string(), "age".to_string()]); | ||||||
|  |         builder.execute(|_| ()).unwrap(); | ||||||
|  |         wtxn.commit().unwrap(); | ||||||
|  |  | ||||||
|  |         let rtxn = index.read_txn().unwrap(); | ||||||
|  |         assert_eq!(&["hello"][..], index.displayed_fields(&rtxn).unwrap().unwrap()); | ||||||
|  |         assert!(index.primary_key(&rtxn).unwrap().is_none()); | ||||||
|  |         assert_eq!(vec![Criterion::Asc("toto".to_string())], index.criteria(&rtxn).unwrap()); | ||||||
|  |         drop(rtxn); | ||||||
|  |     } | ||||||
| } | } | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user