mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-25 13:06:27 +00:00 
			
		
		
		
	Merge #3461
3461: Bring v1 changes into main r=curquiza a=Kerollmops Also bring back changes in milli (the remote repository) into main done during the pre-release Co-authored-by: LoĂŻc Lecrenier <loic.lecrenier@me.com> Co-authored-by: bors[bot] <26634292+bors[bot]@users.noreply.github.com> Co-authored-by: curquiza <curquiza@users.noreply.github.com> Co-authored-by: Tamo <tamo@meilisearch.com> Co-authored-by: Philipp Ahlner <philipp@ahlner.com> Co-authored-by: Kerollmops <clement@meilisearch.com>
This commit is contained in:
		| @@ -1,5 +1,6 @@ | ||||
| use std::collections::BTreeSet; | ||||
| use std::convert::Infallible; | ||||
| use std::fmt::Write; | ||||
| use std::{io, str}; | ||||
|  | ||||
| use heed::{Error as HeedError, MdbError}; | ||||
| @@ -100,10 +101,11 @@ A document identifier can be of type integer or string, \ | ||||
| only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_).", .document_id.to_string() | ||||
|     )] | ||||
|     InvalidDocumentId { document_id: Value }, | ||||
|     #[error("Invalid facet distribution, the fields `{}` are not set as filterable.", | ||||
|         .invalid_facets_name.iter().map(AsRef::as_ref).collect::<Vec<&str>>().join(", ") | ||||
|      )] | ||||
|     InvalidFacetsDistribution { invalid_facets_name: BTreeSet<String> }, | ||||
|     #[error("Invalid facet distribution, {}", format_invalid_filter_distribution(.invalid_facets_name, .valid_facets_name))] | ||||
|     InvalidFacetsDistribution { | ||||
|         invalid_facets_name: BTreeSet<String>, | ||||
|         valid_facets_name: BTreeSet<String>, | ||||
|     }, | ||||
|     #[error(transparent)] | ||||
|     InvalidGeoField(#[from] GeoError), | ||||
|     #[error("{0}")] | ||||
| @@ -152,6 +154,8 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco | ||||
| pub enum GeoError { | ||||
|     #[error("The `_geo` field in the document with the id: `{document_id}` is not an object. Was expecting an object with the `_geo.lat` and `_geo.lng` fields but instead got `{value}`.")] | ||||
|     NotAnObject { document_id: Value, value: Value }, | ||||
|     #[error("The `_geo` field in the document with the id: `{document_id}` contains the following unexpected fields: `{value}`.")] | ||||
|     UnexpectedExtraFields { document_id: Value, value: Value }, | ||||
|     #[error("Could not find latitude nor longitude in the document with the id: `{document_id}`. Was expecting `_geo.lat` and `_geo.lng` fields.")] | ||||
|     MissingLatitudeAndLongitude { document_id: Value }, | ||||
|     #[error("Could not find latitude in the document with the id: `{document_id}`. Was expecting a `_geo.lat` field.")] | ||||
| @@ -166,6 +170,50 @@ pub enum GeoError { | ||||
|     BadLongitude { document_id: Value, value: Value }, | ||||
| } | ||||
|  | ||||
| fn format_invalid_filter_distribution( | ||||
|     invalid_facets_name: &BTreeSet<String>, | ||||
|     valid_facets_name: &BTreeSet<String>, | ||||
| ) -> String { | ||||
|     if valid_facets_name.is_empty() { | ||||
|         return "this index does not have configured filterable attributes.".into(); | ||||
|     } | ||||
|  | ||||
|     let mut result = String::new(); | ||||
|  | ||||
|     match invalid_facets_name.len() { | ||||
|         0 => (), | ||||
|         1 => write!( | ||||
|             result, | ||||
|             "attribute `{}` is not filterable.", | ||||
|             invalid_facets_name.first().unwrap() | ||||
|         ) | ||||
|         .unwrap(), | ||||
|         _ => write!( | ||||
|             result, | ||||
|             "attributes `{}` are not filterable.", | ||||
|             invalid_facets_name.iter().map(AsRef::as_ref).collect::<Vec<&str>>().join(", ") | ||||
|         ) | ||||
|         .unwrap(), | ||||
|     }; | ||||
|  | ||||
|     match valid_facets_name.len() { | ||||
|         1 => write!( | ||||
|             result, | ||||
|             " The available filterable attribute is `{}`.", | ||||
|             valid_facets_name.first().unwrap() | ||||
|         ) | ||||
|         .unwrap(), | ||||
|         _ => write!( | ||||
|             result, | ||||
|             " The available filterable attributes are `{}`.", | ||||
|             valid_facets_name.iter().map(AsRef::as_ref).collect::<Vec<&str>>().join(", ") | ||||
|         ) | ||||
|         .unwrap(), | ||||
|     } | ||||
|  | ||||
|     result | ||||
| } | ||||
|  | ||||
| /// A little macro helper to autogenerate From implementation that needs two `Into`. | ||||
| /// Given the following parameters: `error_from_sub_error!(FieldIdMapMissingEntry => InternalError)` | ||||
| /// the macro will create the following code: | ||||
|   | ||||
| @@ -426,7 +426,7 @@ impl Index { | ||||
|     } | ||||
|  | ||||
|     /// Returns the `rtree` which associates coordinates to documents ids. | ||||
|     pub fn geo_rtree(&self, rtxn: &'_ RoTxn) -> Result<Option<RTree<GeoPoint>>> { | ||||
|     pub fn geo_rtree(&self, rtxn: &RoTxn) -> Result<Option<RTree<GeoPoint>>> { | ||||
|         match self | ||||
|             .main | ||||
|             .get::<_, Str, SerdeBincode<RTree<GeoPoint>>>(rtxn, main_key::GEO_RTREE_KEY)? | ||||
| @@ -2394,4 +2394,69 @@ pub(crate) mod tests { | ||||
|             assert!(all_ids.insert(id)); | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     #[test] | ||||
|     fn bug_3007() { | ||||
|         // https://github.com/meilisearch/meilisearch/issues/3007 | ||||
|  | ||||
|         use crate::error::{GeoError, UserError}; | ||||
|         let index = TempIndex::new(); | ||||
|  | ||||
|         // Given is an index with a geo field NOT contained in the sortable_fields of the settings | ||||
|         index | ||||
|             .update_settings(|settings| { | ||||
|                 settings.set_primary_key("id".to_string()); | ||||
|                 settings.set_filterable_fields(HashSet::from(["_geo".to_string()])); | ||||
|             }) | ||||
|             .unwrap(); | ||||
|  | ||||
|         // happy path | ||||
|         index.add_documents(documents!({ "id" : 5, "_geo": {"lat": 12.0, "lng": 11.0}})).unwrap(); | ||||
|  | ||||
|         db_snap!(index, geo_faceted_documents_ids); | ||||
|  | ||||
|         // both are unparseable, we expect GeoError::BadLatitudeAndLongitude | ||||
|         let err1 = index | ||||
|             .add_documents( | ||||
|                 documents!({ "id" : 6, "_geo": {"lat": "unparseable", "lng": "unparseable"}}), | ||||
|             ) | ||||
|             .unwrap_err(); | ||||
|         assert!(matches!( | ||||
|             err1, | ||||
|             Error::UserError(UserError::InvalidGeoField(GeoError::BadLatitudeAndLongitude { .. })) | ||||
|         )); | ||||
|  | ||||
|         db_snap!(index, geo_faceted_documents_ids); // ensure that no more document was inserted | ||||
|     } | ||||
|  | ||||
|     #[test] | ||||
|     fn unexpected_extra_fields_in_geo_field() { | ||||
|         let index = TempIndex::new(); | ||||
|  | ||||
|         index | ||||
|             .update_settings(|settings| { | ||||
|                 settings.set_primary_key("id".to_string()); | ||||
|                 settings.set_filterable_fields(HashSet::from(["_geo".to_string()])); | ||||
|             }) | ||||
|             .unwrap(); | ||||
|  | ||||
|         let err = index | ||||
|             .add_documents( | ||||
|                 documents!({ "id" : "doggo", "_geo": { "lat": 1, "lng": 2, "doggo": "are the best" }}), | ||||
|             ) | ||||
|             .unwrap_err(); | ||||
|         insta::assert_display_snapshot!(err, @r###"The `_geo` field in the document with the id: `"\"doggo\""` contains the following unexpected fields: `{"doggo":"are the best"}`."###); | ||||
|  | ||||
|         db_snap!(index, geo_faceted_documents_ids); // ensure that no documents were inserted | ||||
|  | ||||
|         // multiple fields and complex values | ||||
|         let err = index | ||||
|             .add_documents( | ||||
|                 documents!({ "id" : "doggo", "_geo": { "lat": 1, "lng": 2, "doggo": "are the best", "and": { "all": ["cats", { "are": "beautiful" } ] } } }), | ||||
|             ) | ||||
|             .unwrap_err(); | ||||
|         insta::assert_display_snapshot!(err, @r###"The `_geo` field in the document with the id: `"\"doggo\""` contains the following unexpected fields: `{"and":{"all":["cats",{"are":"beautiful"}]},"doggo":"are the best"}`."###); | ||||
|  | ||||
|         db_snap!(index, geo_faceted_documents_ids); // ensure that no documents were inserted | ||||
|     } | ||||
| } | ||||
|   | ||||
| @@ -183,14 +183,14 @@ impl<'t> Criterion for Proximity<'t> { | ||||
| } | ||||
|  | ||||
| fn resolve_candidates( | ||||
|     ctx: &'_ dyn Context, | ||||
|     ctx: &dyn Context, | ||||
|     query_tree: &Operation, | ||||
|     proximity: u8, | ||||
|     cache: &mut Cache, | ||||
|     wdcache: &mut WordDerivationsCache, | ||||
| ) -> Result<RoaringBitmap> { | ||||
|     fn resolve_operation( | ||||
|         ctx: &'_ dyn Context, | ||||
|         ctx: &dyn Context, | ||||
|         query_tree: &Operation, | ||||
|         proximity: u8, | ||||
|         cache: &mut Cache, | ||||
| @@ -244,7 +244,7 @@ fn resolve_candidates( | ||||
|     } | ||||
|  | ||||
|     fn mdfs_pair( | ||||
|         ctx: &'_ dyn Context, | ||||
|         ctx: &dyn Context, | ||||
|         left: &Operation, | ||||
|         right: &Operation, | ||||
|         proximity: u8, | ||||
| @@ -299,7 +299,7 @@ fn resolve_candidates( | ||||
|     } | ||||
|  | ||||
|     fn mdfs( | ||||
|         ctx: &'_ dyn Context, | ||||
|         ctx: &dyn Context, | ||||
|         branches: &[Operation], | ||||
|         proximity: u8, | ||||
|         cache: &mut Cache, | ||||
|   | ||||
| @@ -240,14 +240,14 @@ fn alterate_query_tree( | ||||
| } | ||||
|  | ||||
| fn resolve_candidates( | ||||
|     ctx: &'_ dyn Context, | ||||
|     ctx: &dyn Context, | ||||
|     query_tree: &Operation, | ||||
|     number_typos: u8, | ||||
|     cache: &mut HashMap<(Operation, u8), RoaringBitmap>, | ||||
|     wdcache: &mut WordDerivationsCache, | ||||
| ) -> Result<RoaringBitmap> { | ||||
|     fn resolve_operation( | ||||
|         ctx: &'_ dyn Context, | ||||
|         ctx: &dyn Context, | ||||
|         query_tree: &Operation, | ||||
|         number_typos: u8, | ||||
|         cache: &mut HashMap<(Operation, u8), RoaringBitmap>, | ||||
| @@ -277,7 +277,7 @@ fn resolve_candidates( | ||||
|     } | ||||
|  | ||||
|     fn mdfs( | ||||
|         ctx: &'_ dyn Context, | ||||
|         ctx: &dyn Context, | ||||
|         branches: &[Operation], | ||||
|         mana: u8, | ||||
|         cache: &mut HashMap<(Operation, u8), RoaringBitmap>, | ||||
|   | ||||
| @@ -291,6 +291,7 @@ impl<'a> FacetDistribution<'a> { | ||||
|                 if !invalid_fields.is_empty() { | ||||
|                     return Err(UserError::InvalidFacetsDistribution { | ||||
|                         invalid_facets_name: invalid_fields.into_iter().cloned().collect(), | ||||
|                         valid_facets_name: filterable_fields.into_iter().collect(), | ||||
|                     } | ||||
|                     .into()); | ||||
|                 } else { | ||||
|   | ||||
| @@ -0,0 +1,4 @@ | ||||
| --- | ||||
| source: milli/src/index.rs | ||||
| --- | ||||
| [0, ] | ||||
| @@ -0,0 +1,4 @@ | ||||
| --- | ||||
| source: milli/src/index.rs | ||||
| --- | ||||
| [] | ||||
| @@ -575,8 +575,8 @@ fn remove_from_word_docids( | ||||
| } | ||||
|  | ||||
| fn remove_docids_from_field_id_docid_facet_value( | ||||
|     index: &'_ Index, | ||||
|     wtxn: &'_ mut heed::RwTxn, | ||||
|     index: &Index, | ||||
|     wtxn: &mut heed::RwTxn, | ||||
|     facet_type: FacetType, | ||||
|     field_id: FieldId, | ||||
|     to_remove: &RoaringBitmap, | ||||
|   | ||||
| @@ -159,7 +159,7 @@ impl FacetsUpdateIncrementalInner { | ||||
|     /// See documentation of `insert_in_level` | ||||
|     fn insert_in_level_0( | ||||
|         &self, | ||||
|         txn: &'_ mut RwTxn, | ||||
|         txn: &mut RwTxn, | ||||
|         field_id: u16, | ||||
|         facet_value: &[u8], | ||||
|         docids: &RoaringBitmap, | ||||
| @@ -213,7 +213,7 @@ impl FacetsUpdateIncrementalInner { | ||||
|     /// of the parent node should be incremented. | ||||
|     fn insert_in_level( | ||||
|         &self, | ||||
|         txn: &'_ mut RwTxn, | ||||
|         txn: &mut RwTxn, | ||||
|         field_id: u16, | ||||
|         level: u8, | ||||
|         facet_value: &[u8], | ||||
| @@ -350,7 +350,7 @@ impl FacetsUpdateIncrementalInner { | ||||
|     /// Insert the given facet value and corresponding document ids in the database. | ||||
|     pub fn insert( | ||||
|         &self, | ||||
|         txn: &'_ mut RwTxn, | ||||
|         txn: &mut RwTxn, | ||||
|         field_id: u16, | ||||
|         facet_value: &[u8], | ||||
|         docids: &RoaringBitmap, | ||||
| @@ -472,7 +472,7 @@ impl FacetsUpdateIncrementalInner { | ||||
|     /// its left bound as well. | ||||
|     fn delete_in_level( | ||||
|         &self, | ||||
|         txn: &'_ mut RwTxn, | ||||
|         txn: &mut RwTxn, | ||||
|         field_id: u16, | ||||
|         level: u8, | ||||
|         facet_value: &[u8], | ||||
| @@ -531,7 +531,7 @@ impl FacetsUpdateIncrementalInner { | ||||
|  | ||||
|     fn delete_in_level_0( | ||||
|         &self, | ||||
|         txn: &'_ mut RwTxn, | ||||
|         txn: &mut RwTxn, | ||||
|         field_id: u16, | ||||
|         facet_value: &[u8], | ||||
|         docids: &RoaringBitmap, | ||||
| @@ -559,7 +559,7 @@ impl FacetsUpdateIncrementalInner { | ||||
|  | ||||
|     pub fn delete( | ||||
|         &self, | ||||
|         txn: &'_ mut RwTxn, | ||||
|         txn: &mut RwTxn, | ||||
|         field_id: u16, | ||||
|         facet_value: &[u8], | ||||
|         docids: &RoaringBitmap, | ||||
|   | ||||
| @@ -98,7 +98,12 @@ pub fn enrich_documents_batch<R: Read + Seek>( | ||||
|     // If the settings specifies that a _geo field must be used therefore we must check the | ||||
|     // validity of it in all the documents of this batch and this is when we return `Some`. | ||||
|     let geo_field_id = match documents_batch_index.id("_geo") { | ||||
|         Some(geo_field_id) if index.sortable_fields(rtxn)?.contains("_geo") => Some(geo_field_id), | ||||
|         Some(geo_field_id) | ||||
|             if index.sortable_fields(rtxn)?.contains("_geo") | ||||
|                 || index.filterable_fields(rtxn)?.contains("_geo") => | ||||
|         { | ||||
|             Some(geo_field_id) | ||||
|         } | ||||
|         _otherwise => None, | ||||
|     }; | ||||
|  | ||||
| @@ -367,11 +372,17 @@ pub fn extract_finite_float_from_value(value: Value) -> StdResult<f64, Value> { | ||||
|  | ||||
| pub fn validate_geo_from_json(id: &DocumentId, bytes: &[u8]) -> Result<StdResult<(), GeoError>> { | ||||
|     use GeoError::*; | ||||
|     let debug_id = || Value::from(id.debug()); | ||||
|     let debug_id = || { | ||||
|         serde_json::from_slice(id.value().as_bytes()).unwrap_or_else(|_| Value::from(id.debug())) | ||||
|     }; | ||||
|     match serde_json::from_slice(bytes).map_err(InternalError::SerdeJson)? { | ||||
|         Value::Object(mut object) => match (object.remove("lat"), object.remove("lng")) { | ||||
|             (Some(lat), Some(lng)) => { | ||||
|                 match (extract_finite_float_from_value(lat), extract_finite_float_from_value(lng)) { | ||||
|                     (Ok(_), Ok(_)) if !object.is_empty() => Ok(Err(UnexpectedExtraFields { | ||||
|                         document_id: debug_id(), | ||||
|                         value: object.into(), | ||||
|                     })), | ||||
|                     (Ok(_), Ok(_)) => Ok(Ok(())), | ||||
|                     (Err(value), Ok(_)) => Ok(Err(BadLatitude { document_id: debug_id(), value })), | ||||
|                     (Ok(_), Err(value)) => Ok(Err(BadLongitude { document_id: debug_id(), value })), | ||||
|   | ||||
| @@ -965,34 +965,6 @@ mod tests { | ||||
|             .unwrap(); | ||||
|     } | ||||
|  | ||||
|     #[test] | ||||
|     fn index_all_flavour_of_geo() { | ||||
|         let mut index = TempIndex::new(); | ||||
|         index.index_documents_config.update_method = IndexDocumentsMethod::ReplaceDocuments; | ||||
|  | ||||
|         index | ||||
|             .update_settings(|settings| { | ||||
|                 settings.set_filterable_fields(hashset!(S("_geo"))); | ||||
|             }) | ||||
|             .unwrap(); | ||||
|  | ||||
|         index | ||||
|             .add_documents(documents!([ | ||||
|               { "id": 0, "_geo": { "lat": 31, "lng": [42] } }, | ||||
|               { "id": 1, "_geo": { "lat": "31" }, "_geo.lng": 42 }, | ||||
|               { "id": 2, "_geo": { "lng": "42" }, "_geo.lat": "31" }, | ||||
|               { "id": 3, "_geo.lat": 31, "_geo.lng": "42" }, | ||||
|             ])) | ||||
|             .unwrap(); | ||||
|  | ||||
|         let rtxn = index.read_txn().unwrap(); | ||||
|  | ||||
|         let mut search = crate::Search::new(&rtxn, &index); | ||||
|         search.filter(crate::Filter::from_str("_geoRadius(31, 42, 0.000001)").unwrap().unwrap()); | ||||
|         let crate::SearchResult { documents_ids, .. } = search.execute().unwrap(); | ||||
|         assert_eq!(documents_ids, vec![0, 1, 2, 3]); | ||||
|     } | ||||
|  | ||||
|     #[test] | ||||
|     fn geo_error() { | ||||
|         let mut index = TempIndex::new(); | ||||
|   | ||||
| @@ -37,9 +37,6 @@ where | ||||
|             _ => T::deserialize_from_value(value, location).map(Setting::Set), | ||||
|         } | ||||
|     } | ||||
|     fn default() -> Option<Self> { | ||||
|         Some(Self::NotSet) | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl<T> Default for Setting<T> { | ||||
|   | ||||
| @@ -140,16 +140,20 @@ impl<'t, 'u, 'i> WordPrefixPositionDocids<'t, 'u, 'i> { | ||||
|  | ||||
|         // We remove all the entries that are no more required in this word prefix position | ||||
|         // docids database. | ||||
|         let mut iter = | ||||
|             self.index.word_prefix_position_docids.iter_mut(self.wtxn)?.lazily_decode_data(); | ||||
|         while let Some(((prefix, _), _)) = iter.next().transpose()? { | ||||
|             if del_prefix_fst_words.contains(prefix.as_bytes()) { | ||||
|                 unsafe { iter.del_current()? }; | ||||
|         // We also avoid iterating over the whole `word_prefix_position_docids` database if we know in | ||||
|         // advance that the `if del_prefix_fst_words.contains(prefix.as_bytes()) {` condition below | ||||
|         // will always be false (i.e. if `del_prefix_fst_words` is empty). | ||||
|         if !del_prefix_fst_words.is_empty() { | ||||
|             let mut iter = | ||||
|                 self.index.word_prefix_position_docids.iter_mut(self.wtxn)?.lazily_decode_data(); | ||||
|             while let Some(((prefix, _), _)) = iter.next().transpose()? { | ||||
|                 if del_prefix_fst_words.contains(prefix.as_bytes()) { | ||||
|                     unsafe { iter.del_current()? }; | ||||
|                 } | ||||
|             } | ||||
|             drop(iter); | ||||
|         } | ||||
|  | ||||
|         drop(iter); | ||||
|  | ||||
|         // We finally write all the word prefix position docids into the LMDB database. | ||||
|         sorter_into_lmdb_database( | ||||
|             self.wtxn, | ||||
|   | ||||
		Reference in New Issue
	
	Block a user