mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-25 21:16:28 +00:00 
			
		
		
		
	Fix errors, clippy warnings, and add review comments
This commit is contained in:
		| @@ -105,7 +105,7 @@ impl<Q: RankingRuleQueryTrait> GeoSort<Q> { | ||||
|  | ||||
|     /// Refill the internal buffer of cached docids based on the strategy. | ||||
|     /// Drop the rtree if we don't need it anymore. | ||||
|     fn fill_buffer<'ctx>(&mut self, ctx: &mut SearchContext<'ctx>) -> Result<()> { | ||||
|     fn fill_buffer(&mut self, ctx: &mut SearchContext) -> Result<()> { | ||||
|         debug_assert!(self.field_ids.is_some(), "fill_buffer can't be called without the lat&lng"); | ||||
|         debug_assert!(self.cached_sorted_docids.is_empty()); | ||||
|  | ||||
| @@ -133,7 +133,13 @@ impl<Q: RankingRuleQueryTrait> GeoSort<Q> { | ||||
|                 // and only keep the latest candidates. | ||||
|                 for point in rtree.nearest_neighbor_iter(&point) { | ||||
|                     if self.geo_candidates.contains(point.data.0) { | ||||
|                         self.cached_sorted_docids.pop_front(); | ||||
|                         // REVIEW COMMENT: that doesn't look right, because we only keep the furthest point in the cache. | ||||
|                         // Then the cache will be exhausted after the first bucket and we'll need to repopulate it again immediately. | ||||
|                         // I think it's okay if we keep every document id in the cache instead. It's a high memory usage, | ||||
|                         // but we already have the whole rtree in memory, which is bigger than a vector of all document ids. | ||||
|                         // | ||||
|                         //      self.cached_sorted_docids.pop_front(); | ||||
|                         // | ||||
|                         self.cached_sorted_docids.push_back(point.data.0); | ||||
|                     } | ||||
|                 } | ||||
| @@ -163,7 +169,10 @@ impl<Q: RankingRuleQueryTrait> GeoSort<Q> { | ||||
|                     )) | ||||
|                 }) | ||||
|                 .collect::<Result<Vec<(u32, [f64; 2])>>>()?; | ||||
|             documents.sort_by_key(|(_, p)| distance_between_two_points(&self.point, &p) as usize); | ||||
|             // REVIEW COMMENT: the haversine distance function can be quite expensive, I think, so it's probably faster | ||||
|             // to use `sort_by_cached_key` instead of `sort_by_key`. | ||||
|             documents | ||||
|                 .sort_by_cached_key(|(_, p)| distance_between_two_points(&self.point, p) as usize); | ||||
|             self.cached_sorted_docids.extend(documents.into_iter().map(|(doc_id, _)| doc_id)); | ||||
|         }; | ||||
|  | ||||
| @@ -193,7 +202,7 @@ impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for GeoSort<Q> { | ||||
|         self.query = Some(query.clone()); | ||||
|         self.geo_candidates &= universe; | ||||
|  | ||||
|         if self.geo_candidates.len() == 0 { | ||||
|         if self.geo_candidates.is_empty() { | ||||
|             return Ok(()); | ||||
|         } | ||||
|  | ||||
| @@ -203,6 +212,10 @@ impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for GeoSort<Q> { | ||||
|         self.field_ids = Some([lat, lng]); | ||||
|  | ||||
|         if self.strategy.use_rtree(self.geo_candidates.len() as usize) { | ||||
|             // REVIEW COMMENT: I would prefer to always keep the rtree in memory so that we don't have to deserialize it | ||||
|             // every time the geosort ranking rule starts iterating. | ||||
|             // So we'd initialize it in `::new` and never drop it. | ||||
|             // | ||||
|             self.rtree = Some(ctx.index.geo_rtree(ctx.txn)?.expect("geo candidates but no rtree")); | ||||
|         } | ||||
|  | ||||
| @@ -210,6 +223,7 @@ impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for GeoSort<Q> { | ||||
|         Ok(()) | ||||
|     } | ||||
|  | ||||
|     #[allow(clippy::only_used_in_recursion)] | ||||
|     fn next_bucket( | ||||
|         &mut self, | ||||
|         ctx: &mut SearchContext<'ctx>, | ||||
|   | ||||
| @@ -401,8 +401,12 @@ pub fn execute_search( | ||||
|         let graph = QueryGraph::from_query(ctx, &query_terms)?; | ||||
|         located_query_terms = Some(query_terms); | ||||
|  | ||||
|         let ranking_rules = | ||||
|             get_ranking_rules_for_query_graph_search(ctx, sort_criteria, terms_matching_strategy)?; | ||||
|         let ranking_rules = get_ranking_rules_for_query_graph_search( | ||||
|             ctx, | ||||
|             sort_criteria, | ||||
|             geo_strategy, | ||||
|             terms_matching_strategy, | ||||
|         )?; | ||||
|  | ||||
|         universe = | ||||
|             resolve_universe(ctx, &universe, &graph, terms_matching_strategy, query_graph_logger)?; | ||||
|   | ||||
| @@ -1,14 +1,9 @@ | ||||
| /*! | ||||
| This module tests the `geo_sort` ranking rule: | ||||
|  | ||||
| 1. an error is returned if the sort ranking rule exists but no fields-to-sort were given at search time | ||||
| 2. an error is returned if the fields-to-sort are not sortable | ||||
| 3. it is possible to add multiple fields-to-sort at search time | ||||
| 4. custom sort ranking rules can be added to the settings, they interact with the generic `sort` ranking rule as expected | ||||
| 5. numbers appear before strings | ||||
| 6. documents with either: (1) no value, (2) null, or (3) an object for the field-to-sort appear at the end of the bucket | ||||
| 7. boolean values are translated to strings | ||||
| 8. if a field contains an array, it is sorted by the best value in the array according to the sort rule | ||||
| REVIEW COMMENT: | ||||
|     - nice tests :) | ||||
|     - add anything that seems not obvious about the behaviour of the geosort ranking rule here | ||||
| */ | ||||
|  | ||||
| use big_s::S; | ||||
| @@ -40,21 +35,21 @@ fn execute_iterative_and_rtree_returns_the_same<'a>( | ||||
| ) -> Vec<usize> { | ||||
|     search.geo_sort_strategy(GeoSortStrategy::AlwaysIterative(2)); | ||||
|     let SearchResult { documents_ids, .. } = search.execute().unwrap(); | ||||
|     let iterative_ids_bucketed = collect_field_values(&index, rtxn, "id", &documents_ids); | ||||
|     let iterative_ids_bucketed = collect_field_values(index, rtxn, "id", &documents_ids); | ||||
|  | ||||
|     search.geo_sort_strategy(GeoSortStrategy::AlwaysIterative(1000)); | ||||
|     let SearchResult { documents_ids, .. } = search.execute().unwrap(); | ||||
|     let iterative_ids = collect_field_values(&index, rtxn, "id", &documents_ids); | ||||
|     let iterative_ids = collect_field_values(index, rtxn, "id", &documents_ids); | ||||
|  | ||||
|     assert_eq!(iterative_ids_bucketed, iterative_ids, "iterative bucket"); | ||||
|  | ||||
|     search.geo_sort_strategy(GeoSortStrategy::AlwaysRtree(2)); | ||||
|     let SearchResult { documents_ids, .. } = search.execute().unwrap(); | ||||
|     let rtree_ids_bucketed = collect_field_values(&index, rtxn, "id", &documents_ids); | ||||
|     let rtree_ids_bucketed = collect_field_values(index, rtxn, "id", &documents_ids); | ||||
|  | ||||
|     search.geo_sort_strategy(GeoSortStrategy::AlwaysRtree(1000)); | ||||
|     let SearchResult { documents_ids, .. } = search.execute().unwrap(); | ||||
|     let rtree_ids = collect_field_values(&index, rtxn, "id", &documents_ids); | ||||
|     let rtree_ids = collect_field_values(index, rtxn, "id", &documents_ids); | ||||
|  | ||||
|     assert_eq!(rtree_ids_bucketed, rtree_ids, "rtree bucket"); | ||||
|  | ||||
| @@ -67,15 +62,24 @@ fn execute_iterative_and_rtree_returns_the_same<'a>( | ||||
| fn test_geo_sort() { | ||||
|     let index = create_index(); | ||||
|  | ||||
|     // REVIEW COMMENT: | ||||
|     // I prefer to make the external ids correspond to the internal ids so that | ||||
|     // we can check whether the ranking rules are actually doing work instead of | ||||
|     // returning documents in order of their internal ids. | ||||
|     // | ||||
|     index | ||||
|         .add_documents(documents!([ | ||||
|             { "id": 2, "_geo": { "lat": 2, "lng": -1 } }, | ||||
|             { "id": 3, "_geo": { "lat": -2, "lng": -2 } }, | ||||
|             { "id": 5, "_geo": { "lat": 6, "lng": -5 } }, | ||||
|             { "id": 4, "_geo": { "lat": 3, "lng": 5 } }, | ||||
|             { "id": 0, "_geo": { "lat": 0, "lng": 0 } }, | ||||
|             { "id": 1, "_geo": { "lat": 1, "lng": 1 } }, | ||||
|             { "id": 6 }, { "id": 8 }, { "id": 7 }, { "id": 10 }, { "id": 9 }, | ||||
|             { "id": 0, "_geo": { "lat": 2, "lng": -1 } }, | ||||
|             { "id": 1, "_geo": { "lat": -2, "lng": -2 } }, | ||||
|             { "id": 2, "_geo": { "lat": 6, "lng": -5 } }, | ||||
|             { "id": 3, "_geo": { "lat": 3, "lng": 5 } }, | ||||
|             { "id": 4, "_geo": { "lat": 0, "lng": 0 } }, | ||||
|             { "id": 5, "_geo": { "lat": 1, "lng": 1 } }, | ||||
|             { "id": 6 }, | ||||
|             { "id": 7 }, | ||||
|             { "id": 8 }, | ||||
|             { "id": 9 }, | ||||
|             { "id": 10 }, | ||||
|         ])) | ||||
|         .unwrap(); | ||||
|  | ||||
| @@ -88,66 +92,54 @@ fn test_geo_sort() { | ||||
|  | ||||
|     s.geo_sort_strategy(GeoSortStrategy::Dynamic(100)); | ||||
|     let SearchResult { documents_ids, .. } = s.execute().unwrap(); | ||||
|     let ids = collect_field_values(&index, &txn, "id", &documents_ids); | ||||
|     insta::assert_snapshot!(format!("{ids:?}"), @r###"["0", "1", "2", "3", "4", "5", "6", "8", "7", "10", "9"]"###); | ||||
|     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[4, 5, 0, 1, 3, 2, 6, 7, 8, 9, 10]"); | ||||
|  | ||||
|     s.geo_sort_strategy(GeoSortStrategy::Dynamic(3)); | ||||
|     let SearchResult { documents_ids, .. } = s.execute().unwrap(); | ||||
|     let ids = collect_field_values(&index, &txn, "id", &documents_ids); | ||||
|     insta::assert_snapshot!(format!("{ids:?}"), @r###"["0", "1", "2", "3", "4", "5", "6", "8", "7", "10", "9"]"###); | ||||
|     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[4, 5, 0, 1, 3, 2, 6, 7, 8, 9, 10]"); | ||||
|  | ||||
|     s.geo_sort_strategy(GeoSortStrategy::AlwaysIterative(100)); | ||||
|     let SearchResult { documents_ids, .. } = s.execute().unwrap(); | ||||
|     let ids = collect_field_values(&index, &txn, "id", &documents_ids); | ||||
|     insta::assert_snapshot!(format!("{ids:?}"), @r###"["0", "1", "2", "3", "4", "5", "6", "8", "7", "10", "9"]"###); | ||||
|     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[4, 5, 0, 1, 3, 2, 6, 7, 8, 9, 10]"); | ||||
|  | ||||
|     s.geo_sort_strategy(GeoSortStrategy::AlwaysIterative(3)); | ||||
|     let SearchResult { documents_ids, .. } = s.execute().unwrap(); | ||||
|     let ids = collect_field_values(&index, &txn, "id", &documents_ids); | ||||
|     insta::assert_snapshot!(format!("{ids:?}"), @r###"["0", "1", "2", "3", "4", "5", "6", "8", "7", "10", "9"]"###); | ||||
|     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[4, 5, 0, 1, 3, 2, 6, 7, 8, 9, 10]"); | ||||
|  | ||||
|     s.geo_sort_strategy(GeoSortStrategy::AlwaysRtree(100)); | ||||
|     let SearchResult { documents_ids, .. } = s.execute().unwrap(); | ||||
|     let ids = collect_field_values(&index, &txn, "id", &documents_ids); | ||||
|     insta::assert_snapshot!(format!("{ids:?}"), @r###"["0", "1", "2", "3", "4", "5", "6", "8", "7", "10", "9"]"###); | ||||
|     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[4, 5, 0, 1, 3, 2, 6, 7, 8, 9, 10]"); | ||||
|  | ||||
|     s.geo_sort_strategy(GeoSortStrategy::AlwaysRtree(3)); | ||||
|     let SearchResult { documents_ids, .. } = s.execute().unwrap(); | ||||
|     let ids = collect_field_values(&index, &txn, "id", &documents_ids); | ||||
|     insta::assert_snapshot!(format!("{ids:?}"), @r###"["0", "1", "2", "3", "4", "5", "6", "8", "7", "10", "9"]"###); | ||||
|     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[4, 5, 0, 1, 3, 2, 6, 7, 8, 9, 10]"); | ||||
|  | ||||
|     // --- desc | ||||
|     s.sort_criteria(vec![AscDesc::Desc(Member::Geo([0., 0.]))]); | ||||
|  | ||||
|     s.geo_sort_strategy(GeoSortStrategy::Dynamic(100)); | ||||
|     let SearchResult { documents_ids, .. } = s.execute().unwrap(); | ||||
|     let ids = collect_field_values(&index, &txn, "id", &documents_ids); | ||||
|     insta::assert_snapshot!(format!("{ids:?}"), @r###"["5", "4", "3", "2", "1", "0", "6", "8", "7", "10", "9"]"###); | ||||
|     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[2, 3, 1, 0, 5, 4, 6, 7, 8, 9, 10]"); | ||||
|  | ||||
|     s.geo_sort_strategy(GeoSortStrategy::Dynamic(3)); | ||||
|     let SearchResult { documents_ids, .. } = s.execute().unwrap(); | ||||
|     let ids = collect_field_values(&index, &txn, "id", &documents_ids); | ||||
|     insta::assert_snapshot!(format!("{ids:?}"), @r###"["5", "4", "3", "2", "1", "0", "6", "8", "7", "10", "9"]"###); | ||||
|     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[2, 3, 1, 0, 5, 4, 6, 7, 8, 9, 10]"); | ||||
|  | ||||
|     s.geo_sort_strategy(GeoSortStrategy::AlwaysIterative(100)); | ||||
|     let SearchResult { documents_ids, .. } = s.execute().unwrap(); | ||||
|     let ids = collect_field_values(&index, &txn, "id", &documents_ids); | ||||
|     insta::assert_snapshot!(format!("{ids:?}"), @r###"["5", "4", "3", "2", "1", "0", "6", "8", "7", "10", "9"]"###); | ||||
|     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[2, 3, 1, 0, 5, 4, 6, 7, 8, 9, 10]"); | ||||
|  | ||||
|     s.geo_sort_strategy(GeoSortStrategy::AlwaysIterative(3)); | ||||
|     let SearchResult { documents_ids, .. } = s.execute().unwrap(); | ||||
|     let ids = collect_field_values(&index, &txn, "id", &documents_ids); | ||||
|     insta::assert_snapshot!(format!("{ids:?}"), @r###"["5", "4", "3", "2", "1", "0", "6", "8", "7", "10", "9"]"###); | ||||
|     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[2, 3, 1, 0, 5, 4, 6, 7, 8, 9, 10]"); | ||||
|  | ||||
|     s.geo_sort_strategy(GeoSortStrategy::AlwaysRtree(100)); | ||||
|     let SearchResult { documents_ids, .. } = s.execute().unwrap(); | ||||
|     let ids = collect_field_values(&index, &txn, "id", &documents_ids); | ||||
|     insta::assert_snapshot!(format!("{ids:?}"), @r###"["5", "4", "3", "2", "1", "0", "6", "8", "7", "10", "9"]"###); | ||||
|     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[2, 3, 1, 0, 5, 4, 6, 7, 8, 9, 10]"); | ||||
|  | ||||
|     s.geo_sort_strategy(GeoSortStrategy::AlwaysRtree(3)); | ||||
|     let SearchResult { documents_ids, .. } = s.execute().unwrap(); | ||||
|     let ids = collect_field_values(&index, &txn, "id", &documents_ids); | ||||
|     insta::assert_snapshot!(format!("{ids:?}"), @r###"["5", "4", "3", "2", "1", "0", "6", "8", "7", "10", "9"]"###); | ||||
|     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[2, 3, 1, 0, 5, 4, 6, 7, 8, 9, 10]"); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
|   | ||||
		Reference in New Issue
	
	Block a user