mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-25 21:16:28 +00:00 
			
		
		
		
	Fix errors, clippy warnings, and add review comments
This commit is contained in:
		| @@ -105,7 +105,7 @@ impl<Q: RankingRuleQueryTrait> GeoSort<Q> { | |||||||
|  |  | ||||||
|     /// Refill the internal buffer of cached docids based on the strategy. |     /// Refill the internal buffer of cached docids based on the strategy. | ||||||
|     /// Drop the rtree if we don't need it anymore. |     /// Drop the rtree if we don't need it anymore. | ||||||
|     fn fill_buffer<'ctx>(&mut self, ctx: &mut SearchContext<'ctx>) -> Result<()> { |     fn fill_buffer(&mut self, ctx: &mut SearchContext) -> Result<()> { | ||||||
|         debug_assert!(self.field_ids.is_some(), "fill_buffer can't be called without the lat&lng"); |         debug_assert!(self.field_ids.is_some(), "fill_buffer can't be called without the lat&lng"); | ||||||
|         debug_assert!(self.cached_sorted_docids.is_empty()); |         debug_assert!(self.cached_sorted_docids.is_empty()); | ||||||
|  |  | ||||||
| @@ -133,7 +133,13 @@ impl<Q: RankingRuleQueryTrait> GeoSort<Q> { | |||||||
|                 // and only keep the latest candidates. |                 // and only keep the latest candidates. | ||||||
|                 for point in rtree.nearest_neighbor_iter(&point) { |                 for point in rtree.nearest_neighbor_iter(&point) { | ||||||
|                     if self.geo_candidates.contains(point.data.0) { |                     if self.geo_candidates.contains(point.data.0) { | ||||||
|                         self.cached_sorted_docids.pop_front(); |                         // REVIEW COMMENT: that doesn't look right, because we only keep the furthest point in the cache. | ||||||
|  |                         // Then the cache will be exhausted after the first bucket and we'll need to repopulate it again immediately. | ||||||
|  |                         // I think it's okay if we keep every document id in the cache instead. It's a high memory usage, | ||||||
|  |                         // but we already have the whole rtree in memory, which is bigger than a vector of all document ids. | ||||||
|  |                         // | ||||||
|  |                         //      self.cached_sorted_docids.pop_front(); | ||||||
|  |                         // | ||||||
|                         self.cached_sorted_docids.push_back(point.data.0); |                         self.cached_sorted_docids.push_back(point.data.0); | ||||||
|                     } |                     } | ||||||
|                 } |                 } | ||||||
| @@ -163,7 +169,10 @@ impl<Q: RankingRuleQueryTrait> GeoSort<Q> { | |||||||
|                     )) |                     )) | ||||||
|                 }) |                 }) | ||||||
|                 .collect::<Result<Vec<(u32, [f64; 2])>>>()?; |                 .collect::<Result<Vec<(u32, [f64; 2])>>>()?; | ||||||
|             documents.sort_by_key(|(_, p)| distance_between_two_points(&self.point, &p) as usize); |             // REVIEW COMMENT: the haversine distance function can be quite expensive, I think, so it's probably faster | ||||||
|  |             // to use `sort_by_cached_key` instead of `sort_by_key`. | ||||||
|  |             documents | ||||||
|  |                 .sort_by_cached_key(|(_, p)| distance_between_two_points(&self.point, p) as usize); | ||||||
|             self.cached_sorted_docids.extend(documents.into_iter().map(|(doc_id, _)| doc_id)); |             self.cached_sorted_docids.extend(documents.into_iter().map(|(doc_id, _)| doc_id)); | ||||||
|         }; |         }; | ||||||
|  |  | ||||||
| @@ -193,7 +202,7 @@ impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for GeoSort<Q> { | |||||||
|         self.query = Some(query.clone()); |         self.query = Some(query.clone()); | ||||||
|         self.geo_candidates &= universe; |         self.geo_candidates &= universe; | ||||||
|  |  | ||||||
|         if self.geo_candidates.len() == 0 { |         if self.geo_candidates.is_empty() { | ||||||
|             return Ok(()); |             return Ok(()); | ||||||
|         } |         } | ||||||
|  |  | ||||||
| @@ -203,6 +212,10 @@ impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for GeoSort<Q> { | |||||||
|         self.field_ids = Some([lat, lng]); |         self.field_ids = Some([lat, lng]); | ||||||
|  |  | ||||||
|         if self.strategy.use_rtree(self.geo_candidates.len() as usize) { |         if self.strategy.use_rtree(self.geo_candidates.len() as usize) { | ||||||
|  |             // REVIEW COMMENT: I would prefer to always keep the rtree in memory so that we don't have to deserialize it | ||||||
|  |             // every time the geosort ranking rule starts iterating. | ||||||
|  |             // So we'd initialize it in `::new` and never drop it. | ||||||
|  |             // | ||||||
|             self.rtree = Some(ctx.index.geo_rtree(ctx.txn)?.expect("geo candidates but no rtree")); |             self.rtree = Some(ctx.index.geo_rtree(ctx.txn)?.expect("geo candidates but no rtree")); | ||||||
|         } |         } | ||||||
|  |  | ||||||
| @@ -210,6 +223,7 @@ impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for GeoSort<Q> { | |||||||
|         Ok(()) |         Ok(()) | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     #[allow(clippy::only_used_in_recursion)] | ||||||
|     fn next_bucket( |     fn next_bucket( | ||||||
|         &mut self, |         &mut self, | ||||||
|         ctx: &mut SearchContext<'ctx>, |         ctx: &mut SearchContext<'ctx>, | ||||||
|   | |||||||
| @@ -401,8 +401,12 @@ pub fn execute_search( | |||||||
|         let graph = QueryGraph::from_query(ctx, &query_terms)?; |         let graph = QueryGraph::from_query(ctx, &query_terms)?; | ||||||
|         located_query_terms = Some(query_terms); |         located_query_terms = Some(query_terms); | ||||||
|  |  | ||||||
|         let ranking_rules = |         let ranking_rules = get_ranking_rules_for_query_graph_search( | ||||||
|             get_ranking_rules_for_query_graph_search(ctx, sort_criteria, terms_matching_strategy)?; |             ctx, | ||||||
|  |             sort_criteria, | ||||||
|  |             geo_strategy, | ||||||
|  |             terms_matching_strategy, | ||||||
|  |         )?; | ||||||
|  |  | ||||||
|         universe = |         universe = | ||||||
|             resolve_universe(ctx, &universe, &graph, terms_matching_strategy, query_graph_logger)?; |             resolve_universe(ctx, &universe, &graph, terms_matching_strategy, query_graph_logger)?; | ||||||
|   | |||||||
| @@ -1,14 +1,9 @@ | |||||||
| /*! | /*! | ||||||
| This module tests the `geo_sort` ranking rule: | This module tests the `geo_sort` ranking rule: | ||||||
|  |  | ||||||
| 1. an error is returned if the sort ranking rule exists but no fields-to-sort were given at search time | REVIEW COMMENT: | ||||||
| 2. an error is returned if the fields-to-sort are not sortable |     - nice tests :) | ||||||
| 3. it is possible to add multiple fields-to-sort at search time |     - add anything that seems not obvious about the behaviour of the geosort ranking rule here | ||||||
| 4. custom sort ranking rules can be added to the settings, they interact with the generic `sort` ranking rule as expected |  | ||||||
| 5. numbers appear before strings |  | ||||||
| 6. documents with either: (1) no value, (2) null, or (3) an object for the field-to-sort appear at the end of the bucket |  | ||||||
| 7. boolean values are translated to strings |  | ||||||
| 8. if a field contains an array, it is sorted by the best value in the array according to the sort rule |  | ||||||
| */ | */ | ||||||
|  |  | ||||||
| use big_s::S; | use big_s::S; | ||||||
| @@ -40,21 +35,21 @@ fn execute_iterative_and_rtree_returns_the_same<'a>( | |||||||
| ) -> Vec<usize> { | ) -> Vec<usize> { | ||||||
|     search.geo_sort_strategy(GeoSortStrategy::AlwaysIterative(2)); |     search.geo_sort_strategy(GeoSortStrategy::AlwaysIterative(2)); | ||||||
|     let SearchResult { documents_ids, .. } = search.execute().unwrap(); |     let SearchResult { documents_ids, .. } = search.execute().unwrap(); | ||||||
|     let iterative_ids_bucketed = collect_field_values(&index, rtxn, "id", &documents_ids); |     let iterative_ids_bucketed = collect_field_values(index, rtxn, "id", &documents_ids); | ||||||
|  |  | ||||||
|     search.geo_sort_strategy(GeoSortStrategy::AlwaysIterative(1000)); |     search.geo_sort_strategy(GeoSortStrategy::AlwaysIterative(1000)); | ||||||
|     let SearchResult { documents_ids, .. } = search.execute().unwrap(); |     let SearchResult { documents_ids, .. } = search.execute().unwrap(); | ||||||
|     let iterative_ids = collect_field_values(&index, rtxn, "id", &documents_ids); |     let iterative_ids = collect_field_values(index, rtxn, "id", &documents_ids); | ||||||
|  |  | ||||||
|     assert_eq!(iterative_ids_bucketed, iterative_ids, "iterative bucket"); |     assert_eq!(iterative_ids_bucketed, iterative_ids, "iterative bucket"); | ||||||
|  |  | ||||||
|     search.geo_sort_strategy(GeoSortStrategy::AlwaysRtree(2)); |     search.geo_sort_strategy(GeoSortStrategy::AlwaysRtree(2)); | ||||||
|     let SearchResult { documents_ids, .. } = search.execute().unwrap(); |     let SearchResult { documents_ids, .. } = search.execute().unwrap(); | ||||||
|     let rtree_ids_bucketed = collect_field_values(&index, rtxn, "id", &documents_ids); |     let rtree_ids_bucketed = collect_field_values(index, rtxn, "id", &documents_ids); | ||||||
|  |  | ||||||
|     search.geo_sort_strategy(GeoSortStrategy::AlwaysRtree(1000)); |     search.geo_sort_strategy(GeoSortStrategy::AlwaysRtree(1000)); | ||||||
|     let SearchResult { documents_ids, .. } = search.execute().unwrap(); |     let SearchResult { documents_ids, .. } = search.execute().unwrap(); | ||||||
|     let rtree_ids = collect_field_values(&index, rtxn, "id", &documents_ids); |     let rtree_ids = collect_field_values(index, rtxn, "id", &documents_ids); | ||||||
|  |  | ||||||
|     assert_eq!(rtree_ids_bucketed, rtree_ids, "rtree bucket"); |     assert_eq!(rtree_ids_bucketed, rtree_ids, "rtree bucket"); | ||||||
|  |  | ||||||
| @@ -67,15 +62,24 @@ fn execute_iterative_and_rtree_returns_the_same<'a>( | |||||||
| fn test_geo_sort() { | fn test_geo_sort() { | ||||||
|     let index = create_index(); |     let index = create_index(); | ||||||
|  |  | ||||||
|  |     // REVIEW COMMENT: | ||||||
|  |     // I prefer to make the external ids correspond to the internal ids so that | ||||||
|  |     // we can check whether the ranking rules are actually doing work instead of | ||||||
|  |     // returning documents in order of their internal ids. | ||||||
|  |     // | ||||||
|     index |     index | ||||||
|         .add_documents(documents!([ |         .add_documents(documents!([ | ||||||
|             { "id": 2, "_geo": { "lat": 2, "lng": -1 } }, |             { "id": 0, "_geo": { "lat": 2, "lng": -1 } }, | ||||||
|             { "id": 3, "_geo": { "lat": -2, "lng": -2 } }, |             { "id": 1, "_geo": { "lat": -2, "lng": -2 } }, | ||||||
|             { "id": 5, "_geo": { "lat": 6, "lng": -5 } }, |             { "id": 2, "_geo": { "lat": 6, "lng": -5 } }, | ||||||
|             { "id": 4, "_geo": { "lat": 3, "lng": 5 } }, |             { "id": 3, "_geo": { "lat": 3, "lng": 5 } }, | ||||||
|             { "id": 0, "_geo": { "lat": 0, "lng": 0 } }, |             { "id": 4, "_geo": { "lat": 0, "lng": 0 } }, | ||||||
|             { "id": 1, "_geo": { "lat": 1, "lng": 1 } }, |             { "id": 5, "_geo": { "lat": 1, "lng": 1 } }, | ||||||
|             { "id": 6 }, { "id": 8 }, { "id": 7 }, { "id": 10 }, { "id": 9 }, |             { "id": 6 }, | ||||||
|  |             { "id": 7 }, | ||||||
|  |             { "id": 8 }, | ||||||
|  |             { "id": 9 }, | ||||||
|  |             { "id": 10 }, | ||||||
|         ])) |         ])) | ||||||
|         .unwrap(); |         .unwrap(); | ||||||
|  |  | ||||||
| @@ -88,66 +92,54 @@ fn test_geo_sort() { | |||||||
|  |  | ||||||
|     s.geo_sort_strategy(GeoSortStrategy::Dynamic(100)); |     s.geo_sort_strategy(GeoSortStrategy::Dynamic(100)); | ||||||
|     let SearchResult { documents_ids, .. } = s.execute().unwrap(); |     let SearchResult { documents_ids, .. } = s.execute().unwrap(); | ||||||
|     let ids = collect_field_values(&index, &txn, "id", &documents_ids); |     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[4, 5, 0, 1, 3, 2, 6, 7, 8, 9, 10]"); | ||||||
|     insta::assert_snapshot!(format!("{ids:?}"), @r###"["0", "1", "2", "3", "4", "5", "6", "8", "7", "10", "9"]"###); |  | ||||||
|  |  | ||||||
|     s.geo_sort_strategy(GeoSortStrategy::Dynamic(3)); |     s.geo_sort_strategy(GeoSortStrategy::Dynamic(3)); | ||||||
|     let SearchResult { documents_ids, .. } = s.execute().unwrap(); |     let SearchResult { documents_ids, .. } = s.execute().unwrap(); | ||||||
|     let ids = collect_field_values(&index, &txn, "id", &documents_ids); |     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[4, 5, 0, 1, 3, 2, 6, 7, 8, 9, 10]"); | ||||||
|     insta::assert_snapshot!(format!("{ids:?}"), @r###"["0", "1", "2", "3", "4", "5", "6", "8", "7", "10", "9"]"###); |  | ||||||
|  |  | ||||||
|     s.geo_sort_strategy(GeoSortStrategy::AlwaysIterative(100)); |     s.geo_sort_strategy(GeoSortStrategy::AlwaysIterative(100)); | ||||||
|     let SearchResult { documents_ids, .. } = s.execute().unwrap(); |     let SearchResult { documents_ids, .. } = s.execute().unwrap(); | ||||||
|     let ids = collect_field_values(&index, &txn, "id", &documents_ids); |     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[4, 5, 0, 1, 3, 2, 6, 7, 8, 9, 10]"); | ||||||
|     insta::assert_snapshot!(format!("{ids:?}"), @r###"["0", "1", "2", "3", "4", "5", "6", "8", "7", "10", "9"]"###); |  | ||||||
|  |  | ||||||
|     s.geo_sort_strategy(GeoSortStrategy::AlwaysIterative(3)); |     s.geo_sort_strategy(GeoSortStrategy::AlwaysIterative(3)); | ||||||
|     let SearchResult { documents_ids, .. } = s.execute().unwrap(); |     let SearchResult { documents_ids, .. } = s.execute().unwrap(); | ||||||
|     let ids = collect_field_values(&index, &txn, "id", &documents_ids); |     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[4, 5, 0, 1, 3, 2, 6, 7, 8, 9, 10]"); | ||||||
|     insta::assert_snapshot!(format!("{ids:?}"), @r###"["0", "1", "2", "3", "4", "5", "6", "8", "7", "10", "9"]"###); |  | ||||||
|  |  | ||||||
|     s.geo_sort_strategy(GeoSortStrategy::AlwaysRtree(100)); |     s.geo_sort_strategy(GeoSortStrategy::AlwaysRtree(100)); | ||||||
|     let SearchResult { documents_ids, .. } = s.execute().unwrap(); |     let SearchResult { documents_ids, .. } = s.execute().unwrap(); | ||||||
|     let ids = collect_field_values(&index, &txn, "id", &documents_ids); |     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[4, 5, 0, 1, 3, 2, 6, 7, 8, 9, 10]"); | ||||||
|     insta::assert_snapshot!(format!("{ids:?}"), @r###"["0", "1", "2", "3", "4", "5", "6", "8", "7", "10", "9"]"###); |  | ||||||
|  |  | ||||||
|     s.geo_sort_strategy(GeoSortStrategy::AlwaysRtree(3)); |     s.geo_sort_strategy(GeoSortStrategy::AlwaysRtree(3)); | ||||||
|     let SearchResult { documents_ids, .. } = s.execute().unwrap(); |     let SearchResult { documents_ids, .. } = s.execute().unwrap(); | ||||||
|     let ids = collect_field_values(&index, &txn, "id", &documents_ids); |     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[4, 5, 0, 1, 3, 2, 6, 7, 8, 9, 10]"); | ||||||
|     insta::assert_snapshot!(format!("{ids:?}"), @r###"["0", "1", "2", "3", "4", "5", "6", "8", "7", "10", "9"]"###); |  | ||||||
|  |  | ||||||
|     // --- desc |     // --- desc | ||||||
|     s.sort_criteria(vec![AscDesc::Desc(Member::Geo([0., 0.]))]); |     s.sort_criteria(vec![AscDesc::Desc(Member::Geo([0., 0.]))]); | ||||||
|  |  | ||||||
|     s.geo_sort_strategy(GeoSortStrategy::Dynamic(100)); |     s.geo_sort_strategy(GeoSortStrategy::Dynamic(100)); | ||||||
|     let SearchResult { documents_ids, .. } = s.execute().unwrap(); |     let SearchResult { documents_ids, .. } = s.execute().unwrap(); | ||||||
|     let ids = collect_field_values(&index, &txn, "id", &documents_ids); |     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[2, 3, 1, 0, 5, 4, 6, 7, 8, 9, 10]"); | ||||||
|     insta::assert_snapshot!(format!("{ids:?}"), @r###"["5", "4", "3", "2", "1", "0", "6", "8", "7", "10", "9"]"###); |  | ||||||
|  |  | ||||||
|     s.geo_sort_strategy(GeoSortStrategy::Dynamic(3)); |     s.geo_sort_strategy(GeoSortStrategy::Dynamic(3)); | ||||||
|     let SearchResult { documents_ids, .. } = s.execute().unwrap(); |     let SearchResult { documents_ids, .. } = s.execute().unwrap(); | ||||||
|     let ids = collect_field_values(&index, &txn, "id", &documents_ids); |     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[2, 3, 1, 0, 5, 4, 6, 7, 8, 9, 10]"); | ||||||
|     insta::assert_snapshot!(format!("{ids:?}"), @r###"["5", "4", "3", "2", "1", "0", "6", "8", "7", "10", "9"]"###); |  | ||||||
|  |  | ||||||
|     s.geo_sort_strategy(GeoSortStrategy::AlwaysIterative(100)); |     s.geo_sort_strategy(GeoSortStrategy::AlwaysIterative(100)); | ||||||
|     let SearchResult { documents_ids, .. } = s.execute().unwrap(); |     let SearchResult { documents_ids, .. } = s.execute().unwrap(); | ||||||
|     let ids = collect_field_values(&index, &txn, "id", &documents_ids); |     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[2, 3, 1, 0, 5, 4, 6, 7, 8, 9, 10]"); | ||||||
|     insta::assert_snapshot!(format!("{ids:?}"), @r###"["5", "4", "3", "2", "1", "0", "6", "8", "7", "10", "9"]"###); |  | ||||||
|  |  | ||||||
|     s.geo_sort_strategy(GeoSortStrategy::AlwaysIterative(3)); |     s.geo_sort_strategy(GeoSortStrategy::AlwaysIterative(3)); | ||||||
|     let SearchResult { documents_ids, .. } = s.execute().unwrap(); |     let SearchResult { documents_ids, .. } = s.execute().unwrap(); | ||||||
|     let ids = collect_field_values(&index, &txn, "id", &documents_ids); |     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[2, 3, 1, 0, 5, 4, 6, 7, 8, 9, 10]"); | ||||||
|     insta::assert_snapshot!(format!("{ids:?}"), @r###"["5", "4", "3", "2", "1", "0", "6", "8", "7", "10", "9"]"###); |  | ||||||
|  |  | ||||||
|     s.geo_sort_strategy(GeoSortStrategy::AlwaysRtree(100)); |     s.geo_sort_strategy(GeoSortStrategy::AlwaysRtree(100)); | ||||||
|     let SearchResult { documents_ids, .. } = s.execute().unwrap(); |     let SearchResult { documents_ids, .. } = s.execute().unwrap(); | ||||||
|     let ids = collect_field_values(&index, &txn, "id", &documents_ids); |     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[2, 3, 1, 0, 5, 4, 6, 7, 8, 9, 10]"); | ||||||
|     insta::assert_snapshot!(format!("{ids:?}"), @r###"["5", "4", "3", "2", "1", "0", "6", "8", "7", "10", "9"]"###); |  | ||||||
|  |  | ||||||
|     s.geo_sort_strategy(GeoSortStrategy::AlwaysRtree(3)); |     s.geo_sort_strategy(GeoSortStrategy::AlwaysRtree(3)); | ||||||
|     let SearchResult { documents_ids, .. } = s.execute().unwrap(); |     let SearchResult { documents_ids, .. } = s.execute().unwrap(); | ||||||
|     let ids = collect_field_values(&index, &txn, "id", &documents_ids); |     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[2, 3, 1, 0, 5, 4, 6, 7, 8, 9, 10]"); | ||||||
|     insta::assert_snapshot!(format!("{ids:?}"), @r###"["5", "4", "3", "2", "1", "0", "6", "8", "7", "10", "9"]"###); |  | ||||||
| } | } | ||||||
|  |  | ||||||
| #[test] | #[test] | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user