mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-25 21:16:28 +00:00 
			
		
		
		
	squash-me
This commit is contained in:
		| @@ -1,4 +1,5 @@ | ||||
| use std::cmp; | ||||
| use pathfinding::directed::dijkstra::dijkstra; | ||||
|  | ||||
| const ONE_ATTRIBUTE: u32 = 1000; | ||||
| const MAX_INDEX: u32 = ONE_ATTRIBUTE - 1; | ||||
| @@ -29,107 +30,40 @@ fn construct_position(attr: u32, index: u32) -> u32 { | ||||
|     attr * ONE_ATTRIBUTE + index | ||||
| } | ||||
|  | ||||
| // TODO we should use an sdset::Set for `next_positions`. | ||||
| // TODO We must not recursively search for the best proximity but return None if proximity is not found. | ||||
| // Returns the positions to focus that will give the best possible proximity. | ||||
| fn best_proximity_for(current_position: u32, proximity: u32, next_positions: &[u32]) -> Option<(u32, Vec<u32>)> { | ||||
|     let (current_attr, _) = extract_position(current_position); | ||||
| #[derive(Debug, Clone, PartialOrd, Ord, PartialEq, Eq, Hash)] | ||||
| struct Path(Vec<u32>); | ||||
|  | ||||
|     match proximity { | ||||
|         // look at i+0 | ||||
|         0 => { | ||||
|             match next_positions.binary_search(&current_position) { | ||||
|                 Ok(_) => Some((0, vec![current_position])), | ||||
|                 Err(_) => best_proximity_for(current_position, proximity + 1, next_positions), | ||||
|             } | ||||
|         }, | ||||
|         // look at i+1 | ||||
|         1 => { | ||||
|             let position = current_position + 1; | ||||
|             let (attr, _) = extract_position(position); | ||||
| impl Path { | ||||
|     fn new(positions: &[Vec<u32>]) -> Option<Path> { | ||||
|         let position = positions.first()?.first()?; | ||||
|         Some(Path(vec![*position])) | ||||
|     } | ||||
|  | ||||
|             // We must check that we did not overflow the current attribute. If so, | ||||
|             // we must check for a bigger proximity that we will be able to find behind. | ||||
|             if current_attr == attr { | ||||
|                 match next_positions.binary_search(&position) { | ||||
|                     Ok(_) => Some((1, vec![position])), | ||||
|                     Err(_) => best_proximity_for(current_position, proximity + 1, next_positions), | ||||
|                 } | ||||
|             } else { | ||||
|                 best_proximity_for(current_position, proximity + 1, next_positions) | ||||
|             } | ||||
|         }, | ||||
|         // look at i-(p-1), i+p | ||||
|         2..=7 => { | ||||
|             let mut output = Vec::new(); | ||||
|     fn successors(&self, _positions: &[Vec<u32>]) -> Vec<(Path, u32)> { | ||||
|         vec![] | ||||
|     } | ||||
|  | ||||
|             // Behind the current_position | ||||
|             if let Some(position) = current_position.checked_sub(proximity - 1) { | ||||
|                 let (attr, _) = extract_position(position); | ||||
|                 // We must make sure we are not looking at a word at the end of another attribute. | ||||
|                 if current_attr == attr && next_positions.binary_search(&position).is_ok() { | ||||
|                     output.push(position); | ||||
|                 } | ||||
|             } | ||||
|     fn proximity(&self) -> u32 { | ||||
|         self.0.windows(2).map(|ps| positions_proximity(ps[0], ps[1])).sum::<u32>() | ||||
|     } | ||||
|  | ||||
|             // In front of the current_position | ||||
|             let position = current_position + proximity; | ||||
|             let (attr, _) = extract_position(position); | ||||
|             // We must make sure we are not looking at a word at the end of another attribute. | ||||
|             if current_attr == attr && next_positions.binary_search(&position).is_ok() { | ||||
|                 output.push(position); | ||||
|             } | ||||
|  | ||||
|             if output.is_empty() { | ||||
|                 best_proximity_for(current_position, proximity + 1, next_positions) | ||||
|             } else { | ||||
|                 Some((proximity, output)) | ||||
|             } | ||||
|         }, | ||||
|         // look at i+8 and all above and i-(8-1) and all below | ||||
|         8 => { | ||||
|             let mut output = Vec::new(); | ||||
|  | ||||
|             // Make sure we look at the latest index of the previous attr. | ||||
|             if let Some(previous_position) = construct_position(current_attr, 0).checked_sub(1) { | ||||
|                 let position = current_position.saturating_sub(7).max(previous_position); | ||||
|                 match dbg!(next_positions.binary_search(&position)) { | ||||
|                     Ok(i) => output.extend_from_slice(&next_positions[..=i]), | ||||
|                     Err(i) => if let Some(i) = i.checked_sub(1) { | ||||
|                         if let Some(positions) = next_positions.get(..=i) { | ||||
|                             output.extend_from_slice(positions) | ||||
|                         } | ||||
|                     }, | ||||
|                 } | ||||
|             } | ||||
|  | ||||
|             // Make sure the position doesn't overflow to the next attribute. | ||||
|             let position = (current_position + 8).min(construct_position(current_attr + 1, 0)); | ||||
|             match next_positions.binary_search(&position) { | ||||
|                 Ok(i) => output.extend_from_slice(&next_positions[i..]), | ||||
|                 Err(i) => if let Some(positions) = next_positions.get(i..) { | ||||
|                     output.extend_from_slice(positions); | ||||
|                 }, | ||||
|             } | ||||
|  | ||||
|             if output.is_empty() { | ||||
|                 None | ||||
|             } else { | ||||
|                 Some((8, output)) | ||||
|             } | ||||
|         } | ||||
|         _ => None, | ||||
|     fn is_complete(&self, positions: &[Vec<u32>]) -> bool { | ||||
|         positions.len() == self.0.len() | ||||
|     } | ||||
| } | ||||
|  | ||||
| pub struct BestProximity { | ||||
|     positions: Vec<Vec<u32>>, | ||||
|     best_proximities: Option<Vec<u32>>, | ||||
|     best_proximity: u32, | ||||
| } | ||||
|  | ||||
| impl BestProximity { | ||||
|     pub fn new(positions: Vec<Vec<u32>>) -> BestProximity { | ||||
|         BestProximity { positions, best_proximities: None } | ||||
|         BestProximity { positions, best_proximity: 0 } | ||||
|     } | ||||
|  | ||||
|     fn is_path_successful(&self, path: &Path) -> bool { | ||||
|         path.is_complete(&self.positions) && path.proximity() >= self.best_proximity | ||||
|     } | ||||
| } | ||||
|  | ||||
| @@ -137,59 +71,44 @@ impl Iterator for BestProximity { | ||||
|     type Item = (u32, Vec<Vec<u32>>); | ||||
|  | ||||
|     fn next(&mut self) -> Option<Self::Item> { | ||||
|         match &mut self.best_proximities { | ||||
|             Some(best_proximities) => { | ||||
|                 let expected_proximity = best_proximities.iter().sum::<u32>() + 1; | ||||
|                 dbg!(expected_proximity); | ||||
|         let mut output: Option<(u32, Vec<Vec<u32>>)> = None; | ||||
|  | ||||
|                 for (i, (win, proximity)) in self.positions.windows(2).zip(best_proximities.iter()).enumerate() { | ||||
|                     let (posa, posb) = (&win[0], &win[1]); | ||||
|                     dbg!(proximity, posa, posb); | ||||
|                     let expected_proximity = proximity + 1; | ||||
|                     let best_proximity = posa.iter().filter_map(|pa| { | ||||
|                         best_proximity_for(*pa, expected_proximity, posb).map(|res| (*pa, res)) | ||||
|                     }).min(); | ||||
|                     dbg!(best_proximity); | ||||
|                 } | ||||
|         unimplemented!("we must use and update self.best_proximity"); | ||||
|  | ||||
|                 None | ||||
|             }, | ||||
|             None => { | ||||
|                 let expected_proximity = 0; | ||||
|                 let mut best_results = Vec::new(); | ||||
|         loop { | ||||
|             let start = Path::new(&self.positions)?; | ||||
|             let result = dijkstra( | ||||
|                 &start, | ||||
|                 |p| p.successors(&self.positions), | ||||
|                 |p| self.is_path_successful(p) && output.as_ref().map_or(true, |paths| !paths.1.contains(&p.0)), | ||||
|             ); | ||||
|  | ||||
|                 for win in self.positions.windows(2) { | ||||
|                     let (posa, posb) = (&win[0], &win[1]); | ||||
|                     match best_results.last() { | ||||
|                         Some((start, _)) => { | ||||
|                             // We know from where we must continue searching for the best path. | ||||
|                             let (best_proximity, positions) = dbg!(best_proximity_for(*start, expected_proximity, posb).unwrap()); | ||||
|                             best_results.push((positions[0], best_proximity)); | ||||
|             match result { | ||||
|                 Some((mut paths, proximity)) => { | ||||
|                     let positions = paths.pop().unwrap(); | ||||
|  | ||||
|                     // If the current output is already set, compare the new path against it; otherwise initialize it. | ||||
|                     match &mut output { | ||||
|                         Some((best_proximity, paths)) => { | ||||
|                             // If the shortest path we found is bigger than the one requested | ||||
|                             // it means that we found all the paths with the same proximity and can | ||||
|                             // return those to the user. | ||||
|                             if proximity > *best_proximity { | ||||
|                                 break; | ||||
|                             } | ||||
|  | ||||
|                             // We add the new path to the output list as this path is known | ||||
|                             // to be the requested distance. | ||||
|                             paths.push(positions.0); | ||||
|                         }, | ||||
|                         None => { | ||||
|                             // This is the first loop, we need to find the best start of the path. | ||||
|                             let best_proximity = posa.iter().filter_map(|pa| { | ||||
|                                 best_proximity_for(*pa, expected_proximity, posb).map(|res| (*pa, res)) | ||||
|                             }).min(); | ||||
|                             let (pa, (best_proximity, positions)) = best_proximity.unwrap(); | ||||
|                             // We must save the best start of path we found. | ||||
|                             best_results.push((pa, 0)); | ||||
|                             // And the next associated position along with the proximity between those. | ||||
|                             best_results.push((positions[0], best_proximity)); | ||||
|                         } | ||||
|                         None => output = Some((positions.proximity(), vec![positions.0])), | ||||
|                     } | ||||
|                 } | ||||
|  | ||||
|                 if best_results.is_empty() { | ||||
|                     None | ||||
|                 } else { | ||||
|                     let proximity = best_results.windows(2).map(|ps| positions_proximity(ps[0].0, ps[1].0)).sum::<u32>(); | ||||
|                     self.best_proximities = Some(best_results.iter().skip(1).map(|(_, p)| *p).collect()); | ||||
|                     let best_positions = best_results.into_iter().map(|(x, _)| vec![x]).collect(); | ||||
|                     Some((proximity, best_positions)) | ||||
|                 } | ||||
|                 }, | ||||
|                 None => break, | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         output | ||||
|     } | ||||
| } | ||||
|  | ||||
| @@ -217,26 +136,4 @@ mod tests { | ||||
|         // assert_eq!(iter.next(), Some((4+5, vec![4, 1, 6]))); // 9 | ||||
|         // assert_eq!(iter.next(), None); | ||||
|     } | ||||
|  | ||||
|     #[test] | ||||
|     fn easy_best_proximity_for() { | ||||
|         // classic | ||||
|         assert_eq!(best_proximity_for(0, 0, &[0]),    Some((0, vec![0]))); | ||||
|         assert_eq!(best_proximity_for(0, 1, &[0]),    None); | ||||
|         assert_eq!(best_proximity_for(1, 1, &[0]),    Some((2, vec![0]))); | ||||
|         assert_eq!(best_proximity_for(0, 1, &[0, 1]), Some((1, vec![1]))); | ||||
|         assert_eq!(best_proximity_for(1, 1, &[0, 2]), Some((1, vec![2]))); | ||||
|         assert_eq!(best_proximity_for(1, 2, &[0, 2]), Some((2, vec![0]))); | ||||
|         assert_eq!(best_proximity_for(1, 2, &[0, 3]), Some((2, vec![0, 3]))); | ||||
|  | ||||
|         // limits | ||||
|         assert_eq!(best_proximity_for(2, 7, &[0, 9]),   Some((7, vec![9]))); | ||||
|         assert_eq!(best_proximity_for(12, 7, &[6, 19]), Some((7, vec![6, 19]))); | ||||
|  | ||||
|         // another attribute | ||||
|         assert_eq!(best_proximity_for(1000, 7, &[994, 1007]), Some((7, vec![1007]))); | ||||
|         assert_eq!(best_proximity_for(1004, 7, &[994, 1011]), Some((7, vec![1011]))); | ||||
|         assert_eq!(best_proximity_for(1004, 8, &[900, 913, 1000, 1012, 2012]), Some((8, vec![900, 913, 1012, 2012]))); | ||||
|         assert_eq!(best_proximity_for(1009, 8, &[900, 913, 1002, 1012, 2012]), Some((8, vec![900, 913, 1002, 2012]))); | ||||
|     } | ||||
| } | ||||
|   | ||||
		Reference in New Issue
	
	Block a user