mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-26 05:26:27 +00:00 
			
		
		
		
	Optimize facet sort
5 to 10x speedup
This commit is contained in:
		| @@ -1558,19 +1558,32 @@ fn retrieve_documents<S: AsRef<str>>( | ||||
|         })? | ||||
|     } | ||||
|  | ||||
|     let mut facet_sort = None; | ||||
|     if let Some(sort) = sort_criteria { | ||||
|         candidates = recursive_facet_sort(index, &rtxn, &sort, candidates)?; | ||||
|         facet_sort = Some(recursive_facet_sort(index, &rtxn, &sort, &candidates)?) | ||||
|     } | ||||
|  | ||||
|     let (it, number_of_documents) = { | ||||
|     let (it, number_of_documents) = if let Some(facet_sort) = &facet_sort { | ||||
|         let number_of_documents = candidates.len(); | ||||
|         let iter = facet_sort.iter()?; | ||||
|         ( | ||||
|             itertools::Either::Left(some_documents( | ||||
|                 index, | ||||
|                 &rtxn, | ||||
|                 iter.map(|d| d.unwrap()).skip(offset).take(limit), | ||||
|                 retrieve_vectors, | ||||
|             )?), | ||||
|             number_of_documents, | ||||
|         ) | ||||
|     } else { | ||||
|         let number_of_documents = candidates.len(); | ||||
|         ( | ||||
|             some_documents( | ||||
|             itertools::Either::Right(some_documents( | ||||
|                 index, | ||||
|                 &rtxn, | ||||
|                 candidates.into_iter().skip(offset).take(limit), | ||||
|                 retrieve_vectors, | ||||
|             )?, | ||||
|             )?), | ||||
|             number_of_documents, | ||||
|         ) | ||||
|     }; | ||||
|   | ||||
| @@ -1,78 +1,256 @@ | ||||
| use roaring::RoaringBitmap; | ||||
| use heed::Database; | ||||
| use crate::{heed_codec::{facet::{FacetGroupKeyCodec, FacetGroupValueCodec}, BytesRefCodec}, search::{facet::{ascending_facet_sort, descending_facet_sort}, new::check_sort_criteria}, AscDesc, Member}; | ||||
| use crate::{heed_codec::{facet::{FacetGroupKeyCodec, FacetGroupValueCodec}, BytesRefCodec}, search::{facet::{ascending_facet_sort, descending_facet_sort}, new::check_sort_criteria}, AscDesc, DocumentId, Member}; | ||||
|  | ||||
| fn recursive_facet_sort_inner<'t>( | ||||
|     rtxn: &'t heed::RoTxn<'t>, | ||||
| /// Builder for a [`SortedDocumentsIterator`]. | ||||
| /// Most builders won't ever be built, because pagination will skip them. | ||||
| pub struct SortedDocumentsIteratorBuilder<'ctx> { | ||||
|     rtxn: &'ctx heed::RoTxn<'ctx>, | ||||
|     number_db: Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>, | ||||
|     string_db: Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>, | ||||
|     fields: &[(u16, bool)], | ||||
|     fields: &'ctx [(u16, bool)], | ||||
|     candidates: RoaringBitmap, | ||||
| ) -> heed::Result<RoaringBitmap> { | ||||
|     let (field_id, ascending) = match fields.first() { | ||||
|         Some(first) => *first, | ||||
|         None => return Ok(candidates), | ||||
|     }; | ||||
| } | ||||
|  | ||||
|     let (number_iter, string_iter) = if ascending { | ||||
|         let number_iter = ascending_facet_sort( | ||||
| impl<'ctx> SortedDocumentsIteratorBuilder<'ctx> { | ||||
|     /// Performs the sort and builds a [`SortedDocumentsIterator`]. | ||||
|     fn build(self) -> heed::Result<SortedDocumentsIterator<'ctx>> { | ||||
|         let SortedDocumentsIteratorBuilder { | ||||
|             rtxn, | ||||
|             number_db, | ||||
|             field_id, | ||||
|             candidates.clone(), | ||||
|         )?; | ||||
|         let string_iter = ascending_facet_sort( | ||||
|             rtxn, | ||||
|             string_db, | ||||
|             field_id, | ||||
|             fields, | ||||
|             candidates, | ||||
|         )?; | ||||
|         } = self; | ||||
|         let size = candidates.len() as usize; | ||||
|  | ||||
|         (itertools::Either::Left(number_iter), itertools::Either::Left(string_iter)) | ||||
|     } else { | ||||
|         let number_iter = descending_facet_sort( | ||||
|             rtxn, | ||||
|             number_db, | ||||
|             field_id, | ||||
|             candidates.clone(), | ||||
|         )?; | ||||
|         let string_iter = descending_facet_sort( | ||||
|             rtxn, | ||||
|             string_db, | ||||
|             field_id, | ||||
|             candidates, | ||||
|         )?; | ||||
|         // There is no point sorting a 1-element array | ||||
|         if size <= 1 { | ||||
|             return Ok(SortedDocumentsIterator::Leaf { | ||||
|                 size, | ||||
|                 values: Box::new(candidates.into_iter()), | ||||
|             }); | ||||
|         } | ||||
|  | ||||
|         (itertools::Either::Right(number_iter), itertools::Either::Right(string_iter)) | ||||
|     }; | ||||
|         // There is no variable to sort on | ||||
|         let Some((field_id, ascending)) = fields.first().copied() else { | ||||
|             return Ok(SortedDocumentsIterator::Leaf { | ||||
|                 size, | ||||
|                 values: Box::new(candidates.into_iter()), | ||||
|             }); | ||||
|         }; | ||||
|  | ||||
|     let chained_iter = number_iter.chain(string_iter); | ||||
|     let mut result = RoaringBitmap::new(); | ||||
|     for part in chained_iter { | ||||
|         let (inner_candidates, _) = part?; | ||||
|         if inner_candidates.len() <= 1 || fields.len() <= 1 { | ||||
|             result |= inner_candidates; | ||||
|         // Perform the sort on the first field | ||||
|         let (number_iter, string_iter) = if ascending { | ||||
|             let number_iter = ascending_facet_sort( | ||||
|                 rtxn, | ||||
|                 number_db, | ||||
|                 field_id, | ||||
|                 candidates.clone(), | ||||
|             )?; | ||||
|             let string_iter = ascending_facet_sort( | ||||
|                 rtxn, | ||||
|                 string_db, | ||||
|                 field_id, | ||||
|                 candidates, | ||||
|             )?; | ||||
|  | ||||
|             (itertools::Either::Left(number_iter), itertools::Either::Left(string_iter)) | ||||
|         } else { | ||||
|             let inner_candidates = recursive_facet_sort_inner( | ||||
|             let number_iter = descending_facet_sort( | ||||
|                 rtxn, | ||||
|                 number_db, | ||||
|                 field_id, | ||||
|                 candidates.clone(), | ||||
|             )?; | ||||
|             let string_iter = descending_facet_sort( | ||||
|                 rtxn, | ||||
|                 string_db, | ||||
|                 field_id, | ||||
|                 candidates, | ||||
|             )?; | ||||
|  | ||||
|             (itertools::Either::Right(number_iter), itertools::Either::Right(string_iter)) | ||||
|         }; | ||||
|  | ||||
|         // Create builders for the next level of the tree | ||||
|         let number_db2 = number_db; | ||||
|         let string_db2 = string_db; | ||||
|         let number_iter = number_iter.map(move |r| -> heed::Result<SortedDocumentsIteratorBuilder> { | ||||
|             let (docids, _bytes) = r?; | ||||
|             Ok(SortedDocumentsIteratorBuilder { | ||||
|                 rtxn, | ||||
|                 number_db, | ||||
|                 string_db, | ||||
|                 &fields[1..], | ||||
|                 inner_candidates, | ||||
|             )?; | ||||
|             result |= inner_candidates; | ||||
|         } | ||||
|     } | ||||
|                 fields: &fields[1..], | ||||
|                 candidates: docids, | ||||
|             }) | ||||
|         }); | ||||
|         let string_iter = string_iter.map(move |r| -> heed::Result<SortedDocumentsIteratorBuilder> { | ||||
|             let (docids, _bytes) = r?; | ||||
|             Ok(SortedDocumentsIteratorBuilder { | ||||
|                 rtxn, | ||||
|                 number_db: number_db2, | ||||
|                 string_db: string_db2, | ||||
|                 fields: &fields[1..], | ||||
|                 candidates: docids, | ||||
|             }) | ||||
|         }); | ||||
|  | ||||
|     Ok(result) | ||||
|         Ok(SortedDocumentsIterator::Branch { | ||||
|             current_child: None, | ||||
|             next_children_size: size, | ||||
|             next_children: Box::new(number_iter.chain(string_iter)), | ||||
|         }) | ||||
|     } | ||||
| } | ||||
|  | ||||
| pub fn recursive_facet_sort<'t>( | ||||
|     index: &crate::Index, | ||||
|     rtxn: &'t heed::RoTxn<'t>, | ||||
| /// A [`SortedDocumentsIterator`] allows efficient access to a continuous range of sorted documents. | ||||
| /// This is ideal in the context of paginated queries in which only a small number of documents are needed at a time. | ||||
| /// Search operations will only be performed upon access. | ||||
| pub enum SortedDocumentsIterator<'ctx> { | ||||
|     Leaf { | ||||
|         /// The exact number of documents remaining | ||||
|         size: usize, | ||||
|         values: Box<dyn Iterator<Item = DocumentId> + 'ctx> | ||||
|     }, | ||||
|     Branch { | ||||
|         /// The current child, got from the children iterator | ||||
|         current_child: Option<Box<SortedDocumentsIterator<'ctx>>>, | ||||
|         /// The exact number of documents remaining, excluding documents in the current child | ||||
|         next_children_size: usize, | ||||
|         /// Iterators to become the current child once it is exhausted | ||||
|         next_children: Box<dyn Iterator<Item = heed::Result<SortedDocumentsIteratorBuilder<'ctx>>> + 'ctx>, | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl SortedDocumentsIterator<'_> { | ||||
|     /// Takes care of updating the current child if it is `None`, and also updates the size | ||||
|     fn update_current<'ctx>(current_child: &mut Option<Box<SortedDocumentsIterator<'ctx>>>, next_children_size: &mut usize, next_children: &mut Box<dyn Iterator<Item = heed::Result<SortedDocumentsIteratorBuilder<'ctx>>> + 'ctx>) -> heed::Result<()> { | ||||
|         if current_child.is_none() { | ||||
|             *current_child = match next_children.next() { | ||||
|                 Some(Ok(builder)) => { | ||||
|                     let next_child = Box::new(builder.build()?); | ||||
|                     *next_children_size -= next_child.size_hint().0; | ||||
|                     Some(next_child) | ||||
|                 }, | ||||
|                 Some(Err(e)) => return Err(e), | ||||
|                 None => return Ok(()), | ||||
|             }; | ||||
|         } | ||||
|         Ok(()) | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl Iterator for SortedDocumentsIterator<'_> { | ||||
|     type Item = heed::Result<DocumentId>; | ||||
|  | ||||
|     fn nth(&mut self, n: usize) -> Option<Self::Item> { | ||||
|         // If it's at the leaf level, just forward the call to the values iterator | ||||
|         let (current_child, next_children, next_children_size) = match self { | ||||
|             SortedDocumentsIterator::Leaf { values, size } => { | ||||
|                 *size = size.saturating_sub(n); | ||||
|                 return values.nth(n).map(Ok) | ||||
|             }, | ||||
|             SortedDocumentsIterator::Branch { current_child, next_children, next_children_size } => (current_child, next_children, next_children_size), | ||||
|         }; | ||||
|  | ||||
|         // Otherwise don't directly iterate over children, skip them if we know we will go further | ||||
|         let mut to_skip = n - 1; | ||||
|         while to_skip > 0 { | ||||
|             if let Err(e) = SortedDocumentsIterator::update_current(current_child, next_children_size, next_children) { | ||||
|                 return Some(Err(e)); | ||||
|             } | ||||
|             let Some(inner) = current_child else { | ||||
|                 return None; // No more inner iterators, everything has been consumed. | ||||
|             }; | ||||
|  | ||||
|             if to_skip >= inner.size_hint().0 { | ||||
|                 // The current child isn't large enough to contain the nth element. | ||||
|                 // Skip it and continue with the next one. | ||||
|                 to_skip -= inner.size_hint().0; | ||||
|                 *current_child = None; | ||||
|                 continue; | ||||
|             } else { | ||||
|                 // The current iterator is large enough, so we can forward the call to it. | ||||
|                 return inner.nth(to_skip + 1); | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         self.next() | ||||
|     } | ||||
|  | ||||
|     fn size_hint(&self) -> (usize, Option<usize>) { | ||||
|         let size = match self { | ||||
|             SortedDocumentsIterator::Leaf { size, .. } => *size, | ||||
|             SortedDocumentsIterator::Branch { next_children_size, current_child: Some(current_child), .. } => current_child.size_hint().0 + next_children_size, | ||||
|             SortedDocumentsIterator::Branch { next_children_size, current_child: None, .. } => *next_children_size, | ||||
|         }; | ||||
|  | ||||
|         (size, Some(size)) | ||||
|     } | ||||
|  | ||||
|     fn next(&mut self) -> Option<Self::Item> { | ||||
|         match self { | ||||
|             SortedDocumentsIterator::Leaf { values, size } => { | ||||
|                 let result = values.next().map(Ok); | ||||
|                 if result.is_some() { | ||||
|                     *size -= 1; | ||||
|                 } | ||||
|                 result | ||||
|             }, | ||||
|             SortedDocumentsIterator::Branch { current_child, next_children_size, next_children } => { | ||||
|                 let mut result = None; | ||||
|                 while result.is_none() { | ||||
|                     // Ensure we have selected an iterator to work with | ||||
|                     if let Err(e) = SortedDocumentsIterator::update_current(current_child, next_children_size, next_children) { | ||||
|                         return Some(Err(e)); | ||||
|                     } | ||||
|                     let Some(inner) = current_child else { | ||||
|                         return None; | ||||
|                     }; | ||||
|  | ||||
|                     result = inner.next(); | ||||
|  | ||||
|                     // If the current iterator is exhausted, we need to try the next one | ||||
|                     if result.is_none() { | ||||
|                         *current_child = None; | ||||
|                     } | ||||
|                 } | ||||
|                 result | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// A structure owning the data needed during the lifetime of a [`SortedDocumentsIterator`]. | ||||
| pub struct SortedDocuments<'ctx> { | ||||
|     rtxn: &'ctx heed::RoTxn<'ctx>, | ||||
|     fields: Vec<(u16, bool)>, | ||||
|     number_db: Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>, | ||||
|     string_db: Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>, | ||||
|     candidates: &'ctx RoaringBitmap, | ||||
| } | ||||
|  | ||||
| impl <'ctx> SortedDocuments<'ctx> { | ||||
|     pub fn iter(&'ctx self) -> heed::Result<SortedDocumentsIterator<'ctx>> { | ||||
|         let builder = SortedDocumentsIteratorBuilder { | ||||
|             rtxn: self.rtxn, | ||||
|             number_db: self.number_db, | ||||
|             string_db: self.string_db, | ||||
|             fields: &self.fields, | ||||
|             candidates: self.candidates.clone(), | ||||
|         }; | ||||
|         builder.build() | ||||
|     } | ||||
| } | ||||
|  | ||||
| pub fn recursive_facet_sort<'ctx>( | ||||
|     index: &'ctx crate::Index, | ||||
|     rtxn: &'ctx heed::RoTxn<'ctx>, | ||||
|     sort: &[AscDesc], | ||||
|     candidates: RoaringBitmap, | ||||
| ) -> crate::Result<RoaringBitmap> { | ||||
|     candidates: &'ctx RoaringBitmap, | ||||
| ) -> crate::Result<SortedDocuments<'ctx>> { | ||||
|     check_sort_criteria(index, rtxn, Some(sort))?; | ||||
|  | ||||
|     let mut fields = Vec::new(); | ||||
| @@ -96,6 +274,11 @@ pub fn recursive_facet_sort<'t>( | ||||
|         .facet_id_string_docids | ||||
|         .remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>(); | ||||
|  | ||||
|     let candidates = recursive_facet_sort_inner(rtxn, number_db, string_db, &fields, candidates)?; | ||||
|     Ok(candidates) | ||||
|     Ok(SortedDocuments { | ||||
|         rtxn, | ||||
|         fields, | ||||
|         number_db, | ||||
|         string_db, | ||||
|         candidates, | ||||
|     }) | ||||
| } | ||||
|   | ||||
		Reference in New Issue
	
	Block a user