mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-26 13:36:27 +00:00 
			
		
		
		
	Cache the prefix postings lists
This commit is contained in:
		| @@ -1,8 +1,10 @@ | ||||
| use std::collections::HashMap; | ||||
| use std::borrow::Cow; | ||||
|  | ||||
| use fst::{set::OpBuilder, SetBuilder}; | ||||
| use sdset::{duo::Union, SetOperation}; | ||||
| use fst::{set::OpBuilder, SetBuilder, IntoStreamer, Streamer}; | ||||
| use sdset::{duo::Union, SetOperation, SetBuf}; | ||||
| use serde::{Deserialize, Serialize}; | ||||
| use log::debug; | ||||
|  | ||||
| use crate::database::{MainT, UpdateT}; | ||||
| use crate::database::{UpdateEvent, UpdateEventsEmitter}; | ||||
| @@ -110,6 +112,7 @@ pub fn apply_documents_addition<'a, 'b>( | ||||
|     postings_lists_store: store::PostingsLists, | ||||
|     docs_words_store: store::DocsWords, | ||||
|     prefix_documents_cache_store: store::PrefixDocumentsCache, | ||||
|     prefix_postings_lists_cache_store: store::PrefixPostingsListsCache, | ||||
|     addition: Vec<HashMap<String, serde_json::Value>>, | ||||
| ) -> MResult<()> { | ||||
|     let mut documents_additions = HashMap::new(); | ||||
| @@ -180,7 +183,50 @@ pub fn apply_documents_addition<'a, 'b>( | ||||
|         &ranked_map, | ||||
|         number_of_inserted_documents, | ||||
|         indexer, | ||||
|     ) | ||||
|     )?; | ||||
|  | ||||
|  | ||||
|     // retrieve the words fst to compute all those prefixes | ||||
|     let words_fst = match main_store.words_fst(writer)? { | ||||
|         Some(fst) => fst, | ||||
|         None => return Ok(()), | ||||
|     }; | ||||
|  | ||||
|     // clear the prefixes | ||||
|     let pplc_store = prefix_postings_lists_cache_store; | ||||
|     pplc_store.clear(writer)?; | ||||
|  | ||||
|     const MAX_PREFIX_LENGTH: usize = 1; | ||||
|  | ||||
|     // compute prefixes and store those in the PrefixPostingsListsCache. | ||||
|     let mut stream = words_fst.into_stream(); | ||||
|     while let Some(input) = stream.next() { | ||||
|         for i in 1..=MAX_PREFIX_LENGTH { | ||||
|             let prefix = &input[..i]; | ||||
|             if let Some(postings_list) = postings_lists_store.postings_list(writer, prefix)? { | ||||
|                 if let (Ok(input), Ok(prefix)) = (std::str::from_utf8(input), std::str::from_utf8(prefix)) { | ||||
|                     debug!("{:?} postings list (prefix {:?}) length {}", input, prefix, postings_list.len()); | ||||
|                 } | ||||
|  | ||||
|                 // compute the new prefix postings lists | ||||
|                 let mut p = [0; 4]; | ||||
|                 let len = std::cmp::min(4, prefix.len()); | ||||
|                 p[..len].copy_from_slice(&prefix[..len]); | ||||
|  | ||||
|                 let previous = match pplc_store.prefix_postings_list(writer, p)? { | ||||
|                     Some(previous) => previous, | ||||
|                     None => Cow::Owned(SetBuf::default()), | ||||
|                 }; | ||||
|  | ||||
|                 let new_postings_list = Union::new(&postings_list, &previous).into_set_buf(); | ||||
|                 pplc_store.put_prefix_postings_list(writer, p, &new_postings_list)?; | ||||
|  | ||||
|                 debug!("new length {}", new_postings_list.len()); | ||||
|             } | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     Ok(()) | ||||
| } | ||||
|  | ||||
| pub fn apply_documents_partial_addition<'a, 'b>( | ||||
|   | ||||
| @@ -309,62 +309,10 @@ pub fn update_task<'a, 'b>( | ||||
|                 index.postings_lists, | ||||
|                 index.docs_words, | ||||
|                 index.prefix_documents_cache, | ||||
|                 index.prefix_postings_lists_cache, | ||||
|                 documents, | ||||
|             ); | ||||
|  | ||||
|             let words_fst = index.main.words_fst(writer)?.unwrap(); | ||||
|             let mut stream = words_fst.into_stream(); | ||||
|             let mut previous_char = None; | ||||
|             while let Some(input) = stream.next() { | ||||
|                 let (s, c) = match std::str::from_utf8(input) { | ||||
|                     Ok(s) => { | ||||
|                         let c = s.chars().next().unwrap(); | ||||
|                         (&s[..c.len_utf8()], c) | ||||
|                     }, | ||||
|                     Err(_) => continue, | ||||
|                 }; | ||||
|  | ||||
|                 match previous_char { | ||||
|                     Some(pc) if pc != c => { | ||||
|                         debug!("searching and caching {:?}", s); | ||||
|  | ||||
|                         let documents = bucket_sort( | ||||
|                             writer, | ||||
|                             s, | ||||
|                             0..20, | ||||
|                             None as Option<fn(DocumentId) -> bool>, | ||||
|                             Criteria::default(), | ||||
|                             None, | ||||
|                             index.main, | ||||
|                             index.postings_lists, | ||||
|                             index.documents_fields_counts, | ||||
|                             index.synonyms, | ||||
|                             index.prefix_documents_cache, | ||||
|                         ).unwrap(); | ||||
|  | ||||
|                         let mut prefix = [0; 4]; | ||||
|                         let len = cmp::min(4, s.len()); | ||||
|                         prefix[..len].copy_from_slice(&s.as_bytes()[..len]); | ||||
|  | ||||
|                         for (i, document) in documents.into_iter().enumerate() { | ||||
|                             index.prefix_documents_cache.put_prefix_document( | ||||
|                                 writer, | ||||
|                                 prefix, | ||||
|                                 i, | ||||
|                                 document.id, | ||||
|                                 &document.highlights, | ||||
|                             ).unwrap(); | ||||
|                         } | ||||
|  | ||||
|                         previous_char = Some(c) | ||||
|                     }, | ||||
|                     Some(_) => (), | ||||
|                     None => previous_char = Some(c), | ||||
|                 } | ||||
|             } | ||||
|  | ||||
|             // TODO we forget to do it for the last prefix char | ||||
|  | ||||
|             (update_type, result, start.elapsed()) | ||||
|         } | ||||
|         UpdateData::DocumentsPartial(documents) => { | ||||
|   | ||||
		Reference in New Issue
	
	Block a user