mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-09-14 08:46:26 +00:00
Take stop words into account
This commit is contained in:
@ -21,6 +21,7 @@ pub fn extract_docid_word_positions<R: io::Read>(
|
||||
mut obkv_documents: grenad::Reader<R>,
|
||||
indexer: GrenadParameters,
|
||||
searchable_fields: &Option<HashSet<FieldId>>,
|
||||
stop_words: Option<&fst::Set<&[u8]>>,
|
||||
) -> Result<(RoaringBitmap, grenad::Reader<File>)> {
|
||||
let max_memory = indexer.max_memory_by_thread();
|
||||
|
||||
@ -35,6 +36,10 @@ pub fn extract_docid_word_positions<R: io::Read>(
|
||||
|
||||
let mut key_buffer = Vec::new();
|
||||
let mut field_buffer = String::new();
|
||||
let mut config = AnalyzerConfig::default();
|
||||
if let Some(stop_words) = stop_words {
|
||||
config.stop_words(stop_words);
|
||||
}
|
||||
// Build the analyzer from `config` rather than a fresh default configuration:
// `config` is the value that received the optional stop words, so passing a new
// `AnalyzerConfig::default()` here would silently drop them.
// The analyzer's type parameter is left to inference so it matches `config`.
let analyzer = Analyzer::new(config);
|
||||
|
||||
while let Some((key, value)) = obkv_documents.next()? {
|
||||
|
@ -37,6 +37,7 @@ pub(crate) fn data_from_obkv_documents(
|
||||
lmdb_writer_sx: Sender<TypedChunk>,
|
||||
searchable_fields: Option<HashSet<FieldId>>,
|
||||
faceted_fields: HashSet<FieldId>,
|
||||
stop_words: Option<fst::Set<&[u8]>>,
|
||||
) -> Result<()> {
|
||||
let result: Result<(Vec<_>, (Vec<_>, Vec<_>))> = obkv_chunks
|
||||
.par_bridge()
|
||||
@ -54,6 +55,7 @@ pub(crate) fn data_from_obkv_documents(
|
||||
documents_chunk.clone(),
|
||||
indexer.clone(),
|
||||
&searchable_fields,
|
||||
stop_words.as_ref(),
|
||||
)?;
|
||||
|
||||
// send documents_ids to DB writer
|
||||
|
Reference in New Issue
Block a user