Implement a first version of stop-words support

The front must provide a BTreeSet containing the stop words
The stop_words are set to None if an empty set is provided
Add the stop words to the http-ui interface

Use maplit in the tests
and remove the useless drop(rtxn) calls at the end of all tests
This commit is contained in:
tamo
2021-03-29 19:15:47 +02:00
parent 62a8f1d707
commit a2f46029c7
7 changed files with 203 additions and 56 deletions

View File

@ -410,6 +410,8 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
None => fields_ids_map.iter().map(|(id, _name)| id).collect(),
};
let stop_words = self.index.stop_words(self.wtxn)?;
let stop_words = stop_words.as_ref();
let linked_hash_map_size = self.linked_hash_map_size;
let max_nb_chunks = self.max_nb_chunks;
let max_memory = self.max_memory;
@ -436,7 +438,6 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
let readers = rayon::iter::repeatn(documents, num_threads)
.enumerate()
.map(|(i, documents)| {
let stop_words = fst::Set::default();
let store = Store::new(
searchable_fields.clone(),
faceted_fields.clone(),
@ -446,7 +447,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
chunk_compression_type,
chunk_compression_level,
chunk_fusing_shrink_size,
&stop_words,
stop_words,
)?;
store.index(
documents,

View File

@ -86,7 +86,7 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
chunk_compression_type: CompressionType,
chunk_compression_level: Option<u32>,
chunk_fusing_shrink_size: Option<u64>,
stop_words: &'s Set<A>,
stop_words: Option<&'s Set<A>>,
) -> anyhow::Result<Self>
{
// We divide the max memory by the number of sorter the Store have.
@ -141,7 +141,11 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
create_writer(chunk_compression_type, chunk_compression_level, f)
})?;
let analyzer = Analyzer::new(AnalyzerConfig::default_with_stopwords(stop_words));
let mut config = AnalyzerConfig::default();
if let Some(stop_words) = stop_words {
config.stop_words(stop_words);
}
let analyzer = Analyzer::new(config);
Ok(Store {
// Indexing parameters.