fix(http, index): init analyzer with optional stop words

Next release

update tokenizer
This commit is contained in:
Alexey Shekhirin
2021-04-01 17:49:11 +03:00
parent f881e8691e
commit 51ba1bd7d3
3 changed files with 21 additions and 34 deletions

28
Cargo.lock generated
View File

@@ -1847,7 +1847,7 @@ dependencies = [
"log", "log",
"main_error", "main_error",
"meilisearch-error", "meilisearch-error",
"meilisearch-tokenizer 0.1.1 (git+https://github.com/meilisearch/Tokenizer.git?branch=main)", "meilisearch-tokenizer",
"memmap", "memmap",
"milli", "milli",
"mime", "mime",
@@ -1893,22 +1893,6 @@ dependencies = [
"whatlang", "whatlang",
] ]
[[package]]
name = "meilisearch-tokenizer"
version = "0.1.1"
source = "git+https://github.com/meilisearch/Tokenizer.git?branch=main#31ba3ff4a15501f12b7d37ac64ddce7c35a9757c"
dependencies = [
"character_converter",
"cow-utils",
"deunicode",
"fst",
"jieba-rs",
"once_cell",
"slice-group-by",
"unicode-segmentation",
"whatlang",
]
[[package]] [[package]]
name = "memchr" name = "memchr"
version = "2.3.4" version = "2.3.4"
@@ -1937,7 +1921,7 @@ dependencies = [
[[package]] [[package]]
name = "milli" name = "milli"
version = "0.1.0" version = "0.1.0"
source = "git+https://github.com/meilisearch/milli.git?rev=2bcdd8844c4ec9f6f8a34617ea0e4321fa633c0c#2bcdd8844c4ec9f6f8a34617ea0e4321fa633c0c" source = "git+https://github.com/meilisearch/milli.git?tag=v0.1.0#2bcdd8844c4ec9f6f8a34617ea0e4321fa633c0c"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"bstr", "bstr",
@@ -1957,7 +1941,7 @@ dependencies = [
"linked-hash-map", "linked-hash-map",
"log", "log",
"logging_timer", "logging_timer",
"meilisearch-tokenizer 0.1.1 (git+https://github.com/meilisearch/Tokenizer.git?tag=v0.2.0)", "meilisearch-tokenizer",
"memmap", "memmap",
"num-traits", "num-traits",
"obkv", "obkv",
@@ -2252,7 +2236,8 @@ checksum = "d4fd5641d01c8f18a23da7b6fe29298ff4b55afcccdf78973b24cf3175fee32e"
[[package]] [[package]]
name = "pest" name = "pest"
version = "2.1.3" version = "2.1.3"
source = "git+https://github.com/pest-parser/pest.git?rev=51fd1d49f1041f7839975664ef71fe15c7dcaf67#51fd1d49f1041f7839975664ef71fe15c7dcaf67" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "10f4872ae94d7b90ae48754df22fd42ad52ce740b8f370b03da4835417403e53"
dependencies = [ dependencies = [
"ucd-trie", "ucd-trie",
] ]
@@ -2260,8 +2245,7 @@ dependencies = [
[[package]] [[package]]
name = "pest" name = "pest"
version = "2.1.3" version = "2.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "git+https://github.com/pest-parser/pest.git?rev=51fd1d49f1041f7839975664ef71fe15c7dcaf67#51fd1d49f1041f7839975664ef71fe15c7dcaf67"
checksum = "10f4872ae94d7b90ae48754df22fd42ad52ce740b8f370b03da4835417403e53"
dependencies = [ dependencies = [
"ucd-trie", "ucd-trie",
] ]

View File

@@ -35,14 +35,14 @@ futures-util = "0.3.8"
grenad = { git = "https://github.com/Kerollmops/grenad.git", rev = "3adcb26" } grenad = { git = "https://github.com/Kerollmops/grenad.git", rev = "3adcb26" }
heed = "0.10.6" heed = "0.10.6"
http = "0.2.1" http = "0.2.1"
indexmap = { version = "1.3.2", features = ["serde-1"] } indexmap = { version = "1.3.2", features = ["serde-1"] }
itertools = "0.10.0" itertools = "0.10.0"
log = "0.4.8" log = "0.4.8"
main_error = "0.1.0" main_error = "0.1.0"
meilisearch-error = { path = "../meilisearch-error" } meilisearch-error = { path = "../meilisearch-error" }
meilisearch-tokenizer = { git = "https://github.com/meilisearch/Tokenizer.git", branch = "main" } meilisearch-tokenizer = { git = "https://github.com/meilisearch/Tokenizer.git", tag = "v0.2.0" }
memmap = "0.7.0" memmap = "0.7.0"
milli = { git = "https://github.com/meilisearch/milli.git", rev = "2bcdd8844c4ec9f6f8a34617ea0e4321fa633c0c" } milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.1.0" }
mime = "0.3.16" mime = "0.3.16"
once_cell = "1.5.2" once_cell = "1.5.2"
parking_lot = "0.11.1" parking_lot = "0.11.1"
@@ -66,14 +66,14 @@ oxidized-json-checker = "0.3.2"
[dependencies.sentry] [dependencies.sentry]
default-features = false default-features = false
features = [ features = [
"with_client_implementation", "with_client_implementation",
"with_panic", "with_panic",
"with_failure", "with_failure",
"with_device_info", "with_device_info",
"with_rust_info", "with_rust_info",
"with_reqwest_transport", "with_reqwest_transport",
"with_rustls", "with_rustls",
"with_env_logger" "with_env_logger"
] ]
optional = true optional = true
version = "0.18.1" version = "0.18.1"

View File

@@ -155,7 +155,10 @@ pub struct Highlighter<'a, A> {
impl<'a, A: AsRef<[u8]>> Highlighter<'a, A> { impl<'a, A: AsRef<[u8]>> Highlighter<'a, A> {
pub fn new(stop_words: &'a fst::Set<A>) -> Self { pub fn new(stop_words: &'a fst::Set<A>) -> Self {
let analyzer = Analyzer::new(AnalyzerConfig::default_with_stopwords(stop_words)); let mut config = AnalyzerConfig::default();
config.stop_words(stop_words);
let analyzer = Analyzer::new(config);
Self { analyzer } Self { analyzer }
} }