Compare commits

...

2 Commits

3 changed files with 123 additions and 2 deletions

View File

@ -1836,3 +1836,120 @@ async fn change_facet_casing() {
})
.await;
}
/// Exercises search over titles containing configured stop words, first with a
/// plain query and then with the same query wrapped in double quotes.
///
/// Both runs index the same five documents with the stop words `the`, `to`,
/// `your` and `Your`, assert a 200 status code, and compare `response["hits"]`
/// against the expected listing embedded below.
#[actix_rt::test]
async fn phrase_search_containing_stop_word() {
    // Fixture: four "… Train … Dragon" titles plus one unrelated title ("Gläss")
    // that neither expected result set contains.
    let documents = json!([
        { "title": "How to Train Your Dragon: The Hidden World", "id": "166428", "color": ["green", "red"], },
        { "title": "How Train Dragon", "id": "166429", "color": ["green", "red"], },
        { "title": "How bad Train good Dragon", "id": "166427", "color": ["green", "red"], },
        { "title": "How bad bad Train good good Dragon", "id": "166425", "color": ["green", "red"], },
        { "title": "Gläss", "id": "450465", "color": ["blue", "red"] }
    ]);

    // The settings are identical for both searches, so they are built once and shared.
    let settings = json!({"stopWords": ["the", "to", "your", "Your"]});
    let plain_query = json!({"q": "how to train your dragon"});
    let phrase_query = json!({"q": "\"how to train your dragon\""});

    // Plain query containing stop words: four hits are expected.
    test_settings_documents_indexing_swapping_and_search(
        &documents,
        &settings,
        &plain_query,
        |body, status| {
            assert_eq!(status, 200, "{}", body);
            // NOTE: the literal below is kept flush-left on purpose; re-indenting it
            // (or its closing delimiter) would change the text being compared.
            snapshot!(json_string!(body["hits"]), @r###"
[
{
"title": "How to Train Your Dragon: The Hidden World",
"id": "166428",
"color": [
"green",
"red"
]
},
{
"title": "How bad Train good Dragon",
"id": "166427",
"color": [
"green",
"red"
]
},
{
"title": "How Train Dragon",
"id": "166429",
"color": [
"green",
"red"
]
},
{
"title": "How bad bad Train good good Dragon",
"id": "166425",
"color": [
"green",
"red"
]
}
]
"###);
        },
    )
    .await;

    // Quoted (phrase) query containing stop words: only three hits are expected;
    // "How bad bad Train good good Dragon" is absent from this listing.
    test_settings_documents_indexing_swapping_and_search(
        &documents,
        &settings,
        &phrase_query,
        |body, status| {
            assert_eq!(status, 200, "{}", body);
            // NOTE: literal kept flush-left for the same reason as above.
            snapshot!(json_string!(body["hits"]), @r###"
[
{
"title": "How bad Train good Dragon",
"id": "166427",
"color": [
"green",
"red"
]
},
{
"title": "How to Train Your Dragon: The Hidden World",
"id": "166428",
"color": [
"green",
"red"
]
},
{
"title": "How Train Dragon",
"id": "166429",
"color": [
"green",
"red"
]
}
]
"###);
        },
    )
    .await;
}

View File

@ -174,7 +174,7 @@ impl<'a> PartialMatch<'a> {
let is_matching = match matching_words.first()? {
Some(word) => &token.lemma() == word,
// a None value in the phrase corresponds to a stop word,
// the walue is considered a match if the current token is categorized as a stop word.
// the value is considered a match if the current token is categorized as a stop word.
None => token.is_stopword(),
};

View File

@ -194,7 +194,11 @@ pub fn compute_phrase_docids(
return Ok(RoaringBitmap::new());
}
let mut candidates = None;
for word in words.iter().flatten().copied() {
for word in words.iter().copied() {
let Some(word) = word else {
continue;
};
if let Some(word_docids) = ctx.word_docids(None, Word::Original(word))? {
if let Some(candidates) = candidates.as_mut() {
*candidates &= word_docids;