mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-10-26 13:36:27 +00:00
Merge #3866
3866: Update charabia v0.8.0 r=dureuill a=ManyTheFish
# Pull Request
Update Charabia:
- enhance Japanese segmentation
- enhance Latin tokenization
- words containing `_` are now properly segmented into several words
- brackets `{([])}` are no longer considered context separators, so words separated by brackets are now considered close together for the proximity ranking rule
- fixes #3815
- fixes #3778
- fixes [product#151](https://github.com/meilisearch/product/discussions/151)
> Important note: float numbers are now segmented around the `.`, so `3.22` is segmented as [`3`, `.`, `22`]; however, the middle dot isn't considered a hard separator, which means that searching for `3.22` finds documents containing `3.22`
Co-authored-by: ManyTheFish <many@meilisearch.com>
This commit is contained in:
@@ -814,10 +814,10 @@ fn extract_field(
|
||||
}
|
||||
}
|
||||
|
||||
fn format_fields<A: AsRef<[u8]>>(
|
||||
fn format_fields<'a>(
|
||||
document: &Document,
|
||||
field_ids_map: &FieldsIdsMap,
|
||||
builder: &MatcherBuilder<'_, A>,
|
||||
builder: &'a MatcherBuilder<'a>,
|
||||
formatted_options: &BTreeMap<FieldId, FormatOptions>,
|
||||
compute_matches: bool,
|
||||
displayable_ids: &BTreeSet<FieldId>,
|
||||
@@ -862,9 +862,9 @@ fn format_fields<A: AsRef<[u8]>>(
|
||||
Ok((matches_position, document))
|
||||
}
|
||||
|
||||
fn format_value<A: AsRef<[u8]>>(
|
||||
fn format_value<'a>(
|
||||
value: Value,
|
||||
builder: &MatcherBuilder<'_, A>,
|
||||
builder: &'a MatcherBuilder<'a>,
|
||||
format_options: Option<FormatOptions>,
|
||||
infos: &mut Vec<MatchBounds>,
|
||||
compute_matches: bool,
|
||||
|
||||
Reference in New Issue
Block a user