mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-25 21:16:28 +00:00 
			
		
		
		
	Merge #4945
4945: Add swedish in default pipelines r=dureuill a=ManyTheFish
# Summary
## Fix Swedish support
In Swedish, the characters `å`/`ä`/`ö` are completely different from `a` or `o` and should not be normalized to the same character.
Because the Swedish specialized pipeline was not activated by default, these characters were normalized even with the following settings:
```json
{
  "localizedAttributes": [ { "locales": ["swe"], "attributePatterns": ["*"] } ]
}
```
## Update Charabia adding German support
German segmentation will now be activated using the setting:
```json
{
  "localizedAttributes": [ { "locales": ["deu"], "attributePatterns": ["*"] } ]
}
```
# TODO
- [x] Activate Swedish Pipeline
- [x] Add a test to avoid future regressions
- [x] Update Charabia
Co-authored-by: ManyTheFish <many@meilisearch.com>
			
			
This commit is contained in:
		| @@ -17,7 +17,7 @@ bincode = "1.3.3" | ||||
| bstr = "1.9.1" | ||||
| bytemuck = { version = "1.16.1", features = ["extern_crate_alloc"] } | ||||
| byteorder = "1.5.0" | ||||
| charabia = { version = "0.9.0", default-features = false } | ||||
| charabia = { version = "0.9.1", default-features = false } | ||||
| concat-arrays = "0.1.2" | ||||
| crossbeam-channel = "0.5.13" | ||||
| deserr = "0.6.2" | ||||
| @@ -106,6 +106,8 @@ all-tokenizations = [ | ||||
|     "charabia/greek", | ||||
|     "charabia/khmer", | ||||
|     "charabia/vietnamese", | ||||
|     "charabia/swedish-recomposition", | ||||
|     "charabia/german-segmentation", | ||||
| ] | ||||
|  | ||||
| # Use POSIX semaphores instead of SysV semaphores in LMDB | ||||
| @@ -138,6 +140,9 @@ khmer = ["charabia/khmer"] | ||||
| # allow vietnamese specialized tokenization | ||||
| vietnamese = ["charabia/vietnamese"] | ||||
|  | ||||
| # allow german specialized tokenization | ||||
| german = ["charabia/german-segmentation"] | ||||
|  | ||||
| # force swedish character recomposition | ||||
| swedish-recomposition = ["charabia/swedish-recomposition"] | ||||
|  | ||||
|   | ||||
		Reference in New Issue
	
	Block a user