Compare commits

..

1 Commits

Author SHA1 Message Date
ManyTheFish
a87c390244 Activate only the necessary features for Japanese 2024-07-08 18:17:11 +02:00
13 changed files with 59 additions and 75 deletions

View File

@@ -1,6 +1,4 @@
name: Look for flaky tests
env:
ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
on:
workflow_dispatch:
schedule:

View File

@@ -1,6 +1,5 @@
name: Run the indexing fuzzer
env:
ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
on:
push:
branches:

View File

@@ -15,8 +15,6 @@ jobs:
debian:
name: Publish debian packagge
env:
ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
runs-on: ubuntu-latest
needs: check-version
container:

View File

@@ -35,8 +35,6 @@ jobs:
publish-linux:
name: Publish binary for Linux
runs-on: ubuntu-latest
env:
ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
needs: check-version
container:
# Use ubuntu-18.04 to compile with glibc 2.27
@@ -134,8 +132,6 @@ jobs:
name: Publish binary for aarch64
runs-on: ubuntu-latest
needs: check-version
env:
ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
container:
# Use ubuntu-18.04 to compile with glibc 2.27
image: ubuntu:18.04

View File

@@ -21,8 +21,6 @@ jobs:
test-linux:
name: Tests on ubuntu-18.04
runs-on: ubuntu-latest
env:
ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
container:
# Use ubuntu-18.04 to compile with glibc 2.27, which are the production expectations
image: ubuntu:18.04
@@ -79,8 +77,6 @@ jobs:
test-all-features:
name: Tests almost all features
runs-on: ubuntu-latest
env:
ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
container:
# Use ubuntu-18.04 to compile with glibc 2.27, which are the production expectations
image: ubuntu:18.04
@@ -104,8 +100,6 @@ jobs:
test-disabled-tokenization:
name: Test disabled tokenization
env:
ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
runs-on: ubuntu-latest
container:
image: ubuntu:18.04
@@ -133,8 +127,6 @@ jobs:
# We run tests in debug also, to make sure that the debug_assertions are hit
test-debug:
name: Run tests in debug
env:
ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
runs-on: ubuntu-latest
container:
# Use ubuntu-18.04 to compile with glibc 2.27, which are the production expectations

42
Cargo.lock generated
View File

@@ -503,7 +503,7 @@ checksum = "8c3c1a368f70d6cf7302d78f8f7093da241fb8e8807c05cc9e51a125895a6d5b"
[[package]]
name = "benchmarks"
version = "1.9.1"
version = "1.9.0"
dependencies = [
"anyhow",
"bytes",
@@ -648,7 +648,7 @@ dependencies = [
[[package]]
name = "build-info"
version = "1.9.1"
version = "1.9.0"
dependencies = [
"anyhow",
"time",
@@ -1579,7 +1579,7 @@ dependencies = [
[[package]]
name = "dump"
version = "1.9.1"
version = "1.9.0"
dependencies = [
"anyhow",
"big_s",
@@ -1804,7 +1804,7 @@ dependencies = [
[[package]]
name = "file-store"
version = "1.9.1"
version = "1.9.0"
dependencies = [
"faux",
"tempfile",
@@ -1827,7 +1827,7 @@ dependencies = [
[[package]]
name = "filter-parser"
version = "1.9.1"
version = "1.9.0"
dependencies = [
"insta",
"nom",
@@ -1847,7 +1847,7 @@ dependencies = [
[[package]]
name = "flatten-serde-json"
version = "1.9.1"
version = "1.9.0"
dependencies = [
"criterion",
"serde_json",
@@ -1965,7 +1965,7 @@ dependencies = [
[[package]]
name = "fuzzers"
version = "1.9.1"
version = "1.9.0"
dependencies = [
"arbitrary",
"clap",
@@ -2452,7 +2452,7 @@ checksum = "206ca75c9c03ba3d4ace2460e57b189f39f43de612c2f85836e65c929701bb2d"
[[package]]
name = "index-scheduler"
version = "1.9.1"
version = "1.9.0"
dependencies = [
"anyhow",
"arroy",
@@ -2649,7 +2649,7 @@ dependencies = [
[[package]]
name = "json-depth-checker"
version = "1.9.1"
version = "1.9.0"
dependencies = [
"criterion",
"serde_json",
@@ -3257,7 +3257,7 @@ checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771"
[[package]]
name = "meili-snap"
version = "1.9.1"
version = "1.9.0"
dependencies = [
"insta",
"md5",
@@ -3266,7 +3266,7 @@ dependencies = [
[[package]]
name = "meilisearch"
version = "1.9.1"
version = "1.9.0"
dependencies = [
"actix-cors",
"actix-http",
@@ -3358,7 +3358,7 @@ dependencies = [
[[package]]
name = "meilisearch-auth"
version = "1.9.1"
version = "1.9.0"
dependencies = [
"base64 0.21.7",
"enum-iterator",
@@ -3377,7 +3377,7 @@ dependencies = [
[[package]]
name = "meilisearch-types"
version = "1.9.1"
version = "1.9.0"
dependencies = [
"actix-web",
"anyhow",
@@ -3407,7 +3407,7 @@ dependencies = [
[[package]]
name = "meilitool"
version = "1.9.1"
version = "1.9.0"
dependencies = [
"anyhow",
"clap",
@@ -3446,7 +3446,7 @@ dependencies = [
[[package]]
name = "milli"
version = "1.9.1"
version = "1.9.0"
dependencies = [
"arroy",
"big_s",
@@ -3886,7 +3886,7 @@ checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e"
[[package]]
name = "permissive-json-pointer"
version = "1.9.1"
version = "1.9.0"
dependencies = [
"big_s",
"serde_json",
@@ -5098,9 +5098,9 @@ dependencies = [
[[package]]
name = "time"
version = "0.3.36"
version = "0.3.34"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5dfd88e563464686c916c7e46e623e520ddc6d79fa6641390f2e3fa86e83e885"
checksum = "c8248b6521bb14bc45b4067159b9b6ad792e2d6d754d6c41fb50e29fefe38749"
dependencies = [
"deranged",
"itoa",
@@ -5121,9 +5121,9 @@ checksum = "ef927ca75afb808a4d64dd374f00a2adf8d0fcff8e7b184af886c3c87ec4a3f3"
[[package]]
name = "time-macros"
version = "0.2.18"
version = "0.2.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3f252a68540fde3a3877aeea552b832b40ab9a69e318efd078774a01ddee1ccf"
checksum = "7ba3a3ef41e6672a2f0f001392bb5dcd3ff0a9992d618ca761a11c3121547774"
dependencies = [
"num-conv",
"time-core",
@@ -6042,7 +6042,7 @@ dependencies = [
[[package]]
name = "xtask"
version = "1.9.1"
version = "1.9.0"
dependencies = [
"anyhow",
"build-info",

View File

@@ -22,7 +22,7 @@ members = [
]
[workspace.package]
version = "1.9.1"
version = "1.9.0"
authors = [
"Quentin de Quelen <quentin@dequelen.me>",
"Clément Renault <clement@meilisearch.com>",

View File

@@ -17,7 +17,7 @@ RUN set -eux; \
if [ "$apkArch" = "aarch64" ]; then \
export JEMALLOC_SYS_WITH_LG_PAGE=16; \
fi && \
cargo build --release -p meilisearch -p meilitool
cargo build --release -p meilisearch -p meilitool --no-default-features --features "analytics mini-dashboard japanese"
# Run
FROM alpine:3.16

View File

@@ -644,12 +644,7 @@ async fn get_document_with_vectors() {
{
"id": 1,
"name": "echo",
"_vectors": {
"manual": {
"embeddings": [],
"regenerate": false
}
}
"_vectors": {}
}
],
"offset": 0,
@@ -705,12 +700,7 @@ async fn get_document_with_vectors() {
},
{
"name": "echo",
"_vectors": {
"manual": {
"embeddings": [],
"regenerate": false
}
}
"_vectors": {}
}
],
"offset": 0,

View File

@@ -119,12 +119,7 @@ async fn add_remove_user_provided() {
{
"id": 1,
"name": "echo",
"_vectors": {
"manual": {
"embeddings": [],
"regenerate": false
}
}
"_vectors": {}
}
],
"offset": 0,
@@ -146,12 +141,7 @@ async fn add_remove_user_provided() {
{
"id": 1,
"name": "echo",
"_vectors": {
"manual": {
"embeddings": [],
"regenerate": false
}
}
"_vectors": {}
}
],
"offset": 0,
@@ -587,12 +577,7 @@ async fn add_remove_one_vector_4588() {
{
"id": 0,
"name": "kefir",
"_vectors": {
"manual": {
"embeddings": [],
"regenerate": false
}
}
"_vectors": {}
}
],
"offset": 0,

View File

@@ -141,6 +141,3 @@ swedish-recomposition = ["charabia/swedish-recomposition"]
# allow CUDA support, see <https://github.com/meilisearch/meilisearch/issues/4306>
cuda = ["candle-core/cuda"]
[lints.rust]
unexpected_cfgs = { level = "warn", check-cfg = ['cfg(fuzzing)'] }

View File

@@ -1660,7 +1660,9 @@ impl Index {
}
}
res.insert(embedder_name.to_owned(), embeddings);
if !embeddings.is_empty() {
res.insert(embedder_name.to_owned(), embeddings);
}
}
Ok(res)
}

View File

@@ -2180,6 +2180,33 @@ mod tests {
index.add_documents(doc1).unwrap();
}
/// Checks that indexing stores, for a given `(Script, Language)` pair, the set
/// of documents associated with that pair.
///
/// Six documents whose titles are written in different scripts are added to a
/// fresh `TempIndex`. The test then reads back the document-id sets stored
/// under `(Script::Cj, Language::Jpn)` and `(Script::Cj, Language::Cmn)` and
/// compares them with the expected ids.
///
/// Only compiled when the `default` feature is enabled.
#[cfg(feature = "default")]
#[test]
fn store_detected_script_and_language_per_document_during_indexing() {
    use charabia::{Language, Script};

    let index = TempIndex::new();

    // One title per script, plus a final document mixing two scripts.
    let batch = documents!([
        { "id": 1, "title": "The quick (\"brown\") fox can't jump 32.3 feet, right? Brr, it's 29.3°F!" },
        { "id": 2, "title": "人人生而自由﹐在尊嚴和權利上一律平等。他們賦有理性和良心﹐並應以兄弟關係的精神互相對待。" },
        { "id": 3, "title": "הַשּׁוּעָל הַמָּהִיר (״הַחוּם״) לֹא יָכוֹל לִקְפֹּץ 9.94 מֶטְרִים, נָכוֹן? ברר, 1.5°C- בַּחוּץ!" },
        { "id": 4, "title": "関西国際空港限定トートバッグ すもももももももものうち" },
        { "id": 5, "title": "ภาษาไทยง่ายนิดเดียว" },
        { "id": 6, "title": "The quick 在尊嚴和權利上一律平等。" },
    ]);
    index.add_documents(batch).unwrap();

    let rtxn = index.read_txn().unwrap();

    // Both lookups are performed before any assertion; each one must succeed
    // and yield an entry (double `unwrap`).
    let jpn_pair = (Script::Cj, Language::Jpn);
    let cmn_pair = (Script::Cj, Language::Cmn);
    let jpn_found = index.script_language_documents_ids(&rtxn, &jpn_pair).unwrap().unwrap();
    let cmn_found = index.script_language_documents_ids(&rtxn, &cmn_pair).unwrap().unwrap();

    // NOTE(review): the expected values do not match the user-facing `id`
    // fields above; presumably they are internal document ids assigned in
    // insertion order starting at 0 — confirm against the indexer.
    let jpn_expected = [3].iter().collect();
    assert_eq!(jpn_found, jpn_expected);

    let cmn_expected = [1, 5].iter().collect();
    assert_eq!(cmn_found, cmn_expected);
}
#[test]
fn add_and_delete_documents_in_single_transform() {
let mut index = TempIndex::new();