Compare commits

...

32 Commits

Author SHA1 Message Date
Kerollmops
bc1aa5af07 Do not crash when filtering with hannoy 2025-07-24 09:58:27 +02:00
Kerollmops
baec26b5d5 Change the ef construction to 48 2025-07-21 22:38:21 +02:00
Kerollmops
46aff4e706 Change the ef construction to 64 2025-07-21 20:27:22 +02:00
Kerollmops
b228b2efd9 Change the ef construction from default 100 to 33 2025-07-21 18:09:25 +02:00
Kerollmops
4b7d88e2f4 First version with hannoy 2025-07-21 11:42:46 +02:00
Tamo
421a23ee3d Merge pull request #3265 from LeSuisse/sign-container-image-cosign
Sign container image using Cosign in keyless mode
2025-07-16 08:54:57 +00:00
Thomas Gerbet
191ea340ed Sign container image using Cosign in keyless mode
Cosign keyless mode makes it possible to sign the container image using the
OIDC Identity Tokens provided by GitHub Actions [0][1].
The signature is published to the registry storing the image and to the
public Rekor transparency log instance [2].

Cosign keyless mode has already been adopted by some major projects like
Kubernetes [3].

The image signature can be manually verified using:
```
$ cosign verify \
	--certificate-oidc-issuer='https://token.actions.githubusercontent.com' \
	--certificate-identity-regexp='^https://github.com/meilisearch/meilisearch/.github/workflows/publish-docker-images.yaml' \
	<image_name>
```

See #2179.
Note that a similar approach can be used to sign the release binaries.

[0] https://docs.github.com/en/actions/deployment/security-hardening-your-deployments/about-security-hardening-with-openid-connect
[1] https://docs.sigstore.dev/cosign/signing/signing_with_containers/
[2] https://docs.sigstore.dev/rekor/overview
[3] https://kubernetes.io/docs/tasks/administer-cluster/verify-signed-artifacts/#verifying-image-signatures
2025-07-16 10:04:18 +02:00
Tamo
8d22972d84 Merge pull request #5626 from martin-g/faster-batches-it-tests
tests: Faster batches:: IT tests
2025-07-16 07:01:16 +00:00
Martin Grigorov
8772b5af87 Merge branch 'main' into faster-batches-it-tests 2025-07-15 15:21:32 +03:00
Tamo
df2e7cde53 Merge pull request #5703 from martin-g/all-use-server-wait-task
tests: Use Server::wait_task() instead of Index::wait_task()
2025-07-15 09:18:12 +00:00
Clément Renault
02b2ae6142 Merge pull request #5756 from meilisearch/fix-integration-test
Fix Rails CI
2025-07-15 07:38:06 +00:00
curquiza
f813eb7ca4 Fix 2025-07-13 12:35:54 +02:00
curquiza
d072edaa49 Fix Rails CI 2025-07-13 12:26:56 +02:00
Martin Tzvetanov Grigorov
e3daa907c5 Update redactions
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-07-11 11:14:39 +03:00
Martin Tzvetanov Grigorov
a39223822a More tests fixes
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-07-11 11:11:46 +03:00
Martin Grigorov
1eb6cd38ce Merge branch 'main' into faster-batches-it-tests 2025-07-11 10:49:22 +03:00
Martin Tzvetanov Grigorov
eb6ad3ef9c Fix batch id detection
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-07-11 10:24:25 +03:00
Martin Tzvetanov Grigorov
3bef4f4413 Use Server::wait_task() instead of Index::wait_task()
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-07-11 10:16:25 +03:00
Martin Tzvetanov Grigorov
9f89881b0d More tests fixes
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-07-11 10:11:58 +03:00
Martin Tzvetanov Grigorov
126aefc207 Fix more tests
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-07-10 16:47:04 +03:00
Martin Tzvetanov Grigorov
e7a60555d6 Formatting
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-07-10 14:35:40 +03:00
Martin Tzvetanov Grigorov
ae912c4c3f Pass the Server as an extra parameter when the Index needs to wait for a task
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-07-10 14:28:57 +03:00
Martin Tzvetanov Grigorov
13ea29e511 Fix some search+replace issues. Make Server::wait_task() available for Index:: methods
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-07-10 14:03:16 +03:00
Martin Tzvetanov Grigorov
5342df26fe tests: Use Server::wait_task() instead of Index::wait_task()
The code is mostly duplicated. Server::wait_task() has better handling for errors and more retries.

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-07-10 14:03:15 +03:00
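
For orientation, here is a minimal sketch of what a migrated test looks like after this series. The helper names (`Server::new_shared()`, `unique_index()`, `wait_task()`) all appear in the diffs below, but the import paths and the test body itself are illustrative assumptions rather than code taken from the PR:

```rust
use crate::common::Server; // assumed path; the harness lives in the suite's common module
use crate::json;

#[actix_rt::test]
async fn add_documents_waits_through_the_server() {
    let server = Server::new_shared();
    let index = server.unique_index();

    let (task, code) = index.add_documents(json!([{ "id": 1, "title": "Shazam!" }]), None).await;
    assert_eq!(code, 202);

    // Previously: index.wait_task(task.uid()).await.succeeded();
    // Server::wait_task() polls /tasks/{uid} up to 400 times (0.5s apart) and
    // asserts on the HTTP status of every poll before giving up.
    server.wait_task(task.uid()).await.succeeded();
}
```
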
Tamo
61bc95e8d6 Merge pull request #5740 from meilisearch/ignore-flaky-test-2
Ignore yet another flaky test
2025-07-09 13:25:45 +00:00
Louis Dureuil
074744b8a6 Ignore yet-another flaky test 2025-07-08 10:54:39 +02:00
Martin Tzvetanov Grigorov
9e31d6ceff Add batch_uid to all successful and failed tasks too
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-10 14:12:48 +03:00
Martin Tzvetanov Grigorov
139ec8c782 Add task.batch_uid() helper method
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-10 14:12:48 +03:00
Martin Tzvetanov Grigorov
2691999bd3 Add a helper method for getting the latest batch
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-10 14:12:47 +03:00
Martin Tzvetanov Grigorov
48460678df More assertion fixes
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-10 14:12:47 +03:00
Martin Tzvetanov Grigorov
cb15e5c67e WIP: More snapshot updates
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-10 14:12:46 +03:00
Martin Tzvetanov Grigorov
7380808b26 tests: Faster batches:: IT tests
Use shared server + unique indices where possible

Related-to: https://github.com/meilisearch/meilisearch/issues/4840

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-10 14:12:46 +03:00
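
The "shared server + unique indices" pattern this commit describes, combined with the new `Value::batch_uid()` helper added earlier in the series, looks roughly like the sketch below. The same caveats apply: the harness import path, the test body, and the assumption that `.succeeded()` returns the task value are illustrative, not code from the PR.

```rust
use crate::common::Server; // assumed path for the shared test harness
use crate::json;

#[actix_rt::test]
async fn batches_on_the_shared_server() {
    // One Meilisearch process is shared by the whole test binary...
    let server = Server::new_shared();
    // ...while each test gets its own uniquely named index for isolation.
    let index = server.unique_index();

    let (task, _code) = index.add_documents(json!([{ "id": 1 }]), None).await;
    let task = server.wait_task(task.uid()).await.succeeded();

    // Finished tasks now expose the batch they were processed in.
    let _batch_uid = task.batch_uid();
}
```
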
50 changed files with 911 additions and 812 deletions

View File

@@ -16,6 +16,8 @@ on:
jobs:
docker:
runs-on: docker
permissions:
id-token: write # This is needed to use Cosign in keyless mode
steps:
- uses: actions/checkout@v3
@@ -62,6 +64,9 @@ jobs:
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Install cosign
uses: sigstore/cosign-installer@3454372f43399081ed03b604cb2d021dabca52bb # tag=v3.8.2
- name: Login to Docker Hub
uses: docker/login-action@v3
with:
@@ -85,6 +90,7 @@ jobs:
- name: Build and push
uses: docker/build-push-action@v6
id: build-and-push
with:
push: true
platforms: linux/amd64,linux/arm64
@@ -94,6 +100,17 @@ jobs:
COMMIT_DATE=${{ steps.build-metadata.outputs.date }}
GIT_TAG=${{ github.ref_name }}
- name: Sign the images with GitHub OIDC Token
env:
DIGEST: ${{ steps.build-and-push.outputs.digest }}
TAGS: ${{ steps.meta.outputs.tags }}
run: |
images=""
for tag in ${TAGS}; do
images+="${tag}@${DIGEST} "
done
cosign sign --yes ${images}
# /!\ Don't touch this without checking with Cloud team
- name: Send CI information to Cloud team
# Do not send if nightly build (i.e. 'schedule' or 'workflow_dispatch' event)

View File

@@ -344,15 +344,23 @@ jobs:
MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
ports:
- '7700:7700'
env:
RAILS_VERSION: '7.0'
steps:
- uses: actions/checkout@v3
with:
repository: meilisearch/meilisearch-rails
- name: Set up Ruby 3
- name: Install SQLite dependencies
run: sudo apt-get update && sudo apt-get install -y libsqlite3-dev
- name: Set up Ruby
uses: ruby/setup-ruby@v1
with:
ruby-version: 3
bundler-cache: true
- name: Start MongoDB
uses: supercharge/mongodb-github-action@1.12.0
with:
mongodb-version: 8.0
- name: Run tests
run: bundle exec rspec

Cargo.lock (generated)
View File

@@ -442,28 +442,6 @@ version = "0.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50"
[[package]]
name = "arroy"
version = "0.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "08e6111f351d004bd13e95ab540721272136fd3218b39d3ec95a2ea1c4e6a0a6"
dependencies = [
"bytemuck",
"byteorder",
"enum-iterator",
"heed",
"memmap2",
"nohash",
"ordered-float 4.6.0",
"page_size",
"rand 0.8.5",
"rayon",
"roaring",
"tempfile",
"thiserror 2.0.12",
"tracing",
]
[[package]]
name = "assert-json-diff"
version = "2.0.2"
@@ -2600,6 +2578,32 @@ dependencies = [
"rand_distr",
]
[[package]]
name = "hannoy"
version = "0.7.0"
source = "git+https://github.com/kerollmops/hannoy?branch=expose-private-types#98c7adda0d96a9f98203797d24c26a4de06cab3e"
dependencies = [
"bytemuck",
"byteorder",
"enum-iterator",
"hashbrown 0.15.4",
"heed",
"memmap2",
"min-max-heap",
"nohash",
"ordered-float 5.0.0",
"page_size",
"papaya",
"rand 0.8.5",
"rayon",
"roaring",
"slice-group-by",
"tempfile",
"thiserror 2.0.12",
"tinyvec",
"tracing",
]
[[package]]
name = "hash32"
version = "0.3.1"
@@ -3921,7 +3925,6 @@ name = "milli"
version = "1.16.0"
dependencies = [
"allocator-api2 0.3.0",
"arroy",
"bbqueue",
"big_s",
"bimap",
@@ -3949,6 +3952,7 @@ dependencies = [
"fxhash",
"geoutils",
"grenad",
"hannoy",
"hashbrown 0.15.4",
"heed",
"hf-hub",
@@ -4018,6 +4022,12 @@ dependencies = [
"unicase",
]
[[package]]
name = "min-max-heap"
version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2687e6cf9c00f48e9284cf9fd15f2ef341d03cc7743abf9df4c5f07fdee50b18"
[[package]]
name = "minimal-lexical"
version = "0.2.1"
@@ -4358,15 +4368,6 @@ dependencies = [
"num-traits",
]
[[package]]
name = "ordered-float"
version = "4.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7bb71e1b3fa6ca1c61f383464aaf2bb0e2f8e772a1f01d486832464de363b951"
dependencies = [
"num-traits",
]
[[package]]
name = "ordered-float"
version = "5.0.0"
@@ -4398,6 +4399,16 @@ dependencies = [
"winapi",
]
[[package]]
name = "papaya"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f92dd0b07c53a0a0c764db2ace8c541dc47320dad97c2200c2a637ab9dd2328f"
dependencies = [
"equivalent",
"seize",
]
[[package]]
name = "parking_lot"
version = "0.12.4"
@@ -5449,6 +5460,16 @@ dependencies = [
"time",
]
[[package]]
name = "seize"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e4b8d813387d566f627f3ea1b914c068aac94c40ae27ec43f5f33bde65abefe7"
dependencies = [
"libc",
"windows-sys 0.52.0",
]
[[package]]
name = "semver"
version = "1.0.26"

View File

@@ -143,10 +143,10 @@ impl IndexStats {
///
/// - rtxn: a RO transaction for the index, obtained from `Index::read_txn()`.
pub fn new(index: &Index, rtxn: &RoTxn) -> milli::Result<Self> {
let arroy_stats = index.arroy_stats(rtxn)?;
let hannoy_stats = index.hannoy_stats(rtxn)?;
Ok(IndexStats {
number_of_embeddings: Some(arroy_stats.number_of_embeddings),
number_of_embedded_documents: Some(arroy_stats.documents.len()),
number_of_embeddings: Some(hannoy_stats.number_of_embeddings),
number_of_embedded_documents: Some(hannoy_stats.documents.len()),
documents_database_stats: index.documents_stats(rtxn)?.unwrap_or_default(),
number_of_documents: None,
database_size: index.on_disk_size()?,

View File

@@ -304,7 +304,7 @@ async fn access_authorized_stats_restricted_index() {
let (response, code) = index.create(Some("product_id")).await;
assert_eq!(202, code, "{:?}", &response);
let task_id = response["taskUid"].as_u64().unwrap();
index.wait_task(task_id).await;
server.wait_task(task_id).await;
// create key with access on `products` index only.
let content = json!({
@@ -344,7 +344,7 @@ async fn access_authorized_stats_no_index_restriction() {
let (response, code) = index.create(Some("product_id")).await;
assert_eq!(202, code, "{:?}", &response);
let task_id = response["taskUid"].as_u64().unwrap();
index.wait_task(task_id).await;
server.wait_task(task_id).await;
// create key with access on all indexes.
let content = json!({
@@ -384,7 +384,7 @@ async fn list_authorized_indexes_restricted_index() {
let (response, code) = index.create(Some("product_id")).await;
assert_eq!(202, code, "{:?}", &response);
let task_id = response["taskUid"].as_u64().unwrap();
index.wait_task(task_id).await;
server.wait_task(task_id).await;
// create key with access on `products` index only.
let content = json!({
@@ -425,7 +425,7 @@ async fn list_authorized_indexes_no_index_restriction() {
let (response, code) = index.create(Some("product_id")).await;
assert_eq!(202, code, "{:?}", &response);
let task_id = response["taskUid"].as_u64().unwrap();
index.wait_task(task_id).await;
server.wait_task(task_id).await;
// create key with access on all indexes.
let content = json!({
@@ -507,10 +507,10 @@ async fn access_authorized_index_patterns() {
server.use_api_key(MASTER_KEY);
// refer to products_1 with modified api key.
// refer to products_1 with a modified api key.
let index_1 = server.index("products_1");
index_1.wait_task(task_id).await;
server.wait_task(task_id).await;
let (response, code) = index_1.get_task(task_id).await;
assert_eq!(200, code, "{:?}", &response);
@@ -578,19 +578,19 @@ async fn raise_error_non_authorized_index_patterns() {
assert_eq!(202, code, "{:?}", &response);
let task2_id = response["taskUid"].as_u64().unwrap();
// Adding document to test index. Should Fail with 403 -- invalid_api_key
// Adding a document to test index. Should Fail with 403 -- invalid_api_key
let (response, code) = test_index.add_documents(documents, None).await;
assert_eq!(403, code, "{:?}", &response);
server.use_api_key(MASTER_KEY);
// refer to products_1 with modified api key.
// refer to products_1 with a modified api key.
let product_1_index = server.index("products_1");
// refer to products_2 with modified api key.
let product_2_index = server.index("products_2");
// refer to products_2 with a modified api key.
// let product_2_index = server.index("products_2");
product_1_index.wait_task(task1_id).await;
product_2_index.wait_task(task2_id).await;
server.wait_task(task1_id).await;
server.wait_task(task2_id).await;
let (response, code) = product_1_index.get_task(task1_id).await;
assert_eq!(200, code, "{:?}", &response);
@@ -603,7 +603,7 @@ async fn raise_error_non_authorized_index_patterns() {
#[actix_rt::test]
async fn pattern_indexes() {
// Create server with master key
// Create a server with master key
let mut server = Server::new_auth().await;
server.use_admin_key(MASTER_KEY).await;
@@ -650,7 +650,7 @@ async fn list_authorized_tasks_restricted_index() {
let (response, code) = index.create(Some("product_id")).await;
assert_eq!(202, code, "{:?}", &response);
let task_id = response["taskUid"].as_u64().unwrap();
index.wait_task(task_id).await;
server.wait_task(task_id).await;
// create key with access on `products` index only.
let content = json!({
@@ -690,7 +690,7 @@ async fn list_authorized_tasks_no_index_restriction() {
let (response, code) = index.create(Some("product_id")).await;
assert_eq!(202, code, "{:?}", &response);
let task_id = response["taskUid"].as_u64().unwrap();
index.wait_task(task_id).await;
server.wait_task(task_id).await;
// create key with access on all indexes.
let content = json!({
@@ -757,7 +757,7 @@ async fn error_creating_index_without_action() {
assert_eq!(202, code, "{:?}", &response);
let task_id = response["taskUid"].as_u64().unwrap();
let response = index.wait_task(task_id).await;
let response = server.wait_task(task_id).await;
assert_eq!(response["status"], "failed");
assert_eq!(response["error"], expected_error.clone());
@@ -768,7 +768,7 @@ async fn error_creating_index_without_action() {
assert_eq!(202, code, "{:?}", &response);
let task_id = response["taskUid"].as_u64().unwrap();
let response = index.wait_task(task_id).await;
let response = server.wait_task(task_id).await;
assert_eq!(response["status"], "failed");
assert_eq!(response["error"], expected_error.clone());
@@ -778,7 +778,7 @@ async fn error_creating_index_without_action() {
assert_eq!(202, code, "{:?}", &response);
let task_id = response["taskUid"].as_u64().unwrap();
let response = index.wait_task(task_id).await;
let response = server.wait_task(task_id).await;
assert_eq!(response["status"], "failed");
assert_eq!(response["error"], expected_error.clone());
@@ -830,7 +830,7 @@ async fn lazy_create_index() {
assert_eq!(202, code, "{:?}", &response);
let task_id = response["taskUid"].as_u64().unwrap();
index.wait_task(task_id).await;
server.wait_task(task_id).await;
let (response, code) = index.get_task(task_id).await;
assert_eq!(200, code, "{:?}", &response);
@@ -844,7 +844,7 @@ async fn lazy_create_index() {
assert_eq!(202, code, "{:?}", &response);
let task_id = response["taskUid"].as_u64().unwrap();
index.wait_task(task_id).await;
server.wait_task(task_id).await;
let (response, code) = index.get_task(task_id).await;
assert_eq!(200, code, "{:?}", &response);
@@ -856,7 +856,7 @@ async fn lazy_create_index() {
assert_eq!(202, code, "{:?}", &response);
let task_id = response["taskUid"].as_u64().unwrap();
index.wait_task(task_id).await;
server.wait_task(task_id).await;
let (response, code) = index.get_task(task_id).await;
assert_eq!(200, code, "{:?}", &response);
@@ -911,7 +911,7 @@ async fn lazy_create_index_from_pattern() {
assert_eq!(202, code, "{:?}", &response);
let task_id = response["taskUid"].as_u64().unwrap();
index.wait_task(task_id).await;
server.wait_task(task_id).await;
let (response, code) = index.get_task(task_id).await;
assert_eq!(200, code, "{:?}", &response);
@@ -929,7 +929,7 @@ async fn lazy_create_index_from_pattern() {
assert_eq!(202, code, "{:?}", &response);
let task_id = response["taskUid"].as_u64().unwrap();
index.wait_task(task_id).await;
server.wait_task(task_id).await;
let (response, code) = index.get_task(task_id).await;
assert_eq!(200, code, "{:?}", &response);
@@ -949,7 +949,7 @@ async fn lazy_create_index_from_pattern() {
assert_eq!(202, code, "{:?}", &response);
let task_id = response["taskUid"].as_u64().unwrap();
index.wait_task(task_id).await;
server.wait_task(task_id).await;
let (response, code) = index.get_task(task_id).await;
assert_eq!(200, code, "{:?}", &response);

View File

@@ -100,11 +100,11 @@ macro_rules! compute_authorized_search {
let index = server.index("sales");
let documents = DOCUMENTS.clone();
let (task1,_status_code) = index.add_documents(documents, None).await;
index.wait_task(task1.uid()).await.succeeded();
server.wait_task(task1.uid()).await.succeeded();
let (task2,_status_code) = index
.update_settings(json!({"filterableAttributes": ["color"]}))
.await;
index.wait_task(task2.uid()).await.succeeded();
server.wait_task(task2.uid()).await.succeeded();
drop(index);
for key_content in ACCEPTED_KEYS.iter() {
@@ -147,7 +147,7 @@ macro_rules! compute_forbidden_search {
let index = server.index("sales");
let documents = DOCUMENTS.clone();
let (task, _status_code) = index.add_documents(documents, None).await;
index.wait_task(task.uid()).await.succeeded();
server.wait_task(task.uid()).await.succeeded();
drop(index);
for key_content in $parent_keys.iter() {

View File

@@ -268,21 +268,21 @@ macro_rules! compute_authorized_single_search {
let index = server.index("sales");
let documents = DOCUMENTS.clone();
let (add_task,_status_code) = index.add_documents(documents, None).await;
index.wait_task(add_task.uid()).await.succeeded();
server.wait_task(add_task.uid()).await.succeeded();
let (update_task,_status_code) = index
.update_settings(json!({"filterableAttributes": ["color"]}))
.await;
index.wait_task(update_task.uid()).await.succeeded();
server.wait_task(update_task.uid()).await.succeeded();
drop(index);
let index = server.index("products");
let documents = NESTED_DOCUMENTS.clone();
let (add_task2,_status_code) = index.add_documents(documents, None).await;
index.wait_task(add_task2.uid()).await.succeeded();
server.wait_task(add_task2.uid()).await.succeeded();
let (update_task2,_status_code) = index
.update_settings(json!({"filterableAttributes": ["doggos"]}))
.await;
index.wait_task(update_task2.uid()).await.succeeded();
server.wait_task(update_task2.uid()).await.succeeded();
drop(index);
@@ -339,21 +339,21 @@ macro_rules! compute_authorized_multiple_search {
let index = server.index("sales");
let documents = DOCUMENTS.clone();
let (task,_status_code) = index.add_documents(documents, None).await;
index.wait_task(task.uid()).await.succeeded();
server.wait_task(task.uid()).await.succeeded();
let (task,_status_code) = index
.update_settings(json!({"filterableAttributes": ["color"]}))
.await;
index.wait_task(task.uid()).await.succeeded();
server.wait_task(task.uid()).await.succeeded();
drop(index);
let index = server.index("products");
let documents = NESTED_DOCUMENTS.clone();
let (task,_status_code) = index.add_documents(documents, None).await;
index.wait_task(task.uid()).await.succeeded();
server.wait_task(task.uid()).await.succeeded();
let (task,_status_code) = index
.update_settings(json!({"filterableAttributes": ["doggos"]}))
.await;
index.wait_task(task.uid()).await.succeeded();
server.wait_task(task.uid()).await.succeeded();
drop(index);
@@ -423,21 +423,21 @@ macro_rules! compute_forbidden_single_search {
let index = server.index("sales");
let documents = DOCUMENTS.clone();
let (task,_status_code) = index.add_documents(documents, None).await;
index.wait_task(task.uid()).await.succeeded();
server.wait_task(task.uid()).await.succeeded();
let (task,_status_code) = index
.update_settings(json!({"filterableAttributes": ["color"]}))
.await;
index.wait_task(task.uid()).await.succeeded();
server.wait_task(task.uid()).await.succeeded();
drop(index);
let index = server.index("products");
let documents = NESTED_DOCUMENTS.clone();
let (task,_status_code) = index.add_documents(documents, None).await;
index.wait_task(task.uid()).await.succeeded();
server.wait_task(task.uid()).await.succeeded();
let (task,_status_code) = index
.update_settings(json!({"filterableAttributes": ["doggos"]}))
.await;
index.wait_task(task.uid()).await.succeeded();
server.wait_task(task.uid()).await.succeeded();
drop(index);
assert_eq!($parent_keys.len(), $failed_query_indexes.len(), "keys != query_indexes");
@@ -499,21 +499,21 @@ macro_rules! compute_forbidden_multiple_search {
let index = server.index("sales");
let documents = DOCUMENTS.clone();
let (task,_status_code) = index.add_documents(documents, None).await;
index.wait_task(task.uid()).await.succeeded();
server.wait_task(task.uid()).await.succeeded();
let (task,_status_code) = index
.update_settings(json!({"filterableAttributes": ["color"]}))
.await;
index.wait_task(task.uid()).await.succeeded();
server.wait_task(task.uid()).await.succeeded();
drop(index);
let index = server.index("products");
let documents = NESTED_DOCUMENTS.clone();
let (task,_status_code) = index.add_documents(documents, None).await;
index.wait_task(task.uid()).await.succeeded();
server.wait_task(task.uid()).await.succeeded();
let (task,_status_code) = index
.update_settings(json!({"filterableAttributes": ["doggos"]}))
.await;
index.wait_task(task.uid()).await.succeeded();
server.wait_task(task.uid()).await.succeeded();
drop(index);
assert_eq!($parent_keys.len(), $failed_query_indexes.len(), "keys != query_indexes");

File diff suppressed because it is too large

View File

@@ -1,15 +1,13 @@
use std::fmt::Write;
use std::marker::PhantomData;
use std::panic::{catch_unwind, resume_unwind, UnwindSafe};
use std::time::Duration;
use actix_web::http::StatusCode;
use tokio::time::sleep;
use urlencoding::encode as urlencode;
use super::encoder::Encoder;
use super::service::Service;
use super::{Owned, Shared, Value};
use super::{Owned, Server, Shared, Value};
use crate::json;
pub struct Index<'a, State = Owned> {
@@ -33,7 +31,7 @@ impl<'a> Index<'a, Owned> {
Index { uid: self.uid.clone(), service: self.service, encoder, marker: PhantomData }
}
pub async fn load_test_set(&self) -> u64 {
pub async fn load_test_set<State>(&self, waiter: &Server<State>) -> u64 {
let url = format!("/indexes/{}/documents", urlencode(self.uid.as_ref()));
let (response, code) = self
.service
@@ -44,12 +42,12 @@ impl<'a> Index<'a, Owned> {
)
.await;
assert_eq!(code, 202);
let update_id = response["taskUid"].as_i64().unwrap();
self.wait_task(update_id as u64).await;
update_id as u64
let update_id = response["taskUid"].as_u64().unwrap();
waiter.wait_task(update_id).await;
update_id
}
pub async fn load_test_set_ndjson(&self) -> u64 {
pub async fn load_test_set_ndjson<State>(&self, waiter: &Server<State>) -> u64 {
let url = format!("/indexes/{}/documents", urlencode(self.uid.as_ref()));
let (response, code) = self
.service
@@ -60,9 +58,9 @@ impl<'a> Index<'a, Owned> {
)
.await;
assert_eq!(code, 202);
let update_id = response["taskUid"].as_i64().unwrap();
self.wait_task(update_id as u64).await;
update_id as u64
let update_id = response["taskUid"].as_u64().unwrap();
waiter.wait_task(update_id).await;
update_id
}
pub async fn create(&self, primary_key: Option<&str>) -> (Value, StatusCode) {
@@ -267,10 +265,14 @@ impl Index<'_, Shared> {
/// You cannot modify the content of a shared index, thus the delete_document_by_filter call
/// must fail. If the task successfully enqueues itself, we'll wait for the task to finish,
/// and if it succeeds the function will panic.
pub async fn delete_document_by_filter_fail(&self, body: Value) -> (Value, StatusCode) {
pub async fn delete_document_by_filter_fail<State>(
&self,
body: Value,
waiter: &Server<State>,
) -> (Value, StatusCode) {
let (mut task, code) = self._delete_document_by_filter(body).await;
if code.is_success() {
task = self.wait_task(task.uid()).await;
task = waiter.wait_task(task.uid()).await;
if task.is_success() {
panic!(
"`delete_document_by_filter_fail` succeeded: {}",
@@ -281,10 +283,10 @@ impl Index<'_, Shared> {
(task, code)
}
pub async fn delete_index_fail(&self) -> (Value, StatusCode) {
pub async fn delete_index_fail<State>(&self, waiter: &Server<State>) -> (Value, StatusCode) {
let (mut task, code) = self._delete().await;
if code.is_success() {
task = self.wait_task(task.uid()).await;
task = waiter.wait_task(task.uid()).await;
if task.is_success() {
panic!(
"`delete_index_fail` succeeded: {}",
@@ -295,10 +297,14 @@ impl Index<'_, Shared> {
(task, code)
}
pub async fn update_index_fail(&self, primary_key: Option<&str>) -> (Value, StatusCode) {
pub async fn update_index_fail<State>(
&self,
primary_key: Option<&str>,
waiter: &Server<State>,
) -> (Value, StatusCode) {
let (mut task, code) = self._update(primary_key).await;
if code.is_success() {
task = self.wait_task(task.uid()).await;
task = waiter.wait_task(task.uid()).await;
if task.is_success() {
panic!(
"`update_index_fail` succeeded: {}",
@@ -364,23 +370,6 @@ impl<State> Index<'_, State> {
self.service.delete(url).await
}
pub async fn wait_task(&self, update_id: u64) -> Value {
// try several times to get status, or panic to not wait forever
let url = format!("/tasks/{}", update_id);
for _ in 0..100 {
let (response, status_code) = self.service.get(&url).await;
assert_eq!(200, status_code, "response: {}", response);
if response["status"] == "succeeded" || response["status"] == "failed" {
return response;
}
// wait 0.5 second.
sleep(Duration::from_millis(500)).await;
}
panic!("Timeout waiting for update id");
}
pub async fn get_task(&self, update_id: u64) -> (Value, StatusCode) {
let url = format!("/tasks/{}", update_id);
self.service.get(url).await

View File

@@ -38,6 +38,15 @@ impl Value {
self["uid"].as_u64().is_some() || self["taskUid"].as_u64().is_some()
}
#[track_caller]
pub fn batch_uid(&self) -> u32 {
if let Some(batch_uid) = self["batchUid"].as_u64() {
batch_uid as u32
} else {
panic!("Didn't find `batchUid` in: {self}");
}
}
/// Return `true` if the `status` field is set to `succeeded`.
/// Panic if the `status` field doesn't exists.
#[track_caller]
@@ -181,7 +190,7 @@ pub async fn shared_empty_index() -> &'static Index<'static, Shared> {
let server = Server::new_shared();
let index = server._index("EMPTY_INDEX").to_shared();
let (response, _code) = index._create(None).await;
index.wait_task(response.uid()).await.succeeded();
server.wait_task(response.uid()).await.succeeded();
index
})
.await
@@ -229,13 +238,13 @@ pub async fn shared_index_with_documents() -> &'static Index<'static, Shared> {
let index = server._index("SHARED_DOCUMENTS").to_shared();
let documents = DOCUMENTS.clone();
let (response, _code) = index._add_documents(documents, None).await;
index.wait_task(response.uid()).await.succeeded();
server.wait_task(response.uid()).await.succeeded();
let (response, _code) = index
._update_settings(
json!({"filterableAttributes": ["id", "title"], "sortableAttributes": ["id", "title"]}),
)
.await;
index.wait_task(response.uid()).await.succeeded();
server.wait_task(response.uid()).await.succeeded();
index
}).await
}
@@ -272,13 +281,13 @@ pub async fn shared_index_with_score_documents() -> &'static Index<'static, Shar
let index = server._index("SHARED_SCORE_DOCUMENTS").to_shared();
let documents = SCORE_DOCUMENTS.clone();
let (response, _code) = index._add_documents(documents, None).await;
index.wait_task(response.uid()).await.succeeded();
server.wait_task(response.uid()).await.succeeded();
let (response, _code) = index
._update_settings(
json!({"filterableAttributes": ["id", "title"], "sortableAttributes": ["id", "title"]}),
)
.await;
index.wait_task(response.uid()).await.succeeded();
server.wait_task(response.uid()).await.succeeded();
index
}).await
}
@@ -349,13 +358,13 @@ pub async fn shared_index_with_nested_documents() -> &'static Index<'static, Sha
let index = server._index("SHARED_NESTED_DOCUMENTS").to_shared();
let documents = NESTED_DOCUMENTS.clone();
let (response, _code) = index._add_documents(documents, None).await;
index.wait_task(response.uid()).await.succeeded();
server.wait_task(response.uid()).await.succeeded();
let (response, _code) = index
._update_settings(
json!({"filterableAttributes": ["father", "doggos", "cattos"], "sortableAttributes": ["doggos"]}),
)
.await;
index.wait_task(response.uid()).await.succeeded();
server.wait_task(response.uid()).await.succeeded();
index
}).await
}
@@ -449,7 +458,7 @@ pub async fn shared_index_with_test_set() -> &'static Index<'static, Shared> {
)
.await;
assert_eq!(code, 202);
index.wait_task(response.uid()).await.succeeded();
server.wait_task(response.uid()).await.succeeded();
index
})
.await
@@ -496,14 +505,14 @@ pub async fn shared_index_with_geo_documents() -> &'static Index<'static, Shared
let server = Server::new_shared();
let index = server._index("SHARED_GEO_DOCUMENTS").to_shared();
let (response, _code) = index._add_documents(GEO_DOCUMENTS.clone(), None).await;
index.wait_task(response.uid()).await.succeeded();
server.wait_task(response.uid()).await.succeeded();
let (response, _code) = index
._update_settings(
json!({"filterableAttributes": ["_geo"], "sortableAttributes": ["_geo"]}),
)
.await;
index.wait_task(response.uid()).await.succeeded();
server.wait_task(response.uid()).await.succeeded();
index
})
.await

View File

@@ -408,12 +408,12 @@ impl<State> Server<State> {
pub async fn wait_task(&self, update_id: u64) -> Value {
// try several times to get status, or panic to not wait forever
let url = format!("/tasks/{}", update_id);
let max_attempts = 400; // 200 seconds total, 0.5s per attempt
let url = format!("/tasks/{update_id}");
let max_attempts = 400; // 200 seconds in total, 0.5secs per attempt
for i in 0..max_attempts {
let (response, status_code) = self.service.get(&url).await;
assert_eq!(200, status_code, "response: {}", response);
let (response, status_code) = self.service.get(url.clone()).await;
assert_eq!(200, status_code, "response: {response}");
if response["status"] == "succeeded" || response["status"] == "failed" {
return response;

View File

@@ -1318,7 +1318,7 @@ async fn add_no_documents() {
async fn add_larger_dataset() {
let server = Server::new_shared();
let index = server.unique_index();
let update_id = index.load_test_set().await;
let update_id = index.load_test_set(server).await;
let (response, code) = index.get_task(update_id).await;
assert_eq!(code, 200);
assert_eq!(response["status"], "succeeded");
@@ -1333,7 +1333,7 @@ async fn add_larger_dataset() {
// x-ndjson add large test
let index = server.unique_index();
let update_id = index.load_test_set_ndjson().await;
let update_id = index.load_test_set_ndjson(server).await;
let (response, code) = index.get_task(update_id).await;
assert_eq!(code, 200);
assert_eq!(response["status"], "succeeded");

View File

@@ -7,7 +7,8 @@ use crate::json;
async fn delete_one_document_unexisting_index() {
let server = Server::new_shared();
let index = shared_does_not_exists_index().await;
let (task, code) = index.delete_document_by_filter_fail(json!({"filter": "a = b"})).await;
let (task, code) =
index.delete_document_by_filter_fail(json!({"filter": "a = b"}), server).await;
assert_eq!(code, 202);
server.wait_task(task.uid()).await.failed();

View File

@@ -559,7 +559,7 @@ async fn delete_document_by_filter() {
let index = shared_does_not_exists_index().await;
// index does not exists
let (response, _code) =
index.delete_document_by_filter_fail(json!({ "filter": "doggo = bernese"})).await;
index.delete_document_by_filter_fail(json!({ "filter": "doggo = bernese"}), server).await;
snapshot!(response, @r###"
{
"uid": "[uid]",
@@ -589,7 +589,7 @@ async fn delete_document_by_filter() {
// no filterable are set
let index = shared_empty_index().await;
let (response, _code) =
index.delete_document_by_filter_fail(json!({ "filter": "doggo = bernese"})).await;
index.delete_document_by_filter_fail(json!({ "filter": "doggo = bernese"}), server).await;
snapshot!(response, @r###"
{
"uid": "[uid]",
@@ -619,7 +619,7 @@ async fn delete_document_by_filter() {
// not filterable while there is a filterable attribute
let index = shared_index_with_documents().await;
let (response, code) =
index.delete_document_by_filter_fail(json!({ "filter": "catto = jorts"})).await;
index.delete_document_by_filter_fail(json!({ "filter": "catto = jorts"}), server).await;
snapshot!(code, @"202 Accepted");
let response = server.wait_task(response.uid()).await.failed();
snapshot!(response, @r###"

View File

@@ -334,7 +334,7 @@ async fn get_document_s_nested_attributes_to_retrieve() {
async fn get_documents_displayed_attributes_is_ignored() {
let server = Server::new_shared();
let index = server.unique_index();
index.load_test_set().await;
index.load_test_set(server).await;
index.update_settings(json!({"displayedAttributes": ["gender"]})).await;
let (response, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await;

View File

@@ -2366,7 +2366,7 @@ async fn generate_and_import_dump_containing_vectors() {
))
.await;
snapshot!(code, @"202 Accepted");
let response = index.wait_task(response.uid()).await;
let response = server.wait_task(response.uid()).await;
snapshot!(response);
let (response, code) = index
.add_documents(
@@ -2381,12 +2381,12 @@ async fn generate_and_import_dump_containing_vectors() {
)
.await;
snapshot!(code, @"202 Accepted");
let response = index.wait_task(response.uid()).await;
let response = server.wait_task(response.uid()).await;
snapshot!(response);
let (response, code) = server.create_dump().await;
snapshot!(code, @"202 Accepted");
let response = index.wait_task(response.uid()).await;
let response = server.wait_task(response.uid()).await;
snapshot!(response["status"], @r###""succeeded""###);
// ========= We made a dump, now we should clear the DB and try to import our dump

View File

@@ -161,9 +161,9 @@ async fn test_create_multiple_indexes() {
let (task2, _) = index2.create(None).await;
let (task3, _) = index3.create(None).await;
index1.wait_task(task1.uid()).await.succeeded();
index2.wait_task(task2.uid()).await.succeeded();
index3.wait_task(task3.uid()).await.succeeded();
server.wait_task(task1.uid()).await.succeeded();
server.wait_task(task2.uid()).await.succeeded();
server.wait_task(task3.uid()).await.succeeded();
assert_eq!(index1.get().await.1, 200);
assert_eq!(index2.get().await.1, 200);

View File

@@ -26,7 +26,7 @@ async fn create_and_delete_index() {
async fn error_delete_unexisting_index() {
let server = Server::new_shared();
let index = shared_does_not_exists_index().await;
let (task, code) = index.delete_index_fail().await;
let (task, code) = index.delete_index_fail(server).await;
assert_eq!(code, 202);
server.wait_task(task.uid()).await.failed();

View File

@@ -60,8 +60,8 @@ async fn list_multiple_indexes() {
let index_with_key = server.unique_index();
let (response_with_key, _status_code) = index_with_key.create(Some("key")).await;
index_without_key.wait_task(response_without_key.uid()).await.succeeded();
index_with_key.wait_task(response_with_key.uid()).await.succeeded();
server.wait_task(response_without_key.uid()).await.succeeded();
server.wait_task(response_with_key.uid()).await.succeeded();
let (response, code) = server.list_indexes(None, Some(1000)).await;
assert_eq!(code, 200);
@@ -81,8 +81,9 @@ async fn get_and_paginate_indexes() {
let server = Server::new().await;
const NB_INDEXES: usize = 50;
for i in 0..NB_INDEXES {
server.index(format!("test_{i:02}")).create(None).await;
server.index(format!("test_{i:02}")).wait_task(i as u64).await;
let (task, code) = server.index(format!("test_{i:02}")).create(None).await;
assert_eq!(code, 202);
server.wait_task(task.uid()).await;
}
// basic

View File

@@ -72,7 +72,7 @@ async fn error_update_existing_primary_key() {
let server = Server::new_shared();
let index = shared_index_with_documents().await;
let (update_task, code) = index.update_index_fail(Some("primary")).await;
let (update_task, code) = index.update_index_fail(Some("primary"), server).await;
assert_eq!(code, 202);
let response = server.wait_task(update_task.uid()).await.failed();
@@ -91,7 +91,7 @@ async fn error_update_existing_primary_key() {
async fn error_update_unexisting_index() {
let server = Server::new_shared();
let index = shared_does_not_exists_index().await;
let (task, code) = index.update_index_fail(Some("my-primary-key")).await;
let (task, code) = index.update_index_fail(Some("my-primary-key"), server).await;
assert_eq!(code, 202);

View File

@@ -158,11 +158,11 @@ async fn remote_sharding() {
let index1 = ms1.index("test");
let index2 = ms2.index("test");
let (task, _status_code) = index0.add_documents(json!(documents[0..2]), None).await;
index0.wait_task(task.uid()).await.succeeded();
ms0.wait_task(task.uid()).await.succeeded();
let (task, _status_code) = index1.add_documents(json!(documents[2..3]), None).await;
index1.wait_task(task.uid()).await.succeeded();
ms1.wait_task(task.uid()).await.succeeded();
let (task, _status_code) = index2.add_documents(json!(documents[3..5]), None).await;
index2.wait_task(task.uid()).await.succeeded();
ms2.wait_task(task.uid()).await.succeeded();
// wrap servers
let ms0 = Arc::new(ms0);
@@ -454,9 +454,9 @@ async fn error_unregistered_remote() {
let index0 = ms0.index("test");
let index1 = ms1.index("test");
let (task, _status_code) = index0.add_documents(json!(documents[0..2]), None).await;
index0.wait_task(task.uid()).await.succeeded();
ms0.wait_task(task.uid()).await.succeeded();
let (task, _status_code) = index1.add_documents(json!(documents[2..3]), None).await;
index1.wait_task(task.uid()).await.succeeded();
ms1.wait_task(task.uid()).await.succeeded();
// wrap servers
let ms0 = Arc::new(ms0);
@@ -572,9 +572,9 @@ async fn error_no_weighted_score() {
let index0 = ms0.index("test");
let index1 = ms1.index("test");
let (task, _status_code) = index0.add_documents(json!(documents[0..2]), None).await;
index0.wait_task(task.uid()).await.succeeded();
ms0.wait_task(task.uid()).await.succeeded();
let (task, _status_code) = index1.add_documents(json!(documents[2..3]), None).await;
index1.wait_task(task.uid()).await.succeeded();
ms1.wait_task(task.uid()).await.succeeded();
// wrap servers
let ms0 = Arc::new(ms0);
@@ -705,9 +705,9 @@ async fn error_bad_response() {
let index0 = ms0.index("test");
let index1 = ms1.index("test");
let (task, _status_code) = index0.add_documents(json!(documents[0..2]), None).await;
index0.wait_task(task.uid()).await.succeeded();
ms0.wait_task(task.uid()).await.succeeded();
let (task, _status_code) = index1.add_documents(json!(documents[2..3]), None).await;
index1.wait_task(task.uid()).await.succeeded();
ms1.wait_task(task.uid()).await.succeeded();
// wrap servers
let ms0 = Arc::new(ms0);
@@ -842,9 +842,9 @@ async fn error_bad_request() {
let index0 = ms0.index("test");
let index1 = ms1.index("test");
let (task, _status_code) = index0.add_documents(json!(documents[0..2]), None).await;
index0.wait_task(task.uid()).await.succeeded();
ms0.wait_task(task.uid()).await.succeeded();
let (task, _status_code) = index1.add_documents(json!(documents[2..3]), None).await;
index1.wait_task(task.uid()).await.succeeded();
ms1.wait_task(task.uid()).await.succeeded();
// wrap servers
let ms0 = Arc::new(ms0);
@@ -972,10 +972,10 @@ async fn error_bad_request_facets_by_index() {
let index0 = ms0.index("test0");
let index1 = ms1.index("test1");
let (task, _status_code) = index0.add_documents(json!(documents[0..2]), None).await;
index0.wait_task(task.uid()).await.succeeded();
ms0.wait_task(task.uid()).await.succeeded();
let (task, _status_code) = index1.add_documents(json!(documents[2..3]), None).await;
index1.wait_task(task.uid()).await.succeeded();
ms1.wait_task(task.uid()).await.succeeded();
// wrap servers
let ms0 = Arc::new(ms0);
@@ -1113,13 +1113,13 @@ async fn error_bad_request_facets_by_index_facet() {
let index0 = ms0.index("test");
let index1 = ms1.index("test");
let (task, _status_code) = index0.add_documents(json!(documents[0..2]), None).await;
index0.wait_task(task.uid()).await.succeeded();
ms0.wait_task(task.uid()).await.succeeded();
let (task, _status_code) = index0.update_settings_filterable_attributes(json!(["id"])).await;
index0.wait_task(task.uid()).await.succeeded();
ms0.wait_task(task.uid()).await.succeeded();
let (task, _status_code) = index1.add_documents(json!(documents[2..3]), None).await;
index1.wait_task(task.uid()).await.succeeded();
ms1.wait_task(task.uid()).await.succeeded();
// wrap servers
let ms0 = Arc::new(ms0);
@@ -1224,6 +1224,7 @@ async fn error_bad_request_facets_by_index_facet() {
}
#[actix_rt::test]
#[ignore]
async fn error_remote_does_not_answer() {
let ms0 = Server::new().await;
let ms1 = Server::new().await;
@@ -1262,9 +1263,9 @@ async fn error_remote_does_not_answer() {
let index0 = ms0.index("test");
let index1 = ms1.index("test");
let (task, _status_code) = index0.add_documents(json!(documents[0..2]), None).await;
index0.wait_task(task.uid()).await.succeeded();
ms0.wait_task(task.uid()).await.succeeded();
let (task, _status_code) = index1.add_documents(json!(documents[2..3]), None).await;
index1.wait_task(task.uid()).await.succeeded();
ms1.wait_task(task.uid()).await.succeeded();
// wrap servers
let ms0 = Arc::new(ms0);
@@ -1463,9 +1464,9 @@ async fn error_remote_404() {
let index0 = ms0.index("test");
let index1 = ms1.index("test");
let (task, _status_code) = index0.add_documents(json!(documents[0..2]), None).await;
index0.wait_task(task.uid()).await.succeeded();
ms0.wait_task(task.uid()).await.succeeded();
let (task, _status_code) = index1.add_documents(json!(documents[2..3]), None).await;
index1.wait_task(task.uid()).await.succeeded();
ms1.wait_task(task.uid()).await.succeeded();
// wrap servers
let ms0 = Arc::new(ms0);
@@ -1658,9 +1659,9 @@ async fn error_remote_sharding_auth() {
let index0 = ms0.index("test");
let index1 = ms1.index("test");
let (task, _status_code) = index0.add_documents(json!(documents[0..2]), None).await;
index0.wait_task(task.uid()).await.succeeded();
ms0.wait_task(task.uid()).await.succeeded();
let (task, _status_code) = index1.add_documents(json!(documents[2..3]), None).await;
index1.wait_task(task.uid()).await.succeeded();
ms1.wait_task(task.uid()).await.succeeded();
// wrap servers
ms1.clear_api_key();
@@ -1818,9 +1819,9 @@ async fn remote_sharding_auth() {
let index0 = ms0.index("test");
let index1 = ms1.index("test");
let (task, _status_code) = index0.add_documents(json!(documents[0..2]), None).await;
index0.wait_task(task.uid()).await.succeeded();
ms0.wait_task(task.uid()).await.succeeded();
let (task, _status_code) = index1.add_documents(json!(documents[2..3]), None).await;
index1.wait_task(task.uid()).await.succeeded();
ms1.wait_task(task.uid()).await.succeeded();
// wrap servers
ms1.clear_api_key();
@@ -1973,9 +1974,9 @@ async fn error_remote_500() {
let index0 = ms0.index("test");
let index1 = ms1.index("test");
let (task, _status_code) = index0.add_documents(json!(documents[0..2]), None).await;
index0.wait_task(task.uid()).await.succeeded();
ms0.wait_task(task.uid()).await.succeeded();
let (task, _status_code) = index1.add_documents(json!(documents[2..3]), None).await;
index1.wait_task(task.uid()).await.succeeded();
ms1.wait_task(task.uid()).await.succeeded();
// wrap servers
let ms0 = Arc::new(ms0);
@@ -2152,9 +2153,9 @@ async fn error_remote_500_once() {
let index0 = ms0.index("test");
let index1 = ms1.index("test");
let (task, _status_code) = index0.add_documents(json!(documents[0..2]), None).await;
index0.wait_task(task.uid()).await.succeeded();
ms0.wait_task(task.uid()).await.succeeded();
let (task, _status_code) = index1.add_documents(json!(documents[2..3]), None).await;
index1.wait_task(task.uid()).await.succeeded();
ms1.wait_task(task.uid()).await.succeeded();
// wrap servers
let ms0 = Arc::new(ms0);
@@ -2335,9 +2336,9 @@ async fn error_remote_timeout() {
let index0 = ms0.index("test");
let index1 = ms1.index("test");
let (task, _status_code) = index0.add_documents(json!(documents[0..2]), None).await;
index0.wait_task(task.uid()).await.succeeded();
ms0.wait_task(task.uid()).await.succeeded();
let (task, _status_code) = index1.add_documents(json!(documents[2..3]), None).await;
index1.wait_task(task.uid()).await.succeeded();
ms1.wait_task(task.uid()).await.succeeded();
// wrap servers
let ms0 = Arc::new(ms0);

View File

@@ -298,7 +298,7 @@ async fn similar_bad_filter() {
let documents = DOCUMENTS.clone();
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
index.wait_task(value.uid()).await.succeeded();
server.wait_task(value.uid()).await.succeeded();
let (response, code) =
index.similar_post(json!({ "id": 287947, "filter": true, "embedder": "manual" })).await;
@@ -335,7 +335,7 @@ async fn filter_invalid_syntax_object() {
let documents = DOCUMENTS.clone();
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
index.wait_task(value.uid()).await.succeeded();
server.wait_task(value.uid()).await.succeeded();
index
.similar(json!({"id": 287947, "filter": "title & Glass", "embedder": "manual"}), |response, code| {
@@ -373,7 +373,7 @@ async fn filter_invalid_syntax_array() {
let documents = DOCUMENTS.clone();
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
index.wait_task(value.uid()).await.succeeded();
server.wait_task(value.uid()).await.succeeded();
index
.similar(json!({"id": 287947, "filter": ["title & Glass"], "embedder": "manual"}), |response, code| {
@@ -411,7 +411,7 @@ async fn filter_invalid_syntax_string() {
let documents = DOCUMENTS.clone();
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
index.wait_task(value.uid()).await.succeeded();
server.wait_task(value.uid()).await.succeeded();
let expected_response = json!({
"message": "Found unexpected characters at the end of the filter: `XOR title = Glass`. You probably forgot an `OR` or an `AND` rule.\n15:32 title = Glass XOR title = Glass",
@@ -451,7 +451,7 @@ async fn filter_invalid_attribute_array() {
let documents = DOCUMENTS.clone();
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
index.wait_task(value.uid()).await.succeeded();
server.wait_task(value.uid()).await.succeeded();
index
.similar(
@@ -492,7 +492,7 @@ async fn filter_invalid_attribute_string() {
let documents = DOCUMENTS.clone();
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
index.wait_task(value.uid()).await.succeeded();
server.wait_task(value.uid()).await.succeeded();
index
.similar(
@@ -533,7 +533,7 @@ async fn filter_reserved_geo_attribute_array() {
let documents = DOCUMENTS.clone();
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
index.wait_task(value.uid()).await.succeeded();
server.wait_task(value.uid()).await.succeeded();
let expected_response = json!({
"message": "`_geo` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:13 _geo = Glass",
@@ -573,7 +573,7 @@ async fn filter_reserved_geo_attribute_string() {
let documents = DOCUMENTS.clone();
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
index.wait_task(value.uid()).await.succeeded();
server.wait_task(value.uid()).await.succeeded();
let expected_response = json!({
"message": "`_geo` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:13 _geo = Glass",
@@ -613,7 +613,7 @@ async fn filter_reserved_attribute_array() {
let documents = DOCUMENTS.clone();
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
index.wait_task(value.uid()).await.succeeded();
server.wait_task(value.uid()).await.succeeded();
let expected_response = json!({
"message": "`_geoDistance` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:21 _geoDistance = Glass",
@@ -653,7 +653,7 @@ async fn filter_reserved_attribute_string() {
let documents = DOCUMENTS.clone();
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
index.wait_task(value.uid()).await.succeeded();
server.wait_task(value.uid()).await.succeeded();
let expected_response = json!({
"message": "`_geoDistance` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:21 _geoDistance = Glass",
@@ -693,7 +693,7 @@ async fn filter_reserved_geo_point_array() {
let documents = DOCUMENTS.clone();
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
index.wait_task(value.uid()).await.succeeded();
server.wait_task(value.uid()).await.succeeded();
let expected_response = json!({
"message": "`_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:18 _geoPoint = Glass",
@@ -733,7 +733,7 @@ async fn filter_reserved_geo_point_string() {
let documents = DOCUMENTS.clone();
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
index.wait_task(value.uid()).await.succeeded();
server.wait_task(value.uid()).await.succeeded();
let expected_response = json!({
"message": "`_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:18 _geoPoint = Glass",
@@ -825,7 +825,7 @@ async fn similar_bad_embedder() {
let documents = DOCUMENTS.clone();
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
index.wait_task(value.uid()).await;
server.wait_task(value.uid()).await;
let expected_response = json!({
"message": "Cannot find embedder with name `auto`.",

View File

@@ -51,12 +51,12 @@ async fn perform_snapshot() {
}))
.await;
index.load_test_set().await;
index.load_test_set(&server).await;
let (task, code) = server.index("test1").create(Some("prim")).await;
meili_snap::snapshot!(code, @"202 Accepted");
index.wait_task(task.uid()).await.succeeded();
server.wait_task(task.uid()).await.succeeded();
// wait for the _next task_ to process, aka the snapshot that should be enqueued at some point
@@ -128,13 +128,13 @@ async fn perform_on_demand_snapshot() {
}))
.await;
index.load_test_set().await;
index.load_test_set(&server).await;
let (task, _status_code) = server.index("doggo").create(Some("bone")).await;
index.wait_task(task.uid()).await.succeeded();
server.wait_task(task.uid()).await.succeeded();
let (task, _status_code) = server.index("doggo").create(Some("bone")).await;
index.wait_task(task.uid()).await.failed();
server.wait_task(task.uid()).await.failed();
let (task, code) = server.create_snapshot().await;
snapshot!(code, @"202 Accepted");
@@ -147,7 +147,7 @@ async fn perform_on_demand_snapshot() {
"enqueuedAt": "[date]"
}
"###);
let task = index.wait_task(task.uid()).await;
let task = server.wait_task(task.uid()).await;
snapshot!(json_string!(task, { ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]" }), @r###"
{
"uid": 4,

View File

@@ -32,7 +32,7 @@ async fn stats() {
let (task, code) = index.create(Some("id")).await;
assert_eq!(code, 202);
index.wait_task(task.uid()).await.succeeded();
server.wait_task(task.uid()).await.succeeded();
let (response, code) = server.stats().await;
@@ -58,7 +58,7 @@ async fn stats() {
assert_eq!(code, 202, "{response}");
assert_eq!(response["taskUid"], 1);
index.wait_task(response.uid()).await.succeeded();
server.wait_task(response.uid()).await.succeeded();
let timestamp = OffsetDateTime::now_utc();
let (response, code) = server.stats().await;
@@ -107,7 +107,7 @@ async fn add_remove_embeddings() {
let (response, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
index.wait_task(response.uid()).await.succeeded();
server.wait_task(response.uid()).await.succeeded();
let (stats, _code) = index.stats().await;
snapshot!(json_string!(stats, {
@@ -135,7 +135,7 @@ async fn add_remove_embeddings() {
let (response, code) = index.update_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
index.wait_task(response.uid()).await.succeeded();
server.wait_task(response.uid()).await.succeeded();
let (stats, _code) = index.stats().await;
snapshot!(json_string!(stats, {
@@ -163,7 +163,7 @@ async fn add_remove_embeddings() {
let (response, code) = index.update_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
index.wait_task(response.uid()).await.succeeded();
server.wait_task(response.uid()).await.succeeded();
let (stats, _code) = index.stats().await;
snapshot!(json_string!(stats, {
@@ -192,7 +192,7 @@ async fn add_remove_embeddings() {
let (response, code) = index.update_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
index.wait_task(response.uid()).await.succeeded();
server.wait_task(response.uid()).await.succeeded();
let (stats, _code) = index.stats().await;
snapshot!(json_string!(stats, {
@@ -245,7 +245,7 @@ async fn add_remove_embedded_documents() {
let (response, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
index.wait_task(response.uid()).await.succeeded();
server.wait_task(response.uid()).await.succeeded();
let (stats, _code) = index.stats().await;
snapshot!(json_string!(stats, {
@@ -269,7 +269,7 @@ async fn add_remove_embedded_documents() {
// delete one embedded document, remaining 1 embedded documents for 3 embeddings in total
let (response, code) = index.delete_document(0).await;
snapshot!(code, @"202 Accepted");
index.wait_task(response.uid()).await.succeeded();
server.wait_task(response.uid()).await.succeeded();
let (stats, _code) = index.stats().await;
snapshot!(json_string!(stats, {
@@ -305,7 +305,7 @@ async fn update_embedder_settings() {
let (response, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
index.wait_task(response.uid()).await.succeeded();
server.wait_task(response.uid()).await.succeeded();
let (stats, _code) = index.stats().await;
snapshot!(json_string!(stats, {

View File

@@ -88,7 +88,7 @@ async fn binary_quantize_before_sending_documents() {
]);
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
index.wait_task(value.uid()).await.succeeded();
server.wait_task(value.uid()).await.succeeded();
// Make sure the documents are binary quantized
let (documents, _code) = index
@@ -161,7 +161,7 @@ async fn binary_quantize_after_sending_documents() {
]);
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
index.wait_task(value.uid()).await.succeeded();
server.wait_task(value.uid()).await.succeeded();
let (response, code) = index
.update_settings(json!({
@@ -305,7 +305,7 @@ async fn binary_quantize_clear_documents() {
server.wait_task(response.uid()).await.succeeded();
let (value, _code) = index.clear_all_documents().await;
index.wait_task(value.uid()).await.succeeded();
server.wait_task(value.uid()).await.succeeded();
// Make sure the documents DB has been cleared
let (documents, _code) = index
@@ -320,7 +320,7 @@ async fn binary_quantize_clear_documents() {
}
"###);
// Make sure the arroy DB has been cleared
// Make sure the hannoy DB has been cleared
let (documents, _code) =
index.search_post(json!({ "hybrid": { "embedder": "manual" }, "vector": [1, 1, 1] })).await;
snapshot!(documents, @r###"

View File

@@ -42,7 +42,7 @@ async fn add_remove_user_provided() {
]);
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
index.wait_task(value.uid()).await.succeeded();
server.wait_task(value.uid()).await.succeeded();
let (documents, _code) = index
.get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() })
@@ -95,7 +95,7 @@ async fn add_remove_user_provided() {
]);
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
index.wait_task(value.uid()).await.succeeded();
server.wait_task(value.uid()).await.succeeded();
let (documents, _code) = index
.get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() })
@@ -138,7 +138,7 @@ async fn add_remove_user_provided() {
let (value, code) = index.delete_document(0).await;
snapshot!(code, @"202 Accepted");
index.wait_task(value.uid()).await.succeeded();
server.wait_task(value.uid()).await.succeeded();
let (documents, _code) = index
.get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() })
@@ -187,7 +187,7 @@ async fn user_provide_mismatched_embedding_dimension() {
]);
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = index.wait_task(value.uid()).await;
let task = server.wait_task(value.uid()).await;
snapshot!(task, @r###"
{
"uid": "[uid]",
@@ -218,7 +218,7 @@ async fn user_provide_mismatched_embedding_dimension() {
]);
let (response, code) = index.add_documents(new_document, None).await;
snapshot!(code, @"202 Accepted");
let task = index.wait_task(response.uid()).await;
let task = server.wait_task(response.uid()).await;
snapshot!(task, @r###"
{
"uid": "[uid]",
@@ -270,7 +270,7 @@ async fn generate_default_user_provided_documents(server: &Server) -> Index {
]);
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
index.wait_task(value.uid()).await.succeeded();
server.wait_task(value.uid()).await.succeeded();
index
}
@@ -285,7 +285,7 @@ async fn user_provided_embeddings_error() {
json!({"id": 0, "name": "kefir", "_vectors": { "manual": { "embeddings": [0, 0, 0] }}});
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = index.wait_task(value.uid()).await;
let task = server.wait_task(value.uid()).await;
snapshot!(task, @r###"
{
"uid": "[uid]",
@@ -315,7 +315,7 @@ async fn user_provided_embeddings_error() {
let documents = json!({"id": 0, "name": "kefir", "_vectors": { "manual": {}}});
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = index.wait_task(value.uid()).await;
let task = server.wait_task(value.uid()).await;
snapshot!(task, @r###"
{
"uid": "[uid]",
@@ -346,7 +346,7 @@ async fn user_provided_embeddings_error() {
json!({"id": 0, "name": "kefir", "_vectors": { "manual": { "regenerate": "yes please" }}});
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = index.wait_task(value.uid()).await;
let task = server.wait_task(value.uid()).await;
snapshot!(task, @r###"
{
"uid": "[uid]",
@@ -375,7 +375,7 @@ async fn user_provided_embeddings_error() {
let documents = json!({"id": 0, "name": "kefir", "_vectors": { "manual": { "embeddings": true, "regenerate": true }}});
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = index.wait_task(value.uid()).await;
let task = server.wait_task(value.uid()).await;
snapshot!(task, @r###"
{
"uid": "[uid]",
@@ -404,7 +404,7 @@ async fn user_provided_embeddings_error() {
let documents = json!({"id": 0, "name": "kefir", "_vectors": { "manual": { "embeddings": [true], "regenerate": true }}});
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = index.wait_task(value.uid()).await;
let task = server.wait_task(value.uid()).await;
snapshot!(task, @r###"
{
"uid": "[uid]",
@@ -433,7 +433,7 @@ async fn user_provided_embeddings_error() {
let documents = json!({"id": 0, "name": "kefir", "_vectors": { "manual": { "embeddings": [[true]], "regenerate": false }}});
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = index.wait_task(value.uid()).await;
let task = server.wait_task(value.uid()).await;
snapshot!(task, @r###"
{
"uid": "[uid]",
@@ -462,20 +462,20 @@ async fn user_provided_embeddings_error() {
let documents = json!({"id": 0, "name": "kefir", "_vectors": { "manual": { "embeddings": [23, 0.1, -12], "regenerate": true }}});
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = index.wait_task(value.uid()).await;
let task = server.wait_task(value.uid()).await;
snapshot!(task["status"], @r###""succeeded""###);
let documents =
json!({"id": 0, "name": "kefir", "_vectors": { "manual": { "regenerate": false }}});
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = index.wait_task(value.uid()).await;
let task = server.wait_task(value.uid()).await;
snapshot!(task["status"], @r###""succeeded""###);
let documents = json!({"id": 0, "name": "kefir", "_vectors": { "manual": { "regenerate": false, "embeddings": [0.1, [0.2, 0.3]] }}});
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = index.wait_task(value.uid()).await;
let task = server.wait_task(value.uid()).await;
snapshot!(task, @r###"
{
"uid": "[uid]",
@@ -504,7 +504,7 @@ async fn user_provided_embeddings_error() {
let documents = json!({"id": 0, "name": "kefir", "_vectors": { "manual": { "regenerate": false, "embeddings": [[0.1, 0.2], 0.3] }}});
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = index.wait_task(value.uid()).await;
let task = server.wait_task(value.uid()).await;
snapshot!(task, @r###"
{
"uid": "[uid]",
@@ -533,7 +533,7 @@ async fn user_provided_embeddings_error() {
let documents = json!({"id": 0, "name": "kefir", "_vectors": { "manual": { "regenerate": false, "embeddings": [[0.1, true], 0.3] }}});
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = index.wait_task(value.uid()).await;
let task = server.wait_task(value.uid()).await;
snapshot!(task, @r###"
{
"uid": "[uid]",
@@ -574,7 +574,7 @@ async fn user_provided_vectors_error() {
let documents = json!([{"id": 40, "name": "kefir"}, {"id": 41, "name": "intel"}, {"id": 42, "name": "max"}, {"id": 43, "name": "venus"}, {"id": 44, "name": "eva"}]);
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = index.wait_task(value.uid()).await;
let task = server.wait_task(value.uid()).await;
snapshot!(task, @r###"
{
"uid": "[uid]",
@@ -604,7 +604,7 @@ async fn user_provided_vectors_error() {
let documents = json!({"id": 42, "name": "kefir", "_vector": { "manaul": [0, 0, 0] }});
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = index.wait_task(value.uid()).await;
let task = server.wait_task(value.uid()).await;
snapshot!(task, @r###"
{
"uid": "[uid]",
@@ -634,7 +634,7 @@ async fn user_provided_vectors_error() {
let documents = json!({"id": 42, "name": "kefir", "_vectors": { "manaul": [0, 0, 0] }});
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = index.wait_task(value.uid()).await;
let task = server.wait_task(value.uid()).await;
snapshot!(task, @r###"
{
"uid": "[uid]",
@@ -667,7 +667,7 @@ async fn clear_documents() {
let index = generate_default_user_provided_documents(&server).await;
let (value, _code) = index.clear_all_documents().await;
index.wait_task(value.uid()).await.succeeded();
server.wait_task(value.uid()).await.succeeded();
// Make sure the documents DB has been cleared
let (documents, _code) = index
@@ -682,7 +682,7 @@ async fn clear_documents() {
}
"###);
// Make sure the arroy DB has been cleared
// Make sure the hannoy DB has been cleared
let (documents, _code) =
index.search_post(json!({ "vector": [1, 1, 1], "hybrid": {"embedder": "manual"} })).await;
snapshot!(documents, @r###"
@@ -723,7 +723,7 @@ async fn add_remove_one_vector_4588() {
]);
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = index.wait_task(value.uid()).await;
let task = server.wait_task(value.uid()).await;
snapshot!(task, name: "document-added");
let documents = json!([
@@ -731,7 +731,7 @@ async fn add_remove_one_vector_4588() {
]);
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = index.wait_task(value.uid()).await;
let task = server.wait_task(value.uid()).await;
snapshot!(task, name: "document-deleted");
let (documents, _code) = index

View File

@@ -117,7 +117,7 @@ async fn test_both_apis() {
]);
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = index.wait_task(value.uid()).await;
let task = server.wait_task(value.uid()).await;
snapshot!(task, @r###"
{
"uid": "[uid]",

View File

@@ -370,7 +370,7 @@ async fn it_works() {
]);
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = index.wait_task(value.uid()).await;
let task = server.wait_task(value.uid()).await;
snapshot!(task, @r###"
{
"uid": "[uid]",
@@ -601,7 +601,7 @@ async fn tokenize_long_text() {
]);
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = index.wait_task(value.uid()).await;
let task = server.wait_task(value.uid()).await;
snapshot!(task, @r###"
{
"uid": "[uid]",
@@ -657,7 +657,7 @@ async fn bad_api_key() {
]);
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = index.wait_task(value.uid()).await;
let task = server.wait_task(value.uid()).await;
snapshot!(task, @r###"
{
@@ -805,7 +805,7 @@ async fn bad_model() {
]);
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = index.wait_task(value.uid()).await;
let task = server.wait_task(value.uid()).await;
snapshot!(task, @r###"
{
@@ -883,7 +883,7 @@ async fn bad_dimensions() {
]);
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = index.wait_task(value.uid()).await;
let task = server.wait_task(value.uid()).await;
snapshot!(task, @r###"
{
@@ -992,7 +992,7 @@ async fn smaller_dimensions() {
]);
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = index.wait_task(value.uid()).await;
let task = server.wait_task(value.uid()).await;
snapshot!(task, @r###"
{
"uid": "[uid]",
@@ -1224,7 +1224,7 @@ async fn small_embedding_model() {
]);
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = index.wait_task(value.uid()).await;
let task = server.wait_task(value.uid()).await;
snapshot!(task, @r###"
{
"uid": "[uid]",
@@ -1455,7 +1455,7 @@ async fn legacy_embedding_model() {
]);
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = index.wait_task(value.uid()).await;
let task = server.wait_task(value.uid()).await;
snapshot!(task, @r###"
{
"uid": "[uid]",
@@ -1687,7 +1687,7 @@ async fn it_still_works() {
]);
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = index.wait_task(value.uid()).await;
let task = server.wait_task(value.uid()).await;
snapshot!(task, @r###"
{
"uid": "[uid]",
@@ -1916,7 +1916,7 @@ async fn timeout() {
]);
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = index.wait_task(value.uid()).await;
let task = server.wait_task(value.uid()).await;
snapshot!(task, @r###"
{
"uid": "[uid]",

View File

@@ -1099,7 +1099,7 @@ async fn add_vector_and_user_provided() {
]);
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = index.wait_task(value.uid()).await;
let task = server.wait_task(value.uid()).await;
snapshot!(task, @r###"
{
"uid": "[uid]",
@@ -1616,7 +1616,7 @@ async fn server_returns_multiple() {
]);
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = index.wait_task(value.uid()).await;
let task = server.wait_task(value.uid()).await;
snapshot!(task, @r###"
{
"uid": "[uid]",
@@ -1722,7 +1722,7 @@ async fn server_single_input_returns_in_array() {
]);
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = index.wait_task(value.uid()).await;
let task = server.wait_task(value.uid()).await;
snapshot!(task, @r###"
{
"uid": "[uid]",
@@ -1828,7 +1828,7 @@ async fn server_raw() {
]);
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = index.wait_task(value.uid()).await;
let task = server.wait_task(value.uid()).await;
snapshot!(task, @r###"
{
"uid": "[uid]",

View File

@@ -243,7 +243,7 @@ async fn reset_embedder_documents() {
}
"###);
// Make sure the arroy DB has been cleared
// Make sure the hannoy DB has been cleared
let (documents, _code) =
index.search_post(json!({ "vector": [1, 1, 1], "hybrid": {"embedder": "default"} })).await;
snapshot!(json_string!(documents), @r###"

View File

@@ -141,8 +141,8 @@ enum Command {
#[derive(Clone, ValueEnum)]
enum IndexPart {
/// Will make the arroy index hot.
Arroy,
/// Will make the hannoy index hot.
Hannoy,
}
fn main() -> anyhow::Result<()> {
@@ -648,12 +648,12 @@ fn hair_dryer(
let rtxn = index.read_txn()?;
for part in index_parts {
match part {
IndexPart::Arroy => {
IndexPart::Hannoy => {
let mut count = 0;
let total = index.vector_arroy.len(&rtxn)?;
eprintln!("Hair drying arroy for {uid}...");
let total = index.vector_hannoy.len(&rtxn)?;
eprintln!("Hair drying hannoy for {uid}...");
for (i, result) in index
.vector_arroy
.vector_hannoy
.remap_types::<Bytes, Bytes>()
.iter(&rtxn)?
.enumerate()

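The `hair_dryer` subcommand above warms the renamed vector store by touching every entry in `vector_hannoy`. A reduced sketch of that loop, reusing only the heed calls visible in the hunk (the surrounding CLI plumbing is assumed):

```rust
use heed::types::Bytes;

// Sketch: `index` is a milli::Index opened elsewhere; `uid` only names it in logs.
// This mirrors the IndexPart::Hannoy arm above: iterate the store as raw bytes so
// every page gets faulted in, without decoding anything.
fn hair_dry_vectors(index: &milli::Index, uid: &str) -> anyhow::Result<()> {
    let rtxn = index.read_txn()?;
    let total = index.vector_hannoy.len(&rtxn)?;
    eprintln!("Hair drying hannoy for {uid}...");

    let mut touched = 0usize;
    for (i, result) in index.vector_hannoy.remap_types::<Bytes, Bytes>().iter(&rtxn)?.enumerate() {
        let (key, value) = result?;
        // Reading the lengths is enough to touch both the key and value pages.
        touched += key.len() + value.len();
        if i % 10_000 == 0 {
            eprintln!("{i}/{total} entries touched so far");
        }
    }
    eprintln!("Touched {touched} bytes in total");
    Ok(())
}
```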
View File

@@ -68,7 +68,7 @@ pub fn v1_10_to_v1_11(
)
})?;
let index_read_database =
try_opening_poly_database(&index_env, &index_rtxn, db_name::VECTOR_ARROY)
try_opening_poly_database(&index_env, &index_rtxn, db_name::VECTOR_HANNOY)
.with_context(|| format!("while updating date format for index `{uid}`"))?;
let mut index_wtxn = index_env.write_txn().with_context(|| {
@@ -79,15 +79,16 @@ pub fn v1_10_to_v1_11(
})?;
let index_write_database =
try_opening_poly_database(&index_env, &index_wtxn, db_name::VECTOR_ARROY)
try_opening_poly_database(&index_env, &index_wtxn, db_name::VECTOR_HANNOY)
.with_context(|| format!("while updating date format for index `{uid}`"))?;
meilisearch_types::milli::arroy::upgrade::cosine_from_0_4_to_0_5(
&index_rtxn,
index_read_database.remap_types(),
&mut index_wtxn,
index_write_database.remap_types(),
)?;
// meilisearch_types::milli::hannoy::upgrade::cosine_from_0_4_to_0_5(
// &index_rtxn,
// index_read_database.remap_types(),
// &mut index_wtxn,
// index_write_database.remap_types(),
// )?;
unimplemented!("Hannoy doesn't support upgrading");
index_wtxn.commit()?;
}

View File

@@ -87,7 +87,7 @@ rhai = { version = "1.22.2", features = [
"no_time",
"sync",
] }
arroy = "0.6.1"
hannoy = { git = "https://github.com/kerollmops/hannoy", branch = "expose-private-types" }
rand = "0.8.5"
tracing = "0.1.41"
ureq = { version = "2.12.1", features = ["json"] }

View File

@@ -76,7 +76,7 @@ pub enum InternalError {
#[error("Cannot upgrade to the following version: v{0}.{1}.{2}.")]
CannotUpgradeToVersion(u32, u32, u32),
#[error(transparent)]
ArroyError(#[from] arroy::Error),
HannoyError(#[from] hannoy::Error),
#[error(transparent)]
VectorEmbeddingError(#[from] crate::vector::Error),
}
@@ -405,23 +405,24 @@ impl From<crate::vector::Error> for Error {
}
}
impl From<arroy::Error> for Error {
fn from(value: arroy::Error) -> Self {
impl From<hannoy::Error> for Error {
fn from(value: hannoy::Error) -> Self {
match value {
arroy::Error::Heed(heed) => heed.into(),
arroy::Error::Io(io) => io.into(),
arroy::Error::InvalidVecDimension { expected, received } => {
hannoy::Error::Heed(heed) => heed.into(),
hannoy::Error::Io(io) => io.into(),
hannoy::Error::InvalidVecDimension { expected, received } => {
Error::UserError(UserError::InvalidVectorDimensions { expected, found: received })
}
arroy::Error::BuildCancelled => Error::InternalError(InternalError::AbortedIndexation),
arroy::Error::DatabaseFull
| arroy::Error::InvalidItemAppend
| arroy::Error::UnmatchingDistance { .. }
| arroy::Error::NeedBuild(_)
| arroy::Error::MissingKey { .. }
| arroy::Error::MissingMetadata(_)
| arroy::Error::CannotDecodeKeyMode { .. } => {
Error::InternalError(InternalError::ArroyError(value))
hannoy::Error::BuildCancelled => Error::InternalError(InternalError::AbortedIndexation),
hannoy::Error::DatabaseFull
| hannoy::Error::InvalidItemAppend
| hannoy::Error::UnmatchingDistance { .. }
| hannoy::Error::NeedBuild(_)
| hannoy::Error::MissingKey { .. }
| hannoy::Error::MissingMetadata(_)
| hannoy::Error::UnknownVersion { .. }
| hannoy::Error::CannotDecodeKeyMode { .. } => {
Error::InternalError(InternalError::HannoyError(value))
}
}
}

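With the `From<hannoy::Error>` impl above, call sites keep using `?` on hannoy results and get the same user-facing mapping: dimension mismatches become a `UserError`, cancellations become `AbortedIndexation`, and everything else is wrapped as an internal error. A hedged sketch of that propagation, reusing the `HannoyWrapper::dimensions` signature shown later in this diff:

```rust
// Sketch: `Result` is milli's crate::Result, whose error type carries the
// From<hannoy::Error> impl above; `HannoyWrapper::dimensions` returns
// Result<Option<usize>, hannoy::Error>.
fn embedder_dimensions(
    reader: &HannoyWrapper,
    rtxn: &heed::RoTxn<'_>,
) -> crate::Result<Option<usize>> {
    // The `?` goes through the From impl: e.g. hannoy::Error::BuildCancelled
    // surfaces as InternalError::AbortedIndexation, dimension mismatches as a
    // UserError, everything else as InternalError::HannoyError.
    Ok(reader.dimensions(rtxn)?)
}
```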
View File

@@ -31,7 +31,7 @@ use crate::prompt::PromptData;
use crate::proximity::ProximityPrecision;
use crate::update::new::StdResult;
use crate::vector::db::IndexEmbeddingConfigs;
use crate::vector::{ArroyStats, ArroyWrapper, Embedding};
use crate::vector::{Embedding, HannoyStats, HannoyWrapper};
use crate::{
default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds,
FacetDistribution, FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldIdWordCountCodec,
@@ -113,7 +113,7 @@ pub mod db_name {
pub const FIELD_ID_DOCID_FACET_F64S: &str = "field-id-docid-facet-f64s";
pub const FIELD_ID_DOCID_FACET_STRINGS: &str = "field-id-docid-facet-strings";
pub const VECTOR_EMBEDDER_CATEGORY_ID: &str = "vector-embedder-category-id";
pub const VECTOR_ARROY: &str = "vector-arroy";
pub const VECTOR_HANNOY: &str = "vector-hannoy";
pub const DOCUMENTS: &str = "documents";
}
const NUMBER_OF_DBS: u32 = 25;
@@ -177,10 +177,10 @@ pub struct Index {
/// Maps the document id, the facet field id and the strings.
pub field_id_docid_facet_strings: Database<FieldDocIdFacetStringCodec, Str>,
/// Maps an embedder name to its id in the arroy store.
/// Maps an embedder name to its id in the hannoy store.
pub(crate) embedder_category_id: Database<Unspecified, Unspecified>,
/// Vector store based on arroy™.
pub vector_arroy: arroy::Database<Unspecified>,
/// Vector store based on hannoy™.
pub vector_hannoy: hannoy::Database<Unspecified>,
/// Maps the document id to the document as an obkv store.
pub(crate) documents: Database<BEU32, ObkvCodec>,
@@ -237,7 +237,7 @@ impl Index {
// vector stuff
let embedder_category_id =
env.create_database(&mut wtxn, Some(VECTOR_EMBEDDER_CATEGORY_ID))?;
let vector_arroy = env.create_database(&mut wtxn, Some(VECTOR_ARROY))?;
let vector_hannoy = env.create_database(&mut wtxn, Some(VECTOR_HANNOY))?;
let documents = env.create_database(&mut wtxn, Some(DOCUMENTS))?;
@@ -264,7 +264,7 @@ impl Index {
facet_id_is_empty_docids,
field_id_docid_facet_f64s,
field_id_docid_facet_strings,
vector_arroy,
vector_hannoy,
embedder_category_id,
documents,
};
@@ -1771,8 +1771,8 @@ impl Index {
let embedders = self.embedding_configs();
for config in embedders.embedding_configs(rtxn)? {
let embedder_info = embedders.embedder_info(rtxn, &config.name)?.unwrap();
let reader = ArroyWrapper::new(
self.vector_arroy,
let reader = HannoyWrapper::new(
self.vector_hannoy,
embedder_info.embedder_id,
config.config.quantized(),
);
@@ -1790,13 +1790,13 @@ impl Index {
Ok(PrefixSettings { compute_prefixes, max_prefix_length: 4, prefix_count_threshold: 100 })
}
pub fn arroy_stats(&self, rtxn: &RoTxn<'_>) -> Result<ArroyStats> {
let mut stats = ArroyStats::default();
pub fn hannoy_stats(&self, rtxn: &RoTxn<'_>) -> Result<HannoyStats> {
let mut stats = HannoyStats::default();
let embedding_configs = self.embedding_configs();
for config in embedding_configs.embedding_configs(rtxn)? {
let embedder_id = embedding_configs.embedder_id(rtxn, &config.name)?.unwrap();
let reader =
ArroyWrapper::new(self.vector_arroy, embedder_id, config.config.quantized());
HannoyWrapper::new(self.vector_hannoy, embedder_id, config.config.quantized());
reader.aggregate_stats(rtxn, &mut stats)?;
}
Ok(stats)
@@ -1840,7 +1840,7 @@ impl Index {
facet_id_is_empty_docids,
field_id_docid_facet_f64s,
field_id_docid_facet_strings,
vector_arroy,
vector_hannoy,
embedder_category_id,
documents,
} = self;
@@ -1911,7 +1911,7 @@ impl Index {
"field_id_docid_facet_strings",
field_id_docid_facet_strings.stat(rtxn).map(compute_size)?,
);
sizes.insert("vector_arroy", vector_arroy.stat(rtxn).map(compute_size)?);
sizes.insert("vector_hannoy", vector_hannoy.stat(rtxn).map(compute_size)?);
sizes.insert("embedder_category_id", embedder_category_id.stat(rtxn).map(compute_size)?);
sizes.insert("documents", documents.stat(rtxn).map(compute_size)?);

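`arroy_stats` is renamed to `hannoy_stats` and aggregates one `HannoyWrapper` reader per configured embedder. A small sketch of calling it from outside the index module, assuming an open `milli::Index`; the concrete fields of `HannoyStats` are not spelled out in this diff, so the aggregate is returned as-is:

```rust
use milli::vector::HannoyStats;

// Sketch: hannoy_stats aggregates over every embedder configured on the index,
// exactly as shown above.
fn vector_store_stats(index: &milli::Index) -> milli::Result<HannoyStats> {
    let rtxn = index.read_txn()?;
    index.hannoy_stats(&rtxn)
}
```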
View File

@@ -52,7 +52,7 @@ pub use search::new::{
};
use serde_json::Value;
pub use thread_pool_no_abort::{PanicCatched, ThreadPoolNoAbort, ThreadPoolNoAbortBuilder};
pub use {arroy, charabia as tokenizer, heed, rhai};
pub use {charabia as tokenizer, hannoy, heed, rhai};
pub use self::asc_desc::{AscDesc, AscDescError, Member, SortError};
pub use self::attribute_patterns::{AttributePatterns, PatternMatch};

View File

@@ -98,12 +98,12 @@ impl Progress {
}
// TODO: ideally we should expose the progress in a way that lets arroy use it directly
pub(crate) fn update_progress_from_arroy(&self, progress: arroy::WriterProgress) {
self.update_progress(progress.main);
if let Some(sub) = progress.sub {
self.update_progress(sub);
}
}
// pub(crate) fn update_progress_from_hannoy(&self, progress: hannoy::WriterProgress) {
// self.update_progress(progress.main);
// if let Some(sub) = progress.sub {
// self.update_progress(sub);
// }
// }
}
/// Generate the names associated with the durations and push them.
@@ -277,43 +277,43 @@ impl<U: Send + Sync + 'static> Step for VariableNameStep<U> {
}
}
impl Step for arroy::MainStep {
fn name(&self) -> Cow<'static, str> {
match self {
arroy::MainStep::PreProcessingTheItems => "pre processing the items",
arroy::MainStep::WritingTheDescendantsAndMetadata => {
"writing the descendants and metadata"
}
arroy::MainStep::RetrieveTheUpdatedItems => "retrieve the updated items",
arroy::MainStep::RetrievingTheTreeAndItemNodes => "retrieving the tree and item nodes",
arroy::MainStep::UpdatingTheTrees => "updating the trees",
arroy::MainStep::CreateNewTrees => "create new trees",
arroy::MainStep::WritingNodesToDatabase => "writing nodes to database",
arroy::MainStep::DeleteExtraneousTrees => "delete extraneous trees",
arroy::MainStep::WriteTheMetadata => "write the metadata",
}
.into()
}
// impl Step for hannoy::MainStep {
// fn name(&self) -> Cow<'static, str> {
// match self {
// hannoy::MainStep::PreProcessingTheItems => "pre processing the items",
// hannoy::MainStep::WritingTheDescendantsAndMetadata => {
// "writing the descendants and metadata"
// }
// hannoy::MainStep::RetrieveTheUpdatedItems => "retrieve the updated items",
// hannoy::MainStep::RetrievingTheTreeAndItemNodes => "retrieving the tree and item nodes",
// hannoy::MainStep::UpdatingTheTrees => "updating the trees",
// hannoy::MainStep::CreateNewTrees => "create new trees",
// hannoy::MainStep::WritingNodesToDatabase => "writing nodes to database",
// hannoy::MainStep::DeleteExtraneousTrees => "delete extraneous trees",
// hannoy::MainStep::WriteTheMetadata => "write the metadata",
// }
// .into()
// }
fn current(&self) -> u32 {
*self as u32
}
// fn current(&self) -> u32 {
// *self as u32
// }
fn total(&self) -> u32 {
Self::CARDINALITY as u32
}
}
// fn total(&self) -> u32 {
// Self::CARDINALITY as u32
// }
// }
impl Step for arroy::SubStep {
fn name(&self) -> Cow<'static, str> {
self.unit.into()
}
// impl Step for hannoy::SubStep {
// fn name(&self) -> Cow<'static, str> {
// self.unit.into()
// }
fn current(&self) -> u32 {
self.current.load(Ordering::Relaxed)
}
// fn current(&self) -> u32 {
// self.current.load(Ordering::Relaxed)
// }
fn total(&self) -> u32 {
self.max
}
}
// fn total(&self) -> u32 {
// self.max
// }
// }

View File

@@ -6,7 +6,7 @@ use roaring::RoaringBitmap;
use super::ranking_rules::{RankingRule, RankingRuleOutput, RankingRuleQueryTrait};
use super::VectorStoreStats;
use crate::score_details::{self, ScoreDetails};
use crate::vector::{ArroyWrapper, DistributionShift, Embedder};
use crate::vector::{DistributionShift, Embedder, HannoyWrapper};
use crate::{DocumentId, Result, SearchContext, SearchLogger};
pub struct VectorSort<Q: RankingRuleQueryTrait> {
@@ -56,7 +56,8 @@ impl<Q: RankingRuleQueryTrait> VectorSort<Q> {
let target = &self.target;
let before = Instant::now();
let reader = ArroyWrapper::new(ctx.index.vector_arroy, self.embedder_index, self.quantized);
let reader =
HannoyWrapper::new(ctx.index.vector_hannoy, self.embedder_index, self.quantized);
let results = reader.nns_by_vector(ctx.txn, target, self.limit, Some(vector_candidates))?;
self.cached_sorted_docids = results.into_iter();
*ctx.vector_store_stats.get_or_insert_default() += VectorStoreStats {

View File

@@ -3,7 +3,7 @@ use std::sync::Arc;
use roaring::RoaringBitmap;
use crate::score_details::{self, ScoreDetails};
use crate::vector::{ArroyWrapper, Embedder};
use crate::vector::{Embedder, HannoyWrapper};
use crate::{filtered_universe, DocumentId, Filter, Index, Result, SearchResult};
pub struct Similar<'a> {
@@ -72,7 +72,7 @@ impl<'a> Similar<'a> {
crate::UserError::InvalidSimilarEmbedder(self.embedder_name.to_owned())
})?;
let reader = ArroyWrapper::new(self.index.vector_arroy, embedder_index, self.quantized);
let reader = HannoyWrapper::new(self.index.vector_hannoy, embedder_index, self.quantized);
let results = reader.nns_by_item(
self.rtxn,
self.id,

View File

@@ -45,7 +45,7 @@ impl<'t, 'i> ClearDocuments<'t, 'i> {
facet_id_is_empty_docids,
field_id_docid_facet_f64s,
field_id_docid_facet_strings,
vector_arroy,
vector_hannoy,
embedder_category_id: _,
documents,
} = self.index;
@@ -88,7 +88,7 @@ impl<'t, 'i> ClearDocuments<'t, 'i> {
field_id_docid_facet_f64s.clear(self.wtxn)?;
field_id_docid_facet_strings.clear(self.wtxn)?;
// vector
vector_arroy.clear(self.wtxn)?;
vector_hannoy.clear(self.wtxn)?;
documents.clear(self.wtxn)?;

View File

@@ -39,7 +39,7 @@ use crate::update::{
IndexerConfig, UpdateIndexingStep, WordPrefixDocids, WordPrefixIntegerDocids, WordsPrefixesFst,
};
use crate::vector::db::EmbedderInfo;
use crate::vector::{ArroyWrapper, RuntimeEmbedders};
use crate::vector::{HannoyWrapper, RuntimeEmbedders};
use crate::{CboRoaringBitmapCodec, Index, Result, UserError};
static MERGED_DATABASE_COUNT: usize = 7;
@@ -494,7 +494,7 @@ where
},
)?;
let reader =
ArroyWrapper::new(self.index.vector_arroy, index, action.was_quantized);
HannoyWrapper::new(self.index.vector_hannoy, index, action.was_quantized);
let Some(dim) = reader.dimensions(self.wtxn)? else {
continue;
};
@@ -504,7 +504,7 @@ where
for (embedder_name, dimension) in dimension {
let wtxn = &mut *self.wtxn;
let vector_arroy = self.index.vector_arroy;
let vector_hannoy = self.index.vector_hannoy;
let cancel = &self.should_abort;
let embedder_index =
@@ -523,7 +523,7 @@ where
let is_quantizing = embedder_config.is_some_and(|action| action.is_being_quantized);
pool.install(|| {
let mut writer = ArroyWrapper::new(vector_arroy, embedder_index, was_quantized);
let mut writer = HannoyWrapper::new(vector_hannoy, embedder_index, was_quantized);
writer.build_and_quantize(
wtxn,
// In the settings we don't have any progress to share

View File

@@ -32,7 +32,7 @@ use crate::update::settings::{InnerIndexSettings, InnerIndexSettingsDiff};
use crate::update::{AvailableIds, UpdateIndexingStep};
use crate::vector::parsed_vectors::{ExplicitVectors, VectorOrArrayOfVectors};
use crate::vector::settings::{RemoveFragments, WriteBackToDocuments};
use crate::vector::ArroyWrapper;
use crate::vector::HannoyWrapper;
use crate::{FieldDistribution, FieldId, FieldIdMapMissingEntry, Index, Result};
pub struct TransformOutput {
@@ -834,15 +834,15 @@ impl<'a, 'i> Transform<'a, 'i> {
None
};
let readers: BTreeMap<&str, (ArroyWrapper, &RoaringBitmap)> = settings_diff
let readers: BTreeMap<&str, (HannoyWrapper, &RoaringBitmap)> = settings_diff
.embedding_config_updates
.iter()
.filter_map(|(name, action)| {
if let Some(WriteBackToDocuments { embedder_id, user_provided }) =
action.write_back()
{
let reader = ArroyWrapper::new(
self.index.vector_arroy,
let reader = HannoyWrapper::new(
self.index.vector_hannoy,
*embedder_id,
action.was_quantized,
);
@@ -884,7 +884,7 @@ impl<'a, 'i> Transform<'a, 'i> {
let injected_vectors: std::result::Result<
serde_json::Map<String, serde_json::Value>,
arroy::Error,
hannoy::Error,
> = readers
.iter()
.filter_map(|(name, (reader, user_provided))| {
@@ -949,9 +949,9 @@ impl<'a, 'i> Transform<'a, 'i> {
else {
continue;
};
let arroy =
ArroyWrapper::new(self.index.vector_arroy, infos.embedder_id, was_quantized);
let Some(dimensions) = arroy.dimensions(wtxn)? else {
let hannoy =
HannoyWrapper::new(self.index.vector_hannoy, infos.embedder_id, was_quantized);
let Some(dimensions) = hannoy.dimensions(wtxn)? else {
continue;
};
for fragment_id in fragment_ids {
@@ -959,17 +959,17 @@ impl<'a, 'i> Transform<'a, 'i> {
if infos.embedding_status.user_provided_docids().is_empty() {
// no user provided: clear store
arroy.clear_store(wtxn, *fragment_id, dimensions)?;
hannoy.clear_store(wtxn, *fragment_id, dimensions)?;
continue;
}
// some user provided, remove only the ids that are not user provided
let to_delete = arroy.items_in_store(wtxn, *fragment_id, |items| {
let to_delete = hannoy.items_in_store(wtxn, *fragment_id, |items| {
items - infos.embedding_status.user_provided_docids()
})?;
for to_delete in to_delete {
arroy.del_item_in_store(wtxn, to_delete, *fragment_id, dimensions)?;
hannoy.del_item_in_store(wtxn, to_delete, *fragment_id, dimensions)?;
}
}
}

View File

@@ -27,7 +27,7 @@ use crate::update::index_documents::helpers::{
};
use crate::update::settings::InnerIndexSettingsDiff;
use crate::vector::db::{EmbeddingStatusDelta, IndexEmbeddingConfig};
use crate::vector::ArroyWrapper;
use crate::vector::HannoyWrapper;
use crate::{
lat_lng_to_xyz, CboRoaringBitmapCodec, DocumentId, FieldId, GeoPoint, Index, InternalError,
Result, SerializationError, U8StrStrCodec,
@@ -677,7 +677,8 @@ pub(crate) fn write_typed_chunk_into_index(
.get(&embedder_name)
.is_some_and(|conf| conf.is_quantized);
// FIXME: allow customizing distance
let writer = ArroyWrapper::new(index.vector_arroy, infos.embedder_id, binary_quantized);
let writer =
HannoyWrapper::new(index.vector_hannoy, infos.embedder_id, binary_quantized);
// remove vectors for docids we want them removed
let merger = remove_vectors_builder.build();

View File

@@ -255,9 +255,9 @@ impl<'a> From<FrameGrantR<'a>> for FrameWithHeader<'a> {
#[repr(u8)]
pub enum EntryHeader {
DbOperation(DbOperation),
ArroyDeleteVector(ArroyDeleteVector),
ArroySetVectors(ArroySetVectors),
ArroySetVector(ArroySetVector),
HannoyDeleteVector(HannoyDeleteVector),
HannoySetVectors(HannoySetVectors),
HannoySetVector(HannoySetVector),
}
impl EntryHeader {
@@ -268,9 +268,9 @@ impl EntryHeader {
const fn variant_id(&self) -> u8 {
match self {
EntryHeader::DbOperation(_) => 0,
EntryHeader::ArroyDeleteVector(_) => 1,
EntryHeader::ArroySetVectors(_) => 2,
EntryHeader::ArroySetVector(_) => 3,
EntryHeader::HannoyDeleteVector(_) => 1,
EntryHeader::HannoySetVectors(_) => 2,
EntryHeader::HannoySetVector(_) => 3,
}
}
@@ -286,26 +286,26 @@ impl EntryHeader {
}
const fn total_delete_vector_size() -> usize {
Self::variant_size() + mem::size_of::<ArroyDeleteVector>()
Self::variant_size() + mem::size_of::<HannoyDeleteVector>()
}
/// The `dimensions` corresponds to the number of `f32` in the embedding.
fn total_set_vectors_size(count: usize, dimensions: usize) -> usize {
let embedding_size = dimensions * mem::size_of::<f32>();
Self::variant_size() + mem::size_of::<ArroySetVectors>() + embedding_size * count
Self::variant_size() + mem::size_of::<HannoySetVectors>() + embedding_size * count
}
fn total_set_vector_size(dimensions: usize) -> usize {
let embedding_size = dimensions * mem::size_of::<f32>();
Self::variant_size() + mem::size_of::<ArroySetVector>() + embedding_size
Self::variant_size() + mem::size_of::<HannoySetVector>() + embedding_size
}
fn header_size(&self) -> usize {
let payload_size = match self {
EntryHeader::DbOperation(op) => mem::size_of_val(op),
EntryHeader::ArroyDeleteVector(adv) => mem::size_of_val(adv),
EntryHeader::ArroySetVectors(asvs) => mem::size_of_val(asvs),
EntryHeader::ArroySetVector(asv) => mem::size_of_val(asv),
EntryHeader::HannoyDeleteVector(adv) => mem::size_of_val(adv),
EntryHeader::HannoySetVectors(asvs) => mem::size_of_val(asvs),
EntryHeader::HannoySetVector(asv) => mem::size_of_val(asv),
};
Self::variant_size() + payload_size
}
@@ -319,19 +319,19 @@ impl EntryHeader {
EntryHeader::DbOperation(header)
}
1 => {
let header_bytes = &remaining[..mem::size_of::<ArroyDeleteVector>()];
let header_bytes = &remaining[..mem::size_of::<HannoyDeleteVector>()];
let header = checked::pod_read_unaligned(header_bytes);
EntryHeader::ArroyDeleteVector(header)
EntryHeader::HannoyDeleteVector(header)
}
2 => {
let header_bytes = &remaining[..mem::size_of::<ArroySetVectors>()];
let header_bytes = &remaining[..mem::size_of::<HannoySetVectors>()];
let header = checked::pod_read_unaligned(header_bytes);
EntryHeader::ArroySetVectors(header)
EntryHeader::HannoySetVectors(header)
}
3 => {
let header_bytes = &remaining[..mem::size_of::<ArroySetVector>()];
let header_bytes = &remaining[..mem::size_of::<HannoySetVector>()];
let header = checked::pod_read_unaligned(header_bytes);
EntryHeader::ArroySetVector(header)
EntryHeader::HannoySetVector(header)
}
id => panic!("invalid variant id: {id}"),
}
@@ -341,9 +341,9 @@ impl EntryHeader {
let (first, remaining) = header_bytes.split_first_mut().unwrap();
let payload_bytes = match self {
EntryHeader::DbOperation(op) => bytemuck::bytes_of(op),
EntryHeader::ArroyDeleteVector(adv) => bytemuck::bytes_of(adv),
EntryHeader::ArroySetVectors(asvs) => bytemuck::bytes_of(asvs),
EntryHeader::ArroySetVector(asv) => bytemuck::bytes_of(asv),
EntryHeader::HannoyDeleteVector(adv) => bytemuck::bytes_of(adv),
EntryHeader::HannoySetVectors(asvs) => bytemuck::bytes_of(asvs),
EntryHeader::HannoySetVector(asv) => bytemuck::bytes_of(asv),
};
*first = self.variant_id();
remaining.copy_from_slice(payload_bytes);
@@ -378,7 +378,7 @@ impl DbOperation {
#[derive(Debug, Clone, Copy, NoUninit, CheckedBitPattern)]
#[repr(transparent)]
pub struct ArroyDeleteVector {
pub struct HannoyDeleteVector {
pub docid: DocumentId,
}
@@ -386,13 +386,13 @@ pub struct ArroyDeleteVector {
#[repr(C)]
/// The embeddings are stored in the remaining space and represent
/// non-aligned [f32] values, `dimensions` f32s each.
pub struct ArroySetVectors {
pub struct HannoySetVectors {
pub docid: DocumentId,
pub embedder_id: u8,
_padding: [u8; 3],
}
impl ArroySetVectors {
impl HannoySetVectors {
fn embeddings_bytes<'a>(frame: &'a FrameGrantR<'_>) -> &'a [u8] {
let skip = EntryHeader::variant_size() + mem::size_of::<Self>();
&frame[skip..]
@@ -416,14 +416,14 @@ impl ArroySetVectors {
#[repr(C)]
/// The embeddings are stored in the remaining space and represent
/// non-aligned [f32] values, `dimensions` f32s each.
pub struct ArroySetVector {
pub struct HannoySetVector {
pub docid: DocumentId,
pub embedder_id: u8,
pub extractor_id: u8,
_padding: [u8; 2],
}
impl ArroySetVector {
impl HannoySetVector {
fn embeddings_bytes<'a>(frame: &'a FrameGrantR<'_>) -> &'a [u8] {
let skip = EntryHeader::variant_size() + mem::size_of::<Self>();
&frame[skip..]
@@ -553,7 +553,7 @@ impl<'b> ExtractorBbqueueSender<'b> {
let refcell = self.producers.get().unwrap();
let mut producer = refcell.0.borrow_mut_or_yield();
let payload_header = EntryHeader::ArroyDeleteVector(ArroyDeleteVector { docid });
let payload_header = EntryHeader::HannoyDeleteVector(HannoyDeleteVector { docid });
let total_length = EntryHeader::total_delete_vector_size();
if total_length > max_grant {
panic!("The entry is larger ({total_length} bytes) than the BBQueue max grant ({max_grant} bytes)");
@@ -589,8 +589,8 @@ impl<'b> ExtractorBbqueueSender<'b> {
// to zero to allocate no extra space at all
let dimensions = embeddings.first().map_or(0, |emb| emb.len());
let arroy_set_vector = ArroySetVectors { docid, embedder_id, _padding: [0; 3] };
let payload_header = EntryHeader::ArroySetVectors(arroy_set_vector);
let hannoy_set_vector = HannoySetVectors { docid, embedder_id, _padding: [0; 3] };
let payload_header = EntryHeader::HannoySetVectors(hannoy_set_vector);
let total_length = EntryHeader::total_set_vectors_size(embeddings.len(), dimensions);
if total_length > max_grant {
let mut value_file = tempfile::tempfile().map(BufWriter::new)?;
@@ -650,9 +650,9 @@ impl<'b> ExtractorBbqueueSender<'b> {
// to zero to allocate no extra space at all
let dimensions = embedding.as_ref().map_or(0, |emb| emb.len());
let arroy_set_vector =
ArroySetVector { docid, embedder_id, extractor_id, _padding: [0; 2] };
let payload_header = EntryHeader::ArroySetVector(arroy_set_vector);
let hannoy_set_vector =
HannoySetVector { docid, embedder_id, extractor_id, _padding: [0; 2] };
let payload_header = EntryHeader::HannoySetVector(hannoy_set_vector);
let total_length = EntryHeader::total_set_vector_size(dimensions);
if total_length > max_grant {
let mut value_file = tempfile::tempfile().map(BufWriter::new)?;

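The frame layout used by `ExtractorBbqueueSender` is one variant byte (written via `split_first_mut` above), then the POD header, then the raw embedding floats. A worked sketch of the size computation, mirroring `total_set_vector_size` and assuming the `HannoySetVector` definition from this file:

```rust
use std::mem;

// Sketch: recomputes the single-vector frame size with the same formula as
// EntryHeader::total_set_vector_size above. HannoySetVector is #[repr(C)]
// { docid: u32, embedder_id: u8, extractor_id: u8, _padding: [u8; 2] },
// i.e. 8 bytes on any target.
fn set_vector_frame_size(dimensions: usize) -> usize {
    let variant_size = mem::size_of::<u8>(); // one tag byte for the EntryHeader variant
    let embedding_size = dimensions * mem::size_of::<f32>();
    variant_size + mem::size_of::<HannoySetVector>() + embedding_size
}

// For a 768-dimensional embedding that is 1 + 8 + 768 * 4 = 3081 bytes, which must
// fit in the BBQueue max grant or be spilled to a temporary file, as the
// `total_length > max_grant` branches above do.
```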
View File

@@ -24,7 +24,7 @@ use crate::progress::{EmbedderStats, Progress};
use crate::update::settings::SettingsDelta;
use crate::update::GrenadParameters;
use crate::vector::settings::{EmbedderAction, RemoveFragments, WriteBackToDocuments};
use crate::vector::{ArroyWrapper, Embedder, RuntimeEmbedders};
use crate::vector::{Embedder, HannoyWrapper, RuntimeEmbedders};
use crate::{FieldsIdsMap, GlobalFieldsIdsMap, Index, InternalError, Result, ThreadPoolNoAbort};
pub(crate) mod de;
@@ -66,7 +66,7 @@ where
let mut bbbuffers = Vec::new();
let finished_extraction = AtomicBool::new(false);
let arroy_memory = grenad_parameters.max_memory;
let hannoy_memory = grenad_parameters.max_memory;
let (grenad_parameters, total_bbbuffer_capacity) =
indexer_memory_settings(pool.current_num_threads(), grenad_parameters);
@@ -129,8 +129,8 @@ where
let global_fields_ids_map = GlobalFieldsIdsMap::new(&new_fields_ids_map);
let vector_arroy = index.vector_arroy;
let arroy_writers: Result<HashMap<_, _>> = embedders
let vector_arroy = index.vector_hannoy;
let hannoy_writers: Result<HashMap<_, _>> = embedders
.inner_as_ref()
.iter()
.map(|(embedder_name, runtime)| {
@@ -143,7 +143,7 @@ where
})?;
let dimensions = runtime.embedder.dimensions();
let writer = ArroyWrapper::new(vector_arroy, embedder_index, runtime.is_quantized);
let writer = HannoyWrapper::new(vector_arroy, embedder_index, runtime.is_quantized);
Ok((
embedder_index,
@@ -152,10 +152,10 @@ where
})
.collect();
let mut arroy_writers = arroy_writers?;
let mut hannoy_writers = hannoy_writers?;
let congestion =
write_to_db(writer_receiver, finished_extraction, index, wtxn, &arroy_writers)?;
write_to_db(writer_receiver, finished_extraction, index, wtxn, &hannoy_writers)?;
indexing_context.progress.update_progress(IndexingStep::WaitingForExtractors);
@@ -169,8 +169,8 @@ where
wtxn,
indexing_context.progress,
index_embeddings,
arroy_memory,
&mut arroy_writers,
hannoy_memory,
&mut hannoy_writers,
None,
&indexing_context.must_stop_processing,
)
@@ -226,7 +226,7 @@ where
let mut bbbuffers = Vec::new();
let finished_extraction = AtomicBool::new(false);
let arroy_memory = grenad_parameters.max_memory;
let hannoy_memory = grenad_parameters.max_memory;
let (grenad_parameters, total_bbbuffer_capacity) =
indexer_memory_settings(pool.current_num_threads(), grenad_parameters);
@@ -283,7 +283,7 @@ where
let new_embedders = settings_delta.new_embedders();
let embedder_actions = settings_delta.embedder_actions();
let index_embedder_category_ids = settings_delta.new_embedder_category_id();
let mut arroy_writers = arroy_writers_from_embedder_actions(
let mut hannoy_writers = hannoy_writers_from_embedder_actions(
index,
embedder_actions,
new_embedders,
@@ -291,7 +291,7 @@ where
)?;
let congestion =
write_to_db(writer_receiver, finished_extraction, index, wtxn, &arroy_writers)?;
write_to_db(writer_receiver, finished_extraction, index, wtxn, &hannoy_writers)?;
indexing_context.progress.update_progress(IndexingStep::WaitingForExtractors);
@@ -305,8 +305,8 @@ where
wtxn,
indexing_context.progress,
index_embeddings,
arroy_memory,
&mut arroy_writers,
hannoy_memory,
&mut hannoy_writers,
Some(embedder_actions),
&indexing_context.must_stop_processing,
)
@@ -336,13 +336,13 @@ where
Ok(congestion)
}
fn arroy_writers_from_embedder_actions<'indexer>(
fn hannoy_writers_from_embedder_actions<'indexer>(
index: &Index,
embedder_actions: &'indexer BTreeMap<String, EmbedderAction>,
embedders: &'indexer RuntimeEmbedders,
index_embedder_category_ids: &'indexer std::collections::HashMap<String, u8>,
) -> Result<HashMap<u8, (&'indexer str, &'indexer Embedder, ArroyWrapper, usize)>> {
let vector_arroy = index.vector_arroy;
) -> Result<HashMap<u8, (&'indexer str, &'indexer Embedder, HannoyWrapper, usize)>> {
let vector_arroy = index.vector_hannoy;
embedders
.inner_as_ref()
@@ -361,7 +361,7 @@ fn arroy_writers_from_embedder_actions<'indexer>(
)));
};
let writer =
ArroyWrapper::new(vector_arroy, embedder_category_id, action.was_quantized);
HannoyWrapper::new(vector_arroy, embedder_category_id, action.was_quantized);
let dimensions = runtime.embedder.dimensions();
Some(Ok((
embedder_category_id,
@@ -384,7 +384,7 @@ where
let Some(WriteBackToDocuments { embedder_id, .. }) = action.write_back() else {
continue;
};
let reader = ArroyWrapper::new(index.vector_arroy, *embedder_id, action.was_quantized);
let reader = HannoyWrapper::new(index.vector_hannoy, *embedder_id, action.was_quantized);
let Some(dimensions) = reader.dimensions(wtxn)? else {
continue;
};
@@ -400,7 +400,7 @@ where
let Some(infos) = index.embedding_configs().embedder_info(wtxn, embedder_name)? else {
continue;
};
let arroy = ArroyWrapper::new(index.vector_arroy, infos.embedder_id, was_quantized);
let arroy = HannoyWrapper::new(index.vector_hannoy, infos.embedder_id, was_quantized);
let Some(dimensions) = arroy.dimensions(wtxn)? else {
continue;
};

View File

@@ -15,7 +15,7 @@ use crate::progress::Progress;
use crate::update::settings::InnerIndexSettings;
use crate::vector::db::IndexEmbeddingConfig;
use crate::vector::settings::EmbedderAction;
use crate::vector::{ArroyWrapper, Embedder, Embeddings, RuntimeEmbedders};
use crate::vector::{Embedder, Embeddings, HannoyWrapper, RuntimeEmbedders};
use crate::{Error, Index, InternalError, Result, UserError};
pub fn write_to_db(
@@ -23,9 +23,9 @@ pub fn write_to_db(
finished_extraction: &AtomicBool,
index: &Index,
wtxn: &mut RwTxn<'_>,
arroy_writers: &HashMap<u8, (&str, &Embedder, ArroyWrapper, usize)>,
hannoy_writers: &HashMap<u8, (&str, &Embedder, HannoyWrapper, usize)>,
) -> Result<ChannelCongestion> {
// Used by by the ArroySetVector to copy the embedding into an
// Used by the HannoySetVector to copy the embedding into an
// aligned memory area, required by arroy to accept a new vector.
let mut aligned_embedding = Vec::new();
let span = tracing::trace_span!(target: "indexing::write_db", "all");
@@ -56,7 +56,7 @@ pub fn write_to_db(
ReceiverAction::LargeVectors(large_vectors) => {
let LargeVectors { docid, embedder_id, .. } = large_vectors;
let (_, _, writer, dimensions) =
arroy_writers.get(&embedder_id).expect("requested a missing embedder");
hannoy_writers.get(&embedder_id).expect("requested a missing embedder");
let mut embeddings = Embeddings::new(*dimensions);
for embedding in large_vectors.read_embeddings(*dimensions) {
embeddings.push(embedding.to_vec()).unwrap();
@@ -68,7 +68,7 @@ pub fn write_to_db(
large_vector @ LargeVector { docid, embedder_id, extractor_id, .. },
) => {
let (_, _, writer, dimensions) =
arroy_writers.get(&embedder_id).expect("requested a missing embedder");
hannoy_writers.get(&embedder_id).expect("requested a missing embedder");
let embedding = large_vector.read_embedding(*dimensions);
writer.add_item_in_store(wtxn, docid, extractor_id, embedding)?;
}
@@ -80,12 +80,12 @@ pub fn write_to_db(
&mut writer_receiver,
index,
wtxn,
arroy_writers,
hannoy_writers,
&mut aligned_embedding,
)?;
}
write_from_bbqueue(&mut writer_receiver, index, wtxn, arroy_writers, &mut aligned_embedding)?;
write_from_bbqueue(&mut writer_receiver, index, wtxn, hannoy_writers, &mut aligned_embedding)?;
Ok(ChannelCongestion {
attempts: writer_receiver.sent_messages_attempts(),
@@ -115,8 +115,8 @@ pub fn build_vectors<MSP>(
wtxn: &mut RwTxn<'_>,
progress: &Progress,
index_embeddings: Vec<IndexEmbeddingConfig>,
arroy_memory: Option<usize>,
arroy_writers: &mut HashMap<u8, (&str, &Embedder, ArroyWrapper, usize)>,
hannoy_memory: Option<usize>,
hannoy_writers: &mut HashMap<u8, (&str, &Embedder, HannoyWrapper, usize)>,
embeder_actions: Option<&BTreeMap<String, EmbedderAction>>,
must_stop_processing: &MSP,
) -> Result<()>
@@ -129,7 +129,7 @@ where
let seed = rand::random();
let mut rng = rand::rngs::StdRng::seed_from_u64(seed);
for (_index, (embedder_name, _embedder, writer, dimensions)) in arroy_writers {
for (_index, (embedder_name, _embedder, writer, dimensions)) in hannoy_writers {
let dimensions = *dimensions;
let is_being_quantized = embeder_actions
.and_then(|actions| actions.get(*embedder_name).map(|action| action.is_being_quantized))
@@ -140,7 +140,7 @@ where
&mut rng,
dimensions,
is_being_quantized,
arroy_memory,
hannoy_memory,
must_stop_processing,
)?;
}
@@ -181,7 +181,7 @@ pub fn write_from_bbqueue(
writer_receiver: &mut WriterBbqueueReceiver<'_>,
index: &Index,
wtxn: &mut RwTxn<'_>,
arroy_writers: &HashMap<u8, (&str, &crate::vector::Embedder, ArroyWrapper, usize)>,
hannoy_writers: &HashMap<u8, (&str, &crate::vector::Embedder, HannoyWrapper, usize)>,
aligned_embedding: &mut Vec<f32>,
) -> crate::Result<()> {
while let Some(frame_with_header) = writer_receiver.recv_frame() {
@@ -221,17 +221,17 @@ pub fn write_from_bbqueue(
},
}
}
EntryHeader::ArroyDeleteVector(ArroyDeleteVector { docid }) => {
for (_index, (_name, _embedder, writer, dimensions)) in arroy_writers {
EntryHeader::HannoyDeleteVector(HannoyDeleteVector { docid }) => {
for (_index, (_name, _embedder, writer, dimensions)) in hannoy_writers {
let dimensions = *dimensions;
writer.del_items(wtxn, dimensions, docid)?;
}
}
EntryHeader::ArroySetVectors(asvs) => {
let ArroySetVectors { docid, embedder_id, .. } = asvs;
EntryHeader::HannoySetVectors(asvs) => {
let HannoySetVectors { docid, embedder_id, .. } = asvs;
let frame = frame_with_header.frame();
let (_, _, writer, dimensions) =
arroy_writers.get(&embedder_id).expect("requested a missing embedder");
hannoy_writers.get(&embedder_id).expect("requested a missing embedder");
let mut embeddings = Embeddings::new(*dimensions);
let all_embeddings = asvs.read_all_embeddings_into_vec(frame, aligned_embedding);
writer.del_items(wtxn, *dimensions, docid)?;
@@ -245,12 +245,12 @@ pub fn write_from_bbqueue(
writer.add_items(wtxn, docid, &embeddings)?;
}
}
EntryHeader::ArroySetVector(
asv @ ArroySetVector { docid, embedder_id, extractor_id, .. },
EntryHeader::HannoySetVector(
asv @ HannoySetVector { docid, embedder_id, extractor_id, .. },
) => {
let frame = frame_with_header.frame();
let (_, _, writer, dimensions) =
arroy_writers.get(&embedder_id).expect("requested a missing embedder");
hannoy_writers.get(&embedder_id).expect("requested a missing embedder");
let embedding = asv.read_all_embeddings_into_vec(frame, aligned_embedding);
if embedding.is_empty() {

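Both `write_to_db` and `write_from_bbqueue` resolve the destination store the same way: the `hannoy_writers` map is keyed by embedder id and holds `(name, embedder, HannoyWrapper, dimensions)` tuples. A reduced sketch of that lookup, with everything outside the map assumed:

```rust
use std::collections::HashMap;

// Sketch: the tuple layout (name, embedder, writer, dimensions) mirrors the
// hannoy_writers maps built in the indexer above; `Embedder` and `HannoyWrapper`
// come from milli's vector module.
fn writer_for<'a>(
    hannoy_writers: &'a HashMap<u8, (&'a str, &'a Embedder, HannoyWrapper, usize)>,
    embedder_id: u8,
) -> (&'a HannoyWrapper, usize) {
    let (_name, _embedder, writer, dimensions) =
        hannoy_writers.get(&embedder_id).expect("requested a missing embedder");
    (writer, *dimensions)
}
```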
View File

@@ -14,7 +14,7 @@ use crate::constants::RESERVED_VECTORS_FIELD_NAME;
use crate::documents::FieldIdMapper;
use crate::vector::db::{EmbeddingStatus, IndexEmbeddingConfig};
use crate::vector::parsed_vectors::{RawVectors, RawVectorsError, VectorOrArrayOfVectors};
use crate::vector::{ArroyWrapper, Embedding, RuntimeEmbedders};
use crate::vector::{Embedding, HannoyWrapper, RuntimeEmbedders};
use crate::{DocumentId, Index, InternalError, Result, UserError};
#[derive(Serialize)]
@@ -121,7 +121,7 @@ impl<'t> VectorDocumentFromDb<'t> {
status: &EmbeddingStatus,
) -> Result<VectorEntry<'t>> {
let reader =
ArroyWrapper::new(self.index.vector_arroy, embedder_id, config.config.quantized());
HannoyWrapper::new(self.index.vector_hannoy, embedder_id, config.config.quantized());
let vectors = reader.item_vectors(self.rtxn, self.docid)?;
Ok(VectorEntry {
@@ -149,7 +149,7 @@ impl<'t> VectorDocument<'t> for VectorDocumentFromDb<'t> {
name,
entry_from_raw_value(value, false).map_err(|_| {
InternalError::Serialization(crate::SerializationError::Decoding {
db_name: Some(crate::index::db_name::VECTOR_ARROY),
db_name: Some(crate::index::db_name::VECTOR_HANNOY),
})
})?,
))
@@ -167,7 +167,7 @@ impl<'t> VectorDocument<'t> for VectorDocumentFromDb<'t> {
Some(embedding_from_doc) => {
Some(entry_from_raw_value(embedding_from_doc, false).map_err(|_| {
InternalError::Serialization(crate::SerializationError::Decoding {
db_name: Some(crate::index::db_name::VECTOR_ARROY),
db_name: Some(crate::index::db_name::VECTOR_HANNOY),
})
})?)
}

View File

@@ -1,4 +1,4 @@
use arroy::distances::Cosine;
use hannoy::distances::Cosine;
use heed::RwTxn;
use super::UpgradeIndex;
@@ -25,12 +25,13 @@ impl UpgradeIndex for Latest_V1_13_To_Latest_V1_14 {
progress.update_progress(VectorStore::UpdateInternalVersions);
let rtxn = index.read_txn()?;
arroy::upgrade::from_0_5_to_0_6::<Cosine>(
&rtxn,
index.vector_arroy.remap_data_type(),
wtxn,
index.vector_arroy.remap_data_type(),
)?;
// hannoy::upgrade::from_0_5_to_0_6::<Cosine>(
// &rtxn,
// index.vector_hannoy.remap_data_type(),
// wtxn,
// index.vector_hannoy.remap_data_type(),
// )?;
unimplemented!("upgrade hannoy");
Ok(false)
}

View File

@@ -1,6 +1,6 @@
use std::time::Instant;
use arroy::Distance;
use hannoy::Distance;
use super::error::CompositeEmbedderContainsHuggingFace;
use super::{
@@ -307,19 +307,18 @@ fn check_similarity(
}
for (left, right) in left.into_iter().zip(right) {
let left = arroy::internals::UnalignedVector::from_slice(&left);
let right = arroy::internals::UnalignedVector::from_slice(&right);
let left = arroy::internals::Leaf {
header: arroy::distances::Cosine::new_header(&left),
let left = hannoy::internals::UnalignedVector::from_slice(&left);
let right = hannoy::internals::UnalignedVector::from_slice(&right);
let left = hannoy::internals::Item {
header: hannoy::distances::Cosine::new_header(&left),
vector: left,
};
let right = arroy::internals::Leaf {
header: arroy::distances::Cosine::new_header(&right),
let right = hannoy::internals::Item {
header: hannoy::distances::Cosine::new_header(&right),
vector: right,
};
let distance = arroy::distances::Cosine::built_distance(&left, &right);
let distance = hannoy::distances::Cosine::distance(&left, &right);
if distance > super::MAX_COMPOSITE_DISTANCE {
return Err(NewEmbedderError::composite_embedding_value_mismatch(distance, hint));
}

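The composite-embedder check above now goes through hannoy's internals: each embedding is wrapped in an `Item` with a `Cosine` header and the pairwise distance is compared against `MAX_COMPOSITE_DISTANCE` (0.01). A minimal sketch of that comparison, using only the hannoy calls visible in the hunk:

```rust
use hannoy::distances::Cosine;
use hannoy::internals::{Item, UnalignedVector};
use hannoy::Distance;

// Sketch: mirrors the check_similarity loop above for one pair of embeddings.
// MAX_COMPOSITE_DISTANCE is the crate constant (0.01) shown earlier in this diff.
fn vectors_are_close(left: &[f32], right: &[f32], max_distance: f32) -> bool {
    let left = UnalignedVector::from_slice(left);
    let right = UnalignedVector::from_slice(right);
    let left = Item { header: Cosine::new_header(&left), vector: left };
    let right = Item { header: Cosine::new_header(&right), vector: right };
    Cosine::distance(&left, &right) <= max_distance
}
```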
View File

@@ -3,9 +3,9 @@ use std::num::NonZeroUsize;
use std::sync::{Arc, Mutex};
use std::time::Instant;
use arroy::distances::{BinaryQuantizedCosine, Cosine};
use arroy::ItemId;
use deserr::{DeserializeError, Deserr};
use hannoy::distances::{BinaryQuantizedCosine, Cosine};
use hannoy::ItemId;
use heed::{RoTxn, RwTxn, Unspecified};
use ordered_float::OrderedFloat;
use roaring::RoaringBitmap;
@@ -41,15 +41,15 @@ pub type Embedding = Vec<f32>;
pub const REQUEST_PARALLELISM: usize = 40;
pub const MAX_COMPOSITE_DISTANCE: f32 = 0.01;
pub struct ArroyWrapper {
pub struct HannoyWrapper {
quantized: bool,
embedder_index: u8,
database: arroy::Database<Unspecified>,
database: hannoy::Database<Unspecified>,
}
impl ArroyWrapper {
impl HannoyWrapper {
pub fn new(
database: arroy::Database<Unspecified>,
database: hannoy::Database<Unspecified>,
embedder_index: u8,
quantized: bool,
) -> Self {
@@ -60,19 +60,19 @@ impl ArroyWrapper {
self.embedder_index
}
fn readers<'a, D: arroy::Distance>(
fn readers<'a, D: hannoy::Distance>(
&'a self,
rtxn: &'a RoTxn<'a>,
db: arroy::Database<D>,
) -> impl Iterator<Item = Result<arroy::Reader<'a, D>, arroy::Error>> + 'a {
arroy_store_range_for_embedder(self.embedder_index).filter_map(move |index| {
match arroy::Reader::open(rtxn, index, db) {
db: hannoy::Database<D>,
) -> impl Iterator<Item = Result<hannoy::Reader<'a, D>, hannoy::Error>> + 'a {
hannoy_store_range_for_embedder(self.embedder_index).filter_map(move |index| {
match hannoy::Reader::open(rtxn, index, db) {
Ok(reader) => match reader.is_empty(rtxn) {
Ok(false) => Some(Ok(reader)),
Ok(true) => None,
Err(e) => Some(Err(e)),
},
Err(arroy::Error::MissingMetadata(_)) => None,
Err(hannoy::Error::MissingMetadata(_)) => None,
Err(e) => Some(Err(e)),
}
})
@@ -86,7 +86,7 @@ impl ArroyWrapper {
rtxn: &RoTxn,
store_id: u8,
with_items: F,
) -> Result<O, arroy::Error>
) -> Result<O, hannoy::Error>
where
F: FnOnce(&RoaringBitmap) -> O,
{
@@ -97,26 +97,26 @@ impl ArroyWrapper {
}
}
fn _items_in_store<D: arroy::Distance, F, O>(
fn _items_in_store<D: hannoy::Distance, F, O>(
&self,
rtxn: &RoTxn,
db: arroy::Database<D>,
db: hannoy::Database<D>,
store_id: u8,
with_items: F,
) -> Result<O, arroy::Error>
) -> Result<O, hannoy::Error>
where
F: FnOnce(&RoaringBitmap) -> O,
{
let index = arroy_store_for_embedder(self.embedder_index, store_id);
let reader = arroy::Reader::open(rtxn, index, db);
let index = hannoy_store_for_embedder(self.embedder_index, store_id);
let reader = hannoy::Reader::open(rtxn, index, db);
match reader {
Ok(reader) => Ok(with_items(reader.item_ids())),
Err(arroy::Error::MissingMetadata(_)) => Ok(with_items(&RoaringBitmap::new())),
Err(hannoy::Error::MissingMetadata(_)) => Ok(with_items(&RoaringBitmap::new())),
Err(err) => Err(err),
}
}
pub fn dimensions(&self, rtxn: &RoTxn) -> Result<Option<usize>, arroy::Error> {
pub fn dimensions(&self, rtxn: &RoTxn) -> Result<Option<usize>, hannoy::Error> {
if self.quantized {
Ok(self
.readers(rtxn, self.quantized_db())
@@ -140,39 +140,41 @@ impl ArroyWrapper {
rng: &mut R,
dimension: usize,
quantizing: bool,
arroy_memory: Option<usize>,
hannoy_memory: Option<usize>,
cancel: &(impl Fn() -> bool + Sync + Send),
) -> Result<(), arroy::Error> {
for index in arroy_store_range_for_embedder(self.embedder_index) {
) -> Result<(), hannoy::Error> {
for index in hannoy_store_range_for_embedder(self.embedder_index) {
if self.quantized {
let writer = arroy::Writer::new(self.quantized_db(), index, dimension);
let writer = hannoy::Writer::new(self.quantized_db(), index, dimension);
if writer.need_build(wtxn)? {
writer.builder(rng).build(wtxn)?
writer.builder(rng).ef_construction(48).build::<16, 32>(wtxn)?
} else if writer.is_empty(wtxn)? {
continue;
}
} else {
let writer = arroy::Writer::new(self.angular_db(), index, dimension);
let writer = hannoy::Writer::new(self.angular_db(), index, dimension);
// If we are quantizing the databases, we can't know from meilisearch
// if the db was empty but still contained the wrong metadata, thus we need
// to quantize everything and can't stop early. Since this operation can
// only happen once in the life of an embedder, it's not very performance
// sensitive.
if quantizing && !self.quantized {
let writer = writer.prepare_changing_distance::<BinaryQuantizedCosine>(wtxn)?;
writer
.builder(rng)
.available_memory(arroy_memory.unwrap_or(usize::MAX))
.progress(|step| progress.update_progress_from_arroy(step))
.cancel(cancel)
.build(wtxn)?;
// let writer = writer.prepare_changing_distance::<BinaryQuantizedCosine>(wtxn)?;
// writer
// .builder(rng)
// .available_memory(hannoy_memory.unwrap_or(usize::MAX))
// .progress(|step| progress.update_progress_from_hannoy(step))
// .cancel(cancel)
// .build(wtxn)?;
unimplemented!("switching from quantized to non-quantized");
} else if writer.need_build(wtxn)? {
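// The arroy-style progress and cancel hooks are left commented out below;
// the hannoy builder does not appear to expose them yet.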
writer
.builder(rng)
.available_memory(arroy_memory.unwrap_or(usize::MAX))
.progress(|step| progress.update_progress_from_arroy(step))
.cancel(cancel)
.build(wtxn)?;
.available_memory(hannoy_memory.unwrap_or(usize::MAX))
// .progress(|step| progress.update_progress_from_hannoy(step))
// .cancel(cancel)
.ef_construction(48)
.build::<16, 32>(wtxn)?;
} else if writer.is_empty(wtxn)? {
continue;
}
@@ -188,18 +190,18 @@ impl ArroyWrapper {
pub fn add_items(
&self,
wtxn: &mut RwTxn,
item_id: arroy::ItemId,
item_id: hannoy::ItemId,
embeddings: &Embeddings<f32>,
) -> Result<(), arroy::Error> {
) -> Result<(), hannoy::Error> {
let dimension = embeddings.dimension();
for (index, vector) in
arroy_store_range_for_embedder(self.embedder_index).zip(embeddings.iter())
hannoy_store_range_for_embedder(self.embedder_index).zip(embeddings.iter())
{
if self.quantized {
arroy::Writer::new(self.quantized_db(), index, dimension)
hannoy::Writer::new(self.quantized_db(), index, dimension)
.add_item(wtxn, item_id, vector)?
} else {
arroy::Writer::new(self.angular_db(), index, dimension)
hannoy::Writer::new(self.angular_db(), index, dimension)
.add_item(wtxn, item_id, vector)?
}
}
@@ -210,9 +212,9 @@ impl ArroyWrapper {
pub fn add_item(
&self,
wtxn: &mut RwTxn,
item_id: arroy::ItemId,
item_id: hannoy::ItemId,
vector: &[f32],
) -> Result<(), arroy::Error> {
) -> Result<(), hannoy::Error> {
if self.quantized {
self._add_item(wtxn, self.quantized_db(), item_id, vector)
} else {
@@ -220,17 +222,17 @@ impl ArroyWrapper {
}
}
fn _add_item<D: arroy::Distance>(
fn _add_item<D: hannoy::Distance>(
&self,
wtxn: &mut RwTxn,
db: arroy::Database<D>,
item_id: arroy::ItemId,
db: hannoy::Database<D>,
item_id: hannoy::ItemId,
vector: &[f32],
) -> Result<(), arroy::Error> {
) -> Result<(), hannoy::Error> {
let dimension = vector.len();
for index in arroy_store_range_for_embedder(self.embedder_index) {
let writer = arroy::Writer::new(db, index, dimension);
for index in hannoy_store_range_for_embedder(self.embedder_index) {
let writer = hannoy::Writer::new(db, index, dimension);
if !writer.contains_item(wtxn, item_id)? {
writer.add_item(wtxn, item_id, vector)?;
break;
@@ -245,10 +247,10 @@ impl ArroyWrapper {
pub fn add_item_in_store(
&self,
wtxn: &mut RwTxn,
item_id: arroy::ItemId,
item_id: hannoy::ItemId,
store_id: u8,
vector: &[f32],
) -> Result<(), arroy::Error> {
) -> Result<(), hannoy::Error> {
if self.quantized {
self._add_item_in_store(wtxn, self.quantized_db(), item_id, store_id, vector)
} else {
@@ -256,18 +258,18 @@ impl ArroyWrapper {
}
}
fn _add_item_in_store<D: arroy::Distance>(
fn _add_item_in_store<D: hannoy::Distance>(
&self,
wtxn: &mut RwTxn,
db: arroy::Database<D>,
item_id: arroy::ItemId,
db: hannoy::Database<D>,
item_id: hannoy::ItemId,
store_id: u8,
vector: &[f32],
) -> Result<(), arroy::Error> {
) -> Result<(), hannoy::Error> {
let dimension = vector.len();
let index = arroy_store_for_embedder(self.embedder_index, store_id);
let writer = arroy::Writer::new(db, index, dimension);
let index = hannoy_store_for_embedder(self.embedder_index, store_id);
let writer = hannoy::Writer::new(db, index, dimension);
writer.add_item(wtxn, item_id, vector)
}
@@ -276,14 +278,14 @@ impl ArroyWrapper {
&self,
wtxn: &mut RwTxn,
dimension: usize,
item_id: arroy::ItemId,
) -> Result<(), arroy::Error> {
for index in arroy_store_range_for_embedder(self.embedder_index) {
item_id: hannoy::ItemId,
) -> Result<(), hannoy::Error> {
for index in hannoy_store_range_for_embedder(self.embedder_index) {
if self.quantized {
let writer = arroy::Writer::new(self.quantized_db(), index, dimension);
let writer = hannoy::Writer::new(self.quantized_db(), index, dimension);
writer.del_item(wtxn, item_id)?;
} else {
let writer = arroy::Writer::new(self.angular_db(), index, dimension);
let writer = hannoy::Writer::new(self.angular_db(), index, dimension);
writer.del_item(wtxn, item_id)?;
}
}
@@ -301,10 +303,10 @@ impl ArroyWrapper {
pub fn del_item_in_store(
&self,
wtxn: &mut RwTxn,
item_id: arroy::ItemId,
item_id: hannoy::ItemId,
store_id: u8,
dimensions: usize,
) -> Result<bool, arroy::Error> {
) -> Result<bool, hannoy::Error> {
if self.quantized {
self._del_item_in_store(wtxn, self.quantized_db(), item_id, store_id, dimensions)
} else {
@@ -312,16 +314,16 @@ impl ArroyWrapper {
}
}
fn _del_item_in_store<D: arroy::Distance>(
fn _del_item_in_store<D: hannoy::Distance>(
&self,
wtxn: &mut RwTxn,
db: arroy::Database<D>,
item_id: arroy::ItemId,
db: hannoy::Database<D>,
item_id: hannoy::ItemId,
store_id: u8,
dimensions: usize,
) -> Result<bool, arroy::Error> {
let index = arroy_store_for_embedder(self.embedder_index, store_id);
let writer = arroy::Writer::new(db, index, dimensions);
) -> Result<bool, hannoy::Error> {
let index = hannoy_store_for_embedder(self.embedder_index, store_id);
let writer = hannoy::Writer::new(db, index, dimensions);
writer.del_item(wtxn, item_id)
}
@@ -335,7 +337,7 @@ impl ArroyWrapper {
wtxn: &mut RwTxn,
store_id: u8,
dimensions: usize,
) -> Result<(), arroy::Error> {
) -> Result<(), hannoy::Error> {
if self.quantized {
self._clear_store(wtxn, self.quantized_db(), store_id, dimensions)
} else {
@@ -343,15 +345,15 @@ impl ArroyWrapper {
}
}
fn _clear_store<D: arroy::Distance>(
fn _clear_store<D: hannoy::Distance>(
&self,
wtxn: &mut RwTxn,
db: arroy::Database<D>,
db: hannoy::Database<D>,
store_id: u8,
dimensions: usize,
) -> Result<(), arroy::Error> {
let index = arroy_store_for_embedder(self.embedder_index, store_id);
let writer = arroy::Writer::new(db, index, dimensions);
) -> Result<(), hannoy::Error> {
let index = hannoy_store_for_embedder(self.embedder_index, store_id);
let writer = hannoy::Writer::new(db, index, dimensions);
writer.clear(wtxn)
}
@@ -359,9 +361,9 @@ impl ArroyWrapper {
pub fn del_item(
&self,
wtxn: &mut RwTxn,
item_id: arroy::ItemId,
item_id: hannoy::ItemId,
vector: &[f32],
) -> Result<bool, arroy::Error> {
) -> Result<bool, hannoy::Error> {
if self.quantized {
self._del_item(wtxn, self.quantized_db(), item_id, vector)
} else {
@@ -369,37 +371,34 @@ impl ArroyWrapper {
}
}
fn _del_item<D: arroy::Distance>(
fn _del_item<D: hannoy::Distance>(
&self,
wtxn: &mut RwTxn,
db: arroy::Database<D>,
item_id: arroy::ItemId,
db: hannoy::Database<D>,
item_id: hannoy::ItemId,
vector: &[f32],
) -> Result<bool, arroy::Error> {
) -> Result<bool, hannoy::Error> {
let dimension = vector.len();
for index in arroy_store_range_for_embedder(self.embedder_index) {
let writer = arroy::Writer::new(db, index, dimension);
let Some(candidate) = writer.item_vector(wtxn, item_id)? else {
continue;
};
if candidate == vector {
for index in hannoy_store_range_for_embedder(self.embedder_index) {
let writer = hannoy::Writer::new(db, index, dimension);
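// Unlike the arroy version above, which only deleted when the stored vector
// matched, this deletes the item from the first store that contains it.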
if writer.contains_item(wtxn, item_id)? {
return writer.del_item(wtxn, item_id);
}
}
Ok(false)
}
pub fn clear(&self, wtxn: &mut RwTxn, dimension: usize) -> Result<(), arroy::Error> {
for index in arroy_store_range_for_embedder(self.embedder_index) {
pub fn clear(&self, wtxn: &mut RwTxn, dimension: usize) -> Result<(), hannoy::Error> {
for index in hannoy_store_range_for_embedder(self.embedder_index) {
if self.quantized {
let writer = arroy::Writer::new(self.quantized_db(), index, dimension);
let writer = hannoy::Writer::new(self.quantized_db(), index, dimension);
if writer.is_empty(wtxn)? {
continue;
}
writer.clear(wtxn)?;
} else {
let writer = arroy::Writer::new(self.angular_db(), index, dimension);
let writer = hannoy::Writer::new(self.angular_db(), index, dimension);
if writer.is_empty(wtxn)? {
continue;
}
@@ -413,17 +412,17 @@ impl ArroyWrapper {
&self,
rtxn: &RoTxn,
dimension: usize,
item: arroy::ItemId,
) -> Result<bool, arroy::Error> {
for index in arroy_store_range_for_embedder(self.embedder_index) {
item: hannoy::ItemId,
) -> Result<bool, hannoy::Error> {
for index in hannoy_store_range_for_embedder(self.embedder_index) {
let contains = if self.quantized {
let writer = arroy::Writer::new(self.quantized_db(), index, dimension);
let writer = hannoy::Writer::new(self.quantized_db(), index, dimension);
if writer.is_empty(rtxn)? {
continue;
}
writer.contains_item(rtxn, item)?
} else {
let writer = arroy::Writer::new(self.angular_db(), index, dimension);
let writer = hannoy::Writer::new(self.angular_db(), index, dimension);
if writer.is_empty(rtxn)? {
continue;
}
@@ -442,7 +441,7 @@ impl ArroyWrapper {
item: ItemId,
limit: usize,
filter: Option<&RoaringBitmap>,
) -> Result<Vec<(ItemId, f32)>, arroy::Error> {
) -> Result<Vec<(ItemId, f32)>, hannoy::Error> {
if self.quantized {
self._nns_by_item(rtxn, self.quantized_db(), item, limit, filter)
} else {
@@ -450,24 +449,25 @@ impl ArroyWrapper {
}
}
fn _nns_by_item<D: arroy::Distance>(
fn _nns_by_item<D: hannoy::Distance>(
&self,
rtxn: &RoTxn,
db: arroy::Database<D>,
db: hannoy::Database<D>,
item: ItemId,
limit: usize,
filter: Option<&RoaringBitmap>,
) -> Result<Vec<(ItemId, f32)>, arroy::Error> {
) -> Result<Vec<(ItemId, f32)>, hannoy::Error> {
let mut results = Vec::new();
for reader in self.readers(rtxn, db) {
let reader = reader?;
let mut searcher = reader.nns(limit);
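// In hannoy, nns() presumably takes the result limit plus the search-time ef;
// a larger ef explores more candidates, trading speed for recall.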
let searcher = reader.nns(limit, limit * 2); // TODO find better ef
if let Some(filter) = filter {
if reader.item_ids().is_disjoint(filter) {
continue;
}
searcher.candidates(filter);
tracing::error!("Hannoy doesn't support filtering");
// searcher.candidates(filter);
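// With candidate filtering disabled, the neighbours returned here may include
// ids outside `filter`.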
}
if let Some(mut ret) = searcher.by_item(rtxn, item)? {
@@ -484,7 +484,7 @@ impl ArroyWrapper {
vector: &[f32],
limit: usize,
filter: Option<&RoaringBitmap>,
) -> Result<Vec<(ItemId, f32)>, arroy::Error> {
) -> Result<Vec<(ItemId, f32)>, hannoy::Error> {
if self.quantized {
self._nns_by_vector(rtxn, self.quantized_db(), vector, limit, filter)
} else {
@@ -492,24 +492,25 @@ impl ArroyWrapper {
}
}
fn _nns_by_vector<D: arroy::Distance>(
fn _nns_by_vector<D: hannoy::Distance>(
&self,
rtxn: &RoTxn,
db: arroy::Database<D>,
db: hannoy::Database<D>,
vector: &[f32],
limit: usize,
filter: Option<&RoaringBitmap>,
) -> Result<Vec<(ItemId, f32)>, arroy::Error> {
) -> Result<Vec<(ItemId, f32)>, hannoy::Error> {
let mut results = Vec::new();
for reader in self.readers(rtxn, db) {
let reader = reader?;
let mut searcher = reader.nns(limit);
let searcher = reader.nns(limit, limit * 2); // TODO find better ef
if let Some(filter) = filter {
if reader.item_ids().is_disjoint(filter) {
continue;
}
searcher.candidates(filter);
tracing::error!("Hannoy doesn't support filtering");
// searcher.candidates(filter);
}
results.append(&mut searcher.by_vector(rtxn, vector)?);
@@ -520,7 +521,7 @@ impl ArroyWrapper {
Ok(results)
}
pub fn item_vectors(&self, rtxn: &RoTxn, item_id: u32) -> Result<Vec<Vec<f32>>, arroy::Error> {
pub fn item_vectors(&self, rtxn: &RoTxn, item_id: u32) -> Result<Vec<Vec<f32>>, hannoy::Error> {
let mut vectors = Vec::new();
if self.quantized {
@@ -539,19 +540,19 @@ impl ArroyWrapper {
Ok(vectors)
}
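// angular_db() and quantized_db() are two typed views over the same LMDB
// database; remap_data_type() only changes how entries are decoded.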
fn angular_db(&self) -> arroy::Database<Cosine> {
fn angular_db(&self) -> hannoy::Database<Cosine> {
self.database.remap_data_type()
}
fn quantized_db(&self) -> arroy::Database<BinaryQuantizedCosine> {
fn quantized_db(&self) -> hannoy::Database<BinaryQuantizedCosine> {
self.database.remap_data_type()
}
pub fn aggregate_stats(
&self,
rtxn: &RoTxn,
stats: &mut ArroyStats,
) -> Result<(), arroy::Error> {
stats: &mut HannoyStats,
) -> Result<(), hannoy::Error> {
if self.quantized {
for reader in self.readers(rtxn, self.quantized_db()) {
let reader = reader?;
@@ -579,10 +580,11 @@ impl ArroyWrapper {
}
#[derive(Debug, Default, Clone)]
pub struct ArroyStats {
pub struct HannoyStats {
pub number_of_embeddings: u64,
pub documents: RoaringBitmap,
}
/// One or multiple embeddings stored consecutively in a flat vector.
#[derive(Debug, PartialEq)]
pub struct Embeddings<F> {
@@ -1208,11 +1210,11 @@ pub const fn is_cuda_enabled() -> bool {
cfg!(feature = "cuda")
}
fn arroy_store_range_for_embedder(embedder_id: u8) -> impl Iterator<Item = u16> {
(0..=u8::MAX).map(move |store_id| arroy_store_for_embedder(embedder_id, store_id))
fn hannoy_store_range_for_embedder(embedder_id: u8) -> impl Iterator<Item = u16> {
(0..=u8::MAX).map(move |store_id| hannoy_store_for_embedder(embedder_id, store_id))
}
fn arroy_store_for_embedder(embedder_id: u8, store_id: u8) -> u16 {
fn hannoy_store_for_embedder(embedder_id: u8, store_id: u8) -> u16 {
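// Pack the embedder id into the high byte and the store id into the low byte,
// giving each embedder its own contiguous range of 256 u16 store ids.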
let embedder_id = (embedder_id as u16) << 8;
embedder_id | (store_id as u16)
}