Compare commits

...

234 Commits

Author SHA1 Message Date
bdd363dd94 Add spans 2024-09-26 17:20:32 +02:00
d6b3aae8a6 WIP add more logs 2024-09-26 16:37:38 +02:00
ac2d54b27c Make the merger multithreaded 2024-09-26 11:09:06 +02:00
7d61697f19 Fix another iteration bug on hashmap entries 2024-09-25 22:42:41 +02:00
97d2860998 Fix iterating on hashmap entries 2024-09-25 22:15:15 +02:00
15bf556291 Write the inverted indexes in memory and never on disk 2024-09-25 18:13:19 +02:00
3d244451df Reduce the lru key size from 8 to 12 bytes 2024-09-25 16:14:13 +02:00
5f53935c8a Fix a bug in the Lru 2024-09-25 16:09:34 +02:00
29a7623c3f Fxi some logs 2024-09-25 15:57:50 +02:00
e97041f7d0 Replace the Lru free list by a simple increment 2024-09-25 15:55:52 +02:00
52d7f3ed1c Reduce the lru key size from 20 to 8 bytes 2024-09-25 15:37:13 +02:00
86d5e6d9ff Use the new Lru 2024-09-25 14:54:56 +02:00
759b9b1546 Introduce a new custom Lru 2024-09-25 14:49:12 +02:00
3f7a500f3b Build prefix fst 2024-09-25 14:36:06 +02:00
974272f2e9 Merge branch 'main' into indexer-edition-2024 2024-09-25 07:41:16 +02:00
7ad037841f Move the tracing info to eprintln 2024-09-24 18:21:58 +02:00
e0c7067355 Expose an IndexedParallelIterator to the index function 2024-09-24 17:24:59 +02:00
6e87332410 Change the way the FST is built 2024-09-24 16:28:31 +02:00
2d1caf27df Use eprintln to log 2024-09-24 15:59:50 +02:00
92678383d6 Update charabia 2024-09-24 15:37:56 +02:00
7f148c127c Measure the SmallVec efficacity 2024-09-24 15:32:15 +02:00
4ce5d3d66d Do not check before pushing in bitmaps 2024-09-24 09:43:16 +02:00
ff931edb55 Update roaring to inline max calls 2024-09-23 16:53:42 +02:00
42b093687d Introduce the new PushOptimizedBitmap 2024-09-23 16:38:21 +02:00
835c5f98f9 Remove the debug symbols 2024-09-23 15:49:24 +02:00
f00664247d Add more stats about the channel message sent 2024-09-23 15:13:52 +02:00
3c63d4a1e5 Fix charabia Zho 2024-09-23 14:50:17 +02:00
4551abf6d4 Update roaring to the latest version 2024-09-23 14:35:33 +02:00
193d7f5d34 Add the mutualized charabia normalization 2024-09-23 14:24:25 +02:00
013acb3d93 Measure merger writer channel contention 2024-09-23 11:07:59 +02:00
7f20c13f3f Merge #4943
4943: Correct broken links in README r=curquiza a=iornstein

# Pull Request

## Related issue
Fixes #4942

## What does this PR do?
- Corrects some broken links in the README. My suspicion is that some of these documentation articles were moved around without someone updating links in the README.

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)? _(well the contributing guidelines led me to create an issue first)_
- [x] Have you read the contributing guidelines? _yes_
- [x] Have you made sure that the title is accurate and descriptive of the changes? _yes_

Thank you so much for contributing to Meilisearch!


Co-authored-by: Ian Ornstein <ian.ornstein@gmail.com>
2024-09-19 19:22:04 +00:00
462a2329f1 Merge #4941
4941: Implement the binary quantization in meilisearch r=irevoire a=irevoire

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/4873

## What does this PR do?
- Add a settings for the binary quantization
- Once enabled, the bq cannot be disabled

TODO:
- [ ] Missing a bunch of tests

Co-authored-by: Tamo <tamo@meilisearch.com>
2024-09-19 15:50:24 +00:00
f6483cf15d apply review comment 2024-09-19 16:47:06 +02:00
bd34ed01d9 Merge #4945
4945: Add swedish in default pipelines r=dureuill a=ManyTheFish

# Summary
## Fix Swedish support

In Swedish the characters `å`/`ä`/`ö` are completely different than `a` or `o`  and should not be normalized as the same character.
because the Swedish specialized pipeline was not activated by default, these characters were normalized even with the settings:
```json
{
  "localizedAttributes": [ { "locales": ["swe"], "attributePatterns": ["*"] } ]
}
```

## Update Charabia adding German support

German segmentation will now be activated using the setting:
```json
{
  "localizedAttributes": [ { "locales": ["deu"], "attributePatterns": ["*"] } ]
}
```

# TODO

- [x] Activate Swedish Pipeline
- [x] Add a test to avoid future regressions
- [x] Update Charabia


Co-authored-by: ManyTheFish <many@meilisearch.com>
2024-09-19 14:42:03 +00:00
74199f328d Make clippy happy 2024-09-19 16:27:34 +02:00
1113c42de0 fix broken comments 2024-09-19 16:18:36 +02:00
465afe01b2 Add test for German 2024-09-19 16:09:01 +02:00
7d6768e4c4 Add german tokenization pipeline 2024-09-19 16:09:01 +02:00
f77661ec44 Update Charabia v0.9.1 2024-09-19 16:08:59 +02:00
b8fd85a46d Get rids of useless collect before an iteration on the readers 2024-09-19 15:57:38 +02:00
fd43c6c404 Improve the error message explaining you can't un-bq an embedder 2024-09-19 15:51:29 +02:00
2564ec1496 Update milli/src/index.rs
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-09-19 15:41:44 +02:00
b6b73fe41c Update milli/src/update/settings.rs
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-09-19 15:41:14 +02:00
6dde41cc46 stop using a local version of arroy and instead point to the git repo with the rev 2024-09-19 15:25:38 +02:00
163f8023a1 remove debug println 2024-09-19 12:13:25 +02:00
2b120b89e4 update the test now that the embedder must be specified 2024-09-19 12:08:59 +02:00
84f842233d snapshots the embedder settings in the dump import with vector test 2024-09-19 12:00:58 +02:00
633537ccd7 fix updating documents without updating the settings 2024-09-19 12:00:58 +02:00
e8d7c00d30 add a test on the settings value 2024-09-19 12:00:58 +02:00
3f6301dbc9 fix the missing embedder name in the error message when trying to disable the binary quantization 2024-09-19 12:00:58 +02:00
ca71b63ed1 adds integration tests 2024-09-19 12:00:58 +02:00
2b6952eda1 rename the ArroyReader to an ArroyWrapper since it can read and write 2024-09-19 12:00:58 +02:00
79f29eed3c fix the tests and the arroy_readers method 2024-09-19 12:00:58 +02:00
cc45e264ca implement the binary quantization in meilisearch 2024-09-19 12:00:56 +02:00
5f474a640d Merge #4938
4938: Remove default embedder r=ManyTheFish a=dureuill

# Pull Request

## Related issue
Fixes #4738 

## What does this PR do?

[See public usage](https://meilisearch.notion.site/v1-11-AI-search-changes-0e37727193884a70999f254fa953ce6e#1044b06b651f80edb9d4ef6dc367bad0)

- Remove `hybrid.embedder` boolean from analytics because embedder is now mandatory and so the boolean would always be `true`
- Rework search kind so that a search without query but with vector is a vector search regardless of (non-zero) semantic ratio


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-09-19 09:17:14 +00:00
bbaee3dbc6 Add Swedish pipeline in all-tokenization feature 2024-09-19 08:34:51 +02:00
877717cb26 Add a test using Swedish documents 2024-09-19 08:34:04 +02:00
716817122a Correct broken links in README 2024-09-18 16:30:29 -05:00
ff523a2357 Merge #4939
4939: Introduce the `STARTS WITH` filter operator r=irevoire a=Kerollmops

This PR fixes #4872 by introducing the `STARTS WITH` filter operator and gating it under the _contains filter_ experimental feature along with the `CONTAINS` one. I also updated [the experimental feature discussion page](https://github.com/orgs/meilisearch/discussions/763).

Co-authored-by: Clément Renault <clement@meilisearch.com>
2024-09-18 10:19:48 +00:00
29c3aca72a Merge #4929
4929: Add facets support to federated r=Kerollmops a=dureuill

# Pull Request

## Related issue 

- Fixes #4932 (sprint issue)
- Fixes  #4913 (user-opened issue)

## What does this PR do?

See [public usage](https://meilisearch.notion.site/v1-11-Federated-search-59b30e03383c40729d7541a3dffb0069)

> [!CAUTION]
> This PR introduces a 🚨**breaking change**🚨: `queries.facets` when `federation` is present and non-`null` is now **an error**

### Implementation standpoint:

- Facet distribution: fix issue where truncated facet distribution would have a wrong order
- facet distribution: implement Display for OrderBy


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-09-18 09:47:20 +00:00
00f8d03f43 Use f32::min and f32::max 2024-09-18 11:46:10 +02:00
50981ea778 Update the error messages 2024-09-18 11:44:29 +02:00
c2caff1716 Remove obsolete enum 2024-09-18 11:26:43 +02:00
4c355bede7 Merge #4937
4937: Support iso 639 1 r=ManyTheFish a=ManyTheFish

# Pull Request

## Related issue
Fixes #4827

## What does this PR do?
- Add iso-639-1 variants to the Locales enum
- Convert iso-639-1 into iso-639-3


Co-authored-by: ManyTheFish <many@meilisearch.com>
2024-09-18 05:29:32 +00:00
174d69ff72 Don't override max value in indexes 2024-09-17 18:16:14 +02:00
52a52f97cf Update tests 2024-09-17 17:49:12 +02:00
5de4b48552 Fixup error messages 2024-09-17 17:49:00 +02:00
df648ce7a6 Update tests 2024-09-17 17:40:14 +02:00
af8edab21d Remove mention of sort order and recommend changing index settings on inconsistent order error 2024-09-17 17:39:51 +02:00
c42746c4cd Update tests 2024-09-17 17:22:14 +02:00
98b77aec66 Remove runtime sortFacetValuesBy 2024-09-17 17:22:03 +02:00
54d3ba3357 Fix tests that check error message content 2024-09-17 17:14:39 +02:00
6e058709f2 Rustfmt 2024-09-17 17:02:06 +02:00
0fbf9ea5b1 Factorize using macro 2024-09-17 17:00:03 +02:00
9f1fb4b425 Introduce the STARTS WITH filter operator gated under an experimental feature 2024-09-17 16:44:11 +02:00
1120a5296c Update tests 2024-09-17 16:30:43 +02:00
a35a339c3d Touchup error message 2024-09-17 16:30:43 +02:00
cac5836f6f Remove hybrid.embedder boolean from analytics because embedder is now mandatory 2024-09-17 16:30:43 +02:00
5239ae0297 Rework search kind so that a search without query but with vector is a vector search regardless of semantic ratio 2024-09-17 16:30:43 +02:00
2fdb1d8018 SearchQueryGet can fail 2024-09-17 16:30:43 +02:00
3c5e363554 Remove default embedders 2024-09-17 16:30:43 +02:00
da0dd6febf Make embedder mandatory 2024-09-17 16:30:43 +02:00
a197d63ab6 simplify tests 2024-09-17 15:30:12 +02:00
390eadb733 Support iso-639-1 2024-09-17 15:01:01 +02:00
93f0317b94 Merge #4936
4936: Update version for the next release (v1.11.0) in Cargo.toml r=curquiza a=meili-bot

⚠️ This PR is automatically generated. Check the new version is the expected one and Cargo.lock has been updated before merging.

Co-authored-by: curquiza <curquiza@users.noreply.github.com>
2024-09-17 11:47:08 +00:00
29ff02f3ff Update version for the next release (v1.11.0) in Cargo.toml 2024-09-17 11:45:48 +00:00
d9e0df74ea update test 2024-09-17 10:39:48 +02:00
dc8a662209 federated queries: adjust error message 2024-09-17 10:39:48 +02:00
6732dd95d7 Update tests 2024-09-17 10:39:48 +02:00
95da428dc8 Use route in federated 2024-09-17 10:39:48 +02:00
38c4be1c8e compute_facets accepts Route argument to fixup error code 2024-09-17 10:39:48 +02:00
91dfab317f New error 2024-09-17 10:39:48 +02:00
47e3c4b5c3 Add new tests 2024-09-17 10:39:48 +02:00
533f1d4345 Federated search: support facets 2024-09-17 10:39:48 +02:00
7b55462610 BREAKING CHANGE: errors if queries.facets in federated search 2024-09-17 10:39:48 +02:00
f6114a1ff2 Introduce ComputedFacets and compute_facet_distribution_stats 2024-09-17 10:39:48 +02:00
7c084b1286 SearchQueriesWithIndex changes 2024-09-17 10:39:47 +02:00
57f9517a98 Required changes to IndexUid 2024-09-17 10:39:47 +02:00
72cc573e0a Add new error types 2024-09-17 10:39:47 +02:00
a48b1d5a79 Update existing tests following error message changes 2024-09-17 10:39:47 +02:00
a94a87ee54 Slightly changes existing error messages 2024-09-17 10:39:47 +02:00
f4ab1f168e Prefer using Rc<str> than String when cloning a lot 2024-09-16 15:41:29 +02:00
4b55ba68bc Merge #4911
4911: Bump quinn-proto from 0.11.3 to 0.11.8 r=Kerollmops a=dependabot[bot]

Bumps [quinn-proto](https://github.com/quinn-rs/quinn) from 0.11.3 to 0.11.8.
<details>
<summary>Release notes</summary>
<p><em>Sourced from <a href="https://github.com/quinn-rs/quinn/releases">quinn-proto's releases</a>.</em></p>
<blockquote>
<h2>quinn-proto 0.11.5</h2>
<h2>What's Changed</h2>
<ul>
<li>No workspace lints by <a href="https://github.com/Ralith"><code>`@​Ralith</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1955">quinn-rs/quinn#1955</a></li>
</ul>
<h2>quinn-proto 0.11.4</h2>
<h2>What's Changed</h2>
<ul>
<li>Fix panic in example due to unset default crypto provider by <a href="https://github.com/Ralith"><code>`@​Ralith</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1882">quinn-rs/quinn#1882</a></li>
<li>Fix zero-length connection IDs by <a href="https://github.com/Ralith"><code>`@​Ralith</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1883">quinn-rs/quinn#1883</a></li>
<li>Add support for NetBSD, fix OpenBSD by <a href="https://github.com/flub"><code>`@​flub</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1884">quinn-rs/quinn#1884</a></li>
<li>docs(udp): replace AsRawFd and AsRawSocket with AsFd and AsSocket by <a href="https://github.com/mxinden"><code>`@​mxinden</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1890">quinn-rs/quinn#1890</a></li>
<li>Resolve stopped/received_reset futures on lost connections by <a href="https://github.com/Ralith"><code>`@​Ralith</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1886">quinn-rs/quinn#1886</a></li>
<li>Bump version numbers (quinn 0.11.2, -proto 0.11.3) by <a href="https://github.com/djc"><code>`@​djc</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1891">quinn-rs/quinn#1891</a></li>
<li>udp: bump version to 0.5.2 by <a href="https://github.com/djc"><code>`@​djc</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1892">quinn-rs/quinn#1892</a></li>
<li>docs(quinn): Clarify effects of setting AckFrequencyConfig by <a href="https://github.com/gretchenfrage"><code>`@​gretchenfrage</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1894">quinn-rs/quinn#1894</a></li>
<li>Apply clippy suggestions from Rust 1.79 by <a href="https://github.com/djc"><code>`@​djc</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1895">quinn-rs/quinn#1895</a></li>
<li>Only send MAX_STREAMS when &gt;1/8 of flow control window is consumed  by <a href="https://github.com/Ralith"><code>`@​Ralith</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1898">quinn-rs/quinn#1898</a></li>
<li>fix: remove unused dependency tracing-attributes by <a href="https://github.com/mxinden"><code>`@​mxinden</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1903">quinn-rs/quinn#1903</a></li>
<li>proto: make initial destination cid configurable  by <a href="https://github.com/thynson"><code>`@​thynson</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1897">quinn-rs/quinn#1897</a></li>
<li>Allow configuring rng seed through <code>EndpointConfig</code> by <a href="https://github.com/aochagavia"><code>`@​aochagavia</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1901">quinn-rs/quinn#1901</a></li>
<li>quinn: introduce waking helpers by <a href="https://github.com/djc"><code>`@​djc</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1908">quinn-rs/quinn#1908</a></li>
<li>Wake blocked streams on 0-RTT rejection by <a href="https://github.com/Ralith"><code>`@​Ralith</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1905">quinn-rs/quinn#1905</a></li>
<li>Upgrade to rustc-hash 2 by <a href="https://github.com/djc"><code>`@​djc</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1909">quinn-rs/quinn#1909</a></li>
<li>Fix unnecessary Incoming warning on Endpoint drop by <a href="https://github.com/gretchenfrage"><code>`@​gretchenfrage</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1907">quinn-rs/quinn#1907</a></li>
<li>Revise and add additional 0-rtt doc comments by <a href="https://github.com/gretchenfrage"><code>`@​gretchenfrage</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1826">quinn-rs/quinn#1826</a></li>
<li>docs: remove reference to sendmmsg by <a href="https://github.com/mxinden"><code>`@​mxinden</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1914">quinn-rs/quinn#1914</a></li>
<li>Fix debug assert with reordered ACKs by <a href="https://github.com/Ralith"><code>`@​Ralith</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1893">quinn-rs/quinn#1893</a></li>
<li>quinn: Make <code>Endpoint::client</code> dual-stack V6 by default by <a href="https://github.com/gretchenfrage"><code>`@​gretchenfrage</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1913">quinn-rs/quinn#1913</a></li>
<li>bench(udp): measure non-GSO &amp; GSO on localhost by <a href="https://github.com/mxinden"><code>`@​mxinden</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1915">quinn-rs/quinn#1915</a></li>
<li>proto: avoid overflow in handshake done statistic by <a href="https://github.com/djc"><code>`@​djc</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1918">quinn-rs/quinn#1918</a></li>
<li>Use workspace dependencies for all external dependencies by <a href="https://github.com/djc"><code>`@​djc</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1919">quinn-rs/quinn#1919</a></li>
<li>Fix lack of reexport of ConnectionStats and ResetError by <a href="https://github.com/TirushOne"><code>`@​TirushOne</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1920">quinn-rs/quinn#1920</a></li>
<li>[non-breaking] deps(udp): make tracing optional and add optional log by <a href="https://github.com/mxinden"><code>`@​mxinden</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1923">quinn-rs/quinn#1923</a></li>
<li>fix(udp): feature flag tracing in windows.rs by <a href="https://github.com/mxinden"><code>`@​mxinden</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1932">quinn-rs/quinn#1932</a></li>
<li>Bump MSRV to 1.70 following tokio 1.39 by <a href="https://github.com/djc"><code>`@​djc</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1939">quinn-rs/quinn#1939</a></li>
<li>Raise default idle timeout to 30 seconds by <a href="https://github.com/Ralith"><code>`@​Ralith</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1938">quinn-rs/quinn#1938</a></li>
<li>Discard pre-handshake packets after the handshake by <a href="https://github.com/Ralith"><code>`@​Ralith</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1937">quinn-rs/quinn#1937</a></li>
<li>Apply suggestions from Clippy 1.80 by <a href="https://github.com/djc"><code>`@​djc</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1941">quinn-rs/quinn#1941</a></li>
<li>chore(quinn): feature flag socket2 imports by <a href="https://github.com/mxinden"><code>`@​mxinden</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1933">quinn-rs/quinn#1933</a></li>
<li>refactor: move rust-version to workspace Cargo.toml by <a href="https://github.com/mxinden"><code>`@​mxinden</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1940">quinn-rs/quinn#1940</a></li>
<li>chore: move common package data to workspace Cargo.toml by <a href="https://github.com/mxinden"><code>`@​mxinden</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1943">quinn-rs/quinn#1943</a></li>
<li>Endpoint stats interface by <a href="https://github.com/ryleung-solana"><code>`@​ryleung-solana</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1900">quinn-rs/quinn#1900</a></li>
<li>Expose the Handshake Confirmed state by <a href="https://github.com/Ralith"><code>`@​Ralith</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1944">quinn-rs/quinn#1944</a></li>
<li>Exclude metrics with freestanding getters from EndpointStats by <a href="https://github.com/Ralith"><code>`@​Ralith</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1945">quinn-rs/quinn#1945</a></li>
<li>Fix incorrect initial DCID indexing on retried connections by <a href="https://github.com/Ralith"><code>`@​Ralith</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1946">quinn-rs/quinn#1946</a></li>
<li>Add expect message to unwrap in PacketBuilder by <a href="https://github.com/casey"><code>`@​casey</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1951">quinn-rs/quinn#1951</a></li>
<li>Revert &quot;proto: yield transport error for Initial packets with no CRYPTO&quot; by <a href="https://github.com/Ralith"><code>`@​Ralith</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1952">quinn-rs/quinn#1952</a></li>
<li>refactor(udp): introduce log facade by <a href="https://github.com/mxinden"><code>`@​mxinden</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1935">quinn-rs/quinn#1935</a></li>
<li>Update cargo-deny-action to v2 by <a href="https://github.com/djc"><code>`@​djc</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1953">quinn-rs/quinn#1953</a></li>
</ul>
<!-- raw HTML omitted -->
</blockquote>
<p>... (truncated)</p>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a href="7c09b02073"><code>7c09b02</code></a> proto: bump version to 0.11.8 for release (<a href="https://redirect.github.com/quinn-rs/quinn/issues/1981">#1981</a>)</li>
<li><a href="59bccd2e7e"><code>59bccd2</code></a> Version bump <code>quinn</code> to enforce patched <code>quinn-proto</code></li>
<li><a href="a8ec510fd1"><code>a8ec510</code></a> proto: avoid panicking on rustls server config errors</li>
<li><a href="c26e8cd2f7"><code>c26e8cd</code></a> Bump versions</li>
<li><a href="e01609ccd8"><code>e01609c</code></a> Merge commit from fork</li>
<li><a href="c292a3c6a6"><code>c292a3c</code></a> Fix and test validation of IDCID length</li>
<li><a href="bb02a12a84"><code>bb02a12</code></a> fix(.github/android): use API level 26</li>
<li><a href="5e5cc93645"><code>5e5cc93</code></a> fix(.github/android): pass matrix.target and increase api to v26</li>
<li><a href="cef42cccef"><code>cef42cc</code></a> fix(udp): typo in sendmsg error log</li>
<li><a href="edf16a6f10"><code>edf16a6</code></a> ci(rust.yml): add workflow testing feature permutations</li>
<li>Additional commits viewable in <a href="https://github.com/quinn-rs/quinn/compare/quinn-proto-0.11.3...quinn-proto-0.11.8">compare view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=quinn-proto&package-manager=cargo&previous-version=0.11.3&new-version=0.11.8)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting ``@dependabot` rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- ``@dependabot` rebase` will rebase this PR
- ``@dependabot` recreate` will recreate this PR, overwriting any edits that have been made to it
- ``@dependabot` merge` will merge this PR after your CI passes on it
- ``@dependabot` squash and merge` will squash and merge this PR after your CI passes on it
- ``@dependabot` cancel merge` will cancel a previously requested merge and block automerging
- ``@dependabot` reopen` will reopen this PR if it is closed
- ``@dependabot` close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- ``@dependabot` show <dependency name> ignore conditions` will show all of the ignore conditions of the specified dependency
- ``@dependabot` ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- ``@dependabot` ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- ``@dependabot` ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
You can disable automated security fix PRs for this repo from the [Security Alerts page](https://github.com/meilisearch/meilisearch/network/alerts).

</details>

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-09-16 13:32:32 +00:00
1a0e962299 Replace hashmap by vectors in wpp 2024-09-16 15:01:20 +02:00
f13e076b8a Use hashmap instead of Btree in wpp extractor 2024-09-16 14:40:40 +02:00
7ba49b849e Extract and write facet databases 2024-09-16 09:35:16 +02:00
f7652186e1 WIP geo fields 2024-09-12 18:01:02 +02:00
23e14138bb facet distribution: implement Display for OrderBy 2024-09-12 17:43:50 +02:00
e44325683a Facet distribution: fix issue where truncated facet distribution would have a wrong order 2024-09-12 17:43:49 +02:00
b2f4e67c9a Do not store useless updates 2024-09-12 15:38:31 +02:00
ff5d3b59f5 Move the document id extraction to the primary key code 2024-09-12 12:01:42 +02:00
aa69308e45 Use a bufWriter to build word FSTs 2024-09-12 11:48:00 +02:00
eb9a20ff0b Fix fid_word_docids extraction 2024-09-12 11:08:18 +02:00
0d868f36d7 Make sure we always use a BufWriter to write the update files 2024-09-11 18:38:04 +02:00
e7d9db078f Use the right key name when convertir from CSV to NDJSON 2024-09-11 18:27:00 +02:00
3e9198ebaa Support guessing primary key again 2024-09-11 17:25:40 +02:00
2a0ad0982f Fix the document counter 2024-09-11 15:59:36 +02:00
2b317c681b Build mergers in parallel 2024-09-11 11:49:26 +02:00
39b5990f64 Mutualize tokenization 2024-09-11 10:22:38 +02:00
3848adf5a2 Improve error management and simplify JSON read 2024-09-11 10:10:51 +02:00
b4de06259e Better CSV support 2024-09-11 10:02:00 +02:00
02c2b660f8 Merge #4920
4920: Change OpenAI default model r=dureuill a=dureuill

# Pull Request

## Related issue
Fixes #4856

See also [public usage](https://meilisearch.notion.site/v1-11-AI-search-changes-0e37727193884a70999f254fa953ce6e#b4685a48c4784262a149ec307ec58671)

## What does this PR do?
- make the `text-embedding-3-small` the default model for OpenAI instead of `text-embedding-ada-002`. Existing embedders are not impacted


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-09-11 07:08:39 +00:00
8287c2644f Support CSV again 2024-09-10 21:10:28 +01:00
c1c44a0b81 Impl serialize on TopLevelMap 2024-09-10 19:32:03 +01:00
04596f3616 Move the TopLevelMap into a dedicated module 2024-09-10 18:01:17 +01:00
24cb5839ad Move the document changes sorting logic to a new trait 2024-09-10 17:37:52 +01:00
8d97b7b28c Support JSON payloads again (not perfectly though) 2024-09-10 17:09:49 +01:00
f69688e8f7 Fix several warnings in extractors and remove unreachable macros 2024-09-09 14:52:50 +02:00
f18e9cb7b3 Change openai default model 2024-09-09 13:09:35 +02:00
8fd0afaaaa Make sure we iterate over the payload documents in order 2024-09-06 08:09:08 +02:00
72c6a21a30 Use raw JSON to read the payloads 2024-09-05 20:08:23 +02:00
8412be4a7d Cleanup CowStr and TopLevelMap struct 2024-09-05 18:32:55 +02:00
10f09c531f add some commented code to read from json with raw values 2024-09-05 18:22:16 +02:00
8fd99b111b Add tracing timers logs 2024-09-05 18:00:22 +02:00
f6b3d1f9a5 Increase some channel sizes 2024-09-05 15:12:07 +02:00
db0cf3b2ed Merge #4912
4912: Allow Meilitool to dumplessly, offline upgrade v1.9 -> v1.10 in some conditions r=Kerollmops a=dureuill

- bail early if the DB contains at least 1 REST embedder, providing the list of detected REST embedders, and without modifying the DB
- Might depend on the feature set that meilitool was compiled with and the featureset that the Meilisearch that created the DB was compiled with đź’€. In case of runtime error, try again with a different feature set (passing or not passing `-p meilitool` when building after a `cargo clean`)

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-09-05 09:11:23 +00:00
73ce67862d Use the word pair proximity and fid word count docids extractors
Co-authored-by: ManyTheFish <many@meilisearch.com>
2024-09-05 10:56:22 +02:00
f6abf01d2c Check REST embedders before touching the DB 2024-09-05 10:49:59 +02:00
0fc02f7351 Move the facet extraction to dedicated modules 2024-09-05 10:32:27 +02:00
34f11e3380 Implement word count and word pair proximity extractors 2024-09-05 10:30:39 +02:00
28da759f11 meilitool: Support dumpless upgrade from v1.9 to v1.10 when there are no REST embedders 2024-09-05 10:08:38 +02:00
ea96d19525 Change versioning in meili 2024-09-05 10:08:06 +02:00
d352b1ee83 Add serde to meilitool 2024-09-05 10:07:33 +02:00
27308eaab1 Import the facet extractors 2024-09-04 17:58:15 +02:00
b33ec9ba3f Introduce the FieldIdFacetIsNullDocidsExtractor 2024-09-04 17:50:08 +02:00
9c0a1cd9fd Introduce the FieldIdFacetExistsDocidsExtractor 2024-09-04 17:48:49 +02:00
0b061f1e70 Introduce the FieldIdFacetIsEmptyDocidsExtractor 2024-09-04 17:40:24 +02:00
19d937ab21 Introduce the facet extractors 2024-09-04 17:03:54 +02:00
1d59c19cd2 Send the WordsFst by using an Mmap 2024-09-04 14:30:09 +02:00
98e48371c3 Factorize some stuff 2024-09-04 12:17:13 +02:00
6d74fb0229 Introduce the WordFidWordDocids database 2024-09-04 11:40:55 +02:00
1eb75a1040 remove milli/src/update/new/extract/tokenize_document.rs 2024-09-04 11:40:26 +02:00
3b82d8b5b9 Fix the cache to serialize entries correctly 2024-09-04 10:55:36 +02:00
781a186f75 remove milli/src/update/new/extract/extract_word_docids.rs 2024-09-04 10:28:31 +02:00
6a399556b5 Implement more searchable extractor 2024-09-04 10:20:18 +02:00
27b4cab857 Extract and write the documents and words fst in the database 2024-09-04 09:59:19 +02:00
3f3cebf5f9 Bump quinn-proto from 0.11.3 to 0.11.8
Bumps [quinn-proto](https://github.com/quinn-rs/quinn) from 0.11.3 to 0.11.8.
- [Release notes](https://github.com/quinn-rs/quinn/releases)
- [Commits](https://github.com/quinn-rs/quinn/compare/quinn-proto-0.11.3...quinn-proto-0.11.8)

---
updated-dependencies:
- dependency-name: quinn-proto
  dependency-type: indirect
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-09-03 20:50:30 +00:00
b278815617 Merge #4908
4908: Bring back changes from release v1.10.1 to main r=dureuill a=irevoire

# Pull Request

Following the [latest release](https://github.com/meilisearch/meilisearch/releases/tag/v1.10.1), this PR brings back the changes to main.

Co-authored-by: Tamo <tamo@meilisearch.com>
Co-authored-by: irevoire <irevoire@users.noreply.github.com>
2024-09-03 14:28:12 +00:00
52d32b4ee9 Move the channel sender in the closure to stop the merger thread 2024-09-03 16:08:33 +02:00
da61408e52 Remove unimplemented from document changes 2024-09-03 15:14:16 +02:00
fe69385bd7 Fix tokenizer test 2024-09-03 14:24:37 +02:00
40e13ceef3 Merge #4892
4892:  Add a documentTemplateMaxBytes parameter to limit the max length of document templates r=ManyTheFish a=dureuill

# Pull Request

## Related issue
Fixes #4885 

See [public usage](https://meilisearch.notion.site/v1-11-AI-search-changes-0e37727193884a70999f254fa953ce6e#a3d63628129e40adba943ae7b8ec06c2)



Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-09-03 11:50:07 +00:00
18a2c13e4e add analytics 2024-09-03 12:07:59 +02:00
ed19b7c3c3 Only reindex if the size increased 2024-09-03 12:07:59 +02:00
66bda2ce8a fix tests 2024-09-03 12:07:58 +02:00
1ac008926b Add maxBytes parameter 2024-09-03 12:07:15 +02:00
c49d892c82 Changes to prompt 2024-09-03 12:07:10 +02:00
de962a26f3 New error type when maxBytes is null 2024-09-03 12:01:04 +02:00
c1557734dc Use the GlobalFieldsIdsMap everywhere and write it to disk
Co-authored-by: Dureuill <louis@meilisearch.com>
Co-authored-by: ManyTheFish <many@meilisearch.com>
2024-09-03 12:01:01 +02:00
005204e9e5 make the code of init_web_app in common between most tests 2024-09-03 11:40:05 +02:00
1040e5e2b4 spawn on search queue per test 2024-09-03 11:20:25 +02:00
c50d3edc4a Integrate first searchable exctrator 2024-09-03 11:02:39 +02:00
80408c92dc Merge #4906
4906: Add searchable fields to template r=dureuill a=dureuill

# Pull Request

## Related issue
Fixes #4886 

See [public usage](https://meilisearch.notion.site/v1-11-AI-search-changes-0e37727193884a70999f254fa953ce6e#1dd6f0eee5a1422888e1c5d48e107cd1)

## What does this PR do?
- `Prompt::render` now requires and uses metadata to indicate if the fields are searchable or not
- Changes default template
- Updated tests
- Correctly reindex vectors when the list of searchable fields changes in a settings update.


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-09-03 07:14:58 +00:00
5369bf4a62 Change some lifetimes 2024-09-02 19:51:22 +02:00
bcb1aa3d22 Find a temporary solution to par into iter on an HashMap
Spoiler: Do not use an HashMap but drain it into a Vec
2024-09-02 19:39:48 +02:00
fa1a0beb0c fix conflicts after rebase 2024-09-02 18:15:42 +02:00
5aefe7cd17 add the snapshots 2024-09-02 16:27:51 +02:00
e6dd66e4a0 Do not fail the whole batch when a single document deletion by filter fails 2024-09-02 16:27:51 +02:00
6e3839d8b6 autobatch document deletion by filter 2024-09-02 16:27:51 +02:00
cd271b8762 stop trying to process searches after one minute 2024-09-02 16:27:51 +02:00
3ce8500d4c ensure we never early exit when we have a permit and remove the warning when we implicitely drop a permit 2024-09-02 16:27:51 +02:00
588000d398 add a warning to help us find when we forget to drop explicitely drop a permit 2024-09-02 16:27:51 +02:00
92b151607c explicitely drop the search permit 2024-09-02 16:27:51 +02:00
42e7499260 Update version for the next release (v1.10.1) in Cargo.toml 2024-09-02 16:27:51 +02:00
41aa1e1424 Only spawn one search queue in actix-web 2024-09-02 16:27:50 +02:00
9b7858fb90 Expose the new indexer 2024-09-02 15:21:59 +02:00
ab01679a8f Remove the useless option from the document changes 2024-09-02 15:21:00 +02:00
521775f788 I push for Many 2024-09-02 15:10:21 +02:00
72e7b7846e Renaming the indexers 2024-09-02 14:42:27 +02:00
6526ce1208 Fix the merging of documents 2024-09-02 14:41:20 +02:00
24ace5c381 Add reindexing test 2024-09-02 13:37:01 +02:00
21296190a3 Reindex embedders 2024-09-02 13:00:53 +02:00
03fda78901 update other tests 2024-09-02 11:31:31 +02:00
30a143f149 Test new facilities 2024-09-02 11:31:23 +02:00
4464d319af Change default template to use the new facility 2024-09-02 11:30:59 +02:00
580ea2f450 Pass the fields <-> ids map with metadata to render 2024-09-02 11:30:10 +02:00
915cf4bae5 Add field.is_searchable property to fields 2024-09-02 11:28:53 +02:00
e639ec79d1 Move the indexers into their own modules 2024-09-02 10:42:19 +02:00
bb885a5810 Fix the merge for roaring bitmap 2024-09-01 23:20:19 +02:00
b625d31c7d Introduce the PartialDumpIndexer indexer that generates document ids in parallel 2024-08-30 15:07:21 +02:00
6487a67f2b Introduce the ConcurrentAvailableIds struct and rename the other to AvailableIds 2024-08-30 15:06:50 +02:00
271ce91b3b Add the rayon Threadpool to the index function parameter 2024-08-30 14:34:24 +02:00
54f2eb4507 Remove duplication of grenad merger 2024-08-30 14:34:05 +02:00
794ebcd582 Replace grenad with the new grenad various-improvement branch 2024-08-30 11:53:59 +02:00
b7c77c7a39 Use the latest version of the obkv crate 2024-08-30 11:53:59 +02:00
0c57cf7565 Replace obkv with the temporary new version of it 2024-08-30 11:53:58 +02:00
27df9e6c73 Introduce the indexer::index function that runs the indexation 2024-08-30 11:53:58 +02:00
45c060831e Introduce typed channels and the merger loop 2024-08-30 11:53:58 +02:00
874c1ac538 First channels types 2024-08-30 11:53:58 +02:00
e6ffa4d454 Implement the document merge function for the replace method 2024-08-30 11:53:58 +02:00
637a9c8bdd Implement the document merge function for the update method 2024-08-30 11:53:58 +02:00
c683fa98e6 WIP
Co-authored-by: Kerollmops <clement@meilisearch.com>
Co-authored-by: ManyTheFish <many@meilisearch.com>
2024-08-30 11:53:57 +02:00
9a756cf2c5 Merge #4888
4888: bring back v1.10.0 into main r=Kerollmops a=ManyTheFish



Co-authored-by: Louis Dureuil <louis@meilisearch.com>
Co-authored-by: meili-bors[bot] <89034592+meili-bors[bot]@users.noreply.github.com>
Co-authored-by: Tamo <tamo@meilisearch.com>
Co-authored-by: ManyTheFish <many@meilisearch.com>
2024-08-27 14:02:08 +00:00
321639364f Merge #4861
4861: Make sure the index scheduler never stops running r=irevoire a=irevoire

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/4748

## What does this PR do?
- Whatever happens, we always try to process tasks once every minute (if no tasks are enqueued that's practically free)

Co-authored-by: Tamo <tamo@meilisearch.com>
2024-08-07 16:21:54 +00:00
442d06dce7 ensure the run function doesn't panic even if the tick function does 2024-08-07 17:50:32 +02:00
8f6a98df07 make sure the index scheduler never stops running 2024-08-07 17:06:43 +02:00
2d16d0aea1 Merge #4839
4839: In prometheus metrics return the route pattern instead of the real route when returning the HTTP requests total r=irevoire a=irevoire

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/4825

## What does this PR do?
- return the route pattern instead of the real route when returning the HTTP requests total


Co-authored-by: Tamo <tamo@meilisearch.com>
2024-08-05 10:14:51 +00:00
866922ecc3 Merge #4808
4808: Make the tests run faster r=irevoire a=irevoire

## Index-Scheduler

### Only check the consistency of the index-scheduler on snapshots when running in release mode

This saves 12s on the tests, and since the tests run in release mode in the CI, we don't lose any information.
From 28s to 16s

### We were snapshotting the index for no reason in `advance_till`, I removed this call

This saved an additional 8s on the tests, going from 16s to 8s.

----

After these two optimizations, the test suite as a whole executes 14% quicker

## Meilisearch integration tests

While profiling this test suite, nothing stands out. The only noticeable thing is that we're losing most of our time creating and dropping threads.
I made the theory that by sharing a single common instance between all integrations tests I would gain some time again.

In 355a7acd1c I saved another 15s by only testing this theory on the module that tests the error messages. 
But we can do it on many more tests. **We must take care of not making any test flaky, though**.

## Use two indexing threads

By moving from one to two indexing threads, we gain an additional 30% in performance.

# Conclusion

## Before

The execution of the test suite was taking around:
- 4m40s on my computer
- 15 minutes on the debug CI with cache
- 29 minutes on the Windows CI with cache

## After

The execution of the test suite is taking around:
- 2m20 on my computer
- 8 minutes on the debug CI with cache
- 29 minutes on the Windows CI with cache

## This means the test suite should now run ~50% faster on your computer; the CI may report errors twice faster, but we'll still wait for ~the same amount of time to merge a PR


Co-authored-by: Tamo <tamo@meilisearch.com>
2024-07-30 15:11:30 +00:00
f05ea04879 In prometheus metrics return the route pattern instead of the real route when returning the HTTP requests total 2024-07-30 16:24:49 +02:00
c457069367 ensure a test is 100% not flaky 2024-07-30 15:41:51 +02:00
bb1283222e make clippy happy 2024-07-30 15:10:56 +02:00
7a5a38f870 fix a sync issue on empty indexes 2024-07-30 15:09:12 +02:00
ded3cd0dd6 an additionnal 30% of perf for the tests 2024-07-30 15:03:54 +02:00
68f885f1c4 fix two snapshots 2024-07-30 14:45:59 +02:00
9372c34dab prepare the tests to share indexes with api key 2024-07-30 14:34:11 +02:00
6666c57880 reduce the number of thread spawned by milli 2024-07-30 14:34:10 +02:00
b53a019b07 fix the initialization problem over the shared indexes with documents 2024-07-30 14:24:57 +02:00
d262b1df32 craft an API over the Shared Server and Shared index to avoid hard to debug mistakes 2024-07-30 14:24:57 +02:00
ed795bc837 fmt 2024-07-30 14:24:57 +02:00
993264227d reuse an index with already indexed documents instead of reindexing from scratch 2024-07-30 14:24:57 +02:00
953d3a44bd make the new_shared function synchronous and stop indexing documents when it's not required 2024-07-30 14:24:57 +02:00
e5345fb0eb shave off 15s by providing a shared instance to the integration tests 2024-07-30 14:24:55 +02:00
2d9a055fb9 stops snapshotting in advance_till when we don't need to 2024-07-30 13:57:12 +02:00
110dc01f40 only check the consistency of the index-scheduler on snapshots when running in release mode 2024-07-30 13:57:12 +02:00
169 changed files with 15008 additions and 6286 deletions

83
Cargo.lock generated
View File

@ -387,14 +387,14 @@ checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711"
[[package]]
name = "arroy"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2ece9e5347e7fdaaea3181dec7f916677ad5f3fcbac183648ce1924eb4aeef9a"
source = "git+https://github.com/meilisearch/arroy/?rev=2386594dfb009ce08821a925ccc89fb8e30bf73d#2386594dfb009ce08821a925ccc89fb8e30bf73d"
dependencies = [
"bytemuck",
"byteorder",
"heed",
"log",
"memmap2",
"nohash",
"ordered-float",
"rand",
"rayon",
@ -471,7 +471,7 @@ checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6"
[[package]]
name = "benchmarks"
version = "1.10.0"
version = "1.11.0"
dependencies = [
"anyhow",
"bytes",
@ -652,7 +652,7 @@ dependencies = [
[[package]]
name = "build-info"
version = "1.10.0"
version = "1.11.0"
dependencies = [
"anyhow",
"time",
@ -933,9 +933,8 @@ dependencies = [
[[package]]
name = "charabia"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "03cd8f290cae94934cdd0103c14c2de9faf2d7d85be0d24d511af2bf1b14119d"
version = "0.9.1"
source = "git+https://github.com/meilisearch/charabia?branch=mutualize-char-normalizer#f8d8308cdb8db80819be7eeed5652cc4a995cc71"
dependencies = [
"aho-corasick",
"csv",
@ -1622,7 +1621,7 @@ dependencies = [
[[package]]
name = "dump"
version = "1.10.0"
version = "1.11.0"
dependencies = [
"anyhow",
"big_s",
@ -1834,7 +1833,7 @@ checksum = "9fc0510504f03c51ada170672ac806f1f105a88aa97a5281117e1ddc3368e51a"
[[package]]
name = "file-store"
version = "1.10.0"
version = "1.11.0"
dependencies = [
"tempfile",
"thiserror",
@ -1856,7 +1855,7 @@ dependencies = [
[[package]]
name = "filter-parser"
version = "1.10.0"
version = "1.11.0"
dependencies = [
"insta",
"nom",
@ -1876,7 +1875,7 @@ dependencies = [
[[package]]
name = "flatten-serde-json"
version = "1.10.0"
version = "1.11.0"
dependencies = [
"criterion",
"serde_json",
@ -2000,7 +1999,7 @@ dependencies = [
[[package]]
name = "fuzzers"
version = "1.10.0"
version = "1.11.0"
dependencies = [
"arbitrary",
"clap",
@ -2221,11 +2220,11 @@ checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b"
[[package]]
name = "grenad"
version = "0.4.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "350d89047298d3b1b40050acd11ab76e487b854a104b760ebc5a7f375093de77"
source = "git+https://github.com/meilisearch/grenad?branch=various-improvements#58ac87d852413571102f44c5e55ca13509a3f1a0"
dependencies = [
"bytemuck",
"byteorder",
"either",
"rayon",
"tempfile",
]
@ -2308,9 +2307,9 @@ dependencies = [
[[package]]
name = "hashbrown"
version = "0.14.3"
version = "0.14.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "290f1a1d9242c78d09ce40a5e87e7554ee637af1351968159f4952f028f75604"
checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1"
dependencies = [
"ahash 0.8.11",
"allocator-api2",
@ -2552,7 +2551,7 @@ checksum = "206ca75c9c03ba3d4ace2460e57b189f39f43de612c2f85836e65c929701bb2d"
[[package]]
name = "index-scheduler"
version = "1.10.0"
version = "1.11.0"
dependencies = [
"anyhow",
"arroy",
@ -2570,6 +2569,7 @@ dependencies = [
"meili-snap",
"meilisearch-auth",
"meilisearch-types",
"memmap2",
"page_size",
"rayon",
"roaring",
@ -2591,7 +2591,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "168fb715dda47215e360912c096649d23d58bf392ac62f73919e831745e40f26"
dependencies = [
"equivalent",
"hashbrown 0.14.3",
"hashbrown 0.14.5",
"serde",
]
@ -2650,8 +2650,7 @@ checksum = "28b29a3cd74f0f4598934efe3aeba42bae0eb4680554128851ebbecb02af14e6"
[[package]]
name = "irg-kvariants"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ef2af7c331f2536964a32b78a7d2e0963d78b42f4a76323b16cc7d94b1ddce26"
source = "git+https://github.com/meilisearch/charabia?branch=mutualize-char-normalizer#f8d8308cdb8db80819be7eeed5652cc4a995cc71"
dependencies = [
"csv",
"once_cell",
@ -2746,7 +2745,7 @@ dependencies = [
[[package]]
name = "json-depth-checker"
version = "1.10.0"
version = "1.11.0"
dependencies = [
"criterion",
"serde_json",
@ -3365,7 +3364,7 @@ checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771"
[[package]]
name = "meili-snap"
version = "1.10.0"
version = "1.11.0"
dependencies = [
"insta",
"md5",
@ -3374,7 +3373,7 @@ dependencies = [
[[package]]
name = "meilisearch"
version = "1.10.0"
version = "1.11.0"
dependencies = [
"actix-cors",
"actix-http",
@ -3463,7 +3462,7 @@ dependencies = [
[[package]]
name = "meilisearch-auth"
version = "1.10.0"
version = "1.11.0"
dependencies = [
"base64 0.22.1",
"enum-iterator",
@ -3482,7 +3481,7 @@ dependencies = [
[[package]]
name = "meilisearch-types"
version = "1.10.0"
version = "1.11.0"
dependencies = [
"actix-web",
"anyhow",
@ -3512,7 +3511,7 @@ dependencies = [
[[package]]
name = "meilitool"
version = "1.10.0"
version = "1.11.0"
dependencies = [
"anyhow",
"clap",
@ -3520,6 +3519,7 @@ dependencies = [
"file-store",
"meilisearch-auth",
"meilisearch-types",
"serde",
"time",
"uuid",
]
@ -3542,7 +3542,7 @@ dependencies = [
[[package]]
name = "milli"
version = "1.10.0"
version = "1.11.0"
dependencies = [
"arroy",
"big_s",
@ -3566,6 +3566,7 @@ dependencies = [
"fxhash",
"geoutils",
"grenad",
"hashbrown 0.14.5",
"heed",
"hf-hub",
"indexmap",
@ -3685,6 +3686,12 @@ version = "0.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6d02c0b00610773bb7fc61d85e13d86c7858cbdf00e1a120bfc41bc055dbaa0e"
[[package]]
name = "nohash"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a0f889fb66f7acdf83442c35775764b51fed3c606ab9cee51500dbde2cf528ca"
[[package]]
name = "nom"
version = "7.1.3"
@ -3829,9 +3836,8 @@ dependencies = [
[[package]]
name = "obkv"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a2e27bcfe835a379d32352112f6b8dbae2d99d16a5fff42abe6e5ba5386c1e5a"
version = "0.3.0"
source = "git+https://github.com/kerollmops/obkv?branch=unsized-kvreader#ce535874008ecac554f02e0c670e6caf62134d6b"
[[package]]
name = "once_cell"
@ -3976,7 +3982,7 @@ checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e"
[[package]]
name = "permissive-json-pointer"
version = "1.10.0"
version = "1.11.0"
dependencies = [
"big_s",
"serde_json",
@ -4648,8 +4654,7 @@ dependencies = [
[[package]]
name = "roaring"
version = "0.10.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8f4b84ba6e838ceb47b41de5194a60244fac43d9fe03b71dbe8c5a201081d6d1"
source = "git+https://github.com/RoaringBitmap/roaring-rs?branch=clone-iter-slice#6bba84b1a47da1d6e52d5c4dc0ce8593ae4646a5"
dependencies = [
"bytemuck",
"byteorder",
@ -4834,9 +4839,9 @@ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4"
[[package]]
name = "serde"
version = "1.0.204"
version = "1.0.210"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bc76f558e0cbb2a839d37354c575f1dc3fdc6546b5be373ba43d95f231bf7c12"
checksum = "c8e3592472072e6e22e0a54d5904d9febf8508f65fb8552499a1abc7d1078c3a"
dependencies = [
"serde_derive",
]
@ -4852,9 +4857,9 @@ dependencies = [
[[package]]
name = "serde_derive"
version = "1.0.204"
version = "1.0.210"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e0cd7e117be63d3c3678776753929474f3b04a43a080c744d6b0ae2a8c28e222"
checksum = "243902eda00fad750862fc144cea25caca5e20d615af0a81bee94ca738f1df1f"
dependencies = [
"proc-macro2",
"quote",
@ -6035,7 +6040,7 @@ version = "0.16.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "471d1c1645d361eb782a1650b1786a8fb58dd625e681a04c09f5ff7c8764a7b0"
dependencies = [
"hashbrown 0.14.3",
"hashbrown 0.14.5",
"once_cell",
]
@ -6361,7 +6366,7 @@ dependencies = [
[[package]]
name = "xtask"
version = "1.10.0"
version = "1.11.0"
dependencies = [
"anyhow",
"build-info",

View File

@ -22,7 +22,7 @@ members = [
]
[workspace.package]
version = "1.10.0"
version = "1.11.0"
authors = [
"Quentin de Quelen <quentin@dequelen.me>",
"Clément Renault <clement@meilisearch.com>",
@ -44,23 +44,5 @@ opt-level = 3
[profile.dev.package.roaring]
opt-level = 3
[profile.dev.package.lindera-ipadic-builder]
opt-level = 3
[profile.dev.package.encoding]
opt-level = 3
[profile.dev.package.yada]
opt-level = 3
[profile.release.package.lindera-ipadic-builder]
opt-level = 3
[profile.release.package.encoding]
opt-level = 3
[profile.release.package.yada]
opt-level = 3
[profile.bench.package.lindera-ipadic-builder]
opt-level = 3
[profile.bench.package.encoding]
opt-level = 3
[profile.bench.package.yada]
opt-level = 3
[patch.crates-io]
roaring = { git = "https://github.com/RoaringBitmap/roaring-rs", branch = "clone-iter-slice" }

View File

@ -45,14 +45,14 @@ See the list of all our example apps in our [demos repository](https://github.co
## ✨ Features
- **Hybrid search:** Combine the best of both [semantic](https://www.meilisearch.com/docs/learn/experimental/vector_search?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features) & full-text search to get the most relevant results
- **Search-as-you-type:** Find & display results in less than 50 milliseconds to provide an intuitive experience
- **[Typo tolerance](https://www.meilisearch.com/docs/learn/configuration/typo_tolerance?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** get relevant matches even when queries contain typos and misspellings
- **[Typo tolerance](https://www.meilisearch.com/docs/learn/relevancy/typo_tolerance_settings?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** get relevant matches even when queries contain typos and misspellings
- **[Filtering](https://www.meilisearch.com/docs/learn/fine_tuning_results/filtering?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features) and [faceted search](https://www.meilisearch.com/docs/learn/fine_tuning_results/faceted_search?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** enhance your users' search experience with custom filters and build a faceted search interface in a few lines of code
- **[Sorting](https://www.meilisearch.com/docs/learn/fine_tuning_results/sorting?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** sort results based on price, date, or pretty much anything else your users need
- **[Synonym support](https://www.meilisearch.com/docs/learn/configuration/synonyms?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** configure synonyms to include more relevant content in your search results
- **[Synonym support](https://www.meilisearch.com/docs/learn/relevancy/synonyms?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** configure synonyms to include more relevant content in your search results
- **[Geosearch](https://www.meilisearch.com/docs/learn/fine_tuning_results/geosearch?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** filter and sort documents based on geographic data
- **[Extensive language support](https://www.meilisearch.com/docs/learn/what_is_meilisearch/language?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** search datasets in any language, with optimized support for Chinese, Japanese, Hebrew, and languages using the Latin alphabet
- **[Security management](https://www.meilisearch.com/docs/learn/security/master_api_keys?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** control which users can access what data with API keys that allow fine-grained permissions handling
- **[Multi-Tenancy](https://www.meilisearch.com/docs/learn/security/tenant_tokens?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** personalize search results for any number of application tenants
- **[Multi-Tenancy](https://www.meilisearch.com/docs/learn/security/multitenancy_tenant_tokens?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** personalize search results for any number of application tenants
- **Highly Customizable:** customize Meilisearch to your specific needs or use our out-of-the-box and hassle-free presets
- **[RESTful API](https://www.meilisearch.com/docs/reference/api/overview?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** integrate Meilisearch in your technical stack with our plugins and SDKs
- **Easy to install, deploy, and maintain**

View File

@ -255,6 +255,8 @@ pub(crate) mod test {
}
"###);
insta::assert_json_snapshot!(vector_index.settings().unwrap());
{
let documents: Result<Vec<_>> = vector_index.documents().unwrap().collect();
let mut documents = documents.unwrap();

View File

@ -1,783 +1,56 @@
---
source: dump/src/reader/mod.rs
expression: document
expression: vector_index.settings().unwrap()
---
{
"id": "e3",
"desc": "overriden vector + map",
"_vectors": {
"default": [
0.2,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1
],
"toto": [
0.1
]
}
"displayedAttributes": [
"*"
],
"searchableAttributes": [
"*"
],
"filterableAttributes": [],
"sortableAttributes": [],
"rankingRules": [
"words",
"typo",
"proximity",
"attribute",
"sort",
"exactness"
],
"stopWords": [],
"nonSeparatorTokens": [],
"separatorTokens": [],
"dictionary": [],
"synonyms": {},
"distinctAttribute": null,
"proximityPrecision": "byWord",
"typoTolerance": {
"enabled": true,
"minWordSizeForTypos": {
"oneTypo": 5,
"twoTypos": 9
},
"disableOnWords": [],
"disableOnAttributes": []
},
"faceting": {
"maxValuesPerFacet": 100,
"sortFacetValuesBy": {
"*": "alpha"
}
},
"pagination": {
"maxTotalHits": 1000
},
"embedders": {
"default": {
"source": "huggingFace",
"model": "BAAI/bge-base-en-v1.5",
"revision": "617ca489d9e86b49b8167676d8220688b99db36e",
"documentTemplate": "{% for field in fields %} {{ field.name }}: {{ field.value }}\n{% endfor %}"
}
},
"searchCutoffMs": null
}

View File

@ -0,0 +1,780 @@
---
source: dump/src/reader/mod.rs
expression: document
---
{
"id": "e0",
"desc": "overriden vector",
"_vectors": {
"default": [
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1
]
}
}

View File

@ -27,6 +27,7 @@ pub enum Condition<'a> {
LowerThanOrEqual(Token<'a>),
Between { from: Token<'a>, to: Token<'a> },
Contains { keyword: Token<'a>, word: Token<'a> },
StartsWith { keyword: Token<'a>, word: Token<'a> },
}
/// condition = value ("==" | ">" ...) value
@ -121,6 +122,34 @@ pub fn parse_not_contains(input: Span) -> IResult<FilterCondition> {
))
}
/// starts with = value "CONTAINS" value
pub fn parse_starts_with(input: Span) -> IResult<FilterCondition> {
let (input, (fid, starts_with, value)) =
tuple((parse_value, tag("STARTS WITH"), cut(parse_value)))(input)?;
Ok((
input,
FilterCondition::Condition {
fid,
op: StartsWith { keyword: Token { span: starts_with, value: None }, word: value },
},
))
}
/// starts with = value "NOT" WS+ "CONTAINS" value
pub fn parse_not_starts_with(input: Span) -> IResult<FilterCondition> {
let keyword = tuple((tag("NOT"), multispace1, tag("STARTS WITH")));
let (input, (fid, (_not, _spaces, starts_with), value)) =
tuple((parse_value, keyword, cut(parse_value)))(input)?;
Ok((
input,
FilterCondition::Not(Box::new(FilterCondition::Condition {
fid,
op: StartsWith { keyword: Token { span: starts_with, value: None }, word: value },
})),
))
}
/// to = value value "TO" WS+ value
pub fn parse_to(input: Span) -> IResult<FilterCondition> {
let (input, (key, from, _, _, to)) =

View File

@ -146,7 +146,7 @@ impl<'a> Display for Error<'a> {
}
ErrorKind::InvalidPrimary => {
let text = if input.trim().is_empty() { "but instead got nothing.".to_string() } else { format!("at `{}`.", escaped_input) };
writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` {}", text)?
writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` {}", text)?
}
ErrorKind::InvalidEscapedNumber => {
writeln!(f, "Found an invalid escaped sequence number: `{}`.", escaped_input)?

View File

@ -49,7 +49,7 @@ use std::fmt::Debug;
pub use condition::{parse_condition, parse_to, Condition};
use condition::{
parse_contains, parse_exists, parse_is_empty, parse_is_not_empty, parse_is_not_null,
parse_is_null, parse_not_contains, parse_not_exists,
parse_is_null, parse_not_contains, parse_not_exists, parse_not_starts_with, parse_starts_with,
};
use error::{cut_with_err, ExpectedValueKind, NomErrorExt};
pub use error::{Error, ErrorKind};
@ -166,7 +166,8 @@ impl<'a> FilterCondition<'a> {
| Condition::LowerThan(_)
| Condition::LowerThanOrEqual(_)
| Condition::Between { .. } => None,
Condition::Contains { keyword, word: _ } => Some(keyword),
Condition::Contains { keyword, word: _ }
| Condition::StartsWith { keyword, word: _ } => Some(keyword),
},
FilterCondition::Not(this) => this.use_contains_operator(),
FilterCondition::Or(seq) | FilterCondition::And(seq) => {
@ -484,6 +485,8 @@ fn parse_primary(input: Span, depth: usize) -> IResult<FilterCondition> {
parse_to,
parse_contains,
parse_not_contains,
parse_starts_with,
parse_not_starts_with,
// the next lines are only for error handling and are written at the end to have the less possible performance impact
parse_geo,
parse_geo_distance,
@ -567,6 +570,7 @@ impl<'a> std::fmt::Display for Condition<'a> {
Condition::LowerThanOrEqual(token) => write!(f, "<= {token}"),
Condition::Between { from, to } => write!(f, "{from} TO {to}"),
Condition::Contains { word, keyword: _ } => write!(f, "CONTAINS {word}"),
Condition::StartsWith { word, keyword: _ } => write!(f, "STARTS WITH {word}"),
}
}
}
@ -680,6 +684,13 @@ pub mod tests {
insta::assert_snapshot!(p("NOT subscribers NOT CONTAINS 'hello'"), @"{subscribers} CONTAINS {hello}");
insta::assert_snapshot!(p("subscribers NOT CONTAINS 'hello'"), @"NOT ({subscribers} CONTAINS {hello})");
// Test STARTS WITH + NOT STARTS WITH
insta::assert_snapshot!(p("subscribers STARTS WITH 'hel'"), @"{subscribers} STARTS WITH {hel}");
insta::assert_snapshot!(p("NOT subscribers STARTS WITH 'hel'"), @"NOT ({subscribers} STARTS WITH {hel})");
insta::assert_snapshot!(p("subscribers NOT STARTS WITH hel"), @"NOT ({subscribers} STARTS WITH {hel})");
insta::assert_snapshot!(p("NOT subscribers NOT STARTS WITH 'hel'"), @"{subscribers} STARTS WITH {hel}");
insta::assert_snapshot!(p("subscribers NOT STARTS WITH 'hel'"), @"NOT ({subscribers} STARTS WITH {hel})");
// Test nested NOT
insta::assert_snapshot!(p("NOT NOT NOT NOT x = 5"), @"{x} = {5}");
insta::assert_snapshot!(p("NOT NOT (NOT NOT x = 5)"), @"{x} = {5}");
@ -751,7 +762,7 @@ pub mod tests {
"###);
insta::assert_snapshot!(p("'OR'"), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `\'OR\'`.
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `\'OR\'`.
1:5 'OR'
"###);
@ -761,12 +772,12 @@ pub mod tests {
"###);
insta::assert_snapshot!(p("channel Ponce"), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `channel Ponce`.
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `channel Ponce`.
1:14 channel Ponce
"###);
insta::assert_snapshot!(p("channel = Ponce OR"), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` but instead got nothing.
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` but instead got nothing.
19:19 channel = Ponce OR
"###);
@ -851,12 +862,12 @@ pub mod tests {
"###);
insta::assert_snapshot!(p("colour NOT EXIST"), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `colour NOT EXIST`.
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `colour NOT EXIST`.
1:17 colour NOT EXIST
"###);
insta::assert_snapshot!(p("subscribers 100 TO1000"), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `subscribers 100 TO1000`.
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `subscribers 100 TO1000`.
1:23 subscribers 100 TO1000
"###);
@ -919,35 +930,35 @@ pub mod tests {
"###);
insta::assert_snapshot!(p(r#"value NULL"#), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `value NULL`.
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value NULL`.
1:11 value NULL
"###);
insta::assert_snapshot!(p(r#"value NOT NULL"#), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `value NOT NULL`.
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value NOT NULL`.
1:15 value NOT NULL
"###);
insta::assert_snapshot!(p(r#"value EMPTY"#), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `value EMPTY`.
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value EMPTY`.
1:12 value EMPTY
"###);
insta::assert_snapshot!(p(r#"value NOT EMPTY"#), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `value NOT EMPTY`.
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value NOT EMPTY`.
1:16 value NOT EMPTY
"###);
insta::assert_snapshot!(p(r#"value IS"#), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `value IS`.
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value IS`.
1:9 value IS
"###);
insta::assert_snapshot!(p(r#"value IS NOT"#), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `value IS NOT`.
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value IS NOT`.
1:13 value IS NOT
"###);
insta::assert_snapshot!(p(r#"value IS EXISTS"#), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `value IS EXISTS`.
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value IS EXISTS`.
1:16 value IS EXISTS
"###);
insta::assert_snapshot!(p(r#"value IS NOT EXISTS"#), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `value IS NOT EXISTS`.
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value IS NOT EXISTS`.
1:20 value IS NOT EXISTS
"###);
}

View File

@ -212,6 +212,8 @@ fn is_keyword(s: &str) -> bool {
| "NULL"
| "EMPTY"
| "CONTAINS"
| "STARTS"
| "WITH"
| "_geoRadius"
| "_geoBoundingBox"
)

View File

@ -29,6 +29,7 @@ serde_json = { version = "1.0.120", features = ["preserve_order"] }
synchronoise = "1.0.1"
tempfile = "3.10.1"
thiserror = "1.0.61"
memmap2 = "0.9.4"
time = { version = "0.3.36", features = [
"serde-well-known",
"formatting",
@ -40,7 +41,7 @@ ureq = "2.10.0"
uuid = { version = "1.10.0", features = ["serde", "v4"] }
[dev-dependencies]
arroy = "0.4.0"
arroy = { git = "https://github.com/meilisearch/arroy/", rev = "2386594dfb009ce08821a925ccc89fb8e30bf73d" }
big_s = "1.0.2"
crossbeam = "0.8.4"
insta = { version = "1.39.0", features = ["json", "redactions"] }

View File

@ -25,8 +25,9 @@ enum AutobatchKind {
primary_key: Option<String>,
},
DocumentEdition,
DocumentDeletion,
DocumentDeletionByFilter,
DocumentDeletion {
by_filter: bool,
},
DocumentClear,
Settings {
allow_index_creation: bool,
@ -65,10 +66,12 @@ impl From<KindWithContent> for AutobatchKind {
..
} => AutobatchKind::DocumentImport { method, allow_index_creation, primary_key },
KindWithContent::DocumentEdition { .. } => AutobatchKind::DocumentEdition,
KindWithContent::DocumentDeletion { .. } => AutobatchKind::DocumentDeletion,
KindWithContent::DocumentDeletion { .. } => {
AutobatchKind::DocumentDeletion { by_filter: false }
}
KindWithContent::DocumentClear { .. } => AutobatchKind::DocumentClear,
KindWithContent::DocumentDeletionByFilter { .. } => {
AutobatchKind::DocumentDeletionByFilter
AutobatchKind::DocumentDeletion { by_filter: true }
}
KindWithContent::SettingsUpdate { allow_index_creation, is_deletion, .. } => {
AutobatchKind::Settings {
@ -105,9 +108,7 @@ pub enum BatchKind {
},
DocumentDeletion {
deletion_ids: Vec<TaskId>,
},
DocumentDeletionByFilter {
id: TaskId,
includes_by_filter: bool,
},
ClearAndSettings {
other: Vec<TaskId>,
@ -205,12 +206,13 @@ impl BatchKind {
allow_index_creation,
),
K::DocumentEdition => (Break(BatchKind::DocumentEdition { id: task_id }), false),
K::DocumentDeletion => {
(Continue(BatchKind::DocumentDeletion { deletion_ids: vec![task_id] }), false)
}
K::DocumentDeletionByFilter => {
(Break(BatchKind::DocumentDeletionByFilter { id: task_id }), false)
}
K::DocumentDeletion { by_filter: includes_by_filter } => (
Continue(BatchKind::DocumentDeletion {
deletion_ids: vec![task_id],
includes_by_filter,
}),
false,
),
K::Settings { allow_index_creation } => (
Continue(BatchKind::Settings { allow_index_creation, settings_ids: vec![task_id] }),
allow_index_creation,
@ -228,7 +230,7 @@ impl BatchKind {
match (self, kind) {
// We don't batch any of these operations
(this, K::IndexCreation | K::IndexUpdate | K::IndexSwap | K::DocumentEdition | K::DocumentDeletionByFilter) => Break(this),
(this, K::IndexCreation | K::IndexUpdate | K::IndexSwap | K::DocumentEdition) => Break(this),
// We must not batch tasks that don't have the same index creation rights if the index doesn't already exists.
(this, kind) if !index_already_exists && this.allow_index_creation() == Some(false) && kind.allow_index_creation() == Some(true) => {
Break(this)
@ -264,7 +266,7 @@ impl BatchKind {
// The index deletion can batch with everything but must stop after
(
BatchKind::DocumentClear { mut ids }
| BatchKind::DocumentDeletion { deletion_ids: mut ids }
| BatchKind::DocumentDeletion { deletion_ids: mut ids, includes_by_filter: _ }
| BatchKind::DocumentOperation { method: _, allow_index_creation: _, primary_key: _, operation_ids: mut ids }
| BatchKind::Settings { allow_index_creation: _, settings_ids: mut ids },
K::IndexDeletion,
@ -284,7 +286,7 @@ impl BatchKind {
(
BatchKind::DocumentClear { mut ids },
K::DocumentClear | K::DocumentDeletion,
K::DocumentClear | K::DocumentDeletion { by_filter: _ },
) => {
ids.push(id);
Continue(BatchKind::DocumentClear { ids })
@ -328,7 +330,7 @@ impl BatchKind {
}
(
BatchKind::DocumentOperation { method, allow_index_creation, primary_key, mut operation_ids },
K::DocumentDeletion,
K::DocumentDeletion { by_filter: false },
) => {
operation_ids.push(id);
@ -339,6 +341,13 @@ impl BatchKind {
operation_ids,
})
}
// We can't batch a document operation with a delete by filter
(
this @ BatchKind::DocumentOperation { .. },
K::DocumentDeletion { by_filter: true },
) => {
Break(this)
}
// but we can't autobatch documents if it's not the same kind
// this match branch MUST be AFTER the previous one
(
@ -357,13 +366,18 @@ impl BatchKind {
operation_ids,
}),
(BatchKind::DocumentDeletion { mut deletion_ids }, K::DocumentClear) => {
(BatchKind::DocumentDeletion { mut deletion_ids, includes_by_filter: _ }, K::DocumentClear) => {
deletion_ids.push(id);
Continue(BatchKind::DocumentClear { ids: deletion_ids })
}
// we can't autobatch the deletion and import if the document deletion contained a filter
(
this @ BatchKind::DocumentDeletion { deletion_ids: _, includes_by_filter: true },
K::DocumentImport { .. }
) => Break(this),
// we can autobatch the deletion and import if the index already exists
(
BatchKind::DocumentDeletion { mut deletion_ids },
BatchKind::DocumentDeletion { mut deletion_ids, includes_by_filter: false },
K::DocumentImport { method, allow_index_creation, primary_key }
) if index_already_exists => {
deletion_ids.push(id);
@ -377,7 +391,7 @@ impl BatchKind {
}
// we can autobatch the deletion and import if both can't create an index
(
BatchKind::DocumentDeletion { mut deletion_ids },
BatchKind::DocumentDeletion { mut deletion_ids, includes_by_filter: false },
K::DocumentImport { method, allow_index_creation, primary_key }
) if !allow_index_creation => {
deletion_ids.push(id);
@ -396,9 +410,9 @@ impl BatchKind {
) => {
Break(this)
}
(BatchKind::DocumentDeletion { mut deletion_ids }, K::DocumentDeletion) => {
(BatchKind::DocumentDeletion { mut deletion_ids, includes_by_filter }, K::DocumentDeletion { by_filter }) => {
deletion_ids.push(id);
Continue(BatchKind::DocumentDeletion { deletion_ids })
Continue(BatchKind::DocumentDeletion { deletion_ids, includes_by_filter: includes_by_filter | by_filter })
}
(this @ BatchKind::DocumentDeletion { .. }, K::Settings { .. }) => Break(this),
@ -412,7 +426,7 @@ impl BatchKind {
}),
(
this @ BatchKind::Settings { .. },
K::DocumentImport { .. } | K::DocumentDeletion,
K::DocumentImport { .. } | K::DocumentDeletion { .. },
) => Break(this),
(
BatchKind::Settings { mut settings_ids, allow_index_creation },
@ -443,7 +457,7 @@ impl BatchKind {
settings_ids,
allow_index_creation,
},
K::DocumentDeletion,
K::DocumentDeletion { .. },
) => {
other.push(id);
Continue(BatchKind::ClearAndSettings {
@ -505,7 +519,7 @@ impl BatchKind {
// this MUST be AFTER the two previous branch
(
this @ BatchKind::SettingsAndDocumentOperation { .. },
K::DocumentDeletion | K::DocumentImport { .. },
K::DocumentDeletion { .. } | K::DocumentImport { .. },
) => Break(this),
(
BatchKind::SettingsAndDocumentOperation { mut settings_ids, method, allow_index_creation,primary_key, operation_ids },
@ -525,8 +539,7 @@ impl BatchKind {
| BatchKind::IndexDeletion { .. }
| BatchKind::IndexUpdate { .. }
| BatchKind::IndexSwap { .. }
| BatchKind::DocumentEdition { .. }
| BatchKind::DocumentDeletionByFilter { .. },
| BatchKind::DocumentEdition { .. },
_,
) => {
unreachable!()
@ -616,6 +629,13 @@ mod tests {
}
}
fn doc_del_fil() -> KindWithContent {
KindWithContent::DocumentDeletionByFilter {
index_uid: String::from("doggo"),
filter_expr: serde_json::json!("cuteness > 100"),
}
}
fn doc_clr() -> KindWithContent {
KindWithContent::DocumentClear { index_uid: String::from("doggo") }
}
@ -676,10 +696,16 @@ mod tests {
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None), doc_imp(UpdateDocuments, false, None), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1, 2] }, false))");
// we can autobatch one or multiple DocumentDeletion together
debug_snapshot!(autobatch_from(true, None, [doc_del()]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_del(), doc_del()]), @"Some((DocumentDeletion { deletion_ids: [0, 1, 2] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_del()]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_del(), doc_del(), doc_del()]), @"Some((DocumentDeletion { deletion_ids: [0, 1, 2] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_del()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_del(), doc_del()]), @"Some((DocumentDeletion { deletion_ids: [0, 1, 2], includes_by_filter: false }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_del()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_del(), doc_del(), doc_del()]), @"Some((DocumentDeletion { deletion_ids: [0, 1, 2], includes_by_filter: false }, false))");
// we can autobatch one or multiple DocumentDeletionByFilter together
debug_snapshot!(autobatch_from(true, None, [doc_del_fil()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_del_fil(), doc_del_fil()]), @"Some((DocumentDeletion { deletion_ids: [0, 1, 2], includes_by_filter: true }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_del_fil()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_del_fil(), doc_del_fil(), doc_del_fil()]), @"Some((DocumentDeletion { deletion_ids: [0, 1, 2], includes_by_filter: true }, false))");
// we can autobatch one or multiple Settings together
debug_snapshot!(autobatch_from(true, None, [settings(true)]), @"Some((Settings { allow_index_creation: true, settings_ids: [0] }, true))");
@ -722,25 +748,63 @@ mod tests {
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(ReplaceDocuments, false, Some("catto"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(UpdateDocuments, false, Some("catto"))]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
// But we can't autobatch document addition with document deletion by filter
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_del_fil()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_del_fil()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_del_fil()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None), doc_del_fil()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0] }, false))"###);
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0] }, false))"###);
debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, true, None), doc_del_fil()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, true, None), doc_del_fil()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, false, None), doc_del_fil()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, false, None), doc_del_fil()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, true, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, true, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, false, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0] }, false))"###);
debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, false, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0] }, false))"###);
// And the other way around
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(ReplaceDocuments, true, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(UpdateDocuments, true, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(ReplaceDocuments, false, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(UpdateDocuments, false, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
debug_snapshot!(autobatch_from(false, None, [doc_del_fil(), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
debug_snapshot!(autobatch_from(false, None, [doc_del_fil(), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
debug_snapshot!(autobatch_from(false, None, [doc_del_fil(), doc_imp(ReplaceDocuments, false, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
debug_snapshot!(autobatch_from(false, None, [doc_del_fil(), doc_imp(UpdateDocuments, false, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
}
#[test]
fn simple_document_operation_dont_autobatch_with_other() {
// addition, updates and deletion can't batch together
// addition, updates and deletion by filter can't batch together
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_del_fil()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_del_fil()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), idx_create()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), idx_create()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_del(), idx_create()]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_del(), idx_create()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), idx_create()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), idx_update()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), idx_update()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_del(), idx_update()]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_del(), idx_update()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), idx_update()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), idx_swap()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), idx_swap()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_del(), idx_swap()]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_del(), idx_swap()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), idx_swap()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
}
#[test]
@ -807,6 +871,7 @@ mod tests {
debug_snapshot!(autobatch_from(true, None, [idx_del(), doc_imp(ReplaceDocuments, false, None)]), @"Some((IndexDeletion { ids: [0] }, false))");
debug_snapshot!(autobatch_from(true, None, [idx_del(), doc_imp(UpdateDocuments, false, None)]), @"Some((IndexDeletion { ids: [0] }, false))");
debug_snapshot!(autobatch_from(true, None, [idx_del(), doc_del()]), @"Some((IndexDeletion { ids: [0] }, false))");
debug_snapshot!(autobatch_from(true, None, [idx_del(), doc_del_fil()]), @"Some((IndexDeletion { ids: [0] }, false))");
debug_snapshot!(autobatch_from(true, None, [idx_del(), doc_clr()]), @"Some((IndexDeletion { ids: [0] }, false))");
debug_snapshot!(autobatch_from(true, None, [idx_del(), settings(true)]), @"Some((IndexDeletion { ids: [0] }, false))");
debug_snapshot!(autobatch_from(true, None, [idx_del(), settings(false)]), @"Some((IndexDeletion { ids: [0] }, false))");
@ -816,6 +881,7 @@ mod tests {
debug_snapshot!(autobatch_from(false,None, [idx_del(), doc_imp(ReplaceDocuments, false, None)]), @"Some((IndexDeletion { ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [idx_del(), doc_imp(UpdateDocuments, false, None)]), @"Some((IndexDeletion { ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [idx_del(), doc_del()]), @"Some((IndexDeletion { ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [idx_del(), doc_del_fil()]), @"Some((IndexDeletion { ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [idx_del(), doc_clr()]), @"Some((IndexDeletion { ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [idx_del(), settings(true)]), @"Some((IndexDeletion { ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [idx_del(), settings(false)]), @"Some((IndexDeletion { ids: [0] }, false))");
@ -827,6 +893,7 @@ mod tests {
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_del(), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(true, None, [settings(true), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(true, None, [settings(false), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
@ -836,6 +903,7 @@ mod tests {
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_del(), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_del_fil(), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(false,None, [settings(true), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(false,None, [settings(false), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
@ -901,10 +969,10 @@ mod tests {
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), settings(true)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
// batch deletion and addition
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(ReplaceDocuments, true, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(UpdateDocuments, true, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(ReplaceDocuments, true, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))");
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(UpdateDocuments, true, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))");
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))");
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))");
}
#[test]

View File

@ -28,6 +28,9 @@ use meilisearch_types::error::Code;
use meilisearch_types::heed::{RoTxn, RwTxn};
use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader};
use meilisearch_types::milli::heed::CompactionOption;
use meilisearch_types::milli::update::new::indexer::{
self, retrieve_or_guess_primary_key, DocumentChanges,
};
use meilisearch_types::milli::update::{
IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig, Settings as MilliSettings,
};
@ -110,9 +113,9 @@ pub(crate) enum IndexOperation {
index_uid: String,
task: Task,
},
IndexDocumentDeletionByFilter {
DocumentDeletion {
index_uid: String,
task: Task,
tasks: Vec<Task>,
},
DocumentClear {
index_uid: String,
@ -165,11 +168,11 @@ impl Batch {
Batch::IndexOperation { op, .. } => match op {
IndexOperation::DocumentOperation { tasks, .. }
| IndexOperation::Settings { tasks, .. }
| IndexOperation::DocumentDeletion { tasks, .. }
| IndexOperation::DocumentClear { tasks, .. } => {
RoaringBitmap::from_iter(tasks.iter().map(|task| task.uid))
}
IndexOperation::DocumentEdition { task, .. }
| IndexOperation::IndexDocumentDeletionByFilter { task, .. } => {
IndexOperation::DocumentEdition { task, .. } => {
RoaringBitmap::from_sorted_iter(std::iter::once(task.uid)).unwrap()
}
IndexOperation::SettingsAndDocumentOperation {
@ -234,7 +237,7 @@ impl IndexOperation {
match self {
IndexOperation::DocumentOperation { index_uid, .. }
| IndexOperation::DocumentEdition { index_uid, .. }
| IndexOperation::IndexDocumentDeletionByFilter { index_uid, .. }
| IndexOperation::DocumentDeletion { index_uid, .. }
| IndexOperation::DocumentClear { index_uid, .. }
| IndexOperation::Settings { index_uid, .. }
| IndexOperation::DocumentClearAndSetting { index_uid, .. }
@ -252,8 +255,8 @@ impl fmt::Display for IndexOperation {
IndexOperation::DocumentEdition { .. } => {
f.write_str("IndexOperation::DocumentEdition")
}
IndexOperation::IndexDocumentDeletionByFilter { .. } => {
f.write_str("IndexOperation::IndexDocumentDeletionByFilter")
IndexOperation::DocumentDeletion { .. } => {
f.write_str("IndexOperation::DocumentDeletion")
}
IndexOperation::DocumentClear { .. } => f.write_str("IndexOperation::DocumentClear"),
IndexOperation::Settings { .. } => f.write_str("IndexOperation::Settings"),
@ -289,21 +292,6 @@ impl IndexScheduler {
},
must_create_index,
})),
BatchKind::DocumentDeletionByFilter { id } => {
let task = self.get_task(rtxn, id)?.ok_or(Error::CorruptedTaskQueue)?;
match &task.kind {
KindWithContent::DocumentDeletionByFilter { index_uid, .. } => {
Ok(Some(Batch::IndexOperation {
op: IndexOperation::IndexDocumentDeletionByFilter {
index_uid: index_uid.clone(),
task,
},
must_create_index: false,
}))
}
_ => unreachable!(),
}
}
BatchKind::DocumentEdition { id } => {
let task = self.get_task(rtxn, id)?.ok_or(Error::CorruptedTaskQueue)?;
match &task.kind {
@ -366,30 +354,11 @@ impl IndexScheduler {
must_create_index,
}))
}
BatchKind::DocumentDeletion { deletion_ids } => {
BatchKind::DocumentDeletion { deletion_ids, includes_by_filter: _ } => {
let tasks = self.get_existing_tasks(rtxn, deletion_ids)?;
let mut operations = Vec::with_capacity(tasks.len());
let mut documents_counts = Vec::with_capacity(tasks.len());
for task in &tasks {
match task.kind {
KindWithContent::DocumentDeletion { ref documents_ids, .. } => {
operations.push(DocumentOperation::Delete(documents_ids.clone()));
documents_counts.push(documents_ids.len() as u64);
}
_ => unreachable!(),
}
}
Ok(Some(Batch::IndexOperation {
op: IndexOperation::DocumentOperation {
index_uid,
primary_key: None,
method: IndexDocumentsMethod::ReplaceDocuments,
documents_counts,
operations,
tasks,
},
op: IndexOperation::DocumentDeletion { index_uid, tasks },
must_create_index,
}))
}
@ -909,10 +878,8 @@ impl IndexScheduler {
while let Some(doc) =
cursor.next_document().map_err(milli::Error::from)?
{
dump_content_file.push_document(&obkv_to_object(
&doc,
&documents_batch_index,
)?)?;
dump_content_file
.push_document(&obkv_to_object(doc, &documents_batch_index)?)?;
}
dump_content_file.flush()?;
}
@ -1286,58 +1253,52 @@ impl IndexScheduler {
let must_stop_processing = self.must_stop_processing.clone();
let indexer_config = self.index_mapper.indexer_config();
if let Some(primary_key) = primary_key {
match index.primary_key(index_wtxn)? {
// if a primary key was set AND had already been defined in the index
// but to a different value, we can make the whole batch fail.
Some(pk) => {
if primary_key != pk {
return Err(milli::Error::from(
milli::UserError::PrimaryKeyCannotBeChanged(pk.to_string()),
)
.into());
}
}
// if the primary key was set and there was no primary key set for this index
// we set it to the received value before starting the indexing process.
None => {
let mut builder =
milli::update::Settings::new(index_wtxn, index, indexer_config);
builder.set_primary_key(primary_key);
builder.execute(
|indexing_step| tracing::debug!(update = ?indexing_step),
|| must_stop_processing.clone().get(),
)?;
primary_key_has_been_set = true;
/// TODO manage errors correctly
let rtxn = index.read_txn()?;
let first_addition_uuid = operations
.iter()
.find_map(|op| match op {
DocumentOperation::Add(content_uuid) => Some(content_uuid),
_ => None,
})
.unwrap();
let mut content_files = Vec::new();
for operation in &operations {
if let DocumentOperation::Add(content_uuid) = operation {
let content_file = self.file_store.get_update(*content_uuid)?;
let mmap = unsafe { memmap2::Mmap::map(&content_file)? };
if !mmap.is_empty() {
content_files.push(mmap);
}
}
}
let config = IndexDocumentsConfig { update_method: method, ..Default::default() };
let mut fields_ids_map = index.fields_ids_map(&rtxn)?;
let first_document = match content_files.first() {
Some(mmap) => {
let mut iter = serde_json::Deserializer::from_slice(mmap).into_iter();
iter.next().transpose().map_err(|e| e.into()).map_err(Error::IoError)?
}
None => None,
};
let embedder_configs = index.embedding_configs(index_wtxn)?;
// TODO: consider Arc'ing the map too (we only need read access + we'll be cloning it multiple times, so really makes sense)
let embedders = self.embedders(embedder_configs)?;
let mut builder = milli::update::IndexDocuments::new(
index_wtxn,
let primary_key = retrieve_or_guess_primary_key(
&rtxn,
index,
indexer_config,
config,
|indexing_step| tracing::trace!(?indexing_step, "Update"),
|| must_stop_processing.get(),
)?;
&mut fields_ids_map,
first_document.as_ref(),
)?
.unwrap();
let mut content_files_iter = content_files.iter();
let mut indexer = indexer::DocumentOperation::new(method);
for (operation, task) in operations.into_iter().zip(tasks.iter_mut()) {
match operation {
DocumentOperation::Add(content_uuid) => {
let content_file = self.file_store.get_update(content_uuid)?;
let reader = DocumentsBatchReader::from_reader(content_file)
.map_err(milli::Error::from)?;
let (new_builder, user_result) = builder.add_documents(reader)?;
builder = new_builder;
builder = builder.with_embedders(embedders.clone());
DocumentOperation::Add(_content_uuid) => {
let mmap = content_files_iter.next().unwrap();
let stats = indexer.add_documents(mmap)?;
// builder = builder.with_embedders(embedders.clone());
let received_documents =
if let Some(Details::DocumentAdditionOrUpdate {
@ -1351,30 +1312,17 @@ impl IndexScheduler {
unreachable!();
};
match user_result {
Ok(count) => {
task.status = Status::Succeeded;
task.details = Some(Details::DocumentAdditionOrUpdate {
received_documents,
indexed_documents: Some(count),
})
}
Err(e) => {
task.status = Status::Failed;
task.details = Some(Details::DocumentAdditionOrUpdate {
received_documents,
indexed_documents: Some(0),
});
task.error = Some(milli::Error::from(e).into());
}
}
task.status = Status::Succeeded;
task.details = Some(Details::DocumentAdditionOrUpdate {
received_documents,
indexed_documents: Some(stats.document_count as u64),
})
}
DocumentOperation::Delete(document_ids) => {
let (new_builder, user_result) =
builder.remove_documents(document_ids)?;
builder = new_builder;
let count = document_ids.len();
indexer.delete_documents(document_ids);
// Uses Invariant: remove documents actually always returns Ok for the inner result
let count = user_result.unwrap();
// let count = user_result.unwrap();
let provided_ids =
if let Some(Details::DocumentDeletion { provided_ids, .. }) =
task.details
@ -1388,26 +1336,35 @@ impl IndexScheduler {
task.status = Status::Succeeded;
task.details = Some(Details::DocumentDeletion {
provided_ids,
deleted_documents: Some(count),
deleted_documents: Some(count as u64),
});
}
}
}
if !tasks.iter().all(|res| res.error.is_some()) {
let addition = builder.execute()?;
tracing::info!(indexing_result = ?addition, processed_in = ?started_processing_at.elapsed(), "document indexing done");
} else if primary_key_has_been_set {
// Everything failed but we've set a primary key.
// We need to remove it.
let mut builder =
milli::update::Settings::new(index_wtxn, index, indexer_config);
builder.reset_primary_key();
builder.execute(
|indexing_step| tracing::trace!(update = ?indexing_step),
|| must_stop_processing.clone().get(),
)?;
/// TODO create a pool if needed
// let pool = indexer_config.thread_pool.unwrap();
let pool = rayon::ThreadPoolBuilder::new().build().unwrap();
let param = (index, &rtxn, &primary_key);
let document_changes = indexer.document_changes(&mut fields_ids_map, param)?;
/// TODO pass/write the FieldsIdsMap
indexer::index(index_wtxn, index, fields_ids_map, &pool, document_changes)?;
// tracing::info!(indexing_result = ?addition, processed_in = ?started_processing_at.elapsed(), "document indexing done");
}
// else if primary_key_has_been_set {
// // Everything failed but we've set a primary key.
// // We need to remove it.
// let mut builder =
// milli::update::Settings::new(index_wtxn, index, indexer_config);
// builder.reset_primary_key();
// builder.execute(
// |indexing_step| tracing::trace!(update = ?indexing_step),
// || must_stop_processing.clone().get(),
// )?;
// }
Ok(tasks)
}
@ -1439,7 +1396,7 @@ impl IndexScheduler {
{
(original_filter, context, function)
} else {
// In the case of a `documentDeleteByFilter` the details MUST be set
// In the case of a `documentEdition` the details MUST be set
unreachable!();
};
@ -1469,52 +1426,102 @@ impl IndexScheduler {
Ok(vec![task])
}
IndexOperation::IndexDocumentDeletionByFilter { mut task, index_uid: _ } => {
let filter =
if let KindWithContent::DocumentDeletionByFilter { filter_expr, .. } =
&task.kind
{
filter_expr
} else {
unreachable!()
};
let deleted_documents = delete_document_by_filter(
index_wtxn,
filter,
self.index_mapper.indexer_config(),
self.must_stop_processing.clone(),
index,
);
let original_filter = if let Some(Details::DocumentDeletionByFilter {
original_filter,
deleted_documents: _,
}) = task.details
{
original_filter
} else {
// In the case of a `documentDeleteByFilter` the details MUST be set
unreachable!();
};
IndexOperation::DocumentDeletion { mut tasks, index_uid: _ } => {
let mut to_delete = RoaringBitmap::new();
let external_documents_ids = index.external_documents_ids();
match deleted_documents {
Ok(deleted_documents) => {
task.status = Status::Succeeded;
task.details = Some(Details::DocumentDeletionByFilter {
original_filter,
deleted_documents: Some(deleted_documents),
});
}
Err(e) => {
task.status = Status::Failed;
task.details = Some(Details::DocumentDeletionByFilter {
original_filter,
deleted_documents: Some(0),
});
task.error = Some(e.into());
for task in tasks.iter_mut() {
let before = to_delete.len();
task.status = Status::Succeeded;
match &task.kind {
KindWithContent::DocumentDeletion { index_uid: _, documents_ids } => {
for id in documents_ids {
if let Some(id) = external_documents_ids.get(index_wtxn, id)? {
to_delete.insert(id);
}
}
let will_be_removed = to_delete.len() - before;
task.details = Some(Details::DocumentDeletion {
provided_ids: documents_ids.len(),
deleted_documents: Some(will_be_removed),
});
}
KindWithContent::DocumentDeletionByFilter { index_uid: _, filter_expr } => {
let before = to_delete.len();
let filter = match Filter::from_json(filter_expr) {
Ok(filter) => filter,
Err(err) => {
// theorically, this should be catched by deserr before reaching the index-scheduler and cannot happens
task.status = Status::Failed;
task.error = match err {
milli::Error::UserError(
milli::UserError::InvalidFilterExpression { .. },
) => Some(
Error::from(err)
.with_custom_error_code(Code::InvalidDocumentFilter)
.into(),
),
e => Some(e.into()),
};
None
}
};
if let Some(filter) = filter {
let candidates =
filter.evaluate(index_wtxn, index).map_err(|err| match err {
milli::Error::UserError(
milli::UserError::InvalidFilter(_),
) => Error::from(err)
.with_custom_error_code(Code::InvalidDocumentFilter),
e => e.into(),
});
match candidates {
Ok(candidates) => to_delete |= candidates,
Err(err) => {
task.status = Status::Failed;
task.error = Some(err.into());
}
};
}
let will_be_removed = to_delete.len() - before;
if let Some(Details::DocumentDeletionByFilter {
original_filter: _,
deleted_documents,
}) = &mut task.details
{
*deleted_documents = Some(will_be_removed);
} else {
// In the case of a `documentDeleteByFilter` the details MUST be set
unreachable!()
}
}
_ => unreachable!(),
}
}
Ok(vec![task])
let config = IndexDocumentsConfig {
update_method: IndexDocumentsMethod::ReplaceDocuments,
..Default::default()
};
let must_stop_processing = self.must_stop_processing.clone();
let mut builder = milli::update::IndexDocuments::new(
index_wtxn,
index,
self.index_mapper.indexer_config(),
config,
|indexing_step| tracing::debug!(update = ?indexing_step),
|| must_stop_processing.get(),
)?;
let (new_builder, _count) =
builder.remove_documents_from_db_no_batch(&to_delete)?;
builder = new_builder;
let _ = builder.execute()?;
Ok(tasks)
}
IndexOperation::Settings { index_uid: _, settings, mut tasks } => {
let indexer_config = self.index_mapper.indexer_config();
@ -1718,46 +1725,6 @@ impl IndexScheduler {
}
}
fn delete_document_by_filter<'a>(
wtxn: &mut RwTxn<'a>,
filter: &serde_json::Value,
indexer_config: &IndexerConfig,
must_stop_processing: MustStopProcessing,
index: &'a Index,
) -> Result<u64> {
let filter = Filter::from_json(filter)?;
Ok(if let Some(filter) = filter {
let candidates = filter.evaluate(wtxn, index).map_err(|err| match err {
milli::Error::UserError(milli::UserError::InvalidFilter(_)) => {
Error::from(err).with_custom_error_code(Code::InvalidDocumentFilter)
}
e => e.into(),
})?;
let config = IndexDocumentsConfig {
update_method: IndexDocumentsMethod::ReplaceDocuments,
..Default::default()
};
let mut builder = milli::update::IndexDocuments::new(
wtxn,
index,
indexer_config,
config,
|indexing_step| tracing::debug!(update = ?indexing_step),
|| must_stop_processing.get(),
)?;
let (new_builder, count) = builder.remove_documents_from_db_no_batch(&candidates)?;
builder = new_builder;
let _ = builder.execute()?;
count
} else {
0
})
}
fn edit_documents_by_function<'a>(
wtxn: &mut RwTxn<'a>,
filter: &Option<serde_json::Value>,

View File

@ -87,7 +87,7 @@ impl RoFeatures {
Ok(())
} else {
Err(FeatureNotEnabledError {
disabled_action: "Using `CONTAINS` in a filter",
disabled_action: "Using `CONTAINS` or `STARTS WITH` in a filter",
feature: "contains filter",
issue_link: "https://github.com/orgs/meilisearch/discussions/763",
}

View File

@ -11,6 +11,9 @@ use crate::index_mapper::IndexMapper;
use crate::{IndexScheduler, Kind, Status, BEI128};
pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
// Since we'll snapshot the index right afterward, we don't need to ensure it's internally consistent for every run.
// We can only do it for the release run, where the function runs way faster.
#[cfg(not(debug_assertions))]
scheduler.assert_internally_consistent();
let IndexScheduler {

View File

@ -35,6 +35,7 @@ pub type TaskId = u32;
use std::collections::{BTreeMap, HashMap};
use std::io::{self, BufReader, Read};
use std::ops::{Bound, RangeBounds};
use std::panic::{catch_unwind, AssertUnwindSafe};
use std::path::{Path, PathBuf};
use std::sync::atomic::Ordering::{self, Relaxed};
use std::sync::atomic::{AtomicBool, AtomicU32};
@ -612,19 +613,24 @@ impl IndexScheduler {
#[cfg(test)]
run.breakpoint(Breakpoint::Init);
run.wake_up.wait();
run.wake_up.wait_timeout(std::time::Duration::from_secs(60));
loop {
match run.tick() {
Ok(TickOutcome::TickAgain(_)) => (),
Ok(TickOutcome::WaitForSignal) => run.wake_up.wait(),
Err(e) => {
let ret = catch_unwind(AssertUnwindSafe(|| run.tick()));
match ret {
Ok(Ok(TickOutcome::TickAgain(_))) => (),
Ok(Ok(TickOutcome::WaitForSignal)) => run.wake_up.wait(),
Ok(Err(e)) => {
tracing::error!("{e}");
// Wait one second when an irrecoverable error occurs.
if !e.is_recoverable() {
std::thread::sleep(Duration::from_secs(1));
}
}
Err(_panic) => {
tracing::error!("Internal error: Unexpected panic in the `IndexScheduler::run` method.");
}
}
}
})
@ -1471,7 +1477,7 @@ impl IndexScheduler {
.map(
|IndexEmbeddingConfig {
name,
config: milli::vector::EmbeddingConfig { embedder_options, prompt },
config: milli::vector::EmbeddingConfig { embedder_options, prompt, quantized },
..
}| {
let prompt =
@ -1480,7 +1486,10 @@ impl IndexScheduler {
{
let embedders = self.embedders.read().unwrap();
if let Some(embedder) = embedders.get(&embedder_options) {
return Ok((name, (embedder.clone(), prompt)));
return Ok((
name,
(embedder.clone(), prompt, quantized.unwrap_or_default()),
));
}
}
@ -1494,7 +1503,7 @@ impl IndexScheduler {
let mut embedders = self.embedders.write().unwrap();
embedders.insert(embedder_options, embedder.clone());
}
Ok((name, (embedder, prompt)))
Ok((name, (embedder, prompt, quantized.unwrap_or_default())))
},
)
.collect();
@ -1758,6 +1767,7 @@ mod tests {
use crossbeam::channel::RecvTimeoutError;
use file_store::File;
use insta::assert_json_snapshot;
use maplit::btreeset;
use meili_snap::{json_string, snapshot};
use meilisearch_auth::AuthFilter;
use meilisearch_types::document_formats::DocumentFormatError;
@ -2000,11 +2010,13 @@ mod tests {
fn advance_till(&mut self, breakpoints: impl IntoIterator<Item = Breakpoint>) {
for breakpoint in breakpoints {
let b = self.advance();
let state = snapshot_index_scheduler(&self.index_scheduler);
assert_eq!(
b, breakpoint,
"Was expecting the breakpoint `{:?}` but instead got `{:?}`.\n{state}",
breakpoint, b
b,
breakpoint,
"Was expecting the breakpoint `{:?}` but instead got `{:?}`.\n{}",
breakpoint,
b,
snapshot_index_scheduler(&self.index_scheduler)
);
}
}
@ -2028,7 +2040,6 @@ mod tests {
// Wait for one successful batch.
#[track_caller]
fn advance_one_successful_batch(&mut self) {
self.index_scheduler.assert_internally_consistent();
self.advance_till([Start, BatchCreated]);
loop {
match self.advance() {
@ -2047,7 +2058,6 @@ mod tests {
}
self.advance_till([AfterProcessing]);
self.index_scheduler.assert_internally_consistent();
}
// Wait for one failed batch.
@ -2547,6 +2557,117 @@ mod tests {
snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents");
}
#[test]
fn fail_in_process_batch_for_document_deletion() {
let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]);
use meilisearch_types::settings::{Settings, Unchecked};
let mut new_settings: Box<Settings<Unchecked>> = Box::default();
new_settings.filterable_attributes = Setting::Set(btreeset!(S("catto")));
index_scheduler
.register(
KindWithContent::SettingsUpdate {
index_uid: S("doggos"),
new_settings,
is_deletion: false,
allow_index_creation: true,
},
None,
false,
)
.unwrap();
let content = r#"[
{ "id": 1, "doggo": "jean bob" },
{ "id": 2, "catto": "jorts" },
{ "id": 3, "doggo": "bork" }
]"#;
let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0).unwrap();
let documents_count = read_json(content.as_bytes(), &mut file).unwrap();
file.persist().unwrap();
index_scheduler
.register(
KindWithContent::DocumentAdditionOrUpdate {
index_uid: S("doggos"),
primary_key: Some(S("id")),
method: ReplaceDocuments,
content_file: uuid,
documents_count,
allow_index_creation: true,
},
None,
false,
)
.unwrap();
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_setting_and_document_addition");
handle.advance_one_successful_batch();
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_adding_the_settings");
handle.advance_one_successful_batch();
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_adding_the_documents");
index_scheduler
.register(
KindWithContent::DocumentDeletion {
index_uid: S("doggos"),
documents_ids: vec![S("1")],
},
None,
false,
)
.unwrap();
// This one should not be catched by Meilisearch but it's still nice to handle it because if one day we break the filters it could happens
index_scheduler
.register(
KindWithContent::DocumentDeletionByFilter {
index_uid: S("doggos"),
filter_expr: serde_json::json!(true),
},
None,
false,
)
.unwrap();
// Should fail because the ids are not filterable
index_scheduler
.register(
KindWithContent::DocumentDeletionByFilter {
index_uid: S("doggos"),
filter_expr: serde_json::json!("id = 2"),
},
None,
false,
)
.unwrap();
index_scheduler
.register(
KindWithContent::DocumentDeletionByFilter {
index_uid: S("doggos"),
filter_expr: serde_json::json!("catto EXISTS"),
},
None,
false,
)
.unwrap();
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_document_deletions");
// Everything should be batched together
handle.advance_one_successful_batch();
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_removing_the_documents");
let index = index_scheduler.index("doggos").unwrap();
let rtxn = index.read_txn().unwrap();
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
let documents = index
.all_documents(&rtxn)
.unwrap()
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
.collect::<Vec<_>>();
snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents_remaining_should_only_be_bork");
}
#[test]
fn do_not_batch_task_of_different_indexes() {
let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]);
@ -5079,7 +5200,7 @@ mod tests {
let simple_hf_name = name.clone();
let configs = index_scheduler.embedders(configs).unwrap();
let (hf_embedder, _) = configs.get(&simple_hf_name).unwrap();
let (hf_embedder, _, _) = configs.get(&simple_hf_name).unwrap();
let beagle_embed = hf_embedder.embed_one(S("Intel the beagle best doggo")).unwrap();
let lab_embed = hf_embedder.embed_one(S("Max the lab best doggo")).unwrap();
let patou_embed = hf_embedder.embed_one(S("kefir the patou best doggo")).unwrap();
@ -5397,7 +5518,11 @@ mod tests {
),
prompt: PromptData {
template: "{{doc.doggo}}",
max_bytes: Some(
400,
),
},
quantized: None,
},
user_provided: RoaringBitmap<[1, 2]>,
},
@ -5410,28 +5535,8 @@ mod tests {
// the document with the id 3 should keep its original embedding
let docid = index.external_documents_ids.get(&rtxn, "3").unwrap().unwrap();
let mut embeddings = Vec::new();
'vectors: for i in 0..=u8::MAX {
let reader = arroy::Reader::open(&rtxn, i as u16, index.vector_arroy)
.map(Some)
.or_else(|e| match e {
arroy::Error::MissingMetadata(_) => Ok(None),
e => Err(e),
})
.transpose();
let Some(reader) = reader else {
break 'vectors;
};
let embedding = reader.unwrap().item_vector(&rtxn, docid).unwrap();
if let Some(embedding) = embedding {
embeddings.push(embedding)
} else {
break 'vectors;
}
}
let embeddings = index.embeddings(&rtxn, docid).unwrap();
let embeddings = &embeddings["my_doggo_embedder"];
snapshot!(embeddings.len(), @"1");
assert!(embeddings[0].iter().all(|i| *i == 3.0), "{:?}", embeddings[0]);
@ -5611,8 +5716,12 @@ mod tests {
},
),
prompt: PromptData {
template: "{% for field in fields %} {{ field.name }}: {{ field.value }}\n{% endfor %}",
template: "{% for field in fields %}{% if field.is_searchable and field.value != nil %}{{ field.name }}: {{ field.value }}\n{% endif %}{% endfor %}",
max_bytes: Some(
400,
),
},
quantized: None,
},
user_provided: RoaringBitmap<[0]>,
},
@ -5651,8 +5760,12 @@ mod tests {
},
),
prompt: PromptData {
template: "{% for field in fields %} {{ field.name }}: {{ field.value }}\n{% endfor %}",
template: "{% for field in fields %}{% if field.is_searchable and field.value != nil %}{{ field.name }}: {{ field.value }}\n{% endif %}{% endfor %}",
max_bytes: Some(
400,
),
},
quantized: None,
},
user_provided: RoaringBitmap<[]>,
},

View File

@ -0,0 +1,44 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
1 {uid: 1, status: succeeded, details: { received_documents: 3, indexed_documents: Some(3) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued []
succeeded [0,1,]
----------------------------------------------------------------------
### Kind:
"documentAdditionOrUpdate" [1,]
"settingsUpdate" [0,]
----------------------------------------------------------------------
### Index Tasks:
doggos [0,1,]
----------------------------------------------------------------------
### Index Mapper:
doggos: { number_of_documents: 3, field_distribution: {"catto": 1, "doggo": 2, "id": 3} }
----------------------------------------------------------------------
### Canceled By:
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Started At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Finished At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### File Store:
----------------------------------------------------------------------

View File

@ -0,0 +1,43 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
1 {uid: 1, status: enqueued, details: { received_documents: 3, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued [1,]
succeeded [0,]
----------------------------------------------------------------------
### Kind:
"documentAdditionOrUpdate" [1,]
"settingsUpdate" [0,]
----------------------------------------------------------------------
### Index Tasks:
doggos [0,1,]
----------------------------------------------------------------------
### Index Mapper:
doggos: { number_of_documents: 0, field_distribution: {} }
----------------------------------------------------------------------
### Canceled By:
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Started At:
[timestamp] [0,]
----------------------------------------------------------------------
### Finished At:
[timestamp] [0,]
----------------------------------------------------------------------
### File Store:
00000000-0000-0000-0000-000000000000
----------------------------------------------------------------------

View File

@ -0,0 +1,43 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
1 {uid: 1, status: enqueued, details: { received_documents: 3, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued [1,]
succeeded [0,]
----------------------------------------------------------------------
### Kind:
"documentAdditionOrUpdate" [1,]
"settingsUpdate" [0,]
----------------------------------------------------------------------
### Index Tasks:
doggos [0,1,]
----------------------------------------------------------------------
### Index Mapper:
doggos: { number_of_documents: 0, field_distribution: {} }
----------------------------------------------------------------------
### Canceled By:
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Started At:
[timestamp] [0,]
----------------------------------------------------------------------
### Finished At:
[timestamp] [0,]
----------------------------------------------------------------------
### File Store:
00000000-0000-0000-0000-000000000000
----------------------------------------------------------------------

View File

@ -0,0 +1,56 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
1 {uid: 1, status: succeeded, details: { received_documents: 3, indexed_documents: Some(3) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }}
2 {uid: 2, status: succeeded, details: { received_document_ids: 1, deleted_documents: Some(1) }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1"] }}
3 {uid: 3, status: failed, error: ResponseError { code: 200, message: "Invalid type for filter subexpression: expected: String, Array, found: true.", error_code: "invalid_document_filter", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#invalid_document_filter" }, details: { original_filter: true, deleted_documents: Some(0) }, kind: DocumentDeletionByFilter { index_uid: "doggos", filter_expr: Bool(true) }}
4 {uid: 4, status: failed, error: ResponseError { code: 200, message: "Attribute `id` is not filterable. Available filterable attributes are: `catto`.\n1:3 id = 2", error_code: "invalid_document_filter", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#invalid_document_filter" }, details: { original_filter: "id = 2", deleted_documents: Some(0) }, kind: DocumentDeletionByFilter { index_uid: "doggos", filter_expr: String("id = 2") }}
5 {uid: 5, status: succeeded, details: { original_filter: "catto EXISTS", deleted_documents: Some(1) }, kind: DocumentDeletionByFilter { index_uid: "doggos", filter_expr: String("catto EXISTS") }}
----------------------------------------------------------------------
### Status:
enqueued []
succeeded [0,1,2,5,]
failed [3,4,]
----------------------------------------------------------------------
### Kind:
"documentAdditionOrUpdate" [1,]
"documentDeletion" [2,3,4,5,]
"settingsUpdate" [0,]
----------------------------------------------------------------------
### Index Tasks:
doggos [0,1,2,3,4,5,]
----------------------------------------------------------------------
### Index Mapper:
doggos: { number_of_documents: 1, field_distribution: {"doggo": 1, "id": 1} }
----------------------------------------------------------------------
### Canceled By:
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
[timestamp] [2,]
[timestamp] [3,]
[timestamp] [4,]
[timestamp] [5,]
----------------------------------------------------------------------
### Started At:
[timestamp] [0,]
[timestamp] [1,]
[timestamp] [2,3,4,5,]
----------------------------------------------------------------------
### Finished At:
[timestamp] [0,]
[timestamp] [1,]
[timestamp] [2,3,4,5,]
----------------------------------------------------------------------
### File Store:
----------------------------------------------------------------------

View File

@ -0,0 +1,9 @@
---
source: index-scheduler/src/lib.rs
---
[
{
"id": 3,
"doggo": "bork"
}
]

View File

@ -0,0 +1,53 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
1 {uid: 1, status: succeeded, details: { received_documents: 3, indexed_documents: Some(3) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }}
2 {uid: 2, status: enqueued, details: { received_document_ids: 1, deleted_documents: None }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1"] }}
3 {uid: 3, status: enqueued, details: { original_filter: true, deleted_documents: None }, kind: DocumentDeletionByFilter { index_uid: "doggos", filter_expr: Bool(true) }}
4 {uid: 4, status: enqueued, details: { original_filter: "id = 2", deleted_documents: None }, kind: DocumentDeletionByFilter { index_uid: "doggos", filter_expr: String("id = 2") }}
5 {uid: 5, status: enqueued, details: { original_filter: "catto EXISTS", deleted_documents: None }, kind: DocumentDeletionByFilter { index_uid: "doggos", filter_expr: String("catto EXISTS") }}
----------------------------------------------------------------------
### Status:
enqueued [2,3,4,5,]
succeeded [0,1,]
----------------------------------------------------------------------
### Kind:
"documentAdditionOrUpdate" [1,]
"documentDeletion" [2,3,4,5,]
"settingsUpdate" [0,]
----------------------------------------------------------------------
### Index Tasks:
doggos [0,1,2,3,4,5,]
----------------------------------------------------------------------
### Index Mapper:
doggos: { number_of_documents: 3, field_distribution: {"catto": 1, "doggo": 2, "id": 3} }
----------------------------------------------------------------------
### Canceled By:
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
[timestamp] [2,]
[timestamp] [3,]
[timestamp] [4,]
[timestamp] [5,]
----------------------------------------------------------------------
### Started At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Finished At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### File Store:
----------------------------------------------------------------------

View File

@ -0,0 +1,39 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
1 {uid: 1, status: enqueued, details: { received_documents: 3, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued [0,1,]
----------------------------------------------------------------------
### Kind:
"documentAdditionOrUpdate" [1,]
"settingsUpdate" [0,]
----------------------------------------------------------------------
### Index Tasks:
doggos [0,1,]
----------------------------------------------------------------------
### Index Mapper:
----------------------------------------------------------------------
### Canceled By:
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Started At:
----------------------------------------------------------------------
### Finished At:
----------------------------------------------------------------------
### File Store:
00000000-0000-0000-0000-000000000000
----------------------------------------------------------------------

View File

@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
2 {uid: 2, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: None, method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }}
----------------------------------------------------------------------

View File

@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
2 {uid: 2, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: None, method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }}
----------------------------------------------------------------------

View File

@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:

View File

@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:

View File

@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued [0,]

View File

@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued []

View File

@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued [0,]

View File

@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued []

View File

@ -66,3 +66,5 @@ khmer = ["milli/khmer"]
vietnamese = ["milli/vietnamese"]
# force swedish character recomposition
swedish-recomposition = ["milli/swedish-recomposition"]
# force german character recomposition
german = ["milli/german"]

View File

@ -1,20 +1,22 @@
use std::fmt::{self, Debug, Display};
use std::fs::File;
use std::io::{self, BufWriter, Write};
use std::io::{self, BufWriter};
use std::marker::PhantomData;
use memmap2::MmapOptions;
use milli::documents::{DocumentsBatchBuilder, Error};
use memmap2::Mmap;
use milli::documents::Error;
use milli::update::new::TopLevelMap;
use milli::Object;
use serde::de::{SeqAccess, Visitor};
use serde::{Deserialize, Deserializer};
use serde_json::error::Category;
use serde_json::{to_writer, Map, Value};
use crate::error::{Code, ErrorCode};
type Result<T> = std::result::Result<T, DocumentFormatError>;
#[derive(Debug)]
#[derive(Debug, Clone, Copy)]
pub enum PayloadType {
Ndjson,
Json,
@ -88,6 +90,26 @@ impl From<(PayloadType, Error)> for DocumentFormatError {
}
}
impl From<(PayloadType, serde_json::Error)> for DocumentFormatError {
fn from((ty, error): (PayloadType, serde_json::Error)) -> Self {
if error.classify() == Category::Data {
Self::Io(error.into())
} else {
Self::MalformedPayload(Error::Json(error), ty)
}
}
}
impl From<(PayloadType, csv::Error)> for DocumentFormatError {
fn from((ty, error): (PayloadType, csv::Error)) -> Self {
if error.is_io_error() {
Self::Io(error.into())
} else {
Self::MalformedPayload(Error::Csv(error), ty)
}
}
}
impl From<io::Error> for DocumentFormatError {
fn from(error: io::Error) -> Self {
Self::Io(error)
@ -103,67 +125,140 @@ impl ErrorCode for DocumentFormatError {
}
}
/// Reads CSV from input and write an obkv batch to writer.
pub fn read_csv(file: &File, writer: impl Write, delimiter: u8) -> Result<u64> {
let mut builder = DocumentsBatchBuilder::new(BufWriter::new(writer));
let mmap = unsafe { MmapOptions::new().map(file)? };
let csv = csv::ReaderBuilder::new().delimiter(delimiter).from_reader(mmap.as_ref());
builder.append_csv(csv).map_err(|e| (PayloadType::Csv { delimiter }, e))?;
let count = builder.documents_count();
let _ = builder.into_inner().map_err(DocumentFormatError::Io)?;
Ok(count as u64)
// TODO remove that from the place I've borrowed it
#[derive(Debug)]
enum AllowedType {
String,
Boolean,
Number,
}
/// Reads JSON from temporary file and write an obkv batch to writer.
pub fn read_json(file: &File, writer: impl Write) -> Result<u64> {
let mut builder = DocumentsBatchBuilder::new(BufWriter::new(writer));
let mmap = unsafe { MmapOptions::new().map(file)? };
let mut deserializer = serde_json::Deserializer::from_slice(&mmap);
fn parse_csv_header(header: &str) -> (&str, AllowedType) {
// if there are several separators we only split on the last one.
match header.rsplit_once(':') {
Some((field_name, field_type)) => match field_type {
"string" => (field_name, AllowedType::String),
"boolean" => (field_name, AllowedType::Boolean),
"number" => (field_name, AllowedType::Number),
// if the pattern isn't recognized, we keep the whole field.
_otherwise => (header, AllowedType::String),
},
None => (header, AllowedType::String),
}
}
match array_each(&mut deserializer, |obj| builder.append_json_object(&obj)) {
/// Reads CSV from file and write it in NDJSON in a file checking it along the way.
pub fn read_csv(input: &File, output: impl io::Write, delimiter: u8) -> Result<u64> {
let ptype = PayloadType::Csv { delimiter };
let mut output = BufWriter::new(output);
let mut reader = csv::ReaderBuilder::new().delimiter(delimiter).from_reader(input);
let headers = reader.headers().map_err(|e| DocumentFormatError::from((ptype, e)))?.clone();
let typed_fields: Vec<_> = headers.iter().map(parse_csv_header).collect();
let mut object: Map<_, _> =
typed_fields.iter().map(|(k, _)| (k.to_string(), Value::Null)).collect();
let mut line = 0;
let mut record = csv::StringRecord::new();
while reader.read_record(&mut record).map_err(|e| DocumentFormatError::from((ptype, e)))? {
// We increment here and not at the end of the loop
// to take the header offset into account.
line += 1;
// Reset the document values
object.iter_mut().for_each(|(_, v)| *v = Value::Null);
for (i, (name, atype)) in typed_fields.iter().enumerate() {
let value = &record[i];
let trimmed_value = value.trim();
let value = match atype {
AllowedType::Number if trimmed_value.is_empty() => Value::Null,
AllowedType::Number => match trimmed_value.parse::<i64>() {
Ok(integer) => Value::from(integer),
Err(_) => match trimmed_value.parse::<f64>() {
Ok(float) => Value::from(float),
Err(error) => {
return Err(DocumentFormatError::MalformedPayload(
Error::ParseFloat { error, line, value: value.to_string() },
ptype,
))
}
},
},
AllowedType::Boolean if trimmed_value.is_empty() => Value::Null,
AllowedType::Boolean => match trimmed_value.parse::<bool>() {
Ok(bool) => Value::from(bool),
Err(error) => {
return Err(DocumentFormatError::MalformedPayload(
Error::ParseBool { error, line, value: value.to_string() },
ptype,
))
}
},
AllowedType::String if value.is_empty() => Value::Null,
AllowedType::String => Value::from(value),
};
*object.get_mut(*name).expect("encountered an unknown field") = value;
}
to_writer(&mut output, &object).map_err(|e| DocumentFormatError::from((ptype, e)))?;
}
Ok(line as u64)
}
/// Reads JSON from file and write it in NDJSON in a file checking it along the way.
pub fn read_json(input: &File, output: impl io::Write) -> Result<u64> {
// We memory map to be able to deserailize into a TopLevelMap<'pl> that
// does not allocate when possible and only materialize the first/top level.
let input = unsafe { Mmap::map(input).map_err(DocumentFormatError::Io)? };
let mut out = BufWriter::new(output);
let mut deserializer = serde_json::Deserializer::from_slice(&input);
let count = match array_each(&mut deserializer, |obj: TopLevelMap| to_writer(&mut out, &obj)) {
// The json data has been deserialized and does not need to be processed again.
// The data has been transferred to the writer during the deserialization process.
Ok(Ok(_)) => (),
Ok(Err(e)) => return Err(DocumentFormatError::Io(e)),
Ok(Ok(count)) => count,
Ok(Err(e)) => return Err(DocumentFormatError::from((PayloadType::Json, e))),
Err(e) => {
// Attempt to deserialize a single json string when the cause of the exception is not Category.data
// Other types of deserialisation exceptions are returned directly to the front-end
if e.classify() != serde_json::error::Category::Data {
return Err(DocumentFormatError::MalformedPayload(
Error::Json(e),
PayloadType::Json,
));
if e.classify() != Category::Data {
return Err(DocumentFormatError::from((PayloadType::Json, e)));
}
let content: Object = serde_json::from_slice(&mmap)
let content: Object = serde_json::from_slice(&input)
.map_err(Error::Json)
.map_err(|e| (PayloadType::Json, e))?;
builder.append_json_object(&content).map_err(DocumentFormatError::Io)?;
to_writer(&mut out, &content)
.map(|_| 1)
.map_err(|e| DocumentFormatError::from((PayloadType::Json, e)))?
}
};
match out.into_inner() {
Ok(_) => Ok(count),
Err(ie) => Err(DocumentFormatError::Io(ie.into_error())),
}
let count = builder.documents_count();
let _ = builder.into_inner().map_err(DocumentFormatError::Io)?;
Ok(count as u64)
}
/// Reads JSON from temporary file and write an obkv batch to writer.
pub fn read_ndjson(file: &File, writer: impl Write) -> Result<u64> {
let mut builder = DocumentsBatchBuilder::new(BufWriter::new(writer));
let mmap = unsafe { MmapOptions::new().map(file)? };
/// Reads NDJSON from file and write it in NDJSON in a file checking it along the way.
pub fn read_ndjson(input: &File, output: impl io::Write) -> Result<u64> {
// We memory map to be able to deserailize into a TopLevelMap<'pl> that
// does not allocate when possible and only materialize the first/top level.
let input = unsafe { Mmap::map(input).map_err(DocumentFormatError::Io)? };
let mut output = BufWriter::new(output);
for result in serde_json::Deserializer::from_slice(&mmap).into_iter() {
let object = result.map_err(Error::Json).map_err(|e| (PayloadType::Ndjson, e))?;
builder.append_json_object(&object).map_err(Into::into).map_err(DocumentFormatError::Io)?;
let mut count = 0;
for result in serde_json::Deserializer::from_slice(&input).into_iter() {
count += 1;
result
.and_then(|map: TopLevelMap| to_writer(&mut output, &map))
.map_err(|e| DocumentFormatError::from((PayloadType::Ndjson, e)))?;
}
let count = builder.documents_count();
let _ = builder.into_inner().map_err(Into::into).map_err(DocumentFormatError::Io)?;
Ok(count as u64)
Ok(count)
}
/// The actual handling of the deserialization process in serde
@ -172,20 +267,23 @@ pub fn read_ndjson(file: &File, writer: impl Write) -> Result<u64> {
/// ## References
/// <https://serde.rs/stream-array.html>
/// <https://github.com/serde-rs/json/issues/160>
fn array_each<'de, D, T, F>(deserializer: D, f: F) -> std::result::Result<io::Result<u64>, D::Error>
fn array_each<'de, D, T, F>(
deserializer: D,
f: F,
) -> std::result::Result<serde_json::Result<u64>, D::Error>
where
D: Deserializer<'de>,
T: Deserialize<'de>,
F: FnMut(T) -> io::Result<()>,
F: FnMut(T) -> serde_json::Result<()>,
{
struct SeqVisitor<T, F>(F, PhantomData<T>);
impl<'de, T, F> Visitor<'de> for SeqVisitor<T, F>
where
T: Deserialize<'de>,
F: FnMut(T) -> io::Result<()>,
F: FnMut(T) -> serde_json::Result<()>,
{
type Value = io::Result<u64>;
type Value = serde_json::Result<u64>;
fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
formatter.write_str("a nonempty sequence")
@ -194,7 +292,7 @@ where
fn visit_seq<A>(
mut self,
mut seq: A,
) -> std::result::Result<io::Result<u64>, <A as SeqAccess<'de>>::Error>
) -> std::result::Result<serde_json::Result<u64>, <A as SeqAccess<'de>>::Error>
where
A: SeqAccess<'de>,
{
@ -203,7 +301,7 @@ where
match self.0(value) {
Ok(()) => max += 1,
Err(e) => return Ok(Err(e)),
};
}
}
Ok(Ok(max))
}

View File

@ -238,8 +238,14 @@ InvalidIndexLimit , InvalidRequest , BAD_REQUEST ;
InvalidIndexOffset , InvalidRequest , BAD_REQUEST ;
InvalidIndexPrimaryKey , InvalidRequest , BAD_REQUEST ;
InvalidIndexUid , InvalidRequest , BAD_REQUEST ;
InvalidMultiSearchFacets , InvalidRequest , BAD_REQUEST ;
InvalidMultiSearchFacetsByIndex , InvalidRequest , BAD_REQUEST ;
InvalidMultiSearchFacetOrder , InvalidRequest , BAD_REQUEST ;
InvalidMultiSearchFederated , InvalidRequest , BAD_REQUEST ;
InvalidMultiSearchFederationOptions , InvalidRequest , BAD_REQUEST ;
InvalidMultiSearchMaxValuesPerFacet , InvalidRequest , BAD_REQUEST ;
InvalidMultiSearchMergeFacets , InvalidRequest , BAD_REQUEST ;
InvalidMultiSearchQueryFacets , InvalidRequest , BAD_REQUEST ;
InvalidMultiSearchQueryPagination , InvalidRequest , BAD_REQUEST ;
InvalidMultiSearchQueryRankingRules , InvalidRequest , BAD_REQUEST ;
InvalidMultiSearchWeight , InvalidRequest , BAD_REQUEST ;
@ -388,7 +394,11 @@ impl ErrorCode for milli::Error {
| UserError::InvalidOpenAiModelDimensionsMax { .. }
| UserError::InvalidSettingsDimensions { .. }
| UserError::InvalidUrl { .. }
| UserError::InvalidPrompt(_) => Code::InvalidSettingsEmbedders,
| UserError::InvalidSettingsDocumentTemplateMaxBytes { .. }
| UserError::InvalidPrompt(_)
| UserError::InvalidDisableBinaryQuantization { .. } => {
Code::InvalidSettingsEmbedders
}
UserError::TooManyEmbedders(_) => Code::InvalidSettingsEmbedders,
UserError::InvalidPromptForEmbeddings(..) => Code::InvalidSettingsEmbedders,
UserError::NoPrimaryKeyCandidateFound => Code::IndexPrimaryKeyNoCandidateFound,

View File

@ -1,3 +1,4 @@
use std::borrow::Borrow;
use std::error::Error;
use std::fmt;
use std::str::FromStr;
@ -8,7 +9,7 @@ use crate::error::{Code, ErrorCode};
/// An index uid is composed of only ascii alphanumeric characters, - and _, between 1 and 400
/// bytes long
#[derive(Debug, Clone, PartialEq, Eq, Deserr)]
#[derive(Debug, Clone, PartialEq, Eq, Deserr, PartialOrd, Ord)]
#[deserr(try_from(String) = IndexUid::try_from -> IndexUidFormatError)]
pub struct IndexUid(String);
@ -70,6 +71,12 @@ impl From<IndexUid> for String {
}
}
impl Borrow<String> for IndexUid {
fn borrow(&self) -> &String {
&self.0
}
}
#[derive(Debug)]
pub struct IndexUidFormatError {
pub invalid_uid: String,

View File

@ -1,134 +1,6 @@
use deserr::Deserr;
use milli::LocalizedAttributesRule;
use serde::{Deserialize, Serialize};
use serde_json::json;
/// Generate a Locale enum and its From and Into implementations for milli::tokenizer::Language.
///
/// this enum implements `Deserr` in order to be used in the API.
macro_rules! make_locale {
($($language:tt), +) => {
#[derive(Debug, Copy, Clone, PartialEq, Eq, Deserr, Serialize, Deserialize, Ord, PartialOrd)]
#[deserr(rename_all = camelCase)]
#[serde(rename_all = "camelCase")]
pub enum Locale {
$($language),+,
}
impl From<milli::tokenizer::Language> for Locale {
fn from(other: milli::tokenizer::Language) -> Locale {
match other {
$(milli::tokenizer::Language::$language => Locale::$language), +
}
}
}
impl From<Locale> for milli::tokenizer::Language {
fn from(other: Locale) -> milli::tokenizer::Language {
match other {
$(Locale::$language => milli::tokenizer::Language::$language), +,
}
}
}
#[derive(Debug)]
pub struct LocaleFormatError {
pub invalid_locale: String,
}
impl std::fmt::Display for LocaleFormatError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let valid_locales = [$(Locale::$language),+].iter().map(|l| format!("`{}`", json!(l).as_str().unwrap())).collect::<Vec<_>>().join(", ");
write!(f, "Unsupported locale `{}`, expected one of {}", self.invalid_locale, valid_locales)
}
}
};
}
make_locale! {
Epo,
Eng,
Rus,
Cmn,
Spa,
Por,
Ita,
Ben,
Fra,
Deu,
Ukr,
Kat,
Ara,
Hin,
Jpn,
Heb,
Yid,
Pol,
Amh,
Jav,
Kor,
Nob,
Dan,
Swe,
Fin,
Tur,
Nld,
Hun,
Ces,
Ell,
Bul,
Bel,
Mar,
Kan,
Ron,
Slv,
Hrv,
Srp,
Mkd,
Lit,
Lav,
Est,
Tam,
Vie,
Urd,
Tha,
Guj,
Uzb,
Pan,
Aze,
Ind,
Tel,
Pes,
Mal,
Ori,
Mya,
Nep,
Sin,
Khm,
Tuk,
Aka,
Zul,
Sna,
Afr,
Lat,
Slk,
Cat,
Tgl,
Hye
}
impl std::error::Error for LocaleFormatError {}
impl std::str::FromStr for Locale {
type Err = LocaleFormatError;
fn from_str(s: &str) -> Result<Self, Self::Err> {
milli::tokenizer::Language::from_code(s)
.map(Self::from)
.ok_or(LocaleFormatError { invalid_locale: s.to_string() })
}
}
#[derive(Debug, Clone, PartialEq, Eq, Deserr, Serialize, Deserialize)]
#[deserr(rename_all = camelCase)]
@ -155,3 +27,140 @@ impl From<LocalizedAttributesRuleView> for LocalizedAttributesRule {
}
}
}
/// Generate a Locale enum and its From and Into implementations for milli::tokenizer::Language.
///
/// this enum implements `Deserr` in order to be used in the API.
macro_rules! make_locale {
($(($iso_639_1:ident, $iso_639_1_str:expr) => ($iso_639_3:ident, $iso_639_3_str:expr),)+) => {
#[derive(Debug, Copy, Clone, PartialEq, Eq, Deserr, Serialize, Deserialize, Ord, PartialOrd)]
#[deserr(rename_all = camelCase)]
#[serde(rename_all = "camelCase")]
pub enum Locale {
$($iso_639_1,)+
$($iso_639_3,)+
Cmn,
}
impl From<milli::tokenizer::Language> for Locale {
fn from(other: milli::tokenizer::Language) -> Locale {
match other {
$(milli::tokenizer::Language::$iso_639_3 => Locale::$iso_639_3,)+
milli::tokenizer::Language::Cmn => Locale::Cmn,
}
}
}
impl From<Locale> for milli::tokenizer::Language {
fn from(other: Locale) -> milli::tokenizer::Language {
match other {
$(Locale::$iso_639_1 => milli::tokenizer::Language::$iso_639_3,)+
$(Locale::$iso_639_3 => milli::tokenizer::Language::$iso_639_3,)+
Locale::Cmn => milli::tokenizer::Language::Cmn,
}
}
}
impl std::str::FromStr for Locale {
type Err = LocaleFormatError;
fn from_str(s: &str) -> Result<Self, Self::Err> {
let locale = match s {
$($iso_639_1_str => Locale::$iso_639_1,)+
$($iso_639_3_str => Locale::$iso_639_3,)+
"cmn" => Locale::Cmn,
_ => return Err(LocaleFormatError { invalid_locale: s.to_string() }),
};
Ok(locale)
}
}
#[derive(Debug)]
pub struct LocaleFormatError {
pub invalid_locale: String,
}
impl std::fmt::Display for LocaleFormatError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let mut valid_locales = [$($iso_639_1_str),+,$($iso_639_3_str),+,"cmn"];
valid_locales.sort_by(|left, right| left.len().cmp(&right.len()).then(left.cmp(right)));
write!(f, "Unsupported locale `{}`, expected one of {}", self.invalid_locale, valid_locales.join(", "))
}
}
impl std::error::Error for LocaleFormatError {}
};
}
make_locale!(
(Af, "af") => (Afr, "afr"),
(Ak, "ak") => (Aka, "aka"),
(Am, "am") => (Amh, "amh"),
(Ar, "ar") => (Ara, "ara"),
(Az, "az") => (Aze, "aze"),
(Be, "be") => (Bel, "bel"),
(Bn, "bn") => (Ben, "ben"),
(Bg, "bg") => (Bul, "bul"),
(Ca, "ca") => (Cat, "cat"),
(Cs, "cs") => (Ces, "ces"),
(Da, "da") => (Dan, "dan"),
(De, "de") => (Deu, "deu"),
(El, "el") => (Ell, "ell"),
(En, "en") => (Eng, "eng"),
(Eo, "eo") => (Epo, "epo"),
(Et, "et") => (Est, "est"),
(Fi, "fi") => (Fin, "fin"),
(Fr, "fr") => (Fra, "fra"),
(Gu, "gu") => (Guj, "guj"),
(He, "he") => (Heb, "heb"),
(Hi, "hi") => (Hin, "hin"),
(Hr, "hr") => (Hrv, "hrv"),
(Hu, "hu") => (Hun, "hun"),
(Hy, "hy") => (Hye, "hye"),
(Id, "id") => (Ind, "ind"),
(It, "it") => (Ita, "ita"),
(Jv, "jv") => (Jav, "jav"),
(Ja, "ja") => (Jpn, "jpn"),
(Kn, "kn") => (Kan, "kan"),
(Ka, "ka") => (Kat, "kat"),
(Km, "km") => (Khm, "khm"),
(Ko, "ko") => (Kor, "kor"),
(La, "la") => (Lat, "lat"),
(Lv, "lv") => (Lav, "lav"),
(Lt, "lt") => (Lit, "lit"),
(Ml, "ml") => (Mal, "mal"),
(Mr, "mr") => (Mar, "mar"),
(Mk, "mk") => (Mkd, "mkd"),
(My, "my") => (Mya, "mya"),
(Ne, "ne") => (Nep, "nep"),
(Nl, "nl") => (Nld, "nld"),
(Nb, "nb") => (Nob, "nob"),
(Or, "or") => (Ori, "ori"),
(Pa, "pa") => (Pan, "pan"),
(Fa, "fa") => (Pes, "pes"),
(Pl, "pl") => (Pol, "pol"),
(Pt, "pt") => (Por, "por"),
(Ro, "ro") => (Ron, "ron"),
(Ru, "ru") => (Rus, "rus"),
(Si, "si") => (Sin, "sin"),
(Sk, "sk") => (Slk, "slk"),
(Sl, "sl") => (Slv, "slv"),
(Sn, "sn") => (Sna, "sna"),
(Es, "es") => (Spa, "spa"),
(Sr, "sr") => (Srp, "srp"),
(Sv, "sv") => (Swe, "swe"),
(Ta, "ta") => (Tam, "tam"),
(Te, "te") => (Tel, "tel"),
(Tl, "tl") => (Tgl, "tgl"),
(Th, "th") => (Tha, "tha"),
(Tk, "tk") => (Tuk, "tuk"),
(Tr, "tr") => (Tur, "tur"),
(Uk, "uk") => (Ukr, "ukr"),
(Ur, "ur") => (Urd, "urd"),
(Uz, "uz") => (Uzb, "uzb"),
(Vi, "vi") => (Vie, "vie"),
(Yi, "yi") => (Yid, "yid"),
(Zh, "zh") => (Zho, "zho"),
(Zu, "zu") => (Zul, "zul"),
);

View File

@ -10,38 +10,52 @@ static VERSION_MINOR: &str = env!("CARGO_PKG_VERSION_MINOR");
static VERSION_PATCH: &str = env!("CARGO_PKG_VERSION_PATCH");
/// Persists the version of the current Meilisearch binary to a VERSION file
pub fn create_version_file(db_path: &Path) -> io::Result<()> {
pub fn create_current_version_file(db_path: &Path) -> io::Result<()> {
create_version_file(db_path, VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH)
}
pub fn create_version_file(
db_path: &Path,
major: &str,
minor: &str,
patch: &str,
) -> io::Result<()> {
let version_path = db_path.join(VERSION_FILE_NAME);
fs::write(version_path, format!("{}.{}.{}", VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH))
fs::write(version_path, format!("{}.{}.{}", major, minor, patch))
}
/// Ensures Meilisearch version is compatible with the database, returns an error versions mismatch.
pub fn check_version_file(db_path: &Path) -> anyhow::Result<()> {
let version_path = db_path.join(VERSION_FILE_NAME);
let (major, minor, patch) = get_version(db_path)?;
match fs::read_to_string(version_path) {
Ok(version) => {
let version_components = version.split('.').collect::<Vec<_>>();
let (major, minor, patch) = match &version_components[..] {
[major, minor, patch] => (major.to_string(), minor.to_string(), patch.to_string()),
_ => return Err(VersionFileError::MalformedVersionFile.into()),
};
if major != VERSION_MAJOR || minor != VERSION_MINOR {
return Err(VersionFileError::VersionMismatch { major, minor, patch }.into());
}
}
Err(error) => {
return match error.kind() {
ErrorKind::NotFound => Err(VersionFileError::MissingVersionFile.into()),
_ => Err(error.into()),
}
}
if major != VERSION_MAJOR || minor != VERSION_MINOR {
return Err(VersionFileError::VersionMismatch { major, minor, patch }.into());
}
Ok(())
}
pub fn get_version(db_path: &Path) -> Result<(String, String, String), VersionFileError> {
let version_path = db_path.join(VERSION_FILE_NAME);
match fs::read_to_string(version_path) {
Ok(version) => parse_version(&version),
Err(error) => match error.kind() {
ErrorKind::NotFound => Err(VersionFileError::MissingVersionFile),
_ => Err(error.into()),
},
}
}
pub fn parse_version(version: &str) -> Result<(String, String, String), VersionFileError> {
let version_components = version.split('.').collect::<Vec<_>>();
let (major, minor, patch) = match &version_components[..] {
[major, minor, patch] => (major.to_string(), minor.to_string(), patch.to_string()),
_ => return Err(VersionFileError::MalformedVersionFile),
};
Ok((major, minor, patch))
}
#[derive(thiserror::Error, Debug)]
pub enum VersionFileError {
#[error(
@ -58,4 +72,7 @@ pub enum VersionFileError {
env!("CARGO_PKG_VERSION").to_string()
)]
VersionMismatch { major: String, minor: String, patch: String },
#[error(transparent)]
IoError(#[from] std::io::Error),
}

View File

@ -57,7 +57,7 @@ meilisearch-types = { path = "../meilisearch-types" }
mimalloc = { version = "0.1.43", default-features = false }
mime = "0.3.17"
num_cpus = "1.16.0"
obkv = "0.2.2"
obkv = { git = "https://github.com/kerollmops/obkv", branch = "unsized-kvreader" }
once_cell = "1.19.0"
ordered-float = "4.2.1"
parking_lot = "0.12.3"
@ -153,6 +153,7 @@ greek = ["meilisearch-types/greek"]
khmer = ["meilisearch-types/khmer"]
vietnamese = ["meilisearch-types/vietnamese"]
swedish-recomposition = ["meilisearch-types/swedish-recomposition"]
german = ["meilisearch-types/german"]
[package.metadata.mini-dashboard]
assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.14/build.zip"

View File

@ -646,8 +646,6 @@ pub struct SearchAggregator {
max_vector_size: usize,
// Whether the semantic ratio passed to a hybrid search equals the default ratio.
semantic_ratio: bool,
// Whether a non-default embedder was specified
embedder: bool,
hybrid: bool,
retrieve_vectors: bool,
@ -795,7 +793,6 @@ impl SearchAggregator {
if let Some(hybrid) = hybrid {
ret.semantic_ratio = hybrid.semantic_ratio != DEFAULT_SEMANTIC_RATIO();
ret.embedder = hybrid.embedder.is_some();
ret.hybrid = true;
}
@ -863,7 +860,6 @@ impl SearchAggregator {
show_ranking_score,
show_ranking_score_details,
semantic_ratio,
embedder,
hybrid,
total_degraded,
total_used_negative_operator,
@ -923,7 +919,6 @@ impl SearchAggregator {
self.retrieve_vectors |= retrieve_vectors;
self.semantic_ratio |= semantic_ratio;
self.hybrid |= hybrid;
self.embedder |= embedder;
// pagination
self.max_limit = self.max_limit.max(max_limit);
@ -999,7 +994,6 @@ impl SearchAggregator {
show_ranking_score,
show_ranking_score_details,
semantic_ratio,
embedder,
hybrid,
total_degraded,
total_used_negative_operator,
@ -1051,7 +1045,6 @@ impl SearchAggregator {
"hybrid": {
"enabled": hybrid,
"semantic_ratio": semantic_ratio,
"embedder": embedder,
},
"pagination": {
"max_limit": max_limit,
@ -1782,7 +1775,6 @@ pub struct SimilarAggregator {
used_syntax: HashMap<String, usize>,
// Whether a non-default embedder was specified
embedder: bool,
retrieve_vectors: bool,
// pagination
@ -1803,7 +1795,7 @@ impl SimilarAggregator {
pub fn from_query(query: &SimilarQuery, request: &HttpRequest) -> Self {
let SimilarQuery {
id: _,
embedder,
embedder: _,
offset,
limit,
attributes_to_retrieve: _,
@ -1851,7 +1843,6 @@ impl SimilarAggregator {
ret.show_ranking_score_details = *show_ranking_score_details;
ret.ranking_score_threshold = ranking_score_threshold.is_some();
ret.embedder = embedder.is_some();
ret.retrieve_vectors = *retrieve_vectors;
ret
@ -1883,7 +1874,6 @@ impl SimilarAggregator {
max_attributes_to_retrieve,
show_ranking_score,
show_ranking_score_details,
embedder,
ranking_score_threshold,
retrieve_vectors,
} = other;
@ -1914,7 +1904,6 @@ impl SimilarAggregator {
*used_syntax = used_syntax.saturating_add(value);
}
self.embedder |= embedder;
self.retrieve_vectors |= retrieve_vectors;
// pagination
@ -1948,7 +1937,6 @@ impl SimilarAggregator {
max_attributes_to_retrieve,
show_ranking_score,
show_ranking_score_details,
embedder,
ranking_score_threshold,
retrieve_vectors,
} = self;
@ -1980,9 +1968,6 @@ impl SimilarAggregator {
"vector": {
"retrieve_vectors": retrieve_vectors,
},
"hybrid": {
"embedder": embedder,
},
"pagination": {
"max_limit": max_limit,
"max_offset": max_offset,

View File

@ -4,6 +4,7 @@ use byte_unit::{Byte, UnitType};
use meilisearch_types::document_formats::{DocumentFormatError, PayloadType};
use meilisearch_types::error::{Code, ErrorCode, ResponseError};
use meilisearch_types::index_uid::{IndexUid, IndexUidFormatError};
use meilisearch_types::milli::OrderBy;
use serde_json::Value;
use tokio::task::JoinError;
@ -27,10 +28,20 @@ pub enum MeilisearchHttpError {
EmptyFilter,
#[error("Invalid syntax for the filter parameter: `expected {}, found: {1}`.", .0.join(", "))]
InvalidExpression(&'static [&'static str], Value),
#[error("Using `federationOptions` is not allowed in a non-federated search.\n Hint: remove `federationOptions` from query #{0} or add `federation: {{}}` to the request.")]
#[error("Using `federationOptions` is not allowed in a non-federated search.\n - Hint: remove `federationOptions` from query #{0} or add `federation` to the request.")]
FederationOptionsInNonFederatedRequest(usize),
#[error("Inside `.queries[{0}]`: Using pagination options is not allowed in federated queries.\n Hint: remove `{1}` from query #{0} or remove `federation: {{}}` from the request")]
#[error("Inside `.queries[{0}]`: Using pagination options is not allowed in federated queries.\n - Hint: remove `{1}` from query #{0} or remove `federation` from the request\n - Hint: pass `federation.limit` and `federation.offset` for pagination in federated search")]
PaginationInFederatedQuery(usize, &'static str),
#[error("Inside `.queries[{0}]`: Using facet options is not allowed in federated queries.\n - Hint: remove `facets` from query #{0} or remove `federation` from the request\n - Hint: pass `federation.facetsByIndex.{1}: {2:?}` for facets in federated search")]
FacetsInFederatedQuery(usize, String, Vec<String>),
#[error("Inconsistent order for values in facet `{facet}`: index `{previous_uid}` orders {previous_facet_order}, but index `{current_uid}` orders {index_facet_order}.\n - Hint: Remove `federation.mergeFacets` or change `faceting.sortFacetValuesBy` to be consistent in settings.")]
InconsistentFacetOrder {
facet: String,
previous_facet_order: OrderBy,
previous_uid: String,
index_facet_order: OrderBy,
current_uid: String,
},
#[error("A {0} payload is missing.")]
MissingPayload(PayloadType),
#[error("Too many search requests running at the same time: {0}. Retry after 10s.")]
@ -61,7 +72,7 @@ pub enum MeilisearchHttpError {
DocumentFormat(#[from] DocumentFormatError),
#[error(transparent)]
Join(#[from] JoinError),
#[error("Invalid request: missing `hybrid` parameter when both `q` and `vector` are present.")]
#[error("Invalid request: missing `hybrid` parameter when `vector` is present.")]
MissingSearchHybrid,
}
@ -96,6 +107,10 @@ impl ErrorCode for MeilisearchHttpError {
MeilisearchHttpError::PaginationInFederatedQuery(_, _) => {
Code::InvalidMultiSearchQueryPagination
}
MeilisearchHttpError::FacetsInFederatedQuery(..) => Code::InvalidMultiSearchQueryFacets,
MeilisearchHttpError::InconsistentFacetOrder { .. } => {
Code::InvalidMultiSearchFacetOrder
}
}
}
}

View File

@ -13,11 +13,10 @@ pub mod search_queue;
use std::fs::File;
use std::io::{BufReader, BufWriter};
use std::num::NonZeroUsize;
use std::path::Path;
use std::str::FromStr;
use std::sync::Arc;
use std::thread::{self, available_parallelism};
use std::thread;
use std::time::Duration;
use actix_cors::Cors;
@ -37,7 +36,7 @@ use meilisearch_types::milli::documents::{DocumentsBatchBuilder, DocumentsBatchR
use meilisearch_types::milli::update::{IndexDocumentsConfig, IndexDocumentsMethod};
use meilisearch_types::settings::apply_settings_to_builder;
use meilisearch_types::tasks::KindWithContent;
use meilisearch_types::versioning::{check_version_file, create_version_file};
use meilisearch_types::versioning::{check_version_file, create_current_version_file};
use meilisearch_types::{compression, milli, VERSION_FILE_NAME};
pub use option::Opt;
use option::ScheduleSnapshot;
@ -118,6 +117,7 @@ pub type LogStderrType = tracing_subscriber::filter::Filtered<
pub fn create_app(
index_scheduler: Data<IndexScheduler>,
auth_controller: Data<AuthController>,
search_queue: Data<SearchQueue>,
opt: Opt,
logs: (LogRouteHandle, LogStderrHandle),
analytics: Arc<dyn Analytics>,
@ -137,6 +137,7 @@ pub fn create_app(
s,
index_scheduler.clone(),
auth_controller.clone(),
search_queue.clone(),
&opt,
logs,
analytics.clone(),
@ -318,7 +319,7 @@ fn open_or_create_database_unchecked(
match (
index_scheduler_builder(),
auth_controller.map_err(anyhow::Error::from),
create_version_file(&opt.db_path).map_err(anyhow::Error::from),
create_current_version_file(&opt.db_path).map_err(anyhow::Error::from),
) {
(Ok(i), Ok(a), Ok(())) => Ok((i, a)),
(Err(e), _, _) | (_, Err(e), _) | (_, _, Err(e)) => {
@ -469,19 +470,16 @@ pub fn configure_data(
config: &mut web::ServiceConfig,
index_scheduler: Data<IndexScheduler>,
auth: Data<AuthController>,
search_queue: Data<SearchQueue>,
opt: &Opt,
(logs_route, logs_stderr): (LogRouteHandle, LogStderrHandle),
analytics: Arc<dyn Analytics>,
) {
let search_queue = SearchQueue::new(
opt.experimental_search_queue_size,
available_parallelism().unwrap_or(NonZeroUsize::new(2).unwrap()),
);
let http_payload_size_limit = opt.http_payload_size_limit.as_u64() as usize;
config
.app_data(index_scheduler)
.app_data(auth)
.app_data(web::Data::new(search_queue))
.app_data(search_queue)
.app_data(web::Data::from(analytics))
.app_data(web::Data::new(logs_route))
.app_data(web::Data::new(logs_stderr))

View File

@ -1,8 +1,10 @@
use std::env;
use std::io::{stderr, LineWriter, Write};
use std::num::NonZeroUsize;
use std::path::PathBuf;
use std::str::FromStr;
use std::sync::Arc;
use std::thread::available_parallelism;
use actix_web::http::KeepAlive;
use actix_web::web::Data;
@ -11,6 +13,7 @@ use index_scheduler::IndexScheduler;
use is_terminal::IsTerminal;
use meilisearch::analytics::Analytics;
use meilisearch::option::LogMode;
use meilisearch::search_queue::SearchQueue;
use meilisearch::{
analytics, create_app, setup_meilisearch, LogRouteHandle, LogRouteType, LogStderrHandle,
LogStderrType, Opt, SubscriberForSecondLayer,
@ -148,11 +151,17 @@ async fn run_http(
let opt_clone = opt.clone();
let index_scheduler = Data::from(index_scheduler);
let auth_controller = Data::from(auth_controller);
let search_queue = SearchQueue::new(
opt.experimental_search_queue_size,
available_parallelism().unwrap_or(NonZeroUsize::new(2).unwrap()),
);
let search_queue = Data::new(search_queue);
let http_server = HttpServer::new(move || {
create_app(
index_scheduler.clone(),
auth_controller.clone(),
search_queue.clone(),
opt.clone(),
logs.clone(),
analytics.clone(),

View File

@ -55,16 +55,17 @@ where
let index_scheduler = req.app_data::<Data<IndexScheduler>>().unwrap();
let features = index_scheduler.features();
let request_path = req.path();
let request_pattern = req.match_pattern();
let metric_path = request_pattern.as_ref().map_or(request_path, String::as_str).to_string();
let request_method = req.method().to_string();
if features.check_metrics().is_ok() {
let request_path = req.path();
let is_registered_resource = req.resource_map().has_resource(request_path);
if is_registered_resource {
let request_pattern = req.match_pattern();
let metric_path = request_pattern.as_ref().map_or(request_path, String::as_str);
let request_method = req.method().to_string();
histogram_timer = Some(
crate::metrics::MEILISEARCH_HTTP_RESPONSE_TIME_SECONDS
.with_label_values(&[&request_method, metric_path])
.with_label_values(&[&request_method, &metric_path])
.start_timer(),
);
}
@ -76,11 +77,7 @@ where
let res = fut.await?;
crate::metrics::MEILISEARCH_HTTP_REQUESTS_TOTAL
.with_label_values(&[
res.request().method().as_str(),
res.request().path(),
res.status().as_str(),
])
.with_label_values(&[&request_method, &metric_path, res.status().as_str()])
.inc();
if let Some(histogram_timer) = histogram_timer {

View File

@ -81,7 +81,7 @@ pub async fn search(
let index = index_scheduler.index(&index_uid)?;
let features = index_scheduler.features();
let search_kind = search_kind(&search_query, &index_scheduler, &index, features)?;
let _permit = search_queue.try_get_search_permit().await?;
let permit = search_queue.try_get_search_permit().await?;
let search_result = tokio::task::spawn_blocking(move || {
perform_facet_search(
&index,
@ -93,7 +93,9 @@ pub async fn search(
locales,
)
})
.await?;
.await;
permit.drop().await;
let search_result = search_result?;
if let Ok(ref search_result) = search_result {
aggregate.succeed(search_result);

View File

@ -128,8 +128,10 @@ impl std::ops::Deref for SemanticRatioGet {
}
}
impl From<SearchQueryGet> for SearchQuery {
fn from(other: SearchQueryGet) -> Self {
impl TryFrom<SearchQueryGet> for SearchQuery {
type Error = ResponseError;
fn try_from(other: SearchQueryGet) -> Result<Self, Self::Error> {
let filter = match other.filter {
Some(f) => match serde_json::from_str(&f) {
Ok(v) => Some(v),
@ -140,19 +142,28 @@ impl From<SearchQueryGet> for SearchQuery {
let hybrid = match (other.hybrid_embedder, other.hybrid_semantic_ratio) {
(None, None) => None,
(None, Some(semantic_ratio)) => {
Some(HybridQuery { semantic_ratio: *semantic_ratio, embedder: None })
(None, Some(_)) => {
return Err(ResponseError::from_msg(
"`hybridEmbedder` is mandatory when `hybridSemanticRatio` is present".into(),
meilisearch_types::error::Code::InvalidHybridQuery,
));
}
(Some(embedder), None) => {
Some(HybridQuery { semantic_ratio: DEFAULT_SEMANTIC_RATIO(), embedder })
}
(Some(embedder), None) => Some(HybridQuery {
semantic_ratio: DEFAULT_SEMANTIC_RATIO(),
embedder: Some(embedder),
}),
(Some(embedder), Some(semantic_ratio)) => {
Some(HybridQuery { semantic_ratio: *semantic_ratio, embedder: Some(embedder) })
Some(HybridQuery { semantic_ratio: *semantic_ratio, embedder })
}
};
Self {
if other.vector.is_some() && hybrid.is_none() {
return Err(ResponseError::from_msg(
"`hybridEmbedder` is mandatory when `vector` is present".into(),
meilisearch_types::error::Code::MissingSearchHybrid,
));
}
Ok(Self {
q: other.q,
vector: other.vector.map(CS::into_inner),
offset: other.offset.0,
@ -179,7 +190,7 @@ impl From<SearchQueryGet> for SearchQuery {
hybrid,
ranking_score_threshold: other.ranking_score_threshold.map(|o| o.0),
locales: other.locales.map(|o| o.into_iter().collect()),
}
})
}
}
@ -219,7 +230,7 @@ pub async fn search_with_url_query(
debug!(parameters = ?params, "Search get");
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let mut query: SearchQuery = params.into_inner().into();
let mut query: SearchQuery = params.into_inner().try_into()?;
// Tenant token search_rules.
if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid) {
@ -233,11 +244,13 @@ pub async fn search_with_url_query(
let search_kind = search_kind(&query, index_scheduler.get_ref(), &index, features)?;
let retrieve_vector = RetrieveVectors::new(query.retrieve_vectors, features)?;
let _permit = search_queue.try_get_search_permit().await?;
let permit = search_queue.try_get_search_permit().await?;
let search_result = tokio::task::spawn_blocking(move || {
perform_search(&index, query, search_kind, retrieve_vector, index_scheduler.features())
})
.await?;
.await;
permit.drop().await;
let search_result = search_result?;
if let Ok(ref search_result) = search_result {
aggregate.succeed(search_result);
}
@ -276,11 +289,13 @@ pub async fn search_with_post(
let search_kind = search_kind(&query, index_scheduler.get_ref(), &index, features)?;
let retrieve_vectors = RetrieveVectors::new(query.retrieve_vectors, features)?;
let _permit = search_queue.try_get_search_permit().await?;
let permit = search_queue.try_get_search_permit().await?;
let search_result = tokio::task::spawn_blocking(move || {
perform_search(&index, query, search_kind, retrieve_vectors, index_scheduler.features())
})
.await?;
.await;
permit.drop().await;
let search_result = search_result?;
if let Ok(ref search_result) = search_result {
aggregate.succeed(search_result);
if search_result.degraded {
@ -308,44 +323,36 @@ pub fn search_kind(
features.check_vector("Passing `hybrid` as a parameter")?;
}
// regardless of anything, always do a keyword search when we don't have a vector and the query is whitespace or missing
if query.vector.is_none() {
match &query.q {
Some(q) if q.trim().is_empty() => return Ok(SearchKind::KeywordOnly),
None => return Ok(SearchKind::KeywordOnly),
_ => {}
// handle with care, the order of cases matters, the semantics is subtle
match (query.q.as_deref(), &query.hybrid, query.vector.as_deref()) {
// empty query, no vector => placeholder search
(Some(q), _, None) if q.trim().is_empty() => Ok(SearchKind::KeywordOnly),
// no query, no vector => placeholder search
(None, _, None) => Ok(SearchKind::KeywordOnly),
// hybrid.semantic_ratio == 1.0 => vector
(_, Some(HybridQuery { semantic_ratio, embedder }), v) if **semantic_ratio == 1.0 => {
SearchKind::semantic(index_scheduler, index, embedder, v.map(|v| v.len()))
}
}
match &query.hybrid {
Some(HybridQuery { semantic_ratio, embedder }) if **semantic_ratio == 1.0 => {
Ok(SearchKind::semantic(
index_scheduler,
index,
embedder.as_deref(),
query.vector.as_ref().map(Vec::len),
)?)
}
Some(HybridQuery { semantic_ratio, embedder: _ }) if **semantic_ratio == 0.0 => {
// hybrid.semantic_ratio == 0.0 => keyword
(_, Some(HybridQuery { semantic_ratio, embedder: _ }), _) if **semantic_ratio == 0.0 => {
Ok(SearchKind::KeywordOnly)
}
Some(HybridQuery { semantic_ratio, embedder }) => Ok(SearchKind::hybrid(
// no query, hybrid, vector => semantic
(None, Some(HybridQuery { semantic_ratio: _, embedder }), Some(v)) => {
SearchKind::semantic(index_scheduler, index, embedder, Some(v.len()))
}
// query, no hybrid, no vector => keyword
(Some(_), None, None) => Ok(SearchKind::KeywordOnly),
// query, hybrid, maybe vector => hybrid
(Some(_), Some(HybridQuery { semantic_ratio, embedder }), v) => SearchKind::hybrid(
index_scheduler,
index,
embedder.as_deref(),
embedder,
**semantic_ratio,
query.vector.as_ref().map(Vec::len),
)?),
None => match (query.q.as_deref(), query.vector.as_deref()) {
(_query, None) => Ok(SearchKind::KeywordOnly),
(None, Some(_vector)) => Ok(SearchKind::semantic(
index_scheduler,
index,
None,
query.vector.as_ref().map(Vec::len),
)?),
(Some(_), Some(_)) => Err(MeilisearchHttpError::MissingSearchHybrid.into()),
},
v.map(|v| v.len()),
),
(_, None, Some(_)) => Err(MeilisearchHttpError::MissingSearchHybrid.into()),
}
}

View File

@ -636,11 +636,26 @@ fn embedder_analytics(
.any(|config| config.document_template.set().is_some())
});
let document_template_max_bytes = setting.as_ref().and_then(|map| {
map.values()
.filter_map(|config| config.clone().set())
.filter_map(|config| config.document_template_max_bytes.set())
.max()
});
let binary_quantization_used = setting.as_ref().map(|map| {
map.values()
.filter_map(|config| config.clone().set())
.any(|config| config.binary_quantized.set().is_some())
});
json!(
{
"total": setting.as_ref().map(|s| s.len()),
"sources": sources,
"document_template_used": document_template_used,
"document_template_max_bytes": document_template_max_bytes,
"binary_quantization_used": binary_quantization_used,
}
)
}

View File

@ -102,8 +102,8 @@ async fn similar(
let index = index_scheduler.index(&index_uid)?;
let (embedder_name, embedder) =
SearchKind::embedder(&index_scheduler, &index, query.embedder.as_deref(), None)?;
let (embedder_name, embedder, quantized) =
SearchKind::embedder(&index_scheduler, &index, &query.embedder, None)?;
tokio::task::spawn_blocking(move || {
perform_similar(
@ -111,6 +111,7 @@ async fn similar(
query,
embedder_name,
embedder,
quantized,
retrieve_vectors,
index_scheduler.features(),
)
@ -139,8 +140,8 @@ pub struct SimilarQueryGet {
show_ranking_score_details: Param<bool>,
#[deserr(default, error = DeserrQueryParamError<InvalidSimilarRankingScoreThreshold>, default)]
pub ranking_score_threshold: Option<RankingScoreThresholdGet>,
#[deserr(default, error = DeserrQueryParamError<InvalidEmbedder>)]
pub embedder: Option<String>,
#[deserr(error = DeserrQueryParamError<InvalidEmbedder>)]
pub embedder: String,
}
#[derive(Debug, Clone, Copy, PartialEq, deserr::Deserr)]

View File

@ -39,7 +39,7 @@ pub async fn multi_search_with_post(
) -> Result<HttpResponse, ResponseError> {
// Since we don't want to process half of the search requests and then get a permit refused
// we're going to get one permit for the whole duration of the multi-search request.
let _permit = search_queue.try_get_search_permit().await?;
let permit = search_queue.try_get_search_permit().await?;
let federated_search = params.into_inner();
@ -81,6 +81,7 @@ pub async fn multi_search_with_post(
perform_federated_search(&index_scheduler, queries, federation, features)
})
.await;
permit.drop().await;
if let Ok(Ok(_)) = search_result {
multi_aggregate.succeed();
@ -143,6 +144,7 @@ pub async fn multi_search_with_post(
Ok(search_results)
}
.await;
permit.drop().await;
if search_results.is_ok() {
multi_aggregate.succeed();

View File

@ -9,20 +9,24 @@ use std::vec::{IntoIter, Vec};
use actix_http::StatusCode;
use index_scheduler::{IndexScheduler, RoFeatures};
use indexmap::IndexMap;
use meilisearch_types::deserr::DeserrJsonError;
use meilisearch_types::error::deserr_codes::{
InvalidMultiSearchWeight, InvalidSearchLimit, InvalidSearchOffset,
InvalidMultiSearchFacetsByIndex, InvalidMultiSearchMaxValuesPerFacet,
InvalidMultiSearchMergeFacets, InvalidMultiSearchWeight, InvalidSearchLimit,
InvalidSearchOffset,
};
use meilisearch_types::error::ResponseError;
use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::milli::score_details::{ScoreDetails, ScoreValue};
use meilisearch_types::milli::{self, DocumentId, TimeBudget};
use meilisearch_types::milli::{self, DocumentId, OrderBy, TimeBudget};
use roaring::RoaringBitmap;
use serde::Serialize;
use super::ranking_rules::{self, RankingRules};
use super::{
prepare_search, AttributesFormat, HitMaker, HitsInfo, RetrieveVectors, SearchHit, SearchKind,
SearchQuery, SearchQueryWithIndex,
compute_facet_distribution_stats, prepare_search, AttributesFormat, ComputedFacets, FacetStats,
HitMaker, HitsInfo, RetrieveVectors, SearchHit, SearchKind, SearchQuery, SearchQueryWithIndex,
};
use crate::error::MeilisearchHttpError;
use crate::routes::indexes::search::search_kind;
@ -73,6 +77,17 @@ pub struct Federation {
pub limit: usize,
#[deserr(default = super::DEFAULT_SEARCH_OFFSET(), error = DeserrJsonError<InvalidSearchOffset>)]
pub offset: usize,
#[deserr(default, error = DeserrJsonError<InvalidMultiSearchFacetsByIndex>)]
pub facets_by_index: BTreeMap<IndexUid, Option<Vec<String>>>,
#[deserr(default, error = DeserrJsonError<InvalidMultiSearchMergeFacets>)]
pub merge_facets: Option<MergeFacets>,
}
#[derive(Copy, Clone, Debug, deserr::Deserr, Default)]
#[deserr(error = DeserrJsonError<InvalidMultiSearchMergeFacets>, rename_all = camelCase, deny_unknown_fields)]
pub struct MergeFacets {
#[deserr(default, error = DeserrJsonError<InvalidMultiSearchMaxValuesPerFacet>)]
pub max_values_per_facet: Option<usize>,
}
#[derive(Debug, deserr::Deserr)]
@ -82,7 +97,7 @@ pub struct FederatedSearch {
#[deserr(default)]
pub federation: Option<Federation>,
}
#[derive(Serialize, Clone, PartialEq)]
#[derive(Serialize, Clone)]
#[serde(rename_all = "camelCase")]
pub struct FederatedSearchResult {
pub hits: Vec<SearchHit>,
@ -93,6 +108,13 @@ pub struct FederatedSearchResult {
#[serde(skip_serializing_if = "Option::is_none")]
pub semantic_hit_count: Option<u32>,
#[serde(skip_serializing_if = "Option::is_none")]
pub facet_distribution: Option<BTreeMap<String, IndexMap<String, u64>>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub facet_stats: Option<BTreeMap<String, FacetStats>>,
#[serde(skip_serializing_if = "FederatedFacets::is_empty")]
pub facets_by_index: FederatedFacets,
// These fields are only used for analytics purposes
#[serde(skip)]
pub degraded: bool,
@ -109,6 +131,9 @@ impl fmt::Debug for FederatedSearchResult {
semantic_hit_count,
degraded,
used_negative_operator,
facet_distribution,
facet_stats,
facets_by_index,
} = self;
let mut debug = f.debug_struct("SearchResult");
@ -122,9 +147,18 @@ impl fmt::Debug for FederatedSearchResult {
if *degraded {
debug.field("degraded", degraded);
}
if let Some(facet_distribution) = facet_distribution {
debug.field("facet_distribution", &facet_distribution);
}
if let Some(facet_stats) = facet_stats {
debug.field("facet_stats", &facet_stats);
}
if let Some(semantic_hit_count) = semantic_hit_count {
debug.field("semantic_hit_count", &semantic_hit_count);
}
if !facets_by_index.is_empty() {
debug.field("facets_by_index", &facets_by_index);
}
debug.finish()
}
@ -313,16 +347,104 @@ struct SearchHitByIndex {
}
struct SearchResultByIndex {
index: String,
hits: Vec<SearchHitByIndex>,
candidates: RoaringBitmap,
estimated_total_hits: usize,
degraded: bool,
used_negative_operator: bool,
facets: Option<ComputedFacets>,
}
#[derive(Debug, Clone, Default, Serialize)]
pub struct FederatedFacets(pub BTreeMap<String, ComputedFacets>);
impl FederatedFacets {
pub fn insert(&mut self, index: String, facets: Option<ComputedFacets>) {
if let Some(facets) = facets {
self.0.insert(index, facets);
}
}
pub fn is_empty(&self) -> bool {
self.0.is_empty()
}
pub fn merge(
self,
MergeFacets { max_values_per_facet }: MergeFacets,
facet_order: BTreeMap<String, (String, OrderBy)>,
) -> Option<ComputedFacets> {
if self.is_empty() {
return None;
}
let mut distribution: BTreeMap<String, _> = Default::default();
let mut stats: BTreeMap<String, FacetStats> = Default::default();
for facets_by_index in self.0.into_values() {
for (facet, index_distribution) in facets_by_index.distribution {
match distribution.entry(facet) {
std::collections::btree_map::Entry::Vacant(entry) => {
entry.insert(index_distribution);
}
std::collections::btree_map::Entry::Occupied(mut entry) => {
let distribution = entry.get_mut();
for (value, index_count) in index_distribution {
distribution
.entry(value)
.and_modify(|count| *count += index_count)
.or_insert(index_count);
}
}
}
}
for (facet, index_stats) in facets_by_index.stats {
match stats.entry(facet) {
std::collections::btree_map::Entry::Vacant(entry) => {
entry.insert(index_stats);
}
std::collections::btree_map::Entry::Occupied(mut entry) => {
let stats = entry.get_mut();
stats.min = f64::min(stats.min, index_stats.min);
stats.max = f64::max(stats.max, index_stats.max);
}
}
}
}
// fixup order
for (facet, values) in &mut distribution {
let order_by = facet_order.get(facet).map(|(_, order)| *order).unwrap_or_default();
match order_by {
OrderBy::Lexicographic => {
values.sort_unstable_by(|left, _, right, _| left.cmp(right))
}
OrderBy::Count => {
values.sort_unstable_by(|_, left, _, right| {
left.cmp(right)
// biggest first
.reverse()
})
}
}
if let Some(max_values_per_facet) = max_values_per_facet {
values.truncate(max_values_per_facet)
};
}
Some(ComputedFacets { distribution, stats })
}
}
pub fn perform_federated_search(
index_scheduler: &IndexScheduler,
queries: Vec<SearchQueryWithIndex>,
federation: Federation,
mut federation: Federation,
features: RoFeatures,
) -> Result<FederatedSearchResult, ResponseError> {
let before_search = std::time::Instant::now();
@ -342,6 +464,16 @@ pub fn perform_federated_search(
.into());
}
if let Some(facets) = federated_query.has_facets() {
let facets = facets.to_owned();
return Err(MeilisearchHttpError::FacetsInFederatedQuery(
query_index,
federated_query.index_uid.into_inner(),
facets,
)
.into());
}
let (index_uid, query, federation_options) = federated_query.into_index_query_federation();
queries_by_index.entry(index_uid.into_inner()).or_default().push(QueryByIndex {
@ -353,13 +485,24 @@ pub fn perform_federated_search(
// 2. perform queries, merge and make hits index by index
let required_hit_count = federation.limit + federation.offset;
// In step (2), semantic_hit_count will be set to Some(0) if any search kind uses semantic
// Then in step (3), we'll update its value if there is any semantic search
let mut semantic_hit_count = None;
let mut results_by_index = Vec::with_capacity(queries_by_index.len());
let mut previous_query_data: Option<(RankingRules, usize, String)> = None;
// remember the order and name of first index for each facet when merging with index settings
// to detect if the order is inconsistent for a facet.
let mut facet_order: Option<BTreeMap<String, (String, OrderBy)>> = match federation.merge_facets
{
Some(MergeFacets { .. }) => Some(Default::default()),
_ => None,
};
for (index_uid, queries) in queries_by_index {
let first_query_index = queries.first().map(|query| query.query_index);
let index = match index_scheduler.index(&index_uid) {
Ok(index) => index,
Err(err) => {
@ -367,9 +510,8 @@ pub fn perform_federated_search(
// Patch the HTTP status code to 400 as it defaults to 404 for `index_not_found`, but
// here the resource not found is not part of the URL.
err.code = StatusCode::BAD_REQUEST;
if let Some(query) = queries.first() {
err.message =
format!("Inside `.queries[{}]`: {}", query.query_index, err.message);
if let Some(query_index) = first_query_index {
err.message = format!("Inside `.queries[{}]`: {}", query_index, err.message);
}
return Err(err);
}
@ -394,6 +536,23 @@ pub fn perform_federated_search(
let mut used_negative_operator = false;
let mut candidates = RoaringBitmap::new();
let facets_by_index = federation.facets_by_index.remove(&index_uid).flatten();
// TODO: recover the max size + facets_by_index as return value of this function so as not to ask it for all queries
if let Err(mut error) =
check_facet_order(&mut facet_order, &index_uid, &facets_by_index, &index, &rtxn)
{
error.message = format!(
"Inside `.federation.facetsByIndex.{index_uid}`: {error}{}",
if let Some(query_index) = first_query_index {
format!("\n - Note: index `{index_uid}` used in `.queries[{query_index}]`")
} else {
Default::default()
}
);
return Err(error);
}
// 2.1. Compute all candidates for each query in the index
let mut results_by_query = Vec::with_capacity(queries.len());
@ -562,34 +721,116 @@ pub fn perform_federated_search(
.collect();
let merged_result = merged_result?;
let estimated_total_hits = candidates.len() as usize;
let facets = facets_by_index
.map(|facets_by_index| {
compute_facet_distribution_stats(
&facets_by_index,
&index,
&rtxn,
candidates,
super::Route::MultiSearch,
)
})
.transpose()
.map_err(|mut error| {
error.message = format!(
"Inside `.federation.facetsByIndex.{index_uid}`: {}{}",
error.message,
if let Some(query_index) = first_query_index {
format!("\n - Note: index `{index_uid}` used in `.queries[{query_index}]`")
} else {
Default::default()
}
);
error
})?;
results_by_index.push(SearchResultByIndex {
index: index_uid,
hits: merged_result,
candidates,
estimated_total_hits,
degraded,
used_negative_operator,
facets,
});
}
// bonus step, make sure to return an error if an index wants a non-faceted field, even if no query actually uses that index.
for (index_uid, facets) in federation.facets_by_index {
let index = match index_scheduler.index(&index_uid) {
Ok(index) => index,
Err(err) => {
let mut err = ResponseError::from(err);
// Patch the HTTP status code to 400 as it defaults to 404 for `index_not_found`, but
// here the resource not found is not part of the URL.
err.code = StatusCode::BAD_REQUEST;
err.message = format!(
"Inside `.federation.facetsByIndex.{index_uid}`: {}\n - Note: index `{index_uid}` is not used in queries",
err.message
);
return Err(err);
}
};
// Important: this is the only transaction we'll use for this index during this federated search
let rtxn = index.read_txn()?;
if let Err(mut error) =
check_facet_order(&mut facet_order, &index_uid, &facets, &index, &rtxn)
{
error.message = format!(
"Inside `.federation.facetsByIndex.{index_uid}`: {error}\n - Note: index `{index_uid}` is not used in queries",
);
return Err(error);
}
if let Some(facets) = facets {
if let Err(mut error) = compute_facet_distribution_stats(
&facets,
&index,
&rtxn,
Default::default(),
super::Route::MultiSearch,
) {
error.message =
format!("Inside `.federation.facetsByIndex.{index_uid}`: {}\n - Note: index `{index_uid}` is not used in queries", error.message);
return Err(error);
}
}
}
// 3. merge hits and metadata across indexes
// 3.1 merge metadata
let (estimated_total_hits, degraded, used_negative_operator) = {
let (estimated_total_hits, degraded, used_negative_operator, facets) = {
let mut estimated_total_hits = 0;
let mut degraded = false;
let mut used_negative_operator = false;
let mut facets: FederatedFacets = FederatedFacets::default();
for SearchResultByIndex {
index,
hits: _,
candidates,
estimated_total_hits: estimated_total_hits_by_index,
facets: facets_by_index,
degraded: degraded_by_index,
used_negative_operator: used_negative_operator_by_index,
} in &results_by_index
} in &mut results_by_index
{
estimated_total_hits += candidates.len() as usize;
estimated_total_hits += *estimated_total_hits_by_index;
degraded |= *degraded_by_index;
used_negative_operator |= *used_negative_operator_by_index;
let facets_by_index = std::mem::take(facets_by_index);
let index = std::mem::take(index);
facets.insert(index, facets_by_index);
}
(estimated_total_hits, degraded, used_negative_operator)
(estimated_total_hits, degraded, used_negative_operator, facets)
};
// 3.2 merge hits
@ -606,6 +847,20 @@ pub fn perform_federated_search(
.map(|hit| hit.hit)
.collect();
let (facet_distribution, facet_stats, facets_by_index) =
match federation.merge_facets.zip(facet_order) {
Some((merge_facets, facet_order)) => {
let facets = facets.merge(merge_facets, facet_order);
let (facet_distribution, facet_stats) = facets
.map(|ComputedFacets { distribution, stats }| (distribution, stats))
.unzip();
(facet_distribution, facet_stats, FederatedFacets::default())
}
None => (None, None, facets),
};
let search_result = FederatedSearchResult {
hits: merged_hits,
processing_time_ms: before_search.elapsed().as_millis(),
@ -617,7 +872,39 @@ pub fn perform_federated_search(
semantic_hit_count,
degraded,
used_negative_operator,
facet_distribution,
facet_stats,
facets_by_index,
};
Ok(search_result)
}
fn check_facet_order(
facet_order: &mut Option<BTreeMap<String, (String, OrderBy)>>,
current_index: &str,
facets_by_index: &Option<Vec<String>>,
index: &milli::Index,
rtxn: &milli::heed::RoTxn<'_>,
) -> Result<(), ResponseError> {
if let (Some(facet_order), Some(facets_by_index)) = (facet_order, facets_by_index) {
let index_facet_order = index.sort_facet_values_by(rtxn)?;
for facet in facets_by_index {
let index_facet_order = index_facet_order.get(facet);
let (previous_index, previous_facet_order) = facet_order
.entry(facet.to_owned())
.or_insert_with(|| (current_index.to_owned(), index_facet_order));
if previous_facet_order != &index_facet_order {
return Err(MeilisearchHttpError::InconsistentFacetOrder {
facet: facet.clone(),
previous_facet_order: *previous_facet_order,
previous_uid: previous_index.clone(),
current_uid: current_index.to_owned(),
index_facet_order,
}
.into());
}
}
};
Ok(())
}

View File

@ -267,58 +267,54 @@ impl fmt::Debug for SearchQuery {
pub struct HybridQuery {
#[deserr(default, error = DeserrJsonError<InvalidSearchSemanticRatio>, default)]
pub semantic_ratio: SemanticRatio,
#[deserr(default, error = DeserrJsonError<InvalidEmbedder>, default)]
pub embedder: Option<String>,
#[deserr(error = DeserrJsonError<InvalidEmbedder>)]
pub embedder: String,
}
#[derive(Clone)]
pub enum SearchKind {
KeywordOnly,
SemanticOnly { embedder_name: String, embedder: Arc<Embedder> },
Hybrid { embedder_name: String, embedder: Arc<Embedder>, semantic_ratio: f32 },
SemanticOnly { embedder_name: String, embedder: Arc<Embedder>, quantized: bool },
Hybrid { embedder_name: String, embedder: Arc<Embedder>, quantized: bool, semantic_ratio: f32 },
}
impl SearchKind {
pub(crate) fn semantic(
index_scheduler: &index_scheduler::IndexScheduler,
index: &Index,
embedder_name: Option<&str>,
embedder_name: &str,
vector_len: Option<usize>,
) -> Result<Self, ResponseError> {
let (embedder_name, embedder) =
let (embedder_name, embedder, quantized) =
Self::embedder(index_scheduler, index, embedder_name, vector_len)?;
Ok(Self::SemanticOnly { embedder_name, embedder })
Ok(Self::SemanticOnly { embedder_name, embedder, quantized })
}
pub(crate) fn hybrid(
index_scheduler: &index_scheduler::IndexScheduler,
index: &Index,
embedder_name: Option<&str>,
embedder_name: &str,
semantic_ratio: f32,
vector_len: Option<usize>,
) -> Result<Self, ResponseError> {
let (embedder_name, embedder) =
let (embedder_name, embedder, quantized) =
Self::embedder(index_scheduler, index, embedder_name, vector_len)?;
Ok(Self::Hybrid { embedder_name, embedder, semantic_ratio })
Ok(Self::Hybrid { embedder_name, embedder, quantized, semantic_ratio })
}
pub(crate) fn embedder(
index_scheduler: &index_scheduler::IndexScheduler,
index: &Index,
embedder_name: Option<&str>,
embedder_name: &str,
vector_len: Option<usize>,
) -> Result<(String, Arc<Embedder>), ResponseError> {
) -> Result<(String, Arc<Embedder>, bool), ResponseError> {
let embedder_configs = index.embedding_configs(&index.read_txn()?)?;
let embedders = index_scheduler.embedders(embedder_configs)?;
let embedder_name = embedder_name.unwrap_or_else(|| embedders.get_default_embedder_name());
let embedder = embedders.get(embedder_name);
let embedder = embedder
let (embedder, _, quantized) = embedders
.get(embedder_name)
.ok_or(milli::UserError::InvalidEmbedder(embedder_name.to_owned()))
.map_err(milli::Error::from)?
.0;
.map_err(milli::Error::from)?;
if let Some(vector_len) = vector_len {
if vector_len != embedder.dimensions() {
@ -332,7 +328,7 @@ impl SearchKind {
}
}
Ok((embedder_name.to_owned(), embedder))
Ok((embedder_name.to_owned(), embedder, quantized))
}
}
@ -441,9 +437,6 @@ pub struct SearchQueryWithIndex {
}
impl SearchQueryWithIndex {
pub fn has_federation_options(&self) -> bool {
self.federation_options.is_some()
}
pub fn has_pagination(&self) -> Option<&'static str> {
if self.offset.is_some() {
Some("offset")
@ -458,6 +451,10 @@ impl SearchQueryWithIndex {
}
}
pub fn has_facets(&self) -> Option<&[String]> {
self.facets.as_deref().filter(|v| !v.is_empty())
}
pub fn into_index_query_federation(self) -> (IndexUid, SearchQuery, Option<FederationOptions>) {
let SearchQueryWithIndex {
index_uid,
@ -537,8 +534,8 @@ pub struct SimilarQuery {
pub limit: usize,
#[deserr(default, error = DeserrJsonError<InvalidSimilarFilter>)]
pub filter: Option<Value>,
#[deserr(default, error = DeserrJsonError<InvalidEmbedder>, default)]
pub embedder: Option<String>,
#[deserr(error = DeserrJsonError<InvalidEmbedder>)]
pub embedder: String,
#[deserr(default, error = DeserrJsonError<InvalidSimilarAttributesToRetrieve>)]
pub attributes_to_retrieve: Option<BTreeSet<String>>,
#[deserr(default, error = DeserrJsonError<InvalidSimilarRetrieveVectors>)]
@ -792,7 +789,7 @@ fn prepare_search<'t>(
search.query(q);
}
}
SearchKind::SemanticOnly { embedder_name, embedder } => {
SearchKind::SemanticOnly { embedder_name, embedder, quantized } => {
let vector = match query.vector.clone() {
Some(vector) => vector,
None => {
@ -806,14 +803,19 @@ fn prepare_search<'t>(
}
};
search.semantic(embedder_name.clone(), embedder.clone(), Some(vector));
search.semantic(embedder_name.clone(), embedder.clone(), *quantized, Some(vector));
}
SearchKind::Hybrid { embedder_name, embedder, semantic_ratio: _ } => {
SearchKind::Hybrid { embedder_name, embedder, quantized, semantic_ratio: _ } => {
if let Some(q) = &query.q {
search.query(q);
}
// will be embedded in hybrid search if necessary
search.semantic(embedder_name.clone(), embedder.clone(), query.vector.clone());
search.semantic(
embedder_name.clone(),
embedder.clone(),
*quantized,
query.vector.clone(),
);
}
}
@ -987,39 +989,13 @@ pub fn perform_search(
HitsInfo::OffsetLimit { limit, offset, estimated_total_hits: number_of_hits }
};
let (facet_distribution, facet_stats) = match facets {
Some(ref fields) => {
let mut facet_distribution = index.facets_distribution(&rtxn);
let max_values_by_facet = index
.max_values_per_facet(&rtxn)
.map_err(milli::Error::from)?
.map(|x| x as usize)
.unwrap_or(DEFAULT_VALUES_PER_FACET);
facet_distribution.max_values_per_facet(max_values_by_facet);
let sort_facet_values_by =
index.sort_facet_values_by(&rtxn).map_err(milli::Error::from)?;
if fields.iter().all(|f| f != "*") {
let fields: Vec<_> =
fields.iter().map(|n| (n, sort_facet_values_by.get(n))).collect();
facet_distribution.facets(fields);
}
let distribution = facet_distribution
.candidates(candidates)
.default_order_by(sort_facet_values_by.get("*"))
.execute()?;
let stats = facet_distribution.compute_stats()?;
(Some(distribution), Some(stats))
}
None => (None, None),
};
let facet_stats = facet_stats.map(|stats| {
stats.into_iter().map(|(k, (min, max))| (k, FacetStats { min, max })).collect()
});
let (facet_distribution, facet_stats) = facets
.map(move |facets| {
compute_facet_distribution_stats(&facets, index, &rtxn, candidates, Route::Search)
})
.transpose()?
.map(|ComputedFacets { distribution, stats }| (distribution, stats))
.unzip();
let result = SearchResult {
hits: documents,
@ -1035,6 +1011,61 @@ pub fn perform_search(
Ok(result)
}
#[derive(Debug, Clone, Default, Serialize)]
pub struct ComputedFacets {
pub distribution: BTreeMap<String, IndexMap<String, u64>>,
pub stats: BTreeMap<String, FacetStats>,
}
enum Route {
Search,
MultiSearch,
}
fn compute_facet_distribution_stats<S: AsRef<str>>(
facets: &[S],
index: &Index,
rtxn: &RoTxn,
candidates: roaring::RoaringBitmap,
route: Route,
) -> Result<ComputedFacets, ResponseError> {
let mut facet_distribution = index.facets_distribution(rtxn);
let max_values_by_facet = index
.max_values_per_facet(rtxn)
.map_err(milli::Error::from)?
.map(|x| x as usize)
.unwrap_or(DEFAULT_VALUES_PER_FACET);
facet_distribution.max_values_per_facet(max_values_by_facet);
let sort_facet_values_by = index.sort_facet_values_by(rtxn).map_err(milli::Error::from)?;
// add specific facet if there is no placeholder
if facets.iter().all(|f| f.as_ref() != "*") {
let fields: Vec<_> =
facets.iter().map(|n| (n, sort_facet_values_by.get(n.as_ref()))).collect();
facet_distribution.facets(fields);
}
let distribution = facet_distribution
.candidates(candidates)
.default_order_by(sort_facet_values_by.get("*"))
.execute()
.map_err(|error| match (error, route) {
(
error @ milli::Error::UserError(milli::UserError::InvalidFacetsDistribution {
..
}),
Route::MultiSearch,
) => ResponseError::from_msg(error.to_string(), Code::InvalidMultiSearchFacets),
(error, _) => error.into(),
})?;
let stats = facet_distribution.compute_stats()?;
let stats = stats.into_iter().map(|(k, (min, max))| (k, FacetStats { min, max })).collect();
Ok(ComputedFacets { distribution, stats })
}
pub fn search_from_kind(
search_kind: SearchKind,
search: milli::Search<'_>,
@ -1413,6 +1444,7 @@ pub fn perform_similar(
query: SimilarQuery,
embedder_name: String,
embedder: Arc<Embedder>,
quantized: bool,
retrieve_vectors: RetrieveVectors,
features: RoFeatures,
) -> Result<SimilarResult, ResponseError> {
@ -1441,8 +1473,16 @@ pub fn perform_similar(
));
};
let mut similar =
milli::Similar::new(internal_id, offset, limit, index, &rtxn, embedder_name, embedder);
let mut similar = milli::Similar::new(
internal_id,
offset,
limit,
index,
&rtxn,
embedder_name,
embedder,
quantized,
);
if let Some(ref filter) = query.filter {
if let Some(facets) = parse_filter(filter, Code::InvalidSimilarFilter, features)? {
@ -1642,7 +1682,7 @@ fn add_non_formatted_ids_to_formatted_options(
fn make_document(
displayed_attributes: &BTreeSet<FieldId>,
field_ids_map: &FieldsIdsMap,
obkv: obkv::KvReaderU16,
obkv: &obkv::KvReaderU16,
) -> Result<Document, MeilisearchHttpError> {
let mut document = serde_json::Map::new();

View File

@ -18,6 +18,7 @@
//! And should drop the Permit only once you have freed all the RAM consumed by the method.
use std::num::NonZeroUsize;
use std::time::Duration;
use rand::rngs::StdRng;
use rand::{Rng, SeedableRng};
@ -29,16 +30,31 @@ use crate::error::MeilisearchHttpError;
pub struct SearchQueue {
sender: mpsc::Sender<oneshot::Sender<Permit>>,
capacity: usize,
/// If we have waited longer than this to get a permit, we should abort the search request entirely.
/// The client probably already closed the connection, but we have no way to find out.
time_to_abort: Duration,
}
/// You should only run search requests while holding this permit.
/// Once it's dropped, a new search request will be able to process.
/// You should always try to drop the permit yourself calling the `drop` async method on it.
#[derive(Debug)]
pub struct Permit {
sender: mpsc::Sender<()>,
}
impl Permit {
/// Drop the permit giving back on permit to the search queue.
pub async fn drop(self) {
// if the channel is closed then the whole instance is down
let _ = self.sender.send(()).await;
}
}
impl Drop for Permit {
/// The implicit drop implementation can still be called in multiple cases:
/// - We forgot to call the explicit one somewhere => this should be fixed on our side asap
/// - The future is cancelled while running and the permit dropped with it
fn drop(&mut self) {
let sender = self.sender.clone();
// if the channel is closed then the whole instance is down
@ -53,7 +69,11 @@ impl SearchQueue {
let (sender, receiver) = mpsc::channel(1);
tokio::task::spawn(Self::run(capacity, paralellism, receiver));
Self { sender, capacity }
Self { sender, capacity, time_to_abort: Duration::from_secs(60) }
}
pub fn with_time_to_abort(self, time_to_abort: Duration) -> Self {
Self { time_to_abort, ..self }
}
/// This function is the main loop, it's in charge on scheduling which search request should execute first and
@ -119,9 +139,23 @@ impl SearchQueue {
/// Returns a search `Permit`.
/// It should be dropped as soon as you've freed all the RAM associated with the search request being processed.
pub async fn try_get_search_permit(&self) -> Result<Permit, MeilisearchHttpError> {
let now = std::time::Instant::now();
let (sender, receiver) = oneshot::channel();
self.sender.send(sender).await.map_err(|_| MeilisearchHttpError::SearchLimiterIsDown)?;
receiver.await.map_err(|_| MeilisearchHttpError::TooManySearchRequests(self.capacity))
let permit = receiver
.await
.map_err(|_| MeilisearchHttpError::TooManySearchRequests(self.capacity))?;
// If we've been for more than one minute to get a search permit, it's better to simply
// abort the search request than spending time processing something were the client
// most certainly exited or got a timeout a long time ago.
// We may find a better solution in https://github.com/actix/actix-web/issues/3462.
if now.elapsed() > self.time_to_abort {
permit.drop().await;
Err(MeilisearchHttpError::TooManySearchRequests(self.capacity))
} else {
Ok(permit)
}
}
/// Returns `Ok(())` if everything seems normal.

View File

@ -5,63 +5,3 @@ mod payload;
mod tenant_token;
mod tenant_token_multi_search;
use actix_web::http::StatusCode;
use crate::common::{Server, Value};
use crate::json;
impl Server {
pub fn use_api_key(&mut self, api_key: impl AsRef<str>) {
self.service.api_key = Some(api_key.as_ref().to_string());
}
/// Fetch and use the default admin key for nexts http requests.
pub async fn use_admin_key(&mut self, master_key: impl AsRef<str>) {
self.use_api_key(master_key);
let (response, code) = self.list_api_keys("").await;
assert_eq!(200, code, "{:?}", response);
let admin_key = &response["results"][1]["key"];
self.use_api_key(admin_key.as_str().unwrap());
}
pub async fn add_api_key(&self, content: Value) -> (Value, StatusCode) {
let url = "/keys";
self.service.post(url, content).await
}
pub async fn get_api_key(&self, key: impl AsRef<str>) -> (Value, StatusCode) {
let url = format!("/keys/{}", key.as_ref());
self.service.get(url).await
}
pub async fn patch_api_key(&self, key: impl AsRef<str>, content: Value) -> (Value, StatusCode) {
let url = format!("/keys/{}", key.as_ref());
self.service.patch(url, content).await
}
pub async fn list_api_keys(&self, params: &str) -> (Value, StatusCode) {
let url = format!("/keys{params}");
self.service.get(url).await
}
pub async fn delete_api_key(&self, key: impl AsRef<str>) -> (Value, StatusCode) {
let url = format!("/keys/{}", key.as_ref());
self.service.delete(url).await
}
pub async fn dummy_request(
&self,
method: impl AsRef<str>,
url: impl AsRef<str>,
) -> (Value, StatusCode) {
match method.as_ref() {
"POST" => self.service.post(url, json!({})).await,
"PUT" => self.service.put(url, json!({})).await,
"PATCH" => self.service.patch(url, json!({})).await,
"GET" => self.service.get(url).await,
"DELETE" => self.service.delete(url).await,
_ => unreachable!(),
}
}
}

View File

@ -6,7 +6,7 @@ use once_cell::sync::Lazy;
use time::{Duration, OffsetDateTime};
use super::authorization::{ALL_ACTIONS, AUTHORIZATIONS};
use crate::common::{Server, Value};
use crate::common::{Server, Value, DOCUMENTS};
use crate::json;
fn generate_tenant_token(
@ -22,36 +22,6 @@ fn generate_tenant_token(
.unwrap()
}
static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
json!([
{
"title": "Shazam!",
"id": "287947",
"color": ["green", "blue"]
},
{
"title": "Captain Marvel",
"id": "299537",
"color": ["yellow", "blue"]
},
{
"title": "Escape Room",
"id": "522681",
"color": ["yellow", "red"]
},
{
"title": "How to Train Your Dragon: The Hidden World",
"id": "166428",
"color": ["green", "red"]
},
{
"title": "Glass",
"id": "450465",
"color": ["blue", "red"]
}
])
});
static INVALID_RESPONSE: Lazy<Value> = Lazy::new(|| {
json!({
"message": null,

View File

@ -1,4 +1,5 @@
use std::fmt::Write;
use std::marker::PhantomData;
use std::panic::{catch_unwind, resume_unwind, UnwindSafe};
use std::time::Duration;
@ -9,19 +10,24 @@ use urlencoding::encode as urlencode;
use super::encoder::Encoder;
use super::service::Service;
use super::Value;
use super::{Owned, Shared};
use crate::json;
pub struct Index<'a> {
pub struct Index<'a, State = Owned> {
pub uid: String,
pub service: &'a Service,
pub encoder: Encoder,
pub(super) encoder: Encoder,
pub(super) marker: PhantomData<State>,
}
#[allow(dead_code)]
impl Index<'_> {
pub async fn get(&self) -> (Value, StatusCode) {
let url = format!("/indexes/{}", urlencode(self.uid.as_ref()));
self.service.get(url).await
impl<'a> Index<'a, Owned> {
pub fn to_shared(&self) -> Index<'a, Shared> {
Index {
uid: self.uid.clone(),
service: self.service,
encoder: self.encoder,
marker: PhantomData,
}
}
pub async fn load_test_set(&self) -> u64 {
@ -57,11 +63,7 @@ impl Index<'_> {
}
pub async fn create(&self, primary_key: Option<&str>) -> (Value, StatusCode) {
let body = json!({
"uid": self.uid,
"primaryKey": primary_key,
});
self.service.post_encoded("/indexes", body, self.encoder).await
self._create(primary_key).await
}
pub async fn update_raw(&self, body: Value) -> (Value, StatusCode) {
@ -88,13 +90,7 @@ impl Index<'_> {
documents: Value,
primary_key: Option<&str>,
) -> (Value, StatusCode) {
let url = match primary_key {
Some(key) => {
format!("/indexes/{}/documents?primaryKey={}", urlencode(self.uid.as_ref()), key)
}
None => format!("/indexes/{}/documents", urlencode(self.uid.as_ref())),
};
self.service.post_encoded(url, documents, self.encoder).await
self._add_documents(documents, primary_key).await
}
pub async fn raw_add_documents(
@ -136,80 +132,11 @@ impl Index<'_> {
}
}
pub async fn wait_task(&self, update_id: u64) -> Value {
// try several times to get status, or panic to not wait forever
let url = format!("/tasks/{}", update_id);
for _ in 0..100 {
let (response, status_code) = self.service.get(&url).await;
assert_eq!(200, status_code, "response: {}", response);
if response["status"] == "succeeded" || response["status"] == "failed" {
return response;
}
// wait 0.5 second.
sleep(Duration::from_millis(500)).await;
}
panic!("Timeout waiting for update id");
}
pub async fn get_task(&self, update_id: u64) -> (Value, StatusCode) {
let url = format!("/tasks/{}", update_id);
self.service.get(url).await
}
pub async fn list_tasks(&self) -> (Value, StatusCode) {
let url = format!("/tasks?indexUids={}", self.uid);
self.service.get(url).await
}
pub async fn filtered_tasks(
&self,
types: &[&str],
statuses: &[&str],
canceled_by: &[&str],
) -> (Value, StatusCode) {
let mut url = format!("/tasks?indexUids={}", self.uid);
if !types.is_empty() {
let _ = write!(url, "&types={}", types.join(","));
}
if !statuses.is_empty() {
let _ = write!(url, "&statuses={}", statuses.join(","));
}
if !canceled_by.is_empty() {
let _ = write!(url, "&canceledBy={}", canceled_by.join(","));
}
self.service.get(url).await
}
pub async fn get_document(&self, id: u64, options: Option<Value>) -> (Value, StatusCode) {
let mut url = format!("/indexes/{}/documents/{}", urlencode(self.uid.as_ref()), id);
if let Some(options) = options {
write!(url, "{}", yaup::to_string(&options).unwrap()).unwrap();
}
self.service.get(url).await
}
pub async fn get_document_by_filter(&self, payload: Value) -> (Value, StatusCode) {
let url = format!("/indexes/{}/documents/fetch", urlencode(self.uid.as_ref()));
self.service.post(url, payload).await
}
pub async fn get_all_documents_raw(&self, options: &str) -> (Value, StatusCode) {
let url = format!("/indexes/{}/documents{}", urlencode(self.uid.as_ref()), options);
self.service.get(url).await
}
pub async fn get_all_documents(&self, options: GetAllDocumentsOptions) -> (Value, StatusCode) {
let url = format!(
"/indexes/{}/documents{}",
urlencode(self.uid.as_ref()),
yaup::to_string(&options).unwrap()
);
self.service.get(url).await
}
pub async fn delete_document(&self, id: u64) -> (Value, StatusCode) {
let url = format!("/indexes/{}/documents/{}", urlencode(self.uid.as_ref()), id);
self.service.delete(url).await
@ -237,14 +164,8 @@ impl Index<'_> {
self.service.post_encoded(url, body, self.encoder).await
}
pub async fn settings(&self) -> (Value, StatusCode) {
let url = format!("/indexes/{}/settings", urlencode(self.uid.as_ref()));
self.service.get(url).await
}
pub async fn update_settings(&self, settings: Value) -> (Value, StatusCode) {
let url = format!("/indexes/{}/settings", urlencode(self.uid.as_ref()));
self.service.patch_encoded(url, settings, self.encoder).await
self._update_settings(settings).await
}
pub async fn update_settings_displayed_attributes(
@ -327,6 +248,146 @@ impl Index<'_> {
self.service.delete(url).await
}
pub async fn update_distinct_attribute(&self, value: Value) -> (Value, StatusCode) {
let url =
format!("/indexes/{}/settings/{}", urlencode(self.uid.as_ref()), "distinct-attribute");
self.service.put_encoded(url, value, self.encoder).await
}
}
impl<'a> Index<'a, Shared> {
/// You cannot modify the content of a shared index, thus the delete_document_by_filter call
/// must fail. If the task successfully enqueue itself, we'll wait for the task to finishes,
/// and if it succeed the function will panic.
pub async fn delete_document_by_filter_fail(&self, body: Value) -> (Value, StatusCode) {
let (mut task, code) = self._delete_document_by_filter(body).await;
if code.is_success() {
task = self.wait_task(task.uid()).await;
if task.is_success() {
panic!(
"`delete_document_by_filter_fail` succeeded: {}",
serde_json::to_string_pretty(&task).unwrap()
);
}
}
(task, code)
}
}
#[allow(dead_code)]
impl<State> Index<'_, State> {
pub async fn get(&self) -> (Value, StatusCode) {
let url = format!("/indexes/{}", urlencode(self.uid.as_ref()));
self.service.get(url).await
}
/// add_documents is not allowed on shared index but we need to use it to initialize
/// a bunch of very common indexes in `common/mod.rs`.
pub(super) async fn _add_documents(
&self,
documents: Value,
primary_key: Option<&str>,
) -> (Value, StatusCode) {
let url = match primary_key {
Some(key) => {
format!("/indexes/{}/documents?primaryKey={}", urlencode(self.uid.as_ref()), key)
}
None => format!("/indexes/{}/documents", urlencode(self.uid.as_ref())),
};
self.service.post_encoded(url, documents, self.encoder).await
}
pub(super) async fn _update_settings(&self, settings: Value) -> (Value, StatusCode) {
let url = format!("/indexes/{}/settings", urlencode(self.uid.as_ref()));
self.service.patch_encoded(url, settings, self.encoder).await
}
pub(super) async fn _delete_document_by_filter(&self, body: Value) -> (Value, StatusCode) {
let url = format!("/indexes/{}/documents/delete", urlencode(self.uid.as_ref()));
self.service.post_encoded(url, body, self.encoder).await
}
pub(super) async fn _create(&self, primary_key: Option<&str>) -> (Value, StatusCode) {
let body = json!({
"uid": self.uid,
"primaryKey": primary_key,
});
self.service.post_encoded("/indexes", body, self.encoder).await
}
pub async fn wait_task(&self, update_id: u64) -> Value {
// try several times to get status, or panic to not wait forever
let url = format!("/tasks/{}", update_id);
for _ in 0..100 {
let (response, status_code) = self.service.get(&url).await;
assert_eq!(200, status_code, "response: {}", response);
if response["status"] == "succeeded" || response["status"] == "failed" {
return response;
}
// wait 0.5 second.
sleep(Duration::from_millis(500)).await;
}
panic!("Timeout waiting for update id");
}
pub async fn get_task(&self, update_id: u64) -> (Value, StatusCode) {
let url = format!("/tasks/{}", update_id);
self.service.get(url).await
}
pub async fn filtered_tasks(
&self,
types: &[&str],
statuses: &[&str],
canceled_by: &[&str],
) -> (Value, StatusCode) {
let mut url = format!("/tasks?indexUids={}", self.uid);
if !types.is_empty() {
let _ = write!(url, "&types={}", types.join(","));
}
if !statuses.is_empty() {
let _ = write!(url, "&statuses={}", statuses.join(","));
}
if !canceled_by.is_empty() {
let _ = write!(url, "&canceledBy={}", canceled_by.join(","));
}
self.service.get(url).await
}
pub async fn get_document(&self, id: u64, options: Option<Value>) -> (Value, StatusCode) {
let mut url = format!("/indexes/{}/documents/{}", urlencode(self.uid.as_ref()), id);
if let Some(options) = options {
write!(url, "{}", yaup::to_string(&options).unwrap()).unwrap();
}
self.service.get(url).await
}
pub async fn get_document_by_filter(&self, payload: Value) -> (Value, StatusCode) {
let url = format!("/indexes/{}/documents/fetch", urlencode(self.uid.as_ref()));
self.service.post(url, payload).await
}
pub async fn get_all_documents_raw(&self, options: &str) -> (Value, StatusCode) {
let url = format!("/indexes/{}/documents{}", urlencode(self.uid.as_ref()), options);
self.service.get(url).await
}
pub async fn get_all_documents(&self, options: GetAllDocumentsOptions) -> (Value, StatusCode) {
let url = format!(
"/indexes/{}/documents{}",
urlencode(self.uid.as_ref()),
yaup::to_string(&options).unwrap()
);
self.service.get(url).await
}
pub async fn settings(&self) -> (Value, StatusCode) {
let url = format!("/indexes/{}/settings", urlencode(self.uid.as_ref()));
self.service.get(url).await
}
pub async fn stats(&self) -> (Value, StatusCode) {
let url = format!("/indexes/{}/stats", urlencode(self.uid.as_ref()));
self.service.get(url).await
@ -411,12 +472,6 @@ impl Index<'_> {
self.service.post_encoded(url, query, self.encoder).await
}
pub async fn update_distinct_attribute(&self, value: Value) -> (Value, StatusCode) {
let url =
format!("/indexes/{}/settings/{}", urlencode(self.uid.as_ref()), "distinct-attribute");
self.service.put_encoded(url, value, self.encoder).await
}
pub async fn get_distinct_attribute(&self) -> (Value, StatusCode) {
let url =
format!("/indexes/{}/settings/{}", urlencode(self.uid.as_ref()), "distinct-attribute");

View File

@ -8,9 +8,16 @@ use std::fmt::{self, Display};
#[allow(unused)]
pub use index::GetAllDocumentsOptions;
use meili_snap::json_string;
use once_cell::sync::Lazy;
use serde::{Deserialize, Serialize};
#[allow(unused)]
pub use server::{default_settings, Server};
use tokio::sync::OnceCell;
use crate::common::index::Index;
pub enum Shared {}
pub enum Owned {}
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)]
pub struct Value(pub serde_json::Value);
@ -27,10 +34,20 @@ impl Value {
}
}
/// Return `true` if the `status` field is set to `succeeded`.
/// Panic if the `status` field doesn't exists.
#[track_caller]
pub fn is_success(&self) -> bool {
if !self["status"].is_string() {
panic!("Called `is_success` on {}", serde_json::to_string_pretty(&self.0).unwrap());
}
self["status"] == serde_json::Value::String(String::from("succeeded"))
}
// Panic if the json doesn't contain the `status` field set to "succeeded"
#[track_caller]
pub fn succeeded(&self) -> &Self {
if self["status"] != serde_json::Value::String(String::from("succeeded")) {
if !self.is_success() {
panic!("Called succeeded on {}", serde_json::to_string_pretty(&self.0).unwrap());
}
self
@ -122,3 +139,253 @@ macro_rules! test_post_get_search {
.map_err(|e| panic!("panic in post route: {:?}", e.downcast_ref::<&str>().unwrap()));
};
}
pub async fn shared_does_not_exists_index() -> &'static Index<'static, Shared> {
static INDEX: Lazy<Index<'static, Shared>> = Lazy::new(|| {
let server = Server::new_shared();
server._index("DOES_NOT_EXISTS").to_shared()
});
&INDEX
}
pub async fn shared_empty_index() -> &'static Index<'static, Shared> {
static INDEX: OnceCell<Index<'static, Shared>> = OnceCell::const_new();
INDEX
.get_or_init(|| async {
let server = Server::new_shared();
let index = server._index("EMPTY_INDEX").to_shared();
let (response, _code) = index._create(None).await;
index.wait_task(response.uid()).await.succeeded();
index
})
.await
}
pub static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
json!([
{
"title": "Shazam!",
"id": "287947",
"color": ["green", "blue"],
"_vectors": { "manual": [1, 2, 3]},
},
{
"title": "Captain Marvel",
"id": "299537",
"color": ["yellow", "blue"],
"_vectors": { "manual": [1, 2, 54] },
},
{
"title": "Escape Room",
"id": "522681",
"color": ["yellow", "red"],
"_vectors": { "manual": [10, -23, 32] },
},
{
"title": "How to Train Your Dragon: The Hidden World",
"id": "166428",
"color": ["green", "red"],
"_vectors": { "manual": [-100, 231, 32] },
},
{
"title": "Gläss",
"id": "450465",
"color": ["blue", "red"],
"_vectors": { "manual": [-100, 340, 90] },
}
])
});
pub async fn shared_index_with_documents() -> &'static Index<'static, Shared> {
static INDEX: OnceCell<Index<'static, Shared>> = OnceCell::const_new();
INDEX.get_or_init(|| async {
let server = Server::new_shared();
let index = server._index("SHARED_DOCUMENTS").to_shared();
let documents = DOCUMENTS.clone();
let (response, _code) = index._add_documents(documents, None).await;
index.wait_task(response.uid()).await.succeeded();
let (response, _code) = index
._update_settings(
json!({"filterableAttributes": ["id", "title"], "sortableAttributes": ["id", "title"]}),
)
.await;
index.wait_task(response.uid()).await.succeeded();
index
}).await
}
pub static SCORE_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
json!([
{
"title": "Batman the dark knight returns: Part 1",
"id": "A",
},
{
"title": "Batman the dark knight returns: Part 2",
"id": "B",
},
{
"title": "Batman Returns",
"id": "C",
},
{
"title": "Batman",
"id": "D",
},
{
"title": "Badman",
"id": "E",
}
])
});
pub static NESTED_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
json!([
{
"id": 852,
"father": "jean",
"mother": "michelle",
"doggos": [
{
"name": "bobby",
"age": 2,
},
{
"name": "buddy",
"age": 4,
},
],
"cattos": "pésti",
"_vectors": { "manual": [1, 2, 3]},
},
{
"id": 654,
"father": "pierre",
"mother": "sabine",
"doggos": [
{
"name": "gros bill",
"age": 8,
},
],
"cattos": ["simba", "pestiféré"],
"_vectors": { "manual": [1, 2, 54] },
},
{
"id": 750,
"father": "romain",
"mother": "michelle",
"cattos": ["enigma"],
"_vectors": { "manual": [10, 23, 32] },
},
{
"id": 951,
"father": "jean-baptiste",
"mother": "sophie",
"doggos": [
{
"name": "turbo",
"age": 5,
},
{
"name": "fast",
"age": 6,
},
],
"cattos": ["moumoute", "gomez"],
"_vectors": { "manual": [10, 23, 32] },
},
])
});
pub async fn shared_index_with_nested_documents() -> &'static Index<'static, Shared> {
static INDEX: OnceCell<Index<'static, Shared>> = OnceCell::const_new();
INDEX.get_or_init(|| async {
let server = Server::new_shared();
let index = server._index("SHARED_NESTED_DOCUMENTS").to_shared();
let documents = NESTED_DOCUMENTS.clone();
let (response, _code) = index._add_documents(documents, None).await;
index.wait_task(response.uid()).await.succeeded();
let (response, _code) = index
._update_settings(
json!({"filterableAttributes": ["father", "doggos"], "sortableAttributes": ["doggos"]}),
)
.await;
index.wait_task(response.uid()).await.succeeded();
index
}).await
}
pub static FRUITS_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
json!([
{
"name": "Exclusive sale: green apple",
"id": "green-apple-boosted",
"BOOST": true
},
{
"name": "Pear",
"id": "pear",
},
{
"name": "Red apple gala",
"id": "red-apple-gala",
},
{
"name": "Exclusive sale: Red Tomato",
"id": "red-tomatoes-boosted",
"BOOST": true
},
{
"name": "Exclusive sale: Red delicious apple",
"id": "red-delicious-boosted",
"BOOST": true,
}
])
});
pub static VECTOR_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
json!([
{
"id": "A",
"description": "the dog barks at the cat",
"_vectors": {
// dimensions [canine, feline, young]
"animal": [0.9, 0.8, 0.05],
// dimensions [negative/positive, energy]
"sentiment": [-0.1, 0.55]
}
},
{
"id": "B",
"description": "the kitten scratched the beagle",
"_vectors": {
// dimensions [canine, feline, young]
"animal": [0.8, 0.9, 0.5],
// dimensions [negative/positive, energy]
"sentiment": [-0.2, 0.65]
}
},
{
"id": "C",
"description": "the dog had to stay alone today",
"_vectors": {
// dimensions [canine, feline, young]
"animal": [0.85, 0.02, 0.1],
// dimensions [negative/positive, energy]
"sentiment": [-1.0, 0.1]
}
},
{
"id": "D",
"description": "the little boy pets the puppy",
"_vectors": {
// dimensions [canine, feline, young]
"animal": [0.8, 0.09, 0.8],
// dimensions [negative/positive, energy]
"sentiment": [0.8, 0.3]
}
},
])
});

View File

@ -1,7 +1,8 @@
#![allow(dead_code)]
use std::marker::PhantomData;
use std::path::Path;
use std::str::FromStr as _;
use std::str::FromStr;
use std::time::Duration;
use actix_http::body::MessageBody;
@ -10,28 +11,34 @@ use actix_web::http::StatusCode;
use byte_unit::{Byte, Unit};
use clap::Parser;
use meilisearch::option::{IndexerOpts, MaxMemory, MaxThreads, Opt};
use meilisearch::{analytics, create_app, setup_meilisearch, SubscriberForSecondLayer};
use meilisearch::setup_meilisearch;
use once_cell::sync::Lazy;
use tempfile::TempDir;
use tokio::sync::OnceCell;
use tokio::time::sleep;
use tracing::level_filters::LevelFilter;
use tracing_subscriber::Layer;
use uuid::Uuid;
use super::index::Index;
use super::service::Service;
use super::{Owned, Shared};
use crate::common::encoder::Encoder;
use crate::common::Value;
use crate::json;
pub struct Server {
pub struct Server<State = Owned> {
pub service: Service,
// hold ownership to the tempdir while we use the server instance.
_dir: Option<TempDir>,
_marker: PhantomData<State>,
}
pub static TEST_TEMP_DIR: Lazy<TempDir> = Lazy::new(|| TempDir::new().unwrap());
impl Server {
impl Server<Owned> {
fn into_shared(self) -> Server<Shared> {
Server { service: self.service, _dir: self._dir, _marker: PhantomData }
}
pub async fn new() -> Self {
let dir = TempDir::new().unwrap();
@ -46,7 +53,7 @@ impl Server {
let (index_scheduler, auth) = setup_meilisearch(&options).unwrap();
let service = Service { index_scheduler, auth, options, api_key: None };
Server { service, _dir: Some(dir) }
Server { service, _dir: Some(dir), _marker: PhantomData }
}
pub async fn new_auth_with_options(mut options: Opt, dir: TempDir) -> Self {
@ -61,7 +68,7 @@ impl Server {
let (index_scheduler, auth) = setup_meilisearch(&options).unwrap();
let service = Service { index_scheduler, auth, options, api_key: None };
Server { service, _dir: Some(dir) }
Server { service, _dir: Some(dir), _marker: PhantomData }
}
pub async fn new_auth() -> Self {
@ -74,38 +81,35 @@ impl Server {
let (index_scheduler, auth) = setup_meilisearch(&options)?;
let service = Service { index_scheduler, auth, options, api_key: None };
Ok(Server { service, _dir: None })
Ok(Server { service, _dir: None, _marker: PhantomData })
}
pub async fn init_web_app(
&self,
) -> impl actix_web::dev::Service<
actix_http::Request,
Response = ServiceResponse<impl MessageBody>,
Error = actix_web::Error,
> {
let (_route_layer, route_layer_handle) =
tracing_subscriber::reload::Layer::new(None.with_filter(
tracing_subscriber::filter::Targets::new().with_target("", LevelFilter::OFF),
));
let (_stderr_layer, stderr_layer_handle) = tracing_subscriber::reload::Layer::new(
(Box::new(
tracing_subscriber::fmt::layer()
.with_span_events(tracing_subscriber::fmt::format::FmtSpan::CLOSE),
)
as Box<dyn tracing_subscriber::Layer<SubscriberForSecondLayer> + Send + Sync>)
.with_filter(tracing_subscriber::filter::Targets::new()),
);
pub fn use_api_key(&mut self, api_key: impl AsRef<str>) {
self.service.api_key = Some(api_key.as_ref().to_string());
}
actix_web::test::init_service(create_app(
self.service.index_scheduler.clone().into(),
self.service.auth.clone().into(),
self.service.options.clone(),
(route_layer_handle, stderr_layer_handle),
analytics::MockAnalytics::new(&self.service.options),
true,
))
.await
/// Fetch and use the default admin key for nexts http requests.
pub async fn use_admin_key(&mut self, master_key: impl AsRef<str>) {
self.use_api_key(master_key);
let (response, code) = self.list_api_keys("").await;
assert_eq!(200, code, "{:?}", response);
let admin_key = &response["results"][1]["key"];
self.use_api_key(admin_key.as_str().unwrap());
}
pub async fn add_api_key(&self, content: Value) -> (Value, StatusCode) {
let url = "/keys";
self.service.post(url, content).await
}
pub async fn patch_api_key(&self, key: impl AsRef<str>, content: Value) -> (Value, StatusCode) {
let url = format!("/keys/{}", key.as_ref());
self.service.patch(url, content).await
}
pub async fn delete_api_key(&self, key: impl AsRef<str>) -> (Value, StatusCode) {
let url = format!("/keys/{}", key.as_ref());
self.service.delete(url).await
}
/// Returns a view to an index. There is no guarantee that the index exists.
@ -118,15 +122,12 @@ impl Server {
}
pub fn index_with_encoder(&self, uid: impl AsRef<str>, encoder: Encoder) -> Index<'_> {
Index { uid: uid.as_ref().to_string(), service: &self.service, encoder }
}
pub async fn multi_search(&self, queries: Value) -> (Value, StatusCode) {
self.service.post("/multi-search", queries).await
}
pub async fn list_indexes_raw(&self, parameters: &str) -> (Value, StatusCode) {
self.service.get(format!("/indexes{parameters}")).await
Index {
uid: uid.as_ref().to_string(),
service: &self.service,
encoder,
marker: PhantomData,
}
}
pub async fn list_indexes(
@ -150,10 +151,6 @@ impl Server {
}
}
pub async fn version(&self) -> (Value, StatusCode) {
self.service.get("/version").await
}
pub async fn stats(&self) -> (Value, StatusCode) {
self.service.get("/stats").await
}
@ -162,12 +159,174 @@ impl Server {
self.service.get("/tasks").await
}
pub async fn set_features(&self, value: Value) -> (Value, StatusCode) {
self.service.patch("/experimental-features", value).await
}
pub async fn get_metrics(&self) -> (Value, StatusCode) {
self.service.get("/metrics").await
}
}
impl Server<Shared> {
fn init_new_shared_instance() -> Server<Shared> {
let dir = TempDir::new().unwrap();
if cfg!(windows) {
std::env::set_var("TMP", TEST_TEMP_DIR.path());
} else {
std::env::set_var("TMPDIR", TEST_TEMP_DIR.path());
}
let options = default_settings(dir.path());
let (index_scheduler, auth) = setup_meilisearch(&options).unwrap();
let service = Service { index_scheduler, auth, api_key: None, options };
Server { service, _dir: Some(dir), _marker: PhantomData }
}
pub fn new_shared() -> &'static Server<Shared> {
static SERVER: Lazy<Server<Shared>> = Lazy::new(Server::init_new_shared_instance);
&SERVER
}
pub async fn new_shared_with_admin_key() -> &'static Server<Shared> {
static SERVER: OnceCell<Server<Shared>> = OnceCell::const_new();
SERVER
.get_or_init(|| async {
let mut server = Server::new_auth().await;
server.use_admin_key("MASTER_KEY").await;
server.into_shared()
})
.await
}
/// You shouldn't access random indexes on a shared instance thus this method
/// must fail.
pub async fn get_index_fail(&self, uid: impl AsRef<str>) -> (Value, StatusCode) {
let url = format!("/indexes/{}", urlencoding::encode(uid.as_ref()));
let (value, code) = self.service.get(url).await;
if code.is_success() {
panic!("`get_index_fail` succeeded with uid: {}", uid.as_ref());
}
(value, code)
}
pub async fn delete_index_fail(&self, uid: impl AsRef<str>) -> (Value, StatusCode) {
let url = format!("/indexes/{}", urlencoding::encode(uid.as_ref()));
let (value, code) = self.service.delete(url).await;
if code.is_success() {
panic!("`delete_index_fail` succeeded with uid: {}", uid.as_ref());
}
(value, code)
}
pub async fn update_raw_index_fail(
&self,
uid: impl AsRef<str>,
body: Value,
) -> (Value, StatusCode) {
let url = format!("/indexes/{}", urlencoding::encode(uid.as_ref()));
let (value, code) = self.service.patch_encoded(url, body, Encoder::Plain).await;
if code.is_success() {
panic!("`update_raw_index_fail` succeeded with uid: {}", uid.as_ref());
}
(value, code)
}
/// Since this call updates the state of the instance, it must fail.
/// If it doesn't fail, the test will panic to help you debug what
/// is going on.
pub async fn create_index_fail(&self, body: Value) -> (Value, StatusCode) {
let (mut task, code) = self._create_index(body).await;
if code.is_success() {
task = self.wait_task(task.uid()).await;
if task.is_success() {
panic!(
"`create_index_fail` succeeded: {}",
serde_json::to_string_pretty(&task).unwrap()
);
}
}
(task, code)
}
}
impl<State> Server<State> {
pub async fn init_web_app(
&self,
) -> impl actix_web::dev::Service<
actix_http::Request,
Response = ServiceResponse<impl MessageBody>,
Error = actix_web::Error,
> {
self.service.init_web_app().await
}
pub async fn list_api_keys(&self, params: &str) -> (Value, StatusCode) {
let url = format!("/keys{params}");
self.service.get(url).await
}
pub async fn dummy_request(
&self,
method: impl AsRef<str>,
url: impl AsRef<str>,
) -> (Value, StatusCode) {
match method.as_ref() {
"POST" => self.service.post(url, json!({})).await,
"PUT" => self.service.put(url, json!({})).await,
"PATCH" => self.service.patch(url, json!({})).await,
"GET" => self.service.get(url).await,
"DELETE" => self.service.delete(url).await,
_ => unreachable!(),
}
}
pub async fn get_api_key(&self, key: impl AsRef<str>) -> (Value, StatusCode) {
let url = format!("/keys/{}", key.as_ref());
self.service.get(url).await
}
pub(super) fn _index(&self, uid: impl AsRef<str>) -> Index<'_> {
Index {
uid: uid.as_ref().to_string(),
service: &self.service,
encoder: Encoder::Plain,
marker: PhantomData,
}
}
/// Returns a view to an index. There is no guarantee that the index exists.
pub fn unique_index(&self) -> Index<'_> {
let uuid = Uuid::new_v4();
Index {
uid: uuid.to_string(),
service: &self.service,
encoder: Encoder::Plain,
marker: PhantomData,
}
}
pub(super) async fn _create_index(&self, body: Value) -> (Value, StatusCode) {
self.service.post("/indexes", body).await
}
pub async fn multi_search(&self, queries: Value) -> (Value, StatusCode) {
self.service.post("/multi-search", queries).await
}
pub async fn list_indexes_raw(&self, parameters: &str) -> (Value, StatusCode) {
self.service.get(format!("/indexes{parameters}")).await
}
pub async fn tasks_filter(&self, filter: &str) -> (Value, StatusCode) {
self.service.get(format!("/tasks?{}", filter)).await
}
pub async fn get_dump_status(&self, uid: &str) -> (Value, StatusCode) {
self.service.get(format!("/dumps/{}/status", uid)).await
pub async fn version(&self) -> (Value, StatusCode) {
self.service.get("/version").await
}
pub async fn create_dump(&self) -> (Value, StatusCode) {
@ -215,14 +374,6 @@ impl Server {
pub async fn get_features(&self) -> (Value, StatusCode) {
self.service.get("/experimental-features").await
}
pub async fn set_features(&self, value: Value) -> (Value, StatusCode) {
self.service.patch("/experimental-features", value).await
}
pub async fn get_metrics(&self) -> (Value, StatusCode) {
self.service.get("/metrics").await
}
}
pub fn default_settings(dir: impl AsRef<Path>) -> Opt {
@ -240,7 +391,8 @@ pub fn default_settings(dir: impl AsRef<Path>) -> Opt {
// memory has to be unlimited because several meilisearch are running in test context.
max_indexing_memory: MaxMemory::unlimited(),
skip_index_budget: true,
max_indexing_threads: MaxThreads::from_str("1").unwrap(),
// Having 2 threads makes the tests way faster
max_indexing_threads: MaxThreads::from_str("2").unwrap(),
},
experimental_enable_metrics: false,
..Parser::parse_from(None as Option<&str>)

View File

@ -1,10 +1,15 @@
use std::num::NonZeroUsize;
use std::sync::Arc;
use actix_web::body::MessageBody;
use actix_web::dev::ServiceResponse;
use actix_web::http::header::ContentType;
use actix_web::http::StatusCode;
use actix_web::test;
use actix_web::test::TestRequest;
use actix_web::web::Data;
use index_scheduler::IndexScheduler;
use meilisearch::search_queue::SearchQueue;
use meilisearch::{analytics, create_app, Opt, SubscriberForSecondLayer};
use meilisearch_auth::AuthController;
use tracing::level_filters::LevelFilter;
@ -106,7 +111,13 @@ impl Service {
self.request(req).await
}
pub async fn request(&self, mut req: test::TestRequest) -> (Value, StatusCode) {
pub async fn init_web_app(
&self,
) -> impl actix_web::dev::Service<
actix_http::Request,
Response = ServiceResponse<impl MessageBody>,
Error = actix_web::Error,
> {
let (_route_layer, route_layer_handle) =
tracing_subscriber::reload::Layer::new(None.with_filter(
tracing_subscriber::filter::Targets::new().with_target("", LevelFilter::OFF),
@ -119,16 +130,25 @@ impl Service {
as Box<dyn tracing_subscriber::Layer<SubscriberForSecondLayer> + Send + Sync>)
.with_filter(tracing_subscriber::filter::Targets::new()),
);
let search_queue = SearchQueue::new(
self.options.experimental_search_queue_size,
NonZeroUsize::new(1).unwrap(),
);
let app = test::init_service(create_app(
actix_web::test::init_service(create_app(
self.index_scheduler.clone().into(),
self.auth.clone().into(),
Data::new(search_queue),
self.options.clone(),
(route_layer_handle, stderr_layer_handle),
analytics::MockAnalytics::new(&self.options),
true,
))
.await;
.await
}
pub async fn request(&self, mut req: test::TestRequest) -> (Value, StatusCode) {
let app = self.init_web_app().await;
if let Some(api_key) = &self.api_key {
req = req.insert_header(("Authorization", ["Bearer ", api_key].concat()));

View File

@ -6,6 +6,7 @@ use actix_web::test;
use crate::common::{Server, Value};
#[derive(Debug)]
enum HttpVerb {
Put,
Patch,
@ -80,7 +81,7 @@ async fn error_json_bad_content_type() {
let status_code = res.status();
let body = test::read_body(res).await;
let response: Value = serde_json::from_slice(&body).unwrap_or_default();
assert_eq!(status_code, 415, "calling the route `{}` without content-type is supposed to throw a bad media type error", route);
assert_eq!(status_code, 415, "calling the route `{verb:?} {route}` without content-type is supposed to throw a bad media type error:\n{}", String::from_utf8_lossy(&body));
assert_eq!(
response,
json!({

View File

@ -1,14 +1,15 @@
use meili_snap::*;
use urlencoding::encode;
use crate::common::Server;
use crate::common::{
shared_does_not_exists_index, shared_empty_index, shared_index_with_documents, Server,
};
use crate::json;
#[actix_rt::test]
async fn get_all_documents_bad_offset() {
let server = Server::new().await;
let index = server.index("test");
let server = Server::new_shared();
let index = server.unique_index();
let (response, code) = index.get_all_documents_raw("?offset").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
@ -45,9 +46,8 @@ async fn get_all_documents_bad_offset() {
#[actix_rt::test]
async fn get_all_documents_bad_limit() {
let server = Server::new().await;
let index = server.index("test");
let server = Server::new_shared();
let index = server.unique_index();
let (response, code) = index.get_all_documents_raw("?limit").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
@ -84,17 +84,13 @@ async fn get_all_documents_bad_limit() {
#[actix_rt::test]
async fn get_all_documents_bad_filter() {
let server = Server::new().await;
let index = server.index("test");
let index = shared_does_not_exists_index().await;
// Since the filter can't be parsed automatically by deserr, we have the wrong error message
// if the index does not exist: we could expect to get an error message about the invalid filter before
// the existence of the index is checked, but it is not the case.
let (response, code) = index.get_all_documents_raw("?filter").await;
snapshot!(code, @"404 Not Found");
snapshot!(json_string!(response), @r###"
{
"message": "Index `test` not found.",
"message": "Index `DOES_NOT_EXISTS` not found.",
"code": "index_not_found",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#index_not_found"
@ -105,7 +101,7 @@ async fn get_all_documents_bad_filter() {
snapshot!(code, @"404 Not Found");
snapshot!(json_string!(response), @r###"
{
"message": "Index `test` not found.",
"message": "Index `DOES_NOT_EXISTS` not found.",
"code": "index_not_found",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#index_not_found"
@ -116,42 +112,14 @@ async fn get_all_documents_bad_filter() {
snapshot!(code, @"404 Not Found");
snapshot!(json_string!(response), @r###"
{
"message": "Index `test` not found.",
"message": "Index `DOES_NOT_EXISTS` not found.",
"code": "index_not_found",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#index_not_found"
}
"###);
let (response, code) = index.create(None).await;
snapshot!(code, @"202 Accepted");
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
{
"taskUid": 0,
"indexUid": "test",
"status": "enqueued",
"type": "indexCreation",
"enqueuedAt": "[date]"
}
"###);
let response = server.wait_task(0).await;
snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }), @r###"
{
"uid": 0,
"indexUid": "test",
"status": "succeeded",
"type": "indexCreation",
"canceledBy": null,
"details": {
"primaryKey": null
},
"error": null,
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
let index = shared_empty_index().await;
let (response, code) = index.get_all_documents_raw("?filter").await;
snapshot!(code, @"200 OK");
@ -168,7 +136,7 @@ async fn get_all_documents_bad_filter() {
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `doggo`.\n1:6 doggo",
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `doggo`.\n1:6 doggo",
"code": "invalid_document_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_filter"
@ -189,9 +157,8 @@ async fn get_all_documents_bad_filter() {
#[actix_rt::test]
async fn delete_documents_batch() {
let server = Server::new().await;
let index = server.index("test");
let server = Server::new_shared();
let index = server.unique_index();
let (response, code) = index.delete_batch_raw(json!("doggo")).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
@ -206,9 +173,8 @@ async fn delete_documents_batch() {
#[actix_rt::test]
async fn replace_documents_missing_payload() {
let server = Server::new().await;
let index = server.index("test");
let server = Server::new_shared();
let index = server.unique_index();
let (response, code) =
index.raw_add_documents("", vec![("Content-Type", "application/json")], "").await;
snapshot!(code, @"400 Bad Request");
@ -248,9 +214,8 @@ async fn replace_documents_missing_payload() {
#[actix_rt::test]
async fn update_documents_missing_payload() {
let server = Server::new().await;
let index = server.index("test");
let server = Server::new_shared();
let index = server.unique_index();
let (response, code) = index.raw_update_documents("", Some("application/json"), "").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
@ -287,9 +252,8 @@ async fn update_documents_missing_payload() {
#[actix_rt::test]
async fn replace_documents_missing_content_type() {
let server = Server::new().await;
let index = server.index("test");
let server = Server::new_shared();
let index = server.unique_index();
let (response, code) = index.raw_add_documents("", Vec::new(), "").await;
snapshot!(code, @"415 Unsupported Media Type");
snapshot!(json_string!(response), @r###"
@ -316,9 +280,8 @@ async fn replace_documents_missing_content_type() {
#[actix_rt::test]
async fn update_documents_missing_content_type() {
let server = Server::new().await;
let index = server.index("test");
let server = Server::new_shared();
let index = server.unique_index();
let (response, code) = index.raw_update_documents("", None, "").await;
snapshot!(code, @"415 Unsupported Media Type");
snapshot!(json_string!(response), @r###"
@ -345,9 +308,8 @@ async fn update_documents_missing_content_type() {
#[actix_rt::test]
async fn replace_documents_bad_content_type() {
let server = Server::new().await;
let index = server.index("test");
let server = Server::new_shared();
let index = server.unique_index();
let (response, code) = index.raw_add_documents("", vec![("Content-Type", "doggo")], "").await;
snapshot!(code, @"415 Unsupported Media Type");
snapshot!(json_string!(response), @r###"
@ -362,9 +324,8 @@ async fn replace_documents_bad_content_type() {
#[actix_rt::test]
async fn update_documents_bad_content_type() {
let server = Server::new().await;
let index = server.index("test");
let server = Server::new_shared();
let index = server.unique_index();
let (response, code) = index.raw_update_documents("", Some("doggo"), "").await;
snapshot!(code, @"415 Unsupported Media Type");
snapshot!(json_string!(response), @r###"
@ -379,9 +340,8 @@ async fn update_documents_bad_content_type() {
#[actix_rt::test]
async fn replace_documents_bad_csv_delimiter() {
let server = Server::new().await;
let index = server.index("test");
let server = Server::new_shared();
let index = server.unique_index();
let (response, code) = index
.raw_add_documents("", vec![("Content-Type", "application/json")], "?csvDelimiter")
.await;
@ -428,9 +388,8 @@ async fn replace_documents_bad_csv_delimiter() {
#[actix_rt::test]
async fn update_documents_bad_csv_delimiter() {
let server = Server::new().await;
let index = server.index("test");
let server = Server::new_shared();
let index = server.unique_index();
let (response, code) =
index.raw_update_documents("", Some("application/json"), "?csvDelimiter").await;
snapshot!(code, @"400 Bad Request");
@ -475,9 +434,8 @@ async fn update_documents_bad_csv_delimiter() {
#[actix_rt::test]
async fn replace_documents_csv_delimiter_with_bad_content_type() {
let server = Server::new().await;
let index = server.index("test");
let server = Server::new_shared();
let index = server.unique_index();
let (response, code) = index
.raw_add_documents("", vec![("Content-Type", "application/json")], "?csvDelimiter=a")
.await;
@ -507,9 +465,8 @@ async fn replace_documents_csv_delimiter_with_bad_content_type() {
#[actix_rt::test]
async fn update_documents_csv_delimiter_with_bad_content_type() {
let server = Server::new().await;
let index = server.index("test");
let server = Server::new_shared();
let index = server.unique_index();
let (response, code) =
index.raw_update_documents("", Some("application/json"), "?csvDelimiter=a").await;
snapshot!(code, @"415 Unsupported Media Type");
@ -537,13 +494,12 @@ async fn update_documents_csv_delimiter_with_bad_content_type() {
#[actix_rt::test]
async fn delete_document_by_filter() {
let server = Server::new().await;
let index = server.index("doggo");
let server = Server::new_shared();
let index = server.unique_index();
// send a bad payload type
let (response, code) = index.delete_document_by_filter(json!("hello")).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
snapshot!(response, @r###"
{
"message": "Invalid value type: expected an object, but found a string: `\"hello\"`",
"code": "bad_request",
@ -555,7 +511,7 @@ async fn delete_document_by_filter() {
// send bad payload type
let (response, code) = index.delete_document_by_filter(json!({ "filter": true })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
snapshot!(response, @r###"
{
"message": "Invalid syntax for the filter parameter: `expected String, Array, found: true`.",
"code": "invalid_document_filter",
@ -567,9 +523,9 @@ async fn delete_document_by_filter() {
// send bad filter
let (response, code) = index.delete_document_by_filter(json!({ "filter": "hello"})).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
snapshot!(response, @r###"
{
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `hello`.\n1:6 hello",
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `hello`.\n1:6 hello",
"code": "invalid_document_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_filter"
@ -579,7 +535,7 @@ async fn delete_document_by_filter() {
// send empty filter
let (response, code) = index.delete_document_by_filter(json!({ "filter": ""})).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
snapshot!(response, @r###"
{
"message": "Sending an empty filter is forbidden.",
"code": "invalid_document_filter",
@ -591,7 +547,7 @@ async fn delete_document_by_filter() {
// do not send any filter
let (response, code) = index.delete_document_by_filter(json!({})).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
snapshot!(response, @r###"
{
"message": "Missing field `filter`",
"code": "missing_document_filter",
@ -600,15 +556,14 @@ async fn delete_document_by_filter() {
}
"###);
let index = shared_does_not_exists_index().await;
// index does not exists
let (response, code) =
index.delete_document_by_filter(json!({ "filter": "doggo = bernese"})).await;
snapshot!(code, @"202 Accepted");
let response = server.wait_task(response["taskUid"].as_u64().unwrap()).await;
snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]"}), @r###"
let (response, _code) =
index.delete_document_by_filter_fail(json!({ "filter": "doggo = bernese"})).await;
snapshot!(response, @r###"
{
"uid": 0,
"indexUid": "doggo",
"uid": "[uid]",
"indexUid": "DOES_NOT_EXISTS",
"status": "failed",
"type": "documentDeletion",
"canceledBy": null,
@ -618,7 +573,7 @@ async fn delete_document_by_filter() {
"originalFilter": "\"doggo = bernese\""
},
"error": {
"message": "Index `doggo` not found.",
"message": "Index `DOES_NOT_EXISTS` not found.",
"code": "index_not_found",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#index_not_found"
@ -630,19 +585,14 @@ async fn delete_document_by_filter() {
}
"###);
let (response, code) = index.create(None).await;
snapshot!(code, @"202 Accepted");
server.wait_task(response["taskUid"].as_u64().unwrap()).await;
// no filterable are set
let (response, code) =
index.delete_document_by_filter(json!({ "filter": "doggo = bernese"})).await;
snapshot!(code, @"202 Accepted");
let response = server.wait_task(response["taskUid"].as_u64().unwrap()).await;
snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]"}), @r###"
let index = shared_empty_index().await;
let (response, _code) =
index.delete_document_by_filter_fail(json!({ "filter": "doggo = bernese"})).await;
snapshot!(response, @r###"
{
"uid": 2,
"indexUid": "doggo",
"uid": "[uid]",
"indexUid": "EMPTY_INDEX",
"status": "failed",
"type": "documentDeletion",
"canceledBy": null,
@ -664,19 +614,16 @@ async fn delete_document_by_filter() {
}
"###);
let (response, code) = index.update_settings_filterable_attributes(json!(["doggo"])).await;
snapshot!(code, @"202 Accepted");
server.wait_task(response["taskUid"].as_u64().unwrap()).await;
// not filterable while there is a filterable attribute
let index = shared_index_with_documents().await;
let (response, code) =
index.delete_document_by_filter(json!({ "filter": "catto = jorts"})).await;
index.delete_document_by_filter_fail(json!({ "filter": "catto = jorts"})).await;
snapshot!(code, @"202 Accepted");
let response = server.wait_task(response["taskUid"].as_u64().unwrap()).await;
snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]"}), @r###"
let response = server.wait_task(response.uid()).await;
snapshot!(response, @r###"
{
"uid": 4,
"indexUid": "doggo",
"uid": "[uid]",
"indexUid": "SHARED_DOCUMENTS",
"status": "failed",
"type": "documentDeletion",
"canceledBy": null,
@ -686,7 +633,7 @@ async fn delete_document_by_filter() {
"originalFilter": "\"catto = jorts\""
},
"error": {
"message": "Attribute `catto` is not filterable. Available filterable attributes are: `doggo`.\n1:6 catto = jorts",
"message": "Attribute `catto` is not filterable. Available filterable attributes are: `id`, `title`.\n1:6 catto = jorts",
"code": "invalid_document_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_filter"
@ -701,10 +648,10 @@ async fn delete_document_by_filter() {
#[actix_rt::test]
async fn fetch_document_by_filter() {
let server = Server::new().await;
let index = server.index("doggo");
let server = Server::new_shared();
let index = server.unique_index();
index.update_settings_filterable_attributes(json!(["color"])).await;
index
let (task, _code) = index
.add_documents(
json!([
{ "id": 0, "color": "red" },
@ -715,7 +662,7 @@ async fn fetch_document_by_filter() {
Some("id"),
)
.await;
index.wait_task(1).await;
index.wait_task(task.uid()).await.succeeded();
let (response, code) = index.get_document_by_filter(json!(null)).await;
snapshot!(code, @"400 Bad Request");
@ -776,7 +723,7 @@ async fn fetch_document_by_filter() {
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `cool doggo`.\n1:11 cool doggo",
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `cool doggo`.\n1:11 cool doggo",
"code": "invalid_document_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_filter"
@ -798,8 +745,7 @@ async fn fetch_document_by_filter() {
#[actix_rt::test]
async fn retrieve_vectors() {
let server = Server::new().await;
let index = server.index("doggo");
let index = shared_empty_index().await;
// GET ALL DOCUMENTS BY QUERY
let (response, _code) = index.get_all_documents_raw("?retrieveVectors=tamo").await;

View File

@ -2097,7 +2097,8 @@ async fn generate_and_import_dump_containing_vectors() {
"source": "huggingFace",
"model": "sentence-transformers/all-MiniLM-L6-v2",
"revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
"documentTemplate": "{{doc.doggo}}"
"documentTemplate": "{{doc.doggo}}",
"documentTemplateMaxBytes": 400
}
},
"searchCutoffMs": null,

View File

@ -5,7 +5,7 @@ use crate::json;
#[actix_rt::test]
async fn get_indexes_bad_offset() {
let server = Server::new().await;
let server = Server::new_shared();
let (response, code) = server.list_indexes_raw("?offset=doggo").await;
snapshot!(code, @"400 Bad Request");
@ -21,7 +21,7 @@ async fn get_indexes_bad_offset() {
#[actix_rt::test]
async fn get_indexes_bad_limit() {
let server = Server::new().await;
let server = Server::new_shared();
let (response, code) = server.list_indexes_raw("?limit=doggo").await;
snapshot!(code, @"400 Bad Request");
@ -37,7 +37,7 @@ async fn get_indexes_bad_limit() {
#[actix_rt::test]
async fn get_indexes_unknown_field() {
let server = Server::new().await;
let server = Server::new_shared();
let (response, code) = server.list_indexes_raw("?doggo=nolimit").await;
snapshot!(code, @"400 Bad Request");
@ -53,9 +53,9 @@ async fn get_indexes_unknown_field() {
#[actix_rt::test]
async fn create_index_missing_uid() {
let server = Server::new().await;
let server = Server::new_shared();
let (response, code) = server.create_index(json!({ "primaryKey": "doggo" })).await;
let (response, code) = server.create_index_fail(json!({ "primaryKey": "doggo" })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
@ -69,9 +69,9 @@ async fn create_index_missing_uid() {
#[actix_rt::test]
async fn create_index_bad_uid() {
let server = Server::new().await;
let server = Server::new_shared();
let (response, code) = server.create_index(json!({ "uid": "the best doggo" })).await;
let (response, code) = server.create_index_fail(json!({ "uid": "the best doggo" })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
@ -82,7 +82,7 @@ async fn create_index_bad_uid() {
}
"###);
let (response, code) = server.create_index(json!({ "uid": true })).await;
let (response, code) = server.create_index_fail(json!({ "uid": true })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
@ -96,10 +96,10 @@ async fn create_index_bad_uid() {
#[actix_rt::test]
async fn create_index_bad_primary_key() {
let server = Server::new().await;
let server = Server::new_shared();
let (response, code) = server
.create_index(json!({ "uid": "doggo", "primaryKey": ["the", "best", "doggo"] }))
.create_index_fail(json!({ "uid": "doggo", "primaryKey": ["the", "best", "doggo"] }))
.await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
@ -114,9 +114,10 @@ async fn create_index_bad_primary_key() {
#[actix_rt::test]
async fn create_index_unknown_field() {
let server = Server::new().await;
let server = Server::new_shared();
let (response, code) = server.create_index(json!({ "uid": "doggo", "doggo": "bernese" })).await;
let (response, code) =
server.create_index_fail(json!({ "uid": "doggo", "doggo": "bernese" })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
@ -130,10 +131,8 @@ async fn create_index_unknown_field() {
#[actix_rt::test]
async fn get_index_bad_uid() {
let server = Server::new().await;
let index = server.index("the good doggo");
let (response, code) = index.get().await;
let server = Server::new_shared();
let (response, code) = server.get_index_fail("the good doggo").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
@ -147,9 +146,8 @@ async fn get_index_bad_uid() {
#[actix_rt::test]
async fn update_index_bad_primary_key() {
let server = Server::new().await;
let index = server.index("doggo");
let server = Server::new_shared();
let index = server.unique_index();
let (response, code) = index.update_raw(json!({ "primaryKey": ["doggo"] })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
@ -164,9 +162,8 @@ async fn update_index_bad_primary_key() {
#[actix_rt::test]
async fn update_index_immutable_uid() {
let server = Server::new().await;
let index = server.index("doggo");
let server = Server::new_shared();
let index = server.unique_index();
let (response, code) = index.update_raw(json!({ "uid": "doggo" })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
@ -181,9 +178,8 @@ async fn update_index_immutable_uid() {
#[actix_rt::test]
async fn update_index_immutable_created_at() {
let server = Server::new().await;
let index = server.index("doggo");
let server = Server::new_shared();
let index = server.unique_index();
let (response, code) = index.update_raw(json!({ "createdAt": "doggo" })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
@ -198,9 +194,8 @@ async fn update_index_immutable_created_at() {
#[actix_rt::test]
async fn update_index_immutable_updated_at() {
let server = Server::new().await;
let index = server.index("doggo");
let server = Server::new_shared();
let index = server.unique_index();
let (response, code) = index.update_raw(json!({ "updatedAt": "doggo" })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
@ -215,9 +210,8 @@ async fn update_index_immutable_updated_at() {
#[actix_rt::test]
async fn update_index_unknown_field() {
let server = Server::new().await;
let index = server.index("doggo");
let server = Server::new_shared();
let index = server.unique_index();
let (response, code) = index.update_raw(json!({ "doggo": "bork" })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
@ -232,10 +226,9 @@ async fn update_index_unknown_field() {
#[actix_rt::test]
async fn update_index_bad_uid() {
let server = Server::new().await;
let index = server.index("the good doggo");
let (response, code) = index.update_raw(json!({ "primaryKey": "doggo" })).await;
let server = Server::new_shared();
let (response, code) =
server.update_raw_index_fail("the good doggo", json!({ "primaryKey": "doggo" })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
@ -249,10 +242,8 @@ async fn update_index_bad_uid() {
#[actix_rt::test]
async fn delete_index_bad_uid() {
let server = Server::new().await;
let index = server.index("the good doggo");
let (response, code) = index.delete().await;
let server = Server::new_shared();
let (response, code) = server.delete_index_fail("the good doggo").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{

View File

@ -5,7 +5,7 @@ use crate::json;
#[actix_rt::test]
async fn logs_stream_bad_target() {
let server = Server::new().await;
let server = Server::new_shared();
// Wrong type
let (response, code) = server.service.post("/logs/stream", json!({ "target": true })).await;
@ -58,7 +58,7 @@ async fn logs_stream_bad_target() {
#[actix_rt::test]
async fn logs_stream_bad_mode() {
let server = Server::new().await;
let server = Server::new_shared();
// Wrong type
let (response, code) = server.service.post("/logs/stream", json!({ "mode": true })).await;
@ -99,7 +99,7 @@ async fn logs_stream_bad_mode() {
#[actix_rt::test]
async fn logs_stream_bad_profile_memory() {
let server = Server::new().await;
let server = Server::new_shared();
// Wrong type
let (response, code) =
@ -156,7 +156,7 @@ async fn logs_stream_bad_profile_memory() {
#[actix_rt::test]
async fn logs_stream_without_enabling_the_route() {
let server = Server::new().await;
let server = Server::new_shared();
let (response, code) = server.service.post("/logs/stream", json!({})).await;
snapshot!(code, @"400 Bad Request");

View File

@ -1,10 +1,13 @@
mod error;
use std::num::NonZeroUsize;
use std::rc::Rc;
use std::str::FromStr;
use actix_web::http::header::ContentType;
use actix_web::web::Data;
use meili_snap::snapshot;
use meilisearch::search_queue::SearchQueue;
use meilisearch::{analytics, create_app, Opt, SubscriberForSecondLayer};
use tracing::level_filters::LevelFilter;
use tracing_subscriber::layer::SubscriberExt;
@ -40,10 +43,15 @@ async fn basic_test_log_stream_route() {
.with_span_events(tracing_subscriber::fmt::format::FmtSpan::ACTIVE)
.with_filter(tracing_subscriber::filter::LevelFilter::from_str("OFF").unwrap()),
);
let search_queue = SearchQueue::new(
server.service.options.experimental_search_queue_size,
NonZeroUsize::new(1).unwrap(),
);
let app = actix_web::test::init_service(create_app(
server.service.index_scheduler.clone().into(),
server.service.auth.clone().into(),
Data::new(search_queue),
server.service.options.clone(),
(route_layer_handle, stderr_layer_handle),
analytics::MockAnalytics::new(&server.service.options),

View File

@ -1,16 +1,13 @@
use meili_snap::*;
use super::DOCUMENTS;
use crate::common::Server;
use crate::common::{shared_does_not_exists_index, Server};
use crate::json;
#[actix_rt::test]
async fn search_unexisting_index() {
let server = Server::new().await;
let index = server.index("test");
let index = shared_does_not_exists_index().await;
let expected_response = json!({
"message": "Index `test` not found.",
"message": "Index `DOES_NOT_EXISTS` not found.",
"code": "index_not_found",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#index_not_found"
@ -26,8 +23,8 @@ async fn search_unexisting_index() {
#[actix_rt::test]
async fn search_unexisting_parameter() {
let server = Server::new().await;
let index = server.index("test");
let server = Server::new_shared();
let index = server.unique_index();
index
.search(json!({"marin": "hello"}), |response, code| {
@ -39,8 +36,8 @@ async fn search_unexisting_parameter() {
#[actix_rt::test]
async fn search_bad_q() {
let server = Server::new().await;
let index = server.index("test");
let server = Server::new_shared();
let index = server.unique_index();
let (response, code) = index.search_post(json!({"q": ["doggo"]})).await;
snapshot!(code, @"400 Bad Request");
@ -57,8 +54,8 @@ async fn search_bad_q() {
#[actix_rt::test]
async fn search_bad_offset() {
let server = Server::new().await;
let index = server.index("test");
let server = Server::new_shared();
let index = server.unique_index();
let (response, code) = index.search_post(json!({"offset": "doggo"})).await;
snapshot!(code, @"400 Bad Request");
@ -85,8 +82,8 @@ async fn search_bad_offset() {
#[actix_rt::test]
async fn search_bad_limit() {
let server = Server::new().await;
let index = server.index("test");
let server = Server::new_shared();
let index = server.unique_index();
let (response, code) = index.search_post(json!({"limit": "doggo"})).await;
snapshot!(code, @"400 Bad Request");
@ -113,8 +110,8 @@ async fn search_bad_limit() {
#[actix_rt::test]
async fn search_bad_page() {
let server = Server::new().await;
let index = server.index("test");
let server = Server::new_shared();
let index = server.unique_index();
let (response, code) = index.search_post(json!({"page": "doggo"})).await;
snapshot!(code, @"400 Bad Request");
@ -141,8 +138,8 @@ async fn search_bad_page() {
#[actix_rt::test]
async fn search_bad_hits_per_page() {
let server = Server::new().await;
let index = server.index("test");
let server = Server::new_shared();
let index = server.unique_index();
let (response, code) = index.search_post(json!({"hitsPerPage": "doggo"})).await;
snapshot!(code, @"400 Bad Request");
@ -169,8 +166,8 @@ async fn search_bad_hits_per_page() {
#[actix_rt::test]
async fn search_bad_attributes_to_retrieve() {
let server = Server::new().await;
let index = server.index("test");
let server = Server::new_shared();
let index = server.unique_index();
let (response, code) = index.search_post(json!({"attributesToRetrieve": "doggo"})).await;
snapshot!(code, @"400 Bad Request");
@ -187,8 +184,8 @@ async fn search_bad_attributes_to_retrieve() {
#[actix_rt::test]
async fn search_bad_retrieve_vectors() {
let server = Server::new().await;
let index = server.index("test");
let server = Server::new_shared();
let index = server.unique_index();
let (response, code) = index.search_post(json!({"retrieveVectors": "doggo"})).await;
snapshot!(code, @"400 Bad Request");
@ -237,8 +234,8 @@ async fn search_bad_retrieve_vectors() {
#[actix_rt::test]
async fn search_bad_attributes_to_crop() {
let server = Server::new().await;
let index = server.index("test");
let server = Server::new_shared();
let index = server.unique_index();
let (response, code) = index.search_post(json!({"attributesToCrop": "doggo"})).await;
snapshot!(code, @"400 Bad Request");
@ -255,8 +252,8 @@ async fn search_bad_attributes_to_crop() {
#[actix_rt::test]
async fn search_bad_crop_length() {
let server = Server::new().await;
let index = server.index("test");
let server = Server::new_shared();
let index = server.unique_index();
let (response, code) = index.search_post(json!({"cropLength": "doggo"})).await;
snapshot!(code, @"400 Bad Request");
@ -283,8 +280,8 @@ async fn search_bad_crop_length() {
#[actix_rt::test]
async fn search_bad_attributes_to_highlight() {
let server = Server::new().await;
let index = server.index("test");
let server = Server::new_shared();
let index = server.unique_index();
let (response, code) = index.search_post(json!({"attributesToHighlight": "doggo"})).await;
snapshot!(code, @"400 Bad Request");
@ -303,14 +300,12 @@ async fn search_bad_attributes_to_highlight() {
async fn search_bad_filter() {
// Since a filter is deserialized as a json Value it will never fail to deserialize.
// Thus the error message is not generated by deserr but written by us.
let server = Server::new().await;
let index = server.index("test");
let server = Server::new_shared();
let index = server.unique_index();
// Also, to trigger the error message we need to effectively create the index or else it'll throw an
// index does not exists error.
let (_, code) = index.create(None).await;
server.wait_task(0).await;
snapshot!(code, @"202 Accepted");
let (response, _code) = index.create(None).await;
server.wait_task(response.uid()).await.succeeded();
let (response, code) = index.search_post(json!({ "filter": true })).await;
snapshot!(code, @"400 Bad Request");
@ -327,8 +322,8 @@ async fn search_bad_filter() {
#[actix_rt::test]
async fn search_bad_sort() {
let server = Server::new().await;
let index = server.index("test");
let server = Server::new_shared();
let index = server.unique_index();
let (response, code) = index.search_post(json!({"sort": "doggo"})).await;
snapshot!(code, @"400 Bad Request");
@ -345,8 +340,8 @@ async fn search_bad_sort() {
#[actix_rt::test]
async fn search_bad_show_matches_position() {
let server = Server::new().await;
let index = server.index("test");
let server = Server::new_shared();
let index = server.unique_index();
let (response, code) = index.search_post(json!({"showMatchesPosition": "doggo"})).await;
snapshot!(code, @"400 Bad Request");
@ -373,8 +368,8 @@ async fn search_bad_show_matches_position() {
#[actix_rt::test]
async fn search_bad_facets() {
let server = Server::new().await;
let index = server.index("test");
let server = Server::new_shared();
let index = server.unique_index();
let (response, code) = index.search_post(json!({"facets": "doggo"})).await;
snapshot!(code, @"400 Bad Request");
@ -391,8 +386,8 @@ async fn search_bad_facets() {
#[actix_rt::test]
async fn search_bad_threshold() {
let server = Server::new().await;
let index = server.index("test");
let server = Server::new_shared();
let index = server.unique_index();
let (response, code) = index.search_post(json!({"rankingScoreThreshold": "doggo"})).await;
snapshot!(code, @"400 Bad Request");
@ -408,8 +403,8 @@ async fn search_bad_threshold() {
#[actix_rt::test]
async fn search_invalid_threshold() {
let server = Server::new().await;
let index = server.index("test");
let server = Server::new_shared();
let index = server.unique_index();
let (response, code) = index.search_post(json!({"rankingScoreThreshold": 42})).await;
snapshot!(code, @"400 Bad Request");
@ -425,11 +420,11 @@ async fn search_invalid_threshold() {
#[actix_rt::test]
async fn search_non_filterable_facets() {
let server = Server::new().await;
let index = server.index("test");
index.update_settings(json!({"filterableAttributes": ["title"]})).await;
let server = Server::new_shared();
let index = server.unique_index();
let (response, _code) = index.update_settings(json!({"filterableAttributes": ["title"]})).await;
// Wait for the settings update to complete
index.wait_task(0).await;
index.wait_task(response.uid()).await.succeeded();
let (response, code) = index.search_post(json!({"facets": ["doggo"]})).await;
snapshot!(code, @"400 Bad Request");
@ -456,10 +451,11 @@ async fn search_non_filterable_facets() {
#[actix_rt::test]
async fn search_non_filterable_facets_multiple_filterable() {
let server = Server::new().await;
let index = server.index("test");
index.update_settings(json!({"filterableAttributes": ["title", "genres"]})).await;
index.wait_task(0).await;
let server = Server::new_shared();
let index = server.unique_index();
let (response, _code) =
index.update_settings(json!({"filterableAttributes": ["title", "genres"]})).await;
index.wait_task(response.uid()).await.succeeded();
let (response, code) = index.search_post(json!({"facets": ["doggo"]})).await;
snapshot!(code, @"400 Bad Request");
@ -486,10 +482,10 @@ async fn search_non_filterable_facets_multiple_filterable() {
#[actix_rt::test]
async fn search_non_filterable_facets_no_filterable() {
let server = Server::new().await;
let index = server.index("test");
index.update_settings(json!({"filterableAttributes": []})).await;
index.wait_task(0).await;
let server = Server::new_shared();
let index = server.unique_index();
let (response, _code) = index.update_settings(json!({"filterableAttributes": []})).await;
index.wait_task(response.uid()).await.succeeded();
let (response, code) = index.search_post(json!({"facets": ["doggo"]})).await;
snapshot!(code, @"400 Bad Request");
@ -516,10 +512,11 @@ async fn search_non_filterable_facets_no_filterable() {
#[actix_rt::test]
async fn search_non_filterable_facets_multiple_facets() {
let server = Server::new().await;
let index = server.index("test");
index.update_settings(json!({"filterableAttributes": ["title", "genres"]})).await;
index.wait_task(0).await;
let server = Server::new_shared();
let index = server.unique_index();
let (response, _uid) =
index.update_settings(json!({"filterableAttributes": ["title", "genres"]})).await;
index.wait_task(response.uid()).await.succeeded();
let (response, code) = index.search_post(json!({"facets": ["doggo", "neko"]})).await;
snapshot!(code, @"400 Bad Request");
@ -546,8 +543,8 @@ async fn search_non_filterable_facets_multiple_facets() {
#[actix_rt::test]
async fn search_bad_highlight_pre_tag() {
let server = Server::new().await;
let index = server.index("test");
let server = Server::new_shared();
let index = server.unique_index();
let (response, code) = index.search_post(json!({"highlightPreTag": ["doggo"]})).await;
snapshot!(code, @"400 Bad Request");
@ -564,8 +561,8 @@ async fn search_bad_highlight_pre_tag() {
#[actix_rt::test]
async fn search_bad_highlight_post_tag() {
let server = Server::new().await;
let index = server.index("test");
let server = Server::new_shared();
let index = server.unique_index();
let (response, code) = index.search_post(json!({"highlightPostTag": ["doggo"]})).await;
snapshot!(code, @"400 Bad Request");
@ -582,8 +579,8 @@ async fn search_bad_highlight_post_tag() {
#[actix_rt::test]
async fn search_bad_crop_marker() {
let server = Server::new().await;
let index = server.index("test");
let server = Server::new_shared();
let index = server.unique_index();
let (response, code) = index.search_post(json!({"cropMarker": ["doggo"]})).await;
snapshot!(code, @"400 Bad Request");
@ -600,8 +597,8 @@ async fn search_bad_crop_marker() {
#[actix_rt::test]
async fn search_bad_matching_strategy() {
let server = Server::new().await;
let index = server.index("test");
let server = Server::new_shared();
let index = server.unique_index();
let (response, code) = index.search_post(json!({"matchingStrategy": "doggo"})).await;
snapshot!(code, @"400 Bad Request");
@ -639,20 +636,17 @@ async fn search_bad_matching_strategy() {
#[actix_rt::test]
async fn filter_invalid_syntax_object() {
let server = Server::new().await;
let index = server.index("test");
let server = Server::new_shared();
let index = server.unique_index();
index.update_settings(json!({"filterableAttributes": ["title"]})).await;
let documents = DOCUMENTS.clone();
let (task, _code) = index.add_documents(documents, None).await;
let (task, _code) = index.update_settings(json!({"filterableAttributes": ["title"]})).await;
index.wait_task(task.uid()).await;
index
.search(json!({"filter": "title & Glass"}), |response, code| {
snapshot!(response, @r###"
{
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
"code": "invalid_search_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
@ -665,20 +659,17 @@ async fn filter_invalid_syntax_object() {
#[actix_rt::test]
async fn filter_invalid_syntax_array() {
let server = Server::new().await;
let index = server.index("test");
let server = Server::new_shared();
let index = server.unique_index();
index.update_settings(json!({"filterableAttributes": ["title"]})).await;
let documents = DOCUMENTS.clone();
let (task, _code) = index.add_documents(documents, None).await;
let (task, _code) = index.update_settings(json!({"filterableAttributes": ["title"]})).await;
index.wait_task(task.uid()).await;
index
.search(json!({"filter": ["title & Glass"]}), |response, code| {
snapshot!(response, @r###"
{
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
"code": "invalid_search_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
@ -691,14 +682,11 @@ async fn filter_invalid_syntax_array() {
#[actix_rt::test]
async fn filter_invalid_syntax_string() {
let server = Server::new().await;
let index = server.index("test");
let server = Server::new_shared();
let index = server.unique_index();
index.update_settings(json!({"filterableAttributes": ["title"]})).await;
let documents = DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(1).await;
let (task, _code) = index.update_settings(json!({"filterableAttributes": ["title"]})).await;
index.wait_task(task.uid()).await;
let expected_response = json!({
"message": "Found unexpected characters at the end of the filter: `XOR title = Glass`. You probably forgot an `OR` or an `AND` rule.\n15:32 title = Glass XOR title = Glass",
@ -716,14 +704,11 @@ async fn filter_invalid_syntax_string() {
#[actix_rt::test]
async fn filter_invalid_attribute_array() {
let server = Server::new().await;
let index = server.index("test");
let server = Server::new_shared();
let index = server.unique_index();
index.update_settings(json!({"filterableAttributes": ["title"]})).await;
let documents = DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(1).await;
let (task, _code) = index.update_settings(json!({"filterableAttributes": ["title"]})).await;
index.wait_task(task.uid()).await;
let expected_response = json!({
"message": "Attribute `many` is not filterable. Available filterable attributes are: `title`.\n1:5 many = Glass",
@ -741,14 +726,11 @@ async fn filter_invalid_attribute_array() {
#[actix_rt::test]
async fn filter_invalid_attribute_string() {
let server = Server::new().await;
let index = server.index("test");
let server = Server::new_shared();
let index = server.unique_index();
index.update_settings(json!({"filterableAttributes": ["title"]})).await;
let documents = DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(1).await;
let (task, _code) = index.update_settings(json!({"filterableAttributes": ["title"]})).await;
index.wait_task(task.uid()).await;
let expected_response = json!({
"message": "Attribute `many` is not filterable. Available filterable attributes are: `title`.\n1:5 many = Glass",
@ -766,14 +748,11 @@ async fn filter_invalid_attribute_string() {
#[actix_rt::test]
async fn filter_reserved_geo_attribute_array() {
let server = Server::new().await;
let index = server.index("test");
let server = Server::new_shared();
let index = server.unique_index();
index.update_settings(json!({"filterableAttributes": ["title"]})).await;
let documents = DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(1).await;
let (task, _code) = index.update_settings(json!({"filterableAttributes": ["title"]})).await;
index.wait_task(task.uid()).await;
let expected_response = json!({
"message": "`_geo` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:13 _geo = Glass",
@ -791,14 +770,11 @@ async fn filter_reserved_geo_attribute_array() {
#[actix_rt::test]
async fn filter_reserved_geo_attribute_string() {
let server = Server::new().await;
let index = server.index("test");
let server = Server::new_shared();
let index = server.unique_index();
index.update_settings(json!({"filterableAttributes": ["title"]})).await;
let documents = DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(1).await;
let (task, _code) = index.update_settings(json!({"filterableAttributes": ["title"]})).await;
index.wait_task(task.uid()).await;
let expected_response = json!({
"message": "`_geo` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:13 _geo = Glass",
@ -816,14 +792,11 @@ async fn filter_reserved_geo_attribute_string() {
#[actix_rt::test]
async fn filter_reserved_attribute_array() {
let server = Server::new().await;
let index = server.index("test");
let server = Server::new_shared();
let index = server.unique_index();
index.update_settings(json!({"filterableAttributes": ["title"]})).await;
let documents = DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(1).await;
let (task, _code) = index.update_settings(json!({"filterableAttributes": ["title"]})).await;
index.wait_task(task.uid()).await;
let expected_response = json!({
"message": "`_geoDistance` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:21 _geoDistance = Glass",
@ -841,14 +814,11 @@ async fn filter_reserved_attribute_array() {
#[actix_rt::test]
async fn filter_reserved_attribute_string() {
let server = Server::new().await;
let index = server.index("test");
let server = Server::new_shared();
let index = server.unique_index();
index.update_settings(json!({"filterableAttributes": ["title"]})).await;
let documents = DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(1).await;
let (task, _code) = index.update_settings(json!({"filterableAttributes": ["title"]})).await;
index.wait_task(task.uid()).await;
let expected_response = json!({
"message": "`_geoDistance` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:21 _geoDistance = Glass",
@ -866,14 +836,11 @@ async fn filter_reserved_attribute_string() {
#[actix_rt::test]
async fn filter_reserved_geo_point_array() {
let server = Server::new().await;
let index = server.index("test");
let server = Server::new_shared();
let index = server.unique_index();
index.update_settings(json!({"filterableAttributes": ["title"]})).await;
let documents = DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(1).await;
let (task, _code) = index.update_settings(json!({"filterableAttributes": ["title"]})).await;
index.wait_task(task.uid()).await;
let expected_response = json!({
"message": "`_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:18 _geoPoint = Glass",
@ -891,14 +858,11 @@ async fn filter_reserved_geo_point_array() {
#[actix_rt::test]
async fn filter_reserved_geo_point_string() {
let server = Server::new().await;
let index = server.index("test");
let server = Server::new_shared();
let index = server.unique_index();
index.update_settings(json!({"filterableAttributes": ["title"]})).await;
let documents = DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(1).await;
let (task, _code) = index.update_settings(json!({"filterableAttributes": ["title"]})).await;
index.wait_task(task.uid()).await;
let expected_response = json!({
"message": "`_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:18 _geoPoint = Glass",
@ -916,14 +880,11 @@ async fn filter_reserved_geo_point_string() {
#[actix_rt::test]
async fn sort_geo_reserved_attribute() {
let server = Server::new().await;
let index = server.index("test");
let server = Server::new_shared();
let index = server.unique_index();
index.update_settings(json!({"sortableAttributes": ["id"]})).await;
let documents = DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(1).await;
let (task, _code) = index.update_settings(json!({"sortableAttributes": ["id"]})).await;
index.wait_task(task.uid()).await;
let expected_response = json!({
"message": "`_geo` is a reserved keyword and thus can't be used as a sort expression. Use the _geoPoint(latitude, longitude) built-in rule to sort on _geo field coordinates.",
@ -946,14 +907,11 @@ async fn sort_geo_reserved_attribute() {
#[actix_rt::test]
async fn sort_reserved_attribute() {
let server = Server::new().await;
let index = server.index("test");
let server = Server::new_shared();
let index = server.unique_index();
index.update_settings(json!({"sortableAttributes": ["id"]})).await;
let documents = DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(1).await;
let (task, _code) = index.update_settings(json!({"sortableAttributes": ["id"]})).await;
index.wait_task(task.uid()).await;
let expected_response = json!({
"message": "`_geoDistance` is a reserved keyword and thus can't be used as a sort expression.",
@ -976,14 +934,10 @@ async fn sort_reserved_attribute() {
#[actix_rt::test]
async fn sort_unsortable_attribute() {
let server = Server::new().await;
let index = server.index("test");
index.update_settings(json!({"sortableAttributes": ["id"]})).await;
let documents = DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(1).await;
let server = Server::new_shared();
let index = server.unique_index();
let (response, _code) = index.update_settings(json!({"sortableAttributes": ["id"]})).await;
index.wait_task(response.uid()).await.succeeded();
let expected_response = json!({
"message": "Attribute `title` is not sortable. Available sortable attributes are: `id`.",
@ -1006,14 +960,11 @@ async fn sort_unsortable_attribute() {
#[actix_rt::test]
async fn sort_invalid_syntax() {
let server = Server::new().await;
let index = server.index("test");
let server = Server::new_shared();
let index = server.unique_index();
index.update_settings(json!({"sortableAttributes": ["id"]})).await;
let documents = DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(1).await;
let (response, _code) = index.update_settings(json!({"sortableAttributes": ["id"]})).await;
index.wait_task(response.uid()).await.succeeded();
let expected_response = json!({
"message": "Invalid syntax for the sort parameter: expected expression ending by `:asc` or `:desc`, found `title`.",
@ -1036,18 +987,15 @@ async fn sort_invalid_syntax() {
#[actix_rt::test]
async fn sort_unset_ranking_rule() {
let server = Server::new().await;
let index = server.index("test");
let server = Server::new_shared();
let index = server.unique_index();
index
let (response, _code) = index
.update_settings(
json!({"sortableAttributes": ["title"], "rankingRules": ["proximity", "exactness"]}),
)
.await;
let documents = DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(1).await;
index.wait_task(response.uid()).await.succeeded();
let expected_response = json!({
"message": "You must specify where `sort` is listed in the rankingRules setting to use the sort parameter at search time.",
@ -1070,14 +1018,11 @@ async fn sort_unset_ranking_rule() {
#[actix_rt::test]
async fn search_on_unknown_field() {
let server = Server::new().await;
let index = server.index("test");
index.update_settings_searchable_attributes(json!(["id", "title"])).await;
index.wait_task(0).await;
let documents = DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(1).await;
let server = Server::new_shared();
let index = server.unique_index();
let (response, _code) =
index.update_settings_searchable_attributes(json!(["id", "title"])).await;
index.wait_task(response.uid()).await.succeeded();
index
.search(
@ -1099,14 +1044,11 @@ async fn search_on_unknown_field() {
#[actix_rt::test]
async fn search_on_unknown_field_plus_joker() {
let server = Server::new().await;
let index = server.index("test");
index.update_settings_searchable_attributes(json!(["id", "title"])).await;
index.wait_task(0).await;
let documents = DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(1).await;
let server = Server::new_shared();
let index = server.unique_index();
let (response, _code) =
index.update_settings_searchable_attributes(json!(["id", "title"])).await;
index.wait_task(response.uid()).await.succeeded();
index
.search(
@ -1145,11 +1087,10 @@ async fn search_on_unknown_field_plus_joker() {
#[actix_rt::test]
async fn distinct_at_search_time() {
let server = Server::new().await;
let index = server.index("tamo");
let server = Server::new_shared();
let index = server.unique_index();
let (task, _) = index.create(None).await;
let task = index.wait_task(task.uid()).await;
snapshot!(task, name: "task-succeed");
index.wait_task(task.uid()).await.succeeded();
let (response, code) =
index.search_post(json!({"page": 0, "hitsPerPage": 2, "distinct": "doggo.truc"})).await;
@ -1210,8 +1151,8 @@ async fn distinct_at_search_time() {
async fn search_with_contains_without_enabling_the_feature() {
// Since a filter is deserialized as a json Value it will never fail to deserialize.
// Thus the error message is not generated by deserr but written by us.
let server = Server::new().await;
let index = server.index("doggo");
let server = Server::new_shared();
let index = server.unique_index();
// Also, to trigger the error message we need to effectively create the index or else it'll throw an
// index does not exists error.
let (task, _code) = index.create(None).await;
@ -1222,7 +1163,7 @@ async fn search_with_contains_without_enabling_the_feature() {
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Using `CONTAINS` in a filter requires enabling the `contains filter` experimental feature. See https://github.com/orgs/meilisearch/discussions/763\n7:15 doggo CONTAINS kefir",
"message": "Using `CONTAINS` or `STARTS WITH` in a filter requires enabling the `contains filter` experimental feature. See https://github.com/orgs/meilisearch/discussions/763\n7:15 doggo CONTAINS kefir",
"code": "feature_not_enabled",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#feature_not_enabled"
@ -1235,7 +1176,7 @@ async fn search_with_contains_without_enabling_the_feature() {
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Using `CONTAINS` in a filter requires enabling the `contains filter` experimental feature. See https://github.com/orgs/meilisearch/discussions/763\n25:33 doggo != echo AND doggo CONTAINS kefir",
"message": "Using `CONTAINS` or `STARTS WITH` in a filter requires enabling the `contains filter` experimental feature. See https://github.com/orgs/meilisearch/discussions/763\n25:33 doggo != echo AND doggo CONTAINS kefir",
"code": "feature_not_enabled",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#feature_not_enabled"
@ -1251,7 +1192,7 @@ async fn search_with_contains_without_enabling_the_feature() {
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Using `CONTAINS` in a filter requires enabling the `contains filter` experimental feature. See https://github.com/orgs/meilisearch/discussions/763\n7:15 doggo CONTAINS kefir",
"message": "Using `CONTAINS` or `STARTS WITH` in a filter requires enabling the `contains filter` experimental feature. See https://github.com/orgs/meilisearch/discussions/763\n7:15 doggo CONTAINS kefir",
"code": "feature_not_enabled",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#feature_not_enabled"
@ -1263,7 +1204,7 @@ async fn search_with_contains_without_enabling_the_feature() {
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Using `CONTAINS` in a filter requires enabling the `contains filter` experimental feature. See https://github.com/orgs/meilisearch/discussions/763\n7:15 doggo CONTAINS kefir",
"message": "Using `CONTAINS` or `STARTS WITH` in a filter requires enabling the `contains filter` experimental feature. See https://github.com/orgs/meilisearch/discussions/763\n7:15 doggo CONTAINS kefir",
"code": "feature_not_enabled",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#feature_not_enabled"

View File

@ -128,7 +128,7 @@ async fn simple_search() {
let (response, code) = index
.search_post(
json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.2}, "retrieveVectors": true}),
json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.2, "embedder": "default"}, "retrieveVectors": true}),
)
.await;
snapshot!(code, @"200 OK");
@ -137,7 +137,7 @@ async fn simple_search() {
let (response, code) = index
.search_post(
json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.5}, "showRankingScore": true, "retrieveVectors": true}),
json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.5, "embedder": "default"}, "showRankingScore": true, "retrieveVectors": true}),
)
.await;
snapshot!(code, @"200 OK");
@ -146,7 +146,7 @@ async fn simple_search() {
let (response, code) = index
.search_post(
json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.8}, "showRankingScore": true, "retrieveVectors": true}),
json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.8, "embedder": "default"}, "showRankingScore": true, "retrieveVectors": true}),
)
.await;
snapshot!(code, @"200 OK");
@ -161,7 +161,7 @@ async fn limit_offset() {
let (response, code) = index
.search_post(
json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.2}, "retrieveVectors": true, "offset": 1, "limit": 1}),
json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.2, "embedder": "default"}, "retrieveVectors": true, "offset": 1, "limit": 1}),
)
.await;
snapshot!(code, @"200 OK");
@ -174,7 +174,7 @@ async fn limit_offset() {
let (response, code) = index
.search_post(
json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.9}, "retrieveVectors": true, "offset": 1, "limit": 1}),
json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.9, "embedder": "default"}, "retrieveVectors": true, "offset": 1, "limit": 1}),
)
.await;
snapshot!(code, @"200 OK");
@ -188,8 +188,11 @@ async fn simple_search_hf() {
let server = Server::new().await;
let index = index_with_documents_hf(&server, &SIMPLE_SEARCH_DOCUMENTS).await;
let (response, code) =
index.search_post(json!({"q": "Captain", "hybrid": {"semanticRatio": 0.2}})).await;
let (response, code) = index
.search_post(
json!({"q": "Captain", "hybrid": {"semanticRatio": 0.2, "embedder": "default"}}),
)
.await;
snapshot!(code, @"200 OK");
snapshot!(response["hits"], @r###"[{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2"},{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3"},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1"}]"###);
snapshot!(response["semanticHitCount"], @"0");
@ -197,7 +200,7 @@ async fn simple_search_hf() {
let (response, code) = index
.search_post(
// disable ranking score as the vectors between architectures are not equal
json!({"q": "Captain", "hybrid": {"semanticRatio": 0.55}, "showRankingScore": false}),
json!({"q": "Captain", "hybrid": {"embedder": "default", "semanticRatio": 0.55}, "showRankingScore": false}),
)
.await;
snapshot!(code, @"200 OK");
@ -206,7 +209,7 @@ async fn simple_search_hf() {
let (response, code) = index
.search_post(
json!({"q": "Captain", "hybrid": {"semanticRatio": 0.8}, "showRankingScore": false}),
json!({"q": "Captain", "hybrid": {"embedder": "default", "semanticRatio": 0.8}, "showRankingScore": false}),
)
.await;
snapshot!(code, @"200 OK");
@ -215,7 +218,7 @@ async fn simple_search_hf() {
let (response, code) = index
.search_post(
json!({"q": "Movie World", "hybrid": {"semanticRatio": 0.2}, "showRankingScore": false}),
json!({"q": "Movie World", "hybrid": {"embedder": "default", "semanticRatio": 0.2}, "showRankingScore": false}),
)
.await;
snapshot!(code, @"200 OK");
@ -224,7 +227,7 @@ async fn simple_search_hf() {
let (response, code) = index
.search_post(
json!({"q": "Wonder replacement", "hybrid": {"semanticRatio": 0.2}, "showRankingScore": false}),
json!({"q": "Wonder replacement", "hybrid": {"embedder": "default", "semanticRatio": 0.2}, "showRankingScore": false}),
)
.await;
snapshot!(code, @"200 OK");
@ -237,7 +240,7 @@ async fn distribution_shift() {
let server = Server::new().await;
let index = index_with_documents_user_provided(&server, &SIMPLE_SEARCH_DOCUMENTS_VEC).await;
let search = json!({"q": "Captain", "vector": [1.0, 1.0], "showRankingScore": true, "hybrid": {"semanticRatio": 1.0}, "retrieveVectors": true});
let search = json!({"q": "Captain", "vector": [1.0, 1.0], "showRankingScore": true, "hybrid": {"embedder": "default", "semanticRatio": 1.0}, "retrieveVectors": true});
let (response, code) = index.search_post(search.clone()).await;
snapshot!(code, @"200 OK");
snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":{"embeddings":[[2.0,3.0]],"regenerate":false}},"_rankingScore":0.990290343761444},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":{"embeddings":[[1.0,2.0]],"regenerate":false}},"_rankingScore":0.974341630935669},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":{"embeddings":[[1.0,3.0]],"regenerate":false}},"_rankingScore":0.9472135901451112}]"###);
@ -271,7 +274,7 @@ async fn highlighter() {
let (response, code) = index
.search_post(json!({"q": "Captain Marvel", "vector": [1.0, 1.0],
"hybrid": {"semanticRatio": 0.2},
"hybrid": {"embedder": "default", "semanticRatio": 0.2},
"retrieveVectors": true,
"attributesToHighlight": [
"desc",
@ -287,7 +290,7 @@ async fn highlighter() {
let (response, code) = index
.search_post(json!({"q": "Captain Marvel", "vector": [1.0, 1.0],
"hybrid": {"semanticRatio": 0.8},
"hybrid": {"embedder": "default", "semanticRatio": 0.8},
"retrieveVectors": true,
"showRankingScore": true,
"attributesToHighlight": [
@ -304,7 +307,7 @@ async fn highlighter() {
// no highlighting on full semantic
let (response, code) = index
.search_post(json!({"q": "Captain Marvel", "vector": [1.0, 1.0],
"hybrid": {"semanticRatio": 1.0},
"hybrid": {"embedder": "default", "semanticRatio": 1.0},
"retrieveVectors": true,
"showRankingScore": true,
"attributesToHighlight": [
@ -326,7 +329,7 @@ async fn invalid_semantic_ratio() {
let (response, code) = index
.search_post(
json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 1.2}}),
json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"embedder": "default", "semanticRatio": 1.2}}),
)
.await;
snapshot!(code, @"400 Bad Request");
@ -341,7 +344,7 @@ async fn invalid_semantic_ratio() {
let (response, code) = index
.search_post(
json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": -0.8}}),
json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"embedder": "default", "semanticRatio": -0.8}}),
)
.await;
snapshot!(code, @"400 Bad Request");
@ -357,7 +360,7 @@ async fn invalid_semantic_ratio() {
let (response, code) = index
.search_get(
&yaup::to_string(
&json!({"q": "Captain", "vector": [1.0, 1.0], "hybridSemanticRatio": 1.2}),
&json!({"q": "Captain", "vector": [1.0, 1.0], "hybridEmbedder": "default", "hybridSemanticRatio": 1.2}),
)
.unwrap(),
)
@ -375,7 +378,7 @@ async fn invalid_semantic_ratio() {
let (response, code) = index
.search_get(
&yaup::to_string(
&json!({"q": "Captain", "vector": [1.0, 1.0], "hybridSemanticRatio": -0.2}),
&json!({"q": "Captain", "vector": [1.0, 1.0], "hybridEmbedder": "default", "hybridSemanticRatio": -0.2}),
)
.unwrap(),
)
@ -398,7 +401,7 @@ async fn single_document() {
let (response, code) = index
.search_post(
json!({"vector": [1.0, 3.0], "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true}),
json!({"vector": [1.0, 3.0], "hybrid": {"semanticRatio": 1.0, "embedder": "default"}, "showRankingScore": true, "retrieveVectors": true}),
)
.await;
@ -414,7 +417,7 @@ async fn query_combination() {
// search without query and vector, but with hybrid => still placeholder
let (response, code) = index
.search_post(json!({"hybrid": {"semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true}))
.search_post(json!({"hybrid": {"embedder": "default", "semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true}))
.await;
snapshot!(code, @"200 OK");
@ -423,7 +426,7 @@ async fn query_combination() {
// same with a different semantic ratio
let (response, code) = index
.search_post(json!({"hybrid": {"semanticRatio": 0.76}, "showRankingScore": true, "retrieveVectors": true}))
.search_post(json!({"hybrid": {"embedder": "default", "semanticRatio": 0.76}, "showRankingScore": true, "retrieveVectors": true}))
.await;
snapshot!(code, @"200 OK");
@ -432,7 +435,7 @@ async fn query_combination() {
// wrong vector dimensions
let (response, code) = index
.search_post(json!({"vector": [1.0, 0.0, 1.0], "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true}))
.search_post(json!({"vector": [1.0, 0.0, 1.0], "hybrid": {"embedder": "default", "semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true}))
.await;
snapshot!(code, @"400 Bad Request");
@ -447,7 +450,7 @@ async fn query_combination() {
// full vector
let (response, code) = index
.search_post(json!({"vector": [1.0, 0.0], "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true}))
.search_post(json!({"vector": [1.0, 0.0], "hybrid": {"embedder": "default", "semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true}))
.await;
snapshot!(code, @"200 OK");
@ -456,7 +459,7 @@ async fn query_combination() {
// full keyword, without a query
let (response, code) = index
.search_post(json!({"vector": [1.0, 0.0], "hybrid": {"semanticRatio": 0.0}, "showRankingScore": true, "retrieveVectors": true}))
.search_post(json!({"vector": [1.0, 0.0], "hybrid": {"embedder": "default", "semanticRatio": 0.0}, "showRankingScore": true, "retrieveVectors": true}))
.await;
snapshot!(code, @"200 OK");
@ -465,7 +468,7 @@ async fn query_combination() {
// query + vector, full keyword => keyword
let (response, code) = index
.search_post(json!({"q": "Captain", "vector": [1.0, 0.0], "hybrid": {"semanticRatio": 0.0}, "showRankingScore": true, "retrieveVectors": true}))
.search_post(json!({"q": "Captain", "vector": [1.0, 0.0], "hybrid": {"embedder": "default", "semanticRatio": 0.0}, "showRankingScore": true, "retrieveVectors": true}))
.await;
snapshot!(code, @"200 OK");
@ -480,7 +483,7 @@ async fn query_combination() {
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Invalid request: missing `hybrid` parameter when both `q` and `vector` are present.",
"message": "Invalid request: missing `hybrid` parameter when `vector` is present.",
"code": "missing_search_hybrid",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#missing_search_hybrid"
@ -490,7 +493,7 @@ async fn query_combination() {
// full vector, without a vector => error
let (response, code) = index
.search_post(
json!({"q": "Captain", "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true}),
json!({"q": "Captain", "hybrid": {"semanticRatio": 1.0, "embedder": "default"}, "showRankingScore": true, "retrieveVectors": true}),
)
.await;
@ -507,7 +510,7 @@ async fn query_combination() {
// hybrid without a vector => full keyword
let (response, code) = index
.search_post(
json!({"q": "Planet", "hybrid": {"semanticRatio": 0.99}, "showRankingScore": true, "retrieveVectors": true}),
json!({"q": "Planet", "hybrid": {"semanticRatio": 0.99, "embedder": "default"}, "showRankingScore": true, "retrieveVectors": true}),
)
.await;
@ -523,7 +526,7 @@ async fn retrieve_vectors() {
let (response, code) = index
.search_post(
json!({"q": "Captain", "hybrid": {"semanticRatio": 0.2}, "retrieveVectors": true}),
json!({"q": "Captain", "hybrid": {"embedder": "default", "semanticRatio": 0.2}, "retrieveVectors": true}),
)
.await;
snapshot!(code, @"200 OK");
@ -573,7 +576,7 @@ async fn retrieve_vectors() {
let (response, code) = index
.search_post(
json!({"q": "Captain", "hybrid": {"semanticRatio": 0.2}, "retrieveVectors": true}),
json!({"q": "Captain", "hybrid": {"embedder": "default", "semanticRatio": 0.2}, "retrieveVectors": true}),
)
.await;
snapshot!(code, @"200 OK");

File diff suppressed because it is too large Load Diff

View File

@ -16,208 +16,17 @@ mod restrict_searchable;
mod search_queue;
use meilisearch::Opt;
use once_cell::sync::Lazy;
use tempfile::TempDir;
use crate::common::{default_settings, Server, Value};
use crate::common::{
default_settings, shared_index_with_documents, shared_index_with_nested_documents, Server,
DOCUMENTS, FRUITS_DOCUMENTS, NESTED_DOCUMENTS, SCORE_DOCUMENTS, VECTOR_DOCUMENTS,
};
use crate::json;
static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
json!([
{
"title": "Shazam!",
"id": "287947",
"_vectors": { "manual": [1, 2, 3]},
},
{
"title": "Captain Marvel",
"id": "299537",
"_vectors": { "manual": [1, 2, 54] },
},
{
"title": "Escape Room",
"id": "522681",
"_vectors": { "manual": [10, -23, 32] },
},
{
"title": "How to Train Your Dragon: The Hidden World",
"id": "166428",
"_vectors": { "manual": [-100, 231, 32] },
},
{
"title": "Gläss",
"id": "450465",
"_vectors": { "manual": [-100, 340, 90] },
}
])
});
static SCORE_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
json!([
{
"title": "Batman the dark knight returns: Part 1",
"id": "A",
},
{
"title": "Batman the dark knight returns: Part 2",
"id": "B",
},
{
"title": "Batman Returns",
"id": "C",
},
{
"title": "Batman",
"id": "D",
},
{
"title": "Badman",
"id": "E",
}
])
});
static NESTED_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
json!([
{
"id": 852,
"father": "jean",
"mother": "michelle",
"doggos": [
{
"name": "bobby",
"age": 2,
},
{
"name": "buddy",
"age": 4,
},
],
"cattos": "pésti",
"_vectors": { "manual": [1, 2, 3]},
},
{
"id": 654,
"father": "pierre",
"mother": "sabine",
"doggos": [
{
"name": "gros bill",
"age": 8,
},
],
"cattos": ["simba", "pestiféré"],
"_vectors": { "manual": [1, 2, 54] },
},
{
"id": 750,
"father": "romain",
"mother": "michelle",
"cattos": ["enigma"],
"_vectors": { "manual": [10, 23, 32] },
},
{
"id": 951,
"father": "jean-baptiste",
"mother": "sophie",
"doggos": [
{
"name": "turbo",
"age": 5,
},
{
"name": "fast",
"age": 6,
},
],
"cattos": ["moumoute", "gomez"],
"_vectors": { "manual": [10, 23, 32] },
},
])
});
static FRUITS_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
json!([
{
"name": "Exclusive sale: green apple",
"id": "green-apple-boosted",
"BOOST": true
},
{
"name": "Pear",
"id": "pear",
},
{
"name": "Red apple gala",
"id": "red-apple-gala",
},
{
"name": "Exclusive sale: Red Tomato",
"id": "red-tomatoes-boosted",
"BOOST": true
},
{
"name": "Exclusive sale: Red delicious apple",
"id": "red-delicious-boosted",
"BOOST": true,
}
])
});
static VECTOR_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
json!([
{
"id": "A",
"description": "the dog barks at the cat",
"_vectors": {
// dimensions [canine, feline, young]
"animal": [0.9, 0.8, 0.05],
// dimensions [negative/positive, energy]
"sentiment": [-0.1, 0.55]
}
},
{
"id": "B",
"description": "the kitten scratched the beagle",
"_vectors": {
// dimensions [canine, feline, young]
"animal": [0.8, 0.9, 0.5],
// dimensions [negative/positive, energy]
"sentiment": [-0.2, 0.65]
}
},
{
"id": "C",
"description": "the dog had to stay alone today",
"_vectors": {
// dimensions [canine, feline, young]
"animal": [0.85, 0.02, 0.1],
// dimensions [negative/positive, energy]
"sentiment": [-1.0, 0.1]
}
},
{
"id": "D",
"description": "the little boy pets the puppy",
"_vectors": {
// dimensions [canine, feline, young]
"animal": [0.8, 0.09, 0.8],
// dimensions [negative/positive, energy]
"sentiment": [0.8, 0.3]
}
},
])
});
#[actix_rt::test]
async fn simple_placeholder_search() {
let server = Server::new().await;
let index = server.index("basic");
let documents = DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(0).await;
let index = shared_index_with_documents().await;
index
.search(json!({}), |response, code| {
assert_eq!(code, 200, "{}", response);
@ -225,11 +34,7 @@ async fn simple_placeholder_search() {
})
.await;
let index = server.index("nested");
let documents = NESTED_DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(1).await;
let index = shared_index_with_nested_documents().await;
index
.search(json!({}), |response, code| {
assert_eq!(code, 200, "{}", response);
@ -240,13 +45,7 @@ async fn simple_placeholder_search() {
#[actix_rt::test]
async fn simple_search() {
let server = Server::new().await;
let index = server.index("test");
let documents = DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(0).await;
let index = shared_index_with_documents().await;
index
.search(json!({"q": "glass"}), |response, code| {
assert_eq!(code, 200, "{}", response);
@ -254,11 +53,7 @@ async fn simple_search() {
})
.await;
let index = server.index("nested");
let documents = NESTED_DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(1).await;
let index = shared_index_with_nested_documents().await;
index
.search(json!({"q": "pésti"}), |response, code| {
assert_eq!(code, 200, "{}", response);
@ -290,13 +85,7 @@ async fn phrase_search_with_stop_word() {
#[actix_rt::test]
async fn negative_phrase_search() {
let server = Server::new().await;
let index = server.index("test");
let documents = DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(0).await;
let index = shared_index_with_documents().await;
index
.search(json!({"q": "-\"train your dragon\"" }), |response, code| {
assert_eq!(code, 200, "{}", response);
@ -312,13 +101,7 @@ async fn negative_phrase_search() {
#[actix_rt::test]
async fn negative_word_search() {
let server = Server::new().await;
let index = server.index("test");
let documents = DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(0).await;
let index = shared_index_with_documents().await;
index
.search(json!({"q": "-escape" }), |response, code| {
assert_eq!(code, 200, "{}", response);
@ -343,13 +126,7 @@ async fn negative_word_search() {
#[actix_rt::test]
async fn non_negative_search() {
let server = Server::new().await;
let index = server.index("test");
let documents = DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(0).await;
let index = shared_index_with_documents().await;
index
.search(json!({"q": "- escape" }), |response, code| {
assert_eq!(code, 200, "{}", response);
@ -442,13 +219,7 @@ async fn test_thai_language() {
#[actix_rt::test]
async fn search_multiple_params() {
let server = Server::new().await;
let index = server.index("test");
let documents = DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(0).await;
let index = shared_index_with_documents().await;
index
.search(
json!({
@ -465,11 +236,7 @@ async fn search_multiple_params() {
)
.await;
let index = server.index("nested");
let documents = NESTED_DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(1).await;
let index = shared_index_with_nested_documents().await;
index
.search(
json!({
@ -555,15 +322,7 @@ async fn search_with_filter_string_notation() {
#[actix_rt::test]
async fn search_with_filter_array_notation() {
let server = Server::new().await;
let index = server.index("test");
index.update_settings(json!({"filterableAttributes": ["title"]})).await;
let documents = DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(1).await;
let index = shared_index_with_documents().await;
let (response, code) = index
.search_post(json!({
"filter": ["title = Gläss"]
@ -609,15 +368,7 @@ async fn search_with_contains_filter() {
#[actix_rt::test]
async fn search_with_sort_on_numbers() {
let server = Server::new().await;
let index = server.index("test");
index.update_settings(json!({"sortableAttributes": ["id"]})).await;
let documents = DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(1).await;
let index = shared_index_with_documents().await;
index
.search(
json!({
@ -630,14 +381,7 @@ async fn search_with_sort_on_numbers() {
)
.await;
let index = server.index("nested");
index.update_settings(json!({"sortableAttributes": ["doggos.age"]})).await;
let documents = NESTED_DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(3).await;
let index = shared_index_with_nested_documents().await;
index
.search(
json!({
@ -653,15 +397,7 @@ async fn search_with_sort_on_numbers() {
#[actix_rt::test]
async fn search_with_sort_on_strings() {
let server = Server::new().await;
let index = server.index("test");
index.update_settings(json!({"sortableAttributes": ["title"]})).await;
let documents = DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(1).await;
let index = shared_index_with_documents().await;
index
.search(
json!({
@ -674,14 +410,7 @@ async fn search_with_sort_on_strings() {
)
.await;
let index = server.index("nested");
index.update_settings(json!({"sortableAttributes": ["doggos.name"]})).await;
let documents = NESTED_DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(3).await;
let index = shared_index_with_nested_documents().await;
index
.search(
json!({
@ -697,15 +426,7 @@ async fn search_with_sort_on_strings() {
#[actix_rt::test]
async fn search_with_multiple_sort() {
let server = Server::new().await;
let index = server.index("test");
index.update_settings(json!({"sortableAttributes": ["id", "title"]})).await;
let documents = DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(1).await;
let index = shared_index_with_documents().await;
let (response, code) = index
.search_post(json!({
"sort": ["id:asc", "title:desc"]
@ -717,15 +438,7 @@ async fn search_with_multiple_sort() {
#[actix_rt::test]
async fn search_facet_distribution() {
let server = Server::new().await;
let index = server.index("test");
index.update_settings(json!({"filterableAttributes": ["title"]})).await;
let documents = DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(1).await;
let index = shared_index_with_documents().await;
index
.search(
json!({
@ -740,13 +453,7 @@ async fn search_facet_distribution() {
)
.await;
let index = server.index("nested");
index.update_settings(json!({"filterableAttributes": ["father", "doggos.name"]})).await;
let documents = NESTED_DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(3).await;
let index = shared_index_with_nested_documents().await;
// TODO: TAMO: fix the test
index
@ -773,9 +480,6 @@ async fn search_facet_distribution() {
)
.await;
index.update_settings(json!({"filterableAttributes": ["doggos"]})).await;
index.wait_task(4).await;
index
.search(
json!({
@ -811,9 +515,6 @@ async fn search_facet_distribution() {
)
.await;
index.update_settings(json!({"filterableAttributes": ["doggos.name"]})).await;
index.wait_task(5).await;
index
.search(
json!({
@ -1043,6 +744,10 @@ async fn test_score_details() {
{
"title": "How to Train Your Dragon: The Hidden World",
"id": "166428",
"color": [
"green",
"red"
],
"_vectors": {
"manual": [
-100.0,
@ -1394,22 +1099,28 @@ async fn experimental_feature_vector_store() {
index.add_documents(json!(documents), None).await;
index.wait_task(0).await;
index
.search(json!({
let (response, code) = index
.search_post(json!({
"vector": [1.0, 2.0, 3.0],
"hybrid": {
"embedder": "manual",
},
"showRankingScore": true
}), |response, code|{
meili_snap::snapshot!(code, @"400 Bad Request");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"message": "Passing `vector` as a parameter requires enabling the `vector store` experimental feature. See https://github.com/meilisearch/product/discussions/677",
"code": "feature_not_enabled",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#feature_not_enabled"
}
"###);
})
}))
.await;
{
meili_snap::snapshot!(code, @"400 Bad Request");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"message": "Passing `vector` as a parameter requires enabling the `vector store` experimental feature. See https://github.com/meilisearch/product/discussions/677",
"code": "feature_not_enabled",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#feature_not_enabled"
}
"###);
}
index
.search(json!({
"retrieveVectors": true,
@ -1457,6 +1168,9 @@ async fn experimental_feature_vector_store() {
let (response, code) = index
.search_post(json!({
"vector": [1.0, 2.0, 3.0],
"hybrid": {
"embedder": "manual",
},
"showRankingScore": true,
"retrieveVectors": true,
}))
@ -1469,6 +1183,10 @@ async fn experimental_feature_vector_store() {
{
"title": "Shazam!",
"id": "287947",
"color": [
"green",
"blue"
],
"_vectors": {
"manual": {
"embeddings": [
@ -1486,6 +1204,10 @@ async fn experimental_feature_vector_store() {
{
"title": "Captain Marvel",
"id": "299537",
"color": [
"yellow",
"blue"
],
"_vectors": {
"manual": {
"embeddings": [
@ -1503,6 +1225,10 @@ async fn experimental_feature_vector_store() {
{
"title": "Gläss",
"id": "450465",
"color": [
"blue",
"red"
],
"_vectors": {
"manual": {
"embeddings": [
@ -1520,6 +1246,10 @@ async fn experimental_feature_vector_store() {
{
"title": "How to Train Your Dragon: The Hidden World",
"id": "166428",
"color": [
"green",
"red"
],
"_vectors": {
"manual": {
"embeddings": [
@ -1537,6 +1267,10 @@ async fn experimental_feature_vector_store() {
{
"title": "Escape Room",
"id": "522681",
"color": [
"yellow",
"red"
],
"_vectors": {
"manual": {
"embeddings": [
@ -1803,6 +1537,10 @@ async fn simple_search_with_strange_synonyms() {
{
"title": "How to Train Your Dragon: The Hidden World",
"id": "166428",
"color": [
"green",
"red"
],
"_vectors": {
"manual": [
-100.0,
@ -1824,6 +1562,10 @@ async fn simple_search_with_strange_synonyms() {
{
"title": "How to Train Your Dragon: The Hidden World",
"id": "166428",
"color": [
"green",
"red"
],
"_vectors": {
"manual": [
-100.0,
@ -1845,6 +1587,10 @@ async fn simple_search_with_strange_synonyms() {
{
"title": "How to Train Your Dragon: The Hidden World",
"id": "166428",
"color": [
"green",
"red"
],
"_vectors": {
"manual": [
-100.0,

File diff suppressed because it is too large Load Diff

View File

@ -1,16 +1,10 @@
use super::shared_index_with_documents;
use crate::common::Server;
use crate::json;
use crate::search::DOCUMENTS;
#[actix_rt::test]
async fn default_search_should_return_estimated_total_hit() {
let server = Server::new().await;
let index = server.index("basic");
let documents = DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(0).await;
let index = shared_index_with_documents().await;
index
.search(json!({}), |response, code| {
assert_eq!(code, 200, "{}", response);
@ -28,13 +22,7 @@ async fn default_search_should_return_estimated_total_hit() {
#[actix_rt::test]
async fn simple_search() {
let server = Server::new().await;
let index = server.index("basic");
let documents = DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(0).await;
let index = shared_index_with_documents().await;
index
.search(json!({"page": 1}), |response, code| {
assert_eq!(code, 200, "{}", response);
@ -53,13 +41,7 @@ async fn simple_search() {
#[actix_rt::test]
async fn page_zero_should_not_return_any_result() {
let server = Server::new().await;
let index = server.index("basic");
let documents = DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(0).await;
let index = shared_index_with_documents().await;
index
.search(json!({"page": 0}), |response, code| {
assert_eq!(code, 200, "{}", response);
@ -73,13 +55,7 @@ async fn page_zero_should_not_return_any_result() {
#[actix_rt::test]
async fn hits_per_page_1() {
let server = Server::new().await;
let index = server.index("basic");
let documents = DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(0).await;
let index = shared_index_with_documents().await;
index
.search(json!({"hitsPerPage": 1}), |response, code| {
assert_eq!(code, 200, "{}", response);
@ -93,13 +69,7 @@ async fn hits_per_page_1() {
#[actix_rt::test]
async fn hits_per_page_0_should_not_return_any_result() {
let server = Server::new().await;
let index = server.index("basic");
let documents = DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(0).await;
let index = shared_index_with_documents().await;
index
.search(json!({"hitsPerPage": 0}), |response, code| {
assert_eq!(code, 200, "{}", response);
@ -113,8 +83,8 @@ async fn hits_per_page_0_should_not_return_any_result() {
#[actix_rt::test]
async fn ensure_placeholder_search_hit_count_valid() {
let server = Server::new().await;
let index = server.index("basic");
let server = Server::new_shared();
let index = server.unique_index();
let documents = json!([
{
@ -143,15 +113,15 @@ async fn ensure_placeholder_search_hit_count_valid() {
"distinct": 3,
}
]);
index.add_documents(documents, None).await;
index.wait_task(0).await;
let (task, _code) = index.add_documents(documents, None).await;
index.wait_task(task.uid()).await.succeeded();
let (_response, _code) = index
let (response, _code) = index
.update_settings(
json!({ "rankingRules": ["distinct:asc"], "distinctAttribute": "distinct"}),
)
.await;
index.wait_task(1).await;
index.wait_task(response.uid()).await.succeeded();
for page in 0..=4 {
index

View File

@ -8,8 +8,8 @@ use crate::json;
async fn index_with_documents<'a>(server: &'a Server, documents: &Value) -> Index<'a> {
let index = server.index("test");
index.add_documents(documents.clone(), None).await;
index.wait_task(0).await;
let (task, _code) = index.add_documents(documents.clone(), None).await;
index.wait_task(task.uid()).await.succeeded();
index
}

View File

@ -37,6 +37,43 @@ async fn search_queue_register() {
.unwrap();
}
#[actix_rt::test]
async fn search_queue_register_with_explicit_drop() {
let queue = SearchQueue::new(4, NonZeroUsize::new(2).unwrap());
// First, use all the cores
let permit1 = queue.try_get_search_permit().await.unwrap();
let _permit2 = queue.try_get_search_permit().await.unwrap();
// If we free one spot we should be able to register one new search
permit1.drop().await;
let permit3 = queue.try_get_search_permit().await.unwrap();
// And again
permit3.drop().await;
let _permit4 = queue.try_get_search_permit().await.unwrap();
}
#[actix_rt::test]
async fn search_queue_register_with_time_to_abort() {
let queue = Arc::new(
SearchQueue::new(1, NonZeroUsize::new(1).unwrap())
.with_time_to_abort(Duration::from_secs(1)),
);
// First, use all the cores
let permit1 = queue.try_get_search_permit().await.unwrap();
let q = queue.clone();
let permit2 = tokio::task::spawn(async move { q.try_get_search_permit().await });
tokio::time::sleep(Duration::from_secs(1)).await;
permit1.drop().await;
let ret = permit2.await.unwrap();
snapshot!(ret.unwrap_err(), @"Too many search requests running at the same time: 1. Retry after 10s.");
}
#[actix_rt::test]
async fn wait_till_cores_are_available() {
let queue = Arc::new(SearchQueue::new(4, NonZeroUsize::new(1).unwrap()));

View File

@ -5,8 +5,8 @@ use crate::json;
#[actix_rt::test]
async fn settings_bad_displayed_attributes() {
let server = Server::new().await;
let index = server.index("test");
let server = Server::new_shared();
let index = server.unique_index();
let (response, code) = index.update_settings(json!({ "displayedAttributes": "doggo" })).await;
snapshot!(code, @"400 Bad Request");
@ -33,8 +33,8 @@ async fn settings_bad_displayed_attributes() {
#[actix_rt::test]
async fn settings_bad_searchable_attributes() {
let server = Server::new().await;
let index = server.index("test");
let server = Server::new_shared();
let index = server.unique_index();
let (response, code) = index.update_settings(json!({ "searchableAttributes": "doggo" })).await;
snapshot!(code, @"400 Bad Request");
@ -61,8 +61,8 @@ async fn settings_bad_searchable_attributes() {
#[actix_rt::test]
async fn settings_bad_filterable_attributes() {
let server = Server::new().await;
let index = server.index("test");
let server = Server::new_shared();
let index = server.unique_index();
let (response, code) = index.update_settings(json!({ "filterableAttributes": "doggo" })).await;
snapshot!(code, @"400 Bad Request");
@ -89,8 +89,8 @@ async fn settings_bad_filterable_attributes() {
#[actix_rt::test]
async fn settings_bad_sortable_attributes() {
let server = Server::new().await;
let index = server.index("test");
let server = Server::new_shared();
let index = server.unique_index();
let (response, code) = index.update_settings(json!({ "sortableAttributes": "doggo" })).await;
snapshot!(code, @"400 Bad Request");
@ -117,8 +117,8 @@ async fn settings_bad_sortable_attributes() {
#[actix_rt::test]
async fn settings_bad_ranking_rules() {
let server = Server::new().await;
let index = server.index("test");
let server = Server::new_shared();
let index = server.unique_index();
let (response, code) = index.update_settings(json!({ "rankingRules": "doggo" })).await;
snapshot!(code, @"400 Bad Request");
@ -145,8 +145,8 @@ async fn settings_bad_ranking_rules() {
#[actix_rt::test]
async fn settings_bad_stop_words() {
let server = Server::new().await;
let index = server.index("test");
let server = Server::new_shared();
let index = server.unique_index();
let (response, code) = index.update_settings(json!({ "stopWords": "doggo" })).await;
snapshot!(code, @"400 Bad Request");
@ -173,8 +173,8 @@ async fn settings_bad_stop_words() {
#[actix_rt::test]
async fn settings_bad_synonyms() {
let server = Server::new().await;
let index = server.index("test");
let server = Server::new_shared();
let index = server.unique_index();
let (response, code) = index.update_settings(json!({ "synonyms": "doggo" })).await;
snapshot!(code, @"400 Bad Request");
@ -201,8 +201,8 @@ async fn settings_bad_synonyms() {
#[actix_rt::test]
async fn settings_bad_distinct_attribute() {
let server = Server::new().await;
let index = server.index("test");
let server = Server::new_shared();
let index = server.unique_index();
let (response, code) = index.update_settings(json!({ "distinctAttribute": ["doggo"] })).await;
snapshot!(code, @"400 Bad Request");
@ -229,8 +229,8 @@ async fn settings_bad_distinct_attribute() {
#[actix_rt::test]
async fn settings_bad_typo_tolerance() {
let server = Server::new().await;
let index = server.index("test");
let server = Server::new_shared();
let index = server.unique_index();
let (response, code) = index.update_settings(json!({ "typoTolerance": "doggo" })).await;
snapshot!(code, @"400 Bad Request");
@ -284,8 +284,8 @@ async fn settings_bad_typo_tolerance() {
#[actix_rt::test]
async fn settings_bad_faceting() {
let server = Server::new().await;
let index = server.index("test");
let server = Server::new_shared();
let index = server.unique_index();
let (response, code) = index.update_settings(json!({ "faceting": "doggo" })).await;
snapshot!(code, @"400 Bad Request");
@ -312,8 +312,8 @@ async fn settings_bad_faceting() {
#[actix_rt::test]
async fn settings_bad_pagination() {
let server = Server::new().await;
let index = server.index("test");
let server = Server::new_shared();
let index = server.unique_index();
let (response, code) = index.update_settings(json!({ "pagination": "doggo" })).await;
snapshot!(code, @"400 Bad Request");
@ -340,8 +340,8 @@ async fn settings_bad_pagination() {
#[actix_rt::test]
async fn settings_bad_search_cutoff_ms() {
let server = Server::new().await;
let index = server.index("test");
let server = Server::new_shared();
let index = server.unique_index();
let (response, code) = index.update_settings(json!({ "searchCutoffMs": "doggo" })).await;
snapshot!(code, @"400 Bad Request");

View File

@ -190,7 +190,8 @@ async fn secrets_are_hidden_in_settings() {
"source": "rest",
"apiKey": "My suXXXXXX...",
"dimensions": 4,
"documentTemplate": "{% for field in fields %} {{ field.name }}: {{ field.value }}\n{% endfor %}",
"documentTemplate": "{% for field in fields %}{% if field.is_searchable and field.value != nil %}{{ field.name }}: {{ field.value }}\n{% endif %}{% endfor %}",
"documentTemplateMaxBytes": 400,
"url": "https://localhost:7777",
"request": "{{text}}",
"response": "{{embedding}}",

View File

@ -18,7 +18,7 @@ async fn similar_unexisting_index() {
});
index
.similar(json!({"id": 287947}), |response, code| {
.similar(json!({"id": 287947, "embedder": "manual"}), |response, code| {
assert_eq!(code, 404);
assert_eq!(response, expected_response);
})
@ -44,7 +44,7 @@ async fn similar_feature_not_enabled() {
let server = Server::new().await;
let index = server.index("test");
let (response, code) = index.similar_post(json!({"id": 287947})).await;
let (response, code) = index.similar_post(json!({"id": 287947, "embedder": "manual"})).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
@ -199,7 +199,8 @@ async fn similar_not_found_id() {
snapshot!(code, @"202 Accepted");
server.wait_task(response.uid()).await;
let (response, code) = index.similar_post(json!({"id": "definitely-doesnt-exist"})).await;
let (response, code) =
index.similar_post(json!({"id": "definitely-doesnt-exist", "embedder": "manual"})).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
@ -230,7 +231,8 @@ async fn similar_bad_offset() {
snapshot!(code, @"202 Accepted");
server.wait_task(response.uid()).await;
let (response, code) = index.similar_post(json!({"id": 287947, "offset": "doggo"})).await;
let (response, code) =
index.similar_post(json!({"id": 287947, "offset": "doggo", "embedder": "manual"})).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
@ -241,7 +243,7 @@ async fn similar_bad_offset() {
}
"###);
let (response, code) = index.similar_get("?id=287947&offset=doggo").await;
let (response, code) = index.similar_get("?id=287947&offset=doggo&embedder=manual").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
@ -272,7 +274,8 @@ async fn similar_bad_limit() {
snapshot!(code, @"202 Accepted");
server.wait_task(response.uid()).await;
let (response, code) = index.similar_post(json!({"id": 287947, "limit": "doggo"})).await;
let (response, code) =
index.similar_post(json!({"id": 287947, "limit": "doggo", "embedder": "manual"})).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
@ -283,7 +286,7 @@ async fn similar_bad_limit() {
}
"###);
let (response, code) = index.similar_get("?id=287946&limit=doggo").await;
let (response, code) = index.similar_get("?id=287946&limit=doggo&embedder=manual").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
@ -323,7 +326,8 @@ async fn similar_bad_filter() {
snapshot!(code, @"202 Accepted");
index.wait_task(value.uid()).await;
let (response, code) = index.similar_post(json!({ "id": 287947, "filter": true })).await;
let (response, code) =
index.similar_post(json!({ "id": 287947, "filter": true, "embedder": "manual" })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
@ -361,10 +365,10 @@ async fn filter_invalid_syntax_object() {
index.wait_task(value.uid()).await;
index
.similar(json!({"id": 287947, "filter": "title & Glass"}), |response, code| {
.similar(json!({"id": 287947, "filter": "title & Glass", "embedder": "manual"}), |response, code| {
snapshot!(response, @r###"
{
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
"code": "invalid_similar_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
@ -400,10 +404,10 @@ async fn filter_invalid_syntax_array() {
index.wait_task(value.uid()).await;
index
.similar(json!({"id": 287947, "filter": ["title & Glass"]}), |response, code| {
.similar(json!({"id": 287947, "filter": ["title & Glass"], "embedder": "manual"}), |response, code| {
snapshot!(response, @r###"
{
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
"code": "invalid_similar_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
@ -446,7 +450,7 @@ async fn filter_invalid_syntax_string() {
});
index
.similar(
json!({"id": 287947, "filter": "title = Glass XOR title = Glass"}),
json!({"id": 287947, "filter": "title = Glass XOR title = Glass", "embedder": "manual"}),
|response, code| {
assert_eq!(response, expected_response);
assert_eq!(code, 400);
@ -486,10 +490,13 @@ async fn filter_invalid_attribute_array() {
"link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
});
index
.similar(json!({"id": 287947, "filter": ["many = Glass"]}), |response, code| {
assert_eq!(response, expected_response);
assert_eq!(code, 400);
})
.similar(
json!({"id": 287947, "filter": ["many = Glass"], "embedder": "manual"}),
|response, code| {
assert_eq!(response, expected_response);
assert_eq!(code, 400);
},
)
.await;
}
@ -524,10 +531,13 @@ async fn filter_invalid_attribute_string() {
"link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
});
index
.similar(json!({"id": 287947, "filter": "many = Glass"}), |response, code| {
assert_eq!(response, expected_response);
assert_eq!(code, 400);
})
.similar(
json!({"id": 287947, "filter": "many = Glass", "embedder": "manual"}),
|response, code| {
assert_eq!(response, expected_response);
assert_eq!(code, 400);
},
)
.await;
}
@ -562,10 +572,13 @@ async fn filter_reserved_geo_attribute_array() {
"link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
});
index
.similar(json!({"id": 287947, "filter": ["_geo = Glass"]}), |response, code| {
assert_eq!(response, expected_response);
assert_eq!(code, 400);
})
.similar(
json!({"id": 287947, "filter": ["_geo = Glass"], "embedder": "manual"}),
|response, code| {
assert_eq!(response, expected_response);
assert_eq!(code, 400);
},
)
.await;
}
@ -600,10 +613,13 @@ async fn filter_reserved_geo_attribute_string() {
"link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
});
index
.similar(json!({"id": 287947, "filter": "_geo = Glass"}), |response, code| {
assert_eq!(response, expected_response);
assert_eq!(code, 400);
})
.similar(
json!({"id": 287947, "filter": "_geo = Glass", "embedder": "manual"}),
|response, code| {
assert_eq!(response, expected_response);
assert_eq!(code, 400);
},
)
.await;
}
@ -638,10 +654,13 @@ async fn filter_reserved_attribute_array() {
"link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
});
index
.similar(json!({"id": 287947, "filter": ["_geoDistance = Glass"]}), |response, code| {
assert_eq!(response, expected_response);
assert_eq!(code, 400);
})
.similar(
json!({"id": 287947, "filter": ["_geoDistance = Glass"], "embedder": "manual"}),
|response, code| {
assert_eq!(response, expected_response);
assert_eq!(code, 400);
},
)
.await;
}
@ -676,10 +695,13 @@ async fn filter_reserved_attribute_string() {
"link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
});
index
.similar(json!({"id": 287947, "filter": "_geoDistance = Glass"}), |response, code| {
assert_eq!(response, expected_response);
assert_eq!(code, 400);
})
.similar(
json!({"id": 287947, "filter": "_geoDistance = Glass", "embedder": "manual"}),
|response, code| {
assert_eq!(response, expected_response);
assert_eq!(code, 400);
},
)
.await;
}
@ -714,10 +736,13 @@ async fn filter_reserved_geo_point_array() {
"link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
});
index
.similar(json!({"id": 287947, "filter": ["_geoPoint = Glass"]}), |response, code| {
assert_eq!(response, expected_response);
assert_eq!(code, 400);
})
.similar(
json!({"id": 287947, "filter": ["_geoPoint = Glass"], "embedder": "manual"}),
|response, code| {
assert_eq!(response, expected_response);
assert_eq!(code, 400);
},
)
.await;
}
@ -752,10 +777,13 @@ async fn filter_reserved_geo_point_string() {
"link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
});
index
.similar(json!({"id": 287947, "filter": "_geoPoint = Glass"}), |response, code| {
assert_eq!(response, expected_response);
assert_eq!(code, 400);
})
.similar(
json!({"id": 287947, "filter": "_geoPoint = Glass", "embedder": "manual"}),
|response, code| {
assert_eq!(response, expected_response);
assert_eq!(code, 400);
},
)
.await;
}
@ -765,7 +793,8 @@ async fn similar_bad_retrieve_vectors() {
server.set_features(json!({"vectorStore": true})).await;
let index = server.index("test");
let (response, code) = index.similar_post(json!({"retrieveVectors": "doggo"})).await;
let (response, code) =
index.similar_post(json!({"retrieveVectors": "doggo", "embedder": "manual"})).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
@ -776,7 +805,8 @@ async fn similar_bad_retrieve_vectors() {
}
"###);
let (response, code) = index.similar_post(json!({"retrieveVectors": [true]})).await;
let (response, code) =
index.similar_post(json!({"retrieveVectors": [true], "embedder": "manual"})).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{

View File

@ -80,9 +80,11 @@ async fn basic() {
index.wait_task(value.uid()).await;
index
.similar(json!({"id": 143, "retrieveVectors": true}), |response, code| {
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @r###"
.similar(
json!({"id": 143, "retrieveVectors": true, "embedder": "manual"}),
|response, code| {
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @r###"
[
{
"title": "Escape Room",
@ -154,13 +156,16 @@ async fn basic() {
}
]
"###);
})
},
)
.await;
index
.similar(json!({"id": "299537", "retrieveVectors": true}), |response, code| {
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @r###"
.similar(
json!({"id": "299537", "retrieveVectors": true, "embedder": "manual"}),
|response, code| {
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @r###"
[
{
"title": "How to Train Your Dragon: The Hidden World",
@ -232,7 +237,8 @@ async fn basic() {
}
]
"###);
})
},
)
.await;
}
@ -272,7 +278,7 @@ async fn ranking_score_threshold() {
index
.similar(
json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0, "retrieveVectors": true}),
json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0, "retrieveVectors": true, "embedder": "manual"}),
|response, code| {
snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @"4");
@ -358,7 +364,7 @@ async fn ranking_score_threshold() {
index
.similar(
json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.2, "retrieveVectors": true}),
json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.2, "retrieveVectors": true, "embedder": "manual"}),
|response, code| {
snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @"3");
@ -426,7 +432,7 @@ async fn ranking_score_threshold() {
index
.similar(
json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.3, "retrieveVectors": true}),
json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.3, "retrieveVectors": true, "embedder": "manual"}),
|response, code| {
snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @"2");
@ -476,7 +482,7 @@ async fn ranking_score_threshold() {
index
.similar(
json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.6, "retrieveVectors": true}),
json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.6, "retrieveVectors": true, "embedder": "manual"}),
|response, code| {
snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @"1");
@ -508,7 +514,7 @@ async fn ranking_score_threshold() {
index
.similar(
json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.9, "retrieveVectors": true}),
json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.9, "retrieveVectors": true, "embedder": "manual"}),
|response, code| {
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @"[]");
@ -553,7 +559,7 @@ async fn filter() {
index
.similar(
json!({"id": 522681, "filter": "release_year = 2019", "retrieveVectors": true}),
json!({"id": 522681, "filter": "release_year = 2019", "retrieveVectors": true, "embedder": "manual"}),
|response, code| {
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @r###"
@ -617,7 +623,7 @@ async fn filter() {
index
.similar(
json!({"id": 522681, "filter": "release_year < 2000", "retrieveVectors": true}),
json!({"id": 522681, "filter": "release_year < 2000", "retrieveVectors": true, "embedder": "manual"}),
|response, code| {
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @r###"
@ -681,9 +687,11 @@ async fn limit_and_offset() {
index.wait_task(value.uid()).await;
index
.similar(json!({"id": 143, "limit": 1, "retrieveVectors": true}), |response, code| {
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @r###"
.similar(
json!({"id": 143, "limit": 1, "retrieveVectors": true, "embedder": "manual"}),
|response, code| {
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @r###"
[
{
"title": "Escape Room",
@ -704,12 +712,13 @@ async fn limit_and_offset() {
}
]
"###);
})
},
)
.await;
index
.similar(
json!({"id": 143, "limit": 1, "offset": 1, "retrieveVectors": true}),
json!({"id": 143, "limit": 1, "offset": 1, "retrieveVectors": true, "embedder": "manual"}),
|response, code| {
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @r###"

View File

@ -5,7 +5,7 @@ use crate::json;
#[actix_rt::test]
async fn swap_indexes_bad_format() {
let server = Server::new().await;
let server = Server::new_shared();
let (response, code) = server.index_swap(json!("doggo")).await;
snapshot!(code, @"400 Bad Request");
@ -32,7 +32,7 @@ async fn swap_indexes_bad_format() {
#[actix_rt::test]
async fn swap_indexes_bad_indexes() {
let server = Server::new().await;
let server = Server::new_shared();
let (response, code) = server.index_swap(json!([{ "indexes": "doggo"}])).await;
snapshot!(code, @"400 Bad Request");

View File

@ -4,7 +4,7 @@ use crate::common::Server;
#[actix_rt::test]
async fn task_bad_uids() {
let server = Server::new().await;
let server = Server::new_shared();
let (response, code) = server.tasks_filter("uids=doggo").await;
snapshot!(code, @"400 Bad Request");
@ -53,7 +53,7 @@ async fn task_bad_uids() {
#[actix_rt::test]
async fn task_bad_canceled_by() {
let server = Server::new().await;
let server = Server::new_shared();
let (response, code) = server.tasks_filter("canceledBy=doggo").await;
snapshot!(code, @"400 Bad Request");
@ -91,7 +91,7 @@ async fn task_bad_canceled_by() {
#[actix_rt::test]
async fn task_bad_types() {
let server = Server::new().await;
let server = Server::new_shared();
let (response, code) = server.tasks_filter("types=doggo").await;
snapshot!(code, @"400 Bad Request");
@ -129,7 +129,7 @@ async fn task_bad_types() {
#[actix_rt::test]
async fn task_bad_statuses() {
let server = Server::new().await;
let server = Server::new_shared();
let (response, code) = server.tasks_filter("statuses=doggo").await;
snapshot!(code, @"400 Bad Request");
@ -167,7 +167,7 @@ async fn task_bad_statuses() {
#[actix_rt::test]
async fn task_bad_index_uids() {
let server = Server::new().await;
let server = Server::new_shared();
let (response, code) = server.tasks_filter("indexUids=the%20good%20doggo").await;
snapshot!(code, @"400 Bad Request");
@ -205,7 +205,7 @@ async fn task_bad_index_uids() {
#[actix_rt::test]
async fn task_bad_limit() {
let server = Server::new().await;
let server = Server::new_shared();
let (response, code) = server.tasks_filter("limit=doggo").await;
snapshot!(code, @"400 Bad Request");
@ -243,7 +243,7 @@ async fn task_bad_limit() {
#[actix_rt::test]
async fn task_bad_from() {
let server = Server::new().await;
let server = Server::new_shared();
let (response, code) = server.tasks_filter("from=doggo").await;
snapshot!(code, @"400 Bad Request");
@ -281,7 +281,7 @@ async fn task_bad_from() {
#[actix_rt::test]
async fn task_bad_after_enqueued_at() {
let server = Server::new().await;
let server = Server::new_shared();
let (response, code) = server.tasks_filter("afterEnqueuedAt=doggo").await;
snapshot!(code, @"400 Bad Request");
@ -319,7 +319,7 @@ async fn task_bad_after_enqueued_at() {
#[actix_rt::test]
async fn task_bad_before_enqueued_at() {
let server = Server::new().await;
let server = Server::new_shared();
let (response, code) = server.tasks_filter("beforeEnqueuedAt=doggo").await;
snapshot!(code, @"400 Bad Request");
@ -357,7 +357,7 @@ async fn task_bad_before_enqueued_at() {
#[actix_rt::test]
async fn task_bad_after_started_at() {
let server = Server::new().await;
let server = Server::new_shared();
let (response, code) = server.tasks_filter("afterStartedAt=doggo").await;
snapshot!(code, @"400 Bad Request");
@ -395,7 +395,7 @@ async fn task_bad_after_started_at() {
#[actix_rt::test]
async fn task_bad_before_started_at() {
let server = Server::new().await;
let server = Server::new_shared();
let (response, code) = server.tasks_filter("beforeStartedAt=doggo").await;
snapshot!(code, @"400 Bad Request");
@ -433,7 +433,7 @@ async fn task_bad_before_started_at() {
#[actix_rt::test]
async fn task_bad_after_finished_at() {
let server = Server::new().await;
let server = Server::new_shared();
let (response, code) = server.tasks_filter("afterFinishedAt=doggo").await;
snapshot!(code, @"400 Bad Request");
@ -471,7 +471,7 @@ async fn task_bad_after_finished_at() {
#[actix_rt::test]
async fn task_bad_before_finished_at() {
let server = Server::new().await;
let server = Server::new_shared();
let (response, code) = server.tasks_filter("beforeFinishedAt=doggo").await;
snapshot!(code, @"400 Bad Request");

View File

@ -66,8 +66,8 @@ async fn list_tasks() {
async fn list_tasks_with_star_filters() {
let server = Server::new().await;
let index = server.index("test");
index.create(None).await;
index.wait_task(0).await;
let (task, _code) = index.create(None).await;
index.wait_task(task.uid()).await;
index
.add_documents(serde_json::from_str(include_str!("../assets/test_set.json")).unwrap(), None)
.await;

View File

@ -0,0 +1,380 @@
use meili_snap::{json_string, snapshot};
use crate::common::{GetAllDocumentsOptions, Server};
use crate::json;
use crate::vector::generate_default_user_provided_documents;
#[actix_rt::test]
async fn retrieve_binary_quantize_status_in_the_settings() {
let server = Server::new().await;
let index = server.index("doggo");
let (value, code) = server.set_features(json!({"vectorStore": true})).await;
snapshot!(code, @"200 OK");
snapshot!(value, @r###"
{
"vectorStore": true,
"metrics": false,
"logsRoute": false,
"editDocumentsByFunction": false,
"containsFilter": false
}
"###);
let (response, code) = index
.update_settings(json!({
"embedders": {
"manual": {
"source": "userProvided",
"dimensions": 3,
}
},
}))
.await;
snapshot!(code, @"202 Accepted");
server.wait_task(response.uid()).await.succeeded();
let (settings, code) = index.settings().await;
snapshot!(code, @"200 OK");
snapshot!(settings["embedders"]["manual"], @r###"{"source":"userProvided","dimensions":3}"###);
let (response, code) = index
.update_settings(json!({
"embedders": {
"manual": {
"source": "userProvided",
"dimensions": 3,
"binaryQuantized": false,
}
},
}))
.await;
snapshot!(code, @"202 Accepted");
server.wait_task(response.uid()).await.succeeded();
let (settings, code) = index.settings().await;
snapshot!(code, @"200 OK");
snapshot!(settings["embedders"]["manual"], @r###"{"source":"userProvided","dimensions":3,"binaryQuantized":false}"###);
let (response, code) = index
.update_settings(json!({
"embedders": {
"manual": {
"source": "userProvided",
"dimensions": 3,
"binaryQuantized": true,
}
},
}))
.await;
snapshot!(code, @"202 Accepted");
server.wait_task(response.uid()).await.succeeded();
let (settings, code) = index.settings().await;
snapshot!(code, @"200 OK");
snapshot!(settings["embedders"]["manual"], @r###"{"source":"userProvided","dimensions":3,"binaryQuantized":true}"###);
}
#[actix_rt::test]
async fn binary_quantize_before_sending_documents() {
let server = Server::new().await;
let index = server.index("doggo");
let (value, code) = server.set_features(json!({"vectorStore": true})).await;
snapshot!(code, @"200 OK");
snapshot!(value, @r###"
{
"vectorStore": true,
"metrics": false,
"logsRoute": false,
"editDocumentsByFunction": false,
"containsFilter": false
}
"###);
let (response, code) = index
.update_settings(json!({
"embedders": {
"manual": {
"source": "userProvided",
"dimensions": 3,
"binaryQuantized": true,
}
},
}))
.await;
snapshot!(code, @"202 Accepted");
server.wait_task(response.uid()).await.succeeded();
let documents = json!([
{"id": 0, "name": "kefir", "_vectors": { "manual": [-1.2, -2.3, 3.2] }},
{"id": 1, "name": "echo", "_vectors": { "manual": [2.5, 1.5, -130] }},
]);
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
index.wait_task(value.uid()).await.succeeded();
// Make sure the documents are binary quantized
let (documents, _code) = index
.get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() })
.await;
snapshot!(json_string!(documents), @r###"
{
"results": [
{
"id": 0,
"name": "kefir",
"_vectors": {
"manual": {
"embeddings": [
[
-1.0,
-1.0,
1.0
]
],
"regenerate": false
}
}
},
{
"id": 1,
"name": "echo",
"_vectors": {
"manual": {
"embeddings": [
[
1.0,
1.0,
-1.0
]
],
"regenerate": false
}
}
}
],
"offset": 0,
"limit": 20,
"total": 2
}
"###);
}
#[actix_rt::test]
async fn binary_quantize_after_sending_documents() {
let server = Server::new().await;
let index = server.index("doggo");
let (value, code) = server.set_features(json!({"vectorStore": true})).await;
snapshot!(code, @"200 OK");
snapshot!(value, @r###"
{
"vectorStore": true,
"metrics": false,
"logsRoute": false,
"editDocumentsByFunction": false,
"containsFilter": false
}
"###);
let (response, code) = index
.update_settings(json!({
"embedders": {
"manual": {
"source": "userProvided",
"dimensions": 3,
}
},
}))
.await;
snapshot!(code, @"202 Accepted");
server.wait_task(response.uid()).await.succeeded();
let documents = json!([
{"id": 0, "name": "kefir", "_vectors": { "manual": [-1.2, -2.3, 3.2] }},
{"id": 1, "name": "echo", "_vectors": { "manual": [2.5, 1.5, -130] }},
]);
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
index.wait_task(value.uid()).await.succeeded();
let (response, code) = index
.update_settings(json!({
"embedders": {
"manual": {
"source": "userProvided",
"dimensions": 3,
"binaryQuantized": true,
}
},
}))
.await;
snapshot!(code, @"202 Accepted");
server.wait_task(response.uid()).await.succeeded();
// Make sure the documents are binary quantized
let (documents, _code) = index
.get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() })
.await;
snapshot!(json_string!(documents), @r###"
{
"results": [
{
"id": 0,
"name": "kefir",
"_vectors": {
"manual": {
"embeddings": [
[
-1.0,
-1.0,
1.0
]
],
"regenerate": false
}
}
},
{
"id": 1,
"name": "echo",
"_vectors": {
"manual": {
"embeddings": [
[
1.0,
1.0,
-1.0
]
],
"regenerate": false
}
}
}
],
"offset": 0,
"limit": 20,
"total": 2
}
"###);
}
#[actix_rt::test]
async fn try_to_disable_binary_quantization() {
let server = Server::new().await;
let index = server.index("doggo");
let (value, code) = server.set_features(json!({"vectorStore": true})).await;
snapshot!(code, @"200 OK");
snapshot!(value, @r###"
{
"vectorStore": true,
"metrics": false,
"logsRoute": false,
"editDocumentsByFunction": false,
"containsFilter": false
}
"###);
let (response, code) = index
.update_settings(json!({
"embedders": {
"manual": {
"source": "userProvided",
"dimensions": 3,
"binaryQuantized": true,
}
},
}))
.await;
snapshot!(code, @"202 Accepted");
server.wait_task(response.uid()).await.succeeded();
let (response, code) = index
.update_settings(json!({
"embedders": {
"manual": {
"source": "userProvided",
"dimensions": 3,
"binaryQuantized": false,
}
},
}))
.await;
snapshot!(code, @"202 Accepted");
let ret = server.wait_task(response.uid()).await;
snapshot!(ret, @r###"
{
"uid": "[uid]",
"indexUid": "doggo",
"status": "failed",
"type": "settingsUpdate",
"canceledBy": null,
"details": {
"embedders": {
"manual": {
"source": "userProvided",
"dimensions": 3,
"binaryQuantized": false
}
}
},
"error": {
"message": "`.embedders.manual.binaryQuantized`: Cannot disable the binary quantization.\n - Note: Binary quantization is a lossy operation that cannot be reverted.\n - Hint: Add a new embedder that is non-quantized and regenerate the vectors.",
"code": "invalid_settings_embedders",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_settings_embedders"
},
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
}
#[actix_rt::test]
async fn binary_quantize_clear_documents() {
let server = Server::new().await;
let index = generate_default_user_provided_documents(&server).await;
let (response, code) = index
.update_settings(json!({
"embedders": {
"manual": {
"binaryQuantized": true,
}
},
}))
.await;
snapshot!(code, @"202 Accepted");
server.wait_task(response.uid()).await.succeeded();
let (value, _code) = index.clear_all_documents().await;
index.wait_task(value.uid()).await.succeeded();
// Make sure the documents DB has been cleared
let (documents, _code) = index
.get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() })
.await;
snapshot!(json_string!(documents), @r###"
{
"results": [],
"offset": 0,
"limit": 20,
"total": 0
}
"###);
// Make sure the arroy DB has been cleared
let (documents, _code) =
index.search_post(json!({ "hybrid": { "embedder": "manual" }, "vector": [1, 1, 1] })).await;
snapshot!(documents, @r###"
{
"hits": [],
"query": "",
"processingTimeMs": "[duration]",
"limit": 20,
"offset": 0,
"estimatedTotalHits": 0,
"semanticHitCount": 0
}
"###);
}

View File

@ -1,11 +1,15 @@
mod binary_quantized;
mod openai;
mod rest;
mod settings;
use std::str::FromStr;
use meili_snap::{json_string, snapshot};
use meilisearch::option::MaxThreads;
use crate::common::index::Index;
use crate::common::{GetAllDocumentsOptions, Server};
use crate::common::{default_settings, GetAllDocumentsOptions, Server};
use crate::json;
async fn get_server_vector() -> Server {
@ -503,7 +507,11 @@ async fn user_provided_embeddings_error() {
#[actix_rt::test]
async fn user_provided_vectors_error() {
let server = Server::new().await;
let temp = tempfile::tempdir().unwrap();
let mut options = default_settings(temp.path());
// If we have more than one indexing thread the error messages below may become inconsistent
options.indexer_options.max_indexing_threads = MaxThreads::from_str("1").unwrap();
let server = Server::new_with_options(options).await.unwrap();
let index = generate_default_user_provided_documents(&server).await;
@ -617,7 +625,8 @@ async fn clear_documents() {
"###);
// Make sure the arroy DB has been cleared
let (documents, _code) = index.search_post(json!({ "vector": [1, 1, 1] })).await;
let (documents, _code) =
index.search_post(json!({ "vector": [1, 1, 1], "hybrid": {"embedder": "manual"} })).await;
snapshot!(documents, @r###"
{
"hits": [],
@ -678,7 +687,11 @@ async fn add_remove_one_vector_4588() {
let task = index.wait_task(value.uid()).await;
snapshot!(task, name: "document-deleted");
let (documents, _code) = index.search_post(json!({"vector": [1, 1, 1] })).await;
let (documents, _code) = index
.search_post(
json!({"vector": [1, 1, 1], "hybrid": {"semanticRatio": 1.0, "embedder": "manual"} }),
)
.await;
snapshot!(documents, @r###"
{
"hits": [

View File

@ -302,7 +302,8 @@ async fn create_mock_with_template(
"source": "openAi",
"url": url,
"apiKey": API_KEY,
"documentTemplate": document_template
"documentTemplate": document_template,
"documentTemplateMaxBytes": 8000000,
});
model_dimensions.add_to_settings(&mut embedder_settings);
@ -448,7 +449,7 @@ async fn it_works() {
let (response, code) = index
.search_post(json!({
"q": "chien de chasse",
"hybrid": {"semanticRatio": 1.0}
"hybrid": {"semanticRatio": 1.0, "embedder": "default"},
}))
.await;
snapshot!(code, @"200 OK");
@ -488,7 +489,7 @@ async fn it_works() {
let (response, code) = index
.search_post(json!({
"q": "petit chien",
"hybrid": {"semanticRatio": 1.0}
"hybrid": {"semanticRatio": 1.0, "embedder": "default"}
}))
.await;
snapshot!(code, @"200 OK");
@ -528,7 +529,7 @@ async fn it_works() {
let (response, code) = index
.search_post(json!({
"q": "grand chien de berger des montagnes",
"hybrid": {"semanticRatio": 1.0}
"hybrid": {"semanticRatio": 1.0, "embedder": "default"}
}))
.await;
snapshot!(code, @"200 OK");
@ -615,7 +616,7 @@ async fn tokenize_long_text() {
"q": "grand chien de berger des montagnes",
"showRankingScore": true,
"attributesToRetrieve": ["id"],
"hybrid": {"semanticRatio": 1.0}
"hybrid": {"semanticRatio": 1.0, "embedder": "default"}
}))
.await;
snapshot!(code, @"200 OK");
@ -693,6 +694,7 @@ async fn bad_api_key() {
"model": "text-embedding-3-large",
"apiKey": "XXX...",
"documentTemplate": "{%- if doc.gender == \"F\" -%}Une chienne nommée {{doc.name}}, née en {{doc.birthyear}}\n {%- else -%}\n Un chien nommé {{doc.name}}, né en {{doc.birthyear}}\n {%- endif %}, de race {{doc.breed}}.",
"documentTemplateMaxBytes": 8000000,
"url": "[url]"
}
}
@ -735,6 +737,7 @@ async fn bad_api_key() {
"source": "openAi",
"model": "text-embedding-3-large",
"documentTemplate": "{%- if doc.gender == \"F\" -%}Une chienne nommée {{doc.name}}, née en {{doc.birthyear}}\n {%- else -%}\n Un chien nommé {{doc.name}}, né en {{doc.birthyear}}\n {%- endif %}, de race {{doc.breed}}.",
"documentTemplateMaxBytes": 8000000,
"url": "[url]"
}
}
@ -1061,7 +1064,7 @@ async fn smaller_dimensions() {
let (response, code) = index
.search_post(json!({
"q": "chien de chasse",
"hybrid": {"semanticRatio": 1.0}
"hybrid": {"semanticRatio": 1.0, "embedder": "default"}
}))
.await;
snapshot!(code, @"200 OK");
@ -1101,7 +1104,7 @@ async fn smaller_dimensions() {
let (response, code) = index
.search_post(json!({
"q": "petit chien",
"hybrid": {"semanticRatio": 1.0}
"hybrid": {"semanticRatio": 1.0, "embedder": "default"}
}))
.await;
snapshot!(code, @"200 OK");
@ -1141,7 +1144,7 @@ async fn smaller_dimensions() {
let (response, code) = index
.search_post(json!({
"q": "grand chien de berger des montagnes",
"hybrid": {"semanticRatio": 1.0}
"hybrid": {"semanticRatio": 1.0, "embedder": "default"}
}))
.await;
snapshot!(code, @"200 OK");
@ -1292,7 +1295,7 @@ async fn small_embedding_model() {
let (response, code) = index
.search_post(json!({
"q": "chien de chasse",
"hybrid": {"semanticRatio": 1.0}
"hybrid": {"semanticRatio": 1.0, "embedder": "default"}
}))
.await;
snapshot!(code, @"200 OK");
@ -1332,7 +1335,7 @@ async fn small_embedding_model() {
let (response, code) = index
.search_post(json!({
"q": "petit chien",
"hybrid": {"semanticRatio": 1.0}
"hybrid": {"semanticRatio": 1.0, "embedder": "default"}
}))
.await;
snapshot!(code, @"200 OK");
@ -1372,7 +1375,7 @@ async fn small_embedding_model() {
let (response, code) = index
.search_post(json!({
"q": "grand chien de berger des montagnes",
"hybrid": {"semanticRatio": 1.0}
"hybrid": {"semanticRatio": 1.0, "embedder": "default"}
}))
.await;
snapshot!(code, @"200 OK");
@ -1522,7 +1525,7 @@ async fn legacy_embedding_model() {
let (response, code) = index
.search_post(json!({
"q": "chien de chasse",
"hybrid": {"semanticRatio": 1.0}
"hybrid": {"semanticRatio": 1.0, "embedder": "default"}
}))
.await;
snapshot!(code, @"200 OK");
@ -1562,7 +1565,7 @@ async fn legacy_embedding_model() {
let (response, code) = index
.search_post(json!({
"q": "petit chien",
"hybrid": {"semanticRatio": 1.0}
"hybrid": {"semanticRatio": 1.0, "embedder": "default"}
}))
.await;
snapshot!(code, @"200 OK");
@ -1602,7 +1605,7 @@ async fn legacy_embedding_model() {
let (response, code) = index
.search_post(json!({
"q": "grand chien de berger des montagnes",
"hybrid": {"semanticRatio": 1.0}
"hybrid": {"semanticRatio": 1.0, "embedder": "default"}
}))
.await;
snapshot!(code, @"200 OK");
@ -1753,7 +1756,7 @@ async fn it_still_works() {
let (response, code) = index
.search_post(json!({
"q": "chien de chasse",
"hybrid": {"semanticRatio": 1.0}
"hybrid": {"semanticRatio": 1.0, "embedder": "default"}
}))
.await;
snapshot!(code, @"200 OK");
@ -1793,7 +1796,7 @@ async fn it_still_works() {
let (response, code) = index
.search_post(json!({
"q": "petit chien",
"hybrid": {"semanticRatio": 1.0}
"hybrid": {"semanticRatio": 1.0, "embedder": "default"}
}))
.await;
snapshot!(code, @"200 OK");
@ -1833,7 +1836,7 @@ async fn it_still_works() {
let (response, code) = index
.search_post(json!({
"q": "grand chien de berger des montagnes",
"hybrid": {"semanticRatio": 1.0}
"hybrid": {"semanticRatio": 1.0, "embedder": "default"}
}))
.await;
snapshot!(code, @"200 OK");

View File

@ -1,3 +1,4 @@
use std::collections::BTreeMap;
use std::sync::atomic::{AtomicUsize, Ordering};
use meili_snap::{json_string, snapshot};
@ -37,6 +38,46 @@ async fn create_mock() -> (MockServer, Value) {
(mock_server, embedder_settings)
}
async fn create_mock_map() -> (MockServer, Value) {
let mock_server = MockServer::start().await;
let text_to_embedding: BTreeMap<_, _> = vec![
// text -> embedding
("name: kefir\n", [0.0, 0.1, 0.2]),
]
// turn into btree
.into_iter()
.collect();
Mock::given(method("POST"))
.and(path("/"))
.respond_with(move |req: &Request| {
let text: String = req.body_json().unwrap();
match text_to_embedding.get(text.as_str()) {
Some(embedding) => {
ResponseTemplate::new(200).set_body_json(json!({ "data": embedding }))
}
None => ResponseTemplate::new(404)
.set_body_json(json!({"error": "text not found", "text": text})),
}
})
.mount(&mock_server)
.await;
let url = mock_server.uri();
let embedder_settings = json!({
"source": "rest",
"url": url,
"dimensions": 3,
"request": "{{text}}",
"response": {
"data": "{{embedding}}"
}
});
(mock_server, embedder_settings)
}
#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
struct MultipleRequest {
input: Vec<String>,
@ -1100,6 +1141,7 @@ async fn server_returns_bad_request() {
let (response, code) = index
.update_settings(json!({
"searchableAttributes": ["name", "missing_field"],
"embedders": {
"rest": json!({ "source": "rest", "url": mock.uri(), "request": "{{text}}", "response": "{{embedding}}", "dimensions": 3 }),
},
@ -1115,6 +1157,10 @@ async fn server_returns_bad_request() {
"type": "settingsUpdate",
"canceledBy": null,
"details": {
"searchableAttributes": [
"name",
"missing_field"
],
"embedders": {
"rest": {
"source": "rest",
@ -1148,7 +1194,7 @@ async fn server_returns_bad_request() {
"indexedDocuments": 0
},
"error": {
"message": "While embedding documents for embedder `rest`: user error: sent a bad request to embedding server\n - Hint: check that the `request` in the embedder configuration matches the remote server's API\n - server replied with `{\"error\":\"Invalid request: invalid type: string \\\" id: 1\\\\n name: kefir\\\\n\\\", expected struct MultipleRequest at line 1 column 24\"}`",
"message": "While embedding documents for embedder `rest`: user error: sent a bad request to embedding server\n - Hint: check that the `request` in the embedder configuration matches the remote server's API\n - server replied with `{\"error\":\"Invalid request: invalid type: string \\\"name: kefir\\\\n\\\", expected struct MultipleRequest at line 1 column 15\"}`",
"code": "vector_embedding_error",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#vector_embedding_error"
@ -1891,3 +1937,109 @@ async fn server_custom_header() {
}
"###);
}
#[actix_rt::test]
async fn searchable_reindex() {
let (_mock, setting) = create_mock_map().await;
let server = get_server_vector().await;
let index = server.index("doggo");
let (response, code) = index
.update_settings(json!({
"searchableAttributes": ["name", "missing_field"],
"embedders": {
"rest": setting,
},
}))
.await;
snapshot!(code, @"202 Accepted");
let task = server.wait_task(response.uid()).await;
snapshot!(task, @r###"
{
"uid": "[uid]",
"indexUid": "doggo",
"status": "succeeded",
"type": "settingsUpdate",
"canceledBy": null,
"details": {
"searchableAttributes": [
"name",
"missing_field"
],
"embedders": {
"rest": {
"source": "rest",
"dimensions": 3,
"url": "[url]",
"request": "{{text}}",
"response": {
"data": "{{embedding}}"
}
}
}
},
"error": null,
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
let (response, code) =
index.add_documents(json!( { "id": 1, "name": "kefir", "breed": "patou" }), None).await;
snapshot!(code, @"202 Accepted");
let task = server.wait_task(response.uid()).await;
snapshot!(task, @r###"
{
"uid": "[uid]",
"indexUid": "doggo",
"status": "succeeded",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 1
},
"error": null,
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
// triggers reindexing with the new searchable attribute.
// as the mock intentionally doesn't know of this text, the task will fail, outputting the putative rendered text.
let (response, code) = index
.update_settings(json!({
"searchableAttributes": ["breed"],
}))
.await;
snapshot!(code, @"202 Accepted");
let task = server.wait_task(response.uid()).await;
snapshot!(task, @r###"
{
"uid": "[uid]",
"indexUid": "doggo",
"status": "failed",
"type": "settingsUpdate",
"canceledBy": null,
"details": {
"searchableAttributes": [
"breed"
]
},
"error": {
"message": "While embedding documents for embedder `rest`: error: received unexpected HTTP 404 from embedding server\n - server replied with `{\"error\":\"text not found\",\"text\":\"breed: patou\\n\"}`",
"code": "vector_embedding_error",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#vector_embedding_error"
},
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
}

View File

@ -218,7 +218,8 @@ async fn reset_embedder_documents() {
"###);
// Make sure the arroy DB has been cleared
let (documents, _code) = index.search_post(json!({ "vector": [1, 1, 1] })).await;
let (documents, _code) =
index.search_post(json!({ "vector": [1, 1, 1], "hybrid": {"embedder": "default"} })).await;
snapshot!(json_string!(documents), @r###"
{
"message": "Cannot find embedder with name `default`.",

View File

@ -15,5 +15,6 @@ dump = { path = "../dump" }
file-store = { path = "../file-store" }
meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-types = { path = "../meilisearch-types" }
serde = { version = "1.0.209", features = ["derive"] }
time = { version = "0.3.36", features = ["formatting"] }
uuid = { version = "1.10.0", features = ["v4"], default-features = false }

View File

@ -2,7 +2,7 @@ use std::fs::{read_dir, read_to_string, remove_file, File};
use std::io::BufWriter;
use std::path::PathBuf;
use anyhow::Context;
use anyhow::{bail, Context};
use clap::{Parser, Subcommand};
use dump::{DumpWriter, IndexMetadata};
use file_store::FileStore;
@ -10,9 +10,10 @@ use meilisearch_auth::AuthController;
use meilisearch_types::heed::types::{SerdeJson, Str};
use meilisearch_types::heed::{Database, Env, EnvOpenOptions, RoTxn, RwTxn, Unspecified};
use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader};
use meilisearch_types::milli::index::{db_name, main_key};
use meilisearch_types::milli::{obkv_to_json, BEU32};
use meilisearch_types::tasks::{Status, Task};
use meilisearch_types::versioning::check_version_file;
use meilisearch_types::versioning::{create_version_file, get_version, parse_version};
use meilisearch_types::Index;
use time::macros::format_description;
use time::OffsetDateTime;
@ -62,21 +63,458 @@ enum Command {
#[arg(long)]
skip_enqueued_tasks: bool,
},
/// Attempts to upgrade from one major version to the next without a dump.
///
/// Make sure to run this commmand when Meilisearch is not running!
/// If Meilisearch is running while executing this command, the database could be corrupted
/// (contain data from both the old and the new versions)
///
/// Supported upgrade paths:
///
/// - v1.9.0 -> v1.10.0
OfflineUpgrade {
#[arg(long)]
target_version: String,
},
}
fn main() -> anyhow::Result<()> {
let Cli { db_path, command } = Cli::parse();
check_version_file(&db_path).context("While checking the version file")?;
let detected_version = get_version(&db_path).context("While checking the version file")?;
match command {
Command::ClearTaskQueue => clear_task_queue(db_path),
Command::ExportADump { dump_dir, skip_enqueued_tasks } => {
export_a_dump(db_path, dump_dir, skip_enqueued_tasks)
}
Command::OfflineUpgrade { target_version } => {
let target_version = parse_version(&target_version).context("While parsing `--target-version`. Make sure `--target-version` is in the format MAJOR.MINOR.PATCH")?;
OfflineUpgrade { db_path, current_version: detected_version, target_version }.upgrade()
}
}
}
struct OfflineUpgrade {
db_path: PathBuf,
current_version: (String, String, String),
target_version: (String, String, String),
}
impl OfflineUpgrade {
fn upgrade(self) -> anyhow::Result<()> {
// TODO: if we make this process support more versions, introduce a more flexible way of checking for the version
// currently only supports v1.9 to v1.10
let (current_major, current_minor, current_patch) = &self.current_version;
match (current_major.as_str(), current_minor.as_str(), current_patch.as_str()) {
("1", "9", _) => {}
_ => {
bail!("Unsupported current version {current_major}.{current_minor}.{current_patch}. Can only upgrade from v1.9")
}
}
let (target_major, target_minor, target_patch) = &self.target_version;
match (target_major.as_str(), target_minor.as_str(), target_patch.as_str()) {
("1", "10", _) => {}
_ => {
bail!("Unsupported target version {target_major}.{target_minor}.{target_patch}. Can only upgrade to v1.10")
}
}
println!("Upgrading from {current_major}.{current_minor}.{current_patch} to {target_major}.{target_minor}.{target_patch}");
self.v1_9_to_v1_10()?;
println!("Writing VERSION file");
create_version_file(&self.db_path, target_major, target_minor, target_patch)
.context("while writing VERSION file after the upgrade")?;
println!("Success");
Ok(())
}
fn v1_9_to_v1_10(&self) -> anyhow::Result<()> {
// 2 changes here
// 1. date format. needs to be done before opening the Index
// 2. REST embedders. We don't support this case right now, so bail
let index_scheduler_path = self.db_path.join("tasks");
let env = unsafe { EnvOpenOptions::new().max_dbs(100).open(&index_scheduler_path) }
.with_context(|| {
format!("While trying to open {:?}", index_scheduler_path.display())
})?;
let mut sched_wtxn = env.write_txn()?;
let index_mapping: Database<Str, UuidCodec> =
try_opening_database(&env, &sched_wtxn, "index-mapping")?;
let index_stats: Database<UuidCodec, Unspecified> =
try_opening_database(&env, &sched_wtxn, "index-stats").with_context(|| {
format!("While trying to open {:?}", index_scheduler_path.display())
})?;
let index_count =
index_mapping.len(&sched_wtxn).context("while reading the number of indexes")?;
// FIXME: not ideal, we have to pre-populate all indexes to prevent double borrow of sched_wtxn
// 1. immutably for the iteration
// 2. mutably for updating index stats
let indexes: Vec<_> = index_mapping
.iter(&sched_wtxn)?
.map(|res| res.map(|(uid, uuid)| (uid.to_owned(), uuid)))
.collect();
let mut rest_embedders = Vec::new();
let mut unwrapped_indexes = Vec::new();
// check that update can take place
for (index_index, result) in indexes.into_iter().enumerate() {
let (uid, uuid) = result?;
let index_path = self.db_path.join("indexes").join(uuid.to_string());
println!(
"[{}/{index_count}]Checking that update can take place for `{uid}` at `{}`",
index_index + 1,
index_path.display()
);
let index_env = unsafe {
// FIXME: fetch the 25 magic number from the index file
EnvOpenOptions::new().max_dbs(25).open(&index_path).with_context(|| {
format!("while opening index {uid} at '{}'", index_path.display())
})?
};
let index_txn = index_env.read_txn().with_context(|| {
format!(
"while obtaining a write transaction for index {uid} at {}",
index_path.display()
)
})?;
println!("\t- Checking for incompatible embedders (REST embedders)");
let rest_embedders_for_index = find_rest_embedders(&uid, &index_env, &index_txn)?;
if rest_embedders_for_index.is_empty() {
unwrapped_indexes.push((uid, uuid));
} else {
// no need to add to unwrapped indexes because we'll exit early
rest_embedders.push((uid, rest_embedders_for_index));
}
}
if !rest_embedders.is_empty() {
let rest_embedders = rest_embedders
.into_iter()
.flat_map(|(index, embedders)| std::iter::repeat(index.clone()).zip(embedders))
.map(|(index, embedder)| format!("\t- embedder `{embedder}` in index `{index}`"))
.collect::<Vec<_>>()
.join("\n");
bail!("The update cannot take place because there are REST embedder(s). Remove them before proceeding with the update:\n{rest_embedders}\n\n\
The database has not been modified and is still a valid v1.9 database.");
}
println!("Update can take place, updating");
for (index_index, (uid, uuid)) in unwrapped_indexes.into_iter().enumerate() {
let index_path = self.db_path.join("indexes").join(uuid.to_string());
println!(
"[{}/{index_count}]Updating index `{uid}` at `{}`",
index_index + 1,
index_path.display()
);
let index_env = unsafe {
// FIXME: fetch the 25 magic number from the index file
EnvOpenOptions::new().max_dbs(25).open(&index_path).with_context(|| {
format!("while opening index {uid} at '{}'", index_path.display())
})?
};
let mut index_wtxn = index_env.write_txn().with_context(|| {
format!(
"while obtaining a write transaction for index `{uid}` at `{}`",
index_path.display()
)
})?;
println!("\t- Updating index stats");
update_index_stats(index_stats, &uid, uuid, &mut sched_wtxn)?;
println!("\t- Updating date format");
update_date_format(&uid, &index_env, &mut index_wtxn)?;
index_wtxn.commit().with_context(|| {
format!(
"while committing the write txn for index `{uid}` at {}",
index_path.display()
)
})?;
}
sched_wtxn.commit().context("while committing the write txn for the index-scheduler")?;
println!("Upgrading database succeeded");
Ok(())
}
}
pub mod v1_9 {
pub type FieldDistribution = std::collections::BTreeMap<String, u64>;
/// The statistics that can be computed from an `Index` object.
#[derive(serde::Serialize, serde::Deserialize, Debug)]
pub struct IndexStats {
/// Number of documents in the index.
pub number_of_documents: u64,
/// Size taken up by the index' DB, in bytes.
///
/// This includes the size taken by both the used and free pages of the DB, and as the free pages
/// are not returned to the disk after a deletion, this number is typically larger than
/// `used_database_size` that only includes the size of the used pages.
pub database_size: u64,
/// Size taken by the used pages of the index' DB, in bytes.
///
/// As the DB backend does not return to the disk the pages that are not currently used by the DB,
/// this value is typically smaller than `database_size`.
pub used_database_size: u64,
/// Association of every field name with the number of times it occurs in the documents.
pub field_distribution: FieldDistribution,
/// Creation date of the index.
pub created_at: time::OffsetDateTime,
/// Date of the last update of the index.
pub updated_at: time::OffsetDateTime,
}
use serde::{Deserialize, Serialize};
#[derive(Debug, Deserialize, Serialize)]
pub struct IndexEmbeddingConfig {
pub name: String,
pub config: EmbeddingConfig,
}
#[derive(Debug, Clone, Default, serde::Deserialize, serde::Serialize)]
pub struct EmbeddingConfig {
/// Options of the embedder, specific to each kind of embedder
pub embedder_options: EmbedderOptions,
}
/// Options of an embedder, specific to each kind of embedder.
#[derive(Debug, Clone, Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize)]
pub enum EmbedderOptions {
HuggingFace(hf::EmbedderOptions),
OpenAi(openai::EmbedderOptions),
Ollama(ollama::EmbedderOptions),
UserProvided(manual::EmbedderOptions),
Rest(rest::EmbedderOptions),
}
impl Default for EmbedderOptions {
fn default() -> Self {
Self::OpenAi(openai::EmbedderOptions { api_key: None, dimensions: None })
}
}
mod hf {
#[derive(Debug, Clone, Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize)]
pub struct EmbedderOptions {
pub model: String,
pub revision: Option<String>,
}
}
mod openai {
#[derive(Debug, Clone, Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize)]
pub struct EmbedderOptions {
pub api_key: Option<String>,
pub dimensions: Option<usize>,
}
}
mod ollama {
#[derive(Debug, Clone, Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize)]
pub struct EmbedderOptions {
pub embedding_model: String,
pub url: Option<String>,
pub api_key: Option<String>,
}
}
mod manual {
#[derive(Debug, Clone, Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize)]
pub struct EmbedderOptions {
pub dimensions: usize,
}
}
mod rest {
#[derive(Debug, Clone, PartialEq, Eq, serde::Deserialize, serde::Serialize, Hash)]
pub struct EmbedderOptions {
pub api_key: Option<String>,
pub dimensions: Option<usize>,
pub url: String,
pub input_field: Vec<String>,
// path to the array of embeddings
pub path_to_embeddings: Vec<String>,
// shape of a single embedding
pub embedding_object: Vec<String>,
}
}
pub type OffsetDateTime = time::OffsetDateTime;
}
pub mod v1_10 {
use crate::v1_9;
pub type FieldDistribution = std::collections::BTreeMap<String, u64>;
/// The statistics that can be computed from an `Index` object.
#[derive(serde::Serialize, serde::Deserialize, Debug)]
pub struct IndexStats {
/// Number of documents in the index.
pub number_of_documents: u64,
/// Size taken up by the index' DB, in bytes.
///
/// This includes the size taken by both the used and free pages of the DB, and as the free pages
/// are not returned to the disk after a deletion, this number is typically larger than
/// `used_database_size` that only includes the size of the used pages.
pub database_size: u64,
/// Size taken by the used pages of the index' DB, in bytes.
///
/// As the DB backend does not return to the disk the pages that are not currently used by the DB,
/// this value is typically smaller than `database_size`.
pub used_database_size: u64,
/// Association of every field name with the number of times it occurs in the documents.
pub field_distribution: FieldDistribution,
/// Creation date of the index.
#[serde(with = "time::serde::rfc3339")]
pub created_at: time::OffsetDateTime,
/// Date of the last update of the index.
#[serde(with = "time::serde::rfc3339")]
pub updated_at: time::OffsetDateTime,
}
impl From<v1_9::IndexStats> for IndexStats {
fn from(
v1_9::IndexStats {
number_of_documents,
database_size,
used_database_size,
field_distribution,
created_at,
updated_at,
}: v1_9::IndexStats,
) -> Self {
IndexStats {
number_of_documents,
database_size,
used_database_size,
field_distribution,
created_at,
updated_at,
}
}
}
#[derive(serde::Serialize, serde::Deserialize)]
#[serde(transparent)]
pub struct OffsetDateTime(#[serde(with = "time::serde::rfc3339")] pub time::OffsetDateTime);
}
fn update_index_stats(
index_stats: Database<UuidCodec, Unspecified>,
index_uid: &str,
index_uuid: uuid::Uuid,
sched_wtxn: &mut RwTxn,
) -> anyhow::Result<()> {
let ctx = || format!("while updating index stats for index `{index_uid}`");
let stats: Option<v1_9::IndexStats> = index_stats
.remap_data_type::<SerdeJson<v1_9::IndexStats>>()
.get(sched_wtxn, &index_uuid)
.with_context(ctx)?;
if let Some(stats) = stats {
let stats: v1_10::IndexStats = stats.into();
index_stats
.remap_data_type::<SerdeJson<v1_10::IndexStats>>()
.put(sched_wtxn, &index_uuid, &stats)
.with_context(ctx)?;
}
Ok(())
}
fn update_date_format(
index_uid: &str,
index_env: &Env,
index_wtxn: &mut RwTxn,
) -> anyhow::Result<()> {
let main = try_opening_poly_database(index_env, index_wtxn, db_name::MAIN)
.with_context(|| format!("while updating date format for index `{index_uid}`"))?;
date_round_trip(index_wtxn, index_uid, main, main_key::CREATED_AT_KEY)?;
date_round_trip(index_wtxn, index_uid, main, main_key::UPDATED_AT_KEY)?;
Ok(())
}
fn find_rest_embedders(
index_uid: &str,
index_env: &Env,
index_txn: &RoTxn,
) -> anyhow::Result<Vec<String>> {
let main = try_opening_poly_database(index_env, index_txn, db_name::MAIN)
.with_context(|| format!("while checking REST embedders for index `{index_uid}`"))?;
let mut rest_embedders = vec![];
for config in main
.remap_types::<Str, SerdeJson<Vec<v1_9::IndexEmbeddingConfig>>>()
.get(index_txn, main_key::EMBEDDING_CONFIGS)?
.unwrap_or_default()
{
if let v1_9::EmbedderOptions::Rest(_) = config.config.embedder_options {
rest_embedders.push(config.name);
}
}
Ok(rest_embedders)
}
fn date_round_trip(
wtxn: &mut RwTxn,
index_uid: &str,
db: Database<Unspecified, Unspecified>,
key: &str,
) -> anyhow::Result<()> {
let datetime =
db.remap_types::<Str, SerdeJson<v1_9::OffsetDateTime>>().get(wtxn, key).with_context(
|| format!("could not read `{key}` while updating date format for index `{index_uid}`"),
)?;
if let Some(datetime) = datetime {
db.remap_types::<Str, SerdeJson<v1_10::OffsetDateTime>>()
.put(wtxn, key, &v1_10::OffsetDateTime(datetime))
.with_context(|| {
format!(
"could not write `{key}` while updating date format for index `{index_uid}`"
)
})?;
}
Ok(())
}
/// Clears the task queue located at `db_path`.
fn clear_task_queue(db_path: PathBuf) -> anyhow::Result<()> {
let path = db_path.join("tasks");
@ -244,7 +682,7 @@ fn export_a_dump(
format!("While iterating on content file {:?}", content_file_uuid)
})? {
dump_content_file
.push_document(&obkv_to_object(&doc, &documents_batch_index)?)?;
.push_document(&obkv_to_object(doc, &documents_batch_index)?)?;
}
dump_content_file.flush()?;
count += 1;

View File

@ -12,12 +12,14 @@ readme.workspace = true
license.workspace = true
[dependencies]
big_s = "1.0.2"
bimap = { version = "0.6.3", features = ["serde"] }
bincode = "1.3.3"
bstr = "1.9.1"
bytemuck = { version = "1.16.1", features = ["extern_crate_alloc"] }
byteorder = "1.5.0"
charabia = { version = "0.9.0", default-features = false }
# charabia = { version = "0.9.0", default-features = false }
charabia = { git = "https://github.com/meilisearch/charabia", branch = "mutualize-char-normalizer", default-features = false }
concat-arrays = "0.1.2"
crossbeam-channel = "0.5.13"
deserr = "0.6.2"
@ -27,9 +29,9 @@ fst = "0.4.7"
fxhash = "0.2.1"
geoutils = "0.5.1"
grenad = { version = "0.4.7", default-features = false, features = [
"rayon",
"tempfile",
] }
"rayon", # TODO Should we keep this feature
"tempfile"
], git = "https://github.com/meilisearch/grenad", branch = "various-improvements" }
heed = { version = "0.20.3", default-features = false, features = [
"serde-json",
"serde-bincode",
@ -40,14 +42,14 @@ json-depth-checker = { path = "../json-depth-checker" }
levenshtein_automata = { version = "0.2.1", features = ["fst_automaton"] }
memchr = "2.5.0"
memmap2 = "0.9.4"
obkv = "0.2.2"
obkv = { git = "https://github.com/kerollmops/obkv", branch = "unsized-kvreader" }
once_cell = "1.19.0"
ordered-float = "4.2.1"
rayon = "1.10.0"
roaring = { version = "0.10.6", features = ["serde"] }
rstar = { version = "0.12.0", features = ["serde"] }
serde = { version = "1.0.204", features = ["derive"] }
serde_json = { version = "1.0.120", features = ["preserve_order"] }
serde_json = { version = "1.0.120", features = ["preserve_order", "raw_value"] }
slice-group-by = "0.3.1"
smallstr = { version = "0.3.0", features = ["serde"] }
smallvec = "1.13.2"
@ -80,16 +82,16 @@ hf-hub = { git = "https://github.com/dureuill/hf-hub.git", branch = "rust_tls",
tiktoken-rs = "0.5.9"
liquid = "0.26.6"
rhai = { version = "1.19.0", features = ["serde", "no_module", "no_custom_syntax", "no_time", "sync"] }
arroy = "0.4.0"
arroy = { git = "https://github.com/meilisearch/arroy/", rev = "2386594dfb009ce08821a925ccc89fb8e30bf73d" }
rand = "0.8.5"
tracing = "0.1.40"
ureq = { version = "2.10.0", features = ["json"] }
url = "2.5.2"
rayon-par-bridge = "0.1.0"
hashbrown = "0.14.5"
[dev-dependencies]
mimalloc = { version = "0.1.43", default-features = false }
big_s = "1.0.2"
insta = "1.39.0"
maplit = "1.0.2"
md5 = "0.7.0"
@ -106,6 +108,8 @@ all-tokenizations = [
"charabia/greek",
"charabia/khmer",
"charabia/vietnamese",
"charabia/swedish-recomposition",
"charabia/german-segmentation",
]
# Use POSIX semaphores instead of SysV semaphores in LMDB
@ -138,6 +142,9 @@ khmer = ["charabia/khmer"]
# allow vietnamese specialized tokenization
vietnamese = ["charabia/vietnamese"]
# allow german specialized tokenization
german = ["charabia/german-segmentation"]
# force swedish character recomposition
swedish-recomposition = ["charabia/swedish-recomposition"]

View File

@ -292,7 +292,7 @@ mod test {
.unwrap()
.into_cursor_and_fields_index();
let doc = cursor.next_document().unwrap().unwrap();
let val = obkv_to_object(&doc, &index).map(Value::from).unwrap();
let val = obkv_to_object(doc, &index).map(Value::from).unwrap();
assert_eq!(
val,
@ -321,7 +321,7 @@ mod test {
.into_cursor_and_fields_index();
let doc = cursor.next_document().unwrap().unwrap();
let val = obkv_to_object(&doc, &index).map(Value::from).unwrap();
let val = obkv_to_object(doc, &index).map(Value::from).unwrap();
assert_eq!(
val,
@ -348,7 +348,7 @@ mod test {
.into_cursor_and_fields_index();
let doc = cursor.next_document().unwrap().unwrap();
let val = obkv_to_object(&doc, &index).map(Value::from).unwrap();
let val = obkv_to_object(doc, &index).map(Value::from).unwrap();
assert_eq!(
val,
@ -375,7 +375,7 @@ mod test {
.into_cursor_and_fields_index();
let doc = cursor.next_document().unwrap().unwrap();
let val = obkv_to_object(&doc, &index).map(Value::from).unwrap();
let val = obkv_to_object(doc, &index).map(Value::from).unwrap();
assert_eq!(
val,
@ -402,7 +402,7 @@ mod test {
.into_cursor_and_fields_index();
let doc = cursor.next_document().unwrap().unwrap();
let val = obkv_to_object(&doc, &index).map(Value::from).unwrap();
let val = obkv_to_object(doc, &index).map(Value::from).unwrap();
assert_eq!(
val,
@ -429,7 +429,7 @@ mod test {
.into_cursor_and_fields_index();
let doc = cursor.next_document().unwrap().unwrap();
let val = obkv_to_object(&doc, &index).map(Value::from).unwrap();
let val = obkv_to_object(doc, &index).map(Value::from).unwrap();
assert_eq!(
val,
@ -456,7 +456,7 @@ mod test {
.into_cursor_and_fields_index();
let doc = cursor.next_document().unwrap().unwrap();
let val = obkv_to_object(&doc, &index).map(Value::from).unwrap();
let val = obkv_to_object(doc, &index).map(Value::from).unwrap();
assert_eq!(
val,
@ -483,7 +483,7 @@ mod test {
.into_cursor_and_fields_index();
let doc = cursor.next_document().unwrap().unwrap();
let val = obkv_to_object(&doc, &index).map(Value::from).unwrap();
let val = obkv_to_object(doc, &index).map(Value::from).unwrap();
assert_eq!(
val,
@ -510,7 +510,7 @@ mod test {
.into_cursor_and_fields_index();
let doc = cursor.next_document().unwrap().unwrap();
let val = obkv_to_object(&doc, &index).map(Value::from).unwrap();
let val = obkv_to_object(doc, &index).map(Value::from).unwrap();
assert_eq!(
val,
@ -555,7 +555,7 @@ mod test {
.into_cursor_and_fields_index();
let doc = cursor.next_document().unwrap().unwrap();
let val = obkv_to_object(&doc, &index).map(Value::from).unwrap();
let val = obkv_to_object(doc, &index).map(Value::from).unwrap();
assert_eq!(
val,

View File

@ -69,7 +69,7 @@ impl<R: io::Read + io::Seek> EnrichedDocumentsBatchReader<R> {
#[derive(Debug, Clone)]
pub struct EnrichedDocument<'a> {
pub document: KvReader<'a, FieldId>,
pub document: &'a KvReader<FieldId>,
pub document_id: DocumentId,
}

View File

@ -27,7 +27,7 @@ use crate::{FieldId, Object, Result};
const DOCUMENTS_BATCH_INDEX_KEY: [u8; 8] = u64::MAX.to_be_bytes();
/// Helper function to convert an obkv reader into a JSON object.
pub fn obkv_to_object(obkv: &KvReader<'_, FieldId>, index: &DocumentsBatchIndex) -> Result<Object> {
pub fn obkv_to_object(obkv: &KvReader<FieldId>, index: &DocumentsBatchIndex) -> Result<Object> {
obkv.iter()
.map(|(field_id, value)| {
let field_name = index
@ -76,7 +76,7 @@ impl DocumentsBatchIndex {
self.0.get_by_right(name).cloned()
}
pub fn recreate_json(&self, document: &obkv::KvReaderU16<'_>) -> Result<Object> {
pub fn recreate_json(&self, document: &obkv::KvReaderU16) -> Result<Object> {
let mut map = Object::new();
for (k, v) in document.iter() {

View File

@ -1,8 +1,10 @@
use std::borrow::Cow;
use std::iter;
use std::result::Result as StdResult;
use serde_json::Value;
use serde_json::{from_str, Value};
use crate::update::new::{CowStr, TopLevelMap};
use crate::{FieldId, InternalError, Object, Result, UserError};
/// The symbol used to define levels in a nested primary key.
@ -52,7 +54,7 @@ impl<'a> PrimaryKey<'a> {
pub fn document_id(
&self,
document: &obkv::KvReader<'_, FieldId>,
document: &obkv::KvReader<FieldId>,
fields: &impl FieldIdMapper,
) -> Result<StdResult<String, DocumentIdExtractionError>> {
match self {
@ -100,6 +102,45 @@ impl<'a> PrimaryKey<'a> {
}
}
/// Returns the document ID based on the primary and
/// search for it recursively in zero-copy-deserialized documents.
pub fn document_id_from_top_level_map<'p>(
&self,
document: &TopLevelMap<'p>,
) -> Result<StdResult<CowStr<'p>, DocumentIdExtractionError>> {
fn get_docid<'p>(
document: &TopLevelMap<'p>,
primary_key: &[&str],
) -> Result<StdResult<CowStr<'p>, DocumentIdExtractionError>> {
match primary_key {
[] => unreachable!("arrrgh"), // would None be ok?
[primary_key] => match document.0.get(*primary_key) {
Some(value) => match from_str::<u64>(value.get()) {
Ok(value) => Ok(Ok(CowStr(Cow::Owned(value.to_string())))),
Err(_) => match from_str(value.get()) {
Ok(document_id) => Ok(Ok(document_id)),
Err(e) => Ok(Err(DocumentIdExtractionError::InvalidDocumentId(
UserError::SerdeJson(e),
))),
},
},
None => Ok(Err(DocumentIdExtractionError::MissingDocumentId)),
},
[head, tail @ ..] => match document.0.get(*head) {
Some(value) => {
let document = from_str(value.get()).map_err(InternalError::SerdeJson)?;
get_docid(&document, tail)
}
None => Ok(Err(DocumentIdExtractionError::MissingDocumentId)),
},
}
}
/// TODO do not allocate a vec everytime here
let primary_key: Vec<_> = self.name().split(PRIMARY_KEY_SPLIT_SYMBOL).collect();
get_docid(document, &primary_key)
}
/// Returns an `Iterator` that gives all the possible fields names the primary key
/// can have depending of the first level name and depth of the objects.
pub fn possible_level_names(&self) -> impl Iterator<Item = (&str, &str)> + '_ {

View File

@ -72,15 +72,24 @@ impl<R> DocumentsBatchCursor<R> {
}
impl<R: io::Read + io::Seek> DocumentsBatchCursor<R> {
/// Returns a single document from the database.
pub fn get(
&mut self,
offset: u32,
) -> Result<Option<&KvReader<FieldId>>, DocumentsBatchCursorError> {
match self.cursor.move_on_key_equal_to(offset.to_be_bytes())? {
Some((key, value)) if key != DOCUMENTS_BATCH_INDEX_KEY => Ok(Some(value.into())),
_otherwise => Ok(None),
}
}
/// Returns the next document, starting from the first one. Subsequent calls to
/// `next_document` advance the document reader until all the documents have been read.
pub fn next_document(
&mut self,
) -> Result<Option<KvReader<'_, FieldId>>, DocumentsBatchCursorError> {
) -> Result<Option<&KvReader<FieldId>>, DocumentsBatchCursorError> {
match self.cursor.move_on_next()? {
Some((key, value)) if key != DOCUMENTS_BATCH_INDEX_KEY => {
Ok(Some(KvReader::new(value)))
}
Some((key, value)) if key != DOCUMENTS_BATCH_INDEX_KEY => Ok(Some(value.into())),
_otherwise => Ok(None),
}
}

View File

@ -258,6 +258,12 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco
},
#[error("`.embedders.{embedder_name}.dimensions`: `dimensions` cannot be zero")]
InvalidSettingsDimensions { embedder_name: String },
#[error(
"`.embedders.{embedder_name}.binaryQuantized`: Cannot disable the binary quantization.\n - Note: Binary quantization is a lossy operation that cannot be reverted.\n - Hint: Add a new embedder that is non-quantized and regenerate the vectors."
)]
InvalidDisableBinaryQuantization { embedder_name: String },
#[error("`.embedders.{embedder_name}.documentTemplateMaxBytes`: `documentTemplateMaxBytes` cannot be zero")]
InvalidSettingsDocumentTemplateMaxBytes { embedder_name: String },
#[error("`.embedders.{embedder_name}.url`: could not parse `{url}`: {inner_error}")]
InvalidUrl { embedder_name: String, inner_error: url::ParseError, url: String },
#[error("Document editions cannot modify a document's primary key")]

View File

@ -4,6 +4,9 @@ use serde::{Deserialize, Serialize};
use crate::FieldId;
mod global;
pub use global::GlobalFieldsIdsMap;
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FieldsIdsMap {
names_ids: BTreeMap<String, FieldId>,

View File

@ -0,0 +1,86 @@
use std::collections::BTreeMap;
use std::sync::RwLock;
use crate::{FieldId, FieldsIdsMap};
/// A fields ids map that can be globally updated to add fields
#[derive(Debug, Clone)]
pub struct GlobalFieldsIdsMap<'indexing> {
global: &'indexing RwLock<FieldsIdsMap>,
local: LocalFieldsIdsMap,
}
#[derive(Debug, Clone)]
struct LocalFieldsIdsMap {
names_ids: BTreeMap<String, FieldId>,
ids_names: BTreeMap<FieldId, String>,
}
impl LocalFieldsIdsMap {
fn new(global: &RwLock<FieldsIdsMap>) -> Self {
let global = global.read().unwrap();
Self { names_ids: global.names_ids.clone(), ids_names: global.ids_names.clone() }
}
fn insert(&mut self, name: &str, field_id: FieldId) {
self.names_ids.insert(name.to_owned(), field_id);
self.ids_names.insert(field_id, name.to_owned());
}
fn name(&self, id: FieldId) -> Option<&str> {
self.ids_names.get(&id).map(String::as_str)
}
fn id(&self, name: &str) -> Option<FieldId> {
self.names_ids.get(name).copied()
}
}
impl<'indexing> GlobalFieldsIdsMap<'indexing> {
pub fn new(global: &'indexing RwLock<FieldsIdsMap>) -> Self {
Self { local: LocalFieldsIdsMap::new(global), global }
}
/// Returns the field id related to a field name, it will create a new field id if the
/// name is not already known. Returns `None` if the maximum field id as been reached.
pub fn id_or_insert(&mut self, name: &str) -> Option<FieldId> {
if let Some(field_id) = self.local.id(name) {
return Some(field_id);
}
{
// optimistically lookup the global map
let global = self.global.read().unwrap();
if let Some(field_id) = global.id(name) {
self.local.insert(name, field_id);
return Some(field_id);
}
}
{
let mut global = self.global.write().unwrap();
if let Some(field_id) = global.id(name) {
self.local.insert(name, field_id);
return Some(field_id);
}
let field_id = global.insert(name)?;
self.local.insert(name, field_id);
Some(field_id)
}
}
/// Get the name of a field based on its id.
pub fn name(&mut self, id: FieldId) -> Option<&str> {
if self.local.name(id).is_none() {
let global = self.global.read().unwrap();
let name = global.name(id)?;
self.local.insert(name, id);
}
self.local.name(id)
}
}

View File

@ -6,10 +6,10 @@ use obkv::{KvReaderU16, KvWriterU16};
pub struct ObkvCodec;
impl<'a> heed::BytesDecode<'a> for ObkvCodec {
type DItem = KvReaderU16<'a>;
type DItem = &'a KvReaderU16;
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
Ok(KvReaderU16::new(bytes))
Ok(KvReaderU16::from_slice(bytes))
}
}

View File

@ -122,7 +122,7 @@ impl CboRoaringBitmapCodec {
/// Merges a DelAdd delta into a CboRoaringBitmap.
pub fn merge_deladd_into<'a>(
deladd: KvReaderDelAdd<'_>,
deladd: &KvReaderDelAdd,
previous: &[u8],
buffer: &'a mut Vec<u8>,
) -> io::Result<Option<&'a [u8]>> {

View File

@ -21,7 +21,7 @@ use crate::heed_codec::{BEU16StrCodec, FstSetCodec, StrBEU16Codec, StrRefCodec};
use crate::order_by_map::OrderByMap;
use crate::proximity::ProximityPrecision;
use crate::vector::parsed_vectors::RESERVED_VECTORS_FIELD_NAME;
use crate::vector::{Embedding, EmbeddingConfig};
use crate::vector::{ArroyWrapper, Embedding, EmbeddingConfig};
use crate::{
default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds,
FacetDistribution, FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldIdWordCountCodec,
@ -162,7 +162,7 @@ pub struct Index {
/// Maps an embedder name to its id in the arroy store.
pub embedder_category_id: Database<Str, U8>,
/// Vector store based on arroy™.
pub vector_arroy: arroy::Database<arroy::distances::Angular>,
pub vector_arroy: arroy::Database<Unspecified>,
/// Maps the document id to the document as an obkv store.
pub(crate) documents: Database<BEU32, ObkvCodec>,
@ -1251,12 +1251,20 @@ impl Index {
/* documents */
/// Returns a document by using the document id.
pub fn document<'t>(&self, rtxn: &'t RoTxn, id: DocumentId) -> Result<&'t obkv::KvReaderU16> {
self.documents
.get(rtxn, &id)?
.ok_or(UserError::UnknownInternalDocumentId { document_id: id })
.map_err(Into::into)
}
/// Returns an iterator over the requested documents. The next item will be an error if a document is missing.
pub fn iter_documents<'a, 't: 'a>(
&'a self,
rtxn: &'t RoTxn<'t>,
ids: impl IntoIterator<Item = DocumentId> + 'a,
) -> Result<impl Iterator<Item = Result<(DocumentId, obkv::KvReaderU16<'t>)>> + 'a> {
) -> Result<impl Iterator<Item = Result<(DocumentId, &'t obkv::KvReaderU16)>> + 'a> {
Ok(ids.into_iter().map(move |id| {
let kv = self
.documents
@ -1271,7 +1279,7 @@ impl Index {
&self,
rtxn: &'t RoTxn<'t>,
ids: impl IntoIterator<Item = DocumentId>,
) -> Result<Vec<(DocumentId, obkv::KvReaderU16<'t>)>> {
) -> Result<Vec<(DocumentId, &'t obkv::KvReaderU16)>> {
self.iter_documents(rtxn, ids)?.collect()
}
@ -1279,7 +1287,7 @@ impl Index {
pub fn all_documents<'a, 't: 'a>(
&'a self,
rtxn: &'t RoTxn<'t>,
) -> Result<impl Iterator<Item = Result<(DocumentId, obkv::KvReaderU16<'t>)>> + 'a> {
) -> Result<impl Iterator<Item = Result<(DocumentId, &'t obkv::KvReaderU16)>> + 'a> {
self.iter_documents(rtxn, self.documents_ids(rtxn)?)
}
@ -1303,7 +1311,7 @@ impl Index {
})?;
Ok(self.iter_documents(rtxn, ids)?.map(move |entry| -> Result<_> {
let (_docid, obkv) = entry?;
match primary_key.document_id(&obkv, &fields)? {
match primary_key.document_id(obkv, &fields)? {
Ok(document_id) => Ok(document_id),
Err(_) => Err(InternalError::DocumentsError(
crate::documents::Error::InvalidDocumentFormat,
@ -1614,15 +1622,17 @@ impl Index {
&'a self,
rtxn: &'a RoTxn<'a>,
embedder_id: u8,
) -> impl Iterator<Item = Result<arroy::Reader<'a, arroy::distances::Angular>>> + 'a {
quantized: bool,
) -> impl Iterator<Item = Result<ArroyWrapper>> + 'a {
crate::vector::arroy_db_range_for_embedder(embedder_id).map_while(move |k| {
arroy::Reader::open(rtxn, k, self.vector_arroy)
.map(Some)
.or_else(|e| match e {
arroy::Error::MissingMetadata(_) => Ok(None),
e => Err(e.into()),
})
.transpose()
let reader = ArroyWrapper::new(self.vector_arroy, k, quantized);
// Here we don't care about the dimensions, but we want to know if we can read
// in the database or if its metadata are missing because there is no document with that many vectors.
match reader.dimensions(rtxn) {
Ok(_) => Some(Ok(reader)),
Err(arroy::Error::MissingMetadata(_)) => None,
Err(e) => Some(Err(e.into())),
}
})
}
@ -1644,32 +1654,18 @@ impl Index {
docid: DocumentId,
) -> Result<BTreeMap<String, Vec<Embedding>>> {
let mut res = BTreeMap::new();
for row in self.embedder_category_id.iter(rtxn)? {
let (embedder_name, embedder_id) = row?;
let embedder_id = (embedder_id as u16) << 8;
let mut embeddings = Vec::new();
'vectors: for i in 0..=u8::MAX {
let reader = arroy::Reader::open(rtxn, embedder_id | (i as u16), self.vector_arroy)
.map(Some)
.or_else(|e| match e {
arroy::Error::MissingMetadata(_) => Ok(None),
e => Err(e),
})
.transpose();
let Some(reader) = reader else {
break 'vectors;
};
let embedding = reader?.item_vector(rtxn, docid)?;
if let Some(embedding) = embedding {
embeddings.push(embedding)
} else {
break 'vectors;
}
}
res.insert(embedder_name.to_owned(), embeddings);
let embedding_configs = self.embedding_configs(rtxn)?;
for config in embedding_configs {
let embedder_id = self.embedder_category_id.get(rtxn, &config.name)?.unwrap();
let embeddings = self
.arroy_readers(rtxn, embedder_id, config.config.quantized())
.map_while(|reader| {
reader
.and_then(|r| r.item_vector(rtxn, docid).map_err(|e| e.into()))
.transpose()
})
.collect::<Result<Vec<_>>>()?;
res.insert(config.name.to_owned(), embeddings);
}
Ok(res)
}

Some files were not shown because too many files have changed in this diff Show More