Compare commits

...

161 Commits

Author SHA1 Message Date
ManyTheFish
1197ec32e6 feat(metrics): add personalization count to metrics endpoint
- Add MEILISEARCH_PERSONALIZED_SEARCH_REQUESTS metric to track personalized searches
- Increment metric directly in search analytics when personalization is used
- Metric automatically exposed in /metrics endpoint for monitoring
2025-07-28 13:53:37 +02:00
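
For readers skimming the log: a minimal sketch of such a counter, assuming the `prometheus` and `lazy_static` crates; the metric name comes from the commit message, while the help text and registration site are assumptions.

use lazy_static::lazy_static;
use prometheus::{register_int_counter, IntCounter};

lazy_static! {
    // Registered once; the /metrics endpoint then exposes it automatically.
    pub static ref MEILISEARCH_PERSONALIZED_SEARCH_REQUESTS: IntCounter =
        register_int_counter!(
            "meilisearch_personalized_search_requests",
            "Number of search requests that used personalization"
        )
        .expect("Can't register the personalized search requests counter");
}

// Called from the search analytics path whenever personalization is used.
pub fn on_personalized_search() {
    MEILISEARCH_PERSONALIZED_SEARCH_REQUESTS.inc();
}
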
ManyTheFish
0afab11e25 feat(analytics): add personalization tracking to segment analytics
- Add total_personalized field to SearchAggregator to track personalization usage
- Track when search requests include personalization parameters
- Include personalization data in analytics JSON output
- Maintain clean personalization service interface
2025-07-28 13:53:37 +02:00
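
A reduced sketch of the aggregator change described above; every other SearchAggregator field is elided and the signature is an assumption:

#[derive(Default)]
struct SearchAggregator {
    total_received: usize,
    // Counts search requests that carried personalization parameters.
    total_personalized: usize,
}

impl SearchAggregator {
    fn from_query(personalized: bool) -> Self {
        Self { total_received: 1, total_personalized: personalized as usize }
    }
}
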
ManyTheFish
e3062c4070 refactor(personalization): improve Cohere reranking logic and error handling
- Replace and_then() with early return for missing personalization
- Simplify reranking by building new hits vector instead of swapping
- Add debug logging for reranked indices
- Fix potential index out-of-bounds issues in reranking
2025-07-28 13:53:37 +02:00
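
A sketch of the rebuilt-hits approach this commit describes, assuming the reranker returns relevance-ordered indices; the types here are stand-ins, not the actual Meilisearch ones:

use serde_json::Value;

#[derive(Clone)]
struct SearchHit {
    document: Value,
}

// Build a new hits vector instead of swapping in place: an out-of-range
// index is skipped and logged rather than causing a panic.
fn apply_rerank(hits: &[SearchHit], reranked_indices: &[usize]) -> Vec<SearchHit> {
    tracing::debug!("reranked indices: {reranked_indices:?}");
    let mut reranked = Vec::with_capacity(hits.len());
    for &index in reranked_indices {
        if let Some(hit) = hits.get(index) {
            reranked.push(hit.clone());
        } else {
            tracing::debug!("reranker returned out-of-range index {index}");
        }
    }
    reranked
}
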
ManyTheFish
c56be3d820 refactor: split PersonalizationService into enum with CohereService
- Refactor PersonalizationService as enum with Cohere and Uninitialized variants
- Create dedicated CohereService struct with rerank_search_results method
- Split constructor into cohere() and uninitialized() methods
- Move all Cohere logic into CohereService for better separation of concerns
- Update tests and lib.rs to use new API
- Improve code organization and maintainability
2025-07-28 13:53:37 +02:00
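
The resulting shape is roughly the sketch below; only the variant/constructor split comes from the message, the field is an assumption:

pub enum PersonalizationService {
    Cohere(CohereService),
    Uninitialized,
}

pub struct CohereService {
    api_key: String,
}

impl PersonalizationService {
    // Chosen when a personalization API key is configured.
    pub fn cohere(api_key: String) -> Self {
        Self::Cohere(CohereService { api_key })
    }

    // Chosen otherwise; reranking is a no-op on this variant.
    pub fn uninitialized() -> Self {
        Self::Uninitialized
    }
}
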
ManyTheFish
be68dd6785 feat: refine personalization query by merging with user context
- Merge initial query with user context to create a comprehensive prompt
- Only skip reranking if both query and user_context are None
- Support reranking with query-only, user_context-only, or both
- Use 'let else' pattern for cleaner error handling
- Add comprehensive tests for different parameter combinations
- Improve prompt format for better reranking effectiveness
2025-07-28 13:53:37 +02:00
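
A sketch of the merging rule and the `let else` exit described above; the exact prompt format is an assumption:

fn build_prompt(query: Option<&str>, user_context: Option<&str>) -> Option<String> {
    match (query, user_context) {
        // Only when both parts are missing is there nothing to rerank on.
        (None, None) => None,
        (Some(q), None) => Some(format!("User query: {q}")),
        (None, Some(ctx)) => Some(format!("User context: {ctx}")),
        (Some(q), Some(ctx)) => Some(format!("User context: {ctx}\nUser query: {q}")),
    }
}

fn rerank_or_passthrough(
    query: Option<&str>,
    user_context: Option<&str>,
    hits: Vec<String>,
) -> Vec<String> {
    let Some(prompt) = build_prompt(query, user_context) else {
        return hits; // nothing to personalize on
    };
    // ... hand `prompt` and `hits` to the reranker ...
    let _ = prompt;
    hits
}
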
ManyTheFish
bae231fb91 refactor: rename personalization API fields and move checks inside service
- Rename 'personalization' field to 'personalize' in API
- Rename 'userProfile' to 'userContext' in personalization object
- Remove 'personalized' boolean field (activation now based on non-null 'personalize')
- Move personalization checks inside rerank_search_results function
- Use 'let else' pattern for better error handling
- Update error types and messages to reflect new field names
- Update all search routes and analytics to use new field names
2025-07-28 13:53:37 +02:00
ManyTheFish
34f18ad3a8 feat: add personalization service with EnglishV3-only reranking
- Add new personalization module with Cohere integration
- Implement rerank_search_results method using EnglishV3 model
- Remove fallback logic to EnglishV2 for simplified behavior
- Add comprehensive error handling and logging
- Include unit tests for service behavior
- Update search route to support personalization feature
2025-07-28 13:53:37 +02:00
ManyTheFish
a7cdcad4b2 feat: add personalization parameters to /search route
- Add Personalization struct with personalized boolean and user_profile string
- Add personalizationPersonalized and personalizationUserProfile query parameters to SearchQueryGet
- Follow same pattern as hybrid parameters (hybridEmbedder, hybridSemanticRatio)
- Add validation: personalizationUserProfile requires personalizationPersonalized
- Add error codes for personalization parameters
- Update analytics and facet search to handle new personalization field
- Remove serde dependencies from Personalization struct, use Deserr only
2025-07-28 13:53:37 +02:00
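
The validation rule reduces to a small match over the two flat parameters; a sketch with stand-in types (the real SearchQueryGet is deserr-derived and much larger):

struct SearchQueryGet {
    personalization_personalized: Option<bool>,
    personalization_user_profile: Option<String>,
}

struct Personalization {
    personalized: bool,
    user_profile: Option<String>,
}

fn personalization_from_query(q: &SearchQueryGet) -> Result<Option<Personalization>, String> {
    match (q.personalization_personalized, &q.personalization_user_profile) {
        (None, None) => Ok(None),
        // personalizationUserProfile requires personalizationPersonalized.
        (None, Some(_)) => {
            Err("`personalizationUserProfile` requires `personalizationPersonalized`".into())
        }
        (Some(personalized), user_profile) => {
            Ok(Some(Personalization { personalized, user_profile: user_profile.clone() }))
        }
    }
}
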
ManyTheFish
c0cd05e5f6 feat: add experimental_personalization_api_key feature to RoFeatures
- Add MEILI_EXPERIMENTAL_PERSONALIZATION_API_KEY environment variable
- Add experimental_personalization_api_key field to Opt struct with CLI and env support
- Add experimental_personalization_api_key field to InstanceTogglableFeatures
- Store personalization API key in FeatureData for access through IndexScheduler
- Add experimental_personalization_api_key() method to IndexScheduler
- Update analytics destructuring to include new field
- Maintain RoFeatures Copy trait while properly handling Option<String>
2025-07-28 13:53:36 +02:00
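
A sketch of the option wiring, assuming clap's derive API with its `env` feature enabled; the real Opt struct carries many more fields:

use clap::Parser;

#[derive(Debug, Parser)]
struct Opt {
    // Settable by CLI flag or by the environment variable from the commit message.
    #[clap(long, env = "MEILI_EXPERIMENTAL_PERSONALIZATION_API_KEY")]
    experimental_personalization_api_key: Option<String>,
}

From there the key is threaded through InstanceTogglableFeatures into FeatureData, as the diffs further down show.
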
Louis Dureuil
d4c88f28f3 Merge pull request #5780 from meilisearch/fix-api-keys
Fix api key action inconsistencies
2025-07-28 10:33:48 +00:00
Mubelotix
d90c76d3cc Update tests 2025-07-28 11:35:15 +02:00
Mubelotix
f6bc6854f8 Fix key action inconsistencies 2025-07-28 11:10:55 +02:00
Clément Renault
42001a25ff Merge pull request #5770 from meilisearch/fix/update-index-chat-settings
fix: index chat settings `searchParameters` incorrectly set with `limit: 20` when sending empty object
2025-07-22 13:26:01 +00:00
Louis Dureuil
080d5f94dd Merge pull request #5763 from meilisearch/embedding-fixes
Regenerate all fragments when coming from a user provided vector
2025-07-22 08:35:07 +00:00
nicolasvienot
ba0f50e5ef fix: update default deserialization for ChatSearchParams limit field 2025-07-22 10:24:18 +02:00
Louis Dureuil
ce6230aa85 Merge pull request #5762 from meilisearch/new-document-indexer-for-dumps
Use the edition 2024 documents indexer in the dumps
2025-07-21 14:53:43 +00:00
Louis Dureuil
6dc241f9de Fix tests 2025-07-21 15:11:24 +02:00
Louis Dureuil
01d1ef65c4 Update search and docs usages 2025-07-21 15:11:24 +02:00
Louis Dureuil
3246667590 when exporting vectors, force regenerate to false when the embedder has fragments 2025-07-21 15:11:24 +02:00
Louis Dureuil
109395c199 Index::embeddings specifies if the embedder has fragments 2025-07-21 15:11:24 +02:00
Louis Dureuil
a0b71a8785 EmbedderOptions::has_fragments() 2025-07-21 15:11:24 +02:00
Louis Dureuil
00a5c86f13 Remove accidentally added db snap 2025-07-21 15:11:24 +02:00
Louis Dureuil
366c37a686 Fix new indexer 2025-07-21 15:11:23 +02:00
Louis Dureuil
afc164a271 Fix in old indexer 2025-07-21 15:11:23 +02:00
Kerollmops
bdc2d1e64d Move the edition 2024 dump parameter to the right place 2025-07-21 14:50:05 +02:00
Tamo
0312fb22b8 Merge pull request #5761 from meilisearch/fix-chat-settings-dumpless-upgrade
Fix chat settings dumpless upgrade
2025-07-17 15:57:39 +00:00
Clément Renault
b85657de1e Update memmap2 version everywhere 2025-07-17 17:30:44 +02:00
Clément Renault
626be0ef28 Small typo fix
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2025-07-17 17:27:00 +02:00
Clément Renault
1b476b8a35 Add documentation to the new documents_file dump reader method
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2025-07-17 17:26:41 +02:00
Clément Renault
a1b42c10e2 Make clippy happy 2025-07-17 17:21:03 +02:00
Clément Renault
d67db6e3c2 Use the edition 2024 documents indexer in the dumps 2025-07-17 17:12:51 +02:00
Clément Renault
760ccffdbd Expose the documents files from the dumps 2025-07-17 17:12:51 +02:00
Clément Renault
338806283b Do not track meilisearch databases 2025-07-17 17:12:51 +02:00
Clément Renault
fe15e11c9d Introduce a new CLI and env var to use the old document indexer when
importing dumps
2025-07-17 17:12:51 +02:00
Clément Renault
f1d92bfead Make sure the new filter chat setting is set to its default value if
missing
2025-07-17 15:36:21 +02:00
Clément Renault
a005a062da Add security if chat settings parameters are missing 2025-07-17 15:27:53 +02:00
Clément Renault
fd8b2451d7 Merge pull request #5754 from kametsun/fix/incorrect-stats-doc-count
Fix incorrect document count in stats after clearing all documents
2025-07-17 06:48:51 +00:00
Louis Dureuil
058f9ffda5 Merge pull request #5734 from meilisearch/request-fragments-test
Tests for multimodal
2025-07-16 11:04:00 +00:00
Louis Dureuil
5d363205a5 Merge pull request #5716 from meilisearch/document-sorting
Allow sorting on the /documents route
2025-07-16 10:26:50 +00:00
Mubelotix
a683faa882 Apply review suggestions 2025-07-16 11:03:24 +02:00
Louis Dureuil
8887cbdcd5 Merge pull request #5725 from meilisearch/fix-threshold-overcounting-bug
Fix Total Hits being wrong when rankingScoreThreshold is used
2025-07-16 07:15:24 +00:00
Many the fish
634865ff53 Merge pull request #5710 from meilisearch/chat-route-support-filters
Introduce filters in the chat completions
2025-07-15 16:10:49 +00:00
Mubelotix
36fccf8525 Merge remote-tracking branch 'origin/release-v1.16.0' into fix-threshold-overcounting-bug 2025-07-15 18:01:29 +02:00
Mubelotix
d6bd60d569 Apply review suggestions
Co-Authored-By: Louis Dureuil <louis.dureuil@xinra.net>
2025-07-15 18:00:37 +02:00
Mubelotix
48ad959fc1 Merge remote-tracking branch 'origin/release-v1.16.0' into document-sorting 2025-07-15 17:41:46 +02:00
Mubelotix
1bc30cb4c8 Restore old benchmark names 2025-07-15 17:34:04 +02:00
Mubelotix
77138a42d6 Apply review suggestions
Add preconditions

Fix underflow

Remove unwrap

Turn methods to associated functions

Apply review suggestions
2025-07-15 17:31:11 +02:00
Kerollmops
0791506124 Fix some proposals 2025-07-15 17:10:45 +02:00
Kerollmops
2a015ac3b8 Implement basic few shot prompting to improve the query capabilities 2025-07-15 14:50:10 +02:00
Clément Renault
6f248b78a9 Merge pull request #5751 from meilisearch/fix-searchable-attributes-order
Fix: Preserve order of searchable attributes when modified
2025-07-15 10:38:11 +00:00
Many the fish
d694e312ff Update crates/milli/src/update/settings.rs
Co-authored-by: Clément Renault <clement@meilisearch.com>
2025-07-15 11:54:59 +02:00
Clément Renault
d76dcc8998 Make clippy happy 2025-07-15 11:49:48 +02:00
Clément Renault
e654f66223 Support filtering 2025-07-15 11:49:47 +02:00
Clément Renault
34f2ab7093 WIP report search errors to the LLM 2025-07-15 11:49:46 +02:00
Clément Renault
1a9dbd364e Fix some issues 2025-07-15 11:49:46 +02:00
Clément Renault
662c5d9871 Introduce filters in the chat completions 2025-07-15 11:49:45 +02:00
kametsun
5cd61b50f9 Fix formatting 2025-07-12 18:19:26 +09:00
kametsun
9a9be76757 add: verify that the statistics are correctly updated 2025-07-12 11:15:44 +09:00
kametsun
cfa6ba6c3b Fix stats showing wrong document count after clear all
Update database stats after clearing documents to ensure
/stats endpoint returns correct numberOfDocuments: 0 instead
of stale count.
2025-07-12 11:15:44 +09:00
Clément Renault
f4f333dbf6 Merge pull request #5753 from meilisearch/export-fixes
Various fixes on the export route
2025-07-11 19:15:42 +00:00
Mubelotix
1ade76ba10 Remove sneaky debug 2025-07-11 12:27:04 +02:00
Mubelotix
ae26658913 Use the most appropriate unit in payload_too_large error 2025-07-11 12:27:03 +02:00
Mubelotix
aa09edb3fb Fix errors being silently dropped 2025-07-11 12:27:03 +02:00
Mubelotix
3f42f1a036 Get rid of bearer 2025-07-11 12:27:03 +02:00
Mubelotix
9bdfdd395b Fix document step overflowing 2025-07-11 12:27:03 +02:00
Mubelotix
78d0625a91 Decrease default payload size for exports 2025-07-11 12:27:03 +02:00
ManyTheFish
3f655ea20e compare user defined searchable fields instead of internal searchable fields 2025-07-10 18:24:23 +02:00
ManyTheFish
50bc1d55f3 Add test reproducing the bug 2025-07-10 18:23:46 +02:00
Mubelotix
0a4f2ef891 Leak mock servers 2025-07-08 15:27:35 +02:00
Tamo
faa1f7c5b7 Merge pull request #5693 from Mubelotix/default-key
Add a Read-Only Admin API Key by default
2025-07-08 12:38:29 +00:00
Mubelotix
3cc5d86598 Format 2025-07-08 13:57:17 +02:00
Mubelotix
1ae47bec77 Improve composite test 2025-07-08 13:57:07 +02:00
Mubelotix
2f1be0ff86 Ignore faulty test (see #5746) 2025-07-08 13:55:07 +02:00
Mubelotix
9cee432255 Fix broken tests 2025-07-08 13:36:26 +02:00
Mubelotix
ff8d48d2f1 Merge branch 'main' into default-key 2025-07-08 12:21:46 +02:00
Mubelotix
a56c036994 Update crates/meilisearch-types/src/keys.rs
Co-authored-by: gui machiavelli <hey@guimachiavelli.com>
2025-07-08 12:18:52 +02:00
Tamo
511c48f520 Merge pull request #5737 from meilisearch/request-fragments-dumpless-upgrade
Fix the dumpless upgrade from v1.15 to v1.16 for request fragments
2025-07-08 08:49:38 +00:00
Louis Dureuil
4623691d1f Don't make the type-that-shall-not-be-written serializable
Following tamo's advice

Co-Authored-By: Tamo <tamo@meilisearch.com>
2025-07-08 10:04:33 +02:00
Mubelotix
3261aadcf2 Add composite test 2025-07-07 16:50:39 +02:00
Mubelotix
073e9f2967 Disable similarity check on composite embedders using fragments 2025-07-07 16:46:16 +02:00
Louis Dureuil
5f8f48ec95 Add new snapshot checking for regenerativeness 2025-07-07 16:43:05 +02:00
Louis Dureuil
ed2fe365a0 Fix existing snaps 2025-07-07 16:42:50 +02:00
Louis Dureuil
f7c8a77f89 Update v1.12.0 DB to contain vectors 2025-07-07 16:01:50 +02:00
Clément Renault
a8030850ee Merge pull request #5733 from meilisearch/improve-export-analytics
Improve the analytics of the `/export` route
2025-07-07 12:26:11 +00:00
Mubelotix
132065afda Minor improvements 2025-07-07 13:10:16 +02:00
Mubelotix
51c298662b Merge branch 'main' into request-fragments-test 2025-07-07 13:00:21 +02:00
Mubelotix
70a860a0f0 Merge branch 'main' into fix-threshold-overcounting-bug 2025-07-07 12:26:37 +02:00
Louis Dureuil
a3254d7d7d Implement dumpless upgrade from v1.15 to v1.16 2025-07-07 11:57:08 +02:00
Louis Dureuil
73c9c1ebdc Add compile-time checks for dumpless upgrade 2025-07-07 11:34:18 +02:00
Clément Renault
4c7a6e5c1b Do not leak private URLs 2025-07-07 11:07:58 +02:00
Mubelotix
fa3990daf9 Format 2025-07-04 13:33:49 +02:00
Mubelotix
c5993196b3 Add test 2025-07-04 13:32:55 +02:00
Mubelotix
16234e1313 Add fragment swapping test 2025-07-04 13:25:42 +02:00
Mubelotix
be9f4f96df Add experimental feature test 2025-07-04 13:15:15 +02:00
Mubelotix
b274106ad3 Add test 2025-07-04 13:05:52 +02:00
Mubelotix
48527761e7 Add test 2025-07-04 12:01:15 +02:00
Mubelotix
6792d048b8 Test both fragments and document template 2025-07-04 11:47:38 +02:00
Kerollmops
07bfed99e6 Expose the host in the analytics 2025-07-04 11:08:02 +02:00
Mubelotix
8dfded2993 Update tests 2025-07-04 10:49:03 +02:00
Mubelotix
3714f16696 Fix bug 2025-07-04 10:40:50 +02:00
Mubelotix
d0cd3cacec Add a way to reproduce the bug 2025-07-03 18:18:04 +02:00
Mubelotix
caccb51814 Add a complex value test 2025-07-03 16:10:23 +02:00
Mubelotix
cf9b311f71 Format 2025-07-03 15:53:09 +02:00
Mubelotix
7423243be0 Add test with multiple embedders 2025-07-03 15:52:18 +02:00
Mubelotix
5690700601 Add fragment addition test 2025-07-03 15:19:31 +02:00
Mubelotix
2faad504c6 Add test 2025-07-03 15:12:47 +02:00
Mubelotix
2bcd69750f Add fragment modification test 2025-07-03 15:08:27 +02:00
Mubelotix
de24e75be8 Update test 2025-07-03 15:00:11 +02:00
Louis Dureuil
a3af9fe057 new extractor bugfixes:
- fix old_has_fragments
- new_is_user_provided is always false when generating fragments,
  even if no fragment ever matches
2025-07-03 14:44:34 +02:00
Louis Dureuil
90683d0e4e add snapshot of get settings 2025-07-03 14:43:06 +02:00
Mubelotix
5c79273748 Add TODOs 2025-07-03 14:42:49 +02:00
Mubelotix
b45eea0d3e Add test for fragment deletion 2025-07-03 13:26:44 +02:00
Mubelotix
0b89ef1fd7 Make tests use a shared index 2025-07-03 11:32:49 +02:00
Mubelotix
65ba7b47af Test search fragments 2025-07-03 11:32:49 +02:00
Mubelotix
8af76a65bf Add test_fragment_indexing 2025-07-03 11:32:49 +02:00
Mubelotix
f60814b319 Add benchmark 2025-07-02 12:06:00 +02:00
Mubelotix
5a675bcb82 Add benchmarks 2025-07-02 11:50:32 +02:00
Mubelotix
600178c5ab Still limit to max hits 2025-07-01 18:33:09 +02:00
Mubelotix
dedae94102 Fix #5274 2025-07-01 16:22:25 +02:00
Mubelotix
7ae9a4afee Add a test for issue #5274 2025-07-01 15:42:43 +02:00
Mubelotix
e92b6beb20 Revert making check_sort_criteria usable without a search context 2025-07-01 14:26:55 +02:00
Mubelotix
27cc357362 Document code 2025-07-01 14:21:55 +02:00
Mubelotix
73dfeefc7c Remove plural form 2025-07-01 14:08:46 +02:00
Mubelotix
d85480de89 Move sort code out of facet 2025-07-01 14:05:47 +02:00
Mubelotix
9f55708d84 Format 2025-07-01 13:58:56 +02:00
Mubelotix
280c3907be Add test to sort the unsortable 2025-07-01 13:58:37 +02:00
Mubelotix
8419fd9b3b Ditch usage of check_sort_criteria 2025-07-01 13:42:38 +02:00
Mubelotix
283944ea89 Differentiate between document sort error and search sort error 2025-07-01 12:03:50 +02:00
Mubelotix
8aacd6374a Optimize geo sort 2025-07-01 11:50:01 +02:00
Mubelotix
8326f34ad1 Add analytics 2025-07-01 11:35:28 +02:00
Mubelotix
f4a908669c Add tests 2025-07-01 10:02:15 +02:00
Mubelotix
eb2c2815b6 Fix panic 2025-07-01 10:00:10 +02:00
Mubelotix
29e9c74a49 Merge two ifs 2025-06-30 16:17:04 +02:00
Mubelotix
f6803dd7d1 Simplify iterator chaining in facet sort 2025-06-30 14:05:23 +02:00
Mubelotix
f86f4f619f Implement geo sort on documents 2025-06-30 13:57:30 +02:00
Mubelotix
e35d58b531 Move geosort code out of search 2025-06-30 13:12:00 +02:00
Mubelotix
63827bbee0 Move sorting code out of search 2025-06-30 11:59:59 +02:00
Mubelotix
340d9e6edc Optimize facet sort
5 to 10x speedup
2025-06-27 14:40:55 +02:00
Mubelotix
28adbc0d18 Update tests 2025-06-27 09:47:46 +02:00
Mubelotix
e3fba62e13 Fix typo 2025-06-27 09:40:59 +02:00
Mubelotix
fb9170b8e3 Keep name consistent with others 2025-06-27 09:40:30 +02:00
Mubelotix
c15763f910 Improve key description
Co-authored-by: Tamo <tamo@meilisearch.com>
2025-06-27 09:39:24 +02:00
Mubelotix
4534dc2cab Create another deserr error 2025-06-25 16:45:32 +02:00
Mubelotix
b05cb80803 Take sort criteria from the request 2025-06-25 16:41:08 +02:00
Mubelotix
6e0526090a Implement sorting documents 2025-06-25 15:36:12 +02:00
Mubelotix
2090e9ea31 Update test 2025-06-25 10:08:25 +02:00
Mubelotix
1c8f1c18f4 Fix constant name and key description 2025-06-25 09:59:34 +02:00
Mubelotix
c4a96b40eb Remove KeysGet from AllGet 2025-06-24 17:40:06 +02:00
Mubelotix
2d6dc83940 Format the code 2025-06-19 15:55:12 +02:00
Mubelotix
ab768f379f Fix comment 2025-06-19 15:49:34 +02:00
Mubelotix
705e9a9e5e Make the uuids random again to prevent abuse using rainbow tables 2025-06-19 15:45:09 +02:00
Mubelotix
67f2a30d7c Fix test 2025-06-19 13:10:08 +02:00
Mubelotix
99732f4084 Fix some tests 2025-06-19 13:04:55 +02:00
Mubelotix
5081d837ea Fix AllGet action being included in All 2025-06-19 12:12:30 +02:00
Mubelotix
9e1cb792f4 Rename Action::AllRead to AllGet 2025-06-19 11:55:25 +02:00
Mubelotix
b6b7ede266 Rename Action *.read to *.get 2025-06-19 11:53:42 +02:00
Mubelotix
f50e586a4f Allow management key to read other keys 2025-06-19 11:52:58 +02:00
Mubelotix
11fedea788 Set static uuids to keys 2025-06-19 11:42:45 +02:00
Mubelotix
032b34c377 Add a default management key 2025-06-19 11:29:32 +02:00
Mubelotix
b421c8e7de Add an AllRead key 2025-06-19 11:29:16 +02:00
Mubelotix
00eb258a53 Fix comment 2025-06-19 11:16:07 +02:00
108 changed files with 6345 additions and 1582 deletions

10
.gitignore vendored

@@ -5,18 +5,24 @@
**/*.json_lines
**/*.rs.bk
/*.mdb
/data.ms
/*.ms
/snapshots
/dumps
/bench
/_xtask_benchmark.ms
/benchmarks
.DS_Store
# Snapshots
## ... large
*.full.snap
## ... unreviewed
*.snap.new
## ... pending
*.pending-snap
# Tmp files
.tmp*
# Database snapshot
crates/meilisearch/db.snapshot

38
Cargo.lock generated

@@ -1194,6 +1194,21 @@ version = "0.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f46ad14479a25103f283c0f10005961cf086d8dc42205bb44c46ac563475dca6"
[[package]]
name = "cohere-rust"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d8b553b385b0f2562138baea705b5707335314f8e91a58e7d1a03c3a6c332423"
dependencies = [
"bytes",
"reqwest",
"serde",
"serde_json",
"strum_macros 0.26.4",
"thiserror 1.0.69",
"tokio",
]
[[package]]
name = "color-spantrace"
version = "0.3.0"
@@ -3445,7 +3460,7 @@ dependencies = [
"serde_json",
"serde_yaml",
"strum",
"strum_macros",
"strum_macros 0.27.1",
"unicode-blocks",
"unicode-normalization",
"unicode-segmentation",
@@ -3753,6 +3768,7 @@ dependencies = [
"bytes",
"cargo_toml",
"clap",
"cohere-rust",
"crossbeam-channel",
"deserr",
"dump",
@@ -3775,6 +3791,7 @@ dependencies = [
"meili-snap",
"meilisearch-auth",
"meilisearch-types",
"memmap2",
"mimalloc",
"mime",
"mopa-maintained",
@@ -3908,9 +3925,9 @@ checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0"
[[package]]
name = "memmap2"
version = "0.9.5"
version = "0.9.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fd3f7eed9d3848f8b98834af67102b720745c4ec028fcd0aa0239277e7de374f"
checksum = "483758ad303d734cec05e5c12b41d7e93e6a6390c5e9dae6bdeb7c1259012d28"
dependencies = [
"libc",
"stable_deref_trait",
@@ -5821,7 +5838,20 @@ version = "0.27.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f64def088c51c9510a8579e3c5d67c65349dcf755e5479ad3d010aa6454e2c32"
dependencies = [
"strum_macros",
"strum_macros 0.27.1",
]
[[package]]
name = "strum_macros"
version = "0.26.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4c6bee85a5a24955dc440386795aa378cd9cf82acd5f764469152d2270e581be"
dependencies = [
"heck",
"proc-macro2",
"quote",
"rustversion",
"syn 2.0.101",
]
[[package]]


@@ -14,7 +14,7 @@ license.workspace = true
anyhow = "1.0.98"
bumpalo = "3.18.1"
csv = "1.3.1"
memmap2 = "0.9.5"
memmap2 = "0.9.7"
milli = { path = "../milli" }
mimalloc = { version = "0.1.47", default-features = false }
serde_json = { version = "1.0.140", features = ["preserve_order"] }
@@ -51,3 +51,7 @@ harness = false
[[bench]]
name = "indexing"
harness = false
[[bench]]
name = "sort"
harness = false


@@ -0,0 +1,114 @@
//! This benchmark module is used to compare the performance of sorting documents in /search VS /documents
//!
//! The tests/benchmarks were designed in the context of a query returning only 20 documents.
mod datasets_paths;
mod utils;
use criterion::{criterion_group, criterion_main};
use milli::update::Settings;
use utils::Conf;
#[cfg(not(windows))]
#[global_allocator]
static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;
fn base_conf(builder: &mut Settings) {
let displayed_fields =
["geonameid", "name", "asciiname", "alternatenames", "_geo", "population"]
.iter()
.map(|s| s.to_string())
.collect();
builder.set_displayed_fields(displayed_fields);
let sortable_fields =
["_geo", "name", "population", "elevation", "timezone", "modification-date"]
.iter()
.map(|s| s.to_string())
.collect();
builder.set_sortable_fields(sortable_fields);
}
#[rustfmt::skip]
const BASE_CONF: Conf = Conf {
dataset: datasets_paths::SMOL_ALL_COUNTRIES,
dataset_format: "jsonl",
configure: base_conf,
primary_key: Some("geonameid"),
queries: &[""],
offsets: &[
Some((0, 20)), // The most common query in the real world
Some((0, 500)), // A query that ranges over many documents
Some((980, 20)), // The worst query that could happen in the real world
Some((800_000, 20)) // The worst query
],
get_documents: true,
..Conf::BASE
};
fn bench_sort(c: &mut criterion::Criterion) {
#[rustfmt::skip]
let confs = &[
utils::Conf {
group_name: "without sort",
sort: None,
..BASE_CONF
},
utils::Conf {
group_name: "sort on many different values",
sort: Some(vec!["name:asc"]),
..BASE_CONF
},
utils::Conf {
group_name: "sort on many similar values",
sort: Some(vec!["timezone:desc"]),
..BASE_CONF
},
utils::Conf {
group_name: "sort on many similar then different values",
sort: Some(vec!["timezone:desc", "name:asc"]),
..BASE_CONF
},
utils::Conf {
group_name: "sort on many different then similar values",
sort: Some(vec!["timezone:desc", "name:asc"]),
..BASE_CONF
},
utils::Conf {
group_name: "geo sort",
sample_size: Some(10),
sort: Some(vec!["_geoPoint(45.4777599, 9.1967508):asc"]),
..BASE_CONF
},
utils::Conf {
group_name: "sort on many similar values then geo sort",
sample_size: Some(50),
sort: Some(vec!["timezone:desc", "_geoPoint(45.4777599, 9.1967508):asc"]),
..BASE_CONF
},
utils::Conf {
group_name: "sort on many different values then geo sort",
sample_size: Some(50),
sort: Some(vec!["name:desc", "_geoPoint(45.4777599, 9.1967508):asc"]),
..BASE_CONF
},
utils::Conf {
group_name: "sort on many fields",
sort: Some(vec!["population:asc", "name:asc", "elevation:asc", "timezone:asc"]),
..BASE_CONF
},
];
utils::run_benches(c, confs);
}
criterion_group!(benches, bench_sort);
criterion_main!(benches);


@@ -9,6 +9,7 @@ use anyhow::Context;
use bumpalo::Bump;
use criterion::BenchmarkId;
use memmap2::Mmap;
use milli::documents::sort::recursive_sort;
use milli::heed::EnvOpenOptions;
use milli::progress::Progress;
use milli::update::new::indexer;
@@ -35,6 +36,12 @@ pub struct Conf<'a> {
pub configure: fn(&mut Settings),
pub filter: Option<&'a str>,
pub sort: Option<Vec<&'a str>>,
/// set to skip documents (offset, limit)
pub offsets: &'a [Option<(usize, usize)>],
/// enable if you want to bench getting documents without querying
pub get_documents: bool,
/// configure the benchmark sample size
pub sample_size: Option<usize>,
/// enable or disable the optional words on the query
pub optional_words: bool,
/// primary key, if there is None we'll auto-generate docids for every documents
@@ -52,6 +59,9 @@ impl Conf<'_> {
configure: |_| (),
filter: None,
sort: None,
offsets: &[None],
get_documents: false,
sample_size: None,
optional_words: true,
primary_key: None,
};
@@ -145,25 +155,79 @@ pub fn run_benches(c: &mut criterion::Criterion, confs: &[Conf]) {
let file_name = Path::new(conf.dataset).file_name().and_then(|f| f.to_str()).unwrap();
let name = format!("{}: {}", file_name, conf.group_name);
let mut group = c.benchmark_group(&name);
if let Some(sample_size) = conf.sample_size {
group.sample_size(sample_size);
}
for &query in conf.queries {
group.bench_with_input(BenchmarkId::from_parameter(query), &query, |b, &query| {
b.iter(|| {
let rtxn = index.read_txn().unwrap();
let mut search = index.search(&rtxn);
search.query(query).terms_matching_strategy(TermsMatchingStrategy::default());
if let Some(filter) = conf.filter {
let filter = Filter::from_str(filter).unwrap().unwrap();
search.filter(filter);
}
if let Some(sort) = &conf.sort {
let sort = sort.iter().map(|sort| sort.parse().unwrap()).collect();
search.sort_criteria(sort);
}
let _ids = search.execute().unwrap();
});
});
for offset in conf.offsets {
let parameter = match offset {
None => query.to_string(),
Some((offset, limit)) => format!("{query}[{offset}:{limit}]"),
};
group.bench_with_input(
BenchmarkId::from_parameter(parameter),
&query,
|b, &query| {
b.iter(|| {
let rtxn = index.read_txn().unwrap();
let mut search = index.search(&rtxn);
search
.query(query)
.terms_matching_strategy(TermsMatchingStrategy::default());
if let Some(filter) = conf.filter {
let filter = Filter::from_str(filter).unwrap().unwrap();
search.filter(filter);
}
if let Some(sort) = &conf.sort {
let sort = sort.iter().map(|sort| sort.parse().unwrap()).collect();
search.sort_criteria(sort);
}
if let Some((offset, limit)) = offset {
search.offset(*offset).limit(*limit);
}
let _ids = search.execute().unwrap();
});
},
);
}
}
if conf.get_documents {
for offset in conf.offsets {
let parameter = match offset {
None => String::from("get_documents"),
Some((offset, limit)) => format!("get_documents[{offset}:{limit}]"),
};
group.bench_with_input(BenchmarkId::from_parameter(parameter), &(), |b, &()| {
b.iter(|| {
let rtxn = index.read_txn().unwrap();
if let Some(sort) = &conf.sort {
let sort = sort.iter().map(|sort| sort.parse().unwrap()).collect();
let all_docs = index.documents_ids(&rtxn).unwrap();
let facet_sort =
recursive_sort(&index, &rtxn, sort, &all_docs).unwrap();
let iter = facet_sort.iter().unwrap();
if let Some((offset, limit)) = offset {
let _results = iter.skip(*offset).take(*limit).collect::<Vec<_>>();
} else {
let _results = iter.collect::<Vec<_>>();
}
} else {
let all_docs = index.documents_ids(&rtxn).unwrap();
if let Some((offset, limit)) = offset {
let _results =
all_docs.iter().skip(*offset).take(*limit).collect::<Vec<_>>();
} else {
let _results = all_docs.iter().collect::<Vec<_>>();
}
}
});
});
}
}
group.finish();
index.prepare_for_closing().wait();


@@ -1,3 +1,4 @@
use std::fs::File;
use std::str::FromStr;
use super::v2_to_v3::CompatV2ToV3;
@@ -94,6 +95,10 @@ impl CompatIndexV1ToV2 {
self.from.documents().map(|it| Box::new(it) as Box<dyn Iterator<Item = _>>)
}
pub fn documents_file(&self) -> &File {
self.from.documents_file()
}
pub fn settings(&mut self) -> Result<v2::settings::Settings<v2::settings::Checked>> {
Ok(v2::settings::Settings::<v2::settings::Unchecked>::from(self.from.settings()?).check())
}


@@ -1,3 +1,4 @@
use std::fs::File;
use std::str::FromStr;
use time::OffsetDateTime;
@@ -122,6 +123,13 @@ impl CompatIndexV2ToV3 {
}
}
pub fn documents_file(&self) -> &File {
match self {
CompatIndexV2ToV3::V2(v2) => v2.documents_file(),
CompatIndexV2ToV3::Compat(compat) => compat.documents_file(),
}
}
pub fn settings(&mut self) -> Result<v3::Settings<v3::Checked>> {
let settings = match self {
CompatIndexV2ToV3::V2(from) => from.settings()?,


@@ -1,3 +1,5 @@
use std::fs::File;
use super::v2_to_v3::{CompatIndexV2ToV3, CompatV2ToV3};
use super::v4_to_v5::CompatV4ToV5;
use crate::reader::{v3, v4, UpdateFile};
@@ -252,6 +254,13 @@ impl CompatIndexV3ToV4 {
}
}
pub fn documents_file(&self) -> &File {
match self {
CompatIndexV3ToV4::V3(v3) => v3.documents_file(),
CompatIndexV3ToV4::Compat(compat) => compat.documents_file(),
}
}
pub fn settings(&mut self) -> Result<v4::Settings<v4::Checked>> {
Ok(match self {
CompatIndexV3ToV4::V3(v3) => {


@@ -1,3 +1,5 @@
use std::fs::File;
use super::v3_to_v4::{CompatIndexV3ToV4, CompatV3ToV4};
use super::v5_to_v6::CompatV5ToV6;
use crate::reader::{v4, v5, Document};
@@ -241,6 +243,13 @@ impl CompatIndexV4ToV5 {
}
}
pub fn documents_file(&self) -> &File {
match self {
CompatIndexV4ToV5::V4(v4) => v4.documents_file(),
CompatIndexV4ToV5::Compat(compat) => compat.documents_file(),
}
}
pub fn settings(&mut self) -> Result<v5::Settings<v5::Checked>> {
match self {
CompatIndexV4ToV5::V4(v4) => Ok(v5::Settings::from(v4.settings()?).check()),


@@ -1,3 +1,4 @@
use std::fs::File;
use std::num::NonZeroUsize;
use std::str::FromStr;
@@ -243,6 +244,13 @@ impl CompatIndexV5ToV6 {
}
}
pub fn documents_file(&self) -> &File {
match self {
CompatIndexV5ToV6::V5(v5) => v5.documents_file(),
CompatIndexV5ToV6::Compat(compat) => compat.documents_file(),
}
}
pub fn settings(&mut self) -> Result<v6::Settings<v6::Checked>> {
match self {
CompatIndexV5ToV6::V5(v5) => Ok(v6::Settings::from(v5.settings()?).check()),


@@ -192,6 +192,14 @@ impl DumpIndexReader {
}
}
/// A reference to a file in the NDJSON format containing all the documents of the index
pub fn documents_file(&self) -> &File {
match self {
DumpIndexReader::Current(v6) => v6.documents_file(),
DumpIndexReader::Compat(compat) => compat.documents_file(),
}
}
pub fn settings(&mut self) -> Result<v6::Settings<v6::Checked>> {
match self {
DumpIndexReader::Current(v6) => v6.settings(),


@@ -72,6 +72,10 @@ impl V1IndexReader {
.map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) }))
}
pub fn documents_file(&self) -> &File {
self.documents.get_ref()
}
pub fn settings(&mut self) -> Result<self::settings::Settings> {
Ok(serde_json::from_reader(&mut self.settings)?)
}


@@ -203,6 +203,10 @@ impl V2IndexReader {
.map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) }))
}
pub fn documents_file(&self) -> &File {
self.documents.get_ref()
}
pub fn settings(&mut self) -> Result<Settings<Checked>> {
Ok(self.settings.clone())
}


@@ -215,6 +215,10 @@ impl V3IndexReader {
.map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) }))
}
pub fn documents_file(&self) -> &File {
self.documents.get_ref()
}
pub fn settings(&mut self) -> Result<Settings<Checked>> {
Ok(self.settings.clone())
}


@@ -210,6 +210,10 @@ impl V4IndexReader {
.map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) }))
}
pub fn documents_file(&self) -> &File {
self.documents.get_ref()
}
pub fn settings(&mut self) -> Result<Settings<Checked>> {
Ok(self.settings.clone())
}


@@ -247,6 +247,10 @@ impl V5IndexReader {
.map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) }))
}
pub fn documents_file(&self) -> &File {
self.documents.get_ref()
}
pub fn settings(&mut self) -> Result<Settings<Checked>> {
Ok(self.settings.clone())
}


@@ -284,6 +284,10 @@ impl V6IndexReader {
.map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) }))
}
pub fn documents_file(&self) -> &File {
self.documents.get_ref()
}
pub fn settings(&mut self) -> Result<Settings<Checked>> {
let mut settings: Settings<Unchecked> = serde_json::from_reader(&mut self.settings)?;
patch_embedders(&mut settings);


@@ -26,7 +26,7 @@ flate2 = "1.1.2"
indexmap = "2.9.0"
meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-types = { path = "../meilisearch-types" }
memmap2 = "0.9.5"
memmap2 = "0.9.7"
page_size = "0.6.0"
rayon = "1.10.0"
roaring = { version = "0.10.12", features = ["serde"] }


@@ -24,6 +24,7 @@ pub(crate) struct FeatureData {
persisted: Database<Str, SerdeJson<RuntimeTogglableFeatures>>,
runtime: Arc<RwLock<RuntimeTogglableFeatures>>,
network: Arc<RwLock<Network>>,
experimental_personalization_api_key: Option<String>,
}
#[derive(Debug, Clone, Copy)]
@@ -174,7 +175,12 @@ impl FeatureData {
let persisted_features: RuntimeTogglableFeatures =
runtime_features_db.get(wtxn, db_keys::EXPERIMENTAL_FEATURES)?.unwrap_or_default();
let InstanceTogglableFeatures { metrics, logs_route, contains_filter } = instance_features;
let InstanceTogglableFeatures {
metrics,
logs_route,
contains_filter,
experimental_personalization_api_key,
} = instance_features;
let runtime = Arc::new(RwLock::new(RuntimeTogglableFeatures {
metrics: metrics || persisted_features.metrics,
logs_route: logs_route || persisted_features.logs_route,
@@ -189,6 +195,7 @@ impl FeatureData {
persisted: runtime_features_db,
runtime,
network: Arc::new(RwLock::new(network)),
experimental_personalization_api_key,
})
}
@@ -219,6 +226,10 @@ impl FeatureData {
RoFeatures::new(self)
}
pub fn experimental_personalization_api_key(&self) -> Option<&String> {
self.experimental_personalization_api_key.as_ref()
}
pub fn put_network(&self, mut wtxn: RwTxn, new_network: Network) -> Result<()> {
self.persisted.remap_data_type::<SerdeJson<Network>>().put(
&mut wtxn,


@@ -20,6 +20,7 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
let IndexScheduler {
cleanup_enabled: _,
experimental_no_edition_2024_for_dumps: _,
processing_tasks,
env,
version,


@@ -168,6 +168,9 @@ pub struct IndexScheduler {
/// Whether we should automatically cleanup the task queue or not.
pub(crate) cleanup_enabled: bool,
/// Whether we should use the old document indexer or the new one.
pub(crate) experimental_no_edition_2024_for_dumps: bool,
/// The webhook url we should send tasks to after processing every batches.
pub(crate) webhook_url: Option<String>,
/// The Authorization header to send to the webhook URL.
@@ -210,6 +213,7 @@ impl IndexScheduler {
index_mapper: self.index_mapper.clone(),
cleanup_enabled: self.cleanup_enabled,
experimental_no_edition_2024_for_dumps: self.experimental_no_edition_2024_for_dumps,
webhook_url: self.webhook_url.clone(),
webhook_authorization_header: self.webhook_authorization_header.clone(),
embedders: self.embedders.clone(),
@@ -280,7 +284,8 @@ impl IndexScheduler {
let version = versioning::Versioning::new(&env, from_db_version)?;
let mut wtxn = env.write_txn()?;
let features = features::FeatureData::new(&env, &mut wtxn, options.instance_features)?;
let features =
features::FeatureData::new(&env, &mut wtxn, options.instance_features.clone())?;
let queue = Queue::new(&env, &mut wtxn, &options)?;
let index_mapper = IndexMapper::new(&env, &mut wtxn, &options, budget)?;
let chat_settings = env.create_database(&mut wtxn, Some(CHAT_SETTINGS_DB_NAME))?;
@@ -296,6 +301,9 @@ impl IndexScheduler {
index_mapper,
env,
cleanup_enabled: options.cleanup_enabled,
experimental_no_edition_2024_for_dumps: options
.indexer_config
.experimental_no_edition_2024_for_dumps,
webhook_url: options.webhook_url,
webhook_authorization_header: options.webhook_authorization_header,
embedders: Default::default(),
@@ -594,6 +602,11 @@ impl IndexScheduler {
Ok(nbr_index_processing_tasks > 0)
}
/// Whether the index should use the old document indexer.
pub fn no_edition_2024_for_dumps(&self) -> bool {
self.experimental_no_edition_2024_for_dumps
}
/// Return the tasks matching the query from the user's point of view along
/// with the total number of tasks matching the query, ignoring from and limit.
///
@@ -834,6 +847,10 @@ impl IndexScheduler {
self.features.features()
}
pub fn experimental_personalization_api_key(&self) -> Option<&String> {
self.features.experimental_personalization_api_key()
}
pub fn put_runtime_features(&self, features: RuntimeTogglableFeatures) -> Result<()> {
let wtxn = self.env.write_txn().map_err(Error::HeedTransaction)?;
self.features.put_runtime_features(wtxn, features)?;


@@ -5,6 +5,7 @@ use std::sync::atomic::Ordering;
use dump::IndexMetadata;
use meilisearch_types::milli::constants::RESERVED_VECTORS_FIELD_NAME;
use meilisearch_types::milli::index::EmbeddingsWithMetadata;
use meilisearch_types::milli::progress::{Progress, VariableNameStep};
use meilisearch_types::milli::vector::parsed_vectors::{ExplicitVectors, VectorOrArrayOfVectors};
use meilisearch_types::milli::{self};
@@ -227,12 +228,21 @@ impl IndexScheduler {
return Err(Error::from_milli(user_err, Some(uid.to_string())));
};
for (embedder_name, (embeddings, regenerate)) in embeddings {
for (
embedder_name,
EmbeddingsWithMetadata { embeddings, regenerate, has_fragments },
) in embeddings
{
let embeddings = ExplicitVectors {
embeddings: Some(VectorOrArrayOfVectors::from_array_of_vectors(
embeddings,
)),
regenerate,
regenerate: regenerate &&
// Meilisearch does not handle dumps with fragments well: because the fragments
// are marked as user-provided,
// all embeddings would be regenerated on any settings change or document update.
// To prevent this, we mark embeddings as non-regenerate in this case.
!has_fragments,
};
vectors.insert(embedder_name, serde_json::to_value(embeddings).unwrap());
}


@@ -9,6 +9,7 @@ use flate2::write::GzEncoder;
use flate2::Compression;
use meilisearch_types::index_uid_pattern::IndexUidPattern;
use meilisearch_types::milli::constants::RESERVED_VECTORS_FIELD_NAME;
use meilisearch_types::milli::index::EmbeddingsWithMetadata;
use meilisearch_types::milli::progress::{Progress, VariableNameStep};
use meilisearch_types::milli::update::{request_threads, Setting};
use meilisearch_types::milli::vector::parsed_vectors::{ExplicitVectors, VectorOrArrayOfVectors};
@@ -62,13 +63,14 @@ impl IndexScheduler {
let ExportIndexSettings { filter, override_settings } = export_settings;
let index = self.index(uid)?;
let index_rtxn = index.read_txn()?;
let bearer = api_key.map(|api_key| format!("Bearer {api_key}"));
// First, check if the index already exists
let url = format!("{base_url}/indexes/{uid}");
let response = retry(&must_stop_processing, || {
let mut request = agent.get(&url);
if let Some(api_key) = api_key {
request = request.set("Authorization", &format!("Bearer {api_key}"));
if let Some(bearer) = &bearer {
request = request.set("Authorization", bearer);
}
request.send_bytes(Default::default()).map_err(into_backoff_error)
@@ -90,8 +92,8 @@ impl IndexScheduler {
let url = format!("{base_url}/indexes");
retry(&must_stop_processing, || {
let mut request = agent.post(&url);
if let Some(api_key) = api_key {
request = request.set("Authorization", &format!("Bearer {api_key}"));
if let Some(bearer) = &bearer {
request = request.set("Authorization", bearer);
}
let index_param = json!({ "uid": uid, "primaryKey": primary_key });
request.send_json(&index_param).map_err(into_backoff_error)
@@ -103,8 +105,8 @@ impl IndexScheduler {
let url = format!("{base_url}/indexes/{uid}");
retry(&must_stop_processing, || {
let mut request = agent.patch(&url);
if let Some(api_key) = api_key {
request = request.set("Authorization", &format!("Bearer {api_key}"));
if let Some(bearer) = &bearer {
request = request.set("Authorization", bearer);
}
let index_param = json!({ "primaryKey": primary_key });
request.send_json(&index_param).map_err(into_backoff_error)
@@ -122,7 +124,6 @@ impl IndexScheduler {
}
// Retry logic for sending settings
let url = format!("{base_url}/indexes/{uid}/settings");
let bearer = api_key.map(|api_key| format!("Bearer {api_key}"));
retry(&must_stop_processing, || {
let mut request = agent.patch(&url);
if let Some(bearer) = bearer.as_ref() {
@@ -167,10 +168,10 @@ impl IndexScheduler {
},
);
let limit = payload_size.map(|ps| ps.as_u64() as usize).unwrap_or(50 * 1024 * 1024); // defaults to 50 MiB
let limit = payload_size.map(|ps| ps.as_u64() as usize).unwrap_or(20 * 1024 * 1024); // defaults to 20 MiB
let documents_url = format!("{base_url}/indexes/{uid}/documents");
request_threads()
let results = request_threads()
.broadcast(|ctx| {
let index_rtxn = index
.read_txn()
@@ -229,12 +230,21 @@ impl IndexScheduler {
));
};
for (embedder_name, (embeddings, regenerate)) in embeddings {
for (
embedder_name,
EmbeddingsWithMetadata { embeddings, regenerate, has_fragments },
) in embeddings
{
let embeddings = ExplicitVectors {
embeddings: Some(
VectorOrArrayOfVectors::from_array_of_vectors(embeddings),
),
regenerate,
regenerate: regenerate &&
// Meilisearch does not handle dumps with fragments well: because the fragments
// are marked as user-provided,
// all embeddings would be regenerated on any settings change or document update.
// To prevent this, we mark embeddings as non-regenerate in this case.
!has_fragments,
};
vectors.insert(
embedder_name,
@@ -265,9 +275,8 @@ impl IndexScheduler {
let mut request = agent.post(&documents_url);
request = request.set("Content-Type", "application/x-ndjson");
request = request.set("Content-Encoding", "gzip");
if let Some(api_key) = api_key {
request = request
.set("Authorization", &(format!("Bearer {api_key}")));
if let Some(bearer) = &bearer {
request = request.set("Authorization", bearer);
}
request.send_bytes(&compressed_buffer).map_err(into_backoff_error)
})?;
@@ -276,7 +285,7 @@ impl IndexScheduler {
}
buffer.extend_from_slice(&tmp_buffer);
if i % 100 == 0 {
if i > 0 && i % 100 == 0 {
step.fetch_add(100, atomic::Ordering::Relaxed);
}
}
@@ -284,8 +293,8 @@ impl IndexScheduler {
retry(&must_stop_processing, || {
let mut request = agent.post(&documents_url);
request = request.set("Content-Type", "application/x-ndjson");
if let Some(api_key) = api_key {
request = request.set("Authorization", &(format!("Bearer {api_key}")));
if let Some(bearer) = &bearer {
request = request.set("Authorization", bearer);
}
request.send_bytes(&buffer).map_err(into_backoff_error)
})?;
@@ -298,6 +307,9 @@ impl IndexScheduler {
Some(uid.to_string()),
)
})?;
for result in results {
result?;
}
step.store(total_documents, atomic::Ordering::Relaxed);
}


@@ -3,6 +3,7 @@ use std::collections::BTreeMap;
use big_s::S;
use insta::assert_json_snapshot;
use meili_snap::{json_string, snapshot};
use meilisearch_types::milli::index::EmbeddingsWithMetadata;
use meilisearch_types::milli::update::Setting;
use meilisearch_types::milli::vector::settings::EmbeddingSettings;
use meilisearch_types::milli::vector::SearchQuery;
@@ -220,8 +221,8 @@ fn import_vectors() {
let embeddings = index.embeddings(&rtxn, 0).unwrap();
assert_json_snapshot!(embeddings[&simple_hf_name].0[0] == lab_embed, @"true");
assert_json_snapshot!(embeddings[&fakerest_name].0[0] == beagle_embed, @"true");
assert_json_snapshot!(embeddings[&simple_hf_name].embeddings[0] == lab_embed, @"true");
assert_json_snapshot!(embeddings[&fakerest_name].embeddings[0] == beagle_embed, @"true");
let doc = index.documents(&rtxn, std::iter::once(0)).unwrap()[0].1;
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
@@ -311,9 +312,9 @@ fn import_vectors() {
let embeddings = index.embeddings(&rtxn, 0).unwrap();
// automatically changed to patou because set to regenerate
assert_json_snapshot!(embeddings[&simple_hf_name].0[0] == patou_embed, @"true");
assert_json_snapshot!(embeddings[&simple_hf_name].embeddings[0] == patou_embed, @"true");
// remained beagle
assert_json_snapshot!(embeddings[&fakerest_name].0[0] == beagle_embed, @"true");
assert_json_snapshot!(embeddings[&fakerest_name].embeddings[0] == beagle_embed, @"true");
let doc = index.documents(&rtxn, std::iter::once(0)).unwrap()[0].1;
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
@@ -497,13 +498,13 @@ fn import_vectors_first_and_embedder_later() {
let docid = index.external_documents_ids.get(&rtxn, "0").unwrap().unwrap();
let embeddings = index.embeddings(&rtxn, docid).unwrap();
let (embedding, _) = &embeddings["my_doggo_embedder"];
assert!(!embedding.is_empty(), "{embedding:?}");
let EmbeddingsWithMetadata { embeddings, .. } = &embeddings["my_doggo_embedder"];
assert!(!embeddings.is_empty(), "{embeddings:?}");
// the document with the id 3 should keep its original embedding
let docid = index.external_documents_ids.get(&rtxn, "3").unwrap().unwrap();
let embeddings = index.embeddings(&rtxn, docid).unwrap();
let (embeddings, _) = &embeddings["my_doggo_embedder"];
let EmbeddingsWithMetadata { embeddings, .. } = &embeddings["my_doggo_embedder"];
snapshot!(embeddings.len(), @"1");
assert!(embeddings[0].iter().all(|i| *i == 3.0), "{:?}", embeddings[0]);
@@ -558,7 +559,7 @@ fn import_vectors_first_and_embedder_later() {
"###);
let embeddings = index.embeddings(&rtxn, docid).unwrap();
let (embedding, _) = &embeddings["my_doggo_embedder"];
let EmbeddingsWithMetadata { embeddings: embedding, .. } = &embeddings["my_doggo_embedder"];
assert!(!embedding.is_empty());
assert!(!embedding[0].iter().all(|i| *i == 3.0), "{:?}", embedding[0]);
@@ -566,7 +567,7 @@ fn import_vectors_first_and_embedder_later() {
// the document with the id 4 should generate an embedding
let docid = index.external_documents_ids.get(&rtxn, "4").unwrap().unwrap();
let embeddings = index.embeddings(&rtxn, docid).unwrap();
let (embedding, _) = &embeddings["my_doggo_embedder"];
let EmbeddingsWithMetadata { embeddings: embedding, .. } = &embeddings["my_doggo_embedder"];
assert!(!embedding.is_empty());
}
@@ -696,7 +697,7 @@ fn delete_document_containing_vector() {
"###);
let docid = index.external_documents_ids.get(&rtxn, "0").unwrap().unwrap();
let embeddings = index.embeddings(&rtxn, docid).unwrap();
let (embedding, _) = &embeddings["manual"];
let EmbeddingsWithMetadata { embeddings: embedding, .. } = &embeddings["manual"];
assert!(!embedding.is_empty(), "{embedding:?}");
index_scheduler


@@ -158,7 +158,7 @@ impl AuthController {
self.store.delete_all_keys()
}
/// Delete all the keys in the DB.
/// Insert a key directly into the store.
pub fn raw_insert_key(&mut self, key: Key) -> Result<()> {
self.store.put_api_key(key)?;
Ok(())
@@ -351,6 +351,7 @@ pub struct IndexSearchRules {
fn generate_default_keys(store: &HeedAuthStore) -> Result<()> {
store.put_api_key(Key::default_chat())?;
store.put_api_key(Key::default_read_only_admin())?;
store.put_api_key(Key::default_admin())?;
store.put_api_key(Key::default_search())?;


@@ -88,7 +88,13 @@ impl HeedAuthStore {
let mut actions = HashSet::new();
for action in &key.actions {
match action {
Action::All => actions.extend(enum_iterator::all::<Action>()),
Action::All => {
actions.extend(enum_iterator::all::<Action>());
actions.remove(&Action::AllGet);
}
Action::AllGet => {
actions.extend(enum_iterator::all::<Action>().filter(|a| a.is_read()))
}
Action::DocumentsAll => {
actions.extend(
[Action::DocumentsGet, Action::DocumentsDelete, Action::DocumentsAdd]


@@ -24,7 +24,7 @@ enum-iterator = "2.1.0"
file-store = { path = "../file-store" }
flate2 = "1.1.2"
fst = "0.4.7"
memmap2 = "0.9.5"
memmap2 = "0.9.7"
milli = { path = "../milli" }
roaring = { version = "0.10.12", features = ["serde"] }
rustc-hash = "2.1.1"


@@ -237,6 +237,7 @@ InvalidDocumentRetrieveVectors , InvalidRequest , BAD_REQU
MissingDocumentFilter , InvalidRequest , BAD_REQUEST ;
MissingDocumentEditionFunction , InvalidRequest , BAD_REQUEST ;
InvalidDocumentFilter , InvalidRequest , BAD_REQUEST ;
InvalidDocumentSort , InvalidRequest , BAD_REQUEST ;
InvalidDocumentGeoField , InvalidRequest , BAD_REQUEST ;
InvalidVectorDimensions , InvalidRequest , BAD_REQUEST ;
InvalidVectorsType , InvalidRequest , BAD_REQUEST ;
@@ -309,6 +310,8 @@ InvalidSearchShowRankingScoreDetails , InvalidRequest , BAD_REQU
InvalidSimilarShowRankingScoreDetails , InvalidRequest , BAD_REQUEST ;
InvalidSearchSort , InvalidRequest , BAD_REQUEST ;
InvalidSearchDistinct , InvalidRequest , BAD_REQUEST ;
InvalidSearchPersonalize , InvalidRequest , BAD_REQUEST ;
InvalidSearchPersonalizeUserContext , InvalidRequest , BAD_REQUEST ;
InvalidSearchMediaAndVector , InvalidRequest , BAD_REQUEST ;
InvalidSettingsDisplayedAttributes , InvalidRequest , BAD_REQUEST ;
InvalidSettingsDistinctAttribute , InvalidRequest , BAD_REQUEST ;
@@ -415,6 +418,7 @@ InvalidChatCompletionPrompts , InvalidRequest , BAD_REQU
InvalidChatCompletionSystemPrompt , InvalidRequest , BAD_REQUEST ;
InvalidChatCompletionSearchDescriptionPrompt , InvalidRequest , BAD_REQUEST ;
InvalidChatCompletionSearchQueryParamPrompt , InvalidRequest , BAD_REQUEST ;
InvalidChatCompletionSearchFilterParamPrompt , InvalidRequest , BAD_REQUEST ;
InvalidChatCompletionSearchIndexUidParamPrompt , InvalidRequest , BAD_REQUEST ;
InvalidChatCompletionPreQueryPrompt , InvalidRequest , BAD_REQUEST
}
@@ -476,7 +480,8 @@ impl ErrorCode for milli::Error {
UserError::InvalidDistinctAttribute { .. } => Code::InvalidSearchDistinct,
UserError::SortRankingRuleMissing => Code::InvalidSearchSort,
UserError::InvalidFacetsDistribution { .. } => Code::InvalidSearchFacets,
UserError::InvalidSortableAttribute { .. } => Code::InvalidSearchSort,
UserError::InvalidSearchSortableAttribute { .. } => Code::InvalidSearchSort,
UserError::InvalidDocumentSortableAttribute { .. } => Code::InvalidDocumentSort,
UserError::InvalidSearchableAttribute { .. } => {
Code::InvalidSearchAttributesToSearchOn
}
@@ -492,7 +497,8 @@ impl ErrorCode for milli::Error {
UserError::InvalidVectorsMapType { .. }
| UserError::InvalidVectorsEmbedderConf { .. } => Code::InvalidVectorsType,
UserError::TooManyVectors(_, _) => Code::TooManyVectors,
UserError::SortError(_) => Code::InvalidSearchSort,
UserError::SortError { search: true, .. } => Code::InvalidSearchSort,
UserError::SortError { search: false, .. } => Code::InvalidDocumentSort,
UserError::InvalidMinTypoWordLenSetting(_, _) => {
Code::InvalidSettingsTypoTolerance
}
@@ -646,6 +652,18 @@ impl fmt::Display for deserr_codes::InvalidNetworkSearchApiKey {
}
}
impl fmt::Display for deserr_codes::InvalidSearchPersonalize {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "the value of `personalize` is invalid, expected a JSON object with optional `userContext` string.")
}
}
impl fmt::Display for deserr_codes::InvalidSearchPersonalizeUserContext {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "the value of `userContext` is invalid, expected a string.")
}
}
#[macro_export]
macro_rules! internal_error {
($target:ty : $($other:path), *) => {


@@ -4,10 +4,11 @@ use serde::{Deserialize, Serialize};
use crate::error::{Code, ResponseError};
pub const DEFAULT_CHAT_SYSTEM_PROMPT: &str = "You are a highly capable research assistant with access to powerful search tools. IMPORTANT INSTRUCTIONS:1. When answering questions, you MUST make multiple tool calls (at least 2-3) to gather comprehensive information.2. Use different search queries for each tool call - vary keywords, rephrase questions, and explore different semantic angles to ensure broad coverage.3. Always explicitly announce BEFORE making each tool call by saying: \"I'll search for [specific information] now.\"4. Combine information from ALL tool calls to provide complete, nuanced answers rather than relying on a single source.5. For complex topics, break down your research into multiple targeted queries rather than using a single generic search.";
pub const DEFAULT_CHAT_SYSTEM_PROMPT: &str = "You are a highly capable research assistant with access to powerful search tools. IMPORTANT INSTRUCTIONS:1. When answering questions, you MUST make multiple tool calls (at least 2-3) to gather comprehensive information.2. Use different search queries for each tool call - vary keywords, rephrase questions, and explore different semantic angles to ensure broad coverage.3. Always explicitly announce BEFORE making each tool call by saying: \"I'll search for [specific information] now.\"4. Combine information from ALL tool calls to provide complete, nuanced answers rather than relying on a single source.5. For complex topics, break down your research into multiple targeted queries rather than using a single generic search. Meilisearch doesn't use the colon (:) syntax to filter but rather the equal (=) one. Separate filters from query and keep the q parameter empty if needed. Same for the filter parameter: keep it empty if need be. If you need to find documents that CONTAINS keywords simply put the keywords in the q parameter do no use a filter for this purpose. Whenever you get an error, read the error message and fix your error. ";
pub const DEFAULT_CHAT_SEARCH_DESCRIPTION_PROMPT: &str =
"Search the database for relevant JSON documents using an optional query.";
"Query: 'best story about Rust before 2018' with year: 2018, 2020, 2021\nlabel: analysis, golang, javascript\ntype: story, link\nvote: 300, 298, 278\n: {\"q\": \"\", \"filter\": \"category = Rust AND type = story AND year < 2018 AND vote > 100\"}\nQuery: 'A black or green car that can go fast with red brakes' with maxspeed_kmh: 200, 150, 130\ncolor: black, grey, red, green\nbrand: Toyota, Renault, Jeep, Ferrari\n: {\"q\": \"red brakes\", \"filter\": \"maxspeed_kmh > 150 AND color IN ['black', green]\"}\nQuery: 'Superman movie released in 2018 or after' with year: 2018, 2020, 2021\ngenres: Drama, Comedy, Adventure, Fiction\n: {\"q\":\"Superman\",\"filter\":\"genres IN [Adventure, Fiction] AND year >= 2018\"}";
pub const DEFAULT_CHAT_SEARCH_Q_PARAM_PROMPT: &str = "The search query string used to find relevant documents in the index. This should contain keywords or phrases that best represent what the user is looking for. More specific queries will yield more precise results.";
pub const DEFAULT_CHAT_SEARCH_FILTER_PARAM_PROMPT: &str = "The search filter string used to find relevant documents in the index. It supports parentheses, `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox`. Here is an example: \"price > 100 AND category = 'electronics'\". The following is a list of fields that can be filtered on: ";
pub const DEFAULT_CHAT_SEARCH_INDEX_UID_PARAM_PROMPT: &str = "The name of the index to search within. An index is a collection of documents organized for search. Selecting the right index ensures the most relevant results for the user query.";
#[derive(Serialize, Deserialize, Debug, Clone, Copy, Default, PartialEq, Eq)]
@@ -24,11 +25,12 @@ pub struct RuntimeTogglableFeatures {
pub multimodal: bool,
}
#[derive(Default, Debug, Clone, Copy)]
#[derive(Default, Debug, Clone)]
pub struct InstanceTogglableFeatures {
pub metrics: bool,
pub logs_route: bool,
pub contains_filter: bool,
pub experimental_personalization_api_key: Option<String>,
}
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
@@ -161,18 +163,31 @@ impl ChatCompletionSource {
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
#[serde(rename_all = "camelCase")]
pub struct ChatCompletionPrompts {
#[serde(default)]
pub system: String,
#[serde(default)]
pub search_description: String,
#[serde(default)]
pub search_q_param: String,
#[serde(default = "default_search_filter_param")]
pub search_filter_param: String,
#[serde(default)]
pub search_index_uid_param: String,
}
/// This function is used for when the search_filter_param is
/// not provided and this can happen when the database is in v1.15.
fn default_search_filter_param() -> String {
DEFAULT_CHAT_SEARCH_FILTER_PARAM_PROMPT.to_string()
}
impl Default for ChatCompletionPrompts {
fn default() -> Self {
Self {
system: DEFAULT_CHAT_SYSTEM_PROMPT.to_string(),
search_description: DEFAULT_CHAT_SEARCH_DESCRIPTION_PROMPT.to_string(),
search_q_param: DEFAULT_CHAT_SEARCH_Q_PARAM_PROMPT.to_string(),
search_filter_param: DEFAULT_CHAT_SEARCH_FILTER_PARAM_PROMPT.to_string(),
search_index_uid_param: DEFAULT_CHAT_SEARCH_INDEX_UID_PARAM_PROMPT.to_string(),
}
}


@@ -144,6 +144,21 @@ impl Key {
}
}
pub fn default_read_only_admin() -> Self {
let now = OffsetDateTime::now_utc();
let uid = Uuid::new_v4();
Self {
name: Some("Default Read-Only Admin API Key".to_string()),
description: Some("Use it to read information across the whole database. Caution! Do not expose this key on a public frontend".to_string()),
uid,
actions: vec![Action::AllGet, Action::KeysGet],
indexes: vec![IndexUidPattern::all()],
expires_at: None,
created_at: now,
updated_at: now,
}
}
pub fn default_search() -> Self {
let now = OffsetDateTime::now_utc();
let uid = Uuid::new_v4();
@@ -347,6 +362,9 @@ pub enum Action {
#[serde(rename = "chatsSettings.update")]
#[deserr(rename = "chatsSettings.update")]
ChatsSettingsUpdate,
#[serde(rename = "*.get")]
#[deserr(rename = "*.get")]
AllGet,
}
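A key granting the new action can then be created through the existing keys route; a hedged sketch of the payload (name and values illustrative):
POST /keys
{ "name": "read-only", "actions": ["*.get"], "indexes": ["*"], "expiresAt": null }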
impl Action {
@@ -385,6 +403,7 @@ impl Action {
METRICS_GET => Some(Self::MetricsGet),
DUMPS_ALL => Some(Self::DumpsAll),
DUMPS_CREATE => Some(Self::DumpsCreate),
SNAPSHOTS_ALL => Some(Self::SnapshotsAll),
SNAPSHOTS_CREATE => Some(Self::SnapshotsCreate),
VERSION => Some(Self::Version),
KEYS_CREATE => Some(Self::KeysAdd),
@@ -393,12 +412,60 @@ impl Action {
KEYS_DELETE => Some(Self::KeysDelete),
EXPERIMENTAL_FEATURES_GET => Some(Self::ExperimentalFeaturesGet),
EXPERIMENTAL_FEATURES_UPDATE => Some(Self::ExperimentalFeaturesUpdate),
EXPORT => Some(Self::Export),
NETWORK_GET => Some(Self::NetworkGet),
NETWORK_UPDATE => Some(Self::NetworkUpdate),
ALL_GET => Some(Self::AllGet),
_otherwise => None,
}
}
/// Whether the action should be included in [Action::AllGet].
pub fn is_read(&self) -> bool {
use Action::*;
// It's using an exhaustive match to force the addition of new actions.
match self {
// Any action that expands to others must return false, as it wouldn't be able to expand recursively.
All | AllGet | DocumentsAll | IndexesAll | ChatsAll | TasksAll | SettingsAll
| StatsAll | MetricsAll | DumpsAll | SnapshotsAll | ChatsSettingsAll => false,
Search => true,
DocumentsAdd => false,
DocumentsGet => true,
DocumentsDelete => false,
Export => true,
IndexesAdd => false,
IndexesGet => true,
IndexesUpdate => false,
IndexesDelete => false,
IndexesSwap => false,
TasksCancel => false,
TasksDelete => false,
TasksGet => true,
SettingsGet => true,
SettingsUpdate => false,
StatsGet => true,
MetricsGet => true,
DumpsCreate => false,
SnapshotsCreate => false,
Version => true,
KeysAdd => false,
KeysGet => false, // Disabled in order to prevent privilege escalation
KeysUpdate => false,
KeysDelete => false,
ExperimentalFeaturesGet => true,
ExperimentalFeaturesUpdate => false,
NetworkGet => true,
NetworkUpdate => false,
ChatCompletions => false, // Disabled because it might trigger generation of new chats
ChatsGet => true,
ChatsDelete => false,
ChatsSettingsGet => true,
ChatsSettingsUpdate => false,
}
}
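Given this predicate, expanding `*.get` into the concrete actions it grants is a simple filter; a minimal sketch, assuming the `enum_iterator` crate already used in the tests below:
// Collect every action that `*.get` should expand to.
fn all_get_expansion() -> Vec<Action> {
    enum_iterator::all::<Action>().filter(Action::is_read).collect()
}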
pub const fn repr(&self) -> u8 {
*self as u8
}
@@ -408,6 +475,7 @@ pub mod actions {
use super::Action::*;
pub(crate) const ALL: u8 = All.repr();
pub const ALL_GET: u8 = AllGet.repr();
pub const SEARCH: u8 = Search.repr();
pub const DOCUMENTS_ALL: u8 = DocumentsAll.repr();
pub const DOCUMENTS_ADD: u8 = DocumentsAdd.repr();
@@ -432,6 +500,7 @@ pub mod actions {
pub const METRICS_GET: u8 = MetricsGet.repr();
pub const DUMPS_ALL: u8 = DumpsAll.repr();
pub const DUMPS_CREATE: u8 = DumpsCreate.repr();
pub const SNAPSHOTS_ALL: u8 = SnapshotsAll.repr();
pub const SNAPSHOTS_CREATE: u8 = SnapshotsCreate.repr();
pub const VERSION: u8 = Version.repr();
pub const KEYS_CREATE: u8 = KeysAdd.repr();
@@ -454,3 +523,68 @@ pub mod actions {
pub const CHATS_SETTINGS_GET: u8 = ChatsSettingsGet.repr();
pub const CHATS_SETTINGS_UPDATE: u8 = ChatsSettingsUpdate.repr();
}
#[cfg(test)]
pub(crate) mod test {
use super::actions::*;
use super::Action::*;
use super::*;
#[test]
fn test_action_repr_and_constants() {
assert!(All.repr() == 0 && ALL == 0);
assert!(Search.repr() == 1 && SEARCH == 1);
assert!(DocumentsAll.repr() == 2 && DOCUMENTS_ALL == 2);
assert!(DocumentsAdd.repr() == 3 && DOCUMENTS_ADD == 3);
assert!(DocumentsGet.repr() == 4 && DOCUMENTS_GET == 4);
assert!(DocumentsDelete.repr() == 5 && DOCUMENTS_DELETE == 5);
assert!(IndexesAll.repr() == 6 && INDEXES_ALL == 6);
assert!(IndexesAdd.repr() == 7 && INDEXES_CREATE == 7);
assert!(IndexesGet.repr() == 8 && INDEXES_GET == 8);
assert!(IndexesUpdate.repr() == 9 && INDEXES_UPDATE == 9);
assert!(IndexesDelete.repr() == 10 && INDEXES_DELETE == 10);
assert!(IndexesSwap.repr() == 11 && INDEXES_SWAP == 11);
assert!(TasksAll.repr() == 12 && TASKS_ALL == 12);
assert!(TasksCancel.repr() == 13 && TASKS_CANCEL == 13);
assert!(TasksDelete.repr() == 14 && TASKS_DELETE == 14);
assert!(TasksGet.repr() == 15 && TASKS_GET == 15);
assert!(SettingsAll.repr() == 16 && SETTINGS_ALL == 16);
assert!(SettingsGet.repr() == 17 && SETTINGS_GET == 17);
assert!(SettingsUpdate.repr() == 18 && SETTINGS_UPDATE == 18);
assert!(StatsAll.repr() == 19 && STATS_ALL == 19);
assert!(StatsGet.repr() == 20 && STATS_GET == 20);
assert!(MetricsAll.repr() == 21 && METRICS_ALL == 21);
assert!(MetricsGet.repr() == 22 && METRICS_GET == 22);
assert!(DumpsAll.repr() == 23 && DUMPS_ALL == 23);
assert!(DumpsCreate.repr() == 24 && DUMPS_CREATE == 24);
assert!(SnapshotsAll.repr() == 25 && SNAPSHOTS_ALL == 25);
assert!(SnapshotsCreate.repr() == 26 && SNAPSHOTS_CREATE == 26);
assert!(Version.repr() == 27 && VERSION == 27);
assert!(KeysAdd.repr() == 28 && KEYS_CREATE == 28);
assert!(KeysGet.repr() == 29 && KEYS_GET == 29);
assert!(KeysUpdate.repr() == 30 && KEYS_UPDATE == 30);
assert!(KeysDelete.repr() == 31 && KEYS_DELETE == 31);
assert!(ExperimentalFeaturesGet.repr() == 32 && EXPERIMENTAL_FEATURES_GET == 32);
assert!(ExperimentalFeaturesUpdate.repr() == 33 && EXPERIMENTAL_FEATURES_UPDATE == 33);
assert!(Export.repr() == 34 && EXPORT == 34);
assert!(NetworkGet.repr() == 35 && NETWORK_GET == 35);
assert!(NetworkUpdate.repr() == 36 && NETWORK_UPDATE == 36);
assert!(ChatCompletions.repr() == 37 && CHAT_COMPLETIONS == 37);
assert!(ChatsAll.repr() == 38 && CHATS_ALL == 38);
assert!(ChatsGet.repr() == 39 && CHATS_GET == 39);
assert!(ChatsDelete.repr() == 40 && CHATS_DELETE == 40);
assert!(ChatsSettingsAll.repr() == 41 && CHATS_SETTINGS_ALL == 41);
assert!(ChatsSettingsGet.repr() == 42 && CHATS_SETTINGS_GET == 42);
assert!(ChatsSettingsUpdate.repr() == 43 && CHATS_SETTINGS_UPDATE == 43);
assert!(AllGet.repr() == 44 && ALL_GET == 44);
}
#[test]
fn test_from_repr() {
for action in enum_iterator::all::<Action>() {
let repr = action.repr();
let action_from_repr = Action::from_repr(repr);
assert_eq!(Some(action), action_from_repr, "Failed for action: {:?}", action);
}
}
}

View File

@@ -50,6 +50,7 @@ jsonwebtoken = "9.3.1"
lazy_static = "1.5.0"
meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-types = { path = "../meilisearch-types" }
memmap2 = "0.9.7"
mimalloc = { version = "0.1.47", default-features = false }
mime = "0.3.17"
num_cpus = "1.17.0"
@@ -94,6 +95,7 @@ uuid = { version = "1.17.0", features = ["serde", "v4"] }
serde_urlencoded = "0.7.1"
termcolor = "1.4.1"
url = { version = "2.5.4", features = ["serde"] }
cohere-rust = "0.6.0"
tracing = "0.1.41"
tracing-subscriber = { version = "0.3.19", features = ["json"] }
tracing-trace = { version = "0.1.0", path = "../tracing-trace" }

View File

@@ -104,6 +104,4 @@ impl Analytics for MockAnalytics {
_request: &HttpRequest,
) {
}
fn get_fetch_documents(&self, _documents_query: &DocumentFetchKind, _request: &HttpRequest) {}
fn post_fetch_documents(&self, _documents_query: &DocumentFetchKind, _request: &HttpRequest) {}
}

View File

@@ -73,12 +73,6 @@ pub enum DocumentDeletionKind {
PerFilter,
}
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum DocumentFetchKind {
PerDocumentId { retrieve_vectors: bool },
Normal { with_filter: bool, limit: usize, offset: usize, retrieve_vectors: bool },
}
/// To send an event to segment, your event must be able to aggregate itself with another event of the same type.
pub trait Aggregate: 'static + mopa::Any + Send {
/// The name of the event that will be sent to segment.

View File

@@ -203,6 +203,7 @@ struct Infos {
experimental_composite_embedders: bool,
experimental_embedding_cache_entries: usize,
experimental_no_snapshot_compaction: bool,
experimental_no_edition_2024_for_dumps: bool,
experimental_no_edition_2024_for_settings: bool,
gpu_enabled: bool,
db_path: bool,
@@ -281,6 +282,7 @@ impl Infos {
indexer_options,
config_file_path,
no_analytics: _,
experimental_personalization_api_key: _,
} = options;
let schedule_snapshot = match schedule_snapshot {
@@ -293,6 +295,7 @@ impl Infos {
max_indexing_threads,
skip_index_budget: _,
experimental_no_edition_2024_for_settings,
experimental_no_edition_2024_for_dumps,
} = indexer_options;
let RuntimeTogglableFeatures {
@@ -329,6 +332,7 @@ impl Infos {
experimental_composite_embedders: composite_embedders,
experimental_embedding_cache_entries,
experimental_no_snapshot_compaction,
experimental_no_edition_2024_for_dumps,
gpu_enabled: meilisearch_types::milli::vector::is_cuda_enabled(),
db_path: db_path != PathBuf::from("./data.ms"),
import_dump: import_dump.is_some(),

View File

@@ -49,7 +49,7 @@ pub enum MeilisearchHttpError {
TooManySearchRequests(usize),
#[error("Internal error: Search limiter is down.")]
SearchLimiterIsDown,
#[error("The provided payload reached the size limit. The maximum accepted payload size is {}.", Byte::from_u64(*.0 as u64).get_appropriate_unit(UnitType::Binary))]
#[error("The provided payload reached the size limit. The maximum accepted payload size is {}.", Byte::from_u64(*.0 as u64).get_appropriate_unit(if *.0 % 1024 == 0 { UnitType::Binary } else { UnitType::Decimal }))]
PayloadTooLarge(usize),
#[error("Two indexes must be given for each swap. The list `[{}]` contains {} indexes.",
.0.iter().map(|uid| format!("\"{uid}\"")).collect::<Vec<_>>().join(", "), .0.len()

View File

@@ -9,6 +9,7 @@ pub mod middleware;
pub mod option;
#[cfg(test)]
mod option_test;
pub mod personalization;
pub mod routes;
pub mod search;
pub mod search_queue;
@@ -30,6 +31,7 @@ use actix_web::web::Data;
use actix_web::{web, HttpRequest};
use analytics::Analytics;
use anyhow::bail;
use bumpalo::Bump;
use error::PayloadError;
use extractors::payload::PayloadConfig;
use index_scheduler::versioning::Versioning;
@@ -38,6 +40,7 @@ use meilisearch_auth::{open_auth_store_env, AuthController};
use meilisearch_types::milli::constants::VERSION_MAJOR;
use meilisearch_types::milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
use meilisearch_types::milli::progress::{EmbedderStats, Progress};
use meilisearch_types::milli::update::new::indexer;
use meilisearch_types::milli::update::{
default_thread_pool_and_threads, IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig,
};
@@ -533,7 +536,7 @@ fn import_dump(
let mut index_reader = index_reader?;
let metadata = index_reader.metadata();
let uid = metadata.uid.clone();
tracing::info!("Importing index `{}`.", metadata.uid);
tracing::info!("Importing index `{uid}`.");
let date = Some((metadata.created_at, metadata.updated_at));
let index = index_scheduler.create_raw_index(&metadata.uid, date)?;
@@ -552,48 +555,100 @@ fn import_dump(
apply_settings_to_builder(&settings, &mut builder);
let embedder_stats: Arc<EmbedderStats> = Default::default();
builder.execute(&|| false, &progress, embedder_stats.clone())?;
wtxn.commit()?;
// 5.3 Import the documents.
// 5.3.1 We need to recreate the grenad+obkv format accepted by the index.
tracing::info!("Importing the documents.");
let file = tempfile::tempfile()?;
let mut builder = DocumentsBatchBuilder::new(BufWriter::new(file));
for document in index_reader.documents()? {
builder.append_json_object(&document?)?;
let mut wtxn = index.write_txn()?;
let rtxn = index.read_txn()?;
if index_scheduler.no_edition_2024_for_dumps() {
// 5.3 Import the documents.
// 5.3.1 We need to recreate the grenad+obkv format accepted by the index.
tracing::info!("Importing the documents.");
let file = tempfile::tempfile()?;
let mut builder = DocumentsBatchBuilder::new(BufWriter::new(file));
for document in index_reader.documents()? {
builder.append_json_object(&document?)?;
}
// This flushes the content of the batch builder.
let file = builder.into_inner()?.into_inner()?;
// 5.3.2 We feed it to the milli index.
let reader = BufReader::new(file);
let reader = DocumentsBatchReader::from_reader(reader)?;
let embedder_configs = index.embedding_configs().embedding_configs(&wtxn)?;
let embedders = index_scheduler.embedders(uid.to_string(), embedder_configs)?;
let builder = milli::update::IndexDocuments::new(
&mut wtxn,
&index,
indexer_config,
IndexDocumentsConfig {
update_method: IndexDocumentsMethod::ReplaceDocuments,
..Default::default()
},
|indexing_step| tracing::trace!("update: {:?}", indexing_step),
|| false,
&embedder_stats,
)?;
let builder = builder.with_embedders(embedders);
let (builder, user_result) = builder.add_documents(reader)?;
let user_result = user_result?;
tracing::info!(documents_found = user_result, "{} documents found.", user_result);
builder.execute()?;
} else {
let db_fields_ids_map = index.fields_ids_map(&rtxn)?;
let primary_key = index.primary_key(&rtxn)?;
let mut new_fields_ids_map = db_fields_ids_map.clone();
let mut indexer = indexer::DocumentOperation::new();
let embedders = index.embedding_configs().embedding_configs(&rtxn)?;
let embedders = index_scheduler.embedders(uid.clone(), embedders)?;
let mmap = unsafe { memmap2::Mmap::map(index_reader.documents_file())? };
indexer.replace_documents(&mmap)?;
let indexer_config = index_scheduler.indexer_config();
let pool = &indexer_config.thread_pool;
let indexer_alloc = Bump::new();
let (document_changes, mut operation_stats, primary_key) = indexer.into_changes(
&indexer_alloc,
&index,
&rtxn,
primary_key,
&mut new_fields_ids_map,
&|| false, // never stop processing a dump
progress.clone(),
)?;
let operation_stats = operation_stats.pop().unwrap();
if let Some(error) = operation_stats.error {
return Err(error.into());
}
let _congestion = indexer::index(
&mut wtxn,
&index,
pool,
indexer_config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,
primary_key,
&document_changes,
embedders,
&|| false, // never stop processing a dump
&progress,
&embedder_stats,
)?;
}
// This flushes the content of the batch builder.
let file = builder.into_inner()?.into_inner()?;
// 5.3.2 We feed it to the milli index.
let reader = BufReader::new(file);
let reader = DocumentsBatchReader::from_reader(reader)?;
let embedder_configs = index.embedding_configs().embedding_configs(&wtxn)?;
let embedders = index_scheduler.embedders(uid.to_string(), embedder_configs)?;
let builder = milli::update::IndexDocuments::new(
&mut wtxn,
&index,
indexer_config,
IndexDocumentsConfig {
update_method: IndexDocumentsMethod::ReplaceDocuments,
..Default::default()
},
|indexing_step| tracing::trace!("update: {:?}", indexing_step),
|| false,
&embedder_stats,
)?;
let builder = builder.with_embedders(embedders);
let (builder, user_result) = builder.add_documents(reader)?;
let user_result = user_result?;
tracing::info!(documents_found = user_result, "{} documents found.", user_result);
builder.execute()?;
wtxn.commit()?;
tracing::info!("All documents successfully imported.");
index_scheduler.refresh_index_stats(&uid)?;
}
@@ -622,12 +677,19 @@ pub fn configure_data(
(logs_route, logs_stderr): (LogRouteHandle, LogStderrHandle),
analytics: Data<Analytics>,
) {
// Create personalization service with API key from options
let personalization_service = index_scheduler
.experimental_personalization_api_key()
.cloned()
.map(personalization::PersonalizationService::cohere)
.unwrap_or_else(personalization::PersonalizationService::uninitialized);
let http_payload_size_limit = opt.http_payload_size_limit.as_u64() as usize;
config
.app_data(index_scheduler)
.app_data(auth)
.app_data(search_queue)
.app_data(analytics)
.app_data(web::Data::new(personalization_service))
.app_data(web::Data::new(logs_route))
.app_data(web::Data::new(logs_stderr))
.app_data(web::Data::new(opt.clone()))

View File

@@ -111,4 +111,9 @@ lazy_static! {
"Meilisearch Task Queue Size Until Stop Registering",
))
.expect("Can't create a metric");
pub static ref MEILISEARCH_PERSONALIZED_SEARCH_REQUESTS: IntGauge = register_int_gauge!(opts!(
"meilisearch_personalized_search_requests",
"Meilisearch number of search requests with personalization"
))
.expect("Can't create a metric");
}

View File

@@ -68,6 +68,10 @@ const MEILI_EXPERIMENTAL_LIMIT_BATCHED_TASKS_TOTAL_SIZE: &str =
const MEILI_EXPERIMENTAL_EMBEDDING_CACHE_ENTRIES: &str =
"MEILI_EXPERIMENTAL_EMBEDDING_CACHE_ENTRIES";
const MEILI_EXPERIMENTAL_NO_SNAPSHOT_COMPACTION: &str = "MEILI_EXPERIMENTAL_NO_SNAPSHOT_COMPACTION";
const MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_DUMPS: &str =
"MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_DUMPS";
const MEILI_EXPERIMENTAL_PERSONALIZATION_API_KEY: &str =
"MEILI_EXPERIMENTAL_PERSONALIZATION_API_KEY";
const DEFAULT_CONFIG_FILE_PATH: &str = "./config.toml";
const DEFAULT_DB_PATH: &str = "./data.ms";
const DEFAULT_HTTP_ADDR: &str = "localhost:7700";
@@ -467,6 +471,12 @@ pub struct Opt {
#[serde(default)]
pub experimental_no_snapshot_compaction: bool,
/// Experimental personalization API key feature.
///
/// Sets the API key for personalization features.
#[clap(long, env = MEILI_EXPERIMENTAL_PERSONALIZATION_API_KEY)]
pub experimental_personalization_api_key: Option<String>,
#[serde(flatten)]
#[clap(flatten)]
pub indexer_options: IndexerOpts,
@@ -572,6 +582,7 @@ impl Opt {
experimental_limit_batched_tasks_total_size,
experimental_embedding_cache_entries,
experimental_no_snapshot_compaction,
experimental_personalization_api_key,
} = self;
export_to_env_if_not_present(MEILI_DB_PATH, db_path);
export_to_env_if_not_present(MEILI_HTTP_ADDR, http_addr);
@@ -672,6 +683,12 @@ impl Opt {
MEILI_EXPERIMENTAL_NO_SNAPSHOT_COMPACTION,
experimental_no_snapshot_compaction.to_string(),
);
if let Some(experimental_personalization_api_key) = experimental_personalization_api_key {
export_to_env_if_not_present(
MEILI_EXPERIMENTAL_PERSONALIZATION_API_KEY,
experimental_personalization_api_key,
);
}
indexer_options.export_to_env();
}
@@ -724,6 +741,7 @@ impl Opt {
metrics: self.experimental_enable_metrics,
logs_route: self.experimental_enable_logs_route,
contains_filter: self.experimental_contains_filter,
experimental_personalization_api_key: self.experimental_personalization_api_key.clone(),
}
}
}
@@ -759,6 +777,15 @@ pub struct IndexerOpts {
#[clap(long, env = MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_SETTINGS)]
#[serde(default)]
pub experimental_no_edition_2024_for_settings: bool,
/// Experimental: make dump imports use the old document indexer.
///
/// When enabled, Meilisearch will use the old document indexer when importing dumps.
///
/// For more information, see <https://github.com/orgs/meilisearch/discussions/851>.
#[clap(long, env = MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_DUMPS)]
#[serde(default)]
pub experimental_no_edition_2024_for_dumps: bool,
}
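Since `IndexerOpts` is flattened into `Opt`, the new flag can also be set from the configuration file; a sketch, assuming the usual `config.toml`:
# config.toml: opt back into the old document indexer for dump imports
experimental_no_edition_2024_for_dumps = true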
impl IndexerOpts {
@@ -769,6 +796,7 @@ impl IndexerOpts {
max_indexing_threads,
skip_index_budget: _,
experimental_no_edition_2024_for_settings,
experimental_no_edition_2024_for_dumps,
} = self;
if let Some(max_indexing_memory) = max_indexing_memory.0 {
export_to_env_if_not_present(
@@ -788,6 +816,12 @@ impl IndexerOpts {
experimental_no_edition_2024_for_settings.to_string(),
);
}
if experimental_no_edition_2024_for_dumps {
export_to_env_if_not_present(
MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_DUMPS,
experimental_no_edition_2024_for_dumps.to_string(),
);
}
}
}
@@ -808,6 +842,7 @@ impl TryFrom<&IndexerOpts> for IndexerConfig {
skip_index_budget: other.skip_index_budget,
experimental_no_edition_2024_for_settings: other
.experimental_no_edition_2024_for_settings,
experimental_no_edition_2024_for_dumps: other.experimental_no_edition_2024_for_dumps,
chunk_compression_type: Default::default(),
chunk_compression_level: Default::default(),
documents_chunk_size: Default::default(),

View File

@@ -0,0 +1,244 @@
use crate::search::{Personalize, SearchResult};
use cohere_rust::{
api::rerank::{ReRankModel, ReRankRequest},
Cohere,
};
use meilisearch_types::error::ResponseError;
use tracing::{debug, error, info};
pub struct CohereService {
cohere: Cohere,
}
impl CohereService {
pub fn new(api_key: String) -> Self {
info!("Personalization service initialized with Cohere API");
Self { cohere: Cohere::new("https://api.cohere.ai", api_key) }
}
pub async fn rerank_search_results(
&self,
search_result: SearchResult,
personalize: Option<&Personalize>,
query: Option<&str>,
) -> Result<SearchResult, ResponseError> {
// Extract user context from personalization
let Some(user_context) = personalize.and_then(|p| p.user_context.as_deref()) else {
return Ok(search_result);
};
// Build the prompt by merging query and user context
let prompt = match query {
Some(q) => format!("User Context: {user_context}\nQuery: {q}"),
None => format!("User Context: {user_context}"),
};
// Extract documents for reranking
let documents: Vec<String> = search_result
.hits
.iter()
.map(|hit| {
// Convert the document to a string representation for reranking
serde_json::to_string(&hit.document).unwrap_or_else(|_| "{}".to_string())
})
.collect();
if documents.is_empty() {
return Ok(search_result);
}
// Prepare the rerank request
let rerank_request = ReRankRequest {
query: &prompt,
documents: &documents,
model: ReRankModel::EnglishV3, // Use the more recent default model
top_n: None,
max_chunks_per_doc: None,
};
// Call Cohere's rerank API
match self.cohere.rerank(&rerank_request).await {
Ok(rerank_response) => {
debug!("Cohere rerank successful, reordering {} results", search_result.hits.len());
// Create a mapping from original index to new rank
let reranked_indices: Vec<usize> =
rerank_response.iter().map(|result| result.index as usize).collect();
// Reorder the hits based on Cohere's reranking; `get` skips any
// out-of-range index instead of panicking on a malformed response.
let mut reranked_hits = Vec::new();
for index in reranked_indices.iter() {
    if let Some(hit) = search_result.hits.get(*index) {
        reranked_hits.push(hit.clone());
    }
}
Ok(SearchResult { hits: reranked_hits, ..search_result })
}
Err(e) => {
error!("Cohere rerank failed with model EnglishV3: {}", e);
// Return original results on error
Ok(search_result)
}
}
}
}
pub enum PersonalizationService {
Cohere(CohereService),
Uninitialized,
}
impl PersonalizationService {
pub fn cohere(api_key: String) -> Self {
Self::Cohere(CohereService::new(api_key))
}
pub fn uninitialized() -> Self {
debug!("Personalization service uninitialized");
Self::Uninitialized
}
pub async fn rerank_search_results(
&self,
search_result: SearchResult,
personalize: Option<&Personalize>,
query: Option<&str>,
) -> Result<SearchResult, ResponseError> {
match self {
Self::Cohere(cohere_service) => {
cohere_service.rerank_search_results(search_result, personalize, query).await
}
Self::Uninitialized => Ok(search_result),
}
}
}
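Construction mirrors what `configure_data` does earlier in this diff: the Cohere variant is only built when an API key was provided; a minimal sketch using the environment variable introduced above:
// Sketch: build the service from the experimental API key, if any.
let service = match std::env::var("MEILI_EXPERIMENTAL_PERSONALIZATION_API_KEY").ok() {
    Some(api_key) => PersonalizationService::cohere(api_key),
    None => PersonalizationService::uninitialized(),
};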
#[cfg(test)]
mod tests {
use super::*;
use crate::search::{HitsInfo, SearchHit};
#[tokio::test]
async fn test_personalization_service_without_api_key() {
let service = PersonalizationService::uninitialized();
let personalize = Personalize { user_context: Some("test user".to_string()) };
let search_result = SearchResult {
hits: vec![SearchHit {
document: serde_json::Map::new(),
formatted: serde_json::Map::new(),
matches_position: None,
ranking_score: Some(1.0),
ranking_score_details: None,
}],
query: "test".to_string(),
processing_time_ms: 10,
hits_info: HitsInfo::OffsetLimit { limit: 1, offset: 0, estimated_total_hits: 1 },
facet_distribution: None,
facet_stats: None,
semantic_hit_count: None,
degraded: false,
used_negative_operator: false,
};
let result = service
.rerank_search_results(search_result.clone(), Some(&personalize), Some("test"))
.await;
assert!(result.is_ok());
// Should return original results when no API key is provided
let reranked_result = result.unwrap();
assert_eq!(reranked_result.hits.len(), search_result.hits.len());
}
#[tokio::test]
async fn test_personalization_service_with_user_context_only() {
let service = PersonalizationService::cohere("fake_key".to_string());
let personalize = Personalize { user_context: Some("test user".to_string()) };
let search_result = SearchResult {
hits: vec![SearchHit {
document: serde_json::Map::new(),
formatted: serde_json::Map::new(),
matches_position: None,
ranking_score: Some(1.0),
ranking_score_details: None,
}],
query: "test".to_string(),
processing_time_ms: 10,
hits_info: HitsInfo::OffsetLimit { limit: 1, offset: 0, estimated_total_hits: 1 },
facet_distribution: None,
facet_stats: None,
semantic_hit_count: None,
degraded: false,
used_negative_operator: false,
};
let result =
service.rerank_search_results(search_result.clone(), Some(&personalize), None).await;
assert!(result.is_ok());
// Should attempt reranking with user context only
let reranked_result = result.unwrap();
assert_eq!(reranked_result.hits.len(), search_result.hits.len());
}
#[tokio::test]
async fn test_personalization_service_with_query_only() {
let service = PersonalizationService::cohere("fake_key".to_string());
let search_result = SearchResult {
hits: vec![SearchHit {
document: serde_json::Map::new(),
formatted: serde_json::Map::new(),
matches_position: None,
ranking_score: Some(1.0),
ranking_score_details: None,
}],
query: "test".to_string(),
processing_time_ms: 10,
hits_info: HitsInfo::OffsetLimit { limit: 1, offset: 0, estimated_total_hits: 1 },
facet_distribution: None,
facet_stats: None,
semantic_hit_count: None,
degraded: false,
used_negative_operator: false,
};
let result = service.rerank_search_results(search_result.clone(), None, Some("test")).await;
assert!(result.is_ok());
// Should attempt reranking with query only
let reranked_result = result.unwrap();
assert_eq!(reranked_result.hits.len(), search_result.hits.len());
}
#[tokio::test]
async fn test_personalization_service_both_none() {
let service = PersonalizationService::cohere("fake_key".to_string());
let search_result = SearchResult {
hits: vec![SearchHit {
document: serde_json::Map::new(),
formatted: serde_json::Map::new(),
matches_position: None,
ranking_score: Some(1.0),
ranking_score_details: None,
}],
query: "test".to_string(),
processing_time_ms: 10,
hits_info: HitsInfo::OffsetLimit { limit: 1, offset: 0, estimated_total_hits: 1 },
facet_distribution: None,
facet_stats: None,
semantic_hit_count: None,
degraded: false,
used_negative_operator: false,
};
let result = service.rerank_search_results(search_result.clone(), None, None).await;
assert!(result.is_ok());
// Should return original results when both query and user_context are None
let reranked_result = result.unwrap();
assert_eq!(reranked_result.hits.len(), search_result.hits.len());
}
}

View File

@@ -27,9 +27,10 @@ use meilisearch_types::features::{
ChatCompletionPrompts as DbChatCompletionPrompts,
ChatCompletionSource as DbChatCompletionSource, SystemRole,
};
use meilisearch_types::heed::RoTxn;
use meilisearch_types::keys::actions;
use meilisearch_types::milli::index::ChatConfig;
use meilisearch_types::milli::{all_obkv_to_json, obkv_to_json, TimeBudget};
use meilisearch_types::milli::{all_obkv_to_json, obkv_to_json, OrderBy, PatternMatch, TimeBudget};
use meilisearch_types::{Document, Index};
use serde::Deserialize;
use serde_json::json;
@@ -169,6 +170,7 @@ fn setup_search_tool(
let mut index_uids = Vec::new();
let mut function_description = prompts.search_description.clone();
let mut filter_description = prompts.search_filter_param.clone();
index_scheduler.try_for_each_index::<_, ()>(|name, index| {
// Make sure to skip unauthorized indexes
if !filters.is_index_authorized(name) {
@@ -180,16 +182,22 @@ fn setup_search_tool(
let index_description = chat_config.description;
let _ = writeln!(&mut function_description, "\n\n - {name}: {index_description}\n");
index_uids.push(name.to_string());
let facet_distributions = format_facet_distributions(index, &rtxn, 10).unwrap(); // TODO do not unwrap
let _ = writeln!(&mut filter_description, "\n## Facet distributions of the {name} index");
let _ = writeln!(&mut filter_description, "{facet_distributions}");
Ok(())
})?;
tracing::debug!("LLM function description: {function_description}");
tracing::debug!("LLM filter description: {filter_description}");
let tool = ChatCompletionToolArgs::default()
.r#type(ChatCompletionToolType::Function)
.function(
FunctionObjectArgs::default()
.name(MEILI_SEARCH_IN_INDEX_FUNCTION_NAME)
.description(&function_description)
.description(function_description)
.parameters(json!({
"type": "object",
"properties": {
@@ -203,9 +211,13 @@ fn setup_search_tool(
// "type": ["string", "null"],
"type": "string",
"description": prompts.search_q_param,
},
"filter": {
"type": "string",
"description": filter_description,
}
},
"required": ["index_uid", "q"],
"required": ["index_uid", "q", "filter"],
"additionalProperties": false,
}))
.strict(true)
@@ -247,11 +259,19 @@ async fn process_search_request(
auth_token: &str,
index_uid: String,
q: Option<String>,
filter: Option<String>,
) -> Result<(Index, Vec<Document>, String), ResponseError> {
let index = index_scheduler.index(&index_uid)?;
let rtxn = index.static_read_txn()?;
let ChatConfig { description: _, prompt: _, search_parameters } = index.chat_config(&rtxn)?;
let mut query = SearchQuery { q, ..SearchQuery::from(search_parameters) };
let mut query = SearchQuery {
q,
filter: filter.map(serde_json::Value::from),
..SearchQuery::from(search_parameters)
};
tracing::debug!("LLM query: {:?}", query);
let auth_filter = ActionPolicy::<{ actions::SEARCH }>::authenticate(
auth_ctrl,
auth_token,
@@ -280,14 +300,23 @@ async fn process_search_request(
let (search, _is_finite_pagination, _max_total_hits, _offset) =
prepare_search(&index_cloned, &rtxn, &query, &search_kind, time_budget, features)?;
search_from_kind(index_uid, search_kind, search)
.map(|(search_results, _)| (rtxn, search_results))
.map_err(ResponseError::from)
match search_from_kind(index_uid, search_kind, search) {
Ok((search_results, _)) => Ok((rtxn, Ok(search_results))),
Err(MeilisearchHttpError::Milli {
error: meilisearch_types::milli::Error::UserError(user_error),
index_name: _,
}) => Ok((rtxn, Err(user_error))),
Err(err) => Err(ResponseError::from(err)),
}
})
.await;
permit.drop().await;
let output = output?;
let output = match output? {
Ok((rtxn, Ok(search_results))) => Ok((rtxn, search_results)),
Ok((_rtxn, Err(error))) => return Ok((index, Vec::new(), error.to_string())),
Err(err) => Err(err),
};
let mut documents = Vec::new();
if let Ok((ref rtxn, ref search_result)) = output {
MEILISEARCH_CHAT_SEARCH_REQUESTS.with_label_values(&["internal"]).inc();
@@ -395,16 +424,19 @@ async fn non_streamed_chat(
for call in meili_calls {
let result = match serde_json::from_str(&call.function.arguments) {
Ok(SearchInIndexParameters { index_uid, q }) => process_search_request(
&index_scheduler,
auth_ctrl.clone(),
&search_queue,
auth_token,
index_uid,
q,
)
.await
.map_err(|e| e.to_string()),
Ok(SearchInIndexParameters { index_uid, q, filter }) => {
process_search_request(
&index_scheduler,
auth_ctrl.clone(),
&search_queue,
auth_token,
index_uid,
q,
filter,
)
.await
.map_err(|e| e.to_string())
}
Err(err) => Err(err.to_string()),
};
@@ -719,13 +751,14 @@ async fn handle_meili_tools(
let mut error = None;
let result = match serde_json::from_str(&call.function.arguments) {
Ok(SearchInIndexParameters { index_uid, q }) => match process_search_request(
Ok(SearchInIndexParameters { index_uid, q, filter }) => match process_search_request(
index_scheduler,
auth_ctrl.clone(),
search_queue,
auth_token,
index_uid,
q,
filter,
)
.await
{
@@ -801,4 +834,42 @@ struct SearchInIndexParameters {
index_uid: String,
/// The query parameter to use.
q: Option<String>,
/// The filter parameter to use.
filter: Option<String>,
}
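The JSON arguments the LLM produces for a tool call deserialize directly into this struct; for example (hypothetical values):
{ "index_uid": "hackernews", "q": "rust stories", "filter": "type = story AND vote > 100" }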
fn format_facet_distributions(
index: &Index,
rtxn: &RoTxn,
max_values_per_facet: usize,
) -> meilisearch_types::milli::Result<String> {
let universe = index.documents_ids(rtxn)?;
let rules = index.filterable_attributes_rules(rtxn)?;
let fields_ids_map = index.fields_ids_map(rtxn)?;
let filterable_attributes = fields_ids_map
.names()
.filter(|name| rules.iter().any(|rule| matches!(rule.match_str(name), PatternMatch::Match)))
.map(|name| (name, OrderBy::Count));
let facets_distribution = index
.facets_distribution(rtxn)
.max_values_per_facet(max_values_per_facet)
.candidates(universe)
.facets(filterable_attributes)
.execute()?;
let mut output = String::new();
for (facet_name, entries) in facets_distribution {
let _ = write!(&mut output, "{}: ", facet_name);
let total_entries = entries.len();
for (i, (value, _count)) in entries.into_iter().enumerate() {
let _ = if total_entries.saturating_sub(1) == i {
write!(&mut output, "{value}.")
} else {
write!(&mut output, "{value}, ")
};
}
let _ = writeln!(&mut output);
}
Ok(output)
}
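For a hypothetical `movies` index, the section this function appends to the filter description would look roughly like:
## Facet distributions of the movies index
genres: Drama, Comedy, Adventure.
year: 2020, 2019, 2018.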

View File

@@ -8,8 +8,8 @@ use meilisearch_types::error::{Code, ResponseError};
use meilisearch_types::features::{
ChatCompletionPrompts as DbChatCompletionPrompts, ChatCompletionSettings,
ChatCompletionSource as DbChatCompletionSource, DEFAULT_CHAT_SEARCH_DESCRIPTION_PROMPT,
DEFAULT_CHAT_SEARCH_INDEX_UID_PARAM_PROMPT, DEFAULT_CHAT_SEARCH_Q_PARAM_PROMPT,
DEFAULT_CHAT_SYSTEM_PROMPT,
DEFAULT_CHAT_SEARCH_FILTER_PARAM_PROMPT, DEFAULT_CHAT_SEARCH_INDEX_UID_PARAM_PROMPT,
DEFAULT_CHAT_SEARCH_Q_PARAM_PROMPT, DEFAULT_CHAT_SYSTEM_PROMPT,
};
use meilisearch_types::keys::actions;
use meilisearch_types::milli::update::Setting;
@@ -84,6 +84,11 @@ async fn patch_settings(
Setting::Reset => DEFAULT_CHAT_SEARCH_Q_PARAM_PROMPT.to_string(),
Setting::NotSet => old_settings.prompts.search_q_param,
},
search_filter_param: match new_prompts.search_filter_param {
Setting::Set(new_description) => new_description,
Setting::Reset => DEFAULT_CHAT_SEARCH_FILTER_PARAM_PROMPT.to_string(),
Setting::NotSet => old_settings.prompts.search_filter_param,
},
search_index_uid_param: match new_prompts.search_index_uid_param {
Setting::Set(new_description) => new_description,
Setting::Reset => DEFAULT_CHAT_SEARCH_INDEX_UID_PARAM_PROMPT.to_string(),
@@ -252,6 +257,10 @@ pub struct ChatPrompts {
#[schema(value_type = Option<String>, example = json!("This is query parameter..."))]
pub search_q_param: Setting<String>,
#[serde(default)]
#[deserr(default, error = DeserrJsonError<InvalidChatCompletionSearchFilterParamPrompt>)]
#[schema(value_type = Option<String>, example = json!("This is filter parameter..."))]
pub search_filter_param: Setting<String>,
#[serde(default)]
#[deserr(default, error = DeserrJsonError<InvalidChatCompletionSearchIndexUidParamPrompt>)]
#[schema(value_type = Option<String>, example = json!("This is index you want to search in..."))]
pub search_index_uid_param: Setting<String>,

View File

@@ -1,3 +1,5 @@
use url::Url;
use crate::analytics::Aggregate;
use crate::routes::export::Export;
@@ -5,6 +7,7 @@ use crate::routes::export::Export;
pub struct ExportAnalytics {
total_received: usize,
has_api_key: bool,
sum_exports_meilisearch_cloud: usize,
sum_index_patterns: usize,
sum_patterns_with_filter: usize,
sum_patterns_with_override_settings: usize,
@@ -13,8 +16,14 @@ pub struct ExportAnalytics {
impl ExportAnalytics {
pub fn from_export(export: &Export) -> Self {
let Export { url: _, api_key, payload_size, indexes } = export;
let Export { url, api_key, payload_size, indexes } = export;
let url = Url::parse(url).ok();
let is_meilisearch_cloud = url.as_ref().and_then(Url::host_str).is_some_and(|host| {
host.ends_with("meilisearch.dev")
|| host.ends_with("meilisearch.com")
|| host.ends_with("meilisearch.io")
});
let has_api_key = api_key.is_some();
let index_patterns_count = indexes.as_ref().map_or(0, |indexes| indexes.len());
let patterns_with_filter_count = indexes.as_ref().map_or(0, |indexes| {
@@ -33,6 +42,7 @@ impl ExportAnalytics {
Self {
total_received: 1,
has_api_key,
sum_exports_meilisearch_cloud: is_meilisearch_cloud as usize,
sum_index_patterns: index_patterns_count,
sum_patterns_with_filter: patterns_with_filter_count,
sum_patterns_with_override_settings: patterns_with_override_settings_count,
@@ -49,6 +59,7 @@ impl Aggregate for ExportAnalytics {
fn aggregate(mut self: Box<Self>, other: Box<Self>) -> Box<Self> {
self.total_received += other.total_received;
self.has_api_key |= other.has_api_key;
self.sum_exports_meilisearch_cloud += other.sum_exports_meilisearch_cloud;
self.sum_index_patterns += other.sum_index_patterns;
self.sum_patterns_with_filter += other.sum_patterns_with_filter;
self.sum_patterns_with_override_settings += other.sum_patterns_with_override_settings;
@@ -63,6 +74,12 @@ impl Aggregate for ExportAnalytics {
Some(self.payload_sizes.iter().sum::<u64>() / self.payload_sizes.len() as u64)
};
let avg_exports_meilisearch_cloud = if self.total_received == 0 {
None
} else {
Some(self.sum_exports_meilisearch_cloud as f64 / self.total_received as f64)
};
let avg_index_patterns = if self.total_received == 0 {
None
} else {
@@ -84,6 +101,7 @@ impl Aggregate for ExportAnalytics {
serde_json::json!({
"total_received": self.total_received,
"has_api_key": self.has_api_key,
"avg_exports_meilisearch_cloud": avg_exports_meilisearch_cloud,
"avg_index_patterns": avg_index_patterns,
"avg_patterns_with_filter": avg_patterns_with_filter,
"avg_patterns_with_override_settings": avg_patterns_with_override_settings,

View File

@@ -1,6 +1,7 @@
use std::collections::HashSet;
use std::io::{ErrorKind, Seek as _};
use std::marker::PhantomData;
use std::str::FromStr;
use actix_web::http::header::CONTENT_TYPE;
use actix_web::web::Data;
@@ -17,9 +18,11 @@ use meilisearch_types::error::deserr_codes::*;
use meilisearch_types::error::{Code, ResponseError};
use meilisearch_types::heed::RoTxn;
use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::milli::documents::sort::recursive_sort;
use meilisearch_types::milli::index::EmbeddingsWithMetadata;
use meilisearch_types::milli::update::IndexDocumentsMethod;
use meilisearch_types::milli::vector::parsed_vectors::ExplicitVectors;
use meilisearch_types::milli::DocumentId;
use meilisearch_types::milli::{AscDesc, DocumentId};
use meilisearch_types::serde_cs::vec::CS;
use meilisearch_types::star_or::OptionStarOrList;
use meilisearch_types::tasks::KindWithContent;
@@ -42,6 +45,7 @@ use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::GuardedData;
use crate::extractors::payload::Payload;
use crate::extractors::sequential_extractor::SeqHandler;
use crate::routes::indexes::search::fix_sort_query_parameters;
use crate::routes::{
get_task_id, is_dry_run, PaginationView, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT,
};
@@ -135,6 +139,8 @@ pub struct DocumentsFetchAggregator<Method: AggregateMethod> {
per_document_id: bool,
// if a filter was used
per_filter: bool,
// if documents were sorted
sort: bool,
#[serde(rename = "vector.retrieve_vectors")]
retrieve_vectors: bool,
@@ -151,39 +157,6 @@ pub struct DocumentsFetchAggregator<Method: AggregateMethod> {
marker: std::marker::PhantomData<Method>,
}
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum DocumentFetchKind {
PerDocumentId { retrieve_vectors: bool },
Normal { with_filter: bool, limit: usize, offset: usize, retrieve_vectors: bool, ids: usize },
}
impl<Method: AggregateMethod> DocumentsFetchAggregator<Method> {
pub fn from_query(query: &DocumentFetchKind) -> Self {
let (limit, offset, retrieve_vectors) = match query {
DocumentFetchKind::PerDocumentId { retrieve_vectors } => (1, 0, *retrieve_vectors),
DocumentFetchKind::Normal { limit, offset, retrieve_vectors, .. } => {
(*limit, *offset, *retrieve_vectors)
}
};
let ids = match query {
DocumentFetchKind::Normal { ids, .. } => *ids,
DocumentFetchKind::PerDocumentId { .. } => 0,
};
Self {
per_document_id: matches!(query, DocumentFetchKind::PerDocumentId { .. }),
per_filter: matches!(query, DocumentFetchKind::Normal { with_filter, .. } if *with_filter),
max_limit: limit,
max_offset: offset,
retrieve_vectors,
max_document_ids: ids,
marker: PhantomData,
}
}
}
impl<Method: AggregateMethod> Aggregate for DocumentsFetchAggregator<Method> {
fn event_name(&self) -> &'static str {
Method::event_name()
@@ -193,6 +166,7 @@ impl<Method: AggregateMethod> Aggregate for DocumentsFetchAggregator<Method> {
Box::new(Self {
per_document_id: self.per_document_id | new.per_document_id,
per_filter: self.per_filter | new.per_filter,
sort: self.sort | new.sort,
retrieve_vectors: self.retrieve_vectors | new.retrieve_vectors,
max_limit: self.max_limit.max(new.max_limit),
max_offset: self.max_offset.max(new.max_offset),
@@ -276,6 +250,7 @@ pub async fn get_document(
retrieve_vectors: param_retrieve_vectors.0,
per_document_id: true,
per_filter: false,
sort: false,
max_limit: 0,
max_offset: 0,
max_document_ids: 0,
@@ -406,6 +381,8 @@ pub struct BrowseQueryGet {
#[param(default, value_type = Option<String>, example = "popularity > 1000")]
#[deserr(default, error = DeserrQueryParamError<InvalidDocumentFilter>)]
filter: Option<String>,
#[deserr(default, error = DeserrQueryParamError<InvalidDocumentSort>)]
sort: Option<String>,
}
#[derive(Debug, Deserr, ToSchema)]
@@ -430,6 +407,9 @@ pub struct BrowseQuery {
#[schema(default, value_type = Option<Value>, example = "popularity > 1000")]
#[deserr(default, error = DeserrJsonError<InvalidDocumentFilter>)]
filter: Option<Value>,
#[schema(default, value_type = Option<Vec<String>>, example = json!(["title:asc", "rating:desc"]))]
#[deserr(default, error = DeserrJsonError<InvalidDocumentSort>)]
sort: Option<Vec<String>>,
}
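With these fields in place, the fetch route accepts a sort alongside the usual pagination; a hedged payload sketch (index name and attributes illustrative):
POST /indexes/movies/documents/fetch
{ "limit": 10, "offset": 0, "sort": ["title:asc", "rating:desc"] }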
/// Get documents with POST
@@ -495,6 +475,7 @@ pub async fn documents_by_query_post(
analytics.publish(
DocumentsFetchAggregator::<DocumentsPOST> {
per_filter: body.filter.is_some(),
sort: body.sort.is_some(),
retrieve_vectors: body.retrieve_vectors,
max_limit: body.limit,
max_offset: body.offset,
@@ -571,7 +552,7 @@ pub async fn get_documents(
) -> Result<HttpResponse, ResponseError> {
debug!(parameters = ?params, "Get documents GET");
let BrowseQueryGet { limit, offset, fields, retrieve_vectors, filter, ids } =
let BrowseQueryGet { limit, offset, fields, retrieve_vectors, filter, ids, sort } =
params.into_inner();
let filter = match filter {
@@ -582,20 +563,20 @@ pub async fn get_documents(
None => None,
};
let ids = ids.map(|ids| ids.into_iter().map(Into::into).collect());
let query = BrowseQuery {
offset: offset.0,
limit: limit.0,
fields: fields.merge_star_and_none(),
retrieve_vectors: retrieve_vectors.0,
filter,
ids,
ids: ids.map(|ids| ids.into_iter().map(Into::into).collect()),
sort: sort.map(|attr| fix_sort_query_parameters(&attr)),
};
analytics.publish(
DocumentsFetchAggregator::<DocumentsGET> {
per_filter: query.filter.is_some(),
sort: query.sort.is_some(),
retrieve_vectors: query.retrieve_vectors,
max_limit: query.limit,
max_offset: query.offset,
@@ -615,7 +596,7 @@ fn documents_by_query(
query: BrowseQuery,
) -> Result<HttpResponse, ResponseError> {
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let BrowseQuery { offset, limit, fields, retrieve_vectors, filter, ids } = query;
let BrowseQuery { offset, limit, fields, retrieve_vectors, filter, ids, sort } = query;
let retrieve_vectors = RetrieveVectors::new(retrieve_vectors);
@@ -633,6 +614,18 @@ fn documents_by_query(
None
};
let sort_criteria = if let Some(sort) = &sort {
let sorts: Vec<_> = match sort.iter().map(|s| milli::AscDesc::from_str(s)).collect() {
Ok(sorts) => sorts,
Err(asc_desc_error) => {
return Err(milli::SortError::from(asc_desc_error).into_document_error().into())
}
};
Some(sorts)
} else {
None
};
let index = index_scheduler.index(&index_uid)?;
let (total, documents) = retrieve_documents(
&index,
@@ -643,6 +636,7 @@ fn documents_by_query(
fields,
retrieve_vectors,
index_scheduler.features(),
sort_criteria,
)?;
let ret = PaginationView::new(offset, limit, total as usize, documents);
@@ -1467,9 +1461,13 @@ fn some_documents<'a, 't: 'a>(
Some(Value::Object(map)) => map,
_ => Default::default(),
};
for (name, (vector, regenerate)) in index.embeddings(rtxn, key)? {
for (
name,
EmbeddingsWithMetadata { embeddings, regenerate, has_fragments: _ },
) in index.embeddings(rtxn, key)?
{
let embeddings =
ExplicitVectors { embeddings: Some(vector.into()), regenerate };
ExplicitVectors { embeddings: Some(embeddings.into()), regenerate };
vectors.insert(
name,
serde_json::to_value(embeddings).map_err(MeilisearchHttpError::from)?,
@@ -1494,6 +1492,7 @@ fn retrieve_documents<S: AsRef<str>>(
attributes_to_retrieve: Option<Vec<S>>,
retrieve_vectors: RetrieveVectors,
features: RoFeatures,
sort_criteria: Option<Vec<AscDesc>>,
) -> Result<(u64, Vec<Document>), ResponseError> {
let rtxn = index.read_txn()?;
let filter = &filter;
@@ -1526,15 +1525,32 @@ fn retrieve_documents<S: AsRef<str>>(
})?
}
let (it, number_of_documents) = {
let (it, number_of_documents) = if let Some(sort) = sort_criteria {
let number_of_documents = candidates.len();
let facet_sort = recursive_sort(index, &rtxn, sort, &candidates)?;
let iter = facet_sort.iter()?;
let mut documents = Vec::with_capacity(limit);
for result in iter.skip(offset).take(limit) {
documents.push(result?);
}
(
itertools::Either::Left(some_documents(
index,
&rtxn,
documents.into_iter(),
retrieve_vectors,
)?),
number_of_documents,
)
} else {
let number_of_documents = candidates.len();
(
some_documents(
itertools::Either::Right(some_documents(
index,
&rtxn,
candidates.into_iter().skip(offset).take(limit),
retrieve_vectors,
)?,
)?),
number_of_documents,
)
};

View File

@@ -343,6 +343,7 @@ impl From<FacetSearchQuery> for SearchQuery {
hybrid,
ranking_score_threshold,
locales,
personalize: None,
}
}
}

View File

@@ -22,10 +22,10 @@ use crate::extractors::sequential_extractor::SeqHandler;
use crate::metrics::MEILISEARCH_DEGRADED_SEARCH_REQUESTS;
use crate::routes::indexes::search_analytics::{SearchAggregator, SearchGET, SearchPOST};
use crate::search::{
add_search_rules, perform_search, HybridQuery, MatchingStrategy, RankingScoreThreshold,
RetrieveVectors, SearchKind, SearchQuery, SearchResult, SemanticRatio, DEFAULT_CROP_LENGTH,
DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG,
DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET, DEFAULT_SEMANTIC_RATIO,
add_search_rules, perform_search, HybridQuery, MatchingStrategy, Personalize,
RankingScoreThreshold, RetrieveVectors, SearchKind, SearchQuery, SearchResult, SemanticRatio,
DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG,
DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET, DEFAULT_SEMANTIC_RATIO,
};
use crate::search_queue::SearchQueue;
@@ -132,6 +132,8 @@ pub struct SearchQueryGet {
#[deserr(default, error = DeserrQueryParamError<InvalidSearchLocales>)]
#[param(value_type = Vec<Locale>, explode = false)]
pub locales: Option<CS<Locale>>,
#[deserr(default, error = DeserrQueryParamError<InvalidSearchPersonalizeUserContext>)]
pub personalize_user_context: Option<String>,
}
#[derive(Debug, Clone, Copy, PartialEq, deserr::Deserr)]
@@ -203,6 +205,10 @@ impl TryFrom<SearchQueryGet> for SearchQuery {
));
}
let personalize = other
.personalize_user_context
.map(|user_context| Personalize { user_context: Some(user_context) });
Ok(Self {
q: other.q,
// `media` not supported for `GET`
@@ -232,6 +238,7 @@ impl TryFrom<SearchQueryGet> for SearchQuery {
hybrid,
ranking_score_threshold: other.ranking_score_threshold.map(|o| o.0),
locales: other.locales.map(|o| o.into_iter().collect()),
personalize,
})
}
}
@@ -320,6 +327,7 @@ pub fn fix_sort_query_parameters(sort_query: &str) -> Vec<String> {
pub async fn search_with_url_query(
index_scheduler: GuardedData<ActionPolicy<{ actions::SEARCH }>, Data<IndexScheduler>>,
search_queue: web::Data<SearchQueue>,
personalization_service: web::Data<crate::personalization::PersonalizationService>,
index_uid: web::Path<String>,
params: AwebQueryParameter<SearchQueryGet, DeserrQueryParamError>,
req: HttpRequest,
@@ -342,6 +350,11 @@ pub async fn search_with_url_query(
let search_kind =
search_kind(&query, index_scheduler.get_ref(), index_uid.to_string(), &index)?;
let retrieve_vector = RetrieveVectors::new(query.retrieve_vectors);
// Extract personalization and query string before moving query
let personalize = query.personalize.clone();
let query_str = query.q.clone();
let permit = search_queue.try_get_search_permit().await?;
let search_result = tokio::task::spawn_blocking(move || {
perform_search(
@@ -361,7 +374,12 @@ pub async fn search_with_url_query(
}
analytics.publish(aggregate, &req);
let search_result = search_result?;
let mut search_result = search_result?;
// Apply personalization if requested
search_result = personalization_service
.rerank_search_results(search_result, personalize.as_ref(), query_str.as_deref())
.await?;
debug!(returns = ?search_result, "Search get");
Ok(HttpResponse::Ok().json(search_result))
@@ -426,6 +444,7 @@ pub async fn search_with_url_query(
pub async fn search_with_post(
index_scheduler: GuardedData<ActionPolicy<{ actions::SEARCH }>, Data<IndexScheduler>>,
search_queue: web::Data<SearchQueue>,
personalization_service: web::Data<crate::personalization::PersonalizationService>,
index_uid: web::Path<String>,
params: AwebJson<SearchQuery, DeserrJsonError>,
req: HttpRequest,
@@ -449,6 +468,10 @@ pub async fn search_with_post(
search_kind(&query, index_scheduler.get_ref(), index_uid.to_string(), &index)?;
let retrieve_vectors = RetrieveVectors::new(query.retrieve_vectors);
// Extract personalization and query string before moving query
let personalize = query.personalize.clone();
let query_str = query.q.clone();
let permit = search_queue.try_get_search_permit().await?;
let search_result = tokio::task::spawn_blocking(move || {
perform_search(
@@ -471,7 +494,12 @@ pub async fn search_with_post(
}
analytics.publish(aggregate, &req);
let search_result = search_result?;
let mut search_result = search_result?;
// Apply personalization if requested
search_result = personalization_service
.rerank_search_results(search_result, personalize.as_ref(), query_str.as_deref())
.await?;
debug!(returns = ?search_result, "Search post");
Ok(HttpResponse::Ok().json(search_result))
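On the GET route the same information travels as a flat query parameter, presumably `personalizeUserContext` following the camelCase convention of the other parameters; a sketch with illustrative values:
GET /indexes/movies/search?q=action&personalizeUserContext=loves%20sci-fi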

View File

@@ -7,6 +7,7 @@ use serde_json::{json, Value};
use crate::aggregate_methods;
use crate::analytics::{Aggregate, AggregateMethod};
use crate::metrics::MEILISEARCH_PERSONALIZED_SEARCH_REQUESTS;
use crate::search::{
SearchQuery, SearchResult, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER,
DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT,
@@ -94,6 +95,9 @@ pub struct SearchAggregator<Method: AggregateMethod> {
show_ranking_score_details: bool,
ranking_score_threshold: bool,
// personalization
total_personalized: usize,
marker: std::marker::PhantomData<Method>,
}
@@ -128,6 +132,7 @@ impl<Method: AggregateMethod> SearchAggregator<Method> {
hybrid,
ranking_score_threshold,
locales,
personalize,
} = query;
let mut ret = Self::default();
@@ -202,6 +207,12 @@ impl<Method: AggregateMethod> SearchAggregator<Method> {
ret.locales = locales.iter().copied().collect();
}
// personalization
if personalize.is_some() {
ret.total_personalized = 1;
MEILISEARCH_PERSONALIZED_SEARCH_REQUESTS.inc();
}
ret.highlight_pre_tag = *highlight_pre_tag != DEFAULT_HIGHLIGHT_PRE_TAG();
ret.highlight_post_tag = *highlight_post_tag != DEFAULT_HIGHLIGHT_POST_TAG();
ret.crop_marker = *crop_marker != DEFAULT_CROP_MARKER();
@@ -290,6 +301,7 @@ impl<Method: AggregateMethod> Aggregate for SearchAggregator<Method> {
total_used_negative_operator,
ranking_score_threshold,
mut locales,
total_personalized,
marker: _,
} = *new;
@@ -374,6 +386,9 @@ impl<Method: AggregateMethod> Aggregate for SearchAggregator<Method> {
// locales
self.locales.append(&mut locales);
// personalization
self.total_personalized = self.total_personalized.saturating_add(total_personalized);
self
}
@@ -418,6 +433,7 @@ impl<Method: AggregateMethod> Aggregate for SearchAggregator<Method> {
total_used_negative_operator,
ranking_score_threshold,
locales,
total_personalized,
marker: _,
} = *self;
@@ -490,6 +506,9 @@ impl<Method: AggregateMethod> Aggregate for SearchAggregator<Method> {
"show_ranking_score_details": show_ranking_score_details,
"ranking_score_threshold": ranking_score_threshold,
},
"personalization": {
"total_personalized": total_personalized,
},
})
}
}

View File

@@ -67,6 +67,7 @@ impl MultiSearchAggregator {
hybrid: _,
ranking_score_threshold: _,
locales: _,
personalize: _,
} in &federated_search.queries
{
if let Some(federation_options) = federation_options {

View File

@@ -745,10 +745,9 @@ impl SearchByIndex {
match sort.iter().map(|s| milli::AscDesc::from_str(s)).collect() {
Ok(sorts) => sorts,
Err(asc_desc_error) => {
return Err(milli::Error::from(milli::SortError::from(
asc_desc_error,
))
.into())
return Err(milli::SortError::from(asc_desc_error)
.into_search_error()
.into())
}
};
Some(sorts)

View File

@@ -16,7 +16,7 @@ use meilisearch_types::error::{Code, ResponseError};
use meilisearch_types::heed::RoTxn;
use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::locales::Locale;
use meilisearch_types::milli::index::{self, SearchParameters};
use meilisearch_types::milli::index::{self, EmbeddingsWithMetadata, SearchParameters};
use meilisearch_types::milli::score_details::{ScoreDetails, ScoringStrategy};
use meilisearch_types::milli::vector::parsed_vectors::ExplicitVectors;
use meilisearch_types::milli::vector::Embedder;
@@ -57,6 +57,13 @@ pub const DEFAULT_HIGHLIGHT_PRE_TAG: fn() -> String = || "<em>".to_string();
pub const DEFAULT_HIGHLIGHT_POST_TAG: fn() -> String = || "</em>".to_string();
pub const DEFAULT_SEMANTIC_RATIO: fn() -> SemanticRatio = || SemanticRatio(0.5);
#[derive(Clone, Default, PartialEq, Deserr, ToSchema, Debug)]
#[deserr(error = DeserrJsonError<InvalidSearchPersonalize>, rename_all = camelCase, deny_unknown_fields)]
pub struct Personalize {
#[deserr(default, error = DeserrJsonError<InvalidSearchPersonalizeUserContext>)]
pub user_context: Option<String>,
}
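In a POST search body this surfaces as a nested object, per the camelCase rename above; a sketch with illustrative values:
{ "q": "action movie", "personalize": { "userContext": "prefers space operas and hard sci-fi" } }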
#[derive(Clone, Default, PartialEq, Deserr, ToSchema)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
pub struct SearchQuery {
@@ -120,6 +127,8 @@ pub struct SearchQuery {
pub ranking_score_threshold: Option<RankingScoreThreshold>,
#[deserr(default, error = DeserrJsonError<InvalidSearchLocales>)]
pub locales: Option<Vec<Locale>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchPersonalize>, default)]
pub personalize: Option<Personalize>,
}
impl From<SearchParameters> for SearchQuery {
@@ -167,6 +176,7 @@ impl From<SearchParameters> for SearchQuery {
highlight_post_tag: DEFAULT_HIGHLIGHT_POST_TAG(),
crop_marker: DEFAULT_CROP_MARKER(),
locales: None,
personalize: None,
}
}
}
@@ -248,6 +258,7 @@ impl fmt::Debug for SearchQuery {
attributes_to_search_on,
ranking_score_threshold,
locales,
personalize,
} = self;
let mut debug = f.debug_struct("SearchQuery");
@@ -336,6 +347,10 @@ impl fmt::Debug for SearchQuery {
debug.field("locales", &locales);
}
if let Some(personalize) = personalize {
debug.field("personalize", &personalize);
}
debug.finish()
}
}
@@ -541,6 +556,9 @@ pub struct SearchQueryWithIndex {
pub ranking_score_threshold: Option<RankingScoreThreshold>,
#[deserr(default, error = DeserrJsonError<InvalidSearchLocales>, default)]
pub locales: Option<Vec<Locale>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchPersonalize>, default)]
#[serde(skip)]
pub personalize: Option<Personalize>,
#[deserr(default)]
pub federation_options: Option<FederationOptions>,
@@ -598,6 +616,7 @@ impl SearchQueryWithIndex {
attributes_to_search_on,
ranking_score_threshold,
locales,
personalize,
} = query;
SearchQueryWithIndex {
@@ -629,6 +648,7 @@ impl SearchQueryWithIndex {
attributes_to_search_on,
ranking_score_threshold,
locales,
personalize,
federation_options,
}
}
@@ -664,6 +684,7 @@ impl SearchQueryWithIndex {
hybrid,
ranking_score_threshold,
locales,
personalize,
} = self;
(
index_uid,
@@ -695,6 +716,7 @@ impl SearchQueryWithIndex {
hybrid,
ranking_score_threshold,
locales,
personalize,
// do not use ..Default::default() here,
// rather add any missing field from `SearchQuery` to `SearchQueryWithIndex`
},
@@ -919,7 +941,7 @@ pub struct SearchResultWithIndex {
pub result: SearchResult,
}
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, ToSchema)]
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, ToSchema)]
#[serde(untagged)]
pub enum HitsInfo {
#[serde(rename_all = "camelCase")]
@@ -1051,6 +1073,7 @@ pub fn prepare_search<'t>(
.unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS);
search.exhaustive_number_hits(is_finite_pagination);
search.max_total_hits(Some(max_total_hits));
search.scoring_strategy(
if query.show_ranking_score
|| query.show_ranking_score_details
@@ -1091,7 +1114,7 @@ pub fn prepare_search<'t>(
let sort = match sort.iter().map(|s| AscDesc::from_str(s)).collect() {
Ok(sorts) => sorts,
Err(asc_desc_error) => {
return Err(milli::Error::from(SortError::from(asc_desc_error)).into())
return Err(SortError::from(asc_desc_error).into_search_error().into())
}
};
@@ -1165,6 +1188,7 @@ pub fn perform_search(
attributes_to_search_on: _,
filter: _,
distinct: _,
personalize: _,
} = query;
let format = AttributesFormat {
@@ -1527,8 +1551,11 @@ impl<'a> HitMaker<'a> {
Some(Value::Object(map)) => map,
_ => Default::default(),
};
for (name, (vector, regenerate)) in self.index.embeddings(self.rtxn, id)? {
let embeddings = ExplicitVectors { embeddings: Some(vector.into()), regenerate };
for (name, EmbeddingsWithMetadata { embeddings, regenerate, has_fragments: _ }) in
self.index.embeddings(self.rtxn, id)?
{
let embeddings =
ExplicitVectors { embeddings: Some(embeddings.into()), regenerate };
vectors.insert(
name,
serde_json::to_value(embeddings).map_err(InternalError::SerdeJson)?,


@@ -419,14 +419,14 @@ async fn error_add_api_key_invalid_parameters_actions() {
let (response, code) = server.add_api_key(content).await;
meili_snap::snapshot!(code, @"400 Bad Request");
meili_snap::snapshot!(meili_snap::json_string!(response, { ".createdAt" => "[ignored]", ".updatedAt" => "[ignored]" }), @r###"
meili_snap::snapshot!(meili_snap::json_string!(response, { ".createdAt" => "[ignored]", ".updatedAt" => "[ignored]" }), @r#"
{
"message": "Unknown value `doc.add` at `.actions[0]`: expected one of `*`, `search`, `documents.*`, `documents.add`, `documents.get`, `documents.delete`, `indexes.*`, `indexes.create`, `indexes.get`, `indexes.update`, `indexes.delete`, `indexes.swap`, `tasks.*`, `tasks.cancel`, `tasks.delete`, `tasks.get`, `settings.*`, `settings.get`, `settings.update`, `stats.*`, `stats.get`, `metrics.*`, `metrics.get`, `dumps.*`, `dumps.create`, `snapshots.*`, `snapshots.create`, `version`, `keys.create`, `keys.get`, `keys.update`, `keys.delete`, `experimental.get`, `experimental.update`, `export`, `network.get`, `network.update`, `chatCompletions`, `chats.*`, `chats.get`, `chats.delete`, `chatsSettings.*`, `chatsSettings.get`, `chatsSettings.update`",
"message": "Unknown value `doc.add` at `.actions[0]`: expected one of `*`, `search`, `documents.*`, `documents.add`, `documents.get`, `documents.delete`, `indexes.*`, `indexes.create`, `indexes.get`, `indexes.update`, `indexes.delete`, `indexes.swap`, `tasks.*`, `tasks.cancel`, `tasks.delete`, `tasks.get`, `settings.*`, `settings.get`, `settings.update`, `stats.*`, `stats.get`, `metrics.*`, `metrics.get`, `dumps.*`, `dumps.create`, `snapshots.*`, `snapshots.create`, `version`, `keys.create`, `keys.get`, `keys.update`, `keys.delete`, `experimental.get`, `experimental.update`, `export`, `network.get`, `network.update`, `chatCompletions`, `chats.*`, `chats.get`, `chats.delete`, `chatsSettings.*`, `chatsSettings.get`, `chatsSettings.update`, `*.get`",
"code": "invalid_api_key_actions",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_api_key_actions"
}
"###);
"#);
}
#[actix_rt::test]
@@ -790,7 +790,7 @@ async fn list_api_keys() {
meili_snap::snapshot!(code, @"201 Created");
let (response, code) = server.list_api_keys("").await;
meili_snap::snapshot!(meili_snap::json_string!(response, { ".results[].createdAt" => "[ignored]", ".results[].updatedAt" => "[ignored]", ".results[].uid" => "[ignored]", ".results[].key" => "[ignored]" }), @r###"
meili_snap::snapshot!(meili_snap::json_string!(response, { ".results[].createdAt" => "[ignored]", ".results[].updatedAt" => "[ignored]", ".results[].uid" => "[ignored]", ".results[].key" => "[ignored]" }), @r#"
{
"results": [
{
@@ -850,6 +850,22 @@ async fn list_api_keys() {
"createdAt": "[ignored]",
"updatedAt": "[ignored]"
},
{
"name": "Default Read-Only Admin API Key",
"description": "Use it to read information across the whole database. Caution! Do not expose this key on a public frontend",
"key": "[ignored]",
"uid": "[ignored]",
"actions": [
"*.get",
"keys.get"
],
"indexes": [
"*"
],
"expiresAt": null,
"createdAt": "[ignored]",
"updatedAt": "[ignored]"
},
{
"name": "Default Chat API Key",
"description": "Use it to chat and search from the frontend",
@@ -869,9 +885,9 @@ async fn list_api_keys() {
],
"offset": 0,
"limit": 20,
"total": 4
"total": 5
}
"###);
"#);
meili_snap::snapshot!(code, @"200 OK");
}


@@ -91,14 +91,14 @@ async fn create_api_key_bad_actions() {
// can't parse
let (response, code) = server.add_api_key(json!({ "actions": ["doggo"] })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
snapshot!(json_string!(response), @r#"
{
"message": "Unknown value `doggo` at `.actions[0]`: expected one of `*`, `search`, `documents.*`, `documents.add`, `documents.get`, `documents.delete`, `indexes.*`, `indexes.create`, `indexes.get`, `indexes.update`, `indexes.delete`, `indexes.swap`, `tasks.*`, `tasks.cancel`, `tasks.delete`, `tasks.get`, `settings.*`, `settings.get`, `settings.update`, `stats.*`, `stats.get`, `metrics.*`, `metrics.get`, `dumps.*`, `dumps.create`, `snapshots.*`, `snapshots.create`, `version`, `keys.create`, `keys.get`, `keys.update`, `keys.delete`, `experimental.get`, `experimental.update`, `export`, `network.get`, `network.update`, `chatCompletions`, `chats.*`, `chats.get`, `chats.delete`, `chatsSettings.*`, `chatsSettings.get`, `chatsSettings.update`",
"message": "Unknown value `doggo` at `.actions[0]`: expected one of `*`, `search`, `documents.*`, `documents.add`, `documents.get`, `documents.delete`, `indexes.*`, `indexes.create`, `indexes.get`, `indexes.update`, `indexes.delete`, `indexes.swap`, `tasks.*`, `tasks.cancel`, `tasks.delete`, `tasks.get`, `settings.*`, `settings.get`, `settings.update`, `stats.*`, `stats.get`, `metrics.*`, `metrics.get`, `dumps.*`, `dumps.create`, `snapshots.*`, `snapshots.create`, `version`, `keys.create`, `keys.get`, `keys.update`, `keys.delete`, `experimental.get`, `experimental.update`, `export`, `network.get`, `network.update`, `chatCompletions`, `chats.*`, `chats.get`, `chats.delete`, `chatsSettings.*`, `chatsSettings.get`, `chatsSettings.update`, `*.get`",
"code": "invalid_api_key_actions",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_api_key_actions"
}
"###);
"#);
}
#[actix_rt::test]


@@ -562,5 +562,7 @@ pub struct GetAllDocumentsOptions {
pub offset: Option<usize>,
#[serde(skip_serializing_if = "Option::is_none")]
pub fields: Option<Vec<&'static str>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub sort: Option<Vec<&'static str>>,
pub retrieve_vectors: bool,
}


@@ -3,8 +3,12 @@ pub mod index;
pub mod server;
pub mod service;
use std::fmt::{self, Display};
use std::{
collections::BTreeMap,
fmt::{self, Display},
};
use actix_http::StatusCode;
#[allow(unused)]
pub use index::GetAllDocumentsOptions;
use meili_snap::json_string;
@@ -13,6 +17,10 @@ use serde::{Deserialize, Serialize};
#[allow(unused)]
pub use server::{default_settings, Server};
use tokio::sync::OnceCell;
use wiremock::{
matchers::{method, path},
Mock, MockServer, Request, ResponseTemplate,
};
use crate::common::index::Index;
@@ -508,3 +516,166 @@ pub async fn shared_index_with_geo_documents() -> &'static Index<'static, Shared
})
.await
}
pub async fn shared_index_for_fragments() -> Index<'static, Shared> {
static INDEX: OnceCell<(Server<Shared>, String)> = OnceCell::const_new();
let (server, uid) = INDEX
.get_or_init(|| async {
let (server, uid, _) = init_fragments_index().await;
(server.into_shared(), uid)
})
.await;
server._index(uid).to_shared()
}
async fn fragment_mock_server() -> String {
let text_to_embedding: BTreeMap<_, _> = vec![
("kefir", [0.5, -0.5, 0.0]),
("intel", [1.0, 1.0, 0.0]),
("dustin", [-0.5, 0.5, 0.0]),
("bulldog", [0.0, 0.0, 1.0]),
("labrador", [0.0, 0.0, -1.0]),
("{{ doc.", [-9999.0, -9999.0, -9999.0]), // If a template didn't render
]
.into_iter()
.collect();
let mock_server = Box::leak(Box::new(MockServer::start().await));
Mock::given(method("POST"))
.and(path("/"))
.respond_with(move |req: &Request| {
let text = String::from_utf8_lossy(&req.body).to_string();
let mut data = [0.0, 0.0, 0.0];
for (inner_text, inner_data) in &text_to_embedding {
if text.contains(inner_text) {
for (i, &value) in inner_data.iter().enumerate() {
data[i] += value;
}
}
}
ResponseTemplate::new(200).set_body_json(json!({ "data": data }))
})
.mount(mock_server)
.await;
mock_server.uri()
}
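The mock is purely additive: every keyword from the table above that occurs in the request body contributes its vector to the response. A worked example, assuming the fragment templates defined below render as written:

// Document 2 renders the `withBreed` fragment as "intel is a labrador",
// which matches both "intel" and "labrador":
//   [1.0, 1.0, 0.0] + [0.0, 0.0, -1.0] = [1.0, 1.0, -1.0]
// A template that fails to render leaves "{{ doc." in the text, which adds
// [-9999.0, -9999.0, -9999.0] and makes the failure obvious in snapshots.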
pub async fn init_fragments_index() -> (Server<Owned>, String, crate::common::Value) {
let url = fragment_mock_server().await;
let server = Server::new().await;
let index = server.unique_index();
let (_response, code) = server.set_features(json!({"multimodal": true})).await;
assert_eq!(code, StatusCode::OK);
// Configure the index to use our mock embedder
let settings = json!({
"embedders": {
"rest": {
"source": "rest",
"url": url,
"dimensions": 3,
"request": "{{fragment}}",
"response": {
"data": "{{embedding}}"
},
"indexingFragments": {
"withBreed": {"value": "{{ doc.name }} is a {{ doc.breed }}"},
"basic": {"value": "{{ doc.name }} is a dog"},
},
"searchFragments": {
"justBreed": {"value": "It's a {{ media.breed }}"},
"justName": {"value": "{{ media.name }} is a dog"},
"query": {"value": "Some pre-prompt for query {{ q }}"},
}
},
},
});
let (response, code) = index.update_settings(settings.clone()).await;
assert_eq!(code, StatusCode::ACCEPTED);
server.wait_task(response.uid()).await.succeeded();
// Send documents
let documents = json!([
{"id": 0, "name": "kefir"},
{"id": 1, "name": "echo", "_vectors": { "rest": [1, 1, 1] }},
{"id": 2, "name": "intel", "breed": "labrador"},
{"id": 3, "name": "dustin", "breed": "bulldog"},
]);
let (value, code) = index.add_documents(documents, None).await;
assert_eq!(code, StatusCode::ACCEPTED);
let _task = index.wait_task(value.uid()).await.succeeded();
let uid = index.uid.clone();
(server, uid, settings)
}
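A test can then grab the shared fixture and run a semantic search through the mock embedder; a minimal sketch (the hybrid parameters are an assumption, the embedder name matches the settings above):

let index = shared_index_for_fragments().await;
let (response, _code) = index
    .search_post(json!({
        "q": "bulldog",
        // force a fully semantic search through the "rest" embedder
        "hybrid": { "embedder": "rest", "semanticRatio": 1.0 }
    }))
    .await;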
pub async fn init_fragments_index_composite() -> (Server<Owned>, String, crate::common::Value) {
let url = fragment_mock_server().await;
let server = Server::new().await;
let index = server.unique_index();
let (_response, code) = server.set_features(json!({"multimodal": true})).await;
assert_eq!(code, StatusCode::OK);
let (_response, code) = server.set_features(json!({"compositeEmbedders": true})).await;
assert_eq!(code, StatusCode::OK);
// Configure the index to use our mock embedder
let settings = json!({
"embedders": {
"rest": {
"source": "composite",
"searchEmbedder": {
"source": "rest",
"url": url,
"dimensions": 3,
"request": "{{fragment}}",
"response": {
"data": "{{embedding}}"
},
"searchFragments": {
"query": {"value": "Some pre-prompt for query {{ q }}"},
}
},
"indexingEmbedder": {
"source": "rest",
"url": url,
"dimensions": 3,
"request": "{{fragment}}",
"response": {
"data": "{{embedding}}"
},
"indexingFragments": {
"withBreed": {"value": "{{ doc.name }} is a {{ doc.breed }}"},
"basic": {"value": "{{ doc.name }} is a dog"},
}
},
},
},
});
let (response, code) = index.update_settings(settings.clone()).await;
assert_eq!(code, StatusCode::ACCEPTED);
server.wait_task(response.uid()).await.succeeded();
// Send documents
let documents = json!([
{"id": 0, "name": "kefir"},
{"id": 1, "name": "echo", "_vectors": { "rest": [1, 1, 1] }},
{"id": 2, "name": "intel", "breed": "labrador"},
{"id": 3, "name": "dustin", "breed": "bulldog"},
]);
let (value, code) = index.add_documents(documents, None).await;
assert_eq!(code, StatusCode::ACCEPTED);
index.wait_task(value.uid()).await.succeeded();
let uid = index.uid.clone();
(server, uid, settings)
}


@@ -35,7 +35,7 @@ pub struct Server<State = Owned> {
pub static TEST_TEMP_DIR: Lazy<TempDir> = Lazy::new(|| TempDir::new().unwrap());
impl Server<Owned> {
fn into_shared(self) -> Server<Shared> {
pub(super) fn into_shared(self) -> Server<Shared> {
Server { service: self.service, _dir: self._dir, _marker: PhantomData }
}
@@ -97,6 +97,7 @@ impl Server<Owned> {
self.use_api_key(master_key);
let (response, code) = self.list_api_keys("").await;
assert_eq!(200, code, "{:?}", response);
// TODO: relying on the order of keys is not ideal; we should use the name instead
let admin_key = &response["results"][1]["key"];
self.use_api_key(admin_key.as_str().unwrap());
}
@@ -465,6 +466,7 @@ pub fn default_settings(dir: impl AsRef<Path>) -> Opt {
// Having 2 threads makes the tests way faster
max_indexing_threads: MaxThreads::from_str("2").unwrap(),
experimental_no_edition_2024_for_settings: false,
experimental_no_edition_2024_for_dumps: false,
},
experimental_enable_metrics: false,
..Parser::parse_from(None as Option<&str>)


@@ -5,8 +5,8 @@ use urlencoding::encode as urlencode;
use crate::common::encoder::Encoder;
use crate::common::{
shared_does_not_exists_index, shared_empty_index, shared_index_with_test_set,
GetAllDocumentsOptions, Server, Value,
shared_does_not_exists_index, shared_empty_index, shared_index_with_geo_documents,
shared_index_with_test_set, GetAllDocumentsOptions, Server, Value,
};
use crate::json;
@@ -83,6 +83,311 @@ async fn get_document() {
);
}
#[actix_rt::test]
async fn get_document_sorted() {
let server = Server::new_shared();
let index = server.unique_index();
index.load_test_set().await;
let (task, _status_code) =
index.update_settings_sortable_attributes(json!(["age", "email", "gender", "name"])).await;
server.wait_task(task.uid()).await.succeeded();
let (response, _code) = index
.get_all_documents(GetAllDocumentsOptions {
fields: Some(vec!["id", "age", "email"]),
sort: Some(vec!["age:asc", "email:desc"]),
..Default::default()
})
.await;
let results = response["results"].as_array().unwrap();
snapshot!(json_string!(results), @r#"
[
{
"id": 5,
"age": 20,
"email": "warrenwatson@chorizon.com"
},
{
"id": 6,
"age": 20,
"email": "sheliaberry@chorizon.com"
},
{
"id": 57,
"age": 20,
"email": "kaitlinconner@chorizon.com"
},
{
"id": 45,
"age": 20,
"email": "irenebennett@chorizon.com"
},
{
"id": 40,
"age": 21,
"email": "staffordemerson@chorizon.com"
},
{
"id": 41,
"age": 21,
"email": "salinasgamble@chorizon.com"
},
{
"id": 63,
"age": 21,
"email": "knowleshebert@chorizon.com"
},
{
"id": 50,
"age": 21,
"email": "guerramcintyre@chorizon.com"
},
{
"id": 44,
"age": 22,
"email": "jonispears@chorizon.com"
},
{
"id": 56,
"age": 23,
"email": "tuckerbarry@chorizon.com"
},
{
"id": 51,
"age": 23,
"email": "keycervantes@chorizon.com"
},
{
"id": 60,
"age": 23,
"email": "jodyherrera@chorizon.com"
},
{
"id": 70,
"age": 23,
"email": "glassperkins@chorizon.com"
},
{
"id": 75,
"age": 24,
"email": "emmajacobs@chorizon.com"
},
{
"id": 68,
"age": 24,
"email": "angelinadyer@chorizon.com"
},
{
"id": 17,
"age": 25,
"email": "ortegabrennan@chorizon.com"
},
{
"id": 76,
"age": 25,
"email": "claricegardner@chorizon.com"
},
{
"id": 43,
"age": 25,
"email": "arnoldbender@chorizon.com"
},
{
"id": 12,
"age": 25,
"email": "aidakirby@chorizon.com"
},
{
"id": 9,
"age": 26,
"email": "kellimendez@chorizon.com"
}
]
"#);
let (response, _code) = index
.get_all_documents(GetAllDocumentsOptions {
fields: Some(vec!["id", "gender", "name"]),
sort: Some(vec!["gender:asc", "name:asc"]),
..Default::default()
})
.await;
let results = response["results"].as_array().unwrap();
snapshot!(json_string!(results), @r#"
[
{
"id": 3,
"name": "Adeline Flynn",
"gender": "female"
},
{
"id": 12,
"name": "Aida Kirby",
"gender": "female"
},
{
"id": 68,
"name": "Angelina Dyer",
"gender": "female"
},
{
"id": 15,
"name": "Aurelia Contreras",
"gender": "female"
},
{
"id": 36,
"name": "Barbra Valenzuela",
"gender": "female"
},
{
"id": 23,
"name": "Blanca Mcclain",
"gender": "female"
},
{
"id": 53,
"name": "Caitlin Burnett",
"gender": "female"
},
{
"id": 71,
"name": "Candace Sawyer",
"gender": "female"
},
{
"id": 65,
"name": "Carole Rowland",
"gender": "female"
},
{
"id": 33,
"name": "Cecilia Greer",
"gender": "female"
},
{
"id": 1,
"name": "Cherry Orr",
"gender": "female"
},
{
"id": 38,
"name": "Christina Short",
"gender": "female"
},
{
"id": 7,
"name": "Chrystal Boyd",
"gender": "female"
},
{
"id": 76,
"name": "Clarice Gardner",
"gender": "female"
},
{
"id": 73,
"name": "Eleanor Shepherd",
"gender": "female"
},
{
"id": 75,
"name": "Emma Jacobs",
"gender": "female"
},
{
"id": 16,
"name": "Estella Bass",
"gender": "female"
},
{
"id": 62,
"name": "Estelle Ramirez",
"gender": "female"
},
{
"id": 20,
"name": "Florence Long",
"gender": "female"
},
{
"id": 42,
"name": "Graciela Russell",
"gender": "female"
}
]
"#);
}
#[actix_rt::test]
async fn get_document_geosorted() {
let index = shared_index_with_geo_documents().await;
let (response, _code) = index
.get_all_documents(GetAllDocumentsOptions {
sort: Some(vec!["_geoPoint(45.4777599, 9.1967508):asc"]),
..Default::default()
})
.await;
let results = response["results"].as_array().unwrap();
snapshot!(json_string!(results), @r#"
[
{
"id": 2,
"name": "La Bella Italia",
"address": "456 Elm Street, Townsville",
"type": "Italian",
"rating": 9,
"_geo": {
"lat": "45.4777599",
"lng": "9.1967508"
}
},
{
"id": 1,
"name": "Taco Truck",
"address": "444 Salsa Street, Burritoville",
"type": "Mexican",
"rating": 9,
"_geo": {
"lat": 34.0522,
"lng": -118.2437
}
},
{
"id": 3,
"name": "CrĂŞpe Truck",
"address": "2 Billig Avenue, Rouenville",
"type": "French",
"rating": 10
}
]
"#);
}
#[actix_rt::test]
async fn get_document_sort_the_unsortable() {
let index = shared_index_with_test_set().await;
let (response, _code) = index
.get_all_documents(GetAllDocumentsOptions {
fields: Some(vec!["id", "name"]),
sort: Some(vec!["name:asc"]),
..Default::default()
})
.await;
snapshot!(json_string!(response), @r#"
{
"message": "Attribute `name` is not sortable. This index does not have configured sortable attributes.",
"code": "invalid_document_sort",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_sort"
}
"#);
}
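For contrast with `get_document_sorted` above, declaring the attribute sortable first would make the same request succeed; a sketch on an owned index (the shared fixture used here must not be mutated):

let (task, _code) = index.update_settings_sortable_attributes(json!(["name"])).await;
index.wait_task(task.uid()).await.succeeded();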
#[actix_rt::test]
async fn error_get_unexisting_index_all_documents() {
let index = shared_does_not_exists_index().await;


@@ -1,5 +1,4 @@
use crate::common::{shared_does_not_exists_index, Server};
use crate::json;
#[actix_rt::test]


@@ -2499,7 +2499,7 @@ pub struct LocalMeiliParams {
/// A server that exploits [`MockServer`] to provide a URL for testing the network feature.
pub struct LocalMeili {
mock_server: MockServer,
mock_server: &'static MockServer,
}
impl LocalMeili {
@@ -2508,7 +2508,7 @@ impl LocalMeili {
}
pub async fn with_params(server: Arc<Server>, params: LocalMeiliParams) -> Self {
let mock_server = MockServer::start().await;
let mock_server = Box::leak(Box::new(MockServer::start().await));
// tokio won't let us execute asynchronous code from a sync function inside an async test,
// so instead we spawn another thread that calls the service on a brand-new tokio runtime
@@ -2572,7 +2572,7 @@ impl LocalMeili {
response.set_body_json(value)
}
})
.mount(&mock_server)
.mount(mock_server)
.await;
Self { mock_server }
}


@@ -1,6 +1,7 @@
use super::shared_index_with_documents;
use crate::common::Server;
use crate::json;
use meili_snap::{json_string, snapshot};
#[actix_rt::test]
async fn default_search_should_return_estimated_total_hit() {
@@ -133,3 +134,61 @@ async fn ensure_placeholder_search_hit_count_valid() {
.await;
}
}
#[actix_rt::test]
async fn test_issue_5274() {
let server = Server::new_shared();
let index = server.unique_index();
let documents = json!([
{
"id": 1,
"title": "Document 1",
"content": "This is the first."
},
{
"id": 2,
"title": "Document 2",
"content": "This is the second doc."
}
]);
let (task, _code) = index.add_documents(documents, None).await;
server.wait_task(task.uid()).await.succeeded();
// Find out the lowest ranking score among the documents
let (rep, _status) = index
.search_post(json!({"q": "doc", "page": 1, "hitsPerPage": 2, "showRankingScore": true}))
.await;
let hits = rep["hits"].as_array().expect("Missing hits array");
let second_hit = hits.get(1).expect("Missing second hit");
let ranking_score = second_hit
.get("_rankingScore")
.expect("Missing _rankingScore field")
.as_f64()
.expect("Expected _rankingScore to be a f64");
// Search with a ranking score threshold just above it and expect a single hit
let (rep, _status) = index
.search_post(json!({"q": "doc", "page": 1, "hitsPerPage": 1, "rankingScoreThreshold": ranking_score + 0.0001}))
.await;
snapshot!(json_string!(rep, {
".processingTimeMs" => "[ignored]",
}), @r#"
{
"hits": [
{
"id": 2,
"title": "Document 2",
"content": "This is the second doc."
}
],
"query": "doc",
"processingTimeMs": "[ignored]",
"hitsPerPage": 1,
"page": 1,
"totalPages": 1,
"totalHits": 1
}
"#);
}


@@ -692,3 +692,68 @@ async fn granular_filterable_attributes() {
]
"###);
}
#[actix_rt::test]
async fn test_searchable_attributes_order() {
let server = Server::new_shared();
let index = server.unique_index();
// 1) Create an index with settings "searchableAttributes": ["title", "overview"]
let (response, code) = index.create(None).await;
assert_eq!(code, 202, "{response}");
server.wait_task(response.uid()).await.succeeded();
let (task, code) = index
.update_settings(json!({
"searchableAttributes": ["title", "overview"]
}))
.await;
assert_eq!(code, 202, "{task}");
server.wait_task(task.uid()).await.succeeded();
// 2) Add documents in the index
let documents = json!([
{
"id": 1,
"title": "The Matrix",
"overview": "A computer hacker learns from mysterious rebels about the true nature of his reality."
},
{
"id": 2,
"title": "Inception",
"overview": "A thief who steals corporate secrets through dream-sharing technology."
}
]);
let (response, code) = index.add_documents(documents, None).await;
assert_eq!(code, 202, "{response}");
server.wait_task(response.uid()).await.succeeded();
// 3) Modify the settings "searchableAttributes": ["overview", "title"] (overview is put first)
let (task, code) = index
.update_settings(json!({
"searchableAttributes": ["overview", "title"]
}))
.await;
assert_eq!(code, 202, "{task}");
server.wait_task(task.uid()).await.succeeded();
// 4) Check if it has been applied
let (response, code) = index.settings().await;
assert_eq!(code, 200, "{response}");
assert_eq!(response["searchableAttributes"], json!(["overview", "title"]));
// 5) Re-modify the settings "searchableAttributes": ["title", "overview"] (title is put first)
let (task, code) = index
.update_settings(json!({
"searchableAttributes": ["title", "overview"]
}))
.await;
assert_eq!(code, 202, "{task}");
server.wait_task(task.uid()).await.succeeded();
// 6) Check if it has been applied
let (response, code) = index.settings().await;
assert_eq!(code, 200, "{response}");
assert_eq!(response["searchableAttributes"], json!(["title", "overview"]));
}


@@ -61,7 +61,16 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"pagination": {
"maxTotalHits": 15
},
"embedders": {},
"embedders": {
"doggo_embedder": {
"source": "huggingFace",
"model": "sentence-transformers/all-MiniLM-L6-v2",
"revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
"pooling": "forceMean",
"documentTemplate": "{{doc.description}}",
"documentTemplateMaxBytes": 400
}
},
"searchCutoffMs": 8000,
"localizedAttributes": [
{


@@ -0,0 +1,40 @@
---
source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
---
[
{
"id": 1,
"name": "kefir",
"surname": [
"kef",
"kefkef",
"kefirounet",
"boubou"
],
"age": 1.4,
"description": "kefir est un petit chien blanc très mignon",
"_vectors": {
"doggo_embedder": {
"embeddings": "[vector]",
"regenerate": true
}
}
},
{
"id": 2,
"name": "intel",
"surname": [
"untel",
"tétel",
"iouiou"
],
"age": 11.5,
"description": "intel est un grand beagle très mignon",
"_vectors": {
"doggo_embedder": {
"embeddings": "[vector]",
"regenerate": false
}
}
}
]


@@ -4,7 +4,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
{
"results": [
{
"uid": 24,
"uid": 30,
"progress": null,
"details": {
"upgradeFrom": "v1.12.0",
@@ -26,6 +26,155 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"finishedAt": "[date]",
"batchStrategy": "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type."
},
{
"uid": 29,
"progress": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 1
},
"stats": {
"totalNbTasks": 1,
"status": {
"succeeded": 1
},
"types": {
"documentAdditionOrUpdate": 1
},
"indexUids": {
"kefir": 1
}
},
"duration": "PT0.067201S",
"startedAt": "2025-07-07T13:43:08.772854Z",
"finishedAt": "2025-07-07T13:43:08.840055Z",
"batchStrategy": "unspecified"
},
{
"uid": 28,
"progress": null,
"details": {
"deletedDocuments": 1
},
"stats": {
"totalNbTasks": 1,
"status": {
"succeeded": 1
},
"types": {
"indexDeletion": 1
},
"indexUids": {
"mieli": 1
}
},
"duration": "PT0.012727S",
"startedAt": "2025-07-07T13:42:50.745461Z",
"finishedAt": "2025-07-07T13:42:50.758188Z",
"batchStrategy": "unspecified"
},
{
"uid": 27,
"progress": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 0
},
"stats": {
"totalNbTasks": 1,
"status": {
"failed": 1
},
"types": {
"documentAdditionOrUpdate": 1
},
"indexUids": {
"kefir": 1
}
},
"duration": "PT0.059920S",
"startedAt": "2025-07-07T13:42:15.625413Z",
"finishedAt": "2025-07-07T13:42:15.685333Z",
"batchStrategy": "unspecified"
},
{
"uid": 26,
"progress": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 1
},
"stats": {
"totalNbTasks": 1,
"status": {
"succeeded": 1
},
"types": {
"documentAdditionOrUpdate": 1
},
"indexUids": {
"mieli": 1
}
},
"duration": "PT0.088879S",
"startedAt": "2025-07-07T13:40:01.461741Z",
"finishedAt": "2025-07-07T13:40:01.55062Z",
"batchStrategy": "unspecified"
},
{
"uid": 25,
"progress": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 1
},
"stats": {
"totalNbTasks": 1,
"status": {
"succeeded": 1
},
"types": {
"documentAdditionOrUpdate": 1
},
"indexUids": {
"kefir": 1
}
},
"duration": "PT0.312911S",
"startedAt": "2025-07-07T13:32:46.139785Z",
"finishedAt": "2025-07-07T13:32:46.452696Z",
"batchStrategy": "unspecified"
},
{
"uid": 24,
"progress": null,
"details": {
"embedders": {
"doggo_embedder": {
"source": "huggingFace",
"model": "sentence-transformers/all-MiniLM-L6-v2",
"revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
"documentTemplate": "{{doc.description}}"
}
}
},
"stats": {
"totalNbTasks": 1,
"status": {
"succeeded": 1
},
"types": {
"settingsUpdate": 1
},
"indexUids": {
"kefir": 1
}
},
"duration": "PT0.247378S",
"startedAt": "2025-07-07T13:28:27.391344Z",
"finishedAt": "2025-07-07T13:28:27.638722Z",
"batchStrategy": "unspecified"
},
{
"uid": 23,
"progress": null,
@@ -348,179 +497,10 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"startedAt": "2025-01-16T17:01:14.112756687Z",
"finishedAt": "2025-01-16T17:01:14.120064527Z",
"batchStrategy": "unspecified"
},
{
"uid": 10,
"progress": null,
"details": {
"faceting": {
"maxValuesPerFacet": 99
},
"pagination": {
"maxTotalHits": 15
}
},
"stats": {
"totalNbTasks": 1,
"status": {
"succeeded": 1
},
"types": {
"settingsUpdate": 1
},
"indexUids": {
"kefir": 1
}
},
"duration": "PT0.007391353S",
"startedAt": "2025-01-16T17:00:29.201180268Z",
"finishedAt": "2025-01-16T17:00:29.208571621Z",
"batchStrategy": "unspecified"
},
{
"uid": 9,
"progress": null,
"details": {
"faceting": {
"maxValuesPerFacet": 100
},
"pagination": {
"maxTotalHits": 1000
}
},
"stats": {
"totalNbTasks": 1,
"status": {
"succeeded": 1
},
"types": {
"settingsUpdate": 1
},
"indexUids": {
"kefir": 1
}
},
"duration": "PT0.007445825S",
"startedAt": "2025-01-16T17:00:15.77629445Z",
"finishedAt": "2025-01-16T17:00:15.783740275Z",
"batchStrategy": "unspecified"
},
{
"uid": 8,
"progress": null,
"details": {
"typoTolerance": {
"minWordSizeForTypos": {
"oneTypo": 4
},
"disableOnWords": [
"kefir"
],
"disableOnAttributes": [
"surname"
]
}
},
"stats": {
"totalNbTasks": 1,
"status": {
"succeeded": 1
},
"types": {
"settingsUpdate": 1
},
"indexUids": {
"kefir": 1
}
},
"duration": "PT0.012020083S",
"startedAt": "2025-01-16T16:59:42.744086671Z",
"finishedAt": "2025-01-16T16:59:42.756106754Z",
"batchStrategy": "unspecified"
},
{
"uid": 7,
"progress": null,
"details": {
"typoTolerance": {
"minWordSizeForTypos": {
"oneTypo": 4
}
}
},
"stats": {
"totalNbTasks": 1,
"status": {
"succeeded": 1
},
"types": {
"settingsUpdate": 1
},
"indexUids": {
"kefir": 1
}
},
"duration": "PT0.007440092S",
"startedAt": "2025-01-16T16:58:41.2155771Z",
"finishedAt": "2025-01-16T16:58:41.223017192Z",
"batchStrategy": "unspecified"
},
{
"uid": 6,
"progress": null,
"details": {
"synonyms": {
"boubou": [
"kefir"
]
}
},
"stats": {
"totalNbTasks": 1,
"status": {
"succeeded": 1
},
"types": {
"settingsUpdate": 1
},
"indexUids": {
"kefir": 1
}
},
"duration": "PT0.007565161S",
"startedAt": "2025-01-16T16:54:51.940332781Z",
"finishedAt": "2025-01-16T16:54:51.947897942Z",
"batchStrategy": "unspecified"
},
{
"uid": 5,
"progress": null,
"details": {
"stopWords": [
"le",
"un"
]
},
"stats": {
"totalNbTasks": 1,
"status": {
"succeeded": 1
},
"types": {
"settingsUpdate": 1
},
"indexUids": {
"kefir": 1
}
},
"duration": "PT0.016307263S",
"startedAt": "2025-01-16T16:53:19.913351957Z",
"finishedAt": "2025-01-16T16:53:19.92965922Z",
"batchStrategy": "unspecified"
}
],
"total": 23,
"total": 29,
"limit": 20,
"from": 24,
"next": 4
"from": 30,
"next": 10
}


@@ -4,7 +4,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
{
"results": [
{
"uid": 24,
"uid": 30,
"progress": null,
"details": {
"upgradeFrom": "v1.12.0",
@@ -26,6 +26,155 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"finishedAt": "[date]",
"batchStrategy": "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type."
},
{
"uid": 29,
"progress": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 1
},
"stats": {
"totalNbTasks": 1,
"status": {
"succeeded": 1
},
"types": {
"documentAdditionOrUpdate": 1
},
"indexUids": {
"kefir": 1
}
},
"duration": "PT0.067201S",
"startedAt": "2025-07-07T13:43:08.772854Z",
"finishedAt": "2025-07-07T13:43:08.840055Z",
"batchStrategy": "unspecified"
},
{
"uid": 28,
"progress": null,
"details": {
"deletedDocuments": 1
},
"stats": {
"totalNbTasks": 1,
"status": {
"succeeded": 1
},
"types": {
"indexDeletion": 1
},
"indexUids": {
"mieli": 1
}
},
"duration": "PT0.012727S",
"startedAt": "2025-07-07T13:42:50.745461Z",
"finishedAt": "2025-07-07T13:42:50.758188Z",
"batchStrategy": "unspecified"
},
{
"uid": 27,
"progress": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 0
},
"stats": {
"totalNbTasks": 1,
"status": {
"failed": 1
},
"types": {
"documentAdditionOrUpdate": 1
},
"indexUids": {
"kefir": 1
}
},
"duration": "PT0.059920S",
"startedAt": "2025-07-07T13:42:15.625413Z",
"finishedAt": "2025-07-07T13:42:15.685333Z",
"batchStrategy": "unspecified"
},
{
"uid": 26,
"progress": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 1
},
"stats": {
"totalNbTasks": 1,
"status": {
"succeeded": 1
},
"types": {
"documentAdditionOrUpdate": 1
},
"indexUids": {
"mieli": 1
}
},
"duration": "PT0.088879S",
"startedAt": "2025-07-07T13:40:01.461741Z",
"finishedAt": "2025-07-07T13:40:01.55062Z",
"batchStrategy": "unspecified"
},
{
"uid": 25,
"progress": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 1
},
"stats": {
"totalNbTasks": 1,
"status": {
"succeeded": 1
},
"types": {
"documentAdditionOrUpdate": 1
},
"indexUids": {
"kefir": 1
}
},
"duration": "PT0.312911S",
"startedAt": "2025-07-07T13:32:46.139785Z",
"finishedAt": "2025-07-07T13:32:46.452696Z",
"batchStrategy": "unspecified"
},
{
"uid": 24,
"progress": null,
"details": {
"embedders": {
"doggo_embedder": {
"source": "huggingFace",
"model": "sentence-transformers/all-MiniLM-L6-v2",
"revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
"documentTemplate": "{{doc.description}}"
}
}
},
"stats": {
"totalNbTasks": 1,
"status": {
"succeeded": 1
},
"types": {
"settingsUpdate": 1
},
"indexUids": {
"kefir": 1
}
},
"duration": "PT0.247378S",
"startedAt": "2025-07-07T13:28:27.391344Z",
"finishedAt": "2025-07-07T13:28:27.638722Z",
"batchStrategy": "unspecified"
},
{
"uid": 23,
"progress": null,
@@ -348,179 +497,10 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"startedAt": "2025-01-16T17:01:14.112756687Z",
"finishedAt": "2025-01-16T17:01:14.120064527Z",
"batchStrategy": "unspecified"
},
{
"uid": 10,
"progress": null,
"details": {
"faceting": {
"maxValuesPerFacet": 99
},
"pagination": {
"maxTotalHits": 15
}
},
"stats": {
"totalNbTasks": 1,
"status": {
"succeeded": 1
},
"types": {
"settingsUpdate": 1
},
"indexUids": {
"kefir": 1
}
},
"duration": "PT0.007391353S",
"startedAt": "2025-01-16T17:00:29.201180268Z",
"finishedAt": "2025-01-16T17:00:29.208571621Z",
"batchStrategy": "unspecified"
},
{
"uid": 9,
"progress": null,
"details": {
"faceting": {
"maxValuesPerFacet": 100
},
"pagination": {
"maxTotalHits": 1000
}
},
"stats": {
"totalNbTasks": 1,
"status": {
"succeeded": 1
},
"types": {
"settingsUpdate": 1
},
"indexUids": {
"kefir": 1
}
},
"duration": "PT0.007445825S",
"startedAt": "2025-01-16T17:00:15.77629445Z",
"finishedAt": "2025-01-16T17:00:15.783740275Z",
"batchStrategy": "unspecified"
},
{
"uid": 8,
"progress": null,
"details": {
"typoTolerance": {
"minWordSizeForTypos": {
"oneTypo": 4
},
"disableOnWords": [
"kefir"
],
"disableOnAttributes": [
"surname"
]
}
},
"stats": {
"totalNbTasks": 1,
"status": {
"succeeded": 1
},
"types": {
"settingsUpdate": 1
},
"indexUids": {
"kefir": 1
}
},
"duration": "PT0.012020083S",
"startedAt": "2025-01-16T16:59:42.744086671Z",
"finishedAt": "2025-01-16T16:59:42.756106754Z",
"batchStrategy": "unspecified"
},
{
"uid": 7,
"progress": null,
"details": {
"typoTolerance": {
"minWordSizeForTypos": {
"oneTypo": 4
}
}
},
"stats": {
"totalNbTasks": 1,
"status": {
"succeeded": 1
},
"types": {
"settingsUpdate": 1
},
"indexUids": {
"kefir": 1
}
},
"duration": "PT0.007440092S",
"startedAt": "2025-01-16T16:58:41.2155771Z",
"finishedAt": "2025-01-16T16:58:41.223017192Z",
"batchStrategy": "unspecified"
},
{
"uid": 6,
"progress": null,
"details": {
"synonyms": {
"boubou": [
"kefir"
]
}
},
"stats": {
"totalNbTasks": 1,
"status": {
"succeeded": 1
},
"types": {
"settingsUpdate": 1
},
"indexUids": {
"kefir": 1
}
},
"duration": "PT0.007565161S",
"startedAt": "2025-01-16T16:54:51.940332781Z",
"finishedAt": "2025-01-16T16:54:51.947897942Z",
"batchStrategy": "unspecified"
},
{
"uid": 5,
"progress": null,
"details": {
"stopWords": [
"le",
"un"
]
},
"stats": {
"totalNbTasks": 1,
"status": {
"succeeded": 1
},
"types": {
"settingsUpdate": 1
},
"indexUids": {
"kefir": 1
}
},
"duration": "PT0.016307263S",
"startedAt": "2025-01-16T16:53:19.913351957Z",
"finishedAt": "2025-01-16T16:53:19.92965922Z",
"batchStrategy": "unspecified"
}
],
"total": 23,
"total": 29,
"limit": 20,
"from": 24,
"next": 4
"from": 30,
"next": 10
}


@@ -4,7 +4,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
{
"results": [
{
"uid": 24,
"uid": 30,
"progress": null,
"details": {
"upgradeFrom": "v1.12.0",
@@ -26,6 +26,155 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"finishedAt": "[date]",
"batchStrategy": "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type."
},
{
"uid": 29,
"progress": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 1
},
"stats": {
"totalNbTasks": 1,
"status": {
"succeeded": 1
},
"types": {
"documentAdditionOrUpdate": 1
},
"indexUids": {
"kefir": 1
}
},
"duration": "PT0.067201S",
"startedAt": "2025-07-07T13:43:08.772854Z",
"finishedAt": "2025-07-07T13:43:08.840055Z",
"batchStrategy": "unspecified"
},
{
"uid": 28,
"progress": null,
"details": {
"deletedDocuments": 1
},
"stats": {
"totalNbTasks": 1,
"status": {
"succeeded": 1
},
"types": {
"indexDeletion": 1
},
"indexUids": {
"mieli": 1
}
},
"duration": "PT0.012727S",
"startedAt": "2025-07-07T13:42:50.745461Z",
"finishedAt": "2025-07-07T13:42:50.758188Z",
"batchStrategy": "unspecified"
},
{
"uid": 27,
"progress": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 0
},
"stats": {
"totalNbTasks": 1,
"status": {
"failed": 1
},
"types": {
"documentAdditionOrUpdate": 1
},
"indexUids": {
"kefir": 1
}
},
"duration": "PT0.059920S",
"startedAt": "2025-07-07T13:42:15.625413Z",
"finishedAt": "2025-07-07T13:42:15.685333Z",
"batchStrategy": "unspecified"
},
{
"uid": 26,
"progress": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 1
},
"stats": {
"totalNbTasks": 1,
"status": {
"succeeded": 1
},
"types": {
"documentAdditionOrUpdate": 1
},
"indexUids": {
"mieli": 1
}
},
"duration": "PT0.088879S",
"startedAt": "2025-07-07T13:40:01.461741Z",
"finishedAt": "2025-07-07T13:40:01.55062Z",
"batchStrategy": "unspecified"
},
{
"uid": 25,
"progress": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 1
},
"stats": {
"totalNbTasks": 1,
"status": {
"succeeded": 1
},
"types": {
"documentAdditionOrUpdate": 1
},
"indexUids": {
"kefir": 1
}
},
"duration": "PT0.312911S",
"startedAt": "2025-07-07T13:32:46.139785Z",
"finishedAt": "2025-07-07T13:32:46.452696Z",
"batchStrategy": "unspecified"
},
{
"uid": 24,
"progress": null,
"details": {
"embedders": {
"doggo_embedder": {
"source": "huggingFace",
"model": "sentence-transformers/all-MiniLM-L6-v2",
"revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
"documentTemplate": "{{doc.description}}"
}
}
},
"stats": {
"totalNbTasks": 1,
"status": {
"succeeded": 1
},
"types": {
"settingsUpdate": 1
},
"indexUids": {
"kefir": 1
}
},
"duration": "PT0.247378S",
"startedAt": "2025-07-07T13:28:27.391344Z",
"finishedAt": "2025-07-07T13:28:27.638722Z",
"batchStrategy": "unspecified"
},
{
"uid": 23,
"progress": null,
@@ -348,179 +497,10 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"startedAt": "2025-01-16T17:01:14.112756687Z",
"finishedAt": "2025-01-16T17:01:14.120064527Z",
"batchStrategy": "unspecified"
},
{
"uid": 10,
"progress": null,
"details": {
"faceting": {
"maxValuesPerFacet": 99
},
"pagination": {
"maxTotalHits": 15
}
},
"stats": {
"totalNbTasks": 1,
"status": {
"succeeded": 1
},
"types": {
"settingsUpdate": 1
},
"indexUids": {
"kefir": 1
}
},
"duration": "PT0.007391353S",
"startedAt": "2025-01-16T17:00:29.201180268Z",
"finishedAt": "2025-01-16T17:00:29.208571621Z",
"batchStrategy": "unspecified"
},
{
"uid": 9,
"progress": null,
"details": {
"faceting": {
"maxValuesPerFacet": 100
},
"pagination": {
"maxTotalHits": 1000
}
},
"stats": {
"totalNbTasks": 1,
"status": {
"succeeded": 1
},
"types": {
"settingsUpdate": 1
},
"indexUids": {
"kefir": 1
}
},
"duration": "PT0.007445825S",
"startedAt": "2025-01-16T17:00:15.77629445Z",
"finishedAt": "2025-01-16T17:00:15.783740275Z",
"batchStrategy": "unspecified"
},
{
"uid": 8,
"progress": null,
"details": {
"typoTolerance": {
"minWordSizeForTypos": {
"oneTypo": 4
},
"disableOnWords": [
"kefir"
],
"disableOnAttributes": [
"surname"
]
}
},
"stats": {
"totalNbTasks": 1,
"status": {
"succeeded": 1
},
"types": {
"settingsUpdate": 1
},
"indexUids": {
"kefir": 1
}
},
"duration": "PT0.012020083S",
"startedAt": "2025-01-16T16:59:42.744086671Z",
"finishedAt": "2025-01-16T16:59:42.756106754Z",
"batchStrategy": "unspecified"
},
{
"uid": 7,
"progress": null,
"details": {
"typoTolerance": {
"minWordSizeForTypos": {
"oneTypo": 4
}
}
},
"stats": {
"totalNbTasks": 1,
"status": {
"succeeded": 1
},
"types": {
"settingsUpdate": 1
},
"indexUids": {
"kefir": 1
}
},
"duration": "PT0.007440092S",
"startedAt": "2025-01-16T16:58:41.2155771Z",
"finishedAt": "2025-01-16T16:58:41.223017192Z",
"batchStrategy": "unspecified"
},
{
"uid": 6,
"progress": null,
"details": {
"synonyms": {
"boubou": [
"kefir"
]
}
},
"stats": {
"totalNbTasks": 1,
"status": {
"succeeded": 1
},
"types": {
"settingsUpdate": 1
},
"indexUids": {
"kefir": 1
}
},
"duration": "PT0.007565161S",
"startedAt": "2025-01-16T16:54:51.940332781Z",
"finishedAt": "2025-01-16T16:54:51.947897942Z",
"batchStrategy": "unspecified"
},
{
"uid": 5,
"progress": null,
"details": {
"stopWords": [
"le",
"un"
]
},
"stats": {
"totalNbTasks": 1,
"status": {
"succeeded": 1
},
"types": {
"settingsUpdate": 1
},
"indexUids": {
"kefir": 1
}
},
"duration": "PT0.016307263S",
"startedAt": "2025-01-16T16:53:19.913351957Z",
"finishedAt": "2025-01-16T16:53:19.92965922Z",
"batchStrategy": "unspecified"
}
],
"total": 23,
"total": 29,
"limit": 20,
"from": 24,
"next": 4
"from": 30,
"next": 10
}


@@ -4,8 +4,8 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
{
"results": [
{
"uid": 25,
"batchUid": 24,
"uid": 31,
"batchUid": 30,
"indexUid": null,
"status": "succeeded",
"type": "upgradeDatabase",
@@ -20,6 +20,118 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"startedAt": "[date]",
"finishedAt": "[date]"
},
{
"uid": 30,
"batchUid": 29,
"indexUid": "kefir",
"status": "succeeded",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 1
},
"error": null,
"duration": "PT0.067201S",
"enqueuedAt": "2025-07-07T13:43:08.772432Z",
"startedAt": "2025-07-07T13:43:08.772854Z",
"finishedAt": "2025-07-07T13:43:08.840055Z"
},
{
"uid": 29,
"batchUid": 28,
"indexUid": "mieli",
"status": "succeeded",
"type": "indexDeletion",
"canceledBy": null,
"details": {
"deletedDocuments": 1
},
"error": null,
"duration": "PT0.012727S",
"enqueuedAt": "2025-07-07T13:42:50.744793Z",
"startedAt": "2025-07-07T13:42:50.745461Z",
"finishedAt": "2025-07-07T13:42:50.758188Z"
},
{
"uid": 28,
"batchUid": 27,
"indexUid": "kefir",
"status": "failed",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 0
},
"error": {
"message": "Index `kefir`: Bad embedder configuration in the document with id: `2`. Could not parse `._vectors.doggo_embedder`: trailing characters at line 1 column 13",
"code": "invalid_vectors_type",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_vectors_type"
},
"duration": "PT0.059920S",
"enqueuedAt": "2025-07-07T13:42:15.624598Z",
"startedAt": "2025-07-07T13:42:15.625413Z",
"finishedAt": "2025-07-07T13:42:15.685333Z"
},
{
"uid": 27,
"batchUid": 26,
"indexUid": "mieli",
"status": "succeeded",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 1
},
"error": null,
"duration": "PT0.088879S",
"enqueuedAt": "2025-07-07T13:40:01.46081Z",
"startedAt": "2025-07-07T13:40:01.461741Z",
"finishedAt": "2025-07-07T13:40:01.55062Z"
},
{
"uid": 26,
"batchUid": 25,
"indexUid": "kefir",
"status": "succeeded",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 1
},
"error": null,
"duration": "PT0.312911S",
"enqueuedAt": "2025-07-07T13:32:46.13871Z",
"startedAt": "2025-07-07T13:32:46.139785Z",
"finishedAt": "2025-07-07T13:32:46.452696Z"
},
{
"uid": 25,
"batchUid": 24,
"indexUid": "kefir",
"status": "succeeded",
"type": "settingsUpdate",
"canceledBy": null,
"details": {
"embedders": {
"doggo_embedder": {
"source": "huggingFace",
"model": "sentence-transformers/all-MiniLM-L6-v2",
"revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
"documentTemplate": "{{doc.description}}"
}
}
},
"error": null,
"duration": "PT0.247378S",
"enqueuedAt": "2025-07-07T13:28:27.390054Z",
"startedAt": "2025-07-07T13:28:27.391344Z",
"finishedAt": "2025-07-07T13:28:27.638722Z"
},
{
"uid": 24,
"batchUid": 23,
@@ -264,134 +376,10 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"enqueuedAt": "2025-01-16T17:02:52.527382964Z",
"startedAt": "2025-01-16T17:02:52.539749853Z",
"finishedAt": "2025-01-16T17:02:52.547390016Z"
},
{
"uid": 11,
"batchUid": 11,
"indexUid": "kefir",
"status": "succeeded",
"type": "settingsUpdate",
"canceledBy": null,
"details": {
"searchCutoffMs": 8000
},
"error": null,
"duration": "PT0.007307840S",
"enqueuedAt": "2025-01-16T17:01:14.100316617Z",
"startedAt": "2025-01-16T17:01:14.112756687Z",
"finishedAt": "2025-01-16T17:01:14.120064527Z"
},
{
"uid": 10,
"batchUid": 10,
"indexUid": "kefir",
"status": "succeeded",
"type": "settingsUpdate",
"canceledBy": null,
"details": {
"faceting": {
"maxValuesPerFacet": 99
},
"pagination": {
"maxTotalHits": 15
}
},
"error": null,
"duration": "PT0.007391353S",
"enqueuedAt": "2025-01-16T17:00:29.188815062Z",
"startedAt": "2025-01-16T17:00:29.201180268Z",
"finishedAt": "2025-01-16T17:00:29.208571621Z"
},
{
"uid": 9,
"batchUid": 9,
"indexUid": "kefir",
"status": "succeeded",
"type": "settingsUpdate",
"canceledBy": null,
"details": {
"faceting": {
"maxValuesPerFacet": 100
},
"pagination": {
"maxTotalHits": 1000
}
},
"error": null,
"duration": "PT0.007445825S",
"enqueuedAt": "2025-01-16T17:00:15.759501709Z",
"startedAt": "2025-01-16T17:00:15.77629445Z",
"finishedAt": "2025-01-16T17:00:15.783740275Z"
},
{
"uid": 8,
"batchUid": 8,
"indexUid": "kefir",
"status": "succeeded",
"type": "settingsUpdate",
"canceledBy": null,
"details": {
"typoTolerance": {
"minWordSizeForTypos": {
"oneTypo": 4
},
"disableOnWords": [
"kefir"
],
"disableOnAttributes": [
"surname"
]
}
},
"error": null,
"duration": "PT0.012020083S",
"enqueuedAt": "2025-01-16T16:59:42.727292501Z",
"startedAt": "2025-01-16T16:59:42.744086671Z",
"finishedAt": "2025-01-16T16:59:42.756106754Z"
},
{
"uid": 7,
"batchUid": 7,
"indexUid": "kefir",
"status": "succeeded",
"type": "settingsUpdate",
"canceledBy": null,
"details": {
"typoTolerance": {
"minWordSizeForTypos": {
"oneTypo": 4
}
}
},
"error": null,
"duration": "PT0.007440092S",
"enqueuedAt": "2025-01-16T16:58:41.203145044Z",
"startedAt": "2025-01-16T16:58:41.2155771Z",
"finishedAt": "2025-01-16T16:58:41.223017192Z"
},
{
"uid": 6,
"batchUid": 6,
"indexUid": "kefir",
"status": "succeeded",
"type": "settingsUpdate",
"canceledBy": null,
"details": {
"synonyms": {
"boubou": [
"kefir"
]
}
},
"error": null,
"duration": "PT0.007565161S",
"enqueuedAt": "2025-01-16T16:54:51.927866243Z",
"startedAt": "2025-01-16T16:54:51.940332781Z",
"finishedAt": "2025-01-16T16:54:51.947897942Z"
}
],
"total": 24,
"total": 30,
"limit": 20,
"from": 25,
"next": 5
"from": 31,
"next": 11
}


@@ -4,8 +4,8 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
{
"results": [
{
"uid": 25,
"batchUid": 24,
"uid": 31,
"batchUid": 30,
"indexUid": null,
"status": "succeeded",
"type": "upgradeDatabase",
@@ -20,6 +20,118 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"startedAt": "[date]",
"finishedAt": "[date]"
},
{
"uid": 30,
"batchUid": 29,
"indexUid": "kefir",
"status": "succeeded",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 1
},
"error": null,
"duration": "PT0.067201S",
"enqueuedAt": "2025-07-07T13:43:08.772432Z",
"startedAt": "2025-07-07T13:43:08.772854Z",
"finishedAt": "2025-07-07T13:43:08.840055Z"
},
{
"uid": 29,
"batchUid": 28,
"indexUid": "mieli",
"status": "succeeded",
"type": "indexDeletion",
"canceledBy": null,
"details": {
"deletedDocuments": 1
},
"error": null,
"duration": "PT0.012727S",
"enqueuedAt": "2025-07-07T13:42:50.744793Z",
"startedAt": "2025-07-07T13:42:50.745461Z",
"finishedAt": "2025-07-07T13:42:50.758188Z"
},
{
"uid": 28,
"batchUid": 27,
"indexUid": "kefir",
"status": "failed",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 0
},
"error": {
"message": "Index `kefir`: Bad embedder configuration in the document with id: `2`. Could not parse `._vectors.doggo_embedder`: trailing characters at line 1 column 13",
"code": "invalid_vectors_type",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_vectors_type"
},
"duration": "PT0.059920S",
"enqueuedAt": "2025-07-07T13:42:15.624598Z",
"startedAt": "2025-07-07T13:42:15.625413Z",
"finishedAt": "2025-07-07T13:42:15.685333Z"
},
{
"uid": 27,
"batchUid": 26,
"indexUid": "mieli",
"status": "succeeded",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 1
},
"error": null,
"duration": "PT0.088879S",
"enqueuedAt": "2025-07-07T13:40:01.46081Z",
"startedAt": "2025-07-07T13:40:01.461741Z",
"finishedAt": "2025-07-07T13:40:01.55062Z"
},
{
"uid": 26,
"batchUid": 25,
"indexUid": "kefir",
"status": "succeeded",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 1
},
"error": null,
"duration": "PT0.312911S",
"enqueuedAt": "2025-07-07T13:32:46.13871Z",
"startedAt": "2025-07-07T13:32:46.139785Z",
"finishedAt": "2025-07-07T13:32:46.452696Z"
},
{
"uid": 25,
"batchUid": 24,
"indexUid": "kefir",
"status": "succeeded",
"type": "settingsUpdate",
"canceledBy": null,
"details": {
"embedders": {
"doggo_embedder": {
"source": "huggingFace",
"model": "sentence-transformers/all-MiniLM-L6-v2",
"revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
"documentTemplate": "{{doc.description}}"
}
}
},
"error": null,
"duration": "PT0.247378S",
"enqueuedAt": "2025-07-07T13:28:27.390054Z",
"startedAt": "2025-07-07T13:28:27.391344Z",
"finishedAt": "2025-07-07T13:28:27.638722Z"
},
{
"uid": 24,
"batchUid": 23,
@@ -264,134 +376,10 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"enqueuedAt": "2025-01-16T17:02:52.527382964Z",
"startedAt": "2025-01-16T17:02:52.539749853Z",
"finishedAt": "2025-01-16T17:02:52.547390016Z"
},
{
"uid": 11,
"batchUid": 11,
"indexUid": "kefir",
"status": "succeeded",
"type": "settingsUpdate",
"canceledBy": null,
"details": {
"searchCutoffMs": 8000
},
"error": null,
"duration": "PT0.007307840S",
"enqueuedAt": "2025-01-16T17:01:14.100316617Z",
"startedAt": "2025-01-16T17:01:14.112756687Z",
"finishedAt": "2025-01-16T17:01:14.120064527Z"
},
{
"uid": 10,
"batchUid": 10,
"indexUid": "kefir",
"status": "succeeded",
"type": "settingsUpdate",
"canceledBy": null,
"details": {
"faceting": {
"maxValuesPerFacet": 99
},
"pagination": {
"maxTotalHits": 15
}
},
"error": null,
"duration": "PT0.007391353S",
"enqueuedAt": "2025-01-16T17:00:29.188815062Z",
"startedAt": "2025-01-16T17:00:29.201180268Z",
"finishedAt": "2025-01-16T17:00:29.208571621Z"
},
{
"uid": 9,
"batchUid": 9,
"indexUid": "kefir",
"status": "succeeded",
"type": "settingsUpdate",
"canceledBy": null,
"details": {
"faceting": {
"maxValuesPerFacet": 100
},
"pagination": {
"maxTotalHits": 1000
}
},
"error": null,
"duration": "PT0.007445825S",
"enqueuedAt": "2025-01-16T17:00:15.759501709Z",
"startedAt": "2025-01-16T17:00:15.77629445Z",
"finishedAt": "2025-01-16T17:00:15.783740275Z"
},
{
"uid": 8,
"batchUid": 8,
"indexUid": "kefir",
"status": "succeeded",
"type": "settingsUpdate",
"canceledBy": null,
"details": {
"typoTolerance": {
"minWordSizeForTypos": {
"oneTypo": 4
},
"disableOnWords": [
"kefir"
],
"disableOnAttributes": [
"surname"
]
}
},
"error": null,
"duration": "PT0.012020083S",
"enqueuedAt": "2025-01-16T16:59:42.727292501Z",
"startedAt": "2025-01-16T16:59:42.744086671Z",
"finishedAt": "2025-01-16T16:59:42.756106754Z"
},
{
"uid": 7,
"batchUid": 7,
"indexUid": "kefir",
"status": "succeeded",
"type": "settingsUpdate",
"canceledBy": null,
"details": {
"typoTolerance": {
"minWordSizeForTypos": {
"oneTypo": 4
}
}
},
"error": null,
"duration": "PT0.007440092S",
"enqueuedAt": "2025-01-16T16:58:41.203145044Z",
"startedAt": "2025-01-16T16:58:41.2155771Z",
"finishedAt": "2025-01-16T16:58:41.223017192Z"
},
{
"uid": 6,
"batchUid": 6,
"indexUid": "kefir",
"status": "succeeded",
"type": "settingsUpdate",
"canceledBy": null,
"details": {
"synonyms": {
"boubou": [
"kefir"
]
}
},
"error": null,
"duration": "PT0.007565161S",
"enqueuedAt": "2025-01-16T16:54:51.927866243Z",
"startedAt": "2025-01-16T16:54:51.940332781Z",
"finishedAt": "2025-01-16T16:54:51.947897942Z"
}
],
"total": 24,
"total": 30,
"limit": 20,
"from": 25,
"next": 5
"from": 31,
"next": 11
}


@@ -4,8 +4,8 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
{
"results": [
{
"uid": 25,
"batchUid": 24,
"uid": 31,
"batchUid": 30,
"indexUid": null,
"status": "succeeded",
"type": "upgradeDatabase",
@@ -20,6 +20,118 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"startedAt": "[date]",
"finishedAt": "[date]"
},
{
"uid": 30,
"batchUid": 29,
"indexUid": "kefir",
"status": "succeeded",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 1
},
"error": null,
"duration": "PT0.067201S",
"enqueuedAt": "2025-07-07T13:43:08.772432Z",
"startedAt": "2025-07-07T13:43:08.772854Z",
"finishedAt": "2025-07-07T13:43:08.840055Z"
},
{
"uid": 29,
"batchUid": 28,
"indexUid": "mieli",
"status": "succeeded",
"type": "indexDeletion",
"canceledBy": null,
"details": {
"deletedDocuments": 1
},
"error": null,
"duration": "PT0.012727S",
"enqueuedAt": "2025-07-07T13:42:50.744793Z",
"startedAt": "2025-07-07T13:42:50.745461Z",
"finishedAt": "2025-07-07T13:42:50.758188Z"
},
{
"uid": 28,
"batchUid": 27,
"indexUid": "kefir",
"status": "failed",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 0
},
"error": {
"message": "Index `kefir`: Bad embedder configuration in the document with id: `2`. Could not parse `._vectors.doggo_embedder`: trailing characters at line 1 column 13",
"code": "invalid_vectors_type",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_vectors_type"
},
"duration": "PT0.059920S",
"enqueuedAt": "2025-07-07T13:42:15.624598Z",
"startedAt": "2025-07-07T13:42:15.625413Z",
"finishedAt": "2025-07-07T13:42:15.685333Z"
},
{
"uid": 27,
"batchUid": 26,
"indexUid": "mieli",
"status": "succeeded",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 1
},
"error": null,
"duration": "PT0.088879S",
"enqueuedAt": "2025-07-07T13:40:01.46081Z",
"startedAt": "2025-07-07T13:40:01.461741Z",
"finishedAt": "2025-07-07T13:40:01.55062Z"
},
{
"uid": 26,
"batchUid": 25,
"indexUid": "kefir",
"status": "succeeded",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 1
},
"error": null,
"duration": "PT0.312911S",
"enqueuedAt": "2025-07-07T13:32:46.13871Z",
"startedAt": "2025-07-07T13:32:46.139785Z",
"finishedAt": "2025-07-07T13:32:46.452696Z"
},
{
"uid": 25,
"batchUid": 24,
"indexUid": "kefir",
"status": "succeeded",
"type": "settingsUpdate",
"canceledBy": null,
"details": {
"embedders": {
"doggo_embedder": {
"source": "huggingFace",
"model": "sentence-transformers/all-MiniLM-L6-v2",
"revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
"documentTemplate": "{{doc.description}}"
}
}
},
"error": null,
"duration": "PT0.247378S",
"enqueuedAt": "2025-07-07T13:28:27.390054Z",
"startedAt": "2025-07-07T13:28:27.391344Z",
"finishedAt": "2025-07-07T13:28:27.638722Z"
},
{
"uid": 24,
"batchUid": 23,
@@ -264,134 +376,10 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"enqueuedAt": "2025-01-16T17:02:52.527382964Z",
"startedAt": "2025-01-16T17:02:52.539749853Z",
"finishedAt": "2025-01-16T17:02:52.547390016Z"
},
{
"uid": 11,
"batchUid": 11,
"indexUid": "kefir",
"status": "succeeded",
"type": "settingsUpdate",
"canceledBy": null,
"details": {
"searchCutoffMs": 8000
},
"error": null,
"duration": "PT0.007307840S",
"enqueuedAt": "2025-01-16T17:01:14.100316617Z",
"startedAt": "2025-01-16T17:01:14.112756687Z",
"finishedAt": "2025-01-16T17:01:14.120064527Z"
},
{
"uid": 10,
"batchUid": 10,
"indexUid": "kefir",
"status": "succeeded",
"type": "settingsUpdate",
"canceledBy": null,
"details": {
"faceting": {
"maxValuesPerFacet": 99
},
"pagination": {
"maxTotalHits": 15
}
},
"error": null,
"duration": "PT0.007391353S",
"enqueuedAt": "2025-01-16T17:00:29.188815062Z",
"startedAt": "2025-01-16T17:00:29.201180268Z",
"finishedAt": "2025-01-16T17:00:29.208571621Z"
},
{
"uid": 9,
"batchUid": 9,
"indexUid": "kefir",
"status": "succeeded",
"type": "settingsUpdate",
"canceledBy": null,
"details": {
"faceting": {
"maxValuesPerFacet": 100
},
"pagination": {
"maxTotalHits": 1000
}
},
"error": null,
"duration": "PT0.007445825S",
"enqueuedAt": "2025-01-16T17:00:15.759501709Z",
"startedAt": "2025-01-16T17:00:15.77629445Z",
"finishedAt": "2025-01-16T17:00:15.783740275Z"
},
{
"uid": 8,
"batchUid": 8,
"indexUid": "kefir",
"status": "succeeded",
"type": "settingsUpdate",
"canceledBy": null,
"details": {
"typoTolerance": {
"minWordSizeForTypos": {
"oneTypo": 4
},
"disableOnWords": [
"kefir"
],
"disableOnAttributes": [
"surname"
]
}
},
"error": null,
"duration": "PT0.012020083S",
"enqueuedAt": "2025-01-16T16:59:42.727292501Z",
"startedAt": "2025-01-16T16:59:42.744086671Z",
"finishedAt": "2025-01-16T16:59:42.756106754Z"
},
{
"uid": 7,
"batchUid": 7,
"indexUid": "kefir",
"status": "succeeded",
"type": "settingsUpdate",
"canceledBy": null,
"details": {
"typoTolerance": {
"minWordSizeForTypos": {
"oneTypo": 4
}
}
},
"error": null,
"duration": "PT0.007440092S",
"enqueuedAt": "2025-01-16T16:58:41.203145044Z",
"startedAt": "2025-01-16T16:58:41.2155771Z",
"finishedAt": "2025-01-16T16:58:41.223017192Z"
},
{
"uid": 6,
"batchUid": 6,
"indexUid": "kefir",
"status": "succeeded",
"type": "settingsUpdate",
"canceledBy": null,
"details": {
"synonyms": {
"boubou": [
"kefir"
]
}
},
"error": null,
"duration": "PT0.007565161S",
"enqueuedAt": "2025-01-16T16:54:51.927866243Z",
"startedAt": "2025-01-16T16:54:51.940332781Z",
"finishedAt": "2025-01-16T16:54:51.947897942Z"
}
],
"total": 24,
"total": 30,
"limit": 20,
"from": 25,
"next": 5
"from": 31,
"next": 11
}


@@ -4,7 +4,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
{
"results": [
{
"uid": 24,
"uid": 30,
"progress": null,
"details": {
"upgradeFrom": "v1.12.0",
@@ -26,6 +26,155 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"finishedAt": "[date]",
"batchStrategy": "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type."
},
{
"uid": 29,
"progress": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 1
},
"stats": {
"totalNbTasks": 1,
"status": {
"succeeded": 1
},
"types": {
"documentAdditionOrUpdate": 1
},
"indexUids": {
"kefir": 1
}
},
"duration": "PT0.067201S",
"startedAt": "2025-07-07T13:43:08.772854Z",
"finishedAt": "2025-07-07T13:43:08.840055Z",
"batchStrategy": "unspecified"
},
{
"uid": 28,
"progress": null,
"details": {
"deletedDocuments": 1
},
"stats": {
"totalNbTasks": 1,
"status": {
"succeeded": 1
},
"types": {
"indexDeletion": 1
},
"indexUids": {
"mieli": 1
}
},
"duration": "PT0.012727S",
"startedAt": "2025-07-07T13:42:50.745461Z",
"finishedAt": "2025-07-07T13:42:50.758188Z",
"batchStrategy": "unspecified"
},
{
"uid": 27,
"progress": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 0
},
"stats": {
"totalNbTasks": 1,
"status": {
"failed": 1
},
"types": {
"documentAdditionOrUpdate": 1
},
"indexUids": {
"kefir": 1
}
},
"duration": "PT0.059920S",
"startedAt": "2025-07-07T13:42:15.625413Z",
"finishedAt": "2025-07-07T13:42:15.685333Z",
"batchStrategy": "unspecified"
},
{
"uid": 26,
"progress": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 1
},
"stats": {
"totalNbTasks": 1,
"status": {
"succeeded": 1
},
"types": {
"documentAdditionOrUpdate": 1
},
"indexUids": {
"mieli": 1
}
},
"duration": "PT0.088879S",
"startedAt": "2025-07-07T13:40:01.461741Z",
"finishedAt": "2025-07-07T13:40:01.55062Z",
"batchStrategy": "unspecified"
},
{
"uid": 25,
"progress": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 1
},
"stats": {
"totalNbTasks": 1,
"status": {
"succeeded": 1
},
"types": {
"documentAdditionOrUpdate": 1
},
"indexUids": {
"kefir": 1
}
},
"duration": "PT0.312911S",
"startedAt": "2025-07-07T13:32:46.139785Z",
"finishedAt": "2025-07-07T13:32:46.452696Z",
"batchStrategy": "unspecified"
},
{
"uid": 24,
"progress": null,
"details": {
"embedders": {
"doggo_embedder": {
"source": "huggingFace",
"model": "sentence-transformers/all-MiniLM-L6-v2",
"revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
"documentTemplate": "{{doc.description}}"
}
}
},
"stats": {
"totalNbTasks": 1,
"status": {
"succeeded": 1
},
"types": {
"settingsUpdate": 1
},
"indexUids": {
"kefir": 1
}
},
"duration": "PT0.247378S",
"startedAt": "2025-07-07T13:28:27.391344Z",
"finishedAt": "2025-07-07T13:28:27.638722Z",
"batchStrategy": "unspecified"
},
{
"uid": 23,
"progress": null,
@@ -642,8 +791,8 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"batchStrategy": "unspecified"
}
],
"total": 25,
"total": 31,
"limit": 1000,
"from": 24,
"from": 30,
"next": null
}


@@ -4,8 +4,8 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
{
"results": [
{
"uid": 25,
"batchUid": 24,
"uid": 31,
"batchUid": 30,
"indexUid": null,
"status": "succeeded",
"type": "upgradeDatabase",
@@ -20,6 +20,118 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"startedAt": "[date]",
"finishedAt": "[date]"
},
{
"uid": 30,
"batchUid": 29,
"indexUid": "kefir",
"status": "succeeded",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 1
},
"error": null,
"duration": "PT0.067201S",
"enqueuedAt": "2025-07-07T13:43:08.772432Z",
"startedAt": "2025-07-07T13:43:08.772854Z",
"finishedAt": "2025-07-07T13:43:08.840055Z"
},
{
"uid": 29,
"batchUid": 28,
"indexUid": "mieli",
"status": "succeeded",
"type": "indexDeletion",
"canceledBy": null,
"details": {
"deletedDocuments": 1
},
"error": null,
"duration": "PT0.012727S",
"enqueuedAt": "2025-07-07T13:42:50.744793Z",
"startedAt": "2025-07-07T13:42:50.745461Z",
"finishedAt": "2025-07-07T13:42:50.758188Z"
},
{
"uid": 28,
"batchUid": 27,
"indexUid": "kefir",
"status": "failed",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 0
},
"error": {
"message": "Index `kefir`: Bad embedder configuration in the document with id: `2`. Could not parse `._vectors.doggo_embedder`: trailing characters at line 1 column 13",
"code": "invalid_vectors_type",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_vectors_type"
},
"duration": "PT0.059920S",
"enqueuedAt": "2025-07-07T13:42:15.624598Z",
"startedAt": "2025-07-07T13:42:15.625413Z",
"finishedAt": "2025-07-07T13:42:15.685333Z"
},
{
"uid": 27,
"batchUid": 26,
"indexUid": "mieli",
"status": "succeeded",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 1
},
"error": null,
"duration": "PT0.088879S",
"enqueuedAt": "2025-07-07T13:40:01.46081Z",
"startedAt": "2025-07-07T13:40:01.461741Z",
"finishedAt": "2025-07-07T13:40:01.55062Z"
},
{
"uid": 26,
"batchUid": 25,
"indexUid": "kefir",
"status": "succeeded",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 1
},
"error": null,
"duration": "PT0.312911S",
"enqueuedAt": "2025-07-07T13:32:46.13871Z",
"startedAt": "2025-07-07T13:32:46.139785Z",
"finishedAt": "2025-07-07T13:32:46.452696Z"
},
{
"uid": 25,
"batchUid": 24,
"indexUid": "kefir",
"status": "succeeded",
"type": "settingsUpdate",
"canceledBy": null,
"details": {
"embedders": {
"doggo_embedder": {
"source": "huggingFace",
"model": "sentence-transformers/all-MiniLM-L6-v2",
"revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
"documentTemplate": "{{doc.description}}"
}
}
},
"error": null,
"duration": "PT0.247378S",
"enqueuedAt": "2025-07-07T13:28:27.390054Z",
"startedAt": "2025-07-07T13:28:27.391344Z",
"finishedAt": "2025-07-07T13:28:27.638722Z"
},
{
"uid": 24,
"batchUid": 23,
@@ -497,8 +609,8 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"finishedAt": "2025-01-16T16:45:16.131303739Z"
}
],
"total": 26,
"total": 32,
"limit": 1000,
"from": 25,
"from": 31,
"next": null
}


@@ -114,13 +114,13 @@ async fn check_the_index_scheduler(server: &Server) {
// All the indexes are still present
let (indexes, _) = server.list_indexes(None, None).await;
snapshot!(indexes, @r#"
snapshot!(indexes, @r###"
{
"results": [
{
"uid": "kefir",
"createdAt": "2025-01-16T16:45:16.020663157Z",
"updatedAt": "2025-01-23T11:36:22.634859166Z",
"updatedAt": "2025-07-07T13:43:08.835381Z",
"primaryKey": "id"
}
],
@@ -128,7 +128,7 @@ async fn check_the_index_scheduler(server: &Server) {
"limit": 20,
"total": 1
}
"#);
"###);
// And their metadata are still right
let (stats, _) = server.stats().await;
assert_json_snapshot!(stats, {
@@ -141,21 +141,21 @@ async fn check_the_index_scheduler(server: &Server) {
{
"databaseSize": "[bytes]",
"usedDatabaseSize": "[bytes]",
"lastUpdate": "2025-01-23T11:36:22.634859166Z",
"lastUpdate": "2025-07-07T13:43:08.835381Z",
"indexes": {
"kefir": {
"numberOfDocuments": 1,
"numberOfDocuments": 2,
"rawDocumentDbSize": "[bytes]",
"avgDocumentSize": "[bytes]",
"isIndexing": false,
"numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0,
"numberOfEmbeddings": 2,
"numberOfEmbeddedDocuments": 2,
"fieldDistribution": {
"age": 1,
"description": 1,
"id": 1,
"name": 1,
"surname": 1
"age": 2,
"description": 2,
"id": 2,
"name": 2,
"surname": 2
}
}
}
@@ -227,21 +227,21 @@ async fn check_the_index_scheduler(server: &Server) {
{
"databaseSize": "[bytes]",
"usedDatabaseSize": "[bytes]",
"lastUpdate": "2025-01-23T11:36:22.634859166Z",
"lastUpdate": "2025-07-07T13:43:08.835381Z",
"indexes": {
"kefir": {
"numberOfDocuments": 1,
"numberOfDocuments": 2,
"rawDocumentDbSize": "[bytes]",
"avgDocumentSize": "[bytes]",
"isIndexing": false,
"numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0,
"numberOfEmbeddings": 2,
"numberOfEmbeddedDocuments": 2,
"fieldDistribution": {
"age": 1,
"description": 1,
"id": 1,
"name": 1,
"surname": 1
"age": 2,
"description": 2,
"id": 2,
"name": 2,
"surname": 2
}
}
}
@@ -254,18 +254,18 @@ async fn check_the_index_scheduler(server: &Server) {
".avgDocumentSize" => "[bytes]",
}), @r###"
{
"numberOfDocuments": 1,
"numberOfDocuments": 2,
"rawDocumentDbSize": "[bytes]",
"avgDocumentSize": "[bytes]",
"isIndexing": false,
"numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0,
"numberOfEmbeddings": 2,
"numberOfEmbeddedDocuments": 2,
"fieldDistribution": {
"age": 1,
"description": 1,
"id": 1,
"name": 1,
"surname": 1
"age": 2,
"description": 2,
"id": 2,
"name": 2,
"surname": 2
}
}
"###);
@@ -295,4 +295,8 @@ async fn check_the_index_features(server: &Server) {
let (results, _status) =
kefir.search_post(json!({ "sort": ["age:asc"], "filter": "surname = kefirounet" })).await;
snapshot!(results, name: "search_with_sort_and_filter");
// ensure we can still retrieve the vectors and that their `regenerate` flag is intact.
let (results, _status) = kefir.search_post(json!({"retrieveVectors": true})).await;
snapshot!(json_string!(results["hits"], {"[]._vectors.doggo_embedder.embeddings" => "[vector]"}), name: "search_with_retrieve_vectors");
}

File diff suppressed because it is too large


@@ -1,4 +1,5 @@
mod binary_quantized;
mod fragments;
#[cfg(feature = "test-ollama")]
mod ollama;
mod openai;


@@ -136,7 +136,7 @@ fn long_text() -> &'static str {
})
}
async fn create_mock_tokenized() -> (MockServer, Value) {
async fn create_mock_tokenized() -> (&'static MockServer, Value) {
create_mock_with_template("{{doc.text}}", ModelDimensions::Large, false, false).await
}
@@ -145,8 +145,8 @@ async fn create_mock_with_template(
model_dimensions: ModelDimensions,
fallible: bool,
slow: bool,
) -> (MockServer, Value) {
let mock_server = MockServer::start().await;
) -> (&'static MockServer, Value) {
let mock_server = Box::leak(Box::new(MockServer::start().await));
const API_KEY: &str = "my-api-key";
const API_KEY_BEARER: &str = "Bearer my-api-key";
@@ -299,7 +299,7 @@ async fn create_mock_with_template(
}
}))
})
.mount(&mock_server)
.mount(mock_server)
.await;
let url = mock_server.uri();
@@ -321,27 +321,27 @@ const DOGGO_TEMPLATE: &str = r#"{%- if doc.gender == "F" -%}Une chienne nommée
Un chien nommé {{doc.name}}, né en {{doc.birthyear}}
{%- endif %}, de race {{doc.breed}}."#;
async fn create_mock() -> (MockServer, Value) {
async fn create_mock() -> (&'static MockServer, Value) {
create_mock_with_template(DOGGO_TEMPLATE, ModelDimensions::Large, false, false).await
}
async fn create_mock_dimensions() -> (MockServer, Value) {
async fn create_mock_dimensions() -> (&'static MockServer, Value) {
create_mock_with_template(DOGGO_TEMPLATE, ModelDimensions::Large512, false, false).await
}
async fn create_mock_small_embedding_model() -> (MockServer, Value) {
async fn create_mock_small_embedding_model() -> (&'static MockServer, Value) {
create_mock_with_template(DOGGO_TEMPLATE, ModelDimensions::Small, false, false).await
}
async fn create_mock_legacy_embedding_model() -> (MockServer, Value) {
async fn create_mock_legacy_embedding_model() -> (&'static MockServer, Value) {
create_mock_with_template(DOGGO_TEMPLATE, ModelDimensions::Ada, false, false).await
}
async fn create_fallible_mock() -> (MockServer, Value) {
async fn create_fallible_mock() -> (&'static MockServer, Value) {
create_mock_with_template(DOGGO_TEMPLATE, ModelDimensions::Large, true, false).await
}
async fn create_slow_mock() -> (MockServer, Value) {
async fn create_slow_mock() -> (&'static MockServer, Value) {
create_mock_with_template(DOGGO_TEMPLATE, ModelDimensions::Large, true, true).await
}
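
Every constructor in this file now returns `&'static MockServer` by leaking the server. A minimal sketch of the pattern under the wiremock API used here; the helper name is hypothetical:

use wiremock::matchers::method;
use wiremock::{Mock, MockServer, ResponseTemplate};

// Leaking promotes the server to the 'static lifetime, so the reference can be
// returned from helpers and captured by closures without ownership gymnastics.
// The allocation is never freed, which is acceptable in a test binary: the OS
// reclaims it when the process exits.
async fn leaked_mock() -> &'static MockServer {
    let server: &'static MockServer = Box::leak(Box::new(MockServer::start().await));
    Mock::given(method("POST"))
        .respond_with(ResponseTemplate::new(200))
        .mount(server) // `mount` takes `&MockServer`, so the leaked reference passes directly
        .await;
    server
}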


@@ -12,8 +12,8 @@ use crate::common::Value;
use crate::json;
use crate::vector::{get_server_vector, GetAllDocumentsOptions};
async fn create_mock() -> (MockServer, Value) {
let mock_server = MockServer::start().await;
async fn create_mock() -> (&'static MockServer, Value) {
let mock_server = Box::leak(Box::new(MockServer::start().await));
let text_to_embedding: BTreeMap<_, _> = vec![
// text -> embedding
@@ -32,7 +32,7 @@ async fn create_mock() -> (MockServer, Value) {
json!({ "data": text_to_embedding.get(text.as_str()).unwrap_or(&[99., 99., 99.]) }),
)
})
.mount(&mock_server)
.mount(mock_server)
.await;
let url = mock_server.uri();
@@ -50,8 +50,8 @@ async fn create_mock() -> (MockServer, Value) {
(mock_server, embedder_settings)
}
async fn create_mock_default_template() -> (MockServer, Value) {
let mock_server = MockServer::start().await;
async fn create_mock_default_template() -> (&'static MockServer, Value) {
let mock_server = Box::leak(Box::new(MockServer::start().await));
let text_to_embedding: BTreeMap<_, _> = vec![
// text -> embedding
@@ -73,7 +73,7 @@ async fn create_mock_default_template() -> (MockServer, Value) {
.set_body_json(json!({"error": "text not found", "text": text})),
}
})
.mount(&mock_server)
.mount(mock_server)
.await;
let url = mock_server.uri();
@@ -106,8 +106,8 @@ struct SingleResponse {
embedding: Vec<f32>,
}
async fn create_mock_multiple() -> (MockServer, Value) {
let mock_server = MockServer::start().await;
async fn create_mock_multiple() -> (&'static MockServer, Value) {
let mock_server = Box::leak(Box::new(MockServer::start().await));
let text_to_embedding: BTreeMap<_, _> = vec![
// text -> embedding
@@ -146,7 +146,7 @@ async fn create_mock_multiple() -> (MockServer, Value) {
ResponseTemplate::new(200).set_body_json(response)
})
.mount(&mock_server)
.mount(mock_server)
.await;
let url = mock_server.uri();
@@ -176,8 +176,8 @@ struct SingleRequest {
input: String,
}
async fn create_mock_single_response_in_array() -> (MockServer, Value) {
let mock_server = MockServer::start().await;
async fn create_mock_single_response_in_array() -> (&'static MockServer, Value) {
let mock_server = Box::leak(Box::new(MockServer::start().await));
let text_to_embedding: BTreeMap<_, _> = vec![
// text -> embedding
@@ -212,7 +212,7 @@ async fn create_mock_single_response_in_array() -> (MockServer, Value) {
ResponseTemplate::new(200).set_body_json(response)
})
.mount(&mock_server)
.mount(mock_server)
.await;
let url = mock_server.uri();
@@ -236,8 +236,8 @@ async fn create_mock_single_response_in_array() -> (MockServer, Value) {
(mock_server, embedder_settings)
}
async fn create_mock_raw_with_custom_header() -> (MockServer, Value) {
let mock_server = MockServer::start().await;
async fn create_mock_raw_with_custom_header() -> (&'static MockServer, Value) {
let mock_server = Box::leak(Box::new(MockServer::start().await));
let text_to_embedding: BTreeMap<_, _> = vec![
// text -> embedding
@@ -277,7 +277,7 @@ async fn create_mock_raw_with_custom_header() -> (MockServer, Value) {
ResponseTemplate::new(200).set_body_json(output)
})
.mount(&mock_server)
.mount(mock_server)
.await;
let url = mock_server.uri();
@@ -293,8 +293,8 @@ async fn create_mock_raw_with_custom_header() -> (MockServer, Value) {
(mock_server, embedder_settings)
}
async fn create_mock_raw() -> (MockServer, Value) {
let mock_server = MockServer::start().await;
async fn create_mock_raw() -> (&'static MockServer, Value) {
let mock_server = Box::leak(Box::new(MockServer::start().await));
let text_to_embedding: BTreeMap<_, _> = vec![
// text -> embedding
@@ -321,7 +321,7 @@ async fn create_mock_raw() -> (MockServer, Value) {
ResponseTemplate::new(200).set_body_json(output)
})
.mount(&mock_server)
.mount(mock_server)
.await;
let url = mock_server.uri();
@@ -337,8 +337,8 @@ async fn create_mock_raw() -> (MockServer, Value) {
(mock_server, embedder_settings)
}
async fn create_faulty_mock_raw(sender: mpsc::Sender<()>) -> (MockServer, Value) {
let mock_server = MockServer::start().await;
async fn create_faulty_mock_raw(sender: mpsc::Sender<()>) -> (&'static MockServer, Value) {
let mock_server = Box::leak(Box::new(MockServer::start().await));
let count = AtomicUsize::new(0);
Mock::given(method("POST"))
@@ -355,7 +355,7 @@ async fn create_faulty_mock_raw(sender: mpsc::Sender<()>) -> (MockServer, Value)
ResponseTemplate::new(500).set_body_string("Service Unavailable")
}
})
.mount(&mock_server)
.mount(mock_server)
.await;
let url = mock_server.uri();


@@ -101,14 +101,7 @@ async fn reset_embedder_documents() {
server.wait_task(response.uid()).await;
// Make sure the documents are still present
let (documents, _code) = index
.get_all_documents(GetAllDocumentsOptions {
limit: None,
offset: None,
retrieve_vectors: false,
fields: None,
})
.await;
let (documents, _code) = index.get_all_documents(GetAllDocumentsOptions::default()).await;
snapshot!(json_string!(documents), @r###"
{
"results": [


@@ -15,6 +15,7 @@ use meilisearch_types::heed::{
};
use meilisearch_types::milli::constants::RESERVED_VECTORS_FIELD_NAME;
use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader};
use meilisearch_types::milli::index::EmbeddingsWithMetadata;
use meilisearch_types::milli::vector::parsed_vectors::{ExplicitVectors, VectorOrArrayOfVectors};
use meilisearch_types::milli::{obkv_to_json, BEU32};
use meilisearch_types::tasks::{Status, Task};
@@ -591,12 +592,21 @@ fn export_documents(
.into());
};
for (embedder_name, (embeddings, regenerate)) in embeddings {
for (
embedder_name,
EmbeddingsWithMetadata { embeddings, regenerate, has_fragments },
) in embeddings
{
let embeddings = ExplicitVectors {
embeddings: Some(VectorOrArrayOfVectors::from_array_of_vectors(
embeddings,
)),
regenerate,
regenerate: regenerate &&
// Meilisearch does not handle dumps with fragments well: because the fragments
// are marked as user-provided, all embeddings would be regenerated on any
// settings change or document update.
// To prevent this, we mark the embeddings as non-regenerate in this case.
!has_fragments,
};
vectors
.insert(embedder_name, serde_json::to_value(embeddings).unwrap());
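
The guard on `regenerate` reads better as a single predicate. A hypothetical helper stating the same rule:

/// Embeddings are exported as regenerable only when the embedder uses no
/// fragments: fragment embeddings are dumped as user-provided, so keeping
/// `regenerate == true` would force re-embedding everything on the next
/// settings change or document update once the dump is imported.
fn exported_regenerate(regenerate: bool, has_fragments: bool) -> bool {
    regenerate && !has_fragments
}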


@@ -40,7 +40,7 @@ indexmap = { version = "2.9.0", features = ["serde"] }
json-depth-checker = { path = "../json-depth-checker" }
levenshtein_automata = { version = "0.2.1", features = ["fst_automaton"] }
memchr = "2.7.5"
memmap2 = "0.9.5"
memmap2 = "0.9.7"
obkv = "0.3.0"
once_cell = "1.21.3"
ordered-float = "5.0.0"


@@ -168,6 +168,16 @@ pub enum SortError {
ReservedNameForFilter { name: String },
}
impl SortError {
pub fn into_search_error(self) -> Error {
Error::UserError(UserError::SortError { error: self, search: true })
}
pub fn into_document_error(self) -> Error {
Error::UserError(UserError::SortError { error: self, search: false })
}
}
impl From<AscDescError> for SortError {
fn from(error: AscDescError) -> Self {
match error {
@@ -190,12 +200,6 @@ impl From<AscDescError> for SortError {
}
}
impl From<SortError> for Error {
fn from(error: SortError) -> Self {
Self::UserError(UserError::SortError(error))
}
}
#[cfg(test)]
mod tests {
use big_s::S;
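
With the blanket `From<SortError> for Error` impl gone, every call site must say whether the failing sort came from a search or a documents request, which sets the new `search` flag on `UserError::SortError`. A sketch of the resulting call-site shape; the function is hypothetical and assumes `AscDesc` implements `FromStr` with `AscDescError` as its error type, as the `From<AscDescError> for SortError` impl above suggests:

fn parse_search_sort(raw: &str) -> Result<AscDesc, Error> {
    // `into_search_error` records `search: true`, so the rendered message can
    // point at the search route rather than the documents route.
    raw.parse::<AscDesc>().map_err(|e| SortError::from(e).into_search_error())
}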


@@ -0,0 +1,294 @@
use crate::{
distance_between_two_points,
heed_codec::facet::{FieldDocIdFacetCodec, OrderedF64Codec},
lat_lng_to_xyz,
search::new::{facet_string_values, facet_values_prefix_key},
GeoPoint, Index,
};
use heed::{
types::{Bytes, Unit},
RoPrefix, RoTxn,
};
use roaring::RoaringBitmap;
use rstar::RTree;
use std::collections::VecDeque;
#[derive(Debug, Clone, Copy)]
pub struct GeoSortParameter {
// Define the strategy used by the geo sort
pub strategy: GeoSortStrategy,
// Limit the number of docs in a single bucket to avoid unexpectedly large overhead
pub max_bucket_size: u64,
// To absorb GPS and geographical-calculation errors, distances smaller than distance_error_margin are treated as equal
pub distance_error_margin: f64,
}
impl Default for GeoSortParameter {
fn default() -> Self {
Self {
strategy: GeoSortStrategy::default(),
max_bucket_size: 1000,
distance_error_margin: 1.0,
}
}
}
/// Define the strategy used by the geo sort.
/// The parameter represents the cache size, and, in the case of the Dynamic strategy,
/// the point where we move from using the iterative strategy to the rtree.
#[derive(Debug, Clone, Copy)]
pub enum GeoSortStrategy {
AlwaysIterative(usize),
AlwaysRtree(usize),
Dynamic(usize),
}
impl Default for GeoSortStrategy {
fn default() -> Self {
GeoSortStrategy::Dynamic(1000)
}
}
impl GeoSortStrategy {
pub fn use_rtree(&self, candidates: usize) -> bool {
match self {
GeoSortStrategy::AlwaysIterative(_) => false,
GeoSortStrategy::AlwaysRtree(_) => true,
GeoSortStrategy::Dynamic(i) => candidates >= *i,
}
}
pub fn cache_size(&self) -> usize {
match self {
GeoSortStrategy::AlwaysIterative(i)
| GeoSortStrategy::AlwaysRtree(i)
| GeoSortStrategy::Dynamic(i) => *i,
}
}
}
#[allow(clippy::too_many_arguments)]
pub fn fill_cache(
index: &Index,
txn: &RoTxn<heed::AnyTls>,
strategy: GeoSortStrategy,
ascending: bool,
target_point: [f64; 2],
field_ids: &Option<[u16; 2]>,
rtree: &mut Option<RTree<GeoPoint>>,
geo_candidates: &RoaringBitmap,
cached_sorted_docids: &mut VecDeque<(u32, [f64; 2])>,
) -> crate::Result<()> {
debug_assert!(cached_sorted_docids.is_empty());
// lazily initialize the rtree if needed by the strategy, and cache it in `self.rtree`
let rtree = if strategy.use_rtree(geo_candidates.len() as usize) {
if let Some(rtree) = rtree.as_ref() {
// get rtree from cache
Some(rtree)
} else {
let rtree2 = index.geo_rtree(txn)?.expect("geo candidates but no rtree");
// insert the rtree into the cache and return it.
// Can't use `get_or_insert_with` because getting the rtree from the DB is a fallible operation.
Some(&*rtree.insert(rtree2))
}
} else {
None
};
let cache_size = strategy.cache_size();
if let Some(rtree) = rtree {
if ascending {
let point = lat_lng_to_xyz(&target_point);
for point in rtree.nearest_neighbor_iter(&point) {
if geo_candidates.contains(point.data.0) {
cached_sorted_docids.push_back(point.data);
if cached_sorted_docids.len() >= cache_size {
break;
}
}
}
} else {
// in the case of the desc geo sort we look for the closest point to the opposite of the queried point
// and insert the points in reverse order; they get reversed when emptying the cache later on
let point = lat_lng_to_xyz(&opposite_of(target_point));
for point in rtree.nearest_neighbor_iter(&point) {
if geo_candidates.contains(point.data.0) {
cached_sorted_docids.push_front(point.data);
if cached_sorted_docids.len() >= cache_size {
break;
}
}
}
}
} else {
// the iterative version
let [lat, lng] = field_ids.expect("fill_cache can't be called without the lat&lng");
let mut documents = geo_candidates
.iter()
.map(|id| -> crate::Result<_> { Ok((id, geo_value(id, lat, lng, index, txn)?)) })
.collect::<crate::Result<Vec<(u32, [f64; 2])>>>()?;
// computing the distance between two points is expensive thus we cache the result
documents
.sort_by_cached_key(|(_, p)| distance_between_two_points(&target_point, p) as usize);
cached_sorted_docids.extend(documents);
};
Ok(())
}
#[allow(clippy::too_many_arguments)]
pub fn next_bucket(
index: &Index,
txn: &RoTxn<heed::AnyTls>,
universe: &RoaringBitmap,
ascending: bool,
target_point: [f64; 2],
field_ids: &Option<[u16; 2]>,
rtree: &mut Option<RTree<GeoPoint>>,
cached_sorted_docids: &mut VecDeque<(u32, [f64; 2])>,
geo_candidates: &RoaringBitmap,
parameter: GeoSortParameter,
) -> crate::Result<Option<(RoaringBitmap, Option<[f64; 2]>)>> {
let mut geo_candidates = geo_candidates & universe;
if geo_candidates.is_empty() {
return Ok(Some((universe.clone(), None)));
}
let next = |cache: &mut VecDeque<_>| {
if ascending {
cache.pop_front()
} else {
cache.pop_back()
}
};
let put_back = |cache: &mut VecDeque<_>, x: _| {
if ascending {
cache.push_front(x)
} else {
cache.push_back(x)
}
};
let mut current_bucket = RoaringBitmap::new();
// current_distance stores the first point and distance in current bucket
let mut current_distance: Option<([f64; 2], f64)> = None;
loop {
// The loop will only exit when we have found all points with equal distance or have exhausted the candidates.
if let Some((id, point)) = next(cached_sorted_docids) {
if geo_candidates.contains(id) {
let distance = distance_between_two_points(&target_point, &point);
if let Some((point0, bucket_distance)) = current_distance.as_ref() {
if (bucket_distance - distance).abs() > parameter.distance_error_margin {
// different distance, point belongs to next bucket
put_back(cached_sorted_docids, (id, point));
return Ok(Some((current_bucket, Some(point0.to_owned()))));
} else {
// same distance, point belongs to current bucket
current_bucket.insert(id);
// remove from candidates to prevent it from being added to the cache again
geo_candidates.remove(id);
// current bucket size reaches limit, force return
if current_bucket.len() == parameter.max_bucket_size {
return Ok(Some((current_bucket, Some(point0.to_owned()))));
}
}
} else {
// first doc in current bucket
current_distance = Some((point, distance));
current_bucket.insert(id);
geo_candidates.remove(id);
// current bucket size reaches limit, force return
if current_bucket.len() == parameter.max_bucket_size {
return Ok(Some((current_bucket, Some(point.to_owned()))));
}
}
}
} else {
// cache exhausted, we need to refill it
fill_cache(
index,
txn,
parameter.strategy,
ascending,
target_point,
field_ids,
rtree,
&geo_candidates,
cached_sorted_docids,
)?;
if cached_sorted_docids.is_empty() {
// candidates exhausted, exit
if let Some((point0, _)) = current_distance.as_ref() {
return Ok(Some((current_bucket, Some(point0.to_owned()))));
} else {
return Ok(Some((universe.clone(), None)));
}
}
}
}
}
/// Return an iterator over each number value in the given field of the given document.
fn facet_number_values<'a>(
docid: u32,
field_id: u16,
index: &Index,
txn: &'a RoTxn<'a>,
) -> crate::Result<RoPrefix<'a, FieldDocIdFacetCodec<OrderedF64Codec>, Unit>> {
let key = facet_values_prefix_key(field_id, docid);
let iter = index
.field_id_docid_facet_f64s
.remap_key_type::<Bytes>()
.prefix_iter(txn, &key)?
.remap_key_type();
Ok(iter)
}
/// Extracts the lat and long values from a single document.
///
/// If it is not able to find it in the facet number index it will extract it
/// from the facet string index and parse it as f64 (as the geo extraction behaves).
pub(crate) fn geo_value(
docid: u32,
field_lat: u16,
field_lng: u16,
index: &Index,
rtxn: &RoTxn<'_>,
) -> crate::Result<[f64; 2]> {
let extract_geo = |geo_field: u16| -> crate::Result<f64> {
match facet_number_values(docid, geo_field, index, rtxn)?.next() {
Some(Ok(((_, _, geo), ()))) => Ok(geo),
Some(Err(e)) => Err(e.into()),
None => match facet_string_values(docid, geo_field, index, rtxn)?.next() {
Some(Ok((_, geo))) => {
Ok(geo.parse::<f64>().expect("cannot parse geo field as f64"))
}
Some(Err(e)) => Err(e.into()),
None => panic!("A geo faceted document doesn't contain any lat or lng"),
},
}
};
let lat = extract_geo(field_lat)?;
let lng = extract_geo(field_lng)?;
Ok([lat, lng])
}
/// Compute the antipodal coordinate of `coord`
pub(crate) fn opposite_of(mut coord: [f64; 2]) -> [f64; 2] {
coord[0] *= -1.;
// in the case of x,0 we want to return x,180
if coord[1] > 0. {
coord[1] -= 180.;
} else {
coord[1] += 180.;
}
coord
}
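
The two functions above form a pull-based pipeline: fill_cache keeps a window of geo-sorted candidates, and each next_bucket call drains one distance-equivalence class from it. A hypothetical driver, written as if inside this module, that collects every bucket in ascending order:

pub fn drain_geo_buckets(
    index: &Index,
    txn: &RoTxn<heed::AnyTls>,
    mut universe: RoaringBitmap,
    target_point: [f64; 2],
    field_ids: Option<[u16; 2]>,
    geo_candidates: &RoaringBitmap,
) -> crate::Result<Vec<RoaringBitmap>> {
    let mut rtree = None;
    let mut cache = VecDeque::new();
    let mut buckets = Vec::new();
    while let Some((bucket, _closest_point)) = next_bucket(
        index,
        txn,
        &universe,
        /* ascending */ true,
        target_point,
        &field_ids,
        &mut rtree,
        &mut cache,
        geo_candidates,
        GeoSortParameter::default(),
    )? {
        if bucket.is_empty() {
            break; // everything has been emitted
        }
        // Documents without geo data come back as one final catch-all bucket.
        universe -= &bucket; // shrink the universe so each document is emitted once
        buckets.push(bucket);
    }
    Ok(buckets)
}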


@@ -1,8 +1,10 @@
mod builder;
mod enriched;
pub mod geo_sort;
mod primary_key;
mod reader;
mod serde_impl;
pub mod sort;
use std::fmt::Debug;
use std::io;
@@ -19,6 +21,7 @@ pub use primary_key::{
pub use reader::{DocumentsBatchCursor, DocumentsBatchCursorError, DocumentsBatchReader};
use serde::{Deserialize, Serialize};
pub use self::geo_sort::{GeoSortParameter, GeoSortStrategy};
use crate::error::{FieldIdMapMissingEntry, InternalError};
use crate::{FieldId, Object, Result};


@@ -0,0 +1,444 @@
use std::collections::{BTreeSet, VecDeque};
use crate::{
constants::RESERVED_GEO_FIELD_NAME,
documents::{geo_sort::next_bucket, GeoSortParameter},
heed_codec::{
facet::{FacetGroupKeyCodec, FacetGroupValueCodec},
BytesRefCodec,
},
is_faceted,
search::facet::{ascending_facet_sort, descending_facet_sort},
AscDesc, DocumentId, Member, UserError,
};
use heed::Database;
use roaring::RoaringBitmap;
#[derive(Debug, Clone, Copy)]
enum AscDescId {
Facet { field_id: u16, ascending: bool },
Geo { field_ids: [u16; 2], target_point: [f64; 2], ascending: bool },
}
/// A [`SortedDocumentsIterator`] allows efficient access to a continuous range of sorted documents.
/// This is ideal in the context of paginated queries in which only a small number of documents are needed at a time.
/// Search operations will only be performed upon access.
pub enum SortedDocumentsIterator<'ctx> {
Leaf {
/// The exact number of documents remaining
size: usize,
values: Box<dyn Iterator<Item = DocumentId> + 'ctx>,
},
Branch {
/// The current child, got from the children iterator
current_child: Option<Box<SortedDocumentsIterator<'ctx>>>,
/// The exact number of documents remaining, excluding documents in the current child
next_children_size: usize,
/// Iterators to become the current child once it is exhausted
next_children:
Box<dyn Iterator<Item = crate::Result<SortedDocumentsIteratorBuilder<'ctx>>> + 'ctx>,
},
}
impl SortedDocumentsIterator<'_> {
/// Takes care of updating the current child if it is `None`, and also updates the size
fn update_current<'ctx>(
current_child: &mut Option<Box<SortedDocumentsIterator<'ctx>>>,
next_children_size: &mut usize,
next_children: &mut Box<
dyn Iterator<Item = crate::Result<SortedDocumentsIteratorBuilder<'ctx>>> + 'ctx,
>,
) -> crate::Result<()> {
if current_child.is_none() {
*current_child = match next_children.next() {
Some(Ok(builder)) => {
let next_child = Box::new(builder.build()?);
*next_children_size -= next_child.size_hint().0;
Some(next_child)
}
Some(Err(e)) => return Err(e),
None => return Ok(()),
};
}
Ok(())
}
}
impl Iterator for SortedDocumentsIterator<'_> {
type Item = crate::Result<DocumentId>;
/// Implementing the `nth` method allows for efficient access to the nth document in the sorted order.
/// It's used by `skip` internally.
/// The default implementation of `nth` would iterate over all children, which is inefficient for large datasets.
/// This implementation will jump over whole chunks of children until it gets close.
fn nth(&mut self, n: usize) -> Option<Self::Item> {
if n == 0 {
return self.next();
}
// If it's at the leaf level, just forward the call to the values iterator
let (current_child, next_children, next_children_size) = match self {
SortedDocumentsIterator::Leaf { values, size } => {
*size = size.saturating_sub(n);
return values.nth(n).map(Ok);
}
SortedDocumentsIterator::Branch {
current_child,
next_children,
next_children_size,
} => (current_child, next_children, next_children_size),
};
// Otherwise don't directly iterate over children, skip them if we know we will go further
let mut to_skip = n - 1;
while to_skip > 0 {
if let Err(e) = SortedDocumentsIterator::update_current(
current_child,
next_children_size,
next_children,
) {
return Some(Err(e));
}
let Some(inner) = current_child else {
return None; // No more inner iterators, everything has been consumed.
};
if to_skip >= inner.size_hint().0 {
// The current child isn't large enough to contain the nth element.
// Skip it and continue with the next one.
to_skip -= inner.size_hint().0;
*current_child = None;
continue;
} else {
// The current iterator is large enough, so we can forward the call to it.
return inner.nth(to_skip + 1);
}
}
self.next()
}
/// Iterators need to keep track of their size so that they can be skipped efficiently by the `nth` method.
fn size_hint(&self) -> (usize, Option<usize>) {
let size = match self {
SortedDocumentsIterator::Leaf { size, .. } => *size,
SortedDocumentsIterator::Branch {
next_children_size,
current_child: Some(current_child),
..
} => current_child.size_hint().0 + next_children_size,
SortedDocumentsIterator::Branch { next_children_size, current_child: None, .. } => {
*next_children_size
}
};
(size, Some(size))
}
fn next(&mut self) -> Option<Self::Item> {
match self {
SortedDocumentsIterator::Leaf { values, size } => {
let result = values.next().map(Ok);
if result.is_some() {
*size -= 1;
}
result
}
SortedDocumentsIterator::Branch {
current_child,
next_children_size,
next_children,
} => {
let mut result = None;
while result.is_none() {
// Ensure we have selected an iterator to work with
if let Err(e) = SortedDocumentsIterator::update_current(
current_child,
next_children_size,
next_children,
) {
return Some(Err(e));
}
let Some(inner) = current_child else {
return None;
};
result = inner.next();
// If the current iterator is exhausted, we need to try the next one
if result.is_none() {
*current_child = None;
}
}
result
}
}
}
}
/// Builder for a [`SortedDocumentsIterator`].
/// Most builders won't ever be built, because pagination will skip them.
pub struct SortedDocumentsIteratorBuilder<'ctx> {
index: &'ctx crate::Index,
rtxn: &'ctx heed::RoTxn<'ctx>,
number_db: Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
string_db: Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
fields: &'ctx [AscDescId],
candidates: RoaringBitmap,
geo_candidates: &'ctx RoaringBitmap,
}
impl<'ctx> SortedDocumentsIteratorBuilder<'ctx> {
/// Performs the sort and builds a [`SortedDocumentsIterator`].
fn build(self) -> crate::Result<SortedDocumentsIterator<'ctx>> {
let size = self.candidates.len() as usize;
match self.fields {
[] => Ok(SortedDocumentsIterator::Leaf {
size,
values: Box::new(self.candidates.into_iter()),
}),
[AscDescId::Facet { field_id, ascending }, next_fields @ ..] => {
SortedDocumentsIteratorBuilder::build_facet(
self.index,
self.rtxn,
self.number_db,
self.string_db,
next_fields,
self.candidates,
self.geo_candidates,
*field_id,
*ascending,
)
}
[AscDescId::Geo { field_ids, target_point, ascending }, next_fields @ ..] => {
SortedDocumentsIteratorBuilder::build_geo(
self.index,
self.rtxn,
self.number_db,
self.string_db,
next_fields,
self.candidates,
self.geo_candidates,
*field_ids,
*target_point,
*ascending,
)
}
}
}
/// Builds a [`SortedDocumentsIterator`] based on the results of a facet sort.
#[allow(clippy::too_many_arguments)]
fn build_facet(
index: &'ctx crate::Index,
rtxn: &'ctx heed::RoTxn<'ctx>,
number_db: Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
string_db: Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
next_fields: &'ctx [AscDescId],
candidates: RoaringBitmap,
geo_candidates: &'ctx RoaringBitmap,
field_id: u16,
ascending: bool,
) -> crate::Result<SortedDocumentsIterator<'ctx>> {
let size = candidates.len() as usize;
// Perform the sort on the first field
let (number_iter, string_iter) = if ascending {
let number_iter = ascending_facet_sort(rtxn, number_db, field_id, candidates.clone())?;
let string_iter = ascending_facet_sort(rtxn, string_db, field_id, candidates)?;
(itertools::Either::Left(number_iter), itertools::Either::Left(string_iter))
} else {
let number_iter = descending_facet_sort(rtxn, number_db, field_id, candidates.clone())?;
let string_iter = descending_facet_sort(rtxn, string_db, field_id, candidates)?;
(itertools::Either::Right(number_iter), itertools::Either::Right(string_iter))
};
// Create builders for the next level of the tree
let number_iter = number_iter.map(|r| r.map(|(d, _)| d));
let string_iter = string_iter.map(|r| r.map(|(d, _)| d));
let next_children = number_iter.chain(string_iter).map(move |r| {
Ok(SortedDocumentsIteratorBuilder {
index,
rtxn,
number_db,
string_db,
fields: next_fields,
candidates: r?,
geo_candidates,
})
});
Ok(SortedDocumentsIterator::Branch {
current_child: None,
next_children_size: size,
next_children: Box::new(next_children),
})
}
/// Builds a [`SortedDocumentsIterator`] based on the (lazy) results of a geo sort.
#[allow(clippy::too_many_arguments)]
fn build_geo(
index: &'ctx crate::Index,
rtxn: &'ctx heed::RoTxn<'ctx>,
number_db: Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
string_db: Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
next_fields: &'ctx [AscDescId],
candidates: RoaringBitmap,
geo_candidates: &'ctx RoaringBitmap,
field_ids: [u16; 2],
target_point: [f64; 2],
ascending: bool,
) -> crate::Result<SortedDocumentsIterator<'ctx>> {
let mut cache = VecDeque::new();
let mut rtree = None;
let size = candidates.len() as usize;
let not_geo_candidates = candidates.clone() - geo_candidates;
let mut geo_remaining = size - not_geo_candidates.len() as usize;
let mut not_geo_candidates = Some(not_geo_candidates);
let next_children = std::iter::from_fn(move || {
// Find the next bucket of geo-sorted documents.
// next_bucket never signals exhaustion on its own (once the geo candidates run out it returns the whole universe), so we track how many geo documents are left ourselves.
if geo_remaining > 0 {
if let Ok(Some((docids, _point))) = next_bucket(
index,
rtxn,
&candidates,
ascending,
target_point,
&Some(field_ids),
&mut rtree,
&mut cache,
geo_candidates,
GeoSortParameter::default(),
) {
geo_remaining -= docids.len() as usize;
return Some(Ok(SortedDocumentsIteratorBuilder {
index,
rtxn,
number_db,
string_db,
fields: next_fields,
candidates: docids,
geo_candidates,
}));
}
}
// Once all geo candidates have been processed, we can return the others
if let Some(not_geo_candidates) = not_geo_candidates.take() {
if !not_geo_candidates.is_empty() {
return Some(Ok(SortedDocumentsIteratorBuilder {
index,
rtxn,
number_db,
string_db,
fields: next_fields,
candidates: not_geo_candidates,
geo_candidates,
}));
}
}
None
});
Ok(SortedDocumentsIterator::Branch {
current_child: None,
next_children_size: size,
next_children: Box::new(next_children),
})
}
}
/// A structure owning the data needed during the lifetime of a [`SortedDocumentsIterator`].
pub struct SortedDocuments<'ctx> {
index: &'ctx crate::Index,
rtxn: &'ctx heed::RoTxn<'ctx>,
fields: Vec<AscDescId>,
number_db: Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
string_db: Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
candidates: &'ctx RoaringBitmap,
geo_candidates: RoaringBitmap,
}
impl<'ctx> SortedDocuments<'ctx> {
pub fn iter(&'ctx self) -> crate::Result<SortedDocumentsIterator<'ctx>> {
let builder = SortedDocumentsIteratorBuilder {
index: self.index,
rtxn: self.rtxn,
number_db: self.number_db,
string_db: self.string_db,
fields: &self.fields,
candidates: self.candidates.clone(),
geo_candidates: &self.geo_candidates,
};
builder.build()
}
}
pub fn recursive_sort<'ctx>(
index: &'ctx crate::Index,
rtxn: &'ctx heed::RoTxn<'ctx>,
sort: Vec<AscDesc>,
candidates: &'ctx RoaringBitmap,
) -> crate::Result<SortedDocuments<'ctx>> {
let sortable_fields: BTreeSet<_> = index.sortable_fields(rtxn)?.into_iter().collect();
let fields_ids_map = index.fields_ids_map(rtxn)?;
// Retrieve the field ids that are used for sorting
let mut fields = Vec::new();
let mut need_geo_candidates = false;
for asc_desc in sort {
let (field, geofield) = match asc_desc {
AscDesc::Asc(Member::Field(field)) => (Some((field, true)), None),
AscDesc::Desc(Member::Field(field)) => (Some((field, false)), None),
AscDesc::Asc(Member::Geo(target_point)) => (None, Some((target_point, true))),
AscDesc::Desc(Member::Geo(target_point)) => (None, Some((target_point, false))),
};
if let Some((field, ascending)) = field {
if is_faceted(&field, &sortable_fields) {
if let Some(field_id) = fields_ids_map.id(&field) {
fields.push(AscDescId::Facet { field_id, ascending });
continue;
}
}
return Err(UserError::InvalidDocumentSortableAttribute {
field: field.to_string(),
sortable_fields: sortable_fields.clone(),
}
.into());
}
if let Some((target_point, ascending)) = geofield {
if sortable_fields.contains(RESERVED_GEO_FIELD_NAME) {
if let (Some(lat), Some(lng)) =
(fields_ids_map.id("_geo.lat"), fields_ids_map.id("_geo.lng"))
{
need_geo_candidates = true;
fields.push(AscDescId::Geo { field_ids: [lat, lng], target_point, ascending });
continue;
}
}
return Err(UserError::InvalidDocumentSortableAttribute {
field: RESERVED_GEO_FIELD_NAME.to_string(),
sortable_fields: sortable_fields.clone(),
}
.into());
}
}
let geo_candidates = if need_geo_candidates {
index.geo_faceted_documents_ids(rtxn)?
} else {
RoaringBitmap::new()
};
let number_db = index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>();
let string_db =
index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>();
Ok(SortedDocuments { index, rtxn, fields, number_db, string_db, candidates, geo_candidates })
}
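
A hypothetical pagination helper showing the intended payoff: recursive_sort performs no sorting up front, and the iterator's custom nth lets skip jump over whole buckets instead of sorting every document before the offset:

fn paginated_ids(
    index: &crate::Index,
    rtxn: &heed::RoTxn<'_>,
    sort: Vec<AscDesc>,
    candidates: &RoaringBitmap,
    offset: usize,
    limit: usize,
) -> crate::Result<Vec<DocumentId>> {
    let sorted = recursive_sort(index, rtxn, sort, candidates)?;
    // `skip` forwards to `nth`, which descends into a child iterator only when
    // the offset actually lands inside it; buckets it jumps over are never built.
    sorted.iter()?.skip(offset).take(limit).collect()
}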


@@ -191,7 +191,21 @@ and can not be more than 511 bytes.", .document_id.to_string()
),
}
)]
InvalidSortableAttribute { field: String, valid_fields: BTreeSet<String>, hidden_fields: bool },
InvalidSearchSortableAttribute {
field: String,
valid_fields: BTreeSet<String>,
hidden_fields: bool,
},
#[error("Attribute `{}` is not sortable. {}",
.field,
match .sortable_fields.is_empty() {
true => "This index does not have configured sortable attributes.".to_string(),
false => format!("Available sortable attributes are: `{}`.",
sortable_fields.iter().map(AsRef::as_ref).collect::<Vec<&str>>().join(", ")
),
}
)]
InvalidDocumentSortableAttribute { field: String, sortable_fields: BTreeSet<String> },
#[error("Attribute `{}` is not filterable and thus, cannot be used as distinct attribute. {}",
.field,
match (.valid_patterns.is_empty(), .matching_rule_index) {
@@ -272,8 +286,8 @@ and can not be more than 511 bytes.", .document_id.to_string()
PrimaryKeyCannotBeChanged(String),
#[error(transparent)]
SerdeJson(serde_json::Error),
#[error(transparent)]
SortError(#[from] SortError),
#[error("{error}")]
SortError { error: SortError, search: bool },
#[error("An unknown internal document id have been used: `{document_id}`.")]
UnknownInternalDocumentId { document_id: DocumentId },
#[error("`minWordSizeForTypos` setting is invalid. `oneTypo` and `twoTypos` fields should be between `0` and `255`, and `twoTypos` should be greater or equals to `oneTypo` but found `oneTypo: {0}` and twoTypos: {1}`.")]
@@ -616,7 +630,7 @@ fn conditionally_lookup_for_error_message() {
];
for (list, suffix) in messages {
let err = UserError::InvalidSortableAttribute {
let err = UserError::InvalidSearchSortableAttribute {
field: "name".to_string(),
valid_fields: list,
hidden_fields: false,


@@ -1766,20 +1766,22 @@ impl Index {
&self,
rtxn: &RoTxn<'_>,
docid: DocumentId,
) -> Result<BTreeMap<String, (Vec<Embedding>, bool)>> {
) -> Result<BTreeMap<String, EmbeddingsWithMetadata>> {
let mut res = BTreeMap::new();
let embedders = self.embedding_configs();
for config in embedders.embedding_configs(rtxn)? {
let embedder_info = embedders.embedder_info(rtxn, &config.name)?.unwrap();
let has_fragments = config.config.embedder_options.has_fragments();
let reader = ArroyWrapper::new(
self.vector_arroy,
embedder_info.embedder_id,
config.config.quantized(),
);
let embeddings = reader.item_vectors(rtxn, docid)?;
let regenerate = embedder_info.embedding_status.must_regenerate(docid);
res.insert(
config.name.to_owned(),
(embeddings, embedder_info.embedding_status.must_regenerate(docid)),
EmbeddingsWithMetadata { embeddings, regenerate, has_fragments },
);
}
Ok(res)
@@ -1919,6 +1921,12 @@ impl Index {
}
}
pub struct EmbeddingsWithMetadata {
pub embeddings: Vec<Embedding>,
pub regenerate: bool,
pub has_fragments: bool,
}
#[derive(Debug, Default, Deserialize, Serialize)]
pub struct ChatConfig {
pub description: String,
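
Callers that previously destructured the `(Vec<Embedding>, bool)` tuple now get named fields, and the new `has_fragments` flag travels with them. A sketch of an adapted caller, written as if inside a function with `rtxn` and `docid` in scope; `doc_embeddings` is a stand-in name for the accessor whose name the hunk elides:

let per_embedder: BTreeMap<String, EmbeddingsWithMetadata> = doc_embeddings(rtxn, docid)?;
for (embedder_name, EmbeddingsWithMetadata { embeddings, regenerate, has_fragments }) in per_embedder {
    println!("{embedder_name}: {} vector(s), regenerate={regenerate}, has_fragments={has_fragments}", embeddings.len());
}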


@@ -43,12 +43,13 @@ use std::fmt;
use std::hash::BuildHasherDefault;
use charabia::normalizer::{CharNormalizer, CompatibilityDecompositionNormalizer};
pub use documents::GeoSortStrategy;
pub use filter_parser::{Condition, FilterCondition, Span, Token};
use fxhash::{FxHasher32, FxHasher64};
pub use grenad::CompressionType;
pub use search::new::{
execute_search, filtered_universe, DefaultSearchLogger, GeoSortStrategy, SearchContext,
SearchLogger, VisualSearchLogger,
execute_search, filtered_universe, DefaultSearchLogger, SearchContext, SearchLogger,
VisualSearchLogger,
};
use serde_json::Value;
pub use thread_pool_no_abort::{PanicCatched, ThreadPoolNoAbort, ThreadPoolNoAbortBuilder};
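
For downstream code this is purely an import move; a hypothetical use after the change (GeoSortStrategy is additionally re-exported at the crate root, as the pub use above shows):

use milli::documents::{GeoSortParameter, GeoSortStrategy};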


@@ -210,6 +210,7 @@ impl Search<'_> {
scoring_strategy: ScoringStrategy::Detailed,
words_limit: self.words_limit,
exhaustive_number_hits: self.exhaustive_number_hits,
max_total_hits: self.max_total_hits,
rtxn: self.rtxn,
index: self.index,
semantic: self.semantic.clone(),


@@ -9,6 +9,7 @@ use roaring::bitmap::RoaringBitmap;
pub use self::facet::{FacetDistribution, Filter, OrderBy, DEFAULT_VALUES_PER_FACET};
pub use self::new::matches::{FormatOptions, MatchBounds, MatcherBuilder, MatchingWords};
use self::new::{execute_vector_search, PartialSearchResult, VectorStoreStats};
use crate::documents::GeoSortParameter;
use crate::filterable_attributes_rules::{filtered_matching_patterns, matching_features};
use crate::index::MatchingStrategy;
use crate::score_details::{ScoreDetails, ScoringStrategy};
@@ -47,11 +48,12 @@ pub struct Search<'a> {
sort_criteria: Option<Vec<AscDesc>>,
distinct: Option<String>,
searchable_attributes: Option<&'a [String]>,
geo_param: new::GeoSortParameter,
geo_param: GeoSortParameter,
terms_matching_strategy: TermsMatchingStrategy,
scoring_strategy: ScoringStrategy,
words_limit: usize,
exhaustive_number_hits: bool,
max_total_hits: Option<usize>,
rtxn: &'a heed::RoTxn<'a>,
index: &'a Index,
semantic: Option<SemanticSearch>,
@@ -70,10 +72,11 @@ impl<'a> Search<'a> {
sort_criteria: None,
distinct: None,
searchable_attributes: None,
geo_param: new::GeoSortParameter::default(),
geo_param: GeoSortParameter::default(),
terms_matching_strategy: TermsMatchingStrategy::default(),
scoring_strategy: Default::default(),
exhaustive_number_hits: false,
max_total_hits: None,
words_limit: 10,
rtxn,
index,
@@ -147,7 +150,7 @@ impl<'a> Search<'a> {
}
#[cfg(test)]
pub fn geo_sort_strategy(&mut self, strategy: new::GeoSortStrategy) -> &mut Search<'a> {
pub fn geo_sort_strategy(&mut self, strategy: crate::GeoSortStrategy) -> &mut Search<'a> {
self.geo_param.strategy = strategy;
self
}
@@ -165,6 +168,11 @@ impl<'a> Search<'a> {
self
}
pub fn max_total_hits(&mut self, max_total_hits: Option<usize>) -> &mut Search<'a> {
self.max_total_hits = max_total_hits;
self
}
pub fn time_budget(&mut self, time_budget: TimeBudget) -> &mut Search<'a> {
self.time_budget = time_budget;
self
@@ -243,6 +251,8 @@ impl<'a> Search<'a> {
&mut ctx,
vector,
self.scoring_strategy,
self.exhaustive_number_hits,
self.max_total_hits,
universe,
&self.sort_criteria,
&self.distinct,
@@ -261,6 +271,7 @@ impl<'a> Search<'a> {
self.terms_matching_strategy,
self.scoring_strategy,
self.exhaustive_number_hits,
self.max_total_hits,
universe,
&self.sort_criteria,
&self.distinct,
@@ -314,6 +325,7 @@ impl fmt::Debug for Search<'_> {
scoring_strategy,
words_limit,
exhaustive_number_hits,
max_total_hits,
rtxn: _,
index: _,
semantic,
@@ -333,6 +345,7 @@ impl fmt::Debug for Search<'_> {
.field("terms_matching_strategy", terms_matching_strategy)
.field("scoring_strategy", scoring_strategy)
.field("exhaustive_number_hits", exhaustive_number_hits)
.field("max_total_hits", max_total_hits)
.field("words_limit", words_limit)
.field(
"semantic.embedder_name",


@@ -32,6 +32,8 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
logger: &mut dyn SearchLogger<Q>,
time_budget: TimeBudget,
ranking_score_threshold: Option<f64>,
exhaustive_number_hits: bool,
max_total_hits: Option<usize>,
) -> Result<BucketSortOutput> {
logger.initial_query(query);
logger.ranking_rules(&ranking_rules);
@@ -159,7 +161,13 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
};
}
while valid_docids.len() < length {
let max_len_to_evaluate =
match (max_total_hits, exhaustive_number_hits && ranking_score_threshold.is_some()) {
(Some(max_total_hits), true) => max_total_hits,
_ => length,
};
while valid_docids.len() < max_len_to_evaluate {
if time_budget.exceeded() {
loop {
let bucket = std::mem::take(&mut ranking_rule_universes[cur_ranking_rule_index]);
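
Restated as a hypothetical pure function (parameter names from the hunk): when the caller asked for an exhaustive hit count and a ranking score threshold is set, candidates must actually be scored to know whether they pass the threshold, so evaluation extends to maxTotalHits instead of stopping at the requested page length:

fn max_len_to_evaluate(
    length: usize,
    max_total_hits: Option<usize>,
    exhaustive_number_hits: bool,
    has_ranking_score_threshold: bool,
) -> usize {
    match (max_total_hits, exhaustive_number_hits && has_ranking_score_threshold) {
        // Exhaustive count under a threshold: rank up to maxTotalHits documents
        // so the reported total stays accurate without scanning the universe.
        (Some(max_total_hits), true) => max_total_hits,
        _ => length,
    }
}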


@@ -82,7 +82,7 @@ fn facet_value_docids(
}
/// Return an iterator over each number value in the given field of the given document.
fn facet_number_values<'a>(
pub(crate) fn facet_number_values<'a>(
docid: u32,
field_id: u16,
index: &Index,
@@ -118,7 +118,7 @@ pub fn facet_string_values<'a>(
}
#[allow(clippy::drop_non_drop)]
fn facet_values_prefix_key(distinct: u16, id: u32) -> [u8; FID_SIZE + DOCID_SIZE] {
pub(crate) fn facet_values_prefix_key(distinct: u16, id: u32) -> [u8; FID_SIZE + DOCID_SIZE] {
concat_arrays::concat_arrays!(distinct.to_be_bytes(), id.to_be_bytes())
}


@@ -1,96 +1,18 @@
use std::collections::VecDeque;
use heed::types::{Bytes, Unit};
use heed::{RoPrefix, RoTxn};
use roaring::RoaringBitmap;
use rstar::RTree;
use super::facet_string_values;
use super::ranking_rules::{RankingRule, RankingRuleOutput, RankingRuleQueryTrait};
use crate::heed_codec::facet::{FieldDocIdFacetCodec, OrderedF64Codec};
use crate::documents::geo_sort::{fill_cache, next_bucket};
use crate::documents::{GeoSortParameter, GeoSortStrategy};
use crate::score_details::{self, ScoreDetails};
use crate::{
distance_between_two_points, lat_lng_to_xyz, GeoPoint, Index, Result, SearchContext,
SearchLogger,
};
const FID_SIZE: usize = 2;
const DOCID_SIZE: usize = 4;
#[allow(clippy::drop_non_drop)]
fn facet_values_prefix_key(distinct: u16, id: u32) -> [u8; FID_SIZE + DOCID_SIZE] {
concat_arrays::concat_arrays!(distinct.to_be_bytes(), id.to_be_bytes())
}
/// Return an iterator over each number value in the given field of the given document.
fn facet_number_values<'a>(
docid: u32,
field_id: u16,
index: &Index,
txn: &'a RoTxn<'a>,
) -> Result<RoPrefix<'a, FieldDocIdFacetCodec<OrderedF64Codec>, Unit>> {
let key = facet_values_prefix_key(field_id, docid);
let iter = index
.field_id_docid_facet_f64s
.remap_key_type::<Bytes>()
.prefix_iter(txn, &key)?
.remap_key_type();
Ok(iter)
}
#[derive(Debug, Clone, Copy)]
pub struct Parameter {
// Define the strategy used by the geo sort
pub strategy: Strategy,
// Limit the number of docs in a single bucket to avoid unexpectedly large overhead
pub max_bucket_size: u64,
// Considering the errors of GPS and geographical calculations, distances less than distance_error_margin will be treated as equal
pub distance_error_margin: f64,
}
impl Default for Parameter {
fn default() -> Self {
Self { strategy: Strategy::default(), max_bucket_size: 1000, distance_error_margin: 1.0 }
}
}
/// Define the strategy used by the geo sort.
/// The parameter represents the cache size, and, in the case of the Dynamic strategy,
/// the point where we move from using the iterative strategy to the rtree.
#[derive(Debug, Clone, Copy)]
pub enum Strategy {
AlwaysIterative(usize),
AlwaysRtree(usize),
Dynamic(usize),
}
impl Default for Strategy {
fn default() -> Self {
Strategy::Dynamic(1000)
}
}
impl Strategy {
pub fn use_rtree(&self, candidates: usize) -> bool {
match self {
Strategy::AlwaysIterative(_) => false,
Strategy::AlwaysRtree(_) => true,
Strategy::Dynamic(i) => candidates >= *i,
}
}
pub fn cache_size(&self) -> usize {
match self {
Strategy::AlwaysIterative(i) | Strategy::AlwaysRtree(i) | Strategy::Dynamic(i) => *i,
}
}
}
use crate::{GeoPoint, Result, SearchContext, SearchLogger};
pub struct GeoSort<Q: RankingRuleQueryTrait> {
query: Option<Q>,
strategy: Strategy,
strategy: GeoSortStrategy,
ascending: bool,
point: [f64; 2],
field_ids: Option<[u16; 2]>,
@@ -107,12 +29,12 @@ pub struct GeoSort<Q: RankingRuleQueryTrait> {
impl<Q: RankingRuleQueryTrait> GeoSort<Q> {
pub fn new(
parameter: Parameter,
parameter: GeoSortParameter,
geo_faceted_docids: RoaringBitmap,
point: [f64; 2],
ascending: bool,
) -> Result<Self> {
let Parameter { strategy, max_bucket_size, distance_error_margin } = parameter;
let GeoSortParameter { strategy, max_bucket_size, distance_error_margin } = parameter;
Ok(Self {
query: None,
strategy,
@@ -134,98 +56,22 @@ impl<Q: RankingRuleQueryTrait> GeoSort<Q> {
ctx: &mut SearchContext<'_>,
geo_candidates: &RoaringBitmap,
) -> Result<()> {
debug_assert!(self.field_ids.is_some(), "fill_buffer can't be called without the lat&lng");
debug_assert!(self.cached_sorted_docids.is_empty());
// lazily initialize the rtree if needed by the strategy, and cache it in `self.rtree`
let rtree = if self.strategy.use_rtree(geo_candidates.len() as usize) {
if let Some(rtree) = self.rtree.as_ref() {
// get rtree from cache
Some(rtree)
} else {
let rtree = ctx.index.geo_rtree(ctx.txn)?.expect("geo candidates but no rtree");
// insert rtree in cache and returns it.
// Can't use `get_or_insert_with` because getting the rtree from the DB is a fallible operation.
Some(&*self.rtree.insert(rtree))
}
} else {
None
};
let cache_size = self.strategy.cache_size();
if let Some(rtree) = rtree {
if self.ascending {
let point = lat_lng_to_xyz(&self.point);
for point in rtree.nearest_neighbor_iter(&point) {
if geo_candidates.contains(point.data.0) {
self.cached_sorted_docids.push_back(point.data);
if self.cached_sorted_docids.len() >= cache_size {
break;
}
}
}
} else {
// in the case of the desc geo sort we look for the closest point to the opposite of the queried point
// and we insert the points in reverse order they get reversed when emptying the cache later on
let point = lat_lng_to_xyz(&opposite_of(self.point));
for point in rtree.nearest_neighbor_iter(&point) {
if geo_candidates.contains(point.data.0) {
self.cached_sorted_docids.push_front(point.data);
if self.cached_sorted_docids.len() >= cache_size {
break;
}
}
}
}
} else {
// the iterative version
let [lat, lng] = self.field_ids.unwrap();
let mut documents = geo_candidates
.iter()
.map(|id| -> Result<_> { Ok((id, geo_value(id, lat, lng, ctx.index, ctx.txn)?)) })
.collect::<Result<Vec<(u32, [f64; 2])>>>()?;
// computing the distance between two points is expensive thus we cache the result
documents
.sort_by_cached_key(|(_, p)| distance_between_two_points(&self.point, p) as usize);
self.cached_sorted_docids.extend(documents);
};
fill_cache(
ctx.index,
ctx.txn,
self.strategy,
self.ascending,
self.point,
&self.field_ids,
&mut self.rtree,
geo_candidates,
&mut self.cached_sorted_docids,
)?;
Ok(())
}
}
/// Extracts the lat and long values from a single document.
///
/// If it is not able to find it in the facet number index it will extract it
/// from the facet string index and parse it as f64 (as the geo extraction behaves).
fn geo_value(
docid: u32,
field_lat: u16,
field_lng: u16,
index: &Index,
rtxn: &RoTxn<'_>,
) -> Result<[f64; 2]> {
let extract_geo = |geo_field: u16| -> Result<f64> {
match facet_number_values(docid, geo_field, index, rtxn)?.next() {
Some(Ok(((_, _, geo), ()))) => Ok(geo),
Some(Err(e)) => Err(e.into()),
None => match facet_string_values(docid, geo_field, index, rtxn)?.next() {
Some(Ok((_, geo))) => {
Ok(geo.parse::<f64>().expect("cannot parse geo field as f64"))
}
Some(Err(e)) => Err(e.into()),
None => panic!("A geo faceted document doesn't contain any lat or lng"),
},
}
};
let lat = extract_geo(field_lat)?;
let lng = extract_geo(field_lng)?;
Ok([lat, lng])
}
impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for GeoSort<Q> {
fn id(&self) -> String {
"geo_sort".to_owned()
@@ -267,124 +113,33 @@ impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for GeoSort<Q> {
     ) -> Result<Option<RankingRuleOutput<Q>>> {
         let query = self.query.as_ref().unwrap().clone();
 
-        let mut geo_candidates = &self.geo_candidates & universe;
-
-        if geo_candidates.is_empty() {
-            return Ok(Some(RankingRuleOutput {
+        next_bucket(
+            ctx.index,
+            ctx.txn,
+            universe,
+            self.ascending,
+            self.point,
+            &self.field_ids,
+            &mut self.rtree,
+            &mut self.cached_sorted_docids,
+            &self.geo_candidates,
+            GeoSortParameter {
+                strategy: self.strategy,
+                max_bucket_size: self.max_bucket_size,
+                distance_error_margin: self.distance_error_margin,
+            },
+        )
+        .map(|o| {
+            o.map(|(candidates, point)| RankingRuleOutput {
                 query,
-                candidates: universe.clone(),
+                candidates,
                 score: ScoreDetails::GeoSort(score_details::GeoSort {
                     target_point: self.point,
                     ascending: self.ascending,
-                    value: None,
+                    value: point,
                 }),
-            }));
-        }
-
-        let ascending = self.ascending;
-        let next = |cache: &mut VecDeque<_>| {
-            if ascending {
-                cache.pop_front()
-            } else {
-                cache.pop_back()
-            }
-        };
-        let put_back = |cache: &mut VecDeque<_>, x: _| {
-            if ascending {
-                cache.push_front(x)
-            } else {
-                cache.push_back(x)
-            }
-        };
-
-        let mut current_bucket = RoaringBitmap::new();
-        // current_distance stores the first point and distance in current bucket
-        let mut current_distance: Option<([f64; 2], f64)> = None;
-        loop {
-            // The loop will only exit when we have found all points with equal distance or have exhausted the candidates.
-            if let Some((id, point)) = next(&mut self.cached_sorted_docids) {
-                if geo_candidates.contains(id) {
-                    let distance = distance_between_two_points(&self.point, &point);
-                    if let Some((point0, bucket_distance)) = current_distance.as_ref() {
-                        if (bucket_distance - distance).abs() > self.distance_error_margin {
-                            // different distance, point belongs to next bucket
-                            put_back(&mut self.cached_sorted_docids, (id, point));
-                            return Ok(Some(RankingRuleOutput {
-                                query,
-                                candidates: current_bucket,
-                                score: ScoreDetails::GeoSort(score_details::GeoSort {
-                                    target_point: self.point,
-                                    ascending: self.ascending,
-                                    value: Some(point0.to_owned()),
-                                }),
-                            }));
-                        } else {
-                            // same distance, point belongs to current bucket
-                            current_bucket.insert(id);
-                            // remove from candidates to prevent it from being added to the cache again
-                            geo_candidates.remove(id);
-                            // current bucket size reaches limit, force return
-                            if current_bucket.len() == self.max_bucket_size {
-                                return Ok(Some(RankingRuleOutput {
-                                    query,
-                                    candidates: current_bucket,
-                                    score: ScoreDetails::GeoSort(score_details::GeoSort {
-                                        target_point: self.point,
-                                        ascending: self.ascending,
-                                        value: Some(point0.to_owned()),
-                                    }),
-                                }));
-                            }
-                        }
-                    } else {
-                        // first doc in current bucket
-                        current_distance = Some((point, distance));
-                        current_bucket.insert(id);
-                        geo_candidates.remove(id);
-                        // current bucket size reaches limit, force return
-                        if current_bucket.len() == self.max_bucket_size {
-                            return Ok(Some(RankingRuleOutput {
-                                query,
-                                candidates: current_bucket,
-                                score: ScoreDetails::GeoSort(score_details::GeoSort {
-                                    target_point: self.point,
-                                    ascending: self.ascending,
-                                    value: Some(point.to_owned()),
-                                }),
-                            }));
-                        }
-                    }
-                }
-            } else {
-                // cache exhausted, we need to refill it
-                self.fill_buffer(ctx, &geo_candidates)?;
-
-                if self.cached_sorted_docids.is_empty() {
-                    // candidates exhausted, exit
-                    if let Some((point0, _)) = current_distance.as_ref() {
-                        return Ok(Some(RankingRuleOutput {
-                            query,
-                            candidates: current_bucket,
-                            score: ScoreDetails::GeoSort(score_details::GeoSort {
-                                target_point: self.point,
-                                ascending: self.ascending,
-                                value: Some(point0.to_owned()),
-                            }),
-                        }));
-                    } else {
-                        return Ok(Some(RankingRuleOutput {
-                            query,
-                            candidates: universe.clone(),
-                            score: ScoreDetails::GeoSort(score_details::GeoSort {
-                                target_point: self.point,
-                                ascending: self.ascending,
-                                value: None,
-                            }),
-                        }));
-                    }
-                }
-            }
-        }
+            })
+        })
     }
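The `next_bucket` call above replaces the removed loop with a standalone function. A stripped-down, hypothetical sketch of the bucketing policy that loop implemented: group docids pre-sorted by distance while they stay within `distance_error_margin` of the bucket's first distance, capped at `max_bucket_size`. Names and the `VecDeque` shape are illustrative only; the real code also handles descending order and cache refills.

use std::collections::VecDeque;

fn next_bucket(
    cache: &mut VecDeque<(u32, f64)>,
    distance_error_margin: f64,
    max_bucket_size: usize,
) -> Vec<u32> {
    let mut bucket = Vec::new();
    let mut first_distance = None;
    while let Some((id, distance)) = cache.pop_front() {
        match first_distance {
            None => first_distance = Some(distance),
            Some(d0) if (d0 - distance).abs() > distance_error_margin => {
                // too far from the bucket's first distance: next bucket
                cache.push_front((id, distance));
                break;
            }
            Some(_) => {}
        }
        bucket.push(id);
        if bucket.len() == max_bucket_size {
            break; // bucket full, stop early
        }
    }
    bucket
}

fn main() {
    let mut cache = VecDeque::from([(1, 10.0), (2, 10.4), (3, 25.0)]);
    assert_eq!(next_bucket(&mut cache, 0.5, 10), vec![1, 2]);
    assert_eq!(next_bucket(&mut cache, 0.5, 10), vec![3]);
}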
#[tracing::instrument(level = "trace", skip_all, target = "search::geo_sort")]
@@ -394,16 +149,3 @@ impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for GeoSort<Q> {
         self.cached_sorted_docids.clear();
     }
 }
-
-/// Compute the antipodal coordinate of `coord`
-fn opposite_of(mut coord: [f64; 2]) -> [f64; 2] {
-    coord[0] *= -1.;
-    // in the case of x,0 we want to return x,180
-    if coord[1] > 0. {
-        coord[1] -= 180.;
-    } else {
-        coord[1] += 180.;
-    }
-    coord
-}
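A quick sanity check of the removed helper's arithmetic (inputs chosen to be exactly representable in f64). Descending geo sort used the antipode so that a nearest-first rtree traversal from it yields farthest-first documents:

fn main() {
    // latitude flips sign; a positive longitude shifts down by 180°
    assert_eq!(opposite_of([48.5, 2.25]), [-48.5, -177.75]);
    // the x,0 case called out in the comment: longitude becomes 180
    assert_eq!(opposite_of([12.0, 0.0]), [-12.0, 180.0]);
}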


@@ -510,6 +510,7 @@ mod tests {
         crate::TermsMatchingStrategy::default(),
         crate::score_details::ScoringStrategy::Skip,
         false,
+        None,
         universe,
         &None,
         &None,


@@ -1,7 +1,7 @@
 mod bucket_sort;
 mod db_cache;
 mod distinct;
-mod geo_sort;
+pub(crate) mod geo_sort;
 mod graph_based_ranking_rule;
 mod interner;
 mod limits;
@@ -46,14 +46,14 @@ use resolve_query_graph::{compute_query_graph_docids, PhraseDocIdsCache};
 use roaring::RoaringBitmap;
 use sort::Sort;
 
-use self::distinct::facet_string_values;
+pub(crate) use self::distinct::{facet_string_values, facet_values_prefix_key};
 use self::geo_sort::GeoSort;
-pub use self::geo_sort::{Parameter as GeoSortParameter, Strategy as GeoSortStrategy};
 use self::graph_based_ranking_rule::Words;
 use self::interner::Interned;
 use self::vector_sort::VectorSort;
 use crate::attribute_patterns::{match_pattern, PatternMatch};
 use crate::constants::RESERVED_GEO_FIELD_NAME;
+use crate::documents::GeoSortParameter;
 use crate::index::PrefixSearch;
 use crate::localized_attributes_rules::LocalizedFieldIds;
 use crate::score_details::{ScoreDetails, ScoringStrategy};
@@ -319,7 +319,7 @@ fn resolve_negative_phrases(
 fn get_ranking_rules_for_placeholder_search<'ctx>(
     ctx: &SearchContext<'ctx>,
     sort_criteria: &Option<Vec<AscDesc>>,
-    geo_param: geo_sort::Parameter,
+    geo_param: GeoSortParameter,
 ) -> Result<Vec<BoxRankingRule<'ctx, PlaceholderQuery>>> {
     let mut sort = false;
     let mut sorted_fields = HashSet::new();
@@ -371,7 +371,7 @@ fn get_ranking_rules_for_placeholder_search<'ctx>(
 fn get_ranking_rules_for_vector<'ctx>(
     ctx: &SearchContext<'ctx>,
     sort_criteria: &Option<Vec<AscDesc>>,
-    geo_param: geo_sort::Parameter,
+    geo_param: GeoSortParameter,
     limit_plus_offset: usize,
     target: &[f32],
     embedder_name: &str,
@@ -448,7 +448,7 @@ fn get_ranking_rules_for_vector<'ctx>(
 fn get_ranking_rules_for_query_graph_search<'ctx>(
     ctx: &SearchContext<'ctx>,
     sort_criteria: &Option<Vec<AscDesc>>,
-    geo_param: geo_sort::Parameter,
+    geo_param: GeoSortParameter,
     terms_matching_strategy: TermsMatchingStrategy,
 ) -> Result<Vec<BoxRankingRule<'ctx, QueryGraph>>> {
     // query graph search
@@ -559,7 +559,7 @@ fn resolve_sort_criteria<'ctx, Query: RankingRuleQueryTrait>(
     ranking_rules: &mut Vec<BoxRankingRule<'ctx, Query>>,
     sorted_fields: &mut HashSet<String>,
     geo_sorted: &mut bool,
-    geo_param: geo_sort::Parameter,
+    geo_param: GeoSortParameter,
 ) -> Result<()> {
     let sort_criteria = sort_criteria.clone().unwrap_or_default();
     ranking_rules.reserve(sort_criteria.len());
@@ -626,10 +626,12 @@ pub fn execute_vector_search(
     ctx: &mut SearchContext<'_>,
     vector: &[f32],
     scoring_strategy: ScoringStrategy,
+    exhaustive_number_hits: bool,
+    max_total_hits: Option<usize>,
     universe: RoaringBitmap,
     sort_criteria: &Option<Vec<AscDesc>>,
     distinct: &Option<String>,
-    geo_param: geo_sort::Parameter,
+    geo_param: GeoSortParameter,
     from: usize,
     length: usize,
     embedder_name: &str,
@@ -669,6 +671,8 @@ pub fn execute_vector_search(
         placeholder_search_logger,
         time_budget,
         ranking_score_threshold,
+        exhaustive_number_hits,
+        max_total_hits,
     )?;
 
     Ok(PartialSearchResult {
@@ -689,10 +693,11 @@ pub fn execute_search(
     terms_matching_strategy: TermsMatchingStrategy,
     scoring_strategy: ScoringStrategy,
     exhaustive_number_hits: bool,
+    max_total_hits: Option<usize>,
     mut universe: RoaringBitmap,
     sort_criteria: &Option<Vec<AscDesc>>,
     distinct: &Option<String>,
-    geo_param: geo_sort::Parameter,
+    geo_param: GeoSortParameter,
     from: usize,
     length: usize,
     words_limit: Option<usize>,
@@ -825,6 +830,8 @@ pub fn execute_search(
             query_graph_logger,
             time_budget,
             ranking_score_threshold,
+            exhaustive_number_hits,
+            max_total_hits,
         )?
     } else {
         let ranking_rules =
@@ -841,6 +848,8 @@ pub fn execute_search(
             placeholder_search_logger,
             time_budget,
             ranking_score_threshold,
+            exhaustive_number_hits,
+            max_total_hits,
         )?
     };
@@ -872,7 +881,7 @@ pub fn execute_search(
     })
 }
 
-fn check_sort_criteria(
+pub(crate) fn check_sort_criteria(
     ctx: &SearchContext<'_>,
     sort_criteria: Option<&Vec<AscDesc>>,
 ) -> Result<()> {
@@ -902,7 +911,7 @@ fn check_sort_criteria(
             let (valid_fields, hidden_fields) =
                 ctx.index.remove_hidden_fields(ctx.txn, sortable_fields)?;
 
-            return Err(UserError::InvalidSortableAttribute {
+            return Err(UserError::InvalidSearchSortableAttribute {
                 field: field.to_string(),
                 valid_fields,
                 hidden_fields,
@@ -913,7 +922,7 @@ fn check_sort_criteria(
             let (valid_fields, hidden_fields) =
                 ctx.index.remove_hidden_fields(ctx.txn, sortable_fields)?;
 
-            return Err(UserError::InvalidSortableAttribute {
+            return Err(UserError::InvalidSearchSortableAttribute {
                 field: RESERVED_GEO_FIELD_NAME.to_string(),
                 valid_fields,
                 hidden_fields,

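The new `exhaustive_number_hits`/`max_total_hits` parameters are only threaded through the signatures in this file; the call sites that act on them are not shown in this diff. One plausible reading of how a bucket-sort consumer could combine them — the predicate below is an assumption for illustration, not code from the changeset:

// Hypothetical: stop collecting once the pagination cap is reached,
// unless an exhaustive hit count was explicitly requested.
fn should_stop(collected: usize, exhaustive_number_hits: bool, max_total_hits: Option<usize>) -> bool {
    !exhaustive_number_hits && max_total_hits.is_some_and(|max| collected >= max)
}

fn main() {
    assert!(should_stop(1_000, false, Some(1_000)));
    assert!(!should_stop(999, false, Some(1_000)));
    assert!(!should_stop(1_000, true, Some(1_000))); // exhaustive: keep counting
    assert!(!should_stop(1_000, false, None)); // no cap configured
}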

@@ -93,7 +93,7 @@ pub struct ChatSearchParams {
     pub hybrid: Setting<HybridQuery>,
 
     #[serde(default, skip_serializing_if = "Setting::is_not_set")]
-    #[deserr(default = Setting::Set(20))]
+    #[deserr(default)]
     #[schema(value_type = Option<usize>)]
     pub limit: Setting<usize>,
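The attribute swap above changes what a missing `limit` deserializes to: `Setting::NotSet` rather than `Setting::Set(20)`. A hypothetical mini-model of why the distinction matters (this `Setting` is a local stand-in, not the meilisearch-types type):

#[derive(Debug, PartialEq)]
enum Setting<T> {
    Set(T),
    NotSet,
}

// With a hard-coded Set(20) default, "user sent nothing" was
// indistinguishable from "user asked for 20"; NotSet lets the caller
// apply the effective default later.
fn effective_limit(limit: Setting<usize>, server_default: usize) -> usize {
    match limit {
        Setting::Set(n) => n,
        Setting::NotSet => server_default,
    }
}

fn main() {
    assert_eq!(effective_limit(Setting::NotSet, 20), 20);
    assert_eq!(effective_limit(Setting::Set(5), 20), 5);
}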


@@ -2,7 +2,7 @@ use heed::RwTxn;
 use roaring::RoaringBitmap;
 use time::OffsetDateTime;
 
-use crate::{FieldDistribution, Index, Result};
+use crate::{database_stats::DatabaseStats, FieldDistribution, Index, Result};
 
 pub struct ClearDocuments<'t, 'i> {
     wtxn: &'t mut RwTxn<'i>,
@@ -92,6 +92,10 @@ impl<'t, 'i> ClearDocuments<'t, 'i> {
         documents.clear(self.wtxn)?;
 
+        // Update the stats of the documents database after clearing all documents.
+        let stats = DatabaseStats::new(self.index.documents.remap_data_type(), self.wtxn)?;
+        self.index.put_documents_stats(self.wtxn, stats)?;
+
         Ok(number_of_documents)
     }
 }
@@ -122,6 +126,9 @@ mod tests {
         let rtxn = index.read_txn().unwrap();
 
+        // Variables for statistics verification
+        let stats = index.documents_stats(&rtxn).unwrap().unwrap();
+
         // the value is 7 because there is `[id, name, age, country, _geo, _geo.lng, _geo.lat]`
         assert_eq!(index.fields_ids_map(&rtxn).unwrap().len(), 7);
@@ -142,5 +149,9 @@ mod tests {
         assert!(index.field_id_docid_facet_f64s.is_empty(&rtxn).unwrap());
         assert!(index.field_id_docid_facet_strings.is_empty(&rtxn).unwrap());
         assert!(index.documents.is_empty(&rtxn).unwrap());
+
+        // Verify that the statistics are correctly updated after clearing documents
+        assert_eq!(index.number_of_documents(&rtxn).unwrap(), 0);
+        assert_eq!(stats.number_of_entries(), 0);
     }
 }


@@ -23,7 +23,7 @@ use crate::progress::EmbedderStats;
 use crate::prompt::Prompt;
 use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd};
 use crate::update::settings::InnerIndexSettingsDiff;
-use crate::vector::db::{EmbedderInfo, EmbeddingStatus, EmbeddingStatusDelta};
+use crate::vector::db::{EmbedderInfo, EmbeddingStatusDelta};
 use crate::vector::error::{EmbedErrorKind, PossibleEmbeddingMistakes, UnusedVectorsDistribution};
 use crate::vector::extractor::{Extractor, ExtractorDiff, RequestFragmentExtractor};
 use crate::vector::parsed_vectors::{ParsedVectorsDiff, VectorState};
@@ -441,6 +441,8 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
         {
             let embedder_is_manual = matches!(*runtime.embedder, Embedder::UserProvided(_));
+            let (old_is_user_provided, old_must_regenerate) =
+                embedder_info.embedding_status.is_user_provided_must_regenerate(docid);
             let (old, new) = parsed_vectors.remove(embedder_name);
             let new_must_regenerate = new.must_regenerate();
             let delta = match action {
@@ -499,16 +501,19 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
             let is_adding_fragments = has_fragments && !old_has_fragments;
 
-            if is_adding_fragments {
+            if !has_fragments {
+                // removing fragments
+                regenerate_prompt(obkv, &runtime.document_template, new_fields_ids_map)?
+            } else if is_adding_fragments ||
+                // regenerate all fragments when going from user provided to ! user provided
+                old_is_user_provided
+            {
                 regenerate_all_fragments(
                     runtime.fragments(),
                     &doc_alloc,
                     new_fields_ids_map,
                     obkv,
                 )
-            } else if !has_fragments {
-                // removing fragments
-                regenerate_prompt(obkv, &runtime.document_template, new_fields_ids_map)?
             } else {
                 let mut fragment_diff = Vec::new();
                 let new_fields_ids_map = new_fields_ids_map.as_fields_ids_map();
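Condensed, the reordered branches above amount to this decision table (a hypothetical standalone restatement; `Action` and `choose` are invented names):

#[derive(Debug, PartialEq)]
enum Action {
    RegeneratePrompt,       // no fragments anymore: fall back to the template
    RegenerateAllFragments, // fragments just added, or no longer user-provided
    DiffFragments,          // otherwise, regenerate only the changed fragments
}

fn choose(has_fragments: bool, is_adding_fragments: bool, old_is_user_provided: bool) -> Action {
    if !has_fragments {
        Action::RegeneratePrompt
    } else if is_adding_fragments || old_is_user_provided {
        Action::RegenerateAllFragments
    } else {
        Action::DiffFragments
    }
}

fn main() {
    assert_eq!(choose(false, false, true), Action::RegeneratePrompt);
    assert_eq!(choose(true, true, false), Action::RegenerateAllFragments);
    assert_eq!(choose(true, false, true), Action::RegenerateAllFragments);
    assert_eq!(choose(true, false, false), Action::DiffFragments);
}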
@@ -600,7 +605,8 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
                 docid,
                 &delta,
                 new_must_regenerate,
-                &embedder_info.embedding_status,
+                old_is_user_provided,
+                old_must_regenerate,
             );
 
             // and we finally push the unique vectors into the writer
@@ -657,10 +663,9 @@ fn push_embedding_status_delta(
     docid: DocumentId,
     delta: &VectorStateDelta,
     new_must_regenerate: bool,
-    embedding_status: &EmbeddingStatus,
+    old_is_user_provided: bool,
+    old_must_regenerate: bool,
 ) {
-    let (old_is_user_provided, old_must_regenerate) =
-        embedding_status.is_user_provided_must_regenerate(docid);
     let new_is_user_provided = match delta {
         VectorStateDelta::NoChange => old_is_user_provided,
         VectorStateDelta::NowRemoved => {


@@ -16,6 +16,7 @@ pub struct IndexerConfig {
     pub max_positions_per_attributes: Option<u32>,
     pub skip_index_budget: bool,
     pub experimental_no_edition_2024_for_settings: bool,
+    pub experimental_no_edition_2024_for_dumps: bool,
 }
impl IndexerConfig {
@@ -65,6 +66,7 @@ impl Default for IndexerConfig {
             max_positions_per_attributes: None,
             skip_index_budget: false,
             experimental_no_edition_2024_for_settings: false,
+            experimental_no_edition_2024_for_dumps: false,
         }
     }
 }
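A minimal usage sketch for the new flag, assuming milli as a dependency and relying on the Default impl shown above:

use milli::update::IndexerConfig;

fn main() {
    // Opt dumps out of the edition-2024 indexing path while keeping
    // every other knob at its default value.
    let config = IndexerConfig {
        experimental_no_edition_2024_for_dumps: true,
        ..Default::default()
    };
    assert!(config.experimental_no_edition_2024_for_dumps);
    assert!(!config.experimental_no_edition_2024_for_settings);
}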

Some files were not shown because too many files have changed in this diff.