Compare commits

...

310 Commits

Author SHA1 Message Date
Louis Dureuil
1bf6e5030a WIP 2025-11-18 17:56:01 +01:00
Louis Dureuil
b757db5cf6 Pass task network at registration time 2025-11-18 10:37:54 +01:00
Louis Dureuil
47f87ba17d WIP 2025-11-17 15:48:43 +01:00
Clément Renault
ea70a7d1c9 Merge pull request #5969 from xuhongxu96/main
Remove unused dependency `allocator-api2`
2025-11-15 10:03:15 +00:00
Clément Renault
9304f8e586 Merge pull request #5991 from meilisearch/release-v1.26.0
Release v1.26.0
2025-11-13 17:54:01 +00:00
Louis Dureuil
495db080ec Upgrade snap 2025-11-13 17:52:34 +01:00
Louis Dureuil
d71341fa48 Suport upgrade to v1.26.0 2025-11-13 17:52:02 +01:00
Louis Dureuil
5b3070d8c3 Update version in toml and lock 2025-11-13 17:35:26 +01:00
Louis Dureuil
89006fd4b3 Merge pull request #5980 from hayatosc/feat/hugging-face-modernbert
Support ModernBERT architecture on `huggingface` embedder
2025-11-10 18:03:35 +00:00
Louis Dureuil
49f50a0a21 Don't collect the views 2025-11-10 17:55:44 +01:00
Louis Dureuil
1104f00803 happy clippy 2025-11-10 16:59:12 +01:00
Louis Dureuil
33fa564a9c rustfmt 2025-11-10 16:56:13 +01:00
Clément Renault
a097b254f8 Merge pull request #5963 from meilisearch/engprod-2128-allow-attaching-user-defined-metadata-to-tasks-and-return
Allow to attach `customMetadata` in the document addition or update tasks
2025-11-10 15:48:46 +00:00
Clément Renault
54cb0ec437 Merge pull request #5984 from meilisearch/embedder-error-modes
Embedder failure modes
2025-11-10 15:34:01 +00:00
Louis Dureuil
38ed1f1dbb Change parsing of environment variable 2025-11-10 15:08:24 +01:00
Clément Renault
643dd33358 Merge pull request #5982 from meilisearch/bump-meilisearch-v1.25.0
Bump meilisearch v1.25.0
2025-11-10 14:04:17 +00:00
Louis Dureuil
32f9fb6ab2 fix environment variable values 2025-11-10 14:54:25 +01:00
Louis Dureuil
b5966f82e8 Make max retry duration configurable with MEILI_EXPERIMENTAL_REST_EMBEDDER_MAX_RETRY_DURATION_SECONDS 2025-11-10 14:29:27 +01:00
Louis Dureuil
5e54063aab Configurable timeout with MEILI_EXPERIMENTAL_REST_EMBEDDER_TIMEOUT_SECONDS 2025-11-10 14:29:20 +01:00
Louis Dureuil
40456795d0 Allow to customize failure modes with MEILI_EXPERIMENTAL_CONFIG_EMBEDDER_FAILURE_MODES 2025-11-10 14:23:51 +01:00
ManyTheFish
40e60c6f52 Fix dumpless upgrade 2025-11-10 14:03:17 +01:00
ManyTheFish
eeae6383d0 Bump Meilisearch version v1.25.0 2025-11-10 14:03:17 +01:00
Clément Renault
8cbcaeff56 Merge pull request #5981 from meilisearch/charabia-v0.9.8
Update Charabia v0.9.8
2025-11-10 09:45:30 +00:00
ManyTheFish
ce87d5a89e Update Charabia v0.9.8 2025-11-10 09:33:31 +01:00
Hayato Sakaguchi
9f7172f6ab chage tensor names 2025-11-09 01:06:04 +09:00
Hayato Sakaguchi
d6eca83cfa Support modernbert architecture in hugging face embedder 2025-11-08 20:53:47 +09:00
Louis Dureuil
a9d6e86077 Merge pull request #5775 from meilisearch/experimental-search-personnalization
Experimental search personalization
2025-11-06 18:45:18 +00:00
Clément Renault
346f9efe3a Merge pull request #5977 from meilisearch/fix-rust-analyzer-false-positive
Fix error that rust-analyzer reports because it is compiling all code with the `test` cfg
2025-11-06 18:42:43 +00:00
Louis Dureuil
a987d698c1 Fix error that rust-analyzer reports because it is compiling all code with the test cfg 2025-11-06 18:10:59 +01:00
Louis Dureuil
fc3508c8c8 Fix route path 2025-11-06 18:08:50 +01:00
ManyTheFish
dbb45dec1a ignore flaky test 2025-11-06 17:55:27 +01:00
ManyTheFish
5f69a43846 Early return if time budget is already exceeded 2025-11-06 17:55:27 +01:00
ManyTheFish
fe1e4814fa Return an error when personalize is used in federated queries 2025-11-06 17:55:27 +01:00
ManyTheFish
c29749741b Use the time budget instead of defining a deadline outside the scope 2025-11-06 17:55:27 +01:00
ManyTheFish
3e47201365 Fix too many argument clippy warning 2025-11-06 17:55:27 +01:00
ManyTheFish
ec9719f3b1 Fix simple PR comments 2025-11-06 17:55:27 +01:00
ManyTheFish
b2cc9e4db8 remove useless clones 2025-11-06 17:55:27 +01:00
ManyTheFish
56198bae48 Initialize personalization API once 2025-11-06 17:55:27 +01:00
ManyTheFish
888059b2d0 Fix PR comments 2025-11-06 17:55:27 +01:00
ManyTheFish
410f2fc8c3 add some failure tests for personalization 2025-11-06 17:55:01 +01:00
ManyTheFish
54e244d2f3 Return an error when the feature is disabled 2025-11-06 17:55:01 +01:00
ManyTheFish
e0c36972fb remove deadcode 2025-11-06 17:55:01 +01:00
ManyTheFish
daadcddb5e Reduce personalization footprint on the codebase 2025-11-06 17:55:01 +01:00
ManyTheFish
7f92dafa02 User context is no more optional 2025-11-06 17:55:01 +01:00
ManyTheFish
cc5d12a368 FIx padding 2025-11-06 17:55:01 +01:00
ManyTheFish
0f98b996b5 Fix PR comments 2025-11-06 17:55:01 +01:00
ManyTheFish
d005ca5bf7 remove irrelevant tests 2025-11-06 17:55:01 +01:00
ManyTheFish
7e65fb1d3e feat(metrics): add personalization count to metrics endpoint
- Add MEILISEARCH_PERSONALIZED_SEARCH_REQUESTS metric to track personalized searches
- Increment metric directly in search analytics when personalization is used
- Metric automatically exposed in /metrics endpoint for monitoring
2025-11-06 17:55:01 +01:00
ManyTheFish
cdefb3f665 feat(analytics): add personalization tracking to segment analytics
- Add total_personalized field to SearchAggregator to track personalization usage
- Track when search requests include personalization parameters
- Include personalization data in analytics JSON output
- Maintain clean personalization service interface
2025-11-06 17:55:01 +01:00
ManyTheFish
a91887221a refactor(personalization): improve Cohere reranking logic and error handling
- Replace and_then() with early return for missing personalization
- Simplify reranking by building new hits vector instead of swapping
- Add debug logging for reranked indices
- Fix potential index out-of-bounds issues in reranking
2025-11-06 17:55:01 +01:00
ManyTheFish
9c66b20a97 refactor: split PersonalizationService into enum with CohereService
- Refactor PersonalizationService as enum with Cohere and Uninitialized variants
- Create dedicated CohereService struct with rerank_search_results method
- Split constructor into cohere() and uninitialized() methods
- Move all Cohere logic into CohereService for better separation of concerns
- Update tests and lib.rs to use new API
- Improve code organization and maintainability
2025-11-06 17:55:01 +01:00
ManyTheFish
a48283527e feat: refine personalization query by merging with user context
- Merge initial query with user context to create a comprehensive prompt
- Only skip reranking if both query and user_context are None
- Support reranking with query-only, user_context-only, or both
- Use 'let else' pattern for cleaner error handling
- Add comprehensive tests for different parameter combinations
- Improve prompt format for better reranking effectiveness
2025-11-06 17:55:01 +01:00
ManyTheFish
73f78c19b0 refactor: rename personalization API fields and move checks inside service
- Rename 'personalization' field to 'personalize' in API
- Rename 'userProfile' to 'userContext' in personalization object
- Remove 'personalized' boolean field (activation now based on non-null 'personalize')
- Move personalization checks inside rerank_search_results function
- Use 'let else' pattern for better error handling
- Update error types and messages to reflect new field names
- Update all search routes and analytics to use new field names
2025-11-06 17:55:01 +01:00
ManyTheFish
34639e346e feat: add personalization service with EnglishV3-only reranking
- Add new personalization module with Cohere integration
- Implement rerank_search_results method using EnglishV3 model
- Remove fallback logic to EnglishV2 for simplified behavior
- Add comprehensive error handling and logging
- Include unit tests for service behavior
- Update search route to support personalization feature
2025-11-06 17:55:01 +01:00
ManyTheFish
7af2a254d6 feat: add personalization parameters to /search route
- Add Personalization struct with personalized boolean and user_profile string
- Add personalizationPersonalized and personalizationUserProfile query parameters to SearchQueryGet
- Follow same pattern as hybrid parameters (hybridEmbedder, hybridSemanticRatio)
- Add validation: personalizationUserProfile requires personalizationPersonalized
- Add error codes for personalization parameters
- Update analytics and facet search to handle new personalization field
- Remove serde dependencies from Personalization struct, use Deserr only
2025-11-06 17:55:01 +01:00
ManyTheFish
0f9d262a1c feat: add experimental_personalization_api_key feature to RoFeatures
- Add MEILI_EXPERIMENTAL_PERSONALIZATION_API_KEY environment variable
- Add experimental_personalization_api_key field to Opt struct with CLI and env support
- Add experimental_personalization_api_key field to InstanceTogglableFeatures
- Store personalization API key in FeatureData for access through IndexScheduler
- Add experimental_personalization_api_key() method to IndexScheduler
- Update analytics destructuring to include new field
- Maintain RoFeatures Copy trait while properly handling Option<String>
2025-11-06 17:55:01 +01:00
Louis Dureuil
747476a225 Add customMetadata to all documents routes 2025-11-06 17:07:18 +01:00
Clément Renault
34765b556b Merge pull request #5948 from meilisearch/new-s3-snapshots
Upload snapshot tarballs to S3
2025-11-06 15:10:37 +00:00
Kerollmops
dfb4860578 Make some parameters experimental and link the dedicated issue 2025-11-06 12:43:12 +01:00
Kerollmops
ce62713f02 Always create the update_files directory 2025-11-06 12:43:12 +01:00
Kerollmops
8b5d04d60f Move the code to append a file to the tarball in a dedicated function 2025-11-06 12:07:08 +01:00
Kerollmops
1b74709b91 Extract more logic into dedicated functions 2025-11-06 12:00:25 +01:00
Kerollmops
a5c0a282c5 Better document safety problems and unwraps 2025-11-06 11:26:45 +01:00
Kerollmops
4fc048ff20 Better handle errors when trying to clone the handles 2025-11-06 11:15:34 +01:00
Kerollmops
375b5600cd Move the tarball streaming into a dedicated function 2025-11-06 11:11:26 +01:00
Kerollmops
32b997d817 Extracts the streaming closure into an async function 2025-11-06 11:04:16 +01:00
Kerollmops
ff3090e3cc Remove the slash-path dependency 2025-11-06 10:53:07 +01:00
Kerollmops
6c6645f945 Better seeking-to-the-start explanation 2025-11-06 10:45:47 +01:00
Clément Renault
af6473d999 Improve the comments
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2025-11-06 10:43:14 +01:00
Kerollmops
11851f9701 Fix warnings on Windows 2025-11-05 16:40:33 +01:00
Kerollmops
cc4654eabd Remove useless clippy flag 2025-11-05 16:38:05 +01:00
Kerollmops
0bb91f4a77 Immediately panic when using S3 snapshot options on Windows 2025-11-05 15:16:43 +01:00
Kerollmops
f9d57f54df Keep the smae naming behavior as before 2025-11-05 15:16:43 +01:00
Kerollmops
3ef1afc0f1 Introduce some backoff retries 2025-11-05 15:16:43 +01:00
Kerollmops
dbb5abebb6 Support cancelation 2025-11-05 15:16:42 +01:00
Kerollmops
700f33bd39 Clean up the code 2025-11-05 15:16:42 +01:00
Kerollmops
d01bbbccde Support clean CLI options 2025-11-05 15:16:42 +01:00
Kerollmops
4fc506f267 Remove unused imports/code on Windows 2025-11-05 15:16:42 +01:00
Kerollmops
dc456276e5 Make clippy happy 2025-11-05 15:16:42 +01:00
Kerollmops
b2ea50cb10 Make the compression level configurable 2025-11-05 15:16:42 +01:00
Kerollmops
5074cf92ab Disable compression entirely to avoid being CPU bound 2025-11-05 15:16:42 +01:00
Clément Renault
a92bc8d192 Improve the way we create the snapshot path 2025-11-05 15:16:42 +01:00
Clément Renault
ee538cf045 Remove useless dependencies 2025-11-05 15:16:42 +01:00
Kerollmops
2b05d63a0f Make it finaly work but without async on the write side 2025-11-05 15:16:42 +01:00
Kerollmops
104e8918ce Seeking the tasks/data.mdb file to the begining made the trick 2025-11-05 15:16:42 +01:00
Kerollmops
d6ec4d4f4a Improve understanding of S3-related errors 2025-11-05 15:16:42 +01:00
Kerollmops
f0e7326b7a Retrieve the bytesMut only when released 2025-11-05 15:16:42 +01:00
Kerollmops
c8106a0006 Fix minimum part size 2025-11-05 15:16:42 +01:00
Kerollmops
c9ab5bc0b6 Improve error messaging when missing env var 2025-11-05 15:16:42 +01:00
Clément Renault
5e0f15fd43 WIP 2025-11-05 15:16:42 +01:00
Kerollmops
4c30f090c7 WIP Do more tests 2025-11-05 15:16:42 +01:00
Clément Renault
63f247cdda WIP sending multiparts of 250MiB 2025-11-05 15:16:42 +01:00
Clément Renault
e109fa9529 Rename the update_path function 2025-11-05 15:16:42 +01:00
Clément Renault
76e4ec2168 Geenrate an async tarball 2025-11-05 15:16:42 +01:00
Kerollmops
982babdb74 WIP 2025-11-05 15:16:42 +01:00
Kerollmops
7ae2ae33d9 Make max in flights parts fro upload configurable 2025-11-05 15:16:42 +01:00
Kerollmops
cb0788ae07 Use a good mem advice for uploads 2025-11-05 15:16:42 +01:00
Kerollmops
cb3e5dc234 Move the S3 snapshots to disk into a dedicated method 2025-11-05 15:16:41 +01:00
Clément Renault
59d40a2821 Upload ten parts at a time 2025-11-05 15:16:41 +01:00
Clément Renault
98a678e73d Use the Bytes crate to send the parts 2025-11-05 15:16:41 +01:00
Clément Renault
70292aae3c Upload indexes under their uuids 2025-11-05 15:16:41 +01:00
Clément Renault
73521f0069 Initial working S3 uploads to RustFS 2025-11-05 15:16:41 +01:00
Louis Dureuil
4533179604 Pass tokio handle to index-scheduler 2025-11-05 15:16:41 +01:00
Clément Renault
1a21cc1a17 Merge pull request #5959 from meilisearch/parallelize-word-prefix-docids
Parallelize the word prefix docids
2025-11-05 10:10:07 +00:00
Clément Renault
d08042f8a7 Merge pull request #5967 from meilisearch/bump-lmdb-version
Fix the LMDB fork memory leak
2025-11-05 09:33:59 +00:00
Louis Dureuil
77aadb5f22 Merge pull request #5968 from meilisearch/engprod-2116-privilege-escalation-from-webhook-api-key-to-master-key
Redact Authorization header in webhooks
2025-11-05 08:51:07 +00:00
Kerollmops
4fd913f7eb Use the latest version of heed 2025-11-05 09:42:03 +01:00
Louis Dureuil
4b72e54ca7 Test webhook with metadata 2025-11-04 17:41:29 +01:00
Louis Dureuil
adef2cc132 test remote auto sharding with and without metadata 2025-11-04 17:41:28 +01:00
Louis Dureuil
533b9951b1 Allow adding custom metadata in tests 2025-11-04 17:41:28 +01:00
Louis Dureuil
9103cbc9db Add custom metadata to payload 2025-11-04 17:41:28 +01:00
Louis Dureuil
083de2bfc1 Allow to attach customMetadatain the document addition or update tasks 2025-11-04 17:41:28 +01:00
Louis Dureuil
8618a4d2ba document hide_secret 2025-11-04 17:03:12 +01:00
Hongxu Xu
08bc982748 Remove unused dependency allocator-api2 2025-11-04 03:29:24 +00:00
Louis Dureuil
e9c5df7993 happy clippy 2025-11-03 17:23:27 +01:00
Louis Dureuil
8a28b3aa77 Update snap 2025-11-03 15:52:35 +01:00
Louis Dureuil
1a0b100ad9 rename webhook to highlight redaction 2025-11-03 15:52:22 +01:00
Louis Dureuil
ff93563f41 Redact webhook authorize header on display 2025-11-03 15:51:56 +01:00
Louis Dureuil
2f25258191 Extract crate::settings::hide_secret as a public function 2025-11-03 15:50:37 +01:00
Clément Renault
2859079c32 Merge pull request #5961 from meilisearch/add-flickr-demo
Add Flickr example to README
2025-11-03 14:44:54 +00:00
Clément Renault
74b83d305f Add Flickr example to README
This PR adds the new Flickr demo to the README.
2025-10-29 18:27:36 +01:00
Clément Renault
70f6e4b828 Parallelize the word prefix docids 2025-10-27 17:20:12 +01:00
Clément Renault
6df196034e Merge pull request #5950 from meilisearch/update-version-v1.24.0
Update version to v1.24.0
2025-10-20 11:17:15 +00:00
Clément Renault
a63762737c Upgrade index scheduler 2025-10-20 12:22:27 +02:00
Clément Renault
77394bd4b9 Update insta tests 2025-10-20 10:54:16 +02:00
Clément Renault
cb87201c8b Fix dumpless upgrade and do nothing 2025-10-20 10:42:35 +02:00
Clément Renault
1a9c38794f Bump version to v1.24.0 2025-10-20 10:38:48 +02:00
Clément Renault
34233efb63 Merge pull request #5946 from meilisearch/fix-compaction-issues
Improve compaction behaviors
2025-10-16 15:42:38 +00:00
Clément Renault
af0608ebd6 Continue to the next index if index doesn't exists 2025-10-16 16:39:51 +02:00
Clément Renault
8c7e5c094e Improve the task batch stopped message 2025-10-16 16:39:50 +02:00
Clément Renault
c064737137 Remove duplicated logic in auto batching of tasks 2025-10-16 16:33:20 +02:00
Clément Renault
1d188a7ad3 Make the compaction tasks a priority over the export ones 2025-10-16 13:01:23 +02:00
Clément Renault
66a6b65716 Merge pull request #5945 from meilisearch/search-cutoff-vector-store
Search cutoff vector store
2025-10-16 09:43:20 +00:00
Louis Dureuil
326652a399 Update hannoy 2025-10-16 10:34:54 +02:00
Louis Dureuil
59316e8d5a add unit test 2025-10-16 10:34:20 +02:00
Louis Dureuil
76d7f20c87 fix snap 2025-10-16 10:34:19 +02:00
Louis Dureuil
380b2797a5 Share the same budget for all queries of a given index in federated search 2025-10-16 10:34:19 +02:00
Clémentine
1dd58f9bec Merge pull request #5866 from PedroTroller/build/alpine3.22
Bump Dockerfile alpine version to 3.22
2025-10-16 07:22:43 +00:00
Kerollmops
ddc76ad0dc Delete the leftover compaction files from canceled operations 2025-10-15 16:49:25 +02:00
Kerollmops
ffacf1c002 Introduce the new IndexMapper index path method 2025-10-15 16:49:25 +02:00
Kerollmops
5a49b93b77 Use constant tempfile name to reuse tempfile 2025-10-15 16:49:25 +02:00
Louis Dureuil
918a6eaec9 Implement for vector store ranking rule 2025-10-15 16:31:47 +02:00
Louis Dureuil
1e6ce70e3e "Uninteresting" ranking rule implementations 2025-10-15 16:31:47 +02:00
Louis Dureuil
b418054ee4 Change bucket_sort logic to pass the time budget and allow for retrieving non-blocking buckets 2025-10-15 16:31:47 +02:00
Louis Dureuil
58f30e9d8a Change RankingRule trait to account for budget 2025-10-15 16:31:46 +02:00
Many the fish
c45172a4bf Merge pull request #5942 from meilisearch/meili-bot-patch-1
Adapt the standards of prototypes
2025-10-15 11:22:03 +00:00
meili-bot
221ba20083 Adapt the standards of prototypes 2025-10-15 10:47:23 +02:00
Many the fish
93c5fbbb8b Merge pull request #5926 from meilisearch/search-metadata
Search metadata
2025-10-14 14:13:42 +00:00
ManyTheFish
22d529523a refactor: extract query metadata building logic into separate function 2025-10-14 14:39:07 +02:00
ManyTheFish
ed6f479940 Remove irrelevant test index method 2025-10-14 12:10:17 +02:00
ManyTheFish
f19f712433 Add local remote name when a remote federated search is made 2025-10-14 12:10:17 +02:00
ManyTheFish
24a92c2809 move contant header in search/mod.rs 2025-10-14 12:10:17 +02:00
ManyTheFish
443cc24408 --amend 2025-10-14 12:10:17 +02:00
ManyTheFish
e8d5228250 factorize metadata header 2025-10-14 12:10:17 +02:00
ManyTheFish
5c33fb090c avoid openning each index twice and remove clones 2025-10-14 12:10:17 +02:00
ManyTheFish
48dd9146e7 Add comprehensive metadata tests with insta snapshots
- Add 9 test cases covering single search, multi-search, and federated search
- Test metadata header opt-in functionality with case insensitivity
- Test header false value handling
- Test UUID format validation and consistency
- Use insta snapshots for reliable, maintainable test assertions
- Fix header parsing to properly handle 'false' values
- Add helper methods for testing with custom headers
2025-10-14 12:10:17 +02:00
ManyTheFish
c1c42e818e refactor: group perform_search parameters into SearchParams struct
- Create SearchParams struct to group related parameters
- Update perform_search function to use SearchParams instead of 8 individual parameters
- Fix clippy warning about too many arguments
- Update all callers to use new SearchParams struct
2025-10-14 12:10:17 +02:00
ManyTheFish
519905ef9c Fix remote index collision with HashMap-based lookup
- Replace BTreeMap with HashMap for (remote, index_uid) -> primary_key lookup
- Prevents collisions when multiple remotes have same index_uid but different primary keys
2025-10-14 12:10:17 +02:00
ManyTheFish
f242377d2b Fix remote index collision in federated search metadata
- Use composite key (indexUid, remote) instead of indexUid only for remote metadata lookup
- Prevents collisions when multiple remotes have same indexUid but different primary keys
- Ensures each remote query gets correct primaryKey from its specific remote instance
2025-10-14 12:10:17 +02:00
ManyTheFish
da06306274 Add header-based metadata opt-in for search responses
- Add Meili-Include-Metadata header constant
- Modify perform_search to conditionally include metadata based on header
- Modify perform_federated_search to conditionally include metadata based on header
- Update all search routes to check for header and pass include_metadata parameter
- Forward Meili-Include-Metadata header to remote requests for federated search
- Ensure remote queries include primaryKey metadata when header is present
2025-10-14 12:10:17 +02:00
ManyTheFish
b93b803a2e WIP: Add metadata field with queryUid, indexUid, primaryKey, and remote
- Add SearchMetadata struct with queryUid, indexUid, primaryKey, and remote fields
- Update SearchResult to include metadata field
- Update FederatedSearchResult to include metadata array
- Refactor federated search metadata building to maintain query order
- Support primary key extraction from both local and remote results
- Add remote field to identify remote instance for federated queries
- Ensure metadata array matches query order in federated search

Features:
- queryUid: UUID v7 for each query
- indexUid: Index identifier
- primaryKey: Primary key field name (null if not available)
- remote: Remote instance name (null for local queries)

This provides complete traceability for search operations across local and remote instances.
2025-10-14 12:10:17 +02:00
ManyTheFish
cf43ec4aff feat: add indexUid to SearchMetadata
- Add indexUid field to SearchMetadata struct
- Update perform_search to include indexUid in metadata
- Update federated search to include indexUid for each query

The metadata field now contains both queryUid and indexUid:
- For /search: single object with queryUid and indexUid
- For /multi-search: each result has metadata with both fields
- For federated search: array of objects, each with queryUid and indexUid
2025-10-14 12:10:17 +02:00
ManyTheFish
9795d98e77 feat: add metadata field with queryUid to search responses
- Add SearchMetadata struct with queryUid field (UUID v7)
- Add metadata field to SearchResult for /search route
- Add metadata field to FederatedSearchResult for /multi-search route
- Update perform_search to generate queryUid and set metadata
- Update federated search to generate queryUid for each query
- Update multi-search non-federated path to include metadata
- Fix pattern matching in analytics and other code

The metadata field contains:
- For /search: single object with queryUid
- For /multi-search: array of objects, one per query
- For federated search: array of objects, one per query

All queryUid values are generated using Uuid::now_v7() for time-ordered uniqueness.
2025-10-14 12:10:17 +02:00
Clément Renault
316b4c047f Merge pull request #5940 from meilisearch/update-version-v1.23.0
Update version v1.23.0
2025-10-13 12:50:52 +00:00
Kerollmops
1d701c6980 Fix upgrade tests 2025-10-13 10:40:15 +02:00
Kerollmops
0203adb9cb Add a no-op when upgrading the index scheduler 2025-10-13 10:28:31 +02:00
Kerollmops
0d05c2ad6e Add a no-op when upgrading the index 2025-10-13 10:24:57 +02:00
Kerollmops
b3f44c4abd Bump the version to 1.23.0 2025-10-13 09:47:20 +02:00
Clémentine
62115f57b1 Merge pull request #5938 from meilisearch/attempt-license-fix-again
Try to fix GH license detection again
2025-10-09 16:32:40 +00:00
Louis Dureuil
9023172139 Add a dedicated LICENSE-MIT file containing the unmodified MIT license 2025-10-09 16:24:18 +02:00
Louis Dureuil
59631afd9a Merge pull request #5929 from meilisearch/compaction-task
Introduce a task to compact an index
2025-10-09 11:30:01 +00:00
Clément Renault
c2584c6edd Merge pull request #5936 from meilisearch/merge-v1.22.3-back
Merge v1.22.3 back into main
2025-10-09 08:45:33 +00:00
Louis Dureuil
685663af3c bump cellulite to address backcompat issue from #5307 2025-10-09 10:20:58 +02:00
Louis Dureuil
72b4b41443 Read MEILI_EXPERIMENTAL_REMOTE_SEARCH_TIMEOUT_SECONDS to override the default timeout in remote federated search 2025-10-09 09:34:49 +02:00
Louis Dureuil
70aa768d48 Update ignored test 2025-10-09 09:34:48 +02:00
Louis Dureuil
6029677eec Also raise the global deadline 2025-10-09 09:34:48 +02:00
Louis Dureuil
3c78f4121e Raise timeout to 30secs 2025-10-09 09:34:48 +02:00
Clémentine
89170dd78f Merge pull request #5935 from meilisearch/remove-release-drafter
Remove release-drafter and encourage usage of GitHub generated notes
2025-10-08 16:42:51 +00:00
Many the fish
6379a62d95 Merge pull request #5933 from meilisearch/fix-ranking-score-with-sort
Fix ranking score bug when sort is present
2025-10-08 16:23:12 +00:00
curquiza
4c05c0cf96 Remove release-drafter and encourage usage of GitHub generated notes 2025-10-08 17:35:33 +02:00
ManyTheFish
ce832da16c Add a function documentation 2025-10-08 17:19:40 +02:00
Louis Dureuil
14de657d36 Use the "currently_processing_index" to avoid potentially blocking the search during compaction 2025-10-08 15:45:38 +02:00
Kerollmops
9a36c090bf Do not return the EnvClosingEvent 2025-10-08 15:38:45 +02:00
Kerollmops
3aca010b42 Recompute the stats 2025-10-08 15:33:12 +02:00
Clément Renault
62c11ce3f3 Fix comments 2025-10-08 15:33:12 +02:00
Clément Renault
f358538f4f Improve the pre-compaction size information 2025-10-08 15:33:12 +02:00
Clément Renault
9068857ba1 Make the tests pass 2025-10-08 15:33:12 +02:00
Clément Renault
d241157084 Make Clippy happy 2025-10-08 15:33:12 +02:00
Clément Renault
69f73b1d74 Introduce a function to effectively close an index 2025-10-08 15:33:12 +02:00
Clément Renault
202794f620 Expose the env closing event so we can wait for the index to close 2025-10-08 15:33:12 +02:00
Kerollmops
38cbd54604 Implement the index compaction task 2025-10-08 15:33:12 +02:00
Kerollmops
3877e0043c Rename operation to IndexCompaction 2025-10-08 15:33:12 +02:00
Clément Renault
f95398420b Add the necessary batches and tasks in the process 2025-10-08 15:33:11 +02:00
Clément Renault
53905c1362 Add a new CompactIndex action 2025-10-08 15:33:11 +02:00
Clément Renault
113aac8815 Introduce a new /indexes/{indexUid}/compact route 2025-10-08 15:33:11 +02:00
ManyTheFish
d2071dde1f Fix ranking score bug when sort is present
- Fix global_score function to properly handle semantic scores and ranking scores
- Prioritize semantic scores (vector/embedding) when available, fall back to ranking scores
- Exclude sort and geo sort details from relevance scoring
- Use Rank::global_score to properly merge ranking scores
- Add test case with insta snapshots to reproduce and verify the fix
- When sorting is present, ranking scores now properly reflect search relevance
- Previously all ranking scores were 1.0 when sort was present, now they show actual relevance scores
2025-10-08 11:23:43 +02:00
Many the fish
4502af5aed Merge pull request #5930 from meilisearch/synonym-performance-fix
Synonym performance fix
2025-10-07 15:17:34 +00:00
ManyTheFish
06af68aa07 Get rid of upwrap in get_synonym, We can't use get_or_insert_with because the index.synonyms(..) returns a Result 2025-10-07 14:37:13 +02:00
ManyTheFish
6d378c6397 PERFORMANCE: Implement synonym caching to eliminate repeated database access
- Added SynonymCache to SearchContext to cache synonyms in memory
- Modified synonym retrieval to use cached synonyms after first load
- Eliminated redundant database calls for multi-word queries
- Performance improvement: 87% → 0ms for subsequent synonym processing
- Complex queries now process in 40ms vs 495ms (92% improvement)
2025-10-06 14:26:30 +02:00
Clément Renault
ec0c0cf779 Merge pull request #5307 from meilisearch/parallel-bulk-facets
Parallelize bulk facets & word prefix fid/position docids
2025-10-06 12:08:52 +00:00
Kerollmops
851694e323 Fix a bug where prefixes were never deleted 2025-10-03 10:50:05 +02:00
Kerollmops
ea92c64fdc Fix a potential bug where prefixes were not deleted 2025-10-03 09:49:05 +02:00
Kerollmops
dc36f681be Fix the prefix post-processing algorithm 2025-10-03 09:42:29 +02:00
Clément Renault
48f1987a8d Improve facet post processing readability
Co-authored-by: Many the fish <many@meilisearch.com>
2025-10-03 09:42:29 +02:00
Many the fish
b98e2cef81 Merge pull request #5863 from meilisearch/add-request-uid-to-search-routes
Add request uid to search routes
2025-10-02 10:09:31 +00:00
Clément Renault
9f79ce82af Introduce new CLI arguments to deactivate experimental post processing 2025-10-02 12:06:33 +02:00
Clément Renault
5f18a9b2ee Move dependencies to actual versions 2025-10-02 11:00:48 +02:00
Clément Renault
7f8a1ac0be Remove useless heed path 2025-10-01 16:19:58 +02:00
Clément Renault
1a67163ee8 Use git cellulite in case 2025-10-01 16:02:07 +02:00
Clément Renault
38141de68d Use local heed in case 2025-10-01 16:01:58 +02:00
Clément Renault
7a98b80687 Use temporary git repo for hannoy and arroy in nested-rtxns pre-version 2025-10-01 15:28:36 +02:00
Kerollmops
229a12c8e6 Multithread word prefix position docids 2025-10-01 15:18:21 +02:00
Kerollmops
2fdfe79400 Make clippy happy 2025-10-01 15:09:59 +02:00
Kerollmops
9184b12a26 Fix the algorithm 2025-10-01 15:09:59 +02:00
Kerollmops
742378d8e1 Multi-thread the facet bulk processing 2025-10-01 15:09:59 +02:00
Kerollmops
6dcd739a8b Patch heed to create multiple nested RoTxns 2025-10-01 15:09:59 +02:00
ManyTheFish
f97384da6c Fix geo_json snapshots 2025-09-30 17:03:21 +02:00
ManyTheFish
6ea76f2771 Add uuid v7 feature 2025-09-30 15:42:03 +02:00
ManyTheFish
715b255371 fix tests 2025-09-30 15:42:03 +02:00
ManyTheFish
db094d3923 Add requestUid field in search response and add debug logs with requestUid 2025-09-30 15:42:03 +02:00
Many the fish
c29bdcae23 Merge pull request #5913 from meilisearch/dependabot/github_actions/actions/setup-python-6
Bump actions/setup-python from 5 to 6
2025-09-29 14:58:45 +00:00
Many the fish
75219181a3 Merge pull request #5834 from meilisearch/fix-openapi-ci
Minor improvement in OpenAPI CI
2025-09-29 13:55:12 +00:00
Many the fish
a5b5cf7cd1 Merge pull request #5916 from meilisearch/dependabot/github_actions/sigstore/cosign-installer-3.10.0
Bump sigstore/cosign-installer from 3.9.2 to 3.10.0
2025-09-29 13:52:31 +00:00
Many the fish
142ba8ea00 Merge pull request #5915 from meilisearch/dependabot/github_actions/actions/setup-node-5
Bump actions/setup-node from 4 to 5
2025-09-29 13:52:28 +00:00
Many the fish
4bc823e07c Merge pull request #5914 from meilisearch/dependabot/github_actions/actions/setup-dotnet-5
Bump actions/setup-dotnet from 4 to 5
2025-09-29 13:52:10 +00:00
Many the fish
db06ca7138 Merge pull request #5912 from meilisearch/dependabot/github_actions/actions/setup-go-6
Bump actions/setup-go from 5 to 6
2025-09-29 13:52:06 +00:00
Clément Renault
95595a768e Merge pull request #5911 from EazyAl/main
Update README.md to fix newsletter link
2025-09-29 13:10:16 +00:00
dependabot[bot]
36f649768e Bump sigstore/cosign-installer from 3.9.2 to 3.10.0
Bumps [sigstore/cosign-installer](https://github.com/sigstore/cosign-installer) from 3.9.2 to 3.10.0.
- [Release notes](https://github.com/sigstore/cosign-installer/releases)
- [Commits](d58896d6a1...d7543c93d8)

---
updated-dependencies:
- dependency-name: sigstore/cosign-installer
  dependency-version: 3.10.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-09-25 18:01:14 +00:00
dependabot[bot]
0c6fc243f2 Bump actions/setup-node from 4 to 5
Bumps [actions/setup-node](https://github.com/actions/setup-node) from 4 to 5.
- [Release notes](https://github.com/actions/setup-node/releases)
- [Commits](https://github.com/actions/setup-node/compare/v4...v5)

---
updated-dependencies:
- dependency-name: actions/setup-node
  dependency-version: '5'
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-09-25 18:01:11 +00:00
dependabot[bot]
dfc46d5627 Bump actions/setup-dotnet from 4 to 5
Bumps [actions/setup-dotnet](https://github.com/actions/setup-dotnet) from 4 to 5.
- [Release notes](https://github.com/actions/setup-dotnet/releases)
- [Commits](https://github.com/actions/setup-dotnet/compare/v4...v5)

---
updated-dependencies:
- dependency-name: actions/setup-dotnet
  dependency-version: '5'
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-09-25 18:01:08 +00:00
dependabot[bot]
11d55f2121 Bump actions/setup-python from 5 to 6
Bumps [actions/setup-python](https://github.com/actions/setup-python) from 5 to 6.
- [Release notes](https://github.com/actions/setup-python/releases)
- [Commits](https://github.com/actions/setup-python/compare/v5...v6)

---
updated-dependencies:
- dependency-name: actions/setup-python
  dependency-version: '6'
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-09-25 18:01:03 +00:00
dependabot[bot]
014da57cf6 Bump actions/setup-go from 5 to 6
Bumps [actions/setup-go](https://github.com/actions/setup-go) from 5 to 6.
- [Release notes](https://github.com/actions/setup-go/releases)
- [Commits](https://github.com/actions/setup-go/compare/v5...v6)

---
updated-dependencies:
- dependency-name: actions/setup-go
  dependency-version: '6'
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-09-25 18:01:00 +00:00
Clément Renault
70a0ff4a8f Merge pull request #5900 from meilisearch/show-dependencies
Show Dependabot dependency upgrade in the changelog
2025-09-25 16:04:03 +00:00
Clément Renault
dd0d5e4b90 Merge pull request #5910 from meilisearch/curquiza-patch-1
Change Java version in SDK CI
2025-09-25 14:32:16 +00:00
Ali Imran
15b3bb1700 Update README.md to fix newsletter link 2025-09-25 16:07:08 +02:00
Louis Dureuil
077ec2ab11 Merge pull request #5908 from meilisearch/update-version
Update version
2025-09-25 13:10:34 +00:00
Clémentine
f25db0795e Change Java version in SDK CI
Updated Java version and distribution in workflow.
2025-09-25 15:03:50 +02:00
Tamo
c50a337c29 bump version for 1.22.1 2025-09-25 13:44:44 +02:00
Tamo
efeae09ce1 Merge pull request #5906 from meilisearch/task-deletion-strategy
Delete oldest tasks first
2025-09-25 10:11:33 +00:00
Tamo
ad55b48664 Merge pull request #5907 from meilisearch/fix-geojson-bug
use the latest version of zerometry that supports collection, lines and multi-lines
2025-09-25 09:56:01 +00:00
Tamo
94eabd34e6 fmt 2025-09-25 11:01:53 +02:00
Tamo
6935589f74 use the latest version of zerometry that supports collection, lines and multi-lines 2025-09-25 10:31:07 +02:00
Louis Dureuil
4beb452027 Optimize by using from_sorted_iter
Co-authored-by: Tamo <tamo@meilisearch.com>
2025-09-25 10:16:30 +02:00
Louis Dureuil
b722da303a Do not start from the end of the finished tasks when selecting the tasks to delete 2025-09-25 09:54:58 +02:00
Louis Dureuil
ad39263b94 Merge pull request #5902 from meilisearch/bump-version
bump the version of meilisearch
2025-09-24 07:23:39 +00:00
Tamo
0ffb08b112 bump the version of meilisearch 2025-09-23 17:37:31 +02:00
Clément Renault
ff80b4d0ff Merge pull request #5891 from nnethercott/fix-hannoy-arroy-conversion
Bump `hannoy` to v0.0.8
2025-09-23 13:26:54 +00:00
Louis Dureuil
7fb4404928 Merge pull request #5758 from meilisearch/cellulite
Cellulite integration
2025-09-23 12:48:13 +00:00
Tamo
8405f0bf9c fmt 2025-09-23 13:55:36 +02:00
Tamo
3a7f9b56fe update cellulite 2025-09-23 13:55:36 +02:00
Louis Dureuil
61034e2e2e write geojson in obkv 2025-09-23 13:55:36 +02:00
Tamo
108d6d3344 remove a bunch of useless logs 2025-09-23 13:55:36 +02:00
Tamo
35bd00f6a1 continue previous commit 2025-09-23 13:55:36 +02:00
Tamo
69059d67ef stop returning the geojson field when iterating on the fields 2025-09-23 13:55:36 +02:00
Tamo
e13783103f use the CELLULITE constant 2025-09-23 13:55:36 +02:00
Tamo
f719665c4e update the filter-parser after updating its error messages 2025-09-23 13:55:36 +02:00
Tamo
638f284614 densify the shapes before storing them 2025-09-23 13:55:36 +02:00
Tamo
32ac98ed95 style improvement 2025-09-23 13:55:36 +02:00
Tamo
46aee695ca review the filters errors 2025-09-23 13:55:36 +02:00
Tamo
716c67f858 review and fix all error codes 2025-09-23 13:55:36 +02:00
Tamo
fec10bb2d6 update cellulite to the latest version 2025-09-23 13:55:36 +02:00
Mubelotix
3dac2cf73e Update tests 2025-09-23 13:55:36 +02:00
Mubelotix
03eca800e6 Support _geoRadius 2025-09-23 13:55:36 +02:00
Mubelotix
28fa2e960e Tolerate trailing comma 2025-09-23 13:55:36 +02:00
Mubelotix
a3b9220f84 Improve error message 2025-09-23 13:55:36 +02:00
Mubelotix
c09d48edf2 Fix coordinates order in filters 2025-09-23 13:55:36 +02:00
Mubelotix
ae4ab0ebbb Improve filter parser errors 2025-09-23 13:55:36 +02:00
Mubelotix
900a9a6d59 Reduce identations 2025-09-23 13:55:36 +02:00
Mubelotix
fc560e6730 Improve geo polygon errors 2025-09-23 13:55:36 +02:00
Mubelotix
e2a06470b7 Update tests 2025-09-23 13:55:36 +02:00
Mubelotix
ada27323f2 Rename file 2025-09-23 13:55:36 +02:00
Mubelotix
607a1c2395 Add geo bounding box filter 2025-09-23 13:55:36 +02:00
Mubelotix
b56956ea0c Optimize geojson channels 2025-09-23 13:55:36 +02:00
Mubelotix
3d21290f7f Add cellulite database sizes 2025-09-23 13:55:36 +02:00
Mubelotix
4edd4c06bc Fix trivial clippy warnings 2025-09-23 13:55:36 +02:00
Mubelotix
566baddc6b Optimize points removed serialization 2025-09-23 13:55:36 +02:00
Tamo
febe3186ce improve deletion 2025-09-23 13:55:36 +02:00
Tamo
5dd42c1871 remove useless log 2025-09-23 13:55:36 +02:00
Tamo
8670793e6e fix the cellulite spilling bug 2025-09-23 13:55:36 +02:00
Tamo
41a04aa3ab fix the cellulite integration 2025-09-23 13:55:36 +02:00
Tamo
88f841bc05 plug in the document deletion in cellulite 2025-09-23 13:55:36 +02:00
Tamo
d19892d2ea update to the latest version of cellulite and steppe 2025-09-23 13:55:36 +02:00
Tamo
c0905d6650 add the deletion in the new indexer 2025-09-23 13:55:36 +02:00
Tamo
576d7d94b1 fix the old indexer 2025-09-23 13:55:36 +02:00
Tamo
f4f1334b62 add a new _geoPolygon filter to query the cellulite database 2025-09-23 13:55:36 +02:00
Tamo
aaff6c3685 fmt 2025-09-23 13:55:36 +02:00
Tamo
42d2af4c84 finish plugin cellulite to the new indexer 2025-09-23 13:55:36 +02:00
Tamo
6be91c824c Cellulite is almost in the new indexer. We must add the documentID to the geojson pipeline 2025-09-23 13:55:36 +02:00
Tamo
6ee0537db8 add an extractor for cellulite in the new pipeline 2025-09-23 13:55:36 +02:00
Tamo
3fbeff4308 add cellulite to the old pipeline, it probably doesn't works 2025-09-23 13:55:36 +02:00
Tamo
375546b61a add a few helpers 2025-09-23 13:55:36 +02:00
Tamo
25a1d50763 add cellulite to the index 2025-09-23 13:55:36 +02:00
curquiza
6f0d26c22c Show dependency upgrade in the changelog for full transparency 2025-09-22 18:30:34 +02:00
Louis Dureuil
4fe073cc1a Merge pull request #5896 from meilisearch/fix-doc-template
Document template: Correctly render when indexing first item in array
2025-09-22 07:20:38 +00:00
Clément Renault
5cd3d36d20 Merge pull request #5897 from meilisearch/improve-prom
improve the prometheus content type we return
2025-09-18 16:18:16 +00:00
PedroTroller
9f4dcd04e9 Bump alpine version to 3.22 2025-09-18 17:08:36 +02:00
Tamo
d7ad76ea1e improve the prometheus content type we return 2025-09-18 17:04:13 +02:00
Louis Dureuil
e82bb93221 Fix indexing bug 2025-09-18 16:57:20 +02:00
Clément Renault
000cb93aad Merge pull request #5895 from meilisearch/fix-ci
Update the dtolnay action to 1.89
2025-09-18 14:56:45 +00:00
Tamo
ad4f5514b9 update the dtolnay action to 1.89 2025-09-18 15:52:39 +02:00
Louis Dureuil
8d29a29867 Merge pull request #5894 from meilisearch/fix-hannoy-unreachable-items
Bump Hannoy to fix unreachable documents
2025-09-18 13:33:34 +00:00
Kerollmops
d7de819d11 Bump Hannoy to fix unreachable documents 2025-09-18 14:26:13 +02:00
nnethercott
7a6cf30cb2 bump hannoy to 0.0.8 2025-09-18 11:23:57 +02:00
Tamo
e43d67591c Merge pull request #5892 from meilisearch/increase-msrv
increase rust version from 1.85 to 1.89
2025-09-17 08:26:08 +00:00
Tamo
134237d1eb update the toolchain for rustfmt 2025-09-16 17:45:49 +02:00
Tamo
26d9070aa7 increase rust version from 1.85 to 1.89 2025-09-16 17:21:33 +02:00
nnethercott
f9ffb8ada5 bump from hannoy 0.0.6 to 0.0.7 2025-09-16 12:00:36 +02:00
nnethercott
a47888f02c bump hannoy to 0.6 2025-09-16 11:02:46 +02:00
nnethercott
5bef2f4d86 Update arroy-hannoy conversion internals 2025-09-15 16:10:56 +02:00
curquiza
d52c7dcc94 Add needs: check-version 2025-08-12 20:47:43 +02:00
217 changed files with 11811 additions and 2529 deletions

View File

@@ -7,6 +7,5 @@ updates:
schedule:
interval: "monthly"
labels:
- 'skip changelog'
- 'dependencies'
rebase-strategy: disabled

View File

@@ -1,33 +0,0 @@
name-template: 'v$RESOLVED_VERSION'
tag-template: 'v$RESOLVED_VERSION'
exclude-labels:
- 'skip changelog'
version-resolver:
minor:
labels:
- 'enhancement'
default: patch
categories:
- title: '⚠️ Breaking changes'
label: 'breaking-change'
- title: '🚀 Enhancements'
label: 'enhancement'
- title: '🐛 Bug Fixes'
label: 'bug'
- title: '🔒 Security'
label: 'security'
- title: '⚙️ Maintenance/misc'
label:
- 'maintenance'
- 'documentation'
template: |
$CHANGES
❤️ Huge thanks to our contributors: $CONTRIBUTORS.
no-changes-template: 'Changes are coming soon 😎'
sort-direction: 'ascending'
replacers:
- search: '/(?:and )?@dependabot-preview(?:\[bot\])?,?/g'
replace: ''
- search: '/(?:and )?@dependabot(?:\[bot\])?,?/g'
replace: ''

View File

@@ -18,7 +18,7 @@ jobs:
timeout-minutes: 180 # 3h
steps:
- uses: actions/checkout@v5
- uses: dtolnay/rust-toolchain@1.85
- uses: dtolnay/rust-toolchain@1.89
with:
profile: minimal

View File

@@ -66,7 +66,7 @@ jobs:
fetch-depth: 0 # fetch full history to be able to get main commit sha
ref: ${{ steps.comment-branch.outputs.head_ref }}
- uses: dtolnay/rust-toolchain@1.85
- uses: dtolnay/rust-toolchain@1.89
with:
profile: minimal

View File

@@ -12,7 +12,7 @@ jobs:
timeout-minutes: 180 # 3h
steps:
- uses: actions/checkout@v5
- uses: dtolnay/rust-toolchain@1.85
- uses: dtolnay/rust-toolchain@1.89
with:
profile: minimal

View File

@@ -18,7 +18,7 @@ jobs:
timeout-minutes: 4320 # 72h
steps:
- uses: actions/checkout@v5
- uses: dtolnay/rust-toolchain@1.85
- uses: dtolnay/rust-toolchain@1.89
with:
profile: minimal

View File

@@ -44,7 +44,7 @@ jobs:
exit 1
fi
- uses: dtolnay/rust-toolchain@1.85
- uses: dtolnay/rust-toolchain@1.89
with:
profile: minimal

View File

@@ -16,7 +16,7 @@ jobs:
timeout-minutes: 4320 # 72h
steps:
- uses: actions/checkout@v5
- uses: dtolnay/rust-toolchain@1.85
- uses: dtolnay/rust-toolchain@1.89
with:
profile: minimal

View File

@@ -15,7 +15,7 @@ jobs:
runs-on: benchmarks
steps:
- uses: actions/checkout@v5
- uses: dtolnay/rust-toolchain@1.85
- uses: dtolnay/rust-toolchain@1.89
with:
profile: minimal

View File

@@ -15,7 +15,7 @@ jobs:
runs-on: benchmarks
steps:
- uses: actions/checkout@v5
- uses: dtolnay/rust-toolchain@1.85
- uses: dtolnay/rust-toolchain@1.89
with:
profile: minimal

View File

@@ -15,7 +15,7 @@ jobs:
runs-on: benchmarks
steps:
- uses: actions/checkout@v5
- uses: dtolnay/rust-toolchain@1.85
- uses: dtolnay/rust-toolchain@1.89
with:
profile: minimal

View File

@@ -17,7 +17,7 @@ jobs:
run: |
apt-get update && apt-get install -y curl
apt-get install build-essential -y
- uses: dtolnay/rust-toolchain@1.85
- uses: dtolnay/rust-toolchain@1.89
- name: Install cargo-flaky
run: cargo install cargo-flaky
- name: Run cargo flaky in the dumps

View File

@@ -12,7 +12,7 @@ jobs:
timeout-minutes: 4320 # 72h
steps:
- uses: actions/checkout@v5
- uses: dtolnay/rust-toolchain@1.85
- uses: dtolnay/rust-toolchain@1.89
with:
profile: minimal

View File

@@ -25,7 +25,7 @@ jobs:
run: |
apt-get update && apt-get install -y curl
apt-get install build-essential -y
- uses: dtolnay/rust-toolchain@1.85
- uses: dtolnay/rust-toolchain@1.89
- name: Install cargo-deb
run: cargo install cargo-deb
- uses: actions/checkout@v5

View File

@@ -65,7 +65,7 @@ jobs:
uses: docker/setup-buildx-action@v3
- name: Install cosign
uses: sigstore/cosign-installer@d58896d6a1865668819e1d91763c7751a165e159 # tag=v3.9.2
uses: sigstore/cosign-installer@d7543c93d881b35a8faa02e8e3605f69b7a1ce62 # tag=v3.10.0
- name: Login to Docker Hub
uses: docker/login-action@v3

View File

@@ -11,7 +11,7 @@ jobs:
check-version:
name: Check the version validity
runs-on: ubuntu-latest
# No need to check the version for dry run (cron)
# No need to check the version for dry run (cron or workflow_dispatch)
steps:
- uses: actions/checkout@v5
# Check if the tag has the v<nmumber>.<number>.<number> format.
@@ -45,10 +45,10 @@ jobs:
run: |
apt-get update && apt-get install -y curl
apt-get install build-essential -y
- uses: dtolnay/rust-toolchain@1.85
- uses: dtolnay/rust-toolchain@1.89
- name: Build
run: cargo build --release --locked
# No need to upload binaries for dry run (cron)
# No need to upload binaries for dry run (cron or workflow_dispatch)
- name: Upload binaries to release
if: github.event_name == 'release'
uses: svenstaro/upload-release-action@2.11.2
@@ -75,10 +75,10 @@ jobs:
asset_name: meilisearch-windows-amd64.exe
steps:
- uses: actions/checkout@v5
- uses: dtolnay/rust-toolchain@1.85
- uses: dtolnay/rust-toolchain@1.89
- name: Build
run: cargo build --release --locked
# No need to upload binaries for dry run (cron)
# No need to upload binaries for dry run (cron or workflow_dispatch)
- name: Upload binaries to release
if: github.event_name == 'release'
uses: svenstaro/upload-release-action@2.11.2
@@ -101,7 +101,7 @@ jobs:
- name: Checkout repository
uses: actions/checkout@v5
- name: Installing Rust toolchain
uses: dtolnay/rust-toolchain@1.85
uses: dtolnay/rust-toolchain@1.89
with:
profile: minimal
target: ${{ matrix.target }}
@@ -111,7 +111,7 @@ jobs:
command: build
args: --release --target ${{ matrix.target }}
- name: Upload the binary to release
# No need to upload binaries for dry run (cron)
# No need to upload binaries for dry run (cron or workflow_dispatch)
if: github.event_name == 'release'
uses: svenstaro/upload-release-action@2.11.2
with:
@@ -148,7 +148,7 @@ jobs:
add-apt-repository "deb [arch=$(dpkg --print-architecture)] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable"
apt-get update -y && apt-get install -y docker-ce
- name: Installing Rust toolchain
uses: dtolnay/rust-toolchain@1.85
uses: dtolnay/rust-toolchain@1.89
with:
profile: minimal
target: ${{ matrix.target }}
@@ -176,7 +176,7 @@ jobs:
- name: List target output files
run: ls -lR ./target
- name: Upload the binary to release
# No need to upload binaries for dry run (cron)
# No need to upload binaries for dry run (cron or workflow_dispatch)
if: github.event_name == 'release'
uses: svenstaro/upload-release-action@2.11.2
with:
@@ -187,6 +187,7 @@ jobs:
publish-openapi-file:
name: Publish OpenAPI file
needs: check-version
runs-on: ubuntu-latest
steps:
- name: Checkout code
@@ -201,7 +202,7 @@ jobs:
cd crates/openapi-generator
cargo run --release -- --pretty --output ../../meilisearch.json
- name: Upload OpenAPI to Release
# No need to upload for dry run (cron)
# No need to upload for dry run (cron or workflow_dispatch)
if: github.event_name == 'release'
uses: svenstaro/upload-release-action@2.11.2
with:

View File

@@ -1,20 +0,0 @@
name: Release Drafter
permissions:
contents: read
pull-requests: write
on:
push:
branches:
- main
jobs:
update_release_draft:
runs-on: ubuntu-latest
steps:
- uses: release-drafter/release-drafter@v6
with:
config-name: release-draft-template.yml
env:
GITHUB_TOKEN: ${{ secrets.RELEASE_DRAFTER_TOKEN }}

View File

@@ -50,7 +50,7 @@ jobs:
with:
repository: meilisearch/meilisearch-dotnet
- name: Setup .NET Core
uses: actions/setup-dotnet@v4
uses: actions/setup-dotnet@v5
with:
dotnet-version: "8.0.x"
- name: Install dependencies
@@ -100,7 +100,7 @@ jobs:
- '7700:7700'
steps:
- name: Set up Go
uses: actions/setup-go@v5
uses: actions/setup-go@v6
with:
go-version: stable
- uses: actions/checkout@v5
@@ -135,13 +135,13 @@ jobs:
- name: Set up Java
uses: actions/setup-java@v5
with:
java-version: 8
distribution: 'zulu'
java-version: 17
distribution: 'temurin'
cache: gradle
- name: Grant execute permission for gradlew
run: chmod +x gradlew
- name: Build and run unit and integration tests
run: ./gradlew build integrationTest
run: ./gradlew build integrationTest --info
meilisearch-js-tests:
needs: define-docker-image
@@ -160,7 +160,7 @@ jobs:
with:
repository: meilisearch/meilisearch-js
- name: Setup node
uses: actions/setup-node@v4
uses: actions/setup-node@v5
with:
cache: 'yarn'
- name: Install dependencies
@@ -224,7 +224,7 @@ jobs:
with:
repository: meilisearch/meilisearch-python
- name: Set up Python
uses: actions/setup-python@v5
uses: actions/setup-python@v6
- name: Install pipenv
uses: dschep/install-pipenv-action@v1
- name: Install dependencies
@@ -318,7 +318,7 @@ jobs:
with:
repository: meilisearch/meilisearch-js-plugins
- name: Setup node
uses: actions/setup-node@v4
uses: actions/setup-node@v5
with:
cache: yarn
- name: Install dependencies

View File

@@ -27,7 +27,7 @@ jobs:
apt-get update && apt-get install -y curl
apt-get install build-essential -y
- name: Setup test with Rust stable
uses: dtolnay/rust-toolchain@1.85
uses: dtolnay/rust-toolchain@1.89
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.8.0
- name: Run cargo check without any default features
@@ -52,7 +52,7 @@ jobs:
- uses: actions/checkout@v5
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.8.0
- uses: dtolnay/rust-toolchain@1.85
- uses: dtolnay/rust-toolchain@1.89
- name: Run cargo check without any default features
uses: actions-rs/cargo@v1
with:
@@ -77,7 +77,7 @@ jobs:
run: |
apt-get update
apt-get install --assume-yes build-essential curl
- uses: dtolnay/rust-toolchain@1.85
- uses: dtolnay/rust-toolchain@1.89
- name: Run cargo build with almost all features
run: |
cargo build --workspace --locked --release --features "$(cargo xtask list-features --exclude-feature cuda,test-ollama)"
@@ -129,7 +129,7 @@ jobs:
run: |
apt-get update
apt-get install --assume-yes build-essential curl
- uses: dtolnay/rust-toolchain@1.85
- uses: dtolnay/rust-toolchain@1.89
- name: Run cargo tree without default features and check lindera is not present
run: |
if cargo tree -f '{p} {f}' -e normal --no-default-features | grep -qz lindera; then
@@ -153,7 +153,7 @@ jobs:
run: |
apt-get update && apt-get install -y curl
apt-get install build-essential -y
- uses: dtolnay/rust-toolchain@1.85
- uses: dtolnay/rust-toolchain@1.89
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.8.0
- name: Run tests in debug
@@ -167,7 +167,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v5
- uses: dtolnay/rust-toolchain@1.85
- uses: dtolnay/rust-toolchain@1.89
with:
profile: minimal
components: clippy
@@ -184,7 +184,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v5
- uses: dtolnay/rust-toolchain@1.85
- uses: dtolnay/rust-toolchain@1.89
with:
profile: minimal
toolchain: nightly-2024-07-09

View File

@@ -18,7 +18,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v5
- uses: dtolnay/rust-toolchain@1.85
- uses: dtolnay/rust-toolchain@1.89
with:
profile: minimal
- name: Install sd

1445
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -23,7 +23,7 @@ members = [
]
[workspace.package]
version = "1.21.0"
version = "1.26.0"
authors = [
"Quentin de Quelen <quentin@dequelen.me>",
"Clément Renault <clement@meilisearch.com>",

View File

@@ -1,5 +1,5 @@
# Compile
FROM rust:1.85-alpine3.20 AS compiler
FROM rust:1.89-alpine3.22 AS compiler
RUN apk add -q --no-cache build-base openssl-dev
@@ -20,7 +20,7 @@ RUN set -eux; \
cargo build --release -p meilisearch -p meilitool
# Run
FROM alpine:3.20
FROM alpine:3.22
LABEL org.opencontainers.image.source="https://github.com/meilisearch/meilisearch"
ENV MEILI_HTTP_ADDR 0.0.0.0:7700

28
LICENSE
View File

@@ -1,29 +1,9 @@
MIT License
# License
Copyright (c) 2019-2025 Meili SAS
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
Part of this work fall under the Meilisearch Enterprise Edition (EE) and are licensed under the Business Source License 1.1, please refer to [LICENSE-EE](./LICENSE-EE) for details.
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
The other parts of this work are licensed under the [MIT license](./LICENSE-MIT).
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
---
🔒 Meilisearch Enterprise Edition (EE)
Certain parts of this codebase are not licensed under the MIT license and governed by the Business Source License 1.1.
See the LICENSE-EE file for details.
`SPDX-License-Identifier: MIT AND BUSL-1.1`

21
LICENSE-MIT Normal file
View File

@@ -0,0 +1,21 @@
MIT License
Copyright (c) 2019-2025 Meili SAS
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File

@@ -39,6 +39,7 @@
## 🖥 Examples
- [**Movies**](https://where2watch.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=organization) — An application to help you find streaming platforms to watch movies using [hybrid search](https://www.meilisearch.com/solutions/hybrid-search?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demos).
- [**Flickr**](https://flickr.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=organization) — Search and explore one hundred million Flickr images with semantic search.
- [**Ecommerce**](https://ecommerce.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demos) — Ecommerce website using disjunctive [facets](https://www.meilisearch.com/docs/learn/fine_tuning_results/faceted_search?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demos), range and rating filtering, and pagination.
- [**Songs**](https://music.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demos) — Search through 47 million of songs.
- [**SaaS**](https://saas.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demos) — Search for contacts, deals, and companies in this [multi-tenant](https://www.meilisearch.com/docs/learn/security/multitenancy_tenant_tokens?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demos) CRM application.
@@ -121,7 +122,7 @@ If you want to know more about the kind of data we collect and what we use it fo
Meilisearch is a search engine created by [Meili](https://www.meilisearch.com/careers), a software development company headquartered in France and with team members all over the world. Want to know more about us? [Check out our blog!](https://blog.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=contact)
🗞 [Subscribe to our newsletter](https://meilisearch.us2.list-manage.com/subscribe?u=27870f7b71c908a8b359599fb&id=79582d828e) if you don't want to miss any updates! We promise we won't clutter your mailbox: we only send one edition every two months.
🗞 [Subscribe to our newsletter](https://share-eu1.hsforms.com/1LN5N0x_GQgq7ss7tXmSykwfg3aq) if you don't want to miss any updates! We promise we won't clutter your mailbox: we only send one edition every two months.
💌 Want to make a suggestion or give feedback? Here are some of the channels where you can reach us:

View File

@@ -9,8 +9,9 @@ use meilisearch_types::error::ResponseError;
use meilisearch_types::keys::Key;
use meilisearch_types::milli::update::IndexDocumentsMethod;
use meilisearch_types::settings::Unchecked;
use meilisearch_types::tasks::enterprise_edition::network::{NetworkTopologyChange, DbTaskNetwork};
use meilisearch_types::tasks::{
Details, ExportIndexSettings, IndexSwap, KindWithContent, Status, Task, TaskId, TaskNetwork,
Details, ExportIndexSettings, IndexSwap, KindWithContent, Status, Task, TaskId,
};
use meilisearch_types::InstanceUid;
use roaring::RoaringBitmap;
@@ -95,7 +96,9 @@ pub struct TaskDump {
)]
pub finished_at: Option<OffsetDateTime>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub network: Option<TaskNetwork>,
pub network: Option<DbTaskNetwork>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub custom_metadata: Option<String>,
}
// A `Kind` specific version made for the dump. If modified you may break the dump.
@@ -158,6 +161,10 @@ pub enum KindDump {
UpgradeDatabase {
from: (u32, u32, u32),
},
IndexCompaction {
index_uid: String,
},
NetworkTopologyChange(NetworkTopologyChange),
}
impl From<Task> for TaskDump {
@@ -175,6 +182,7 @@ impl From<Task> for TaskDump {
started_at: task.started_at,
finished_at: task.finished_at,
network: task.network,
custom_metadata: task.custom_metadata,
}
}
}
@@ -240,6 +248,12 @@ impl From<KindWithContent> for KindDump {
KindWithContent::UpgradeDatabase { from: version } => {
KindDump::UpgradeDatabase { from: version }
}
KindWithContent::IndexCompaction { index_uid } => {
KindDump::IndexCompaction { index_uid }
}
KindWithContent::NetworkTopologyChange(network_topology_change) => {
KindDump::NetworkTopologyChange(network_topology_change)
}
}
}
}
@@ -390,6 +404,7 @@ pub(crate) mod test {
started_at: Some(datetime!(2022-11-20 0:00 UTC)),
finished_at: Some(datetime!(2022-11-21 0:00 UTC)),
network: None,
custom_metadata: None,
},
None,
),
@@ -415,6 +430,7 @@ pub(crate) mod test {
started_at: None,
finished_at: None,
network: None,
custom_metadata: None,
},
Some(vec![
json!({ "id": 4, "race": "leonberg" }).as_object().unwrap().clone(),
@@ -435,6 +451,7 @@ pub(crate) mod test {
started_at: None,
finished_at: None,
network: None,
custom_metadata: None,
},
None,
),
@@ -548,7 +565,8 @@ pub(crate) mod test {
Network {
local: Some("myself".to_string()),
remotes: maplit::btreemap! {"other".to_string() => Remote { url: "http://test".to_string(), search_api_key: Some("apiKey".to_string()), write_api_key: Some("docApiKey".to_string()) }},
sharding: false,
leader: None,
version: Default::default(),
}
}

View File

@@ -97,6 +97,7 @@ impl CompatV2ToV3 {
}
}
#[allow(clippy::large_enum_variant)]
pub enum CompatIndexV2ToV3 {
V2(v2::V2IndexReader),
Compat(Box<CompatIndexV1ToV2>),

View File

@@ -164,6 +164,7 @@ impl CompatV5ToV6 {
started_at: task_view.started_at,
finished_at: task_view.finished_at,
network: None,
custom_metadata: None,
};
(task, content_file)

View File

@@ -60,7 +60,7 @@ impl FileStore {
/// Returns the file corresponding to the requested uuid.
pub fn get_update(&self, uuid: Uuid) -> Result<StdFile> {
let path = self.get_update_path(uuid);
let path = self.update_path(uuid);
let file = match StdFile::open(path) {
Ok(file) => file,
Err(e) => {
@@ -72,7 +72,7 @@ impl FileStore {
}
/// Returns the path that correspond to this uuid, the path could not exists.
pub fn get_update_path(&self, uuid: Uuid) -> PathBuf {
pub fn update_path(&self, uuid: Uuid) -> PathBuf {
self.path.join(uuid.to_string())
}

View File

@@ -7,23 +7,14 @@
use nom::branch::alt;
use nom::bytes::complete::tag;
use nom::character::complete::char;
use nom::character::complete::multispace0;
use nom::character::complete::multispace1;
use nom::combinator::cut;
use nom::combinator::map;
use nom::combinator::value;
use nom::sequence::preceded;
use nom::sequence::{terminated, tuple};
use nom::character::complete::{char, multispace0, multispace1};
use nom::combinator::{cut, map, value};
use nom::sequence::{preceded, terminated, tuple};
use Condition::*;
use crate::error::IResultExt;
use crate::value::parse_vector_value;
use crate::value::parse_vector_value_cut;
use crate::Error;
use crate::ErrorKind;
use crate::VectorFilter;
use crate::{parse_value, FilterCondition, IResult, Span, Token};
use crate::value::{parse_vector_value, parse_vector_value_cut};
use crate::{parse_value, Error, ErrorKind, FilterCondition, IResult, Span, Token, VectorFilter};
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Condition<'a> {
@@ -124,7 +115,7 @@ pub fn parse_not_exists(input: Span) -> IResult<FilterCondition> {
Ok((input, FilterCondition::Not(Box::new(FilterCondition::Condition { fid: key, op: Exists }))))
}
fn parse_vectors(input: Span) -> IResult<(Token, Option<Token>, VectorFilter<'_>)> {
fn parse_vectors(input: Span) -> IResult<(Token, Option<Token>, VectorFilter)> {
let (input, _) = multispace0(input)?;
let (input, fid) = tag("_vectors")(input)?;

View File

@@ -75,7 +75,11 @@ pub enum ExpectedValueKind {
pub enum ErrorKind<'a> {
ReservedGeo(&'a str),
GeoRadius,
GeoRadiusArgumentCount(usize),
GeoBoundingBox,
GeoPolygon,
GeoPolygonNotEnoughPoints(usize),
GeoCoordinatesNotPair(usize),
MisusedGeoRadius,
MisusedGeoBoundingBox,
VectorFilterLeftover,
@@ -189,7 +193,7 @@ impl Display for Error<'_> {
}
ErrorKind::InvalidPrimary => {
let text = if input.trim().is_empty() { "but instead got nothing.".to_string() } else { format!("at `{}`.", escaped_input) };
writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` {}", text)?
writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` {text}")?
}
ErrorKind::InvalidEscapedNumber => {
writeln!(f, "Found an invalid escaped sequence number: `{}`.", escaped_input)?
@@ -198,11 +202,23 @@ impl Display for Error<'_> {
writeln!(f, "Found unexpected characters at the end of the filter: `{}`. You probably forgot an `OR` or an `AND` rule.", escaped_input)?
}
ErrorKind::GeoRadius => {
writeln!(f, "The `_geoRadius` filter expects three arguments: `_geoRadius(latitude, longitude, radius)`.")?
writeln!(f, "The `_geoRadius` filter must be in the form: `_geoRadius(latitude, longitude, radius, optionalResolution)`.")?
}
ErrorKind::GeoRadiusArgumentCount(count) => {
writeln!(f, "Was expecting 3 or 4 arguments for `_geoRadius`, but instead found {count}.")?
}
ErrorKind::GeoBoundingBox => {
writeln!(f, "The `_geoBoundingBox` filter expects two pairs of arguments: `_geoBoundingBox([latitude, longitude], [latitude, longitude])`.")?
}
ErrorKind::GeoPolygon => {
writeln!(f, "The `_geoPolygon` filter doesn't match the expected format: `_geoPolygon([latitude, longitude], [latitude, longitude])`.")?
}
ErrorKind::GeoPolygonNotEnoughPoints(n) => {
writeln!(f, "The `_geoPolygon` filter expects at least 3 points but only {n} were specified")?;
}
ErrorKind::GeoCoordinatesNotPair(number) => {
writeln!(f, "Was expecting 2 coordinates but instead found {number}.")?
}
ErrorKind::ReservedGeo(name) => {
writeln!(f, "`{}` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.", name.escape_debug())?
}

View File

@@ -19,6 +19,7 @@
//! word = (alphanumeric | _ | - | .)+
//! geoRadius = "_geoRadius(" WS* float WS* "," WS* float WS* "," float WS* ")"
//! geoBoundingBox = "_geoBoundingBox([" WS * float WS* "," WS* float WS* "], [" WS* float WS* "," WS* float WS* "]")
//! geoPolygon = "_geoPolygon([[" WS* float WS* "," WS* float WS* "],+])"
//! ```
//!
//! Other BNF grammar used to handle some specific errors:
@@ -116,7 +117,7 @@ impl<'a> Token<'a> {
self.span
}
pub fn parse_finite_float(&self) -> Result<f64, Error> {
pub fn parse_finite_float(&self) -> Result<f64, Error<'a>> {
let value: f64 = self.value().parse().map_err(|e| self.as_external_error(e))?;
if value.is_finite() {
Ok(value)
@@ -156,8 +157,9 @@ pub enum FilterCondition<'a> {
Or(Vec<Self>),
And(Vec<Self>),
VectorExists { fid: Token<'a>, embedder: Option<Token<'a>>, filter: VectorFilter<'a> },
GeoLowerThan { point: [Token<'a>; 2], radius: Token<'a> },
GeoLowerThan { point: [Token<'a>; 2], radius: Token<'a>, resolution: Option<Token<'a>> },
GeoBoundingBox { top_right_point: [Token<'a>; 2], bottom_left_point: [Token<'a>; 2] },
GeoPolygon { points: Vec<[Token<'a>; 2]> },
}
pub enum TraversedElement<'a> {
@@ -166,7 +168,7 @@ pub enum TraversedElement<'a> {
}
impl<'a> FilterCondition<'a> {
pub fn use_contains_operator(&self) -> Option<&Token> {
pub fn use_contains_operator(&self) -> Option<&Token<'a>> {
match self {
FilterCondition::Condition { fid: _, op } => match op {
Condition::GreaterThan(_)
@@ -189,11 +191,12 @@ impl<'a> FilterCondition<'a> {
FilterCondition::VectorExists { .. }
| FilterCondition::GeoLowerThan { .. }
| FilterCondition::GeoBoundingBox { .. }
| FilterCondition::GeoPolygon { .. }
| FilterCondition::In { .. } => None,
}
}
pub fn use_vector_filter(&self) -> Option<&Token> {
pub fn use_vector_filter(&self) -> Option<&Token<'a>> {
match self {
FilterCondition::Condition { .. } => None,
FilterCondition::Not(this) => this.use_vector_filter(),
@@ -202,12 +205,13 @@ impl<'a> FilterCondition<'a> {
}
FilterCondition::GeoLowerThan { .. }
| FilterCondition::GeoBoundingBox { .. }
| FilterCondition::GeoPolygon { .. }
| FilterCondition::In { .. } => None,
FilterCondition::VectorExists { fid, .. } => Some(fid),
}
}
pub fn fids(&self, depth: usize) -> Box<dyn Iterator<Item = &Token> + '_> {
pub fn fids(&self, depth: usize) -> Box<dyn Iterator<Item = &Token<'a>> + '_> {
if depth == 0 {
return Box::new(std::iter::empty());
}
@@ -228,7 +232,7 @@ impl<'a> FilterCondition<'a> {
}
/// Returns the first token found at the specified depth, `None` if no token at this depth.
pub fn token_at_depth(&self, depth: usize) -> Option<&Token> {
pub fn token_at_depth(&self, depth: usize) -> Option<&Token<'a>> {
match self {
FilterCondition::Condition { fid, .. } if depth == 0 => Some(fid),
FilterCondition::Or(subfilters) => {
@@ -396,23 +400,27 @@ fn parse_not(input: Span, depth: usize) -> IResult<FilterCondition> {
/// If we parse `_geoRadius` we MUST parse the rest of the expression.
fn parse_geo_radius(input: Span) -> IResult<FilterCondition> {
// we want to allow space BEFORE the _geoRadius but not after
let parsed = preceded(
tuple((multispace0, word_exact("_geoRadius"))),
// if we were able to parse `_geoRadius` and can't parse the rest of the input we return a failure
cut(delimited(char('('), separated_list1(tag(","), ws(recognize_float)), char(')'))),
)(input)
.map_err(|e| e.map(|_| Error::new_from_kind(input, ErrorKind::GeoRadius)));
let (input, _) = tuple((multispace0, word_exact("_geoRadius")))(input)?;
// if we were able to parse `_geoRadius` and can't parse the rest of the input we return a failure
let parsed =
delimited(char('('), separated_list1(tag(","), ws(recognize_float)), char(')'))(input)
.map_cut(ErrorKind::GeoRadius);
let (input, args) = parsed?;
if args.len() != 3 {
return Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::GeoRadius)));
if !(3..=4).contains(&args.len()) {
return Err(Error::failure_from_kind(input, ErrorKind::GeoRadiusArgumentCount(args.len())));
}
let res = FilterCondition::GeoLowerThan {
point: [args[0].into(), args[1].into()],
radius: args[2].into(),
resolution: args.get(3).cloned().map(Token::from),
};
Ok((input, res))
}
@@ -420,26 +428,33 @@ fn parse_geo_radius(input: Span) -> IResult<FilterCondition> {
/// If we parse `_geoBoundingBox` we MUST parse the rest of the expression.
fn parse_geo_bounding_box(input: Span) -> IResult<FilterCondition> {
// we want to allow space BEFORE the _geoBoundingBox but not after
let parsed = preceded(
tuple((multispace0, word_exact("_geoBoundingBox"))),
// if we were able to parse `_geoBoundingBox` and can't parse the rest of the input we return a failure
cut(delimited(
char('('),
separated_list1(
tag(","),
ws(delimited(char('['), separated_list1(tag(","), ws(recognize_float)), char(']'))),
),
char(')'),
)),
let (input, _) = tuple((multispace0, word_exact("_geoBoundingBox")))(input)?;
// if we were able to parse `_geoBoundingBox` and can't parse the rest of the input we return a failure
let (input, args) = delimited(
char('('),
separated_list1(
tag(","),
ws(delimited(char('['), separated_list1(tag(","), ws(recognize_float)), char(']'))),
),
char(')'),
)(input)
.map_err(|e| e.map(|_| Error::new_from_kind(input, ErrorKind::GeoBoundingBox)));
.map_cut(ErrorKind::GeoBoundingBox)?;
let (input, args) = parsed?;
if args.len() != 2 || args[0].len() != 2 || args[1].len() != 2 {
if args.len() != 2 {
return Err(Error::failure_from_kind(input, ErrorKind::GeoBoundingBox));
}
if let Some(offending) = args.iter().find(|a| a.len() != 2) {
let context = offending.first().unwrap_or(&input);
return Err(Error::failure_from_kind(
*context,
ErrorKind::GeoCoordinatesNotPair(offending.len()),
));
}
let res = FilterCondition::GeoBoundingBox {
top_right_point: [args[0][0].into(), args[0][1].into()],
bottom_left_point: [args[1][0].into(), args[1][1].into()],
@@ -447,6 +462,47 @@ fn parse_geo_bounding_box(input: Span) -> IResult<FilterCondition> {
Ok((input, res))
}
/// geoPolygon = "_geoPolygon([[" WS* float WS* "," WS* float WS* "],+])"
/// If we parse `_geoPolygon` we MUST parse the rest of the expression.
fn parse_geo_polygon(input: Span) -> IResult<FilterCondition> {
// we want to allow space BEFORE the _geoPolygon but not after
let (input, _) = tuple((multispace0, word_exact("_geoPolygon")))(input)?;
// if we were able to parse `_geoPolygon` and can't parse the rest of the input we return a failure
let (input, args): (_, Vec<Vec<LocatedSpan<_, _>>>) = delimited(
char('('),
separated_list1(
tag(","),
ws(delimited(char('['), separated_list1(tag(","), ws(recognize_float)), char(']'))),
),
preceded(opt(ws(char(','))), char(')')), // Tolerate trailing comma
)(input)
.map_cut(ErrorKind::GeoPolygon)?;
if args.len() < 3 {
let context = args.last().and_then(|a| a.last()).unwrap_or(&input);
return Err(Error::failure_from_kind(
*context,
ErrorKind::GeoPolygonNotEnoughPoints(args.len()),
));
}
if let Some(offending) = args.iter().find(|a| a.len() != 2) {
let context = offending.first().unwrap_or(&input);
return Err(Error::failure_from_kind(
*context,
ErrorKind::GeoCoordinatesNotPair(offending.len()),
));
}
let res = FilterCondition::GeoPolygon {
points: args.into_iter().map(|a| [a[0].into(), a[1].into()]).collect(),
};
Ok((input, res))
}
/// geoPoint = WS* "_geoPoint(float WS* "," WS* float WS* "," WS* float)
fn parse_geo_point(input: Span) -> IResult<FilterCondition> {
// we want to forbid space BEFORE the _geoPoint but not after
@@ -516,8 +572,8 @@ fn parse_primary(input: Span, depth: usize) -> IResult<FilterCondition> {
Error::new_from_kind(input, ErrorKind::MissingClosingDelimiter(c.char()))
}),
),
parse_geo_radius,
parse_geo_bounding_box,
// Made a random block of functions because we reached the maximum number of elements per alt
alt((parse_geo_radius, parse_geo_bounding_box, parse_geo_polygon)),
parse_in,
parse_not_in,
parse_condition,
@@ -597,9 +653,12 @@ impl std::fmt::Display for FilterCondition<'_> {
}
write!(f, " EXISTS")
}
FilterCondition::GeoLowerThan { point, radius } => {
FilterCondition::GeoLowerThan { point, radius, resolution: None } => {
write!(f, "_geoRadius({}, {}, {})", point[0], point[1], radius)
}
FilterCondition::GeoLowerThan { point, radius, resolution: Some(resolution) } => {
write!(f, "_geoRadius({}, {}, {}, {})", point[0], point[1], radius, resolution)
}
FilterCondition::GeoBoundingBox {
top_right_point: top_left_point,
bottom_left_point: bottom_right_point,
@@ -613,6 +672,13 @@ impl std::fmt::Display for FilterCondition<'_> {
bottom_right_point[1]
)
}
FilterCondition::GeoPolygon { points } => {
write!(f, "_geoPolygon([")?;
for point in points {
write!(f, "[{}, {}], ", point[0], point[1])?;
}
write!(f, "])")
}
}
}
}
@@ -651,7 +717,7 @@ pub mod tests {
/// Create a raw [Token]. You must specify the string that appear BEFORE your element followed by your element
pub fn rtok<'a>(before: &'a str, value: &'a str) -> Token<'a> {
// if the string is empty we still need to return 1 for the line number
let lines = before.is_empty().then_some(1).unwrap_or_else(|| before.lines().count());
let lines = if before.is_empty() { 1 } else { before.lines().count() };
let offset = before.chars().count();
// the extra field is not checked in the tests so we can set it to nothing
unsafe { Span::new_from_raw_offset(offset, lines as u32, value, "") }.into()
@@ -776,12 +842,17 @@ pub mod tests {
insta::assert_snapshot!(p("_geoRadius(12, 13, 14)"), @"_geoRadius({12}, {13}, {14})");
insta::assert_snapshot!(p("NOT _geoRadius(12, 13, 14)"), @"NOT (_geoRadius({12}, {13}, {14}))");
insta::assert_snapshot!(p("_geoRadius(12,13,14)"), @"_geoRadius({12}, {13}, {14})");
insta::assert_snapshot!(p("_geoRadius(12,13,14,1000)"), @"_geoRadius({12}, {13}, {14}, {1000})");
// Test geo bounding box
insta::assert_snapshot!(p("_geoBoundingBox([12, 13], [14, 15])"), @"_geoBoundingBox([{12}, {13}], [{14}, {15}])");
insta::assert_snapshot!(p("NOT _geoBoundingBox([12, 13], [14, 15])"), @"NOT (_geoBoundingBox([{12}, {13}], [{14}, {15}]))");
insta::assert_snapshot!(p("_geoBoundingBox([12,13],[14,15])"), @"_geoBoundingBox([{12}, {13}], [{14}, {15}])");
// Test geo polygon
insta::assert_snapshot!(p("_geoPolygon([12, 13], [14, 15], [16, 17])"), @"_geoPolygon([[{12}, {13}], [{14}, {15}], [{16}, {17}], ])");
insta::assert_snapshot!(p("_geoPolygon([12, 13], [14, 15], [-1.2,2939.2], [1,1])"), @"_geoPolygon([[{12}, {13}], [{14}, {15}], [{-1.2}, {2939.2}], [{1}, {1}], ])");
// Test OR + AND
insta::assert_snapshot!(p("channel = ponce AND 'dog race' != 'bernese mountain'"), @"AND[{channel} = {ponce}, {dog race} != {bernese mountain}, ]");
insta::assert_snapshot!(p("channel = ponce OR 'dog race' != 'bernese mountain'"), @"OR[{channel} = {ponce}, {dog race} != {bernese mountain}, ]");
@@ -838,50 +909,80 @@ pub mod tests {
11:12 channel = 🐻 AND followers < 100
"###);
insta::assert_snapshot!(p("'OR'"), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `\'OR\'`.
insta::assert_snapshot!(p("'OR'"), @r"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` at `\'OR\'`.
1:5 'OR'
"###);
");
insta::assert_snapshot!(p("OR"), @r###"
Was expecting a value but instead got `OR`, which is a reserved keyword. To use `OR` as a field name or a value, surround it by quotes.
1:3 OR
"###);
insta::assert_snapshot!(p("channel Ponce"), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `channel Ponce`.
insta::assert_snapshot!(p("channel Ponce"), @r"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` at `channel Ponce`.
1:14 channel Ponce
"###);
");
insta::assert_snapshot!(p("channel = Ponce OR"), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` but instead got nothing.
insta::assert_snapshot!(p("channel = Ponce OR"), @r"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` but instead got nothing.
19:19 channel = Ponce OR
"###);
");
insta::assert_snapshot!(p("_geoRadius"), @r###"
The `_geoRadius` filter expects three arguments: `_geoRadius(latitude, longitude, radius)`.
1:11 _geoRadius
"###);
insta::assert_snapshot!(p("_geoRadius"), @r"
The `_geoRadius` filter must be in the form: `_geoRadius(latitude, longitude, radius, optionalResolution)`.
11:11 _geoRadius
");
insta::assert_snapshot!(p("_geoRadius = 12"), @r###"
The `_geoRadius` filter expects three arguments: `_geoRadius(latitude, longitude, radius)`.
1:16 _geoRadius = 12
"###);
insta::assert_snapshot!(p("_geoRadius = 12"), @r"
The `_geoRadius` filter must be in the form: `_geoRadius(latitude, longitude, radius, optionalResolution)`.
11:16 _geoRadius = 12
");
insta::assert_snapshot!(p("_geoBoundingBox"), @r###"
insta::assert_snapshot!(p("_geoBoundingBox"), @r"
The `_geoBoundingBox` filter expects two pairs of arguments: `_geoBoundingBox([latitude, longitude], [latitude, longitude])`.
1:16 _geoBoundingBox
"###);
16:16 _geoBoundingBox
");
insta::assert_snapshot!(p("_geoBoundingBox = 12"), @r###"
insta::assert_snapshot!(p("_geoBoundingBox = 12"), @r"
The `_geoBoundingBox` filter expects two pairs of arguments: `_geoBoundingBox([latitude, longitude], [latitude, longitude])`.
1:21 _geoBoundingBox = 12
"###);
16:21 _geoBoundingBox = 12
");
insta::assert_snapshot!(p("_geoBoundingBox(1.0, 1.0)"), @r###"
insta::assert_snapshot!(p("_geoBoundingBox(1.0, 1.0)"), @r"
The `_geoBoundingBox` filter expects two pairs of arguments: `_geoBoundingBox([latitude, longitude], [latitude, longitude])`.
1:26 _geoBoundingBox(1.0, 1.0)
"###);
17:26 _geoBoundingBox(1.0, 1.0)
");
insta::assert_snapshot!(p("_geoPolygon([1,2,3])"), @r"
The `_geoPolygon` filter expects at least 3 points but only 1 were specified
18:19 _geoPolygon([1,2,3])
");
insta::assert_snapshot!(p("_geoPolygon(1,2,3)"), @r"
The `_geoPolygon` filter doesn't match the expected format: `_geoPolygon([latitude, longitude], [latitude, longitude])`.
13:19 _geoPolygon(1,2,3)
");
insta::assert_snapshot!(p("_geoPolygon([1,2],[1,2],[1,2,3])"), @r"
Was expecting 2 coordinates but instead found 3.
26:27 _geoPolygon([1,2],[1,2],[1,2,3])
");
insta::assert_snapshot!(p("_geoPolygon([1,2],[1,2,3])"), @r"
The `_geoPolygon` filter expects at least 3 points but only 2 were specified
24:25 _geoPolygon([1,2],[1,2,3])
");
insta::assert_snapshot!(p("_geoPolygon(1)"), @r"
The `_geoPolygon` filter doesn't match the expected format: `_geoPolygon([latitude, longitude], [latitude, longitude])`.
13:15 _geoPolygon(1)
");
insta::assert_snapshot!(p("_geoPolygon([1,2)"), @r"
The `_geoPolygon` filter doesn't match the expected format: `_geoPolygon([latitude, longitude], [latitude, longitude])`.
17:18 _geoPolygon([1,2)
");
insta::assert_snapshot!(p("_geoPoint(12, 13, 14)"), @r###"
`_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.
@@ -938,15 +1039,15 @@ pub mod tests {
34:35 channel = mv OR followers >= 1000)
"###);
insta::assert_snapshot!(p("colour NOT EXIST"), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `colour NOT EXIST`.
insta::assert_snapshot!(p("colour NOT EXIST"), @r"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` at `colour NOT EXIST`.
1:17 colour NOT EXIST
"###);
");
insta::assert_snapshot!(p("subscribers 100 TO1000"), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `subscribers 100 TO1000`.
insta::assert_snapshot!(p("subscribers 100 TO1000"), @r"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` at `subscribers 100 TO1000`.
1:23 subscribers 100 TO1000
"###);
");
insta::assert_snapshot!(p("channel = ponce ORdog != 'bernese mountain'"), @r###"
Found unexpected characters at the end of the filter: `ORdog != \'bernese mountain\'`. You probably forgot an `OR` or an `AND` rule.
@@ -1071,38 +1172,38 @@ pub mod tests {
5:7 NOT OR EXISTS AND EXISTS NOT EXISTS
"###);
insta::assert_snapshot!(p(r#"value NULL"#), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value NULL`.
insta::assert_snapshot!(p(r#"value NULL"#), @r"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` at `value NULL`.
1:11 value NULL
"###);
insta::assert_snapshot!(p(r#"value NOT NULL"#), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value NOT NULL`.
");
insta::assert_snapshot!(p(r#"value NOT NULL"#), @r"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` at `value NOT NULL`.
1:15 value NOT NULL
"###);
insta::assert_snapshot!(p(r#"value EMPTY"#), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value EMPTY`.
");
insta::assert_snapshot!(p(r#"value EMPTY"#), @r"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` at `value EMPTY`.
1:12 value EMPTY
"###);
insta::assert_snapshot!(p(r#"value NOT EMPTY"#), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value NOT EMPTY`.
");
insta::assert_snapshot!(p(r#"value NOT EMPTY"#), @r"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` at `value NOT EMPTY`.
1:16 value NOT EMPTY
"###);
insta::assert_snapshot!(p(r#"value IS"#), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value IS`.
");
insta::assert_snapshot!(p(r#"value IS"#), @r"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` at `value IS`.
1:9 value IS
"###);
insta::assert_snapshot!(p(r#"value IS NOT"#), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value IS NOT`.
");
insta::assert_snapshot!(p(r#"value IS NOT"#), @r"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` at `value IS NOT`.
1:13 value IS NOT
"###);
insta::assert_snapshot!(p(r#"value IS EXISTS"#), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value IS EXISTS`.
");
insta::assert_snapshot!(p(r#"value IS EXISTS"#), @r"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` at `value IS EXISTS`.
1:16 value IS EXISTS
"###);
insta::assert_snapshot!(p(r#"value IS NOT EXISTS"#), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value IS NOT EXISTS`.
");
insta::assert_snapshot!(p(r#"value IS NOT EXISTS"#), @r"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` at `value IS NOT EXISTS`.
1:20 value IS NOT EXISTS
"###);
");
}
#[test]

View File

@@ -14,6 +14,7 @@ license.workspace = true
anyhow = "1.0.98"
bincode = "1.3.3"
byte-unit = "5.1.6"
bytes = "1.10.1"
bumpalo = "3.18.1"
bumparaw-collections = "0.1.4"
convert_case = "0.8.0"
@@ -32,6 +33,7 @@ rayon = "1.10.0"
roaring = { version = "0.10.12", features = ["serde"] }
serde = { version = "1.0.219", features = ["derive"] }
serde_json = { version = "1.0.140", features = ["preserve_order"] }
tar = "0.4.44"
synchronoise = "1.0.1"
tempfile = "3.20.0"
thiserror = "2.0.12"
@@ -45,6 +47,9 @@ tracing = "0.1.41"
ureq = "2.12.1"
uuid = { version = "1.17.0", features = ["serde", "v4"] }
backoff = "0.4.0"
reqwest = { version = "0.12.23", features = ["rustls-tls", "http2"], default-features = false }
rusty-s3 = "0.8.1"
tokio = { version = "1.47.1", features = ["full"] }
[dev-dependencies]
big_s = "1.0.2"

View File

@@ -1,3 +1,5 @@
#![allow(clippy::result_large_err)]
use std::collections::HashMap;
use std::io;
@@ -148,6 +150,7 @@ impl<'a> Dump<'a> {
details: task.details,
status: task.status,
network: task.network,
custom_metadata: task.custom_metadata,
kind: match task.kind {
KindDump::DocumentImport {
primary_key,
@@ -232,6 +235,12 @@ impl<'a> Dump<'a> {
}
}
KindDump::UpgradeDatabase { from } => KindWithContent::UpgradeDatabase { from },
KindDump::IndexCompaction { index_uid } => {
KindWithContent::IndexCompaction { index_uid }
}
KindDump::NetworkTopologyChange(network_topology_change) => {
KindWithContent::NetworkTopologyChange(network_topology_change)
}
},
};

View File

@@ -3,9 +3,13 @@ use std::fmt::Display;
use meilisearch_types::batches::BatchId;
use meilisearch_types::error::{Code, ErrorCode};
use meilisearch_types::milli::index::RollbackOutcome;
use meilisearch_types::milli::DocumentId;
use meilisearch_types::tasks::enterprise_edition::network::ReceiveTaskError;
use meilisearch_types::tasks::{Kind, Status};
use meilisearch_types::{heed, milli};
use reqwest::StatusCode;
use thiserror::Error;
use uuid::Uuid;
use crate::TaskId;
@@ -127,6 +131,14 @@ pub enum Error {
#[error("Aborted task")]
AbortedTask,
#[error("S3 error: status: {status}, body: {body}")]
S3Error { status: StatusCode, body: String },
#[error("S3 HTTP error: {0}")]
S3HttpError(reqwest::Error),
#[error("S3 XML error: {0}")]
S3XmlError(Box<dyn std::error::Error + Send + Sync>),
#[error("S3 bucket error: {0}")]
S3BucketError(rusty_s3::BucketError),
#[error(transparent)]
Dump(#[from] dump::Error),
#[error(transparent)]
@@ -182,6 +194,15 @@ pub enum Error {
#[error(transparent)]
HeedTransaction(heed::Error),
#[error("No network topology change task is currently enqueued or processing")]
ImportTaskWithoutNetworkTask,
#[error("The network task version (`{network_task}`) does not match the import task version (`{import_task}`)")]
NetworkVersionMismatch { network_task: Uuid, import_task: Uuid },
#[error("The import task emanates from an unknown remote `{0}`")]
ImportTaskUnknownRemote(String),
#[error("The import task with key `{0}` was already received")]
ImportTaskAlreadyReceived(DocumentId),
#[cfg(test)]
#[error("Planned failure for tests.")]
PlannedFailure,
@@ -226,6 +247,10 @@ impl Error {
| Error::TaskCancelationWithEmptyQuery
| Error::FromRemoteWhenExporting { .. }
| Error::AbortedTask
| Error::S3Error { .. }
| Error::S3HttpError(_)
| Error::S3XmlError(_)
| Error::S3BucketError(_)
| Error::Dump(_)
| Error::Heed(_)
| Error::Milli { .. }
@@ -235,6 +260,10 @@ impl Error {
| Error::Persist(_)
| Error::FeatureNotEnabled(_)
| Error::Export(_)
| Error::ImportTaskWithoutNetworkTask
| Error::NetworkVersionMismatch { .. }
| Error::ImportTaskAlreadyReceived(_)
| Error::ImportTaskUnknownRemote(_)
| Error::Anyhow(_) => true,
Error::CreateBatch(_)
| Error::CorruptedTaskQueue
@@ -293,8 +322,18 @@ impl ErrorCode for Error {
Error::BatchNotFound(_) => Code::BatchNotFound,
Error::TaskDeletionWithEmptyQuery => Code::MissingTaskFilters,
Error::TaskCancelationWithEmptyQuery => Code::MissingTaskFilters,
// TODO: not sure of the Code to use
Error::NoSpaceLeftInTaskQueue => Code::NoSpaceLeftOnDevice,
Error::ImportTaskWithoutNetworkTask => Code::ImportTaskWithoutNetworkTask,
Error::NetworkVersionMismatch { .. } => Code::NetworkVersionMismatch,
Error::ImportTaskAlreadyReceived(_) => Code::ImportTaskAlreadyReceived,
Error::ImportTaskUnknownRemote(_) => Code::ImportTaskUnknownRemote,
Error::S3Error { status, .. } if status.is_client_error() => {
Code::InvalidS3SnapshotRequest
}
Error::S3Error { .. } => Code::S3SnapshotServerError,
Error::S3HttpError(_) => Code::S3SnapshotServerError,
Error::S3XmlError(_) => Code::S3SnapshotServerError,
Error::S3BucketError(_) => Code::InvalidS3SnapshotParameters,
Error::Dump(e) => e.error_code(),
Error::Milli { error, .. } => error.error_code(),
Error::ProcessBatchPanicked(_) => Code::Internal,
@@ -326,3 +365,12 @@ impl ErrorCode for Error {
}
}
}
impl From<ReceiveTaskError> for Error {
fn from(value: ReceiveTaskError) -> Self {
match value {
ReceiveTaskError::UnknownRemote(unknown) => Error::ImportTaskUnknownRemote(unknown),
ReceiveTaskError::DuplicateTask(dup) => Error::ImportTaskAlreadyReceived(dup),
}
}
}

View File

@@ -199,7 +199,7 @@ impl IndexMapper {
let uuid = Uuid::new_v4();
self.index_mapping.put(&mut wtxn, name, &uuid)?;
let index_path = self.base_path.join(uuid.to_string());
let index_path = self.index_path(uuid);
fs::create_dir_all(&index_path)?;
// Error if the UUIDv4 somehow already exists in the map, since it should be fresh.
@@ -286,7 +286,7 @@ impl IndexMapper {
};
let index_map = self.index_map.clone();
let index_path = self.base_path.join(uuid.to_string());
let index_path = self.index_path(uuid);
let index_name = name.to_string();
thread::Builder::new()
.name(String::from("index_deleter"))
@@ -341,6 +341,31 @@ impl IndexMapper {
Ok(())
}
/// Closes the specified index.
///
/// This operation involves closing the underlying environment and so can take a long time to complete.
///
/// # Panics
///
/// - If the Index corresponding to the passed name is concurrently being deleted/resized or cannot be found in the
/// in memory hash map.
pub fn close_index(&self, rtxn: &RoTxn, name: &str) -> Result<()> {
let uuid = self
.index_mapping
.get(rtxn, name)?
.ok_or_else(|| Error::IndexNotFound(name.to_string()))?;
// We remove the index from the in-memory index map.
self.index_map.write().unwrap().close_for_resize(&uuid, self.enable_mdb_writemap, 0);
Ok(())
}
/// The number of indexes in the database
pub fn index_count(&self, rtxn: &RoTxn) -> Result<u64> {
Ok(self.index_mapping.len(rtxn)?)
}
/// Return an index, may open it if it wasn't already opened.
pub fn index(&self, rtxn: &RoTxn, name: &str) -> Result<Index> {
if let Some((current_name, current_index)) =
@@ -388,7 +413,7 @@ impl IndexMapper {
} else {
continue;
};
let index_path = self.base_path.join(uuid.to_string());
let index_path = self.index_path(uuid);
// take the lock to reopen the environment.
reopen
.reopen(&mut self.index_map.write().unwrap(), &index_path)
@@ -405,7 +430,7 @@ impl IndexMapper {
// if it's not already there.
match index_map.get(&uuid) {
Missing => {
let index_path = self.base_path.join(uuid.to_string());
let index_path = self.index_path(uuid);
break index_map
.create(
@@ -432,6 +457,14 @@ impl IndexMapper {
Ok(index)
}
/// Returns the path of the index.
///
/// The folder located at this path is containing the data.mdb,
/// the lock.mdb and an optional data.mdb.cpy file.
pub fn index_path(&self, uuid: Uuid) -> PathBuf {
self.base_path.join(uuid.to_string())
}
pub fn rollback_index(
&self,
rtxn: &RoTxn,
@@ -472,7 +505,7 @@ impl IndexMapper {
};
}
let index_path = self.base_path.join(uuid.to_string());
let index_path = self.index_path(uuid);
Index::rollback(milli::heed::EnvOpenOptions::new().read_txn_without_tls(), index_path, to)
.map_err(|err| crate::Error::from_milli(err, Some(name.to_string())))
}

View File

@@ -21,7 +21,7 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
let IndexScheduler {
cleanup_enabled: _,
experimental_no_edition_2024_for_dumps: _,
processing_tasks,
runtime_tasks,
env,
version,
queue,
@@ -36,6 +36,7 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
run_loop_iteration: _,
embedders: _,
chat_settings: _,
runtime: _,
} = scheduler;
let rtxn = env.read_txn().unwrap();
@@ -49,7 +50,7 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
snap.push_str(&format!("index scheduler running on version {indx_sched_version:?}\n"));
}
let processing = processing_tasks.read().unwrap().clone();
let processing = runtime_tasks.read().unwrap().processing.clone();
snap.push_str(&format!("### Autobatching Enabled = {}\n", scheduler.autobatching_enabled));
snap.push_str(&format!(
"### Processing batch {:?}:\n",
@@ -231,6 +232,7 @@ pub fn snapshot_task(task: &Task) -> String {
status,
kind,
network,
custom_metadata,
} = task;
snap.push('{');
snap.push_str(&format!("uid: {uid}, "));
@@ -251,6 +253,9 @@ pub fn snapshot_task(task: &Task) -> String {
if let Some(network) = network {
snap.push_str(&format!("network: {network:?}, "))
}
if let Some(custom_metadata) = custom_metadata {
snap.push_str(&format!("custom_metadata: {custom_metadata:?}"))
}
snap.push('}');
snap
@@ -317,6 +322,12 @@ fn snapshot_details(d: &Details) -> String {
Details::UpgradeDatabase { from, to } => {
format!("{{ from: {from:?}, to: {to:?} }}")
}
Details::IndexCompaction { index_uid, pre_compaction_size, post_compaction_size } => {
format!("{{ index_uid: {index_uid:?}, pre_compaction_size: {pre_compaction_size:?}, post_compaction_size: {post_compaction_size:?} }}")
}
Details::NetworkTopologyChange { moved_documents, received_documents, message } => {
format!("{{ moved_documents: {moved_documents:?}, received_documents: {received_documents:?}, message: {message:?}")
}
}
}

View File

@@ -1,3 +1,6 @@
// The main Error type is large and boxing the large variant make the pattern matching fails
#![allow(clippy::result_large_err)]
/*!
This crate defines the index scheduler, which is responsible for:
1. Keeping references to meilisearch's indexes and mapping them to their
@@ -65,10 +68,10 @@ use meilisearch_types::milli::vector::{
};
use meilisearch_types::milli::{self, Index};
use meilisearch_types::task_view::TaskView;
use meilisearch_types::tasks::{KindWithContent, Task, TaskNetwork};
use meilisearch_types::tasks::enterprise_edition::network::{DbTaskNetwork, TaskNetwork};
use meilisearch_types::tasks::{KindWithContent, Task};
use meilisearch_types::webhooks::{Webhook, WebhooksDumpView, WebhooksView};
use milli::vector::db::IndexEmbeddingConfig;
use processing::ProcessingTasks;
pub use queue::Query;
use queue::Queue;
use roaring::RoaringBitmap;
@@ -79,6 +82,7 @@ use uuid::Uuid;
use versioning::Versioning;
use crate::index_mapper::IndexMapper;
use crate::processing::RuntimeTasks;
use crate::utils::clamp_to_page_size;
pub(crate) type BEI128 = I128<BE>;
@@ -160,7 +164,7 @@ pub struct IndexScheduler {
pub(crate) env: Env<WithoutTls>,
/// The list of tasks currently processing
pub(crate) processing_tasks: Arc<RwLock<ProcessingTasks>>,
pub(crate) runtime_tasks: Arc<RwLock<RuntimeTasks>>,
/// A database containing only the version of the index-scheduler
pub version: versioning::Versioning,
@@ -213,13 +217,16 @@ pub struct IndexScheduler {
/// A counter that is incremented before every call to [`tick`](IndexScheduler::tick)
#[cfg(test)]
run_loop_iteration: Arc<RwLock<usize>>,
/// The tokio runtime used for asynchronous tasks.
runtime: Option<tokio::runtime::Handle>,
}
impl IndexScheduler {
fn private_clone(&self) -> IndexScheduler {
IndexScheduler {
env: self.env.clone(),
processing_tasks: self.processing_tasks.clone(),
runtime_tasks: self.runtime_tasks.clone(),
version: self.version.clone(),
queue: self.queue.private_clone(),
scheduler: self.scheduler.private_clone(),
@@ -239,6 +246,7 @@ impl IndexScheduler {
run_loop_iteration: self.run_loop_iteration.clone(),
features: self.features.clone(),
chat_settings: self.chat_settings,
runtime: self.runtime.clone(),
}
}
@@ -252,13 +260,23 @@ impl IndexScheduler {
}
/// Create an index scheduler and start its run loop.
#[allow(private_interfaces)] // because test_utils is private
pub fn new(
options: IndexSchedulerOptions,
auth_env: Env<WithoutTls>,
from_db_version: (u32, u32, u32),
#[cfg(test)] test_breakpoint_sdr: crossbeam_channel::Sender<(test_utils::Breakpoint, bool)>,
#[cfg(test)] planned_failures: Vec<(usize, test_utils::FailureLocation)>,
runtime: Option<tokio::runtime::Handle>,
) -> Result<Self> {
let this = Self::new_without_run(options, auth_env, from_db_version, runtime)?;
this.run();
Ok(this)
}
fn new_without_run(
options: IndexSchedulerOptions,
auth_env: Env<WithoutTls>,
from_db_version: (u32, u32, u32),
runtime: Option<tokio::runtime::Handle>,
) -> Result<Self> {
std::fs::create_dir_all(&options.tasks_path)?;
std::fs::create_dir_all(&options.update_file_path)?;
@@ -313,9 +331,8 @@ impl IndexScheduler {
wtxn.commit()?;
// allow unreachable_code to get rids of the warning in the case of a test build.
let this = Self {
processing_tasks: Arc::new(RwLock::new(ProcessingTasks::new())),
Ok(Self {
runtime_tasks: Arc::new(RwLock::new(RuntimeTasks::new())),
version,
queue,
scheduler: Scheduler::new(&options, auth_env),
@@ -330,21 +347,38 @@ impl IndexScheduler {
webhooks: Arc::new(webhooks),
embedders: Default::default(),
#[cfg(test)]
test_breakpoint_sdr,
#[cfg(test)]
planned_failures,
#[cfg(test)] // Will be replaced in `new_tests` in test environments
test_breakpoint_sdr: crossbeam_channel::bounded(0).0,
#[cfg(test)] // Will be replaced in `new_tests` in test environments
planned_failures: Default::default(),
#[cfg(test)]
run_loop_iteration: Arc::new(RwLock::new(0)),
features,
chat_settings,
};
runtime,
})
}
/// Create an index scheduler and start its run loop.
#[cfg(test)]
fn new_test(
options: IndexSchedulerOptions,
auth_env: Env<WithoutTls>,
from_db_version: (u32, u32, u32),
runtime: Option<tokio::runtime::Handle>,
test_breakpoint_sdr: crossbeam_channel::Sender<(test_utils::Breakpoint, bool)>,
planned_failures: Vec<(usize, test_utils::FailureLocation)>,
) -> Result<Self> {
let mut this = Self::new_without_run(options, auth_env, from_db_version, runtime)?;
this.test_breakpoint_sdr = test_breakpoint_sdr;
this.planned_failures = planned_failures;
this.run();
Ok(this)
}
fn read_txn(&self) -> Result<RoTxn<WithoutTls>> {
fn read_txn(&self) -> Result<RoTxn<'_, WithoutTls>> {
self.env.read_txn().map_err(|e| e.into())
}
@@ -606,19 +640,19 @@ impl IndexScheduler {
/// 3. The number of times the properties appeared.
pub fn get_stats(&self) -> Result<BTreeMap<String, BTreeMap<String, u64>>> {
let rtxn = self.read_txn()?;
self.queue.get_stats(&rtxn, &self.processing_tasks.read().unwrap())
self.queue.get_stats(&rtxn, &self.runtime_tasks.read().unwrap().processing)
}
// Return true if there is at least one task that is processing.
pub fn is_task_processing(&self) -> Result<bool> {
Ok(!self.processing_tasks.read().unwrap().processing.is_empty())
Ok(!self.runtime_tasks.read().unwrap().processing.processing.is_empty())
}
/// Return true iff there is at least one task associated with this index
/// that is processing.
pub fn is_index_processing(&self, index: &str) -> Result<bool> {
let rtxn = self.env.read_txn()?;
let processing_tasks = self.processing_tasks.read().unwrap().processing.clone();
let processing_tasks = self.runtime_tasks.read().unwrap().processing.processing.clone();
let index_tasks = self.queue.tasks.index_tasks(&rtxn, index)?;
let nbr_index_processing_tasks = processing_tasks.intersection_len(&index_tasks);
Ok(nbr_index_processing_tasks > 0)
@@ -644,8 +678,8 @@ impl IndexScheduler {
filters: &meilisearch_auth::AuthFilter,
) -> Result<(Vec<Task>, u64)> {
let rtxn = self.read_txn()?;
let processing = self.processing_tasks.read().unwrap();
self.queue.get_tasks_from_authorized_indexes(&rtxn, query, filters, &processing)
let processing = &self.runtime_tasks.read().unwrap().processing;
self.queue.get_tasks_from_authorized_indexes(&rtxn, query, filters, processing)
}
/// Return the task ids matching the query along with the total number of tasks
@@ -663,18 +697,18 @@ impl IndexScheduler {
filters: &meilisearch_auth::AuthFilter,
) -> Result<(RoaringBitmap, u64)> {
let rtxn = self.read_txn()?;
let processing = self.processing_tasks.read().unwrap();
self.queue.get_task_ids_from_authorized_indexes(&rtxn, query, filters, &processing)
let processing = &self.runtime_tasks.read().unwrap().processing;
self.queue.get_task_ids_from_authorized_indexes(&rtxn, query, filters, processing)
}
pub fn set_task_network(&self, task_id: TaskId, network: TaskNetwork) -> Result<()> {
pub fn set_task_network(&self, task_id: TaskId, network: DbTaskNetwork) -> Result<Task> {
let mut wtxn = self.env.write_txn()?;
let mut task =
self.queue.tasks.get_task(&wtxn, task_id)?.ok_or(Error::TaskNotFound(task_id))?;
task.network = Some(network);
self.queue.tasks.all_tasks.put(&mut wtxn, &task_id, &task)?;
wtxn.commit()?;
Ok(())
Ok(task)
}
/// Return the batches matching the query from the user's point of view along
@@ -692,8 +726,8 @@ impl IndexScheduler {
filters: &meilisearch_auth::AuthFilter,
) -> Result<(Vec<Batch>, u64)> {
let rtxn = self.read_txn()?;
let processing = self.processing_tasks.read().unwrap();
self.queue.get_batches_from_authorized_indexes(&rtxn, query, filters, &processing)
let processing = &self.runtime_tasks.read().unwrap().processing;
self.queue.get_batches_from_authorized_indexes(&rtxn, query, filters, processing)
}
/// Return the batch ids matching the query along with the total number of batches
@@ -711,8 +745,8 @@ impl IndexScheduler {
filters: &meilisearch_auth::AuthFilter,
) -> Result<(RoaringBitmap, u64)> {
let rtxn = self.read_txn()?;
let processing = self.processing_tasks.read().unwrap();
self.queue.get_batch_ids_from_authorized_indexes(&rtxn, query, filters, &processing)
let processing = &self.runtime_tasks.read().unwrap().processing;
self.queue.get_batch_ids_from_authorized_indexes(&rtxn, query, filters, processing)
}
/// Register a new task in the scheduler.
@@ -723,6 +757,31 @@ impl IndexScheduler {
kind: KindWithContent,
task_id: Option<TaskId>,
dry_run: bool,
) -> Result<Task> {
self.register_with_custom_metadata(kind, task_id, None, dry_run, None)
}
/// Register a new task in the scheduler, with metadata.
///
/// If it fails and data was associated with the task, it tries to delete the associated data.
///
/// # Parameters
///
/// - task_network: network of the task to check.
///
/// If the task is an import task, only accept it if:
///
/// 1. There is an ongoing network topology change task
/// 2. The task to register matches the network version of the network topology change task
///
/// Always accept the task if it is not an import task.
pub fn register_with_custom_metadata(
&self,
kind: KindWithContent,
task_id: Option<TaskId>,
custom_metadata: Option<String>,
dry_run: bool,
task_network: Option<TaskNetwork>,
) -> Result<Task> {
// if the task doesn't delete or cancel anything and 40% of the task queue is full, we must refuse to enqueue the incoming task
if !matches!(&kind, KindWithContent::TaskDeletion { tasks, .. } | KindWithContent::TaskCancelation { tasks, .. } if !tasks.is_empty())
@@ -733,13 +792,86 @@ impl IndexScheduler {
}
let mut wtxn = self.env.write_txn()?;
let task = self.queue.register(&mut wtxn, &kind, task_id, dry_run)?;
if let Some(TaskNetwork::Import { import_from, network_change, metadata }) = &task_network {
let mut network_tasks = self
.queue
.tasks
.get_kind(&wtxn, meilisearch_types::tasks::Kind::NetworkTopologyChange)?;
if network_tasks.is_empty() {
return Err(Error::ImportTaskWithoutNetworkTask);
}
let network_task = {
let processing = self.runtime_tasks.read().unwrap().processing.processing.clone();
if processing.is_disjoint(&network_tasks) {
let enqueued = self
.queue
.tasks
.get_status(&wtxn, meilisearch_types::tasks::Status::Enqueued)?;
network_tasks &= enqueued;
if let Some(network_task) = network_tasks.into_iter().next() {
network_task
} else {
return Err(Error::ImportTaskWithoutNetworkTask);
}
} else {
network_tasks &= &*processing;
network_tasks.into_iter().next().unwrap()
}
};
let mut network_task = self.queue.tasks.get_task(&wtxn, network_task)?.unwrap();
let network_task_version = network_task
.network
.as_ref()
.map(|network| network.network_version())
.unwrap_or_default();
if network_task_version != network_change.network_version {
return Err(Error::NetworkVersionMismatch {
network_task: network_task_version,
import_task: network_change.network_version,
});
}
let KindWithContent::NetworkTopologyChange(network_topology_change) =
&mut network_task.kind
else {
return Err(Error::CorruptedTaskQueue);
};
network_topology_change.receive_remote_task(
&import_from.remote_name,
&import_from.index_name,
metadata.task_key,
import_from.document_count,
metadata.index_count,
metadata.total_index_documents,
)?;
self.queue.tasks.update_task(&mut wtxn, &mut network_task)?;
}
let task = self.queue.register(
&mut wtxn,
&kind,
task_id,
custom_metadata,
dry_run,
task_network.map(DbTaskNetwork::from),
)?;
// If the registered task is a task cancelation
// we inform the processing tasks to stop (if necessary).
if let KindWithContent::TaskCancelation { tasks, .. } = kind {
let tasks_to_cancel = RoaringBitmap::from_iter(tasks);
if self.processing_tasks.read().unwrap().must_cancel_processing_tasks(&tasks_to_cancel)
if self
.runtime_tasks
.read()
.unwrap()
.processing
.must_cancel_processing_tasks(&tasks_to_cancel)
{
self.scheduler.must_stop_processing.must_stop();
}
@@ -757,7 +889,7 @@ impl IndexScheduler {
/// Register a new task coming from a dump in the scheduler.
/// By taking a mutable ref we're pretty sure no one will ever import a dump while actix is running.
pub fn register_dumped_task(&mut self) -> Result<Dump> {
pub fn register_dumped_task(&mut self) -> Result<Dump<'_>> {
Dump::new(self)
}
@@ -806,10 +938,8 @@ impl IndexScheduler {
.queue
.tasks
.get_task(self.rtxn, task_id)
.map_err(|err| io::Error::new(io::ErrorKind::Other, err))?
.ok_or_else(|| {
io::Error::new(io::ErrorKind::Other, Error::CorruptedTaskQueue)
})?;
.map_err(io::Error::other)?
.ok_or_else(|| io::Error::other(Error::CorruptedTaskQueue))?;
serde_json::to_writer(&mut self.buffer, &TaskView::from_task(&task))?;
self.buffer.push(b'\n');

View File

@@ -6,6 +6,28 @@ use roaring::RoaringBitmap;
use crate::utils::ProcessingBatch;
pub struct RuntimeTasks {
pub processing: ProcessingTasks,
pub enqueued_network: EnqueuedNetworkTasks,
}
impl RuntimeTasks {
pub(crate) fn new() -> Self {
Self { processing: ProcessingTasks::new(), enqueued_network: Default::default() }
}
}
#[derive(Default)]
pub struct EnqueuedNetworkTasks {
tasks: RoaringBitmap,
}
impl EnqueuedNetworkTasks {
pub fn swap(&mut self, mut new: RoaringBitmap) -> RoaringBitmap {
std::mem::swap(&mut self.tasks, &mut new);
new
}
}
#[derive(Clone, Default)]
pub struct ProcessingTasks {
pub batch: Option<Arc<ProcessingBatch>>,
@@ -75,6 +97,7 @@ make_enum_progress! {
pub enum TaskCancelationProgress {
RetrievingTasks,
CancelingUpgrade,
CleaningCompactionLeftover,
UpdatingTasks,
}
}
@@ -138,6 +161,17 @@ make_enum_progress! {
}
}
make_enum_progress! {
pub enum IndexCompaction {
RetrieveTheIndex,
CreateTemporaryFile,
CopyAndCompactTheIndex,
PersistTheCompactedIndex,
CloseTheIndex,
ReopenTheIndex,
}
}
make_enum_progress! {
pub enum InnerSwappingTwoIndexes {
RetrieveTheTasks,

View File

@@ -26,7 +26,7 @@ fn query_batches_from_and_limit() {
handle.advance_n_successful_batches(3);
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "processed_all_tasks");
let proc = index_scheduler.processing_tasks.read().unwrap().clone();
let proc = index_scheduler.runtime_tasks.read().unwrap().processing.clone();
let rtxn = index_scheduler.env.read_txn().unwrap();
let query = Query { limit: Some(0), ..Default::default() };
let (batches, _) = index_scheduler
@@ -359,7 +359,7 @@ fn query_batches_special_rules() {
handle.advance_till([Start, BatchCreated]);
let rtxn = index_scheduler.env.read_txn().unwrap();
let proc = index_scheduler.processing_tasks.read().unwrap().clone();
let proc = index_scheduler.runtime_tasks.read().unwrap().processing.clone();
let query = Query { index_uids: Some(vec!["catto".to_owned()]), ..Default::default() };
let (batches, _) = index_scheduler

View File

@@ -15,6 +15,7 @@ use file_store::FileStore;
use meilisearch_types::batches::BatchId;
use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn, WithoutTls};
use meilisearch_types::milli::{CboRoaringBitmapCodec, BEU32};
use meilisearch_types::tasks::enterprise_edition::network::DbTaskNetwork;
use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task};
use roaring::RoaringBitmap;
use time::format_description::well_known::Rfc3339;
@@ -257,7 +258,9 @@ impl Queue {
wtxn: &mut RwTxn,
kind: &KindWithContent,
task_id: Option<TaskId>,
custom_metadata: Option<String>,
dry_run: bool,
network: Option<DbTaskNetwork>,
) -> Result<Task> {
let next_task_id = self.tasks.next_task_id(wtxn)?;
@@ -279,7 +282,8 @@ impl Queue {
details: kind.default_details(),
status: Status::Enqueued,
kind: kind.clone(),
network: None,
network,
custom_metadata,
};
// For deletion and cancelation tasks, we want to make extra sure that they
// don't attempt to delete/cancel tasks that are newer than themselves.
@@ -310,7 +314,8 @@ impl Queue {
| self.tasks.status.get(wtxn, &Status::Failed)?.unwrap_or_default()
| self.tasks.status.get(wtxn, &Status::Canceled)?.unwrap_or_default();
let to_delete = RoaringBitmap::from_iter(finished.into_iter().rev().take(100_000));
let to_delete =
RoaringBitmap::from_sorted_iter(finished.into_iter().take(100_000)).unwrap();
// /!\ the len must be at least 2 or else we might enter an infinite loop where we only delete
// the deletion tasks we enqueued ourselves.
@@ -326,7 +331,7 @@ impl Queue {
);
// it's safe to unwrap here because we checked the len above
let newest_task_id = to_delete.iter().last().unwrap();
let newest_task_id = to_delete.iter().next_back().unwrap();
let last_task_to_delete =
self.tasks.get_task(wtxn, newest_task_id)?.ok_or(Error::CorruptedTaskQueue)?;
@@ -343,7 +348,9 @@ impl Queue {
tasks: to_delete,
},
None,
None,
false,
None,
)?;
Ok(())

View File

@@ -27,7 +27,7 @@ fn query_tasks_from_and_limit() {
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "processed_all_tasks");
let rtxn = index_scheduler.env.read_txn().unwrap();
let processing = index_scheduler.processing_tasks.read().unwrap();
let processing = &index_scheduler.runtime_tasks.read().unwrap().processing;
let query = Query { limit: Some(0), ..Default::default() };
let (tasks, _) = index_scheduler
.queue
@@ -317,7 +317,7 @@ fn query_tasks_special_rules() {
handle.advance_till([Start, BatchCreated]);
let rtxn = index_scheduler.env.read_txn().unwrap();
let proc = index_scheduler.processing_tasks.read().unwrap();
let proc = &index_scheduler.runtime_tasks.read().unwrap().processing;
let query = Query { index_uids: Some(vec!["catto".to_owned()]), ..Default::default() };
let (tasks, _) = index_scheduler
@@ -414,7 +414,7 @@ fn query_tasks_canceled_by() {
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "start");
let rtxn = index_scheduler.read_txn().unwrap();
let proc = index_scheduler.processing_tasks.read().unwrap();
let proc = &index_scheduler.runtime_tasks.read().unwrap().processing;
let query = Query { canceled_by: Some(vec![task_cancelation.uid]), ..Query::default() };
let (tasks, _) = index_scheduler
.queue

View File

@@ -203,26 +203,30 @@ fn test_disable_auto_deletion_of_tasks() {
)
.unwrap();
let rtxn = index_scheduler.env.read_txn().unwrap();
let proc = index_scheduler.processing_tasks.read().unwrap();
let tasks =
index_scheduler.queue.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc).unwrap();
let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap();
snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]" }), name: "task_queue_is_full");
drop(rtxn);
drop(proc);
{
let rtxn = index_scheduler.env.read_txn().unwrap();
let proc = &index_scheduler.runtime_tasks.read().unwrap().processing;
let tasks = index_scheduler
.queue
.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc)
.unwrap();
let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap();
snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]" }), name: "task_queue_is_full");
}
// now we're above the max number of tasks
// and if we try to advance in the tick function no new task deletion should be enqueued
handle.advance_till([Start, BatchCreated]);
let rtxn = index_scheduler.env.read_txn().unwrap();
let proc = index_scheduler.processing_tasks.read().unwrap();
let tasks =
index_scheduler.queue.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc).unwrap();
let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap();
snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "task_deletion_have_not_been_enqueued");
drop(rtxn);
drop(proc);
{
let rtxn = index_scheduler.env.read_txn().unwrap();
let proc = &index_scheduler.runtime_tasks.read().unwrap().processing;
let tasks = index_scheduler
.queue
.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc)
.unwrap();
let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap();
snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "task_deletion_have_not_been_enqueued");
}
}
#[test]
@@ -267,59 +271,69 @@ fn test_auto_deletion_of_tasks() {
)
.unwrap();
let rtxn = index_scheduler.env.read_txn().unwrap();
let proc = index_scheduler.processing_tasks.read().unwrap();
let tasks =
index_scheduler.queue.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc).unwrap();
let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap();
snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]" }), name: "task_queue_is_full");
drop(rtxn);
drop(proc);
{
let rtxn = index_scheduler.env.read_txn().unwrap();
let proc = &index_scheduler.runtime_tasks.read().unwrap().processing;
let tasks = index_scheduler
.queue
.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc)
.unwrap();
let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap();
snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]" }), name: "task_queue_is_full");
}
// now we're above the max number of tasks
// and if we try to advance in the tick function a new task deletion should be enqueued
handle.advance_till([Start, BatchCreated]);
let rtxn = index_scheduler.env.read_txn().unwrap();
let proc = index_scheduler.processing_tasks.read().unwrap();
let tasks =
index_scheduler.queue.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc).unwrap();
let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap();
snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "task_deletion_have_been_enqueued");
drop(rtxn);
drop(proc);
{
// now we're above the max number of tasks
// and if we try to advance in the tick function a new task deletion should be enqueued
handle.advance_till([Start, BatchCreated]);
let rtxn = index_scheduler.env.read_txn().unwrap();
let proc = &index_scheduler.runtime_tasks.read().unwrap().processing;
let tasks = index_scheduler
.queue
.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc)
.unwrap();
let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap();
snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "task_deletion_have_been_enqueued");
}
handle.advance_till([InsideProcessBatch, ProcessBatchSucceeded, AfterProcessing]);
let rtxn = index_scheduler.env.read_txn().unwrap();
let proc = index_scheduler.processing_tasks.read().unwrap();
let tasks =
index_scheduler.queue.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc).unwrap();
let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap();
snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "task_deletion_have_been_processed");
drop(rtxn);
drop(proc);
{
handle.advance_till([InsideProcessBatch, ProcessBatchSucceeded, AfterProcessing]);
let rtxn = index_scheduler.env.read_txn().unwrap();
let proc = &index_scheduler.runtime_tasks.read().unwrap().processing;
let tasks = index_scheduler
.queue
.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc)
.unwrap();
let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap();
snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "task_deletion_have_been_processed");
}
handle.advance_one_failed_batch();
// a new task deletion has been enqueued
handle.advance_one_successful_batch();
let rtxn = index_scheduler.env.read_txn().unwrap();
let proc = index_scheduler.processing_tasks.read().unwrap();
let tasks =
index_scheduler.queue.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc).unwrap();
let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap();
snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "after_the_second_task_deletion");
drop(rtxn);
drop(proc);
{
let rtxn = index_scheduler.env.read_txn().unwrap();
let proc = &index_scheduler.runtime_tasks.read().unwrap().processing;
let tasks = index_scheduler
.queue
.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc)
.unwrap();
let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap();
snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "after_the_second_task_deletion");
}
handle.advance_one_failed_batch();
handle.advance_one_successful_batch();
let rtxn = index_scheduler.env.read_txn().unwrap();
let proc = index_scheduler.processing_tasks.read().unwrap();
let tasks =
index_scheduler.queue.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc).unwrap();
let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap();
snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "everything_has_been_processed");
drop(rtxn);
drop(proc);
{
let rtxn = index_scheduler.env.read_txn().unwrap();
let proc = &index_scheduler.runtime_tasks.read().unwrap().processing;
let tasks = index_scheduler
.queue
.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc)
.unwrap();
let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap();
snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "everything_has_been_processed");
}
}
#[test]

View File

@@ -68,13 +68,15 @@ impl From<KindWithContent> for AutobatchKind {
KindWithContent::IndexCreation { .. } => AutobatchKind::IndexCreation,
KindWithContent::IndexUpdate { .. } => AutobatchKind::IndexUpdate,
KindWithContent::IndexSwap { .. } => AutobatchKind::IndexSwap,
KindWithContent::TaskCancelation { .. }
KindWithContent::IndexCompaction { .. }
| KindWithContent::TaskCancelation { .. }
| KindWithContent::TaskDeletion { .. }
| KindWithContent::DumpCreation { .. }
| KindWithContent::Export { .. }
| KindWithContent::UpgradeDatabase { .. }
| KindWithContent::NetworkTopologyChange(_)
| KindWithContent::SnapshotCreation => {
panic!("The autobatcher should never be called with tasks that don't apply to an index.")
panic!("The autobatcher should never be called with tasks with special priority or that don't apply to an index.")
}
}
}
@@ -287,8 +289,10 @@ impl BatchKind {
};
match (self, autobatch_kind) {
// We don't batch any of these operations
(this, K::IndexCreation | K::IndexUpdate | K::IndexSwap | K::DocumentEdition) => Break((this, BatchStopReason::TaskCannotBeBatched { kind, id })),
// We don't batch any of these operations
(this, K::IndexCreation | K::IndexUpdate | K::IndexSwap | K::DocumentEdition) => {
Break((this, BatchStopReason::TaskCannotBeBatched { kind, id }))
},
// We must not batch tasks that don't have the same index creation rights if the index doesn't already exists.
(this, kind) if !index_already_exists && this.allow_index_creation() == Some(false) && kind.allow_index_creation() == Some(true) => {
Break((this, BatchStopReason::IndexCreationMismatch { id }))

View File

@@ -4,6 +4,7 @@ use std::io::ErrorKind;
use meilisearch_types::heed::RoTxn;
use meilisearch_types::milli::update::IndexDocumentsMethod;
use meilisearch_types::settings::{Settings, Unchecked};
use meilisearch_types::tasks::enterprise_edition::network::NetworkTopologyState;
use meilisearch_types::tasks::{BatchStopReason, Kind, KindWithContent, Status, Task};
use roaring::RoaringBitmap;
use uuid::Uuid;
@@ -55,6 +56,17 @@ pub(crate) enum Batch {
UpgradeDatabase {
tasks: Vec<Task>,
},
IndexCompaction {
index_uid: String,
task: Task,
},
NetworkIndexBatch {
network_task: Task,
inner_batch: Box<Batch>,
},
NetworkWait {
task: Task,
},
}
#[derive(Debug)]
@@ -66,6 +78,7 @@ pub(crate) enum DocumentOperation {
/// A [batch](Batch) that combines multiple tasks operating on an index.
#[derive(Debug)]
#[allow(clippy::large_enum_variant)]
pub(crate) enum IndexOperation {
DocumentOperation {
index_uid: String,
@@ -109,7 +122,8 @@ impl Batch {
| Batch::Dump(task)
| Batch::IndexCreation { task, .. }
| Batch::Export { task }
| Batch::IndexUpdate { task, .. } => {
| Batch::IndexUpdate { task, .. }
| Batch::IndexCompaction { task, .. } => {
RoaringBitmap::from_sorted_iter(std::iter::once(task.uid)).unwrap()
}
Batch::SnapshotCreation(tasks)
@@ -134,9 +148,14 @@ impl Batch {
..
} => RoaringBitmap::from_iter(tasks.iter().chain(other).map(|task| task.uid)),
},
Batch::IndexSwap { task } => {
Batch::IndexSwap { task } | Batch::NetworkWait { task } => {
RoaringBitmap::from_sorted_iter(std::iter::once(task.uid)).unwrap()
}
Batch::NetworkIndexBatch { network_task, inner_batch } => {
let mut tasks = inner_batch.ids();
tasks.insert(network_task.uid);
tasks
}
}
}
@@ -150,11 +169,14 @@ impl Batch {
| Dump(_)
| Export { .. }
| UpgradeDatabase { .. }
| NetworkWait { .. }
| IndexSwap { .. } => None,
IndexOperation { op, .. } => Some(op.index_uid()),
IndexCreation { index_uid, .. }
| IndexUpdate { index_uid, .. }
| IndexDeletion { index_uid, .. } => Some(index_uid),
| IndexDeletion { index_uid, .. }
| IndexCompaction { index_uid, .. } => Some(index_uid),
NetworkIndexBatch { network_task: _, inner_batch } => inner_batch.index_uid(),
}
}
}
@@ -174,8 +196,11 @@ impl fmt::Display for Batch {
Batch::IndexUpdate { .. } => f.write_str("IndexUpdate")?,
Batch::IndexDeletion { .. } => f.write_str("IndexDeletion")?,
Batch::IndexSwap { .. } => f.write_str("IndexSwap")?,
Batch::IndexCompaction { .. } => f.write_str("IndexCompaction")?,
Batch::Export { .. } => f.write_str("Export")?,
Batch::UpgradeDatabase { .. } => f.write_str("UpgradeDatabase")?,
Batch::NetworkIndexBatch { .. } => f.write_str("NetworkTopologyChange")?,
Batch::NetworkWait { .. } => f.write_str("NetworkTopologyChange")?,
};
match index_uid {
Some(name) => f.write_fmt(format_args!(" on {name:?} from tasks: {tasks:?}")),
@@ -452,7 +477,6 @@ impl IndexScheduler {
let mut current_batch = ProcessingBatch::new(batch_id);
let enqueued = &self.queue.tasks.get_status(rtxn, Status::Enqueued)?;
let count_total_enqueued = enqueued.len();
let failed = &self.queue.tasks.get_status(rtxn, Status::Failed)?;
// 0. we get the last task to cancel.
@@ -511,17 +535,41 @@ impl IndexScheduler {
return Ok(Some((Batch::TaskDeletions(tasks), current_batch)));
}
// 3. we batch the export.
// 3. Check for enqueued network topology changes
let network_changes =
self.queue.tasks.get_kind(rtxn, Kind::NetworkTopologyChange)? & enqueued;
if let Some(task_id) = network_changes.iter().next() {
let task = self.queue.tasks.get_task(rtxn, task_id)?.unwrap();
return self.start_processing_network(rtxn, task, enqueued, current_batch);
}
// 4. we get the next task to compact
let to_compact = self.queue.tasks.get_kind(rtxn, Kind::IndexCompaction)? & enqueued;
if let Some(task_id) = to_compact.min() {
let mut task =
self.queue.tasks.get_task(rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?;
current_batch.processing(Some(&mut task));
current_batch.reason(BatchStopReason::TaskCannotBeBatched {
kind: Kind::IndexCompaction,
id: task_id,
});
let index_uid =
task.index_uid().expect("Compaction task must have an index uid").to_owned();
return Ok(Some((Batch::IndexCompaction { index_uid, task }, current_batch)));
}
// 5. we batch the export.
let to_export = self.queue.tasks.get_kind(rtxn, Kind::Export)? & enqueued;
if !to_export.is_empty() {
let task_id = to_export.iter().next().expect("There must be at least one export task");
let mut task = self.queue.tasks.get_task(rtxn, task_id)?.unwrap();
current_batch.processing([&mut task]);
current_batch.reason(BatchStopReason::TaskKindCannotBeBatched { kind: Kind::Export });
current_batch
.reason(BatchStopReason::TaskCannotBeBatched { kind: Kind::Export, id: task_id });
return Ok(Some((Batch::Export { task }, current_batch)));
}
// 4. we batch the snapshot.
// 6. we batch the snapshot.
let to_snapshot = self.queue.tasks.get_kind(rtxn, Kind::SnapshotCreation)? & enqueued;
if !to_snapshot.is_empty() {
let mut tasks = self.queue.tasks.get_existing_tasks(rtxn, to_snapshot)?;
@@ -531,7 +579,7 @@ impl IndexScheduler {
return Ok(Some((Batch::SnapshotCreation(tasks), current_batch)));
}
// 5. we batch the dumps.
// 7. we batch the dumps.
let to_dump = self.queue.tasks.get_kind(rtxn, Kind::DumpCreation)? & enqueued;
if let Some(to_dump) = to_dump.min() {
let mut task =
@@ -544,8 +592,25 @@ impl IndexScheduler {
return Ok(Some((Batch::Dump(task), current_batch)));
}
// 6. We make a batch from the unprioritised tasks. Start by taking the next enqueued task.
let task_id = if let Some(task_id) = enqueued.min() { task_id } else { return Ok(None) };
// 8. We make a batch from the unprioritised tasks. Start by taking the next enqueued task.
let (batch, current_batch) =
self.create_next_batch_unprioritized(rtxn, &enqueued, current_batch)?;
Ok(batch.map(|batch| (batch, current_batch)))
}
fn create_next_batch_unprioritized(
&self,
rtxn: &RoTxn,
enqueued: &RoaringBitmap,
mut current_batch: ProcessingBatch,
) -> Result<(Option<Batch>, ProcessingBatch)> {
let count_total_enqueued = enqueued.len();
let task_id = if let Some(task_id) = enqueued.min() {
task_id
} else {
return Ok((None, current_batch));
};
let mut task =
self.queue.tasks.get_task(rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?;
@@ -562,7 +627,7 @@ impl IndexScheduler {
kind: Kind::IndexSwap,
id: task.uid,
});
return Ok(Some((Batch::IndexSwap { task }, current_batch)));
return Ok((Some(Batch::IndexSwap { task }), current_batch));
};
let index_already_exists = self.index_mapper.exists(rtxn, index_name)?;
@@ -627,7 +692,7 @@ impl IndexScheduler {
autobatcher::autobatch(enqueued, index_already_exists, primary_key.as_deref())
{
current_batch.reason(autobatch_stop_reason.unwrap_or(stop_reason));
return Ok(self
let batch = self
.create_next_batch_index(
rtxn,
index_name.to_string(),
@@ -635,11 +700,127 @@ impl IndexScheduler {
&mut current_batch,
create_index,
)?
.map(|batch| (batch, current_batch)));
.map(|batch| batch);
return Ok((batch, current_batch));
}
// If we found no tasks then we were notified for something that got autobatched
// somehow and there is nothing to do.
Ok(None)
Ok((None, current_batch))
}
fn start_processing_network(
&self,
rtxn: &RoTxn,
mut task: Task,
enqueued: &RoaringBitmap,
current_batch: ProcessingBatch,
) -> Result<Option<(Batch, ProcessingBatch)>> {
let change_version =
task.network.as_ref().map(|network| network.network_version()).unwrap_or_default();
let KindWithContent::NetworkTopologyChange(network_topology_change) = &task.kind else {
panic!("inconsistent kind with content")
};
match network_topology_change.state() {
NetworkTopologyState::WaitingForOlderTasks => {
let mut old_tasks = RoaringBitmap::new();
for task_id in enqueued {
let task = self
.queue
.tasks
.get_task(rtxn, task_id)?
.ok_or(Error::CorruptedTaskQueue)?;
let has_index = task.index_uid().is_some();
if !has_index {
continue;
}
let has_older_network_version = task
.network
.map(|network| network.network_version() <= change_version)
// if there is no version, we never retain the task
.unwrap_or_default();
if has_older_network_version {
old_tasks.push(task_id);
}
}
let res = self.create_next_batch_unprioritized(rtxn, &old_tasks, current_batch);
self.runtime_tasks.write().unwrap().enqueued_network.swap(old_tasks);
let (batch, mut current_batch) = res?;
current_batch.processing(Some(&mut task));
let batch = match batch {
Some(batch) => {
let inner_batch = Box::new(batch);
Batch::NetworkIndexBatch { network_task: task, inner_batch }
}
None => Batch::NetworkWait { task },
};
Ok(Some((batch, current_batch)))
}
NetworkTopologyState::ImportingDocuments => {
let mut import_tasks = RoaringBitmap::new();
for task_id in enqueued {
let task = self
.queue
.tasks
.get_task(rtxn, task_id)?
.ok_or(Error::CorruptedTaskQueue)?;
let has_index = task.index_uid().is_some();
if !has_index {
continue;
}
let is_import_task = task
.network
.map(|network| {
network.network_version() == change_version
&& network.import_data().is_some()
})
// if there is no version, we never retain the task
.unwrap_or_default();
if is_import_task {
import_tasks.push(task_id);
}
}
let res = self.create_next_batch_unprioritized(rtxn, &import_tasks, current_batch);
self.runtime_tasks.write().unwrap().enqueued_network.swap(import_tasks);
let (batch, mut current_batch) = res?;
current_batch.processing(Some(&mut task));
let batch = match batch {
Some(batch) => {
let inner_batch = Box::new(batch);
Batch::NetworkIndexBatch { network_task: task, inner_batch }
}
None => Batch::NetworkWait { task },
};
Ok(Some((batch, current_batch)))
}
NetworkTopologyState::ExportingDocuments | NetworkTopologyState::Finished => {
Ok(Some((Batch::NetworkWait { task }, current_batch)))
}
}
}
}
pub enum BatchOutcome {
NoTaskToProcess,
Batch { batch: Batch, processing: ProcessingBatch },
}

View File

@@ -25,6 +25,7 @@ use convert_case::{Case, Casing as _};
use meilisearch_types::error::ResponseError;
use meilisearch_types::heed::{Env, WithoutTls};
use meilisearch_types::milli;
use meilisearch_types::milli::update::S3SnapshotOptions;
use meilisearch_types::tasks::Status;
use process_batch::ProcessBatchInfo;
use rayon::current_num_threads;
@@ -87,11 +88,14 @@ pub struct Scheduler {
/// Snapshot compaction status.
pub(crate) experimental_no_snapshot_compaction: bool,
/// S3 Snapshot options.
pub(crate) s3_snapshot_options: Option<S3SnapshotOptions>,
}
impl Scheduler {
pub(crate) fn private_clone(&self) -> Scheduler {
Scheduler {
pub(crate) fn private_clone(&self) -> Self {
Self {
must_stop_processing: self.must_stop_processing.clone(),
wake_up: self.wake_up.clone(),
autobatching_enabled: self.autobatching_enabled,
@@ -103,23 +107,52 @@ impl Scheduler {
version_file_path: self.version_file_path.clone(),
embedding_cache_cap: self.embedding_cache_cap,
experimental_no_snapshot_compaction: self.experimental_no_snapshot_compaction,
s3_snapshot_options: self.s3_snapshot_options.clone(),
}
}
pub fn new(options: &IndexSchedulerOptions, auth_env: Env<WithoutTls>) -> Scheduler {
let IndexSchedulerOptions {
version_file_path,
auth_path: _,
tasks_path: _,
update_file_path: _,
indexes_path: _,
snapshots_path,
dumps_path,
cli_webhook_url: _,
cli_webhook_authorization: _,
task_db_size: _,
index_base_map_size: _,
enable_mdb_writemap: _,
index_growth_amount: _,
index_count: _,
indexer_config,
autobatching_enabled,
cleanup_enabled: _,
max_number_of_tasks: _,
max_number_of_batched_tasks,
batched_tasks_size_limit,
instance_features: _,
auto_upgrade: _,
embedding_cache_cap,
experimental_no_snapshot_compaction,
} = options;
Scheduler {
must_stop_processing: MustStopProcessing::default(),
// we want to start the loop right away in case meilisearch was ctrl+Ced while processing things
wake_up: Arc::new(SignalEvent::auto(true)),
autobatching_enabled: options.autobatching_enabled,
max_number_of_batched_tasks: options.max_number_of_batched_tasks,
batched_tasks_size_limit: options.batched_tasks_size_limit,
dumps_path: options.dumps_path.clone(),
snapshots_path: options.snapshots_path.clone(),
autobatching_enabled: *autobatching_enabled,
max_number_of_batched_tasks: *max_number_of_batched_tasks,
batched_tasks_size_limit: *batched_tasks_size_limit,
dumps_path: dumps_path.clone(),
snapshots_path: snapshots_path.clone(),
auth_env,
version_file_path: options.version_file_path.clone(),
embedding_cache_cap: options.embedding_cache_cap,
experimental_no_snapshot_compaction: options.experimental_no_snapshot_compaction,
version_file_path: version_file_path.clone(),
embedding_cache_cap: *embedding_cache_cap,
experimental_no_snapshot_compaction: *experimental_no_snapshot_compaction,
s3_snapshot_options: indexer_config.s3_snapshot_options.clone(),
}
}
}
@@ -167,9 +200,10 @@ impl IndexScheduler {
// We reset the must_stop flag to be sure that we don't stop processing tasks
self.scheduler.must_stop_processing.reset();
let progress = self
.processing_tasks
.runtime_tasks
.write()
.unwrap()
.processing
// We can clone the processing batch here because we don't want its modification to affect the view of the processing batches
.start_processing(processing_batch.clone(), ids.clone());
@@ -420,7 +454,7 @@ impl IndexScheduler {
// We should stop processing AFTER everything is processed and written to disk otherwise, a batch (which only lives in RAM) may appear in the processing task
// and then become « not found » for some time until the commit everything is written and the final commit is made.
self.processing_tasks.write().unwrap().stop_processing();
self.runtime_tasks.write().unwrap().processing.stop_processing();
// Once the tasks are committed, we should delete all the update files associated ASAP to avoid leaking files in case of a restart
tracing::debug!("Deleting the update files");

View File

@@ -1,29 +1,45 @@
use std::collections::{BTreeSet, HashMap, HashSet};
use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
use std::fs::{remove_file, File};
use std::io::{ErrorKind, Seek, SeekFrom};
use std::panic::{catch_unwind, AssertUnwindSafe};
use std::sync::atomic::Ordering;
use std::time::Duration;
use bumpalo::Bump;
use byte_unit::Byte;
use meilisearch_types::batches::{BatchEnqueuedAt, BatchId};
use meilisearch_types::enterprise_edition::network::Remote;
use meilisearch_types::heed::{RoTxn, RwTxn};
use meilisearch_types::milli::progress::{Progress, VariableNameStep};
use meilisearch_types::milli::documents::PrimaryKey;
use meilisearch_types::milli::heed::CompactionOption;
use meilisearch_types::milli::progress::{EmbedderStats, Progress, VariableNameStep};
use meilisearch_types::milli::update::new::indexer;
use meilisearch_types::milli::update::new::indexer::enterprise_edition::sharding::Shards;
use meilisearch_types::milli::{self, ChannelCongestion};
use meilisearch_types::tasks::enterprise_edition::network::{NetworkTopologyState, Origin};
use meilisearch_types::tasks::{Details, IndexSwap, Kind, KindWithContent, Status, Task};
use meilisearch_types::versioning::{VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH};
use milli::update::Settings as MilliSettings;
use roaring::RoaringBitmap;
use tempfile::{PersistError, TempPath};
use time::OffsetDateTime;
use super::create_batch::Batch;
use crate::processing::{
AtomicBatchStep, AtomicTaskStep, CreateIndexProgress, DeleteIndexProgress, FinalizingIndexStep,
InnerSwappingTwoIndexes, SwappingTheIndexes, TaskCancelationProgress, TaskDeletionProgress,
UpdateIndexProgress,
IndexCompaction, InnerSwappingTwoIndexes, SwappingTheIndexes, TaskCancelationProgress,
TaskDeletionProgress, UpdateIndexProgress,
};
use crate::scheduler::process_export::{ExportContext, ExportOptions, TargetInstance};
use crate::utils::{
self, remove_n_tasks_datetime_earlier_than, remove_task_datetime, swap_index_uid_in_task,
ProcessingBatch,
};
use crate::{Error, IndexScheduler, Result, TaskId};
/// The name of the copy of the data.mdb file used during compaction.
const DATA_MDB_COPY_NAME: &str = "data.mdb.cpy";
#[derive(Debug, Default)]
pub struct ProcessBatchInfo {
/// The write channel congestion. None when unavailable: settings update.
@@ -418,6 +434,47 @@ impl IndexScheduler {
task.status = Status::Succeeded;
Ok((vec![task], ProcessBatchInfo::default()))
}
Batch::IndexCompaction { index_uid: _, mut task } => {
let KindWithContent::IndexCompaction { index_uid } = &task.kind else {
unreachable!()
};
let rtxn = self.env.read_txn()?;
let ret = catch_unwind(AssertUnwindSafe(|| {
self.apply_compaction(&rtxn, &progress, index_uid)
}));
let (pre_size, post_size) = match ret {
Ok(Ok(stats)) => stats,
Ok(Err(Error::AbortedTask)) => return Err(Error::AbortedTask),
Ok(Err(e)) => return Err(e),
Err(e) => {
let msg = match e.downcast_ref::<&'static str>() {
Some(s) => *s,
None => match e.downcast_ref::<String>() {
Some(s) => &s[..],
None => "Box<dyn Any>",
},
};
return Err(Error::Export(Box::new(Error::ProcessBatchPanicked(
msg.to_string(),
))));
}
};
task.status = Status::Succeeded;
if let Some(Details::IndexCompaction {
index_uid: _,
pre_compaction_size,
post_compaction_size,
}) = task.details.as_mut()
{
*pre_compaction_size = Some(Byte::from_u64(pre_size));
*post_compaction_size = Some(Byte::from_u64(post_size));
}
Ok((vec![task], ProcessBatchInfo::default()))
}
Batch::Export { mut task } => {
let KindWithContent::Export { url, api_key, payload_size, indexes } = &task.kind
else {
@@ -490,9 +547,295 @@ impl IndexScheduler {
Ok((tasks, ProcessBatchInfo::default()))
}
Batch::NetworkIndexBatch { mut network_task, inner_batch } => {
let (mut tasks, info) =
self.process_batch(*inner_batch, current_batch, progress)?;
let KindWithContent::NetworkTopologyChange(network_topology_change) =
&mut network_task.kind
else {
return Err(Error::CorruptedTaskQueue);
};
for task in &tasks {
let Some(network) = task.network.as_ref() else {
continue;
};
let Some(import) = network.import_data() else {
continue;
};
network_topology_change.process_remote_tasks(
&import.remote_name,
&import.index_name,
import.document_count,
);
}
tasks.push(network_task);
Ok((tasks, info))
}
Batch::NetworkWait { mut task } => {
let KindWithContent::NetworkTopologyChange(network_topology_change) =
&mut task.kind
else {
tracing::error!("network topology change task has the wrong kind with content");
return Err(Error::CorruptedTaskQueue);
};
let Some(task_network) = &task.network else {
tracing::error!("network topology change task has no network");
return Err(Error::CorruptedTaskQueue);
};
let origin;
let origin = match task_network.origin() {
Some(origin) => origin,
None => {
let myself =
network_topology_change.in_name().expect("origin is not the leader");
origin = Origin {
remote_name: myself.to_string(),
task_uid: task.uid,
network_version: task_network.network_version(),
};
&origin
}
};
if let Some((remotes, out_name)) = network_topology_change.export_to_process() {
self.balance_documents(
remotes,
out_name,
network_topology_change.in_name(),
origin,
&progress,
&self.scheduler.must_stop_processing,
)?;
}
network_topology_change.update_state();
if network_topology_change.state() == NetworkTopologyState::Finished {
task.status = Status::Succeeded;
}
Ok((vec![task], Default::default()))
}
}
}
fn balance_documents(
&self,
remotes: &BTreeMap<String, Remote>,
out_name: &str,
in_name: Option<&str>,
network_change_origin: &Origin,
progress: &Progress,
must_stop_processing: &crate::scheduler::MustStopProcessing,
) -> crate::Result<()> {
let new_shards = Shards::from_remotes_local(remotes.keys().map(String::as_str), in_name);
// TECHDEBT: this spawns a `ureq` agent additionally to `reqwest`. We probably want to harmonize all of this.
let agent = ureq::AgentBuilder::new().timeout(Duration::from_secs(5)).build();
let mut indexer_alloc = Bump::new();
let scheduler_rtxn = self.env.read_txn()?;
let index_count = self.index_mapper.index_count(&scheduler_rtxn)?;
// process by batches of 20MiB. Allow for compression? Don't forget about embeddings
let _: Vec<()> = self.index_mapper.try_for_each_index(
&scheduler_rtxn,
|index_uid, index| -> crate::Result<()> {
indexer_alloc.reset();
let err = |err| Error::from_milli(err, Some(index_uid.to_string()));
let index_rtxn = index.read_txn()?;
let all_docids = index.external_documents_ids();
let mut documents_to_move_to: HashMap<String, RoaringBitmap> = HashMap::new();
let mut documents_to_delete = RoaringBitmap::new();
for res in all_docids.iter(&index_rtxn)? {
let (external_docid, docid) = res?;
match new_shards.processing_shard(external_docid) {
Some(shard) if shard.is_own => continue,
Some(shard) => {
documents_to_move_to
.entry(shard.name.clone())
.or_default()
.insert(docid);
}
None => {
documents_to_delete.insert(docid);
}
}
}
let fields_ids_map = index.fields_ids_map(&index_rtxn)?;
for (remote, documents_to_move) in documents_to_move_to {
/// TODO: justify the unwrap
let remote = remotes.get(&remote).unwrap();
let target = TargetInstance {
base_url: &remote.url,
api_key: remote.write_api_key.as_deref(),
};
let options = ExportOptions {
index_uid,
payload_size: None,
override_settings: false,
export_mode: super::process_export::ExportMode::NetworkBalancing {
index_count,
export_old_remote_name: out_name,
network_change_origin,
},
};
let ctx = ExportContext {
index,
index_rtxn: &index_rtxn,
universe: &documents_to_move,
progress,
agent: &agent,
must_stop_processing,
};
self.export_one_index(target, options, ctx)?;
documents_to_delete |= documents_to_move;
}
if documents_to_delete.is_empty() {
return Ok(());
}
let mut new_fields_ids_map = fields_ids_map.clone();
// candidates not empty => index not empty => a primary key is set
let primary_key = index.primary_key(&index_rtxn)?.unwrap();
let primary_key = PrimaryKey::new_or_insert(primary_key, &mut new_fields_ids_map)
.map_err(milli::Error::from)
.map_err(err)?;
let mut index_wtxn = index.write_txn()?;
let mut indexer = indexer::DocumentDeletion::new();
indexer.delete_documents_by_docids(documents_to_delete);
let document_changes = indexer.into_changes(&indexer_alloc, primary_key);
let embedders = index
.embedding_configs()
.embedding_configs(&index_wtxn)
.map_err(milli::Error::from)
.map_err(err)?;
let embedders = self.embedders(index_uid.to_string(), embedders)?;
let indexer_config = self.index_mapper.indexer_config();
let pool = &indexer_config.thread_pool;
indexer::index(
&mut index_wtxn,
index,
pool,
indexer_config.grenad_parameters(),
&fields_ids_map,
new_fields_ids_map,
None, // document deletion never changes primary key
&document_changes,
embedders,
&|| must_stop_processing.get(),
&progress,
&EmbedderStats::default(),
)
.map_err(err)?;
index_wtxn.commit()?;
Ok(())
},
)?;
Ok(())
}
fn apply_compaction(
&self,
rtxn: &RoTxn,
progress: &Progress,
index_uid: &str,
) -> Result<(u64, u64)> {
// 1. Verify that the index exists
if !self.index_mapper.index_exists(rtxn, index_uid)? {
return Err(Error::IndexNotFound(index_uid.to_owned()));
}
// 2. We retrieve the index and create a temporary file in the index directory
progress.update_progress(IndexCompaction::RetrieveTheIndex);
let index = self.index_mapper.index(rtxn, index_uid)?;
// the index operation can take a long time, so save this handle to make it available to the search for the duration of the tick
self.index_mapper
.set_currently_updating_index(Some((index_uid.to_string(), index.clone())));
progress.update_progress(IndexCompaction::CreateTemporaryFile);
let src_path = index.path().join("data.mdb");
let pre_size = std::fs::metadata(&src_path)?.len();
let dst_path = TempPath::from_path(index.path().join(DATA_MDB_COPY_NAME));
let file = File::create(&dst_path)?;
let mut file = tempfile::NamedTempFile::from_parts(file, dst_path);
// 3. We copy the index data to the temporary file
progress.update_progress(IndexCompaction::CopyAndCompactTheIndex);
index
.copy_to_file(file.as_file_mut(), CompactionOption::Enabled)
.map_err(|error| Error::Milli { error, index_uid: Some(index_uid.to_string()) })?;
// ...and reset the file position as specified in the documentation
file.seek(SeekFrom::Start(0))?;
// 4. We replace the index data file with the temporary file
progress.update_progress(IndexCompaction::PersistTheCompactedIndex);
match file.persist(src_path) {
Ok(file) => file.sync_all()?,
// TODO see if we have a _resource busy_ error and probably handle this by:
// 1. closing the index, 2. replacing and 3. reopening it
Err(PersistError { error, file: _ }) => return Err(Error::IoError(error)),
};
// 5. Prepare to close the index
progress.update_progress(IndexCompaction::CloseTheIndex);
// unmark that the index is the processing one so we don't keep a handle to it, preventing its closing
self.index_mapper.set_currently_updating_index(None);
self.index_mapper.close_index(rtxn, index_uid)?;
drop(index);
progress.update_progress(IndexCompaction::ReopenTheIndex);
// 6. Reopen the index
// The index will use the compacted data file when being reopened
let index = self.index_mapper.index(rtxn, index_uid)?;
// if the update processed successfully, we're going to store the new
// stats of the index. Since the tasks have already been processed and
// this is a non-critical operation. If it fails, we should not fail
// the entire batch.
let res = || -> Result<_> {
let mut wtxn = self.env.write_txn()?;
let index_rtxn = index.read_txn()?;
let stats = crate::index_mapper::IndexStats::new(&index, &index_rtxn)
.map_err(|e| Error::from_milli(e, Some(index_uid.to_string())))?;
self.index_mapper.store_stats_of(&mut wtxn, index_uid, &stats)?;
wtxn.commit()?;
Ok(stats.database_size)
}();
let post_size = match res {
Ok(post_size) => post_size,
Err(e) => {
tracing::error!(
error = &e as &dyn std::error::Error,
"Could not write the stats of the index"
);
0
}
};
Ok((pre_size, post_size))
}
/// Swap the index `lhs` with the index `rhs`.
fn apply_index_swap(
&self,
@@ -576,7 +919,7 @@ impl IndexScheduler {
// 1. Remove from this list the tasks that we are not allowed to delete
let enqueued_tasks = self.queue.tasks.get_status(wtxn, Status::Enqueued)?;
let processing_tasks = &self.processing_tasks.read().unwrap().processing.clone();
let processing_tasks = &self.runtime_tasks.read().unwrap().processing.processing.clone();
let all_task_ids = self.queue.tasks.all_task_ids(wtxn)?;
let mut to_delete_tasks = all_task_ids & matched_tasks;
@@ -780,9 +1123,10 @@ impl IndexScheduler {
let enqueued_tasks = &self.queue.tasks.get_status(rtxn, Status::Enqueued)?;
// 0. Check if any upgrade task was matched.
// 0. Check if any upgrade or compaction tasks were matched.
// If so, we cancel all the failed or enqueued upgrade tasks.
let upgrade_tasks = &self.queue.tasks.get_kind(rtxn, Kind::UpgradeDatabase)?;
let compaction_tasks = &self.queue.tasks.get_kind(rtxn, Kind::IndexCompaction)?;
let is_canceling_upgrade = !matched_tasks.is_disjoint(upgrade_tasks);
if is_canceling_upgrade {
let failed_tasks = self.queue.tasks.get_status(rtxn, Status::Failed)?;
@@ -847,7 +1191,33 @@ impl IndexScheduler {
}
}
// 3. We now have a list of tasks to cancel, cancel them
// 3. If we are cancelling a compaction task, remove the tempfiles after incomplete compactions
for compaction_task in &tasks_to_cancel & compaction_tasks {
progress.update_progress(TaskCancelationProgress::CleaningCompactionLeftover);
let task = self.queue.tasks.get_task(rtxn, compaction_task)?.unwrap();
let Some(Details::IndexCompaction {
index_uid,
pre_compaction_size: _,
post_compaction_size: _,
}) = task.details
else {
unreachable!("wrong details for compaction task {compaction_task}")
};
let index_path = match self.index_mapper.index_mapping.get(rtxn, &index_uid)? {
Some(index_uuid) => self.index_mapper.index_path(index_uuid),
None => continue,
};
if let Err(e) = remove_file(index_path.join(DATA_MDB_COPY_NAME)) {
match e.kind() {
ErrorKind::NotFound => (),
_ => return Err(Error::IoError(e)),
}
}
}
// 4. We now have a list of tasks to cancel, cancel them
let (task_progress, progress_obj) = AtomicTaskStep::new(tasks_to_cancel.len() as u32);
progress.update_progress(progress_obj);

View File

@@ -7,6 +7,7 @@ use backoff::ExponentialBackoff;
use byte_unit::Byte;
use flate2::write::GzEncoder;
use flate2::Compression;
use meilisearch_types::error::Code;
use meilisearch_types::index_uid_pattern::IndexUidPattern;
use meilisearch_types::milli::constants::RESERVED_VECTORS_FIELD_NAME;
use meilisearch_types::milli::index::EmbeddingsWithMetadata;
@@ -15,7 +16,11 @@ use meilisearch_types::milli::update::{request_threads, Setting};
use meilisearch_types::milli::vector::parsed_vectors::{ExplicitVectors, VectorOrArrayOfVectors};
use meilisearch_types::milli::{self, obkv_to_json, Filter, InternalError};
use meilisearch_types::settings::{self, SecretPolicy};
use meilisearch_types::tasks::enterprise_edition::network::{
headers, ImportData, ImportMetadata, Origin,
};
use meilisearch_types::tasks::{DetailsExportIndexSettings, ExportIndexSettings};
use roaring::RoaringBitmap;
use serde::Deserialize;
use ureq::{json, Response};
@@ -50,6 +55,7 @@ impl IndexScheduler {
let agent = ureq::AgentBuilder::new().timeout(Duration::from_secs(5)).build();
let must_stop_processing = self.scheduler.must_stop_processing.clone();
for (i, (_pattern, uid, export_settings)) in indexes.iter().enumerate() {
let err = |err| Error::from_milli(err, Some(uid.to_string()));
if must_stop_processing.get() {
return Err(Error::AbortedTask);
}
@@ -61,104 +67,31 @@ impl IndexScheduler {
));
let ExportIndexSettings { filter, override_settings } = export_settings;
let index = self.index(uid)?;
let index_rtxn = index.read_txn()?;
let bearer = api_key.map(|api_key| format!("Bearer {api_key}"));
// First, check if the index already exists
let url = format!("{base_url}/indexes/{uid}");
let response = retry(&must_stop_processing, || {
let mut request = agent.get(&url);
if let Some(bearer) = &bearer {
request = request.set("Authorization", bearer);
}
request.send_bytes(Default::default()).map_err(into_backoff_error)
});
let index_exists = match response {
Ok(response) => response.status() == 200,
Err(Error::FromRemoteWhenExporting { code, .. }) if code == "index_not_found" => {
false
}
Err(e) => return Err(e),
};
let primary_key = index
.primary_key(&index_rtxn)
.map_err(|e| Error::from_milli(e.into(), Some(uid.to_string())))?;
// Create the index
if !index_exists {
let url = format!("{base_url}/indexes");
retry(&must_stop_processing, || {
let mut request = agent.post(&url);
if let Some(bearer) = &bearer {
request = request.set("Authorization", bearer);
}
let index_param = json!({ "uid": uid, "primaryKey": primary_key });
request.send_json(&index_param).map_err(into_backoff_error)
})?;
}
// Patch the index primary key
if index_exists && *override_settings {
let url = format!("{base_url}/indexes/{uid}");
retry(&must_stop_processing, || {
let mut request = agent.patch(&url);
if let Some(bearer) = &bearer {
request = request.set("Authorization", bearer);
}
let index_param = json!({ "primaryKey": primary_key });
request.send_json(&index_param).map_err(into_backoff_error)
})?;
}
// Send the index settings
if !index_exists || *override_settings {
let mut settings =
settings::settings(&index, &index_rtxn, SecretPolicy::RevealSecrets)
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
// Remove the experimental chat setting if not enabled
if self.features().check_chat_completions("exporting chat settings").is_err() {
settings.chat = Setting::NotSet;
}
// Retry logic for sending settings
let url = format!("{base_url}/indexes/{uid}/settings");
retry(&must_stop_processing, || {
let mut request = agent.patch(&url);
if let Some(bearer) = bearer.as_ref() {
request = request.set("Authorization", bearer);
}
request.send_json(settings.clone()).map_err(into_backoff_error)
})?;
}
let filter = filter
.as_ref()
.map(Filter::from_json)
.transpose()
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?
.flatten();
let filter_universe = filter
.map(|f| f.evaluate(&index_rtxn, &index))
.transpose()
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
let whole_universe = index
.documents_ids(&index_rtxn)
.map_err(|e| Error::from_milli(e.into(), Some(uid.to_string())))?;
let filter = filter.as_ref().map(Filter::from_json).transpose().map_err(err)?.flatten();
let filter_universe =
filter.map(|f| f.evaluate(&index_rtxn, &index)).transpose().map_err(err)?;
let whole_universe =
index.documents_ids(&index_rtxn).map_err(milli::Error::from).map_err(err)?;
let universe = filter_universe.unwrap_or(whole_universe);
let fields_ids_map = index.fields_ids_map(&index_rtxn)?;
let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
// We don't need to keep this one alive as we will
// spawn many threads to process the documents
drop(index_rtxn);
let total_documents = universe.len() as u32;
let (step, progress_step) = AtomicDocumentStep::new(total_documents);
progress.update_progress(progress_step);
let target = TargetInstance { base_url, api_key };
let ctx = ExportContext {
index: &index,
index_rtxn: &index_rtxn,
universe: &universe,
progress: &progress,
agent: &agent,
must_stop_processing: &must_stop_processing,
};
let options = ExportOptions {
index_uid: uid,
payload_size,
override_settings: *override_settings,
export_mode: ExportMode::ExportRoute,
};
let total_documents = self.export_one_index(target, options, ctx)?;
output.insert(
IndexUidPattern::new_unchecked(uid.clone()),
@@ -167,155 +100,307 @@ impl IndexScheduler {
matched_documents: Some(total_documents as u64),
},
);
let limit = payload_size.map(|ps| ps.as_u64() as usize).unwrap_or(20 * 1024 * 1024); // defaults to 20 MiB
let documents_url = format!("{base_url}/indexes/{uid}/documents");
let results = request_threads()
.broadcast(|ctx| {
let index_rtxn = index
.read_txn()
.map_err(|e| Error::from_milli(e.into(), Some(uid.to_string())))?;
let mut buffer = Vec::new();
let mut tmp_buffer = Vec::new();
let mut compressed_buffer = Vec::new();
for (i, docid) in universe.iter().enumerate() {
if i % ctx.num_threads() != ctx.index() {
continue;
}
let document = index
.document(&index_rtxn, docid)
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
let mut document = obkv_to_json(&all_fields, &fields_ids_map, document)
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
// TODO definitely factorize this code
'inject_vectors: {
let embeddings = index
.embeddings(&index_rtxn, docid)
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
if embeddings.is_empty() {
break 'inject_vectors;
}
let vectors = document
.entry(RESERVED_VECTORS_FIELD_NAME)
.or_insert(serde_json::Value::Object(Default::default()));
let serde_json::Value::Object(vectors) = vectors else {
return Err(Error::from_milli(
milli::Error::UserError(
milli::UserError::InvalidVectorsMapType {
document_id: {
if let Ok(Some(Ok(index))) = index
.external_id_of(
&index_rtxn,
std::iter::once(docid),
)
.map(|it| it.into_iter().next())
{
index
} else {
format!("internal docid={docid}")
}
},
value: vectors.clone(),
},
),
Some(uid.to_string()),
));
};
for (
embedder_name,
EmbeddingsWithMetadata { embeddings, regenerate, has_fragments },
) in embeddings
{
let embeddings = ExplicitVectors {
embeddings: Some(
VectorOrArrayOfVectors::from_array_of_vectors(embeddings),
),
regenerate: regenerate &&
// Meilisearch does not handle well dumps with fragments, because as the fragments
// are marked as user-provided,
// all embeddings would be regenerated on any settings change or document update.
// To prevent this, we mark embeddings has non regenerate in this case.
!has_fragments,
};
vectors.insert(
embedder_name,
serde_json::to_value(embeddings).unwrap(),
);
}
}
tmp_buffer.clear();
serde_json::to_writer(&mut tmp_buffer, &document)
.map_err(milli::InternalError::from)
.map_err(|e| Error::from_milli(e.into(), Some(uid.to_string())))?;
// Make sure we put at least one document in the buffer even
// though we might go above the buffer limit before sending
if !buffer.is_empty() && buffer.len() + tmp_buffer.len() > limit {
// We compress the documents before sending them
let mut encoder =
GzEncoder::new(&mut compressed_buffer, Compression::default());
encoder
.write_all(&buffer)
.map_err(|e| Error::from_milli(e.into(), Some(uid.clone())))?;
encoder
.finish()
.map_err(|e| Error::from_milli(e.into(), Some(uid.clone())))?;
retry(&must_stop_processing, || {
let mut request = agent.post(&documents_url);
request = request.set("Content-Type", "application/x-ndjson");
request = request.set("Content-Encoding", "gzip");
if let Some(bearer) = &bearer {
request = request.set("Authorization", bearer);
}
request.send_bytes(&compressed_buffer).map_err(into_backoff_error)
})?;
buffer.clear();
compressed_buffer.clear();
}
buffer.extend_from_slice(&tmp_buffer);
if i > 0 && i % 100 == 0 {
step.fetch_add(100, atomic::Ordering::Relaxed);
}
}
retry(&must_stop_processing, || {
let mut request = agent.post(&documents_url);
request = request.set("Content-Type", "application/x-ndjson");
if let Some(bearer) = &bearer {
request = request.set("Authorization", bearer);
}
request.send_bytes(&buffer).map_err(into_backoff_error)
})?;
Ok(())
})
.map_err(|e| {
Error::from_milli(
milli::Error::InternalError(InternalError::PanicInThreadPool(e)),
Some(uid.to_string()),
)
})?;
for result in results {
result?;
}
step.store(total_documents, atomic::Ordering::Relaxed);
}
Ok(output)
}
pub(super) fn export_one_index(
&self,
target: TargetInstance<'_>,
options: ExportOptions<'_>,
ctx: ExportContext<'_>,
) -> Result<u64, Error> {
let err = |err| Error::from_milli(err, Some(options.index_uid.to_string()));
let bearer = target.api_key.map(|api_key| format!("Bearer {api_key}"));
let url = format!(
"{base_url}/indexes/{index_uid}",
base_url = target.base_url,
index_uid = options.index_uid
);
let response = retry(ctx.must_stop_processing, || {
let mut request = ctx.agent.get(&url);
if let Some(bearer) = &bearer {
request = request.set("Authorization", bearer);
}
request.send_bytes(Default::default()).map_err(into_backoff_error)
});
let index_exists = match response {
Ok(response) => response.status() == 200,
Err(Error::FromRemoteWhenExporting { code, .. })
if code == Code::IndexNotFound.name() =>
{
false
}
Err(e) => return Err(e),
};
let primary_key =
ctx.index.primary_key(&ctx.index_rtxn).map_err(milli::Error::from).map_err(err)?;
if !index_exists {
let url = format!("{base_url}/indexes", base_url = target.base_url);
retry(ctx.must_stop_processing, || {
let mut request = ctx.agent.post(&url);
if let Some(bearer) = &bearer {
request = request.set("Authorization", bearer);
}
let index_param = json!({ "uid": options.index_uid, "primaryKey": primary_key });
request.send_json(&index_param).map_err(into_backoff_error)
})?;
}
if index_exists && options.override_settings {
retry(ctx.must_stop_processing, || {
let mut request = ctx.agent.patch(&url);
if let Some(bearer) = &bearer {
request = request.set("Authorization", bearer);
}
let index_param = json!({ "primaryKey": primary_key });
request.send_json(&index_param).map_err(into_backoff_error)
})?;
}
if !index_exists || options.override_settings {
let mut settings =
settings::settings(&ctx.index, &ctx.index_rtxn, SecretPolicy::RevealSecrets)
.map_err(err)?;
// Remove the experimental chat setting if not enabled
if self.features().check_chat_completions("exporting chat settings").is_err() {
settings.chat = Setting::NotSet;
}
// Retry logic for sending settings
let url = format!(
"{base_url}/indexes/{index_uid}/settings",
base_url = target.base_url,
index_uid = options.index_uid
);
retry(ctx.must_stop_processing, || {
let mut request = ctx.agent.patch(&url);
if let Some(bearer) = bearer.as_ref() {
request = request.set("Authorization", bearer);
}
request.send_json(settings.clone()).map_err(into_backoff_error)
})?;
}
let fields_ids_map = ctx.index.fields_ids_map(&ctx.index_rtxn)?;
let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
let total_documents = ctx.universe.len() as u32;
let (step, progress_step) = AtomicDocumentStep::new(total_documents);
ctx.progress.update_progress(progress_step);
let limit = options.payload_size.map(|ps| ps.as_u64() as usize).unwrap_or(20 * 1024 * 1024);
let documents_url = format!(
"{base_url}/indexes/{index_uid}/documents",
base_url = target.base_url,
index_uid = options.index_uid
);
let results = request_threads()
.broadcast(|broadcast| {
let mut task_network = if let ExportMode::NetworkBalancing {
index_count,
export_old_remote_name,
network_change_origin,
} = options.export_mode
{
Some((
ImportData {
remote_name: export_old_remote_name.to_string(),
index_name: options.index_uid.to_string(),
document_count: 0,
},
network_change_origin.clone(),
ImportMetadata {
index_count,
task_key: 0,
total_index_documents: ctx.universe.len(),
},
))
} else {
None
};
let index_rtxn = ctx.index.read_txn().map_err(milli::Error::from).map_err(err)?;
let mut buffer = Vec::new();
let mut tmp_buffer = Vec::new();
let mut compressed_buffer = Vec::new();
for (i, docid) in ctx.universe.iter().enumerate() {
if i % broadcast.num_threads() != broadcast.index() {
continue;
}
if let Some((import_data, _, metadata)) = &mut task_network {
import_data.document_count += 1;
metadata.task_key = docid;
}
let document = ctx.index.document(&index_rtxn, docid).map_err(err)?;
let mut document =
obkv_to_json(&all_fields, &fields_ids_map, document).map_err(err)?;
// TODO definitely factorize this code
'inject_vectors: {
let embeddings = ctx.index.embeddings(&index_rtxn, docid).map_err(err)?;
if embeddings.is_empty() {
break 'inject_vectors;
}
let vectors = document
.entry(RESERVED_VECTORS_FIELD_NAME)
.or_insert(serde_json::Value::Object(Default::default()));
let serde_json::Value::Object(vectors) = vectors else {
return Err(err(milli::Error::UserError(
milli::UserError::InvalidVectorsMapType {
document_id: {
if let Ok(Some(Ok(index))) = ctx
.index
.external_id_of(&index_rtxn, std::iter::once(docid))
.map(|it| it.into_iter().next())
{
index
} else {
format!("internal docid={docid}")
}
},
value: vectors.clone(),
},
)));
};
for (
embedder_name,
EmbeddingsWithMetadata { embeddings, regenerate, has_fragments },
) in embeddings
{
let embeddings = ExplicitVectors {
embeddings: Some(VectorOrArrayOfVectors::from_array_of_vectors(
embeddings,
)),
regenerate: regenerate &&
// Meilisearch does not handle well dumps with fragments, because as the fragments
// are marked as user-provided,
// all embeddings would be regenerated on any settings change or document update.
// To prevent this, we mark embeddings has non regenerate in this case.
!has_fragments,
};
vectors
.insert(embedder_name, serde_json::to_value(embeddings).unwrap());
}
}
tmp_buffer.clear();
serde_json::to_writer(&mut tmp_buffer, &document)
.map_err(milli::InternalError::from)
.map_err(milli::Error::from)
.map_err(err)?;
// Make sure we put at least one document in the buffer even
// though we might go above the buffer limit before sending
if !buffer.is_empty() && buffer.len() + tmp_buffer.len() > limit {
// We compress the documents before sending them
let mut encoder =
GzEncoder::new(&mut compressed_buffer, Compression::default());
encoder.write_all(&buffer).map_err(milli::Error::from).map_err(err)?;
encoder.finish().map_err(milli::Error::from).map_err(err)?;
match retry(ctx.must_stop_processing, || {
let mut request = ctx.agent.post(&documents_url);
request = request.set("Content-Type", "application/x-ndjson");
request = request.set("Content-Encoding", "gzip");
if let Some(bearer) = &bearer {
request = request.set("Authorization", bearer);
}
if let Some((import_data, origin, metadata)) = &task_network {
request = set_network_ureq_headers(
request,
import_data,
origin,
metadata,
);
}
request.send_bytes(&compressed_buffer).map_err(into_backoff_error)
}) {
Ok(_response) => {}
Err(Error::FromRemoteWhenExporting { code, .. })
if code == Code::ImportTaskAlreadyReceived.name() =>
{
continue;
}
Err(Error::FromRemoteWhenExporting { code, message, .. })
if code == Code::ImportTaskUnknownRemote.name() =>
{
tracing::warn!("remote answered with: {message}");
break;
}
Err(Error::FromRemoteWhenExporting { code, message, .. })
if code == Code::ImportTaskWithoutNetworkTask.name() =>
{
tracing::warn!("remote answered with: {message}");
break;
}
Err(e) => return Err(e),
}
buffer.clear();
compressed_buffer.clear();
if let Some((import_data, _, metadata)) = &mut task_network {
import_data.document_count = 0;
metadata.task_key = 0;
}
}
buffer.extend_from_slice(&tmp_buffer);
if i > 0 && i % 100 == 0 {
step.fetch_add(100, atomic::Ordering::Relaxed);
}
}
retry(ctx.must_stop_processing, || {
let mut request = ctx.agent.post(&documents_url);
request = request.set("Content-Type", "application/x-ndjson");
if let Some((import_data, origin, metadata)) = &task_network {
request = set_network_ureq_headers(request, import_data, origin, metadata);
}
if let Some(bearer) = &bearer {
request = request.set("Authorization", bearer);
}
request.send_bytes(&buffer).map_err(into_backoff_error)
})?;
Ok(())
})
.map_err(|e| err(milli::Error::InternalError(InternalError::PanicInThreadPool(e))))?;
for result in results {
result?;
}
step.store(total_documents, atomic::Ordering::Relaxed);
Ok(total_documents as u64)
}
}
fn set_network_ureq_headers(
request: ureq::Request,
import_data: &ImportData,
origin: &Origin,
metadata: &ImportMetadata,
) -> ureq::Request {
request
.set(headers::PROXY_ORIGIN_REMOTE_HEADER, &origin.remote_name)
.set(headers::PROXY_ORIGIN_TASK_UID_HEADER, &origin.task_uid.to_string())
.set(
headers::PROXY_ORIGIN_NETWORK_VERSION_HEADER,
&origin.network_version.to_u128_le().to_string(),
)
.set(headers::PROXY_IMPORT_REMOTE_HEADER, &import_data.remote_name)
.set(headers::PROXY_IMPORT_INDEX_HEADER, &import_data.index_name)
.set(headers::PROXY_IMPORT_TASK_KEY_HEADER, &metadata.task_key.to_string())
.set(headers::PROXY_IMPORT_DOCS_HEADER, &import_data.document_count.to_string())
.set(headers::PROXY_IMPORT_INDEX_COUNT_HEADER, &metadata.index_count.to_string())
.set(
headers::PROXY_IMPORT_TOTAL_INDEX_DOCS_HEADER,
&metadata.total_index_documents.to_string(),
)
}
fn retry<F>(must_stop_processing: &MustStopProcessing, send_request: F) -> Result<ureq::Response>
@@ -370,8 +455,41 @@ fn ureq_error_into_error(error: ureq::Error) -> Error {
}
Err(e) => e.into(),
},
ureq::Error::Transport(transport) => io::Error::new(io::ErrorKind::Other, transport).into(),
ureq::Error::Transport(transport) => io::Error::other(transport).into(),
}
}
// export_one_index arguments
pub(super) struct TargetInstance<'a> {
pub(super) base_url: &'a str,
pub(super) api_key: Option<&'a str>,
}
pub(super) struct ExportOptions<'a> {
pub(super) index_uid: &'a str,
pub(super) payload_size: Option<&'a Byte>,
pub(super) override_settings: bool,
pub(super) export_mode: ExportMode<'a>,
}
pub(super) struct ExportContext<'a> {
pub(super) index: &'a meilisearch_types::milli::Index,
pub(super) index_rtxn: &'a milli::heed::RoTxn<'a>,
pub(super) universe: &'a RoaringBitmap,
pub(super) progress: &'a Progress,
pub(super) agent: &'a ureq::Agent,
pub(super) must_stop_processing: &'a MustStopProcessing,
}
pub(super) enum ExportMode<'a> {
ExportRoute,
NetworkBalancing {
index_count: u64,
export_old_remote_name: &'a str,
network_change_origin: &'a Origin,
},
}
// progress related
enum ExportIndex {}

View File

@@ -12,6 +12,8 @@ use crate::processing::{AtomicUpdateFileStep, SnapshotCreationProgress};
use crate::queue::TaskQueue;
use crate::{Error, IndexScheduler, Result};
const UPDATE_FILES_DIR_NAME: &str = "update_files";
/// # Safety
///
/// See [`EnvOpenOptions::open`].
@@ -78,10 +80,32 @@ impl IndexScheduler {
pub(super) fn process_snapshot(
&self,
progress: Progress,
mut tasks: Vec<Task>,
tasks: Vec<Task>,
) -> Result<Vec<Task>> {
progress.update_progress(SnapshotCreationProgress::StartTheSnapshotCreation);
match self.scheduler.s3_snapshot_options.clone() {
Some(options) => {
#[cfg(not(unix))]
{
let _ = options;
panic!("Non-unix platform does not support S3 snapshotting");
}
#[cfg(unix)]
self.runtime
.as_ref()
.expect("Runtime not initialized")
.block_on(self.process_snapshot_to_s3(progress, options, tasks))
}
None => self.process_snapshots_to_disk(progress, tasks),
}
}
fn process_snapshots_to_disk(
&self,
progress: Progress,
mut tasks: Vec<Task>,
) -> Result<Vec<Task>, Error> {
fs::create_dir_all(&self.scheduler.snapshots_path)?;
let temp_snapshot_dir = tempfile::tempdir()?;
@@ -128,7 +152,7 @@ impl IndexScheduler {
let rtxn = self.env.read_txn()?;
// 2.4 Create the update files directory
let update_files_dir = temp_snapshot_dir.path().join("update_files");
let update_files_dir = temp_snapshot_dir.path().join(UPDATE_FILES_DIR_NAME);
fs::create_dir_all(&update_files_dir)?;
// 2.5 Only copy the update files of the enqueued tasks
@@ -140,7 +164,7 @@ impl IndexScheduler {
let task =
self.queue.tasks.get_task(&rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?;
if let Some(content_uuid) = task.content_uuid() {
let src = self.queue.file_store.get_update_path(content_uuid);
let src = self.queue.file_store.update_path(content_uuid);
let dst = update_files_dir.join(content_uuid.to_string());
fs::copy(src, dst)?;
}
@@ -206,4 +230,403 @@ impl IndexScheduler {
Ok(tasks)
}
#[cfg(unix)]
pub(super) async fn process_snapshot_to_s3(
&self,
progress: Progress,
opts: meilisearch_types::milli::update::S3SnapshotOptions,
mut tasks: Vec<Task>,
) -> Result<Vec<Task>> {
use meilisearch_types::milli::update::S3SnapshotOptions;
let S3SnapshotOptions {
s3_bucket_url,
s3_bucket_region,
s3_bucket_name,
s3_snapshot_prefix,
s3_access_key,
s3_secret_key,
s3_max_in_flight_parts,
s3_compression_level: level,
s3_signature_duration,
s3_multipart_part_size,
} = opts;
let must_stop_processing = self.scheduler.must_stop_processing.clone();
let retry_backoff = backoff::ExponentialBackoff::default();
let db_name = {
let mut base_path = self.env.path().to_owned();
base_path.pop();
base_path.file_name().and_then(OsStr::to_str).unwrap_or("data.ms").to_string()
};
let (reader, writer) = std::io::pipe()?;
let uploader_task = tokio::spawn(multipart_stream_to_s3(
s3_bucket_url,
s3_bucket_region,
s3_bucket_name,
s3_snapshot_prefix,
s3_access_key,
s3_secret_key,
s3_max_in_flight_parts,
s3_signature_duration,
s3_multipart_part_size,
must_stop_processing,
retry_backoff,
db_name,
reader,
));
let index_scheduler = IndexScheduler::private_clone(self);
let builder_task = tokio::task::spawn_blocking(move || {
stream_tarball_into_pipe(progress, level, writer, index_scheduler)
});
let (uploader_result, builder_result) = tokio::join!(uploader_task, builder_task);
// Check uploader result first to early return on task abortion.
// safety: JoinHandle can return an error if the task was aborted, cancelled, or panicked.
uploader_result.unwrap()?;
builder_result.unwrap()?;
for task in &mut tasks {
task.status = Status::Succeeded;
}
Ok(tasks)
}
}
/// Streams a tarball of the database content into a pipe.
#[cfg(unix)]
fn stream_tarball_into_pipe(
progress: Progress,
level: u32,
writer: std::io::PipeWriter,
index_scheduler: IndexScheduler,
) -> std::result::Result<(), Error> {
use std::io::Write as _;
use std::path::Path;
let writer = flate2::write::GzEncoder::new(writer, flate2::Compression::new(level));
let mut tarball = tar::Builder::new(writer);
// 1. Snapshot the version file
tarball
.append_path_with_name(&index_scheduler.scheduler.version_file_path, VERSION_FILE_NAME)?;
// 2. Snapshot the index scheduler LMDB env
progress.update_progress(SnapshotCreationProgress::SnapshotTheIndexScheduler);
let tasks_env_file = index_scheduler.env.try_clone_inner_file()?;
let path = Path::new("tasks").join("data.mdb");
append_file_to_tarball(&mut tarball, path, tasks_env_file)?;
// 2.3 Create a read transaction on the index-scheduler
let rtxn = index_scheduler.env.read_txn()?;
// 2.4 Create the update files directory
// And only copy the update files of the enqueued tasks
progress.update_progress(SnapshotCreationProgress::SnapshotTheUpdateFiles);
let enqueued = index_scheduler.queue.tasks.get_status(&rtxn, Status::Enqueued)?;
let (atomic, update_file_progress) = AtomicUpdateFileStep::new(enqueued.len() as u32);
progress.update_progress(update_file_progress);
// We create the update_files directory so that it
// always exists even if there are no update files
let update_files_dir = Path::new(UPDATE_FILES_DIR_NAME);
let src_update_files_dir = {
let mut path = index_scheduler.env.path().to_path_buf();
path.pop();
path.join(UPDATE_FILES_DIR_NAME)
};
tarball.append_dir(update_files_dir, src_update_files_dir)?;
for task_id in enqueued {
let task = index_scheduler
.queue
.tasks
.get_task(&rtxn, task_id)?
.ok_or(Error::CorruptedTaskQueue)?;
if let Some(content_uuid) = task.content_uuid() {
use std::fs::File;
let src = index_scheduler.queue.file_store.update_path(content_uuid);
let mut update_file = File::open(src)?;
let path = update_files_dir.join(content_uuid.to_string());
tarball.append_file(path, &mut update_file)?;
}
atomic.fetch_add(1, Ordering::Relaxed);
}
// 3. Snapshot every indexes
progress.update_progress(SnapshotCreationProgress::SnapshotTheIndexes);
let index_mapping = index_scheduler.index_mapper.index_mapping;
let nb_indexes = index_mapping.len(&rtxn)? as u32;
let indexes_dir = Path::new("indexes");
let indexes_references: Vec<_> = index_scheduler
.index_mapper
.index_mapping
.iter(&rtxn)?
.map(|res| res.map_err(Error::from).map(|(name, uuid)| (name.to_string(), uuid)))
.collect::<Result<_, Error>>()?;
// It's prettier to use a for loop instead of the IndexMapper::try_for_each_index
// method, especially when we need to access the UUID, local path and index number.
for (i, (name, uuid)) in indexes_references.into_iter().enumerate() {
progress.update_progress(VariableNameStep::<SnapshotCreationProgress>::new(
&name, i as u32, nb_indexes,
));
let path = indexes_dir.join(uuid.to_string()).join("data.mdb");
let index = index_scheduler.index_mapper.index(&rtxn, &name)?;
let index_file = index.try_clone_inner_file()?;
tracing::trace!("Appending index file for {name} in {}", path.display());
append_file_to_tarball(&mut tarball, path, index_file)?;
}
drop(rtxn);
// 4. Snapshot the auth LMDB env
progress.update_progress(SnapshotCreationProgress::SnapshotTheApiKeys);
let auth_env_file = index_scheduler.scheduler.auth_env.try_clone_inner_file()?;
let path = Path::new("auth").join("data.mdb");
append_file_to_tarball(&mut tarball, path, auth_env_file)?;
let mut gzencoder = tarball.into_inner()?;
gzencoder.flush()?;
gzencoder.try_finish()?;
let mut writer = gzencoder.finish()?;
writer.flush()?;
Result::<_, Error>::Ok(())
}
#[cfg(unix)]
fn append_file_to_tarball<W, P>(
tarball: &mut tar::Builder<W>,
path: P,
mut auth_env_file: fs::File,
) -> Result<(), Error>
where
W: std::io::Write,
P: AsRef<std::path::Path>,
{
use std::io::{Seek as _, SeekFrom};
// Note: A previous snapshot operation may have left the cursor
// at the end of the file so we need to seek to the start.
auth_env_file.seek(SeekFrom::Start(0))?;
tarball.append_file(path, &mut auth_env_file)?;
Ok(())
}
/// Streams the content read from the given reader to S3.
#[cfg(unix)]
#[allow(clippy::too_many_arguments)]
async fn multipart_stream_to_s3(
s3_bucket_url: String,
s3_bucket_region: String,
s3_bucket_name: String,
s3_snapshot_prefix: String,
s3_access_key: String,
s3_secret_key: String,
s3_max_in_flight_parts: std::num::NonZero<usize>,
s3_signature_duration: std::time::Duration,
s3_multipart_part_size: u64,
must_stop_processing: super::MustStopProcessing,
retry_backoff: backoff::exponential::ExponentialBackoff<backoff::SystemClock>,
db_name: String,
reader: std::io::PipeReader,
) -> Result<(), Error> {
use std::{collections::VecDeque, os::fd::OwnedFd, path::PathBuf};
use bytes::{Bytes, BytesMut};
use reqwest::{Client, Response};
use rusty_s3::S3Action as _;
use rusty_s3::{actions::CreateMultipartUpload, Bucket, BucketError, Credentials, UrlStyle};
use tokio::task::JoinHandle;
let reader = OwnedFd::from(reader);
let reader = tokio::net::unix::pipe::Receiver::from_owned_fd(reader)?;
let s3_snapshot_prefix = PathBuf::from(s3_snapshot_prefix);
let url =
s3_bucket_url.parse().map_err(BucketError::ParseError).map_err(Error::S3BucketError)?;
let bucket = Bucket::new(url, UrlStyle::Path, s3_bucket_name, s3_bucket_region)
.map_err(Error::S3BucketError)?;
let credential = Credentials::new(s3_access_key, s3_secret_key);
// Note for the future (rust 1.91+): use with_added_extension, it's prettier
let object_path = s3_snapshot_prefix.join(format!("{db_name}.snapshot"));
// Note: It doesn't work on Windows and if a port to this platform is needed,
// use the slash-path crate or similar to get the correct path separator.
let object = object_path.display().to_string();
let action = bucket.create_multipart_upload(Some(&credential), &object);
let url = action.sign(s3_signature_duration);
let client = Client::new();
let resp = client.post(url).send().await.map_err(Error::S3HttpError)?;
let status = resp.status();
let body = match resp.error_for_status_ref() {
Ok(_) => resp.text().await.map_err(Error::S3HttpError)?,
Err(_) => {
return Err(Error::S3Error { status, body: resp.text().await.unwrap_or_default() })
}
};
let multipart =
CreateMultipartUpload::parse_response(&body).map_err(|e| Error::S3XmlError(Box::new(e)))?;
tracing::debug!("Starting the upload of the snapshot to {object}");
// We use this bumpalo for etags strings.
let bump = bumpalo::Bump::new();
let mut etags = Vec::<&str>::new();
let mut in_flight = VecDeque::<(JoinHandle<reqwest::Result<Response>>, Bytes)>::with_capacity(
s3_max_in_flight_parts.get(),
);
// Part numbers start at 1 and cannot be larger than 10k
for part_number in 1u16.. {
if must_stop_processing.get() {
return Err(Error::AbortedTask);
}
let part_upload =
bucket.upload_part(Some(&credential), &object, part_number, multipart.upload_id());
let url = part_upload.sign(s3_signature_duration);
// Wait for a buffer to be ready if there are in-flight parts that landed
let mut buffer = if in_flight.len() >= s3_max_in_flight_parts.get() {
let (handle, buffer) = in_flight.pop_front().expect("At least one in flight request");
let resp = join_and_map_error(handle).await?;
extract_and_append_etag(&bump, &mut etags, resp.headers())?;
let mut buffer = match buffer.try_into_mut() {
Ok(buffer) => buffer,
Err(_) => unreachable!("All bytes references were consumed in the task"),
};
buffer.clear();
buffer
} else {
BytesMut::with_capacity(s3_multipart_part_size as usize)
};
// If we successfully read enough bytes,
// we can continue and send the buffer/part
while buffer.len() < (s3_multipart_part_size as usize / 2) {
// Wait for the pipe to be readable
use std::io;
reader.readable().await?;
match reader.try_read_buf(&mut buffer) {
Ok(0) => break,
// We read some bytes but maybe not enough
Ok(_) => continue,
// The readiness event is a false positive.
Err(ref e) if e.kind() == io::ErrorKind::WouldBlock => continue,
Err(e) => return Err(e.into()),
}
}
if buffer.is_empty() {
// Break the loop if the buffer is
// empty after we tried to read bytes
break;
}
let body = buffer.freeze();
tracing::trace!("Sending part {part_number}");
let task = tokio::spawn({
let client = client.clone();
let body = body.clone();
backoff::future::retry(retry_backoff.clone(), move || {
let client = client.clone();
let url = url.clone();
let body = body.clone();
async move {
match client.put(url).body(body).send().await {
Ok(resp) if resp.status().is_client_error() => {
resp.error_for_status().map_err(backoff::Error::Permanent)
}
Ok(resp) => Ok(resp),
Err(e) => Err(backoff::Error::transient(e)),
}
}
})
});
in_flight.push_back((task, body));
}
for (handle, _buffer) in in_flight {
let resp = join_and_map_error(handle).await?;
extract_and_append_etag(&bump, &mut etags, resp.headers())?;
}
tracing::debug!("Finalizing the multipart upload");
let action = bucket.complete_multipart_upload(
Some(&credential),
&object,
multipart.upload_id(),
etags.iter().map(AsRef::as_ref),
);
let url = action.sign(s3_signature_duration);
let body = action.body();
let resp = backoff::future::retry(retry_backoff, move || {
let client = client.clone();
let url = url.clone();
let body = body.clone();
async move {
match client.post(url).body(body).send().await {
Ok(resp) if resp.status().is_client_error() => {
resp.error_for_status().map_err(backoff::Error::Permanent)
}
Ok(resp) => Ok(resp),
Err(e) => Err(backoff::Error::transient(e)),
}
}
})
.await
.map_err(Error::S3HttpError)?;
let status = resp.status();
let body = resp.text().await.map_err(|e| Error::S3Error { status, body: e.to_string() })?;
if status.is_success() {
Ok(())
} else {
Err(Error::S3Error { status, body })
}
}
#[cfg(unix)]
async fn join_and_map_error(
join_handle: tokio::task::JoinHandle<Result<reqwest::Response, reqwest::Error>>,
) -> Result<reqwest::Response> {
// safety: Panic happens if the task (JoinHandle) was aborted, cancelled, or panicked
let request = join_handle.await.unwrap();
let resp = request.map_err(Error::S3HttpError)?;
match resp.error_for_status_ref() {
Ok(_) => Ok(resp),
Err(_) => Err(Error::S3Error {
status: resp.status(),
body: resp.text().await.unwrap_or_default(),
}),
}
}
#[cfg(unix)]
fn extract_and_append_etag<'b>(
bump: &'b bumpalo::Bump,
etags: &mut Vec<&'b str>,
headers: &reqwest::header::HeaderMap,
) -> Result<()> {
use reqwest::header::ETAG;
let etag = headers.get(ETAG).ok_or_else(|| Error::S3XmlError("Missing ETag header".into()))?;
let etag = etag.to_str().map_err(|e| Error::S3XmlError(Box::new(e)))?;
etags.push(bump.alloc_str(etag));
Ok(())
}

View File

@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 21, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 26, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
1 {uid: 1, batch_uid: 1, status: succeeded, details: { primary_key: Some("mouse"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
2 {uid: 2, batch_uid: 2, status: succeeded, details: { primary_key: Some("bone"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
3 {uid: 3, batch_uid: 3, status: failed, error: ResponseError { code: 200, message: "Index `doggo` already exists.", error_code: "index_already_exists", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_already_exists" }, details: { primary_key: Some("bone"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
@@ -57,7 +57,7 @@ girafo: { number_of_documents: 0, field_distribution: {} }
[timestamp] [4,]
----------------------------------------------------------------------
### All Batches:
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.21.0"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.26.0"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
1 {uid: 1, details: {"primaryKey":"mouse"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"catto":1}}, stop reason: "created batch containing only task with id 1 of type `indexCreation` that cannot be batched with any other task.", }
2 {uid: 2, details: {"primaryKey":"bone"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"doggo":1}}, stop reason: "created batch containing only task with id 2 of type `indexCreation` that cannot be batched with any other task.", }
3 {uid: 3, details: {"primaryKey":"bone"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"indexCreation":1},"indexUids":{"doggo":1}}, stop reason: "created batch containing only task with id 3 of type `indexCreation` that cannot be batched with any other task.", }

View File

@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 21, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 26, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
----------------------------------------------------------------------
### Status:
enqueued [0,]

View File

@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 21, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 26, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
----------------------------------------------------------------------
### Status:

View File

@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 21, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 26, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
----------------------------------------------------------------------
### Status:
@@ -37,7 +37,7 @@ catto [1,]
[timestamp] [0,]
----------------------------------------------------------------------
### All Batches:
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.21.0"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.26.0"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
----------------------------------------------------------------------
### Batch to tasks mapping:
0 [0,]

View File

@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 21, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 26, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
2 {uid: 2, status: enqueued, details: { primary_key: Some("bone"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
----------------------------------------------------------------------
@@ -40,7 +40,7 @@ doggo [2,]
[timestamp] [0,]
----------------------------------------------------------------------
### All Batches:
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.21.0"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.26.0"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
----------------------------------------------------------------------
### Batch to tasks mapping:
0 [0,]

View File

@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 21, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 26, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
2 {uid: 2, status: enqueued, details: { primary_key: Some("bone"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
3 {uid: 3, status: enqueued, details: { primary_key: Some("bone"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
@@ -43,7 +43,7 @@ doggo [2,3,]
[timestamp] [0,]
----------------------------------------------------------------------
### All Batches:
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.21.0"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.26.0"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
----------------------------------------------------------------------
### Batch to tasks mapping:
0 [0,]

View File

@@ -722,7 +722,7 @@ fn basic_get_stats() {
let kind = index_creation_task("whalo", "fish");
let _task = index_scheduler.register(kind, None, false).unwrap();
snapshot!(json_string!(index_scheduler.get_stats().unwrap()), @r#"
snapshot!(json_string!(index_scheduler.get_stats().unwrap()), @r###"
{
"indexes": {
"catto": 1,
@@ -742,6 +742,7 @@ fn basic_get_stats() {
"documentEdition": 0,
"dumpCreation": 0,
"export": 0,
"indexCompaction": 0,
"indexCreation": 3,
"indexDeletion": 0,
"indexSwap": 0,
@@ -753,10 +754,10 @@ fn basic_get_stats() {
"upgradeDatabase": 0
}
}
"#);
"###);
handle.advance_till([Start, BatchCreated]);
snapshot!(json_string!(index_scheduler.get_stats().unwrap()), @r#"
snapshot!(json_string!(index_scheduler.get_stats().unwrap()), @r###"
{
"indexes": {
"catto": 1,
@@ -776,6 +777,7 @@ fn basic_get_stats() {
"documentEdition": 0,
"dumpCreation": 0,
"export": 0,
"indexCompaction": 0,
"indexCreation": 3,
"indexDeletion": 0,
"indexSwap": 0,
@@ -787,7 +789,7 @@ fn basic_get_stats() {
"upgradeDatabase": 0
}
}
"#);
"###);
handle.advance_till([
InsideProcessBatch,
@@ -797,7 +799,7 @@ fn basic_get_stats() {
Start,
BatchCreated,
]);
snapshot!(json_string!(index_scheduler.get_stats().unwrap()), @r#"
snapshot!(json_string!(index_scheduler.get_stats().unwrap()), @r###"
{
"indexes": {
"catto": 1,
@@ -817,6 +819,7 @@ fn basic_get_stats() {
"documentEdition": 0,
"dumpCreation": 0,
"export": 0,
"indexCompaction": 0,
"indexCreation": 3,
"indexDeletion": 0,
"indexSwap": 0,
@@ -828,7 +831,7 @@ fn basic_get_stats() {
"upgradeDatabase": 0
}
}
"#);
"###);
// now we make one more batch, the started_at field of the new tasks will be past `second_start_time`
handle.advance_till([
@@ -839,7 +842,7 @@ fn basic_get_stats() {
Start,
BatchCreated,
]);
snapshot!(json_string!(index_scheduler.get_stats().unwrap()), @r#"
snapshot!(json_string!(index_scheduler.get_stats().unwrap()), @r###"
{
"indexes": {
"catto": 1,
@@ -859,6 +862,7 @@ fn basic_get_stats() {
"documentEdition": 0,
"dumpCreation": 0,
"export": 0,
"indexCompaction": 0,
"indexCreation": 3,
"indexDeletion": 0,
"indexSwap": 0,
@@ -870,7 +874,7 @@ fn basic_get_stats() {
"upgradeDatabase": 0
}
}
"#);
"###);
}
#[test]

View File

@@ -126,7 +126,7 @@ impl IndexScheduler {
std::fs::create_dir_all(&options.auth_path).unwrap();
let auth_env = open_auth_store_env(&options.auth_path).unwrap();
let index_scheduler =
Self::new(options, auth_env, version, sender, planned_failures).unwrap();
Self::new_test(options, auth_env, version, None, sender, planned_failures).unwrap();
// To be 100% consistent between all test we're going to start the scheduler right now
// and ensure it's in the expected starting state.

View File

@@ -45,6 +45,11 @@ pub fn upgrade_index_scheduler(
(1, 19, _) => 0,
(1, 20, _) => 0,
(1, 21, _) => 0,
(1, 22, _) => 0,
(1, 23, _) => 0,
(1, 24, _) => 0,
(1, 25, _) => 0,
(1, 26, _) => 0,
(major, minor, patch) => {
if major > current_major
|| (major == current_major && minor > current_minor)
@@ -95,6 +100,7 @@ pub fn upgrade_index_scheduler(
status: Status::Enqueued,
kind: KindWithContent::UpgradeDatabase { from },
network: None,
custom_metadata: None,
},
)?;
wtxn.commit()?;

View File

@@ -256,14 +256,15 @@ pub fn swap_index_uid_in_task(task: &mut Task, swap: (&str, &str)) {
use KindWithContent as K;
let mut index_uids = vec![];
match &mut task.kind {
K::DocumentAdditionOrUpdate { index_uid, .. } => index_uids.push(index_uid),
K::DocumentEdition { index_uid, .. } => index_uids.push(index_uid),
K::DocumentDeletion { index_uid, .. } => index_uids.push(index_uid),
K::DocumentDeletionByFilter { index_uid, .. } => index_uids.push(index_uid),
K::DocumentClear { index_uid } => index_uids.push(index_uid),
K::SettingsUpdate { index_uid, .. } => index_uids.push(index_uid),
K::IndexDeletion { index_uid } => index_uids.push(index_uid),
K::IndexCreation { index_uid, .. } => index_uids.push(index_uid),
K::DocumentAdditionOrUpdate { index_uid, .. }
| K::DocumentEdition { index_uid, .. }
| K::DocumentDeletion { index_uid, .. }
| K::DocumentDeletionByFilter { index_uid, .. }
| K::DocumentClear { index_uid }
| K::SettingsUpdate { index_uid, .. }
| K::IndexDeletion { index_uid }
| K::IndexCreation { index_uid, .. }
| K::IndexCompaction { index_uid, .. } => index_uids.push(index_uid),
K::IndexUpdate { index_uid, new_index_uid, .. } => {
index_uids.push(index_uid);
if let Some(new_uid) = new_index_uid {
@@ -285,6 +286,7 @@ pub fn swap_index_uid_in_task(task: &mut Task, swap: (&str, &str)) {
| K::DumpCreation { .. }
| K::Export { .. }
| K::UpgradeDatabase { .. }
| K::NetworkTopologyChange(_)
| K::SnapshotCreation => (),
};
if let Some(Details::IndexSwap { swaps }) = &mut task.details {
@@ -378,6 +380,7 @@ impl crate::IndexScheduler {
status,
kind,
network: _,
custom_metadata: _,
} = task;
assert_eq!(uid, task.uid);
if task.status != Status::Enqueued {
@@ -618,6 +621,20 @@ impl crate::IndexScheduler {
Details::UpgradeDatabase { from: _, to: _ } => {
assert_eq!(kind.as_kind(), Kind::UpgradeDatabase);
}
Details::IndexCompaction {
index_uid: _,
pre_compaction_size: _,
post_compaction_size: _,
} => {
assert_eq!(kind.as_kind(), Kind::IndexCompaction);
}
Details::NetworkTopologyChange {
moved_documents: _,
received_documents: _,
message: _,
} => {
assert_eq!(kind.as_kind(), Kind::NetworkTopologyChange);
}
}
}

View File

@@ -17,7 +17,7 @@ impl<'a> BytesDecode<'a> for UuidCodec {
impl BytesEncode<'_> for UuidCodec {
type EItem = Uuid;
fn bytes_encode(item: &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
fn bytes_encode(item: &Self::EItem) -> Result<Cow<'_, [u8]>, BoxedError> {
Ok(Cow::Borrowed(item.as_bytes()))
}
}

View File

@@ -271,9 +271,10 @@ macro_rules! json_string {
#[cfg(test)]
mod tests {
use uuid::Uuid;
use crate as meili_snap;
use crate::UUID_IN_MESSAGE_RE;
use uuid::Uuid;
#[test]
fn snap() {

View File

@@ -109,6 +109,7 @@ impl HeedAuthStore {
Action::IndexesGet,
Action::IndexesUpdate,
Action::IndexesSwap,
Action::IndexesCompact,
]
.iter(),
);
@@ -315,7 +316,9 @@ impl<'a> heed::BytesDecode<'a> for KeyIdActionCodec {
impl<'a> heed::BytesEncode<'a> for KeyIdActionCodec {
type EItem = (&'a KeyId, &'a Action, Option<&'a [u8]>);
fn bytes_encode((key_id, action, index): &Self::EItem) -> StdResult<Cow<[u8]>, BoxedError> {
fn bytes_encode(
(key_id, action, index): &'_ Self::EItem,
) -> StdResult<Cow<'_, [u8]>, BoxedError> {
let mut bytes = Vec::new();
bytes.extend_from_slice(key_id.as_bytes());

View File

@@ -7,6 +7,7 @@ use std::collections::BTreeMap;
use milli::update::new::indexer::enterprise_edition::sharding::Shards;
use serde::{Deserialize, Serialize};
use uuid::Uuid;
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, Default)]
#[serde(rename_all = "camelCase")]
@@ -16,20 +17,18 @@ pub struct Network {
#[serde(default)]
pub remotes: BTreeMap<String, Remote>,
#[serde(default)]
pub sharding: bool,
pub leader: Option<String>,
#[serde(default)]
pub version: Uuid,
}
impl Network {
pub fn shards(&self) -> Option<Shards> {
if self.sharding {
let this = self.local.as_deref().expect("Inconsistent `sharding` and `self`");
let others = self
.remotes
.keys()
.filter(|name| name.as_str() != this)
.map(|name| name.to_owned())
.collect();
Some(Shards { own: vec![this.to_owned()], others })
if self.leader.is_some() {
Some(Shards::from_remotes_local(
self.remotes.keys().map(String::as_str),
self.local.as_deref(),
))
} else {
None
}

View File

@@ -5,6 +5,7 @@ use actix_web::{self as aweb, HttpResponseBuilder};
use aweb::http::header;
use aweb::rt::task::JoinError;
use convert_case::Casing;
use milli::cellulite;
use milli::heed::{Error as HeedError, MdbError};
use serde::{Deserialize, Serialize};
use utoipa::ToSchema;
@@ -155,7 +156,7 @@ macro_rules! make_error_codes {
}
/// return error name, used as error code
fn name(&self) -> String {
pub fn name(&self) -> String {
match self {
$(
Code::$code_ident => stringify!($code_ident).to_case(convert_case::Case::Snake)
@@ -213,6 +214,9 @@ ImmutableApiKeyUid , InvalidRequest , BAD_REQU
ImmutableApiKeyUpdatedAt , InvalidRequest , BAD_REQUEST;
ImmutableIndexCreatedAt , InvalidRequest , BAD_REQUEST;
ImmutableIndexUpdatedAt , InvalidRequest , BAD_REQUEST;
ImportTaskAlreadyReceived , InvalidRequest , PRECONDITION_FAILED;
ImportTaskUnknownRemote , InvalidRequest , PRECONDITION_FAILED;
ImportTaskWithoutNetworkTask , InvalidRequest , SERVICE_UNAVAILABLE;
IndexAlreadyExists , InvalidRequest , CONFLICT ;
IndexCreationFailed , Internal , INTERNAL_SERVER_ERROR;
IndexNotFound , InvalidRequest , NOT_FOUND;
@@ -239,6 +243,7 @@ InconsistentDocumentChangeHeaders , InvalidRequest , BAD_REQU
InvalidDocumentFilter , InvalidRequest , BAD_REQUEST ;
InvalidDocumentSort , InvalidRequest , BAD_REQUEST ;
InvalidDocumentGeoField , InvalidRequest , BAD_REQUEST ;
InvalidDocumentGeojsonField , InvalidRequest , BAD_REQUEST ;
InvalidHeaderValue , InvalidRequest , BAD_REQUEST ;
InvalidVectorDimensions , InvalidRequest , BAD_REQUEST ;
InvalidVectorsType , InvalidRequest , BAD_REQUEST ;
@@ -252,10 +257,12 @@ InvalidSearchHybridQuery , InvalidRequest , BAD_REQU
InvalidIndexLimit , InvalidRequest , BAD_REQUEST ;
InvalidIndexOffset , InvalidRequest , BAD_REQUEST ;
InvalidIndexPrimaryKey , InvalidRequest , BAD_REQUEST ;
InvalidIndexCustomMetadata , InvalidRequest , BAD_REQUEST ;
InvalidIndexUid , InvalidRequest , BAD_REQUEST ;
InvalidMultiSearchFacets , InvalidRequest , BAD_REQUEST ;
InvalidMultiSearchFacetsByIndex , InvalidRequest , BAD_REQUEST ;
InvalidMultiSearchFacetOrder , InvalidRequest , BAD_REQUEST ;
InvalidMultiSearchQueryPersonalization , InvalidRequest , BAD_REQUEST ;
InvalidMultiSearchFederated , InvalidRequest , BAD_REQUEST ;
InvalidMultiSearchFederationOptions , InvalidRequest , BAD_REQUEST ;
InvalidMultiSearchMaxValuesPerFacet , InvalidRequest , BAD_REQUEST ;
@@ -266,9 +273,9 @@ InvalidMultiSearchQueryRankingRules , InvalidRequest , BAD_REQU
InvalidMultiSearchQueryPosition , InvalidRequest , BAD_REQUEST ;
InvalidMultiSearchRemote , InvalidRequest , BAD_REQUEST ;
InvalidMultiSearchWeight , InvalidRequest , BAD_REQUEST ;
InvalidNetworkLeader , InvalidRequest , BAD_REQUEST ;
InvalidNetworkRemotes , InvalidRequest , BAD_REQUEST ;
InvalidNetworkSelf , InvalidRequest , BAD_REQUEST ;
InvalidNetworkSharding , InvalidRequest , BAD_REQUEST ;
InvalidNetworkSearchApiKey , InvalidRequest , BAD_REQUEST ;
InvalidNetworkWriteApiKey , InvalidRequest , BAD_REQUEST ;
InvalidNetworkUrl , InvalidRequest , BAD_REQUEST ;
@@ -313,6 +320,8 @@ InvalidSearchShowRankingScoreDetails , InvalidRequest , BAD_REQU
InvalidSimilarShowRankingScoreDetails , InvalidRequest , BAD_REQUEST ;
InvalidSearchSort , InvalidRequest , BAD_REQUEST ;
InvalidSearchDistinct , InvalidRequest , BAD_REQUEST ;
InvalidSearchPersonalize , InvalidRequest , BAD_REQUEST ;
InvalidSearchPersonalizeUserContext , InvalidRequest , BAD_REQUEST ;
InvalidSearchMediaAndVector , InvalidRequest , BAD_REQUEST ;
InvalidSettingsDisplayedAttributes , InvalidRequest , BAD_REQUEST ;
InvalidSettingsDistinctAttribute , InvalidRequest , BAD_REQUEST ;
@@ -371,7 +380,9 @@ MissingPayload , InvalidRequest , BAD_REQU
MissingSearchHybrid , InvalidRequest , BAD_REQUEST ;
MissingSwapIndexes , InvalidRequest , BAD_REQUEST ;
MissingTaskFilters , InvalidRequest , BAD_REQUEST ;
NetworkVersionMismatch , InvalidRequest , PRECONDITION_FAILED ;
NoSpaceLeftOnDevice , System , UNPROCESSABLE_ENTITY;
NotLeader , InvalidRequest , BAD_REQUEST ;
PayloadTooLarge , InvalidRequest , PAYLOAD_TOO_LARGE ;
RemoteBadResponse , System , BAD_GATEWAY ;
RemoteBadRequest , InvalidRequest , BAD_REQUEST ;
@@ -385,9 +396,13 @@ TaskFileNotFound , InvalidRequest , NOT_FOUN
BatchNotFound , InvalidRequest , NOT_FOUND ;
TooManyOpenFiles , System , UNPROCESSABLE_ENTITY ;
TooManyVectors , InvalidRequest , BAD_REQUEST ;
UnexpectedNetworkPreviousRemotes , InvalidRequest , BAD_REQUEST ;
UnretrievableDocument , Internal , BAD_REQUEST ;
UnretrievableErrorCode , InvalidRequest , BAD_REQUEST ;
UnsupportedMediaType , InvalidRequest , UNSUPPORTED_MEDIA_TYPE ;
InvalidS3SnapshotRequest , Internal , BAD_REQUEST ;
InvalidS3SnapshotParameters , Internal , BAD_REQUEST ;
S3SnapshotServerError , Internal , BAD_GATEWAY ;
// Experimental features
VectorEmbeddingError , InvalidRequest , BAD_REQUEST ;
@@ -501,7 +516,9 @@ impl ErrorCode for milli::Error {
Code::InvalidFacetSearchFacetName
}
UserError::CriterionError(_) => Code::InvalidSettingsRankingRules,
UserError::InvalidGeoField { .. } => Code::InvalidDocumentGeoField,
UserError::InvalidGeoField { .. } | UserError::GeoJsonError(_) => {
Code::InvalidDocumentGeoField
}
UserError::InvalidVectorDimensions { .. }
| UserError::InvalidIndexingVectorDimensions { .. } => {
Code::InvalidVectorDimensions
@@ -525,6 +542,17 @@ impl ErrorCode for milli::Error {
| UserError::DocumentEditionCompilationError(_) => {
Code::EditDocumentsByFunctionError
}
UserError::CelluliteError(err) => match err {
cellulite::Error::BuildCanceled
| cellulite::Error::VersionMismatchOnBuild(_)
| cellulite::Error::DatabaseDoesntExists
| cellulite::Error::Heed(_)
| cellulite::Error::InvalidGeometry(_)
| cellulite::Error::InternalDocIdMissing(_, _)
| cellulite::Error::CannotConvertLineToCell(_, _, _) => Code::Internal,
cellulite::Error::InvalidGeoJson(_) => Code::InvalidDocumentGeojsonField,
},
UserError::MalformedGeojson(_) => Code::InvalidDocumentGeojsonField,
}
}
}
@@ -664,6 +692,18 @@ impl fmt::Display for deserr_codes::InvalidNetworkSearchApiKey {
}
}
impl fmt::Display for deserr_codes::InvalidSearchPersonalize {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "the value of `personalize` is invalid, expected a JSON object with `userContext` string.")
}
}
impl fmt::Display for deserr_codes::InvalidSearchPersonalizeUserContext {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "the value of `userContext` is invalid, expected a string.")
}
}
#[macro_export]
macro_rules! internal_error {
($target:ty : $($other:path), *) => {

View File

@@ -380,6 +380,9 @@ pub enum Action {
#[serde(rename = "webhooks.*")]
#[deserr(rename = "webhooks.*")]
WebhooksAll,
#[serde(rename = "indexes.compact")]
#[deserr(rename = "indexes.compact")]
IndexesCompact,
}
impl Action {
@@ -398,6 +401,7 @@ impl Action {
INDEXES_UPDATE => Some(Self::IndexesUpdate),
INDEXES_DELETE => Some(Self::IndexesDelete),
INDEXES_SWAP => Some(Self::IndexesSwap),
INDEXES_COMPACT => Some(Self::IndexesCompact),
TASKS_ALL => Some(Self::TasksAll),
TASKS_CANCEL => Some(Self::TasksCancel),
TASKS_DELETE => Some(Self::TasksDelete),
@@ -462,6 +466,7 @@ impl Action {
IndexesUpdate => false,
IndexesDelete => false,
IndexesSwap => false,
IndexesCompact => false,
TasksCancel => false,
TasksDelete => false,
TasksGet => true,
@@ -513,6 +518,7 @@ pub mod actions {
pub const INDEXES_UPDATE: u8 = IndexesUpdate.repr();
pub const INDEXES_DELETE: u8 = IndexesDelete.repr();
pub const INDEXES_SWAP: u8 = IndexesSwap.repr();
pub const INDEXES_COMPACT: u8 = IndexesCompact.repr();
pub const TASKS_ALL: u8 = TasksAll.repr();
pub const TASKS_CANCEL: u8 = TasksCancel.repr();
pub const TASKS_DELETE: u8 = TasksDelete.repr();
@@ -614,6 +620,7 @@ pub(crate) mod test {
assert!(WebhooksDelete.repr() == 47 && WEBHOOKS_DELETE == 47);
assert!(WebhooksCreate.repr() == 48 && WEBHOOKS_CREATE == 48);
assert!(WebhooksAll.repr() == 49 && WEBHOOKS_ALL == 49);
assert!(IndexesCompact.repr() == 50 && INDEXES_COMPACT == 50);
}
#[test]

View File

@@ -1,3 +1,5 @@
#![allow(clippy::result_large_err)]
pub mod batch_view;
pub mod batches;
pub mod compression;

View File

@@ -346,24 +346,26 @@ impl<T> Settings<T> {
continue;
};
Self::hide_secret(api_key);
hide_secret(api_key, 0);
}
}
}
fn hide_secret(secret: &mut String) {
match secret.len() {
x if x < 10 => {
secret.replace_range(.., "XXX...");
}
x if x < 20 => {
secret.replace_range(2.., "XXXX...");
}
x if x < 30 => {
secret.replace_range(3.., "XXXXX...");
}
_x => {
secret.replace_range(5.., "XXXXXX...");
}
/// Redact a secret string, starting from the `secret_offset`th byte.
pub fn hide_secret(secret: &mut String, secret_offset: usize) {
match secret.len().checked_sub(secret_offset) {
None => (),
Some(x) if x < 10 => {
secret.replace_range(secret_offset.., "XXX...");
}
Some(x) if x < 20 => {
secret.replace_range((secret_offset + 2).., "XXXX...");
}
Some(x) if x < 30 => {
secret.replace_range((secret_offset + 3).., "XXXXX...");
}
Some(_x) => {
secret.replace_range((secret_offset + 5).., "XXXXXX...");
}
}
}

View File

@@ -9,9 +9,9 @@ use utoipa::ToSchema;
use crate::batches::BatchId;
use crate::error::ResponseError;
use crate::settings::{Settings, Unchecked};
use crate::tasks::enterprise_edition::network::DbTaskNetwork;
use crate::tasks::{
serialize_duration, Details, DetailsExportIndexSettings, IndexSwap, Kind, Status, Task, TaskId,
TaskNetwork,
};
#[derive(Debug, Clone, PartialEq, Serialize, ToSchema)]
@@ -54,7 +54,10 @@ pub struct TaskView {
pub finished_at: Option<OffsetDateTime>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub network: Option<TaskNetwork>,
pub network: Option<DbTaskNetwork>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub custom_metadata: Option<String>,
}
impl TaskView {
@@ -73,6 +76,7 @@ impl TaskView {
started_at: task.started_at,
finished_at: task.finished_at,
network: task.network.clone(),
custom_metadata: task.custom_metadata.clone(),
}
}
}
@@ -142,6 +146,16 @@ pub struct DetailsView {
pub old_index_uid: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub new_index_uid: Option<String>,
// index compaction
#[serde(skip_serializing_if = "Option::is_none")]
pub pre_compaction_size: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub post_compaction_size: Option<String>,
// network topology change
#[serde(skip_serializing_if = "Option::is_none")]
pub moved_documents: Option<u64>,
#[serde(skip_serializing_if = "Option::is_none")]
pub message: Option<String>,
}
impl DetailsView {
@@ -152,6 +166,17 @@ impl DetailsView {
(None, Some(doc)) | (Some(doc), None) => Some(doc),
(Some(left), Some(right)) => Some(left + right),
},
moved_documents: match (self.moved_documents, other.moved_documents) {
(None, None) => None,
(None, Some(doc)) | (Some(doc), None) => Some(doc),
(Some(left), Some(right)) => Some(left + right),
},
message: match (&mut self.message, &other.message) {
(None, None) => None,
(None, Some(message)) => Some(message.clone()),
(Some(message), None) => Some(std::mem::take(message)),
(Some(message), Some(_)) => Some(std::mem::take(message)),
},
indexed_documents: match (self.indexed_documents, other.indexed_documents) {
(None, None) => None,
(None, Some(None)) | (Some(None), None) | (Some(None), Some(None)) => Some(None),
@@ -314,6 +339,24 @@ impl DetailsView {
// We should never be able to batch multiple renames at the same time.
(Some(left), Some(_right)) => Some(left),
},
pre_compaction_size: match (
self.pre_compaction_size.clone(),
other.pre_compaction_size.clone(),
) {
(None, None) => None,
(None, Some(size)) | (Some(size), None) => Some(size),
// We should never be able to batch multiple compactions at the same time.
(Some(left), Some(_right)) => Some(left),
},
post_compaction_size: match (
self.post_compaction_size.clone(),
other.post_compaction_size.clone(),
) {
(None, None) => None,
(None, Some(size)) | (Some(size), None) => Some(size),
// We should never be able to batch multiple compactions at the same time.
(Some(left), Some(_right)) => Some(left),
},
}
}
}
@@ -415,6 +458,23 @@ impl From<Details> for DetailsView {
upgrade_to: Some(format!("v{}.{}.{}", to.0, to.1, to.2)),
..Default::default()
},
Details::IndexCompaction { pre_compaction_size, post_compaction_size, .. } => {
DetailsView {
pre_compaction_size: pre_compaction_size
.map(|size| size.get_appropriate_unit(UnitType::Both).to_string()),
post_compaction_size: post_compaction_size
.map(|size| size.get_appropriate_unit(UnitType::Both).to_string()),
..Default::default()
}
}
Details::NetworkTopologyChange { moved_documents, received_documents, message } => {
DetailsView {
moved_documents: Some(moved_documents),
received_documents: Some(received_documents),
message: Some(message),
..Default::default()
}
}
}
}
}

View File

@@ -0,0 +1,6 @@
// Copyright © 2025 Meilisearch Some Rights Reserved
// This file is part of Meilisearch Enterprise Edition (EE).
// Use of this source code is governed by the Business Source License 1.1,
// as found in the LICENSE-EE file or at <https://mariadb.com/bsl11>
pub mod network;

View File

@@ -0,0 +1,564 @@
// Copyright © 2025 Meilisearch Some Rights Reserved
// This file is part of Meilisearch Enterprise Edition (EE).
// Use of this source code is governed by the Business Source License 1.1,
// as found in the LICENSE-EE file or at <https://mariadb.com/bsl11>
use std::collections::BTreeMap;
use milli::DocumentId;
use serde::{Deserialize, Serialize};
use utoipa::ToSchema;
use uuid::Uuid;
use crate::enterprise_edition::network::{Network, Remote};
use crate::error::ResponseError;
use crate::tasks::{Details, TaskId};
#[derive(Debug, PartialEq, Clone, Serialize, Deserialize, ToSchema)]
#[serde(untagged, rename_all = "camelCase")]
// This type is used in the database, care should be taken when modifying it.
pub enum DbTaskNetwork {
/// Tasks that were duplicated from `origin`
Origin { origin: Origin },
/// Tasks that were duplicated as `remote_tasks`
Remotes {
remote_tasks: BTreeMap<String, RemoteTask>,
#[serde(default)]
network_version: Uuid,
},
/// Document import tasks sent in the context of `network_change`
Import { import_from: ImportData, network_change: Origin },
}
impl DbTaskNetwork {
pub fn network_version(&self) -> Uuid {
match self {
DbTaskNetwork::Origin { origin } => origin.network_version,
DbTaskNetwork::Remotes { remote_tasks: _, network_version } => *network_version,
DbTaskNetwork::Import { import_from: _, network_change } => {
network_change.network_version
}
}
}
pub fn import_data(&self) -> Option<&ImportData> {
match self {
DbTaskNetwork::Origin { .. } | DbTaskNetwork::Remotes { .. } => None,
DbTaskNetwork::Import { import_from, .. } => Some(import_from),
}
}
pub fn origin(&self) -> Option<&Origin> {
match self {
DbTaskNetwork::Origin { origin } => Some(origin),
DbTaskNetwork::Remotes { .. } => None,
DbTaskNetwork::Import { network_change, .. } => Some(network_change),
}
}
}
#[derive(Debug, PartialEq, Clone)]
pub enum TaskNetwork {
/// Tasks that were duplicated from `origin`
Origin { origin: Origin },
/// Tasks that were duplicated as `remote_tasks`
Remotes { remote_tasks: BTreeMap<String, RemoteTask>, network_version: Uuid },
/// Document import tasks sent in the context of `network_change`
Import { import_from: ImportData, network_change: Origin, metadata: ImportMetadata },
}
impl From<TaskNetwork> for DbTaskNetwork {
fn from(value: TaskNetwork) -> Self {
match value {
TaskNetwork::Origin { origin } => DbTaskNetwork::Origin { origin },
TaskNetwork::Remotes { remote_tasks, network_version } => {
DbTaskNetwork::Remotes { remote_tasks, network_version }
}
TaskNetwork::Import { import_from, network_change, metadata: _ } => {
DbTaskNetwork::Import { import_from, network_change }
}
}
}
}
#[derive(Debug, PartialEq, Clone, Serialize, Deserialize, ToSchema)]
#[serde(rename_all = "camelCase")]
pub struct Origin {
pub remote_name: String,
pub task_uid: u32,
pub network_version: Uuid,
}
/// Import data stored in a task
#[derive(Debug, PartialEq, Clone, Serialize, Deserialize, ToSchema)]
#[serde(rename_all = "camelCase")]
pub struct ImportData {
pub remote_name: String,
pub index_name: String,
pub document_count: u64,
}
/// Import metadata associated with a task but not stored in the task
#[derive(Debug, PartialEq, Clone)]
pub struct ImportMetadata {
/// Total number of indexes to import from this host
pub index_count: u64,
/// Key unique to this (network_change, index, host, key).
///
/// In practice, an internal document id of one of the documents to import.
pub task_key: DocumentId,
/// Total number of documents to import for this index from this host.
pub total_index_documents: u64,
}
#[derive(Debug, PartialEq, Clone, Serialize, Deserialize, ToSchema)]
#[serde(rename_all = "camelCase")]
pub struct RemoteTask {
#[serde(skip_serializing_if = "Option::is_none")]
task_uid: Option<TaskId>,
error: Option<ResponseError>,
}
impl From<Result<TaskId, ResponseError>> for RemoteTask {
fn from(res: Result<TaskId, ResponseError>) -> RemoteTask {
match res {
Ok(task_uid) => RemoteTask { task_uid: Some(task_uid), error: None },
Err(err) => RemoteTask { task_uid: None, error: Some(err) },
}
}
}
/// Contains the full state of a network topology change.
///
/// A network topology change task is unique in that it can be processed in multiple different batches, as its resolution
/// depends on various document additions tasks being processed.
///
/// A network topology task has 4 states:
///
/// 1. Processing any task that was meant for an earlier version of the network. This is necessary to know that we have the right version of
/// documents.
/// 2. Sending all documents that must be moved to other remotes.
/// 3. Processing any task coming from the remotes.
/// 4. Finished.
///
/// Furthermore, it maintains some stats
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct NetworkTopologyChange {
state: NetworkTopologyState,
// in name, `None` if the node is no longer part of the network
#[serde(skip_serializing_if = "Option::is_none")]
in_name: Option<String>,
// out name, `None` if the node is new to the network
#[serde(skip_serializing_if = "Option::is_none")]
out_name: Option<String>,
out_remotes: BTreeMap<String, Remote>,
in_remotes: BTreeMap<String, InRemote>,
stats: NetworkTopologyStats,
}
impl NetworkTopologyChange {
pub fn new(old_network: Network, new_network: Network) -> Self {
// we use our old name as export name
let out_name = old_network.local;
// we use our new name as import name
let in_name = new_network.local;
// we export to the new network
let mut out_remotes = new_network.remotes;
// don't export to ourselves
if let Some(in_name) = &in_name {
out_remotes.remove(in_name);
}
/// FIXME: doesn't work if the old network is not the same for old nodes
let in_remotes = old_network
.remotes
.into_keys()
// don't await imports from ourselves
.filter(|name| Some(name.as_str()) != out_name.as_deref())
.map(|name| (name, InRemote::new()))
.collect();
Self {
state: NetworkTopologyState::WaitingForOlderTasks,
in_name,
out_name,
out_remotes,
in_remotes,
stats: NetworkTopologyStats { received_documents: 0, moved_documents: 0 },
}
}
pub fn state(&self) -> NetworkTopologyState {
self.state
}
pub fn out_name(&self) -> Option<&str> {
// unwrap: one of out name or in_name must be defined
self.out_name.as_deref()
}
pub fn in_name(&self) -> Option<&str> {
self.in_name.as_deref()
}
pub fn export_to_process(&self) -> Option<(&BTreeMap<String, Remote>, &str)> {
if self.state != NetworkTopologyState::ExportingDocuments {
return None;
}
if self.out_remotes.is_empty() {
return None;
}
let out_name = self.out_name()?;
Some((&self.out_remotes, out_name))
}
/// Compute the next state from the current state of the task.
pub fn update_state(&mut self) {
self.state = match self.state {
NetworkTopologyState::WaitingForOlderTasks => {
// no more older tasks, so finished waiting
NetworkTopologyState::ExportingDocuments
}
NetworkTopologyState::ExportingDocuments => {
// processed all exported documents
NetworkTopologyState::ImportingDocuments
}
NetworkTopologyState::ImportingDocuments => {
if self.is_import_finished() {
NetworkTopologyState::Finished
} else {
NetworkTopologyState::ImportingDocuments
}
}
NetworkTopologyState::Finished => NetworkTopologyState::Finished,
};
}
pub fn receive_remote_task(
&mut self,
remote_name: &str,
index_name: &str,
task_key: DocumentId,
document_count: u64,
total_indexes: u64,
total_index_documents: u64,
) -> Result<(), ReceiveTaskError> {
let remote = self
.in_remotes
.get_mut(remote_name)
.ok_or_else(|| ReceiveTaskError::UnknownRemote(remote_name.to_string()))?;
remote.import_state = match std::mem::take(&mut remote.import_state) {
ImportState::WaitingForInitialTask => {
let mut import_index_state = BTreeMap::new();
import_index_state.insert(
index_name.to_owned(),
ImportIndexState::Ongoing {
total_documents: total_index_documents,
received_documents: document_count,
task_keys: vec![task_key],
processed_documents: 0,
},
);
ImportState::Ongoing { import_index_state, total_indexes }
}
ImportState::Ongoing { mut import_index_state, total_indexes } => {
if let Some((index_name, mut index_state)) =
import_index_state.remove_entry(index_name)
{
index_state = match index_state {
ImportIndexState::Ongoing {
total_documents,
received_documents: previously_received,
processed_documents,
mut task_keys,
} => {
if task_keys.contains(&task_key) {
return Err(ReceiveTaskError::DuplicateTask(task_key));
}
task_keys.push(task_key);
ImportIndexState::Ongoing {
total_documents,
received_documents: previously_received + document_count,
processed_documents,
task_keys,
}
}
ImportIndexState::Finished { total_documents } => {
ImportIndexState::Finished { total_documents }
}
};
import_index_state.insert(index_name, index_state);
} else {
let state = ImportIndexState::Ongoing {
total_documents: total_index_documents,
received_documents: document_count,
processed_documents: 0,
task_keys: vec![task_key],
};
import_index_state.insert(index_name.to_string(), state);
}
ImportState::Ongoing { import_index_state, total_indexes: total_indexes }
}
ImportState::Finished { total_indexes, total_documents } => {
ImportState::Finished { total_indexes, total_documents }
}
};
Ok(())
}
pub fn process_remote_tasks(
&mut self,
remote_name: &str,
index_name: &str,
document_count: u64,
) {
/// FIXME: unwraps and panics
let remote = self.in_remotes.get_mut(remote_name).unwrap();
remote.import_state = match std::mem::take(&mut remote.import_state) {
ImportState::WaitingForInitialTask => panic!("no task received yet one processed"),
ImportState::Ongoing { mut import_index_state, total_indexes } => {
let (index_name, mut index_state) =
import_index_state.remove_entry(index_name).unwrap();
index_state = match index_state {
ImportIndexState::Ongoing {
total_documents,
received_documents,
processed_documents: previously_processed,
task_keys,
} => {
let newly_processed_documents = previously_processed + document_count;
if newly_processed_documents >= total_documents {
ImportIndexState::Finished { total_documents }
} else {
ImportIndexState::Ongoing {
total_documents,
received_documents,
processed_documents: newly_processed_documents,
task_keys,
}
}
}
ImportIndexState::Finished { total_documents } => {
ImportIndexState::Finished { total_documents }
}
};
import_index_state.insert(index_name, index_state);
if import_index_state.len() as u64 == total_indexes
&& import_index_state.values().all(|index| index.is_finished())
{
let total_documents =
import_index_state.values().map(|index| index.total_documents()).sum();
ImportState::Finished { total_indexes, total_documents }
} else {
ImportState::Ongoing { import_index_state, total_indexes }
}
}
ImportState::Finished { total_indexes, total_documents } => {
ImportState::Finished { total_indexes, total_documents }
}
}
}
pub fn to_details(&self) -> Details {
let message = match self.state {
NetworkTopologyState::WaitingForOlderTasks => {
"Waiting for tasks enqueued before the network change to finish processing".into()
}
NetworkTopologyState::ExportingDocuments => "Exporting documents".into(),
NetworkTopologyState::ImportingDocuments => {
let mut finished_count = 0;
let mut first_ongoing = None;
let mut ongoing_total_indexes = 0;
let mut ongoing_processed_documents = 0;
let mut ongoing_missing_documents = 0;
let mut ongoing_total_documents = 0;
let mut other_ongoing_count = 0;
let mut first_waiting = None;
let mut other_waiting_count = 0;
for (remote_name, in_remote) in &self.in_remotes {
match &in_remote.import_state {
ImportState::WaitingForInitialTask => {
first_waiting = match first_waiting {
None => Some(remote_name),
first_waiting => {
other_waiting_count += 1;
first_waiting
}
};
}
ImportState::Ongoing { import_index_state, total_indexes } => {
first_ongoing = match first_ongoing {
None => {
ongoing_total_indexes = *total_indexes;
Some(remote_name)
}
first_ongoing => {
other_ongoing_count += 1;
first_ongoing
}
};
for import_state in import_index_state.values() {
match import_state {
ImportIndexState::Ongoing {
total_documents,
processed_documents,
received_documents,
task_keys: _,
} => {
ongoing_total_documents += total_documents;
ongoing_processed_documents += processed_documents;
ongoing_missing_documents +=
total_documents.saturating_sub(*received_documents);
}
ImportIndexState::Finished { total_documents } => {
ongoing_total_documents += total_documents;
ongoing_processed_documents += total_documents;
}
}
}
}
ImportState::Finished { total_indexes, total_documents } => {
finished_count += 1;
ongoing_total_indexes = *total_indexes;
ongoing_total_documents += *total_documents;
ongoing_processed_documents += *total_documents;
}
}
}
format!(
"Importing documents from {total} remotes{waiting}{ongoing}{finished}",
total = self.in_remotes.len(),
waiting = if let Some(first_waiting) = first_waiting {
&format!(
", waiting on first task from `{}`{others}",
first_waiting,
others = if other_waiting_count > 0 {
&format!(" and {other_waiting_count} other remotes")
} else {
""
}
)
} else {
""
},
ongoing = if let Some(first_ongoing) = first_ongoing {
&format!(", awaiting {ongoing_missing_documents} and processed {ongoing_processed_documents} out of {ongoing_total_documents} documents in {ongoing_total_indexes} indexes from `{first_ongoing}`{others}",
others=if other_ongoing_count > 0 {&format!(" and {other_ongoing_count} other remotes")} else {""})
} else {
""
},
finished = if finished_count >= 0 {
&format!(", {finished_count} remotes finished processing")
} else {
""
}
)
}
NetworkTopologyState::Finished => "Finished".into(),
};
Details::NetworkTopologyChange {
moved_documents: self.stats.moved_documents,
received_documents: self.stats.received_documents,
message,
}
}
fn is_import_finished(&self) -> bool {
self.in_remotes.values().all(|remote| remote.is_finished())
}
}
pub enum ReceiveTaskError {
UnknownRemote(String),
DuplicateTask(DocumentId),
}
#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub enum NetworkTopologyState {
WaitingForOlderTasks,
ExportingDocuments,
ImportingDocuments,
Finished,
}
#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct NetworkTopologyStats {
pub received_documents: u64,
pub moved_documents: u64,
}
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct InRemote {
import_state: ImportState,
}
impl InRemote {
pub fn new() -> Self {
Self { import_state: ImportState::WaitingForInitialTask }
}
pub fn is_finished(&self) -> bool {
matches!(self.import_state, ImportState::Finished { .. })
}
}
#[derive(Default, Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
enum ImportState {
/// Initially Meilisearch doesn't know how many documents it should expect from a remote.
/// The first task for each remote contains the information of how many indexes will be imported,
/// and the first task for each index contains the number of documents to import for that index.
#[default]
WaitingForInitialTask,
Ongoing {
import_index_state: BTreeMap<String, ImportIndexState>,
total_indexes: u64,
},
Finished {
total_indexes: u64,
total_documents: u64,
},
}
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
enum ImportIndexState {
Ongoing {
total_documents: u64,
received_documents: u64,
processed_documents: u64,
task_keys: Vec<DocumentId>,
},
Finished {
total_documents: u64,
},
}
impl ImportIndexState {
pub fn is_finished(&self) -> bool {
matches!(self, ImportIndexState::Finished { .. })
}
fn total_documents(&self) -> u64 {
match *self {
ImportIndexState::Ongoing { total_documents, .. }
| ImportIndexState::Finished { total_documents } => total_documents,
}
}
}
pub mod headers {
pub const PROXY_ORIGIN_REMOTE_HEADER: &str = "Meili-Proxy-Origin-Remote";
pub const PROXY_ORIGIN_TASK_UID_HEADER: &str = "Meili-Proxy-Origin-TaskUid";
pub const PROXY_ORIGIN_NETWORK_VERSION_HEADER: &str = "Meili-Proxy-Origin-Network-Version";
pub const PROXY_IMPORT_REMOTE_HEADER: &str = "Meili-Proxy-Import-Remote";
pub const PROXY_IMPORT_INDEX_COUNT_HEADER: &str = "Meili-Proxy-Import-Index-Count";
pub const PROXY_IMPORT_INDEX_HEADER: &str = "Meili-Proxy-Import-Index";
pub const PROXY_IMPORT_TASK_KEY_HEADER: &str = "Meili-Proxy-Import-Task-Key";
pub const PROXY_IMPORT_DOCS_HEADER: &str = "Meili-Proxy-Import-Docs";
pub const PROXY_IMPORT_TOTAL_INDEX_DOCS_HEADER: &str = "Meili-Proxy-Import-Total-Index-Docs";
}

View File

@@ -23,6 +23,8 @@ use crate::{versioning, InstanceUid};
pub type TaskId = u32;
pub mod enterprise_edition;
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct Task {
@@ -44,7 +46,10 @@ pub struct Task {
pub kind: KindWithContent,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub network: Option<TaskNetwork>,
pub network: Option<enterprise_edition::network::DbTaskNetwork>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub custom_metadata: Option<String>,
}
impl Task {
@@ -58,6 +63,7 @@ impl Task {
| TaskDeletion { .. }
| Export { .. }
| UpgradeDatabase { .. }
| NetworkTopologyChange { .. }
| IndexSwap { .. } => None,
DocumentAdditionOrUpdate { index_uid, .. }
| DocumentEdition { index_uid, .. }
@@ -67,7 +73,8 @@ impl Task {
| SettingsUpdate { index_uid, .. }
| IndexCreation { index_uid, .. }
| IndexUpdate { index_uid, .. }
| IndexDeletion { index_uid } => Some(index_uid),
| IndexDeletion { index_uid }
| IndexCompaction { index_uid } => Some(index_uid),
}
}
@@ -94,7 +101,9 @@ impl Task {
| KindWithContent::DumpCreation { .. }
| KindWithContent::SnapshotCreation
| KindWithContent::Export { .. }
| KindWithContent::UpgradeDatabase { .. } => None,
| KindWithContent::UpgradeDatabase { .. }
| KindWithContent::NetworkTopologyChange { .. }
| KindWithContent::IndexCompaction { .. } => None,
}
}
}
@@ -170,6 +179,10 @@ pub enum KindWithContent {
UpgradeDatabase {
from: (u32, u32, u32),
},
IndexCompaction {
index_uid: String,
},
NetworkTopologyChange(enterprise_edition::network::NetworkTopologyChange),
}
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, ToSchema)]
@@ -206,6 +219,8 @@ impl KindWithContent {
KindWithContent::SnapshotCreation => Kind::SnapshotCreation,
KindWithContent::Export { .. } => Kind::Export,
KindWithContent::UpgradeDatabase { .. } => Kind::UpgradeDatabase,
KindWithContent::IndexCompaction { .. } => Kind::IndexCompaction,
KindWithContent::NetworkTopologyChange { .. } => Kind::NetworkTopologyChange,
}
}
@@ -218,6 +233,7 @@ impl KindWithContent {
| TaskCancelation { .. }
| TaskDeletion { .. }
| Export { .. }
| NetworkTopologyChange { .. }
| UpgradeDatabase { .. } => vec![],
DocumentAdditionOrUpdate { index_uid, .. }
| DocumentEdition { index_uid, .. }
@@ -226,7 +242,8 @@ impl KindWithContent {
| DocumentClear { index_uid }
| SettingsUpdate { index_uid, .. }
| IndexCreation { index_uid, .. }
| IndexDeletion { index_uid } => vec![index_uid],
| IndexDeletion { index_uid }
| IndexCompaction { index_uid } => vec![index_uid],
IndexUpdate { index_uid, new_index_uid, .. } => {
let mut indexes = vec![index_uid.as_str()];
if let Some(new_uid) = new_index_uid {
@@ -325,6 +342,16 @@ impl KindWithContent {
versioning::VERSION_PATCH,
),
}),
KindWithContent::IndexCompaction { index_uid } => Some(Details::IndexCompaction {
index_uid: index_uid.clone(),
pre_compaction_size: None,
post_compaction_size: None,
}),
KindWithContent::NetworkTopologyChange { .. } => Some(Details::NetworkTopologyChange {
moved_documents: 0,
received_documents: 0,
message: "processing tasks for previous network versions".into(),
}),
}
}
@@ -377,7 +404,7 @@ impl KindWithContent {
})
}
KindWithContent::IndexSwap { .. } => {
todo!()
unimplemented!("do not call `default_finished_details` for `IndexSwap` tasks")
}
KindWithContent::TaskCancelation { query, tasks } => Some(Details::TaskCancelation {
matched_tasks: tasks.len(),
@@ -407,6 +434,14 @@ impl KindWithContent {
versioning::VERSION_PATCH,
),
}),
KindWithContent::IndexCompaction { index_uid } => Some(Details::IndexCompaction {
index_uid: index_uid.clone(),
pre_compaction_size: None,
post_compaction_size: None,
}),
KindWithContent::NetworkTopologyChange(network_topology_change) => {
Some(network_topology_change.to_details())
}
}
}
}
@@ -469,6 +504,14 @@ impl From<&KindWithContent> for Option<Details> {
versioning::VERSION_PATCH,
),
}),
KindWithContent::IndexCompaction { index_uid } => Some(Details::IndexCompaction {
index_uid: index_uid.clone(),
pre_compaction_size: None,
post_compaction_size: None,
}),
KindWithContent::NetworkTopologyChange(network_topology_change) => {
Some(network_topology_change.to_details())
}
}
}
}
@@ -579,6 +622,8 @@ pub enum Kind {
SnapshotCreation,
Export,
UpgradeDatabase,
IndexCompaction,
NetworkTopologyChange,
}
impl Kind {
@@ -590,13 +635,15 @@ impl Kind {
| Kind::SettingsUpdate
| Kind::IndexCreation
| Kind::IndexDeletion
| Kind::IndexUpdate => true,
| Kind::IndexUpdate
| Kind::IndexCompaction => true,
Kind::IndexSwap
| Kind::TaskCancelation
| Kind::TaskDeletion
| Kind::DumpCreation
| Kind::Export
| Kind::UpgradeDatabase
| Kind::NetworkTopologyChange
| Kind::SnapshotCreation => false,
}
}
@@ -618,6 +665,8 @@ impl Display for Kind {
Kind::SnapshotCreation => write!(f, "snapshotCreation"),
Kind::Export => write!(f, "export"),
Kind::UpgradeDatabase => write!(f, "upgradeDatabase"),
Kind::IndexCompaction => write!(f, "indexCompaction"),
Kind::NetworkTopologyChange => write!(f, "networkTopologyChange"),
}
}
}
@@ -653,6 +702,10 @@ impl FromStr for Kind {
Ok(Kind::Export)
} else if kind.eq_ignore_ascii_case("upgradeDatabase") {
Ok(Kind::UpgradeDatabase)
} else if kind.eq_ignore_ascii_case("indexCompaction") {
Ok(Kind::IndexCompaction)
} else if kind.eq_ignore_ascii_case("networkTopologyChange") {
Ok(Kind::NetworkTopologyChange)
} else {
Err(ParseTaskKindError(kind.to_owned()))
}
@@ -738,36 +791,16 @@ pub enum Details {
from: (u32, u32, u32),
to: (u32, u32, u32),
},
}
#[derive(Debug, PartialEq, Clone, Serialize, Deserialize, ToSchema)]
#[serde(untagged, rename_all = "camelCase")]
pub enum TaskNetwork {
Origin { origin: Origin },
Remotes { remote_tasks: BTreeMap<String, RemoteTask> },
}
#[derive(Debug, PartialEq, Clone, Serialize, Deserialize, ToSchema)]
#[serde(rename_all = "camelCase")]
pub struct Origin {
pub remote_name: String,
pub task_uid: usize,
}
#[derive(Debug, PartialEq, Clone, Serialize, Deserialize, ToSchema)]
#[serde(rename_all = "camelCase")]
pub struct RemoteTask {
#[serde(skip_serializing_if = "Option::is_none")]
task_uid: Option<TaskId>,
error: Option<ResponseError>,
}
impl From<Result<TaskId, ResponseError>> for RemoteTask {
fn from(res: Result<TaskId, ResponseError>) -> RemoteTask {
match res {
Ok(task_uid) => RemoteTask { task_uid: Some(task_uid), error: None },
Err(err) => RemoteTask { task_uid: None, error: Some(err) },
}
}
IndexCompaction {
index_uid: String,
pre_compaction_size: Option<Byte>,
post_compaction_size: Option<Byte>,
},
NetworkTopologyChange {
moved_documents: u64,
received_documents: u64,
message: String,
},
}
#[derive(Debug, PartialEq, Clone, Serialize, Deserialize, ToSchema)]
@@ -800,12 +833,19 @@ impl Details {
Self::ClearAll { deleted_documents } => *deleted_documents = Some(0),
Self::TaskCancelation { canceled_tasks, .. } => *canceled_tasks = Some(0),
Self::TaskDeletion { deleted_tasks, .. } => *deleted_tasks = Some(0),
Self::IndexCompaction { pre_compaction_size, post_compaction_size, .. } => {
*pre_compaction_size = None;
*post_compaction_size = None;
}
Self::SettingsUpdate { .. }
| Self::IndexInfo { .. }
| Self::Dump { .. }
| Self::Export { .. }
| Self::UpgradeDatabase { .. }
| Self::IndexSwap { .. } => (),
Self::NetworkTopologyChange { moved_documents: _, received_documents: _, message } => {
*message = format!("Failed. Previous status: {}", message);
}
}
details

View File

@@ -11,6 +11,24 @@ pub struct Webhook {
pub headers: BTreeMap<String, String>,
}
impl Webhook {
pub fn redact_authorization_header(&mut self) {
// headers are case insensitive, so to make the redaction robust we iterate over qualifying headers
// rather than getting one canonical `Authorization` header.
for value in self
.headers
.iter_mut()
.filter_map(|(name, value)| name.eq_ignore_ascii_case("authorization").then_some(value))
{
if value.starts_with("Bearer ") {
crate::settings::hide_secret(value, "Bearer ".len());
} else {
crate::settings::hide_secret(value, 0);
}
}
}
}
#[derive(Debug, Serialize, Default, Clone, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct WebhooksView {

View File

@@ -91,7 +91,7 @@ time = { version = "0.3.41", features = [
] }
tokio = { version = "1.45.1", features = ["full"] }
toml = "0.8.23"
uuid = { version = "1.17.0", features = ["serde", "v4"] }
uuid = { version = "1.18.0", features = ["serde", "v4", "v7"] }
serde_urlencoded = "0.7.1"
termcolor = "1.4.1"
url = { version = "2.5.4", features = ["serde"] }

View File

@@ -205,7 +205,10 @@ struct Infos {
experimental_no_snapshot_compaction: bool,
experimental_no_edition_2024_for_dumps: bool,
experimental_no_edition_2024_for_settings: bool,
experimental_no_edition_2024_for_prefix_post_processing: bool,
experimental_no_edition_2024_for_facet_post_processing: bool,
experimental_vector_store_setting: bool,
experimental_personalization: bool,
gpu_enabled: bool,
db_path: bool,
import_dump: bool,
@@ -215,6 +218,7 @@ struct Infos {
import_snapshot: bool,
schedule_snapshot: Option<u64>,
snapshot_dir: bool,
uses_s3_snapshots: bool,
ignore_missing_snapshot: bool,
ignore_snapshot_if_db_exists: bool,
http_addr: bool,
@@ -283,6 +287,8 @@ impl Infos {
indexer_options,
config_file_path,
no_analytics: _,
experimental_personalization_api_key,
s3_snapshot_options,
} = options;
let schedule_snapshot = match schedule_snapshot {
@@ -296,6 +302,8 @@ impl Infos {
skip_index_budget: _,
experimental_no_edition_2024_for_settings,
experimental_no_edition_2024_for_dumps,
experimental_no_edition_2024_for_prefix_post_processing,
experimental_no_edition_2024_for_facet_post_processing,
} = indexer_options;
let RuntimeTogglableFeatures {
@@ -344,6 +352,7 @@ impl Infos {
import_snapshot: import_snapshot.is_some(),
schedule_snapshot,
snapshot_dir: snapshot_dir != PathBuf::from("snapshots/"),
uses_s3_snapshots: s3_snapshot_options.is_some(),
ignore_missing_snapshot,
ignore_snapshot_if_db_exists,
http_addr: http_addr != default_http_addr(),
@@ -365,6 +374,9 @@ impl Infos {
ssl_resumption,
ssl_tickets,
experimental_no_edition_2024_for_settings,
experimental_no_edition_2024_for_prefix_post_processing,
experimental_no_edition_2024_for_facet_post_processing,
experimental_personalization: experimental_personalization_api_key.is_some(),
}
}
}

View File

@@ -6,12 +6,17 @@ use meilisearch_types::error::{Code, ErrorCode, ResponseError};
use meilisearch_types::index_uid::{IndexUid, IndexUidFormatError};
use meilisearch_types::milli;
use meilisearch_types::milli::OrderBy;
use meilisearch_types::tasks::enterprise_edition::network::headers::{
PROXY_IMPORT_DOCS_HEADER, PROXY_IMPORT_INDEX_COUNT_HEADER, PROXY_IMPORT_INDEX_HEADER,
PROXY_IMPORT_REMOTE_HEADER, PROXY_IMPORT_TASK_KEY_HEADER, PROXY_IMPORT_TOTAL_INDEX_DOCS_HEADER,
};
use serde_json::Value;
use tokio::task::JoinError;
use crate::routes::indexes::{PROXY_ORIGIN_REMOTE_HEADER, PROXY_ORIGIN_TASK_UID_HEADER};
#[derive(Debug, thiserror::Error)]
#[allow(clippy::large_enum_variant)]
pub enum MeilisearchHttpError {
#[error("A Content-Type header is missing. Accepted values for the Content-Type header are: {}",
.0.iter().map(|s| format!("`{}`", s)).collect::<Vec<_>>().join(", "))]
@@ -37,6 +42,8 @@ pub enum MeilisearchHttpError {
PaginationInFederatedQuery(usize, &'static str),
#[error("Inside `.queries[{0}]`: Using facet options is not allowed in federated queries.\n - Hint: remove `facets` from query #{0} or remove `federation` from the request\n - Hint: pass `federation.facetsByIndex.{1}: {2:?}` for facets in federated search")]
FacetsInFederatedQuery(usize, String, Vec<String>),
#[error("Inside `.queries[{0}]`: Using `.personalize` is not allowed in federated queries.\n - Hint: remove `personalize` from query #{0} or remove `federation` from the request")]
PersonalizationInFederatedQuery(usize),
#[error("Inconsistent order for values in facet `{facet}`: index `{previous_uid}` orders {previous_facet_order}, but index `{current_uid}` orders {index_facet_order}.\n - Hint: Remove `federation.mergeFacets` or change `faceting.sortFacetValuesBy` to be consistent in settings.")]
InconsistentFacetOrder {
facet: String,
@@ -90,8 +97,49 @@ pub enum MeilisearchHttpError {
} else { PROXY_ORIGIN_TASK_UID_HEADER }
)]
InconsistentOriginHeaders { is_remote_missing: bool },
#[error("Inconsistent `Import` headers: {remote}: {remote_status}, {index}: {index_status}, {docs}: {docs_status}.\n - Hint: either all three headers should be provided, or none of them",
remote = PROXY_IMPORT_REMOTE_HEADER,
remote_status = if *is_remote_missing { "missing" } else{ "provided" },
index = PROXY_IMPORT_INDEX_HEADER,
index_status = if *is_index_missing { "missing" } else { "provided" },
docs = PROXY_IMPORT_DOCS_HEADER,
docs_status = if *is_docs_missing { "missing" } else { "provided" }
)]
InconsistentImportHeaders {
is_remote_missing: bool,
is_index_missing: bool,
is_docs_missing: bool,
},
#[error("Inconsistent `Import-Metadata` headers: {index_count}: {index_count_status}, {task_key}: {task_key_status}, {total_index_documents}: {total_index_documents_status}.\n - Hint: either all three headers should be provided, or none of them",
index_count = PROXY_IMPORT_INDEX_COUNT_HEADER,
index_count_status = if *is_index_count_missing { "missing" } else { "provided"},
task_key = PROXY_IMPORT_TASK_KEY_HEADER,
task_key_status = if *is_task_key_missing { "missing" } else { "provided"},
total_index_documents = PROXY_IMPORT_TOTAL_INDEX_DOCS_HEADER,
total_index_documents_status = if *is_total_index_documents_missing { "missing" } else { "provided"},
)]
InconsistentImportMetadataHeaders {
is_index_count_missing: bool,
is_task_key_missing: bool,
is_total_index_documents_missing: bool,
},
#[error(
"Inconsistent task network headers: origin headers: {origin_status}, import headers: {import_status}, import metadata: {import_metadata_status}",
origin_status = if *is_missing_origin { "missing"} else { "present" },
import_status = if *is_missing_import { "missing"} else { "present" },
import_metadata_status = if *is_missing_import_metadata { "missing"} else { "present" })]
InconsistentTaskNetworkHeaders {
is_missing_origin: bool,
is_missing_import: bool,
is_missing_import_metadata: bool,
},
#[error("Invalid value for header {header_name}: {msg}")]
InvalidHeaderValue { header_name: &'static str, msg: String },
#[error("This remote is not the leader of the network.\n - Note: only the leader `{leader}` can receive new tasks.")]
NotLeader { leader: String },
#[error("Unexpected `previousRemotes` in network call.\n - Note: `previousRemote` is reserved for internal use.")]
UnexpectedNetworkPreviousRemotes,
}
impl MeilisearchHttpError {
@@ -136,10 +184,20 @@ impl ErrorCode for MeilisearchHttpError {
MeilisearchHttpError::InconsistentFacetOrder { .. } => {
Code::InvalidMultiSearchFacetOrder
}
MeilisearchHttpError::InconsistentOriginHeaders { .. } => {
MeilisearchHttpError::PersonalizationInFederatedQuery(_) => {
Code::InvalidMultiSearchQueryPersonalization
}
MeilisearchHttpError::InconsistentOriginHeaders { .. }
| MeilisearchHttpError::InconsistentImportHeaders { .. }
| MeilisearchHttpError::InconsistentImportMetadataHeaders { .. }
| MeilisearchHttpError::InconsistentTaskNetworkHeaders { .. } => {
Code::InconsistentDocumentChangeHeaders
}
MeilisearchHttpError::InvalidHeaderValue { .. } => Code::InvalidHeaderValue,
MeilisearchHttpError::NotLeader { .. } => Code::NotLeader,
MeilisearchHttpError::UnexpectedNetworkPreviousRemotes => {
Code::UnexpectedNetworkPreviousRemotes
}
}
}
}

View File

@@ -1,4 +1,6 @@
#![allow(clippy::result_large_err)]
#![allow(rustdoc::private_intra_doc_links)]
#[macro_use]
pub mod error;
pub mod analytics;
@@ -9,6 +11,7 @@ pub mod middleware;
pub mod option;
#[cfg(test)]
mod option_test;
pub mod personalization;
pub mod routes;
pub mod search;
pub mod search_queue;
@@ -56,6 +59,7 @@ use tracing::{error, info_span};
use tracing_subscriber::filter::Targets;
use crate::error::MeilisearchHttpError;
use crate::personalization::PersonalizationService;
/// Default number of simultaneously opened indexes.
///
@@ -126,12 +130,8 @@ pub type LogStderrType = tracing_subscriber::filter::Filtered<
>;
pub fn create_app(
index_scheduler: Data<IndexScheduler>,
auth_controller: Data<AuthController>,
search_queue: Data<SearchQueue>,
services: ServicesData,
opt: Opt,
logs: (LogRouteHandle, LogStderrHandle),
analytics: Data<Analytics>,
enable_dashboard: bool,
) -> actix_web::App<
impl ServiceFactory<
@@ -143,17 +143,7 @@ pub fn create_app(
>,
> {
let app = actix_web::App::new()
.configure(|s| {
configure_data(
s,
index_scheduler.clone(),
auth_controller.clone(),
search_queue.clone(),
&opt,
logs,
analytics.clone(),
)
})
.configure(|s| configure_data(s, services, &opt))
.configure(routes::configure)
.configure(|s| dashboard(s, enable_dashboard));
@@ -214,7 +204,10 @@ enum OnFailure {
KeepDb,
}
pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc<IndexScheduler>, Arc<AuthController>)> {
pub fn setup_meilisearch(
opt: &Opt,
handle: tokio::runtime::Handle,
) -> anyhow::Result<(Arc<IndexScheduler>, Arc<AuthController>)> {
let index_scheduler_opt = IndexSchedulerOptions {
version_file_path: opt.db_path.join(VERSION_FILE_NAME),
auth_path: opt.db_path.join("auth"),
@@ -228,7 +221,11 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc<IndexScheduler>, Arc<
task_db_size: opt.max_task_db_size.as_u64() as usize,
index_base_map_size: opt.max_index_size.as_u64() as usize,
enable_mdb_writemap: opt.experimental_reduce_indexing_memory_usage,
indexer_config: Arc::new((&opt.indexer_options).try_into()?),
indexer_config: Arc::new({
let s3_snapshot_options =
opt.s3_snapshot_options.clone().map(|opt| opt.try_into()).transpose()?;
IndexerConfig { s3_snapshot_options, ..(&opt.indexer_options).try_into()? }
}),
autobatching_enabled: true,
cleanup_enabled: !opt.experimental_replication_parameters,
max_number_of_tasks: 1_000_000,
@@ -254,6 +251,7 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc<IndexScheduler>, Arc<
index_scheduler_opt,
OnFailure::RemoveDb,
binary_version, // the db is empty
handle,
)?,
Err(e) => {
std::fs::remove_dir_all(&opt.db_path)?;
@@ -271,7 +269,7 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc<IndexScheduler>, Arc<
bail!("snapshot doesn't exist at {}", snapshot_path.display())
// the snapshot and the db exist, and we can ignore the snapshot because of the ignore_snapshot_if_db_exists flag
} else {
open_or_create_database(opt, index_scheduler_opt, empty_db, binary_version)?
open_or_create_database(opt, index_scheduler_opt, empty_db, binary_version, handle)?
}
} else if let Some(ref path) = opt.import_dump {
let src_path_exists = path.exists();
@@ -282,6 +280,7 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc<IndexScheduler>, Arc<
index_scheduler_opt,
OnFailure::RemoveDb,
binary_version, // the db is empty
handle,
)?;
match import_dump(&opt.db_path, path, &mut index_scheduler, &mut auth_controller) {
Ok(()) => (index_scheduler, auth_controller),
@@ -302,10 +301,10 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc<IndexScheduler>, Arc<
// the dump and the db exist and we can ignore the dump because of the ignore_dump_if_db_exists flag
// or, the dump is missing but we can ignore that because of the ignore_missing_dump flag
} else {
open_or_create_database(opt, index_scheduler_opt, empty_db, binary_version)?
open_or_create_database(opt, index_scheduler_opt, empty_db, binary_version, handle)?
}
} else {
open_or_create_database(opt, index_scheduler_opt, empty_db, binary_version)?
open_or_create_database(opt, index_scheduler_opt, empty_db, binary_version, handle)?
};
// We create a loop in a thread that registers snapshotCreation tasks
@@ -336,6 +335,7 @@ fn open_or_create_database_unchecked(
index_scheduler_opt: IndexSchedulerOptions,
on_failure: OnFailure,
version: (u32, u32, u32),
handle: tokio::runtime::Handle,
) -> anyhow::Result<(IndexScheduler, AuthController)> {
// we don't want to create anything in the data.ms yet, thus we
// wrap our two builders in a closure that'll be executed later.
@@ -343,7 +343,7 @@ fn open_or_create_database_unchecked(
let auth_env = open_auth_store_env(&index_scheduler_opt.auth_path).unwrap();
let auth_controller = AuthController::new(auth_env.clone(), &opt.master_key);
let index_scheduler_builder = || -> anyhow::Result<_> {
Ok(IndexScheduler::new(index_scheduler_opt, auth_env, version)?)
Ok(IndexScheduler::new(index_scheduler_opt, auth_env, version, Some(handle))?)
};
match (
@@ -450,6 +450,7 @@ fn open_or_create_database(
index_scheduler_opt: IndexSchedulerOptions,
empty_db: bool,
binary_version: (u32, u32, u32),
handle: tokio::runtime::Handle,
) -> anyhow::Result<(IndexScheduler, AuthController)> {
let version = if !empty_db {
check_version(opt, &index_scheduler_opt, binary_version)?
@@ -457,7 +458,7 @@ fn open_or_create_database(
binary_version
};
open_or_create_database_unchecked(opt, index_scheduler_opt, OnFailure::KeepDb, version)
open_or_create_database_unchecked(opt, index_scheduler_opt, OnFailure::KeepDb, version, handle)
}
fn import_dump(
@@ -525,7 +526,11 @@ fn import_dump(
let indexer_config = if base_config.max_threads.is_none() {
let (thread_pool, _) = default_thread_pool_and_threads();
let _config = IndexerConfig { thread_pool, ..*base_config };
let _config = IndexerConfig {
thread_pool,
s3_snapshot_options: base_config.s3_snapshot_options.clone(),
..*base_config
};
backup_config = _config;
&backup_config
} else {
@@ -673,23 +678,26 @@ fn import_dump(
Ok(index_scheduler_dump.finish()?)
}
pub fn configure_data(
config: &mut web::ServiceConfig,
index_scheduler: Data<IndexScheduler>,
auth: Data<AuthController>,
search_queue: Data<SearchQueue>,
opt: &Opt,
(logs_route, logs_stderr): (LogRouteHandle, LogStderrHandle),
analytics: Data<Analytics>,
) {
pub fn configure_data(config: &mut web::ServiceConfig, services: ServicesData, opt: &Opt) {
let ServicesData {
index_scheduler,
auth,
search_queue,
personalization_service,
logs_route_handle,
logs_stderr_handle,
analytics,
} = services;
let http_payload_size_limit = opt.http_payload_size_limit.as_u64() as usize;
config
.app_data(index_scheduler)
.app_data(auth)
.app_data(search_queue)
.app_data(analytics)
.app_data(web::Data::new(logs_route))
.app_data(web::Data::new(logs_stderr))
.app_data(personalization_service)
.app_data(logs_route_handle)
.app_data(logs_stderr_handle)
.app_data(web::Data::new(opt.clone()))
.app_data(
web::JsonConfig::default()
@@ -750,3 +758,14 @@ pub fn dashboard(config: &mut web::ServiceConfig, enable_frontend: bool) {
pub fn dashboard(config: &mut web::ServiceConfig, _enable_frontend: bool) {
config.service(web::resource("/").route(web::get().to(routes::running)));
}
#[derive(Clone)]
pub struct ServicesData {
pub index_scheduler: Data<IndexScheduler>,
pub auth: Data<AuthController>,
pub search_queue: Data<SearchQueue>,
pub personalization_service: Data<PersonalizationService>,
pub logs_route_handle: Data<LogRouteHandle>,
pub logs_stderr_handle: Data<LogStderrHandle>,
pub analytics: Data<Analytics>,
}

View File

@@ -14,10 +14,11 @@ use index_scheduler::IndexScheduler;
use is_terminal::IsTerminal;
use meilisearch::analytics::Analytics;
use meilisearch::option::LogMode;
use meilisearch::personalization::PersonalizationService;
use meilisearch::search_queue::SearchQueue;
use meilisearch::{
analytics, create_app, setup_meilisearch, LogRouteHandle, LogRouteType, LogStderrHandle,
LogStderrType, Opt, SubscriberForSecondLayer,
LogStderrType, Opt, ServicesData, SubscriberForSecondLayer,
};
use meilisearch_auth::{generate_master_key, AuthController, MASTER_KEY_MIN_SIZE};
use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor};
@@ -76,7 +77,10 @@ fn on_panic(info: &std::panic::PanicHookInfo) {
#[actix_web::main]
async fn main() -> anyhow::Result<()> {
try_main().await.inspect_err(|error| {
// won't panic inside of tokio::main
let runtime = tokio::runtime::Handle::current();
try_main(runtime).await.inspect_err(|error| {
tracing::error!(%error);
let mut current = error.source();
let mut depth = 0;
@@ -88,7 +92,7 @@ async fn main() -> anyhow::Result<()> {
})
}
async fn try_main() -> anyhow::Result<()> {
async fn try_main(runtime: tokio::runtime::Handle) -> anyhow::Result<()> {
let (opt, config_read_from) = Opt::try_build()?;
std::panic::set_hook(Box::new(on_panic));
@@ -122,7 +126,7 @@ async fn try_main() -> anyhow::Result<()> {
_ => (),
}
let (index_scheduler, auth_controller) = setup_meilisearch(&opt)?;
let (index_scheduler, auth_controller) = setup_meilisearch(&opt, runtime)?;
let analytics =
analytics::Analytics::new(&opt, index_scheduler.clone(), auth_controller.clone()).await;
@@ -149,8 +153,15 @@ async fn run_http(
let enable_dashboard = &opt.env == "development";
let opt_clone = opt.clone();
let index_scheduler = Data::from(index_scheduler);
let auth_controller = Data::from(auth_controller);
let auth = Data::from(auth_controller);
let analytics = Data::from(analytics);
// Create personalization service with API key from options
let personalization_service = Data::new(
opt.experimental_personalization_api_key
.clone()
.map(PersonalizationService::cohere)
.unwrap_or_else(PersonalizationService::disabled),
);
let search_queue = SearchQueue::new(
opt.experimental_search_queue_size,
available_parallelism()
@@ -162,21 +173,25 @@ async fn run_http(
usize::from(opt.experimental_drop_search_after) as u64
));
let search_queue = Data::new(search_queue);
let (logs_route_handle, logs_stderr_handle) = logs;
let logs_route_handle = Data::new(logs_route_handle);
let logs_stderr_handle = Data::new(logs_stderr_handle);
let http_server = HttpServer::new(move || {
create_app(
index_scheduler.clone(),
auth_controller.clone(),
search_queue.clone(),
opt.clone(),
logs.clone(),
analytics.clone(),
enable_dashboard,
)
})
// Disable signals allows the server to terminate immediately when a user enter CTRL-C
.disable_signals()
.keep_alive(KeepAlive::Os);
let services = ServicesData {
index_scheduler,
auth,
search_queue,
personalization_service,
logs_route_handle,
logs_stderr_handle,
analytics,
};
let http_server =
HttpServer::new(move || create_app(services.clone(), opt.clone(), enable_dashboard))
// Disable signals allows the server to terminate immediately when a user enter CTRL-C
.disable_signals()
.keep_alive(KeepAlive::Os);
if let Some(config) = opt_clone.get_ssl_config()? {
http_server.bind_rustls_0_23(opt_clone.http_addr, config)?.run().await?;

View File

@@ -114,4 +114,9 @@ lazy_static! {
"Meilisearch Task Queue Size Until Stop Registering",
))
.expect("Can't create a metric");
pub static ref MEILISEARCH_PERSONALIZED_SEARCH_REQUESTS: IntGauge = register_int_gauge!(opts!(
"meilisearch_personalized_search_requests",
"Meilisearch number of search requests with personalization"
))
.expect("Can't create a metric");
}

View File

@@ -7,12 +7,13 @@ use std::ops::Deref;
use std::path::PathBuf;
use std::str::FromStr;
use std::sync::Arc;
use std::time::Duration;
use std::{env, fmt, fs};
use byte_unit::{Byte, ParseError, UnitType};
use clap::Parser;
use meilisearch_types::features::InstanceTogglableFeatures;
use meilisearch_types::milli::update::IndexerConfig;
use meilisearch_types::milli::update::{IndexerConfig, S3SnapshotOptions};
use meilisearch_types::milli::ThreadPoolNoAbortBuilder;
use rustls::server::{ServerSessionMemoryCache, WebPkiClientVerifier};
use rustls::RootCertStore;
@@ -55,6 +56,10 @@ const MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE: &str = "MEILI_EXPERIMENTAL_ENABLE_LO
const MEILI_EXPERIMENTAL_CONTAINS_FILTER: &str = "MEILI_EXPERIMENTAL_CONTAINS_FILTER";
const MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_SETTINGS: &str =
"MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_SETTINGS";
const MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_FACET_POST_PROCESSING: &str =
"MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_FACET_POST_PROCESSING";
const MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_PREFIX_POST_PROCESSING: &str =
"MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_PREFIX_POST_PROCESSING";
const MEILI_EXPERIMENTAL_ENABLE_METRICS: &str = "MEILI_EXPERIMENTAL_ENABLE_METRICS";
const MEILI_EXPERIMENTAL_SEARCH_QUEUE_SIZE: &str = "MEILI_EXPERIMENTAL_SEARCH_QUEUE_SIZE";
const MEILI_EXPERIMENTAL_DROP_SEARCH_AFTER: &str = "MEILI_EXPERIMENTAL_DROP_SEARCH_AFTER";
@@ -70,6 +75,22 @@ const MEILI_EXPERIMENTAL_EMBEDDING_CACHE_ENTRIES: &str =
const MEILI_EXPERIMENTAL_NO_SNAPSHOT_COMPACTION: &str = "MEILI_EXPERIMENTAL_NO_SNAPSHOT_COMPACTION";
const MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_DUMPS: &str =
"MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_DUMPS";
const MEILI_EXPERIMENTAL_PERSONALIZATION_API_KEY: &str =
"MEILI_EXPERIMENTAL_PERSONALIZATION_API_KEY";
// Related to S3 snapshots
const MEILI_S3_BUCKET_URL: &str = "MEILI_S3_BUCKET_URL";
const MEILI_S3_BUCKET_REGION: &str = "MEILI_S3_BUCKET_REGION";
const MEILI_S3_BUCKET_NAME: &str = "MEILI_S3_BUCKET_NAME";
const MEILI_S3_SNAPSHOT_PREFIX: &str = "MEILI_S3_SNAPSHOT_PREFIX";
const MEILI_S3_ACCESS_KEY: &str = "MEILI_S3_ACCESS_KEY";
const MEILI_S3_SECRET_KEY: &str = "MEILI_S3_SECRET_KEY";
const MEILI_EXPERIMENTAL_S3_MAX_IN_FLIGHT_PARTS: &str = "MEILI_EXPERIMENTAL_S3_MAX_IN_FLIGHT_PARTS";
const MEILI_EXPERIMENTAL_S3_COMPRESSION_LEVEL: &str = "MEILI_EXPERIMENTAL_S3_COMPRESSION_LEVEL";
const MEILI_EXPERIMENTAL_S3_SIGNATURE_DURATION_SECONDS: &str =
"MEILI_EXPERIMENTAL_S3_SIGNATURE_DURATION_SECONDS";
const MEILI_EXPERIMENTAL_S3_MULTIPART_PART_SIZE: &str = "MEILI_EXPERIMENTAL_S3_MULTIPART_PART_SIZE";
const DEFAULT_CONFIG_FILE_PATH: &str = "./config.toml";
const DEFAULT_DB_PATH: &str = "./data.ms";
const DEFAULT_HTTP_ADDR: &str = "localhost:7700";
@@ -79,6 +100,10 @@ const DEFAULT_SNAPSHOT_DIR: &str = "snapshots/";
const DEFAULT_SNAPSHOT_INTERVAL_SEC: u64 = 86400;
const DEFAULT_SNAPSHOT_INTERVAL_SEC_STR: &str = "86400";
const DEFAULT_DUMP_DIR: &str = "dumps/";
const DEFAULT_S3_SNAPSHOT_MAX_IN_FLIGHT_PARTS: NonZeroUsize = NonZeroUsize::new(10).unwrap();
const DEFAULT_S3_SNAPSHOT_COMPRESSION_LEVEL: u32 = 0;
const DEFAULT_S3_SNAPSHOT_SIGNATURE_DURATION_SECONDS: u64 = 8 * 3600; // 8 hours
const DEFAULT_S3_SNAPSHOT_MULTIPART_PART_SIZE: Byte = Byte::from_u64(375 * 1024 * 1024); // 375 MiB
const MEILI_MAX_INDEXING_MEMORY: &str = "MEILI_MAX_INDEXING_MEMORY";
const MEILI_MAX_INDEXING_THREADS: &str = "MEILI_MAX_INDEXING_THREADS";
@@ -471,10 +496,20 @@ pub struct Opt {
#[serde(default)]
pub experimental_no_snapshot_compaction: bool,
/// Experimental personalization API key feature.
///
/// Sets the API key for personalization features.
#[clap(long, env = MEILI_EXPERIMENTAL_PERSONALIZATION_API_KEY)]
pub experimental_personalization_api_key: Option<String>,
#[serde(flatten)]
#[clap(flatten)]
pub indexer_options: IndexerOpts,
#[serde(flatten)]
#[clap(flatten)]
pub s3_snapshot_options: Option<S3SnapshotOpts>,
/// Set the path to a configuration file that should be used to setup the engine.
/// Format must be TOML.
#[clap(long)]
@@ -576,6 +611,8 @@ impl Opt {
experimental_limit_batched_tasks_total_size,
experimental_embedding_cache_entries,
experimental_no_snapshot_compaction,
experimental_personalization_api_key,
s3_snapshot_options,
} = self;
export_to_env_if_not_present(MEILI_DB_PATH, db_path);
export_to_env_if_not_present(MEILI_HTTP_ADDR, http_addr);
@@ -676,7 +713,22 @@ impl Opt {
MEILI_EXPERIMENTAL_NO_SNAPSHOT_COMPACTION,
experimental_no_snapshot_compaction.to_string(),
);
if let Some(experimental_personalization_api_key) = experimental_personalization_api_key {
export_to_env_if_not_present(
MEILI_EXPERIMENTAL_PERSONALIZATION_API_KEY,
experimental_personalization_api_key,
);
}
indexer_options.export_to_env();
if let Some(s3_snapshot_options) = s3_snapshot_options {
#[cfg(not(unix))]
{
let _ = s3_snapshot_options;
panic!("S3 snapshot options are not supported on Windows");
}
#[cfg(unix)]
s3_snapshot_options.export_to_env();
}
}
pub fn get_ssl_config(&self) -> anyhow::Result<Option<rustls::ServerConfig>> {
@@ -772,6 +824,22 @@ pub struct IndexerOpts {
#[clap(long, env = MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_DUMPS)]
#[serde(default)]
pub experimental_no_edition_2024_for_dumps: bool,
/// Experimental no edition 2024 to compute prefixes. For more information,
/// see: <https://github.com/orgs/meilisearch/discussions/862>
///
/// Enables the experimental no edition 2024 to compute prefixes.
#[clap(long, env = MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_PREFIX_POST_PROCESSING)]
#[serde(default)]
pub experimental_no_edition_2024_for_prefix_post_processing: bool,
/// Experimental no edition 2024 to compute facets. For more information,
/// see: <https://github.com/orgs/meilisearch/discussions/862>
///
/// Enables the experimental no edition 2024 to compute facets.
#[clap(long, env = MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_FACET_POST_PROCESSING)]
#[serde(default)]
pub experimental_no_edition_2024_for_facet_post_processing: bool,
}
impl IndexerOpts {
@@ -783,6 +851,8 @@ impl IndexerOpts {
skip_index_budget: _,
experimental_no_edition_2024_for_settings,
experimental_no_edition_2024_for_dumps,
experimental_no_edition_2024_for_prefix_post_processing,
experimental_no_edition_2024_for_facet_post_processing,
} = self;
if let Some(max_indexing_memory) = max_indexing_memory.0 {
export_to_env_if_not_present(
@@ -808,6 +878,18 @@ impl IndexerOpts {
experimental_no_edition_2024_for_dumps.to_string(),
);
}
if experimental_no_edition_2024_for_prefix_post_processing {
export_to_env_if_not_present(
MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_PREFIX_POST_PROCESSING,
experimental_no_edition_2024_for_prefix_post_processing.to_string(),
);
}
if experimental_no_edition_2024_for_facet_post_processing {
export_to_env_if_not_present(
MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_FACET_POST_PROCESSING,
experimental_no_edition_2024_for_facet_post_processing.to_string(),
);
}
}
}
@@ -815,6 +897,16 @@ impl TryFrom<&IndexerOpts> for IndexerConfig {
type Error = anyhow::Error;
fn try_from(other: &IndexerOpts) -> Result<Self, Self::Error> {
let IndexerOpts {
max_indexing_memory,
max_indexing_threads,
skip_index_budget,
experimental_no_edition_2024_for_settings,
experimental_no_edition_2024_for_dumps,
experimental_no_edition_2024_for_prefix_post_processing,
experimental_no_edition_2024_for_facet_post_processing,
} = other;
let thread_pool = ThreadPoolNoAbortBuilder::new_for_indexing()
.num_threads(other.max_indexing_threads.unwrap_or_else(|| num_cpus::get() / 2))
.build()?;
@@ -822,17 +914,163 @@ impl TryFrom<&IndexerOpts> for IndexerConfig {
Ok(Self {
thread_pool,
log_every_n: Some(DEFAULT_LOG_EVERY_N),
max_memory: other.max_indexing_memory.map(|b| b.as_u64() as usize),
max_threads: *other.max_indexing_threads,
max_memory: max_indexing_memory.map(|b| b.as_u64() as usize),
max_threads: max_indexing_threads.0,
max_positions_per_attributes: None,
skip_index_budget: other.skip_index_budget,
experimental_no_edition_2024_for_settings: other
.experimental_no_edition_2024_for_settings,
experimental_no_edition_2024_for_dumps: other.experimental_no_edition_2024_for_dumps,
skip_index_budget: *skip_index_budget,
experimental_no_edition_2024_for_settings: *experimental_no_edition_2024_for_settings,
experimental_no_edition_2024_for_dumps: *experimental_no_edition_2024_for_dumps,
chunk_compression_type: Default::default(),
chunk_compression_level: Default::default(),
documents_chunk_size: Default::default(),
max_nb_chunks: Default::default(),
experimental_no_edition_2024_for_prefix_post_processing:
*experimental_no_edition_2024_for_prefix_post_processing,
experimental_no_edition_2024_for_facet_post_processing:
*experimental_no_edition_2024_for_facet_post_processing,
s3_snapshot_options: None,
})
}
}
#[derive(Debug, Clone, Parser, Deserialize)]
// This group is a bit tricky but makes it possible to require all listed fields if one of them
// is specified. It lets us keep an Option for the S3SnapshotOpts configuration.
// <https://github.com/clap-rs/clap/issues/5092#issuecomment-2616986075>
#[group(requires_all = ["s3_bucket_url", "s3_bucket_region", "s3_bucket_name", "s3_snapshot_prefix", "s3_access_key", "s3_secret_key"])]
pub struct S3SnapshotOpts {
/// The S3 bucket URL in the format https://s3.<region>.amazonaws.com.
#[clap(long, env = MEILI_S3_BUCKET_URL, required = false)]
#[serde(default)]
pub s3_bucket_url: String,
/// The region in the format us-east-1.
#[clap(long, env = MEILI_S3_BUCKET_REGION, required = false)]
#[serde(default)]
pub s3_bucket_region: String,
/// The bucket name.
#[clap(long, env = MEILI_S3_BUCKET_NAME, required = false)]
#[serde(default)]
pub s3_bucket_name: String,
/// The prefix path where to put the snapshot, uses normal slashes (/).
#[clap(long, env = MEILI_S3_SNAPSHOT_PREFIX, required = false)]
#[serde(default)]
pub s3_snapshot_prefix: String,
/// The S3 access key.
#[clap(long, env = MEILI_S3_ACCESS_KEY, required = false)]
#[serde(default)]
pub s3_access_key: String,
/// The S3 secret key.
#[clap(long, env = MEILI_S3_SECRET_KEY, required = false)]
#[serde(default)]
pub s3_secret_key: String,
/// The maximum number of parts that can be uploaded in parallel.
///
/// For more information, see <https://github.com/orgs/meilisearch/discussions/869>.
#[clap(long, env = MEILI_EXPERIMENTAL_S3_MAX_IN_FLIGHT_PARTS, default_value_t = default_experimental_s3_snapshot_max_in_flight_parts())]
#[serde(default = "default_experimental_s3_snapshot_max_in_flight_parts")]
pub experimental_s3_max_in_flight_parts: NonZeroUsize,
/// The compression level. Defaults to no compression (0).
///
/// For more information, see <https://github.com/orgs/meilisearch/discussions/869>.
#[clap(long, env = MEILI_EXPERIMENTAL_S3_COMPRESSION_LEVEL, default_value_t = default_experimental_s3_snapshot_compression_level())]
#[serde(default = "default_experimental_s3_snapshot_compression_level")]
pub experimental_s3_compression_level: u32,
/// The signature duration for the multipart upload.
///
/// For more information, see <https://github.com/orgs/meilisearch/discussions/869>.
#[clap(long, env = MEILI_EXPERIMENTAL_S3_SIGNATURE_DURATION_SECONDS, default_value_t = default_experimental_s3_snapshot_signature_duration_seconds())]
#[serde(default = "default_experimental_s3_snapshot_signature_duration_seconds")]
pub experimental_s3_signature_duration_seconds: u64,
/// The size of the the multipart parts.
///
/// Must not be less than 10MiB and larger than 8GiB. Yes,
/// twice the boundaries of the AWS S3 multipart upload
/// because we use it a bit differently internally.
///
/// For more information, see <https://github.com/orgs/meilisearch/discussions/869>.
#[clap(long, env = MEILI_EXPERIMENTAL_S3_MULTIPART_PART_SIZE, default_value_t = default_experimental_s3_snapshot_multipart_part_size())]
#[serde(default = "default_experimental_s3_snapshot_multipart_part_size")]
pub experimental_s3_multipart_part_size: Byte,
}
impl S3SnapshotOpts {
/// Exports the values to their corresponding env vars if they are not set.
pub fn export_to_env(self) {
let S3SnapshotOpts {
s3_bucket_url,
s3_bucket_region,
s3_bucket_name,
s3_snapshot_prefix,
s3_access_key,
s3_secret_key,
experimental_s3_max_in_flight_parts,
experimental_s3_compression_level,
experimental_s3_signature_duration_seconds,
experimental_s3_multipart_part_size,
} = self;
export_to_env_if_not_present(MEILI_S3_BUCKET_URL, s3_bucket_url);
export_to_env_if_not_present(MEILI_S3_BUCKET_REGION, s3_bucket_region);
export_to_env_if_not_present(MEILI_S3_BUCKET_NAME, s3_bucket_name);
export_to_env_if_not_present(MEILI_S3_SNAPSHOT_PREFIX, s3_snapshot_prefix);
export_to_env_if_not_present(MEILI_S3_ACCESS_KEY, s3_access_key);
export_to_env_if_not_present(MEILI_S3_SECRET_KEY, s3_secret_key);
export_to_env_if_not_present(
MEILI_EXPERIMENTAL_S3_MAX_IN_FLIGHT_PARTS,
experimental_s3_max_in_flight_parts.to_string(),
);
export_to_env_if_not_present(
MEILI_EXPERIMENTAL_S3_COMPRESSION_LEVEL,
experimental_s3_compression_level.to_string(),
);
export_to_env_if_not_present(
MEILI_EXPERIMENTAL_S3_SIGNATURE_DURATION_SECONDS,
experimental_s3_signature_duration_seconds.to_string(),
);
export_to_env_if_not_present(
MEILI_EXPERIMENTAL_S3_MULTIPART_PART_SIZE,
experimental_s3_multipart_part_size.to_string(),
);
}
}
impl TryFrom<S3SnapshotOpts> for S3SnapshotOptions {
type Error = anyhow::Error;
fn try_from(other: S3SnapshotOpts) -> Result<Self, Self::Error> {
let S3SnapshotOpts {
s3_bucket_url,
s3_bucket_region,
s3_bucket_name,
s3_snapshot_prefix,
s3_access_key,
s3_secret_key,
experimental_s3_max_in_flight_parts,
experimental_s3_compression_level,
experimental_s3_signature_duration_seconds,
experimental_s3_multipart_part_size,
} = other;
Ok(S3SnapshotOptions {
s3_bucket_url,
s3_bucket_region,
s3_bucket_name,
s3_snapshot_prefix,
s3_access_key,
s3_secret_key,
s3_max_in_flight_parts: experimental_s3_max_in_flight_parts,
s3_compression_level: experimental_s3_compression_level,
s3_signature_duration: Duration::from_secs(experimental_s3_signature_duration_seconds),
s3_multipart_part_size: experimental_s3_multipart_part_size.as_u64(),
})
}
}
@@ -1051,6 +1289,22 @@ fn default_snapshot_interval_sec() -> &'static str {
DEFAULT_SNAPSHOT_INTERVAL_SEC_STR
}
fn default_experimental_s3_snapshot_max_in_flight_parts() -> NonZeroUsize {
DEFAULT_S3_SNAPSHOT_MAX_IN_FLIGHT_PARTS
}
fn default_experimental_s3_snapshot_compression_level() -> u32 {
DEFAULT_S3_SNAPSHOT_COMPRESSION_LEVEL
}
fn default_experimental_s3_snapshot_signature_duration_seconds() -> u64 {
DEFAULT_S3_SNAPSHOT_SIGNATURE_DURATION_SECONDS
}
fn default_experimental_s3_snapshot_multipart_part_size() -> Byte {
DEFAULT_S3_SNAPSHOT_MULTIPART_PART_SIZE
}
fn default_dump_dir() -> PathBuf {
PathBuf::from(DEFAULT_DUMP_DIR)
}

View File

@@ -0,0 +1,366 @@
use crate::search::{Personalize, SearchResult};
use meilisearch_types::{
error::{Code, ErrorCode, ResponseError},
milli::TimeBudget,
};
use rand::Rng;
use reqwest::Client;
use serde::{Deserialize, Serialize};
use std::time::Duration;
use tracing::{debug, info, warn};
const COHERE_API_URL: &str = "https://api.cohere.ai/v1/rerank";
const MAX_RETRIES: u32 = 10;
#[derive(Debug, thiserror::Error)]
enum PersonalizationError {
#[error("Personalization service: HTTP request failed: {0}")]
Request(#[from] reqwest::Error),
#[error("Personalization service: Failed to parse response: {0}")]
Parse(String),
#[error("Personalization service: Cohere API error: {0}")]
Api(String),
#[error("Personalization service: Unauthorized: invalid API key")]
Unauthorized,
#[error("Personalization service: Rate limited: too many requests")]
RateLimited,
#[error("Personalization service: Bad request: {0}")]
BadRequest(String),
#[error("Personalization service: Internal server error: {0}")]
InternalServerError(String),
#[error("Personalization service: Network error: {0}")]
Network(String),
#[error("Personalization service: Deadline exceeded")]
DeadlineExceeded,
#[error(transparent)]
FeatureNotEnabled(#[from] index_scheduler::error::FeatureNotEnabledError),
}
impl ErrorCode for PersonalizationError {
fn error_code(&self) -> Code {
match self {
PersonalizationError::FeatureNotEnabled { .. } => Code::FeatureNotEnabled,
PersonalizationError::Unauthorized => Code::RemoteInvalidApiKey,
PersonalizationError::RateLimited => Code::TooManySearchRequests,
PersonalizationError::BadRequest(_) => Code::RemoteBadRequest,
PersonalizationError::InternalServerError(_) => Code::RemoteRemoteError,
PersonalizationError::Network(_) | PersonalizationError::Request(_) => {
Code::RemoteCouldNotSendRequest
}
PersonalizationError::Parse(_) | PersonalizationError::Api(_) => {
Code::RemoteBadResponse
}
PersonalizationError::DeadlineExceeded => Code::Internal, // should not be returned to the client
}
}
}
pub struct CohereService {
client: Client,
api_key: String,
}
impl CohereService {
pub fn new(api_key: String) -> Self {
info!("Personalization service initialized with Cohere API");
let client = Client::builder()
.timeout(Duration::from_secs(30))
.build()
.expect("Failed to create HTTP client");
Self { client, api_key }
}
pub async fn rerank_search_results(
&self,
search_result: SearchResult,
personalize: &Personalize,
query: Option<&str>,
time_budget: TimeBudget,
) -> Result<SearchResult, ResponseError> {
if time_budget.exceeded() {
warn!("Could not rerank due to deadline");
// If the deadline is exceeded, return the original search result instead of an error
return Ok(search_result);
}
// Extract user context from personalization
let user_context = personalize.user_context.as_str();
// Build the prompt by merging query and user context
let prompt = match query {
Some(q) => format!("User Context: {user_context}\nQuery: {q}"),
None => format!("User Context: {user_context}"),
};
// Extract documents for reranking
let documents: Vec<String> = search_result
.hits
.iter()
.map(|hit| {
// Convert the document to a string representation for reranking
serde_json::to_string(&hit.document).unwrap_or_else(|_| "{}".to_string())
})
.collect();
if documents.is_empty() {
return Ok(search_result);
}
// Call Cohere's rerank API with retry logic
let reranked_indices =
match self.call_rerank_with_retry(&prompt, &documents, time_budget).await {
Ok(indices) => indices,
Err(PersonalizationError::DeadlineExceeded) => {
// If the deadline is exceeded, return the original search result instead of an error
return Ok(search_result);
}
Err(e) => return Err(e.into()),
};
debug!("Cohere rerank successful, reordering {} results", search_result.hits.len());
// Reorder the hits based on Cohere's reranking
let mut reranked_hits = Vec::new();
for index in reranked_indices.iter() {
if let Some(hit) = search_result.hits.get(*index) {
reranked_hits.push(hit.clone());
}
}
Ok(SearchResult { hits: reranked_hits, ..search_result })
}
async fn call_rerank_with_retry(
&self,
query: &str,
documents: &[String],
time_budget: TimeBudget,
) -> Result<Vec<usize>, PersonalizationError> {
let request_body = CohereRerankRequest {
query: query.to_string(),
documents: documents.to_vec(),
model: "rerank-english-v3.0".to_string(),
};
// Retry loop similar to vector extraction
for attempt in 0..MAX_RETRIES {
let response_result = self.send_rerank_request(&request_body).await;
let retry_duration = match self.handle_response(response_result).await {
Ok(indices) => return Ok(indices),
Err(retry) => {
warn!("Cohere rerank attempt #{} failed: {}", attempt, retry.error);
if time_budget.exceeded() {
warn!("Could not rerank due to deadline");
return Err(PersonalizationError::DeadlineExceeded);
} else {
match retry.into_duration(attempt) {
Ok(d) => d,
Err(error) => return Err(error),
}
}
}
};
// randomly up to double the retry duration
let retry_duration = retry_duration
+ rand::thread_rng().gen_range(std::time::Duration::ZERO..retry_duration);
warn!("Retrying after {}ms", retry_duration.as_millis());
tokio::time::sleep(retry_duration).await;
}
// Final attempt without retry
let response_result = self.send_rerank_request(&request_body).await;
match self.handle_response(response_result).await {
Ok(indices) => Ok(indices),
Err(retry) => Err(retry.into_error()),
}
}
async fn send_rerank_request(
&self,
request_body: &CohereRerankRequest,
) -> Result<reqwest::Response, reqwest::Error> {
self.client
.post(COHERE_API_URL)
.header("Authorization", format!("Bearer {}", self.api_key))
.header("Content-Type", "application/json")
.json(request_body)
.send()
.await
}
async fn handle_response(
&self,
response_result: Result<reqwest::Response, reqwest::Error>,
) -> Result<Vec<usize>, Retry> {
let response = match response_result {
Ok(r) => r,
Err(e) if e.is_timeout() => {
return Err(Retry::retry_later(PersonalizationError::Network(format!(
"Request timeout: {}",
e
))));
}
Err(e) => {
return Err(Retry::retry_later(PersonalizationError::Network(format!(
"Network error: {}",
e
))));
}
};
let status = response.status();
let status_code = status.as_u16();
if status.is_success() {
let rerank_response: CohereRerankResponse = match response.json().await {
Ok(r) => r,
Err(e) => {
return Err(Retry::retry_later(PersonalizationError::Parse(format!(
"Failed to parse response: {}",
e
))));
}
};
// Extract indices from rerank results
let indices: Vec<usize> =
rerank_response.results.iter().map(|result| result.index as usize).collect();
return Ok(indices);
}
// Handle error status codes
let error_body = response.text().await.unwrap_or_else(|_| "Unknown error".to_string());
let retry = match status_code {
401 => Retry::give_up(PersonalizationError::Unauthorized),
429 => Retry::rate_limited(PersonalizationError::RateLimited),
400 => Retry::give_up(PersonalizationError::BadRequest(error_body)),
500..=599 => Retry::retry_later(PersonalizationError::InternalServerError(format!(
"Status {}: {}",
status_code, error_body
))),
402..=499 => Retry::give_up(PersonalizationError::Api(format!(
"Status {}: {}",
status_code, error_body
))),
_ => Retry::retry_later(PersonalizationError::Api(format!(
"Unexpected status {}: {}",
status_code, error_body
))),
};
Err(retry)
}
}
#[derive(Serialize)]
struct CohereRerankRequest {
query: String,
documents: Vec<String>,
model: String,
}
#[derive(Deserialize)]
struct CohereRerankResponse {
results: Vec<CohereRerankResult>,
}
#[derive(Deserialize)]
struct CohereRerankResult {
index: u32,
}
// Retry strategy similar to vector extraction
struct Retry {
error: PersonalizationError,
strategy: RetryStrategy,
}
enum RetryStrategy {
GiveUp,
Retry,
RetryAfterRateLimit,
}
impl Retry {
fn give_up(error: PersonalizationError) -> Self {
Self { error, strategy: RetryStrategy::GiveUp }
}
fn retry_later(error: PersonalizationError) -> Self {
Self { error, strategy: RetryStrategy::Retry }
}
fn rate_limited(error: PersonalizationError) -> Self {
Self { error, strategy: RetryStrategy::RetryAfterRateLimit }
}
fn into_duration(self, attempt: u32) -> Result<Duration, PersonalizationError> {
match self.strategy {
RetryStrategy::GiveUp => Err(self.error),
RetryStrategy::Retry => {
// Exponential backoff: 10^attempt milliseconds
Ok(Duration::from_millis((10u64).pow(attempt)))
}
RetryStrategy::RetryAfterRateLimit => {
// Longer backoff for rate limits: 100ms + exponential
Ok(Duration::from_millis(100 + (10u64).pow(attempt)))
}
}
}
fn into_error(self) -> PersonalizationError {
self.error
}
}
pub enum PersonalizationService {
Cohere(CohereService),
Disabled,
}
impl PersonalizationService {
pub fn cohere(api_key: String) -> Self {
// If the API key is empty, consider the personalization service as disabled
if api_key.trim().is_empty() {
Self::disabled()
} else {
Self::Cohere(CohereService::new(api_key))
}
}
pub fn disabled() -> Self {
debug!("Personalization service disabled");
Self::Disabled
}
pub async fn rerank_search_results(
&self,
search_result: SearchResult,
personalize: &Personalize,
query: Option<&str>,
time_budget: TimeBudget,
) -> Result<SearchResult, ResponseError> {
match self {
Self::Cohere(cohere_service) => {
cohere_service
.rerank_search_results(search_result, personalize, query, time_budget)
.await
}
Self::Disabled => Err(PersonalizationError::FeatureNotEnabled(
index_scheduler::error::FeatureNotEnabledError {
disabled_action: "reranking search results",
feature: "personalization",
issue_link: "https://github.com/orgs/meilisearch/discussions/866",
},
)
.into()),
}
}
}

View File

@@ -0,0 +1,84 @@
use actix_web::web::{self, Data};
use actix_web::{HttpRequest, HttpResponse};
use index_scheduler::IndexScheduler;
use meilisearch_types::error::ResponseError;
use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::keys::actions;
use meilisearch_types::tasks::KindWithContent;
use tracing::debug;
use utoipa::OpenApi;
use super::ActionPolicy;
use crate::analytics::Analytics;
use crate::extractors::authentication::GuardedData;
use crate::extractors::sequential_extractor::SeqHandler;
use crate::routes::SummarizedTaskView;
#[derive(OpenApi)]
#[openapi(
paths(compact),
tags(
(
name = "Compact an index",
description = "The /compact route uses compacts the database to reorganize and make it smaller and more efficient.",
external_docs(url = "https://www.meilisearch.com/docs/reference/api/compact"),
),
),
)]
pub struct CompactApi;
pub fn configure(cfg: &mut web::ServiceConfig) {
cfg.service(web::resource("").route(web::post().to(SeqHandler(compact))));
}
/// Compact an index
#[utoipa::path(
post,
path = "{indexUid}/compact",
tag = "Compact an index",
security(("Bearer" = ["search", "*"])),
params(("indexUid" = String, Path, example = "movies", description = "Index Unique Identifier", nullable = false)),
responses(
(status = ACCEPTED, description = "Task successfully enqueued", body = SummarizedTaskView, content_type = "application/json", example = json!(
{
"taskUid": 147,
"indexUid": null,
"status": "enqueued",
"type": "documentDeletion",
"enqueuedAt": "2024-08-08T17:05:55.791772Z"
}
)),
(status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!(
{
"message": "The Authorization header is missing. It must use the bearer authorization method.",
"code": "missing_authorization_header",
"type": "auth",
"link": "https://docs.meilisearch.com/errors#missing_authorization_header"
}
)),
)
)]
pub async fn compact(
index_scheduler: GuardedData<ActionPolicy<{ actions::INDEXES_COMPACT }>, Data<IndexScheduler>>,
index_uid: web::Path<String>,
req: HttpRequest,
analytics: web::Data<Analytics>,
) -> Result<HttpResponse, ResponseError> {
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
analytics.publish(IndexCompacted::default(), &req);
let task = KindWithContent::IndexCompaction { index_uid: index_uid.to_string() };
let task =
match tokio::task::spawn_blocking(move || index_scheduler.register(task, None, false))
.await?
{
Ok(task) => task,
Err(e) => return Err(e.into()),
};
debug!(returns = ?task, "Compact the {index_uid} index");
Ok(HttpResponse::Accepted().json(SummarizedTaskView::from(task)))
}
crate::empty_analytics!(IndexCompacted, "Index Compacted");

View File

@@ -45,7 +45,9 @@ use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::GuardedData;
use crate::extractors::payload::Payload;
use crate::extractors::sequential_extractor::SeqHandler;
use crate::routes::indexes::enterprise_edition::proxy::{proxy, Body};
use crate::routes::indexes::enterprise_edition::proxy::{
proxy, task_network_and_check_leader, Body,
};
use crate::routes::indexes::search::fix_sort_query_parameters;
use crate::routes::{
get_task_id, is_dry_run, PaginationView, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT,
@@ -333,13 +335,16 @@ impl Aggregate for DocumentsDeletionAggregator {
pub async fn delete_document(
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_DELETE }>, Data<IndexScheduler>>,
path: web::Path<DocumentParam>,
params: AwebQueryParameter<CustomMetadataQuery, DeserrQueryParamError>,
req: HttpRequest,
opt: web::Data<Opt>,
analytics: web::Data<Analytics>,
) -> Result<HttpResponse, ResponseError> {
let CustomMetadataQuery { custom_metadata } = params.into_inner();
let DocumentParam { index_uid, document_id } = path.into_inner();
let index_uid = IndexUid::try_from(index_uid)?;
let network = index_scheduler.network();
let task_network = task_network_and_check_leader(&req, &network)?;
analytics.publish(
DocumentsDeletionAggregator {
@@ -357,13 +362,23 @@ pub async fn delete_document(
};
let uid = get_task_id(&req, &opt)?;
let dry_run = is_dry_run(&req, &opt)?;
let task = {
let mut task = {
let index_scheduler = index_scheduler.clone();
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run)).await??
tokio::task::spawn_blocking(move || {
index_scheduler.register_with_custom_metadata(
task,
uid,
custom_metadata,
dry_run,
task_network,
)
})
.await??
};
if network.sharding && !dry_run {
proxy(&index_scheduler, &index_uid, &req, network, Body::none(), &task).await?;
if let Some(task_network) = task.network.take() {
proxy(&index_scheduler, Some(&index_uid), &req, task_network, network, Body::none(), &task)
.await?;
}
let task: SummarizedTaskView = task.into();
@@ -678,6 +693,19 @@ pub struct UpdateDocumentsQuery {
#[param(value_type = char, default = ",", example = ";")]
#[deserr(default, try_from(char) = from_char_csv_delimiter -> DeserrQueryParamError<InvalidDocumentCsvDelimiter>, error = DeserrQueryParamError<InvalidDocumentCsvDelimiter>)]
pub csv_delimiter: Option<u8>,
#[param(example = "custom")]
#[deserr(default, error = DeserrQueryParamError<InvalidIndexCustomMetadata>)]
pub custom_metadata: Option<String>,
}
#[derive(Deserialize, Debug, Deserr, IntoParams)]
#[deserr(error = DeserrQueryParamError, rename_all = camelCase, deny_unknown_fields)]
#[into_params(parameter_in = Query, rename_all = "camelCase")]
pub struct CustomMetadataQuery {
#[param(example = "custom")]
#[deserr(default, error = DeserrQueryParamError<InvalidIndexCustomMetadata>)]
pub custom_metadata: Option<String>,
}
fn from_char_csv_delimiter(
@@ -819,6 +847,7 @@ pub async fn replace_documents(
body,
IndexDocumentsMethod::ReplaceDocuments,
uid,
params.custom_metadata,
dry_run,
allow_index_creation,
&req,
@@ -921,6 +950,7 @@ pub async fn update_documents(
body,
IndexDocumentsMethod::UpdateDocuments,
uid,
params.custom_metadata,
dry_run,
allow_index_creation,
&req,
@@ -940,12 +970,14 @@ async fn document_addition(
body: Payload,
method: IndexDocumentsMethod,
task_id: Option<TaskId>,
custom_metadata: Option<String>,
dry_run: bool,
allow_index_creation: bool,
req: &HttpRequest,
) -> Result<SummarizedTaskView, MeilisearchHttpError> {
let mime_type = extract_mime_type(req)?;
let network = index_scheduler.network();
let task_network = task_network_and_check_leader(&req, &network)?;
let format = match (
mime_type.as_ref().map(|m| (m.type_().as_str(), m.subtype().as_str())),
@@ -1064,9 +1096,18 @@ async fn document_addition(
index_uid: index_uid.to_string(),
};
/// FIXME: not new to this PR, but _any_ error here will cause the payload to unduly persist
let scheduler = index_scheduler.clone();
let task = match tokio::task::spawn_blocking(move || scheduler.register(task, task_id, dry_run))
.await?
let mut task = match tokio::task::spawn_blocking(move || {
scheduler.register_with_custom_metadata(
task,
task_id,
custom_metadata,
dry_run,
task_network,
)
})
.await?
{
Ok(task) => task,
Err(e) => {
@@ -1075,12 +1116,13 @@ async fn document_addition(
}
};
if network.sharding {
if let Some(task_network) = task.network.take() {
if let Some(file) = file {
proxy(
&index_scheduler,
&index_uid,
Some(&index_uid),
req,
task_network,
network,
Body::with_ndjson_payload(file),
&task,
@@ -1130,7 +1172,7 @@ async fn copy_body_to_file(
/// Delete a set of documents based on an array of document ids.
#[utoipa::path(
post,
path = "{indexUid}/delete-batch",
path = "{indexUid}/documents/delete-batch",
tag = "Documents",
security(("Bearer" = ["documents.delete", "documents.*", "*"])),
params(
@@ -1161,13 +1203,17 @@ pub async fn delete_documents_batch(
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_DELETE }>, Data<IndexScheduler>>,
index_uid: web::Path<String>,
body: web::Json<Vec<Value>>,
params: AwebQueryParameter<CustomMetadataQuery, DeserrQueryParamError>,
req: HttpRequest,
opt: web::Data<Opt>,
analytics: web::Data<Analytics>,
) -> Result<HttpResponse, ResponseError> {
debug!(parameters = ?body, "Delete documents by batch");
let CustomMetadataQuery { custom_metadata } = params.into_inner();
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let network = index_scheduler.network();
let task_network = task_network_and_check_leader(&req, &network)?;
analytics.publish(
DocumentsDeletionAggregator {
@@ -1188,13 +1234,31 @@ pub async fn delete_documents_batch(
KindWithContent::DocumentDeletion { index_uid: index_uid.to_string(), documents_ids: ids };
let uid = get_task_id(&req, &opt)?;
let dry_run = is_dry_run(&req, &opt)?;
let task = {
let mut task = {
let index_scheduler = index_scheduler.clone();
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run)).await??
tokio::task::spawn_blocking(move || {
index_scheduler.register_with_custom_metadata(
task,
uid,
custom_metadata,
dry_run,
task_network,
)
})
.await??
};
if network.sharding && !dry_run {
proxy(&index_scheduler, &index_uid, &req, network, Body::Inline(body), &task).await?;
if let Some(task_network) = task.network.take() {
proxy(
&index_scheduler,
Some(&index_uid),
&req,
task_network,
network,
Body::inline(body),
&task,
)
.await?;
}
let task: SummarizedTaskView = task.into();
@@ -1244,16 +1308,20 @@ pub struct DocumentDeletionByFilter {
pub async fn delete_documents_by_filter(
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_DELETE }>, Data<IndexScheduler>>,
index_uid: web::Path<String>,
params: AwebQueryParameter<CustomMetadataQuery, DeserrQueryParamError>,
body: AwebJson<DocumentDeletionByFilter, DeserrJsonError>,
req: HttpRequest,
opt: web::Data<Opt>,
analytics: web::Data<Analytics>,
) -> Result<HttpResponse, ResponseError> {
debug!(parameters = ?body, "Delete documents by filter");
let CustomMetadataQuery { custom_metadata } = params.into_inner();
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let index_uid = index_uid.into_inner();
let filter = body.into_inner();
let network = index_scheduler.network();
let task_network = task_network_and_check_leader(&req, &network)?;
analytics.publish(
DocumentsDeletionAggregator {
@@ -1280,13 +1348,31 @@ pub async fn delete_documents_by_filter(
let uid = get_task_id(&req, &opt)?;
let dry_run = is_dry_run(&req, &opt)?;
let task = {
let mut task = {
let index_scheduler = index_scheduler.clone();
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run)).await??
tokio::task::spawn_blocking(move || {
index_scheduler.register_with_custom_metadata(
task,
uid,
custom_metadata,
dry_run,
task_network,
)
})
.await??
};
if network.sharding && !dry_run {
proxy(&index_scheduler, &index_uid, &req, network, Body::Inline(filter), &task).await?;
if let Some(task_network) = task.network.take() {
proxy(
&index_scheduler,
Some(&index_uid),
&req,
task_network,
network,
Body::inline(filter),
&task,
)
.await?;
}
let task: SummarizedTaskView = task.into();
@@ -1372,38 +1458,41 @@ impl Aggregate for EditDocumentsByFunctionAggregator {
pub async fn edit_documents_by_function(
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_ALL }>, Data<IndexScheduler>>,
index_uid: web::Path<String>,
params: AwebJson<DocumentEditionByFunction, DeserrJsonError>,
params: AwebQueryParameter<CustomMetadataQuery, DeserrQueryParamError>,
body: AwebJson<DocumentEditionByFunction, DeserrJsonError>,
req: HttpRequest,
opt: web::Data<Opt>,
analytics: web::Data<Analytics>,
) -> Result<HttpResponse, ResponseError> {
debug!(parameters = ?params, "Edit documents by function");
debug!(parameters = ?body, "Edit documents by function");
let CustomMetadataQuery { custom_metadata } = params.into_inner();
index_scheduler
.features()
.check_edit_documents_by_function("Using the documents edit route")?;
let network = index_scheduler.network();
let task_network = task_network_and_check_leader(&req, &network)?;
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let index_uid = index_uid.into_inner();
let params = params.into_inner();
let body = body.into_inner();
analytics.publish(
EditDocumentsByFunctionAggregator {
filtered: params.filter.is_some(),
with_context: params.context.is_some(),
filtered: body.filter.is_some(),
with_context: body.context.is_some(),
index_creation: index_scheduler.index(&index_uid).is_err(),
},
&req,
);
let engine = milli::rhai::Engine::new();
if let Err(e) = engine.compile(&params.function) {
if let Err(e) = engine.compile(&body.function) {
return Err(ResponseError::from_msg(e.to_string(), Code::BadRequest));
}
if let Some(ref filter) = params.filter {
if let Some(ref filter) = body.filter {
// we ensure the filter is well formed before enqueuing it
crate::search::parse_filter(
filter,
@@ -1414,8 +1503,8 @@ pub async fn edit_documents_by_function(
}
let task = KindWithContent::DocumentEdition {
index_uid: index_uid.clone(),
filter_expr: params.filter.clone(),
context: match params.context.clone() {
filter_expr: body.filter.clone(),
context: match body.context.clone() {
Some(Value::Object(m)) => Some(m),
None => None,
_ => {
@@ -1425,18 +1514,36 @@ pub async fn edit_documents_by_function(
))
}
},
function: params.function.clone(),
function: body.function.clone(),
};
let uid = get_task_id(&req, &opt)?;
let dry_run = is_dry_run(&req, &opt)?;
let task = {
let mut task = {
let index_scheduler = index_scheduler.clone();
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run)).await??
tokio::task::spawn_blocking(move || {
index_scheduler.register_with_custom_metadata(
task,
uid,
custom_metadata,
dry_run,
task_network,
)
})
.await??
};
if network.sharding && !dry_run {
proxy(&index_scheduler, &index_uid, &req, network, Body::Inline(params), &task).await?;
if let Some(task_network) = task.network.take() {
proxy(
&index_scheduler,
Some(&index_uid),
&req,
task_network,
network,
Body::inline(body),
&task,
)
.await?;
}
let task: SummarizedTaskView = task.into();
@@ -1477,12 +1584,15 @@ pub async fn edit_documents_by_function(
pub async fn clear_all_documents(
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_DELETE }>, Data<IndexScheduler>>,
index_uid: web::Path<String>,
params: AwebQueryParameter<CustomMetadataQuery, DeserrQueryParamError>,
req: HttpRequest,
opt: web::Data<Opt>,
analytics: web::Data<Analytics>,
) -> Result<HttpResponse, ResponseError> {
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let network = index_scheduler.network();
let CustomMetadataQuery { custom_metadata } = params.into_inner();
let task_network = task_network_and_check_leader(&req, &network)?;
analytics.publish(
DocumentsDeletionAggregator {
@@ -1498,14 +1608,24 @@ pub async fn clear_all_documents(
let uid = get_task_id(&req, &opt)?;
let dry_run = is_dry_run(&req, &opt)?;
let task = {
let mut task = {
let index_scheduler = index_scheduler.clone();
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run)).await??
tokio::task::spawn_blocking(move || {
index_scheduler.register_with_custom_metadata(
task,
uid,
custom_metadata,
dry_run,
task_network,
)
})
.await??
};
if network.sharding && !dry_run {
proxy(&index_scheduler, &index_uid, &req, network, Body::none(), &task).await?;
if let Some(task_network) = task.network.take() {
proxy(&index_scheduler, Some(&index_uid), &req, task_network, network, Body::none(), &task)
.await?;
}
let task: SummarizedTaskView = task.into();

View File

@@ -3,6 +3,7 @@
// Use of this source code is governed by the Business Source License 1.1,
// as found in the LICENSE-EE file or at <https://mariadb.com/bsl11>
use std::borrow::Cow;
use std::collections::BTreeMap;
use std::fs::File;
@@ -10,8 +11,18 @@ use actix_web::http::header::CONTENT_TYPE;
use actix_web::HttpRequest;
use bytes::Bytes;
use index_scheduler::IndexScheduler;
use meilisearch_types::enterprise_edition::network::Remote;
use meilisearch_types::error::ResponseError;
use meilisearch_types::tasks::{Origin, RemoteTask, TaskNetwork};
use meilisearch_types::milli::DocumentId;
use meilisearch_types::tasks::enterprise_edition::network::headers::{
PROXY_IMPORT_DOCS_HEADER, PROXY_IMPORT_INDEX_COUNT_HEADER, PROXY_IMPORT_INDEX_HEADER,
PROXY_IMPORT_REMOTE_HEADER, PROXY_IMPORT_TASK_KEY_HEADER, PROXY_IMPORT_TOTAL_INDEX_DOCS_HEADER,
PROXY_ORIGIN_NETWORK_VERSION_HEADER, PROXY_ORIGIN_REMOTE_HEADER, PROXY_ORIGIN_TASK_UID_HEADER,
};
use meilisearch_types::tasks::enterprise_edition::network::{
DbTaskNetwork, ImportData, ImportMetadata, Origin, TaskNetwork,
};
use meilisearch_types::tasks::Task;
use reqwest::StatusCode;
use serde::de::DeserializeOwned;
use serde_json::Value;
@@ -22,13 +33,18 @@ use crate::routes::indexes::enterprise_edition::proxy::error::{
};
use crate::routes::SummarizedTaskView;
pub enum Body<T: serde::Serialize> {
pub enum Body<T, F>
where
T: serde::Serialize,
F: FnMut(&str, &Remote, &mut T),
{
NdJsonPayload(File),
Inline(T),
Generated(T, F),
None,
}
impl Body<()> {
impl Body<(), fn(&str, &Remote, &mut ())> {
pub fn with_ndjson_payload(file: File) -> Self {
Self::NdJsonPayload(file)
}
@@ -38,7 +54,115 @@ impl Body<()> {
}
}
/// If necessary, proxies the passed request to the network and update the task description.
impl<T> Body<T, fn(&str, &Remote, &mut T)>
where
T: serde::Serialize,
{
pub fn inline(payload: T) -> Self {
Self::Inline(payload)
}
}
impl<T, F> Body<T, F>
where
T: serde::Serialize,
F: FnMut(&str, &Remote, &mut T),
{
pub fn generated(initial: T, f: F) -> Self {
Self::Generated(initial, f)
}
}
impl<T, F> Body<T, F>
where
T: serde::Serialize,
F: FnMut(&str, &Remote, &mut T),
{
pub fn into_bytes_iter(
self,
remotes: impl IntoIterator<Item = (String, Remote)>,
) -> Result<
impl Iterator<Item = (Option<Bytes>, (String, Remote))>,
meilisearch_types::milli::Error,
> {
let bytes = match self {
Body::NdJsonPayload(file) => {
Some(Bytes::from_owner(unsafe { memmap2::Mmap::map(&file)? }))
}
Body::Inline(payload) => {
Some(Bytes::copy_from_slice(&serde_json::to_vec(&payload).unwrap()))
}
Body::None => None,
Body::Generated(mut initial, mut f) => {
return Ok(either::Right(remotes.into_iter().map(move |(name, remote)| {
f(&name, &remote, &mut initial);
let bytes =
Some(Bytes::copy_from_slice(&serde_json::to_vec(&initial).unwrap()));
(bytes, (name, remote))
})));
}
};
Ok(either::Left(std::iter::repeat(bytes).zip(remotes)))
}
}
/// Parses the header to determine if this task is a duplicate and originates with a remote.
///
/// If not, checks whether this remote is the leader and return `MeilisearchHttpError::NotLeader` if not.
///
/// If there is no leader, returns `Ok(None)`
///
/// # Errors
///
/// - `MeiliearchHttpError::NotLeader`: if the following are true simultaneously:
/// 1. The task originates with the current node
/// 2. There's a declared `leader`
/// 3. The declared leader is **not** the current node
/// - `MeilisearchHttpError::InvalidHeaderValue`: if only parts of the headers are present, or if they cannot be parsed as a task network.
/// - `MeilisearchHttpError::Inconsistent`
pub fn task_network_and_check_leader(
req: &HttpRequest,
network: &meilisearch_types::enterprise_edition::network::Network,
) -> Result<Option<TaskNetwork>, MeilisearchHttpError> {
match (origin_from_req(req)?, import_data_from_req(req)?, import_metadata_from_req(req)?) {
(Some(network_change), Some(import_from), Some(metadata)) => {
Ok(Some(TaskNetwork::Import { import_from, network_change, metadata }))
}
(Some(origin), None, None) => Ok(Some(TaskNetwork::Origin { origin })),
(None, None, None) => {
match (network.leader.as_deref(), network.local.as_deref()) {
// 1. Always allowed if there is no leader
(None, _) => return Ok(None),
// 2. Allowed if the leader is self
(Some(leader), Some(this)) if leader == this => (),
// 3. Any other change is disallowed
(Some(leader), _) => {
return Err(
MeilisearchHttpError::NotLeader { leader: leader.to_string() }.into()
)
}
}
Ok(Some(TaskNetwork::Remotes {
remote_tasks: Default::default(),
network_version: network.version,
}))
}
// all good cases were matched, so this is always an error
(origin, import_from, metadata) => {
Err(MeilisearchHttpError::InconsistentTaskNetworkHeaders {
is_missing_origin: origin.is_none(),
is_missing_import: import_from.is_none(),
is_missing_import_metadata: metadata.is_none(),
})
}
}
}
/// Updates the task description and, if necessary, proxies the passed request to the network and update the task description.
///
/// This function reads the custom headers from the request to determine if must proxy the request or if the request
/// has already been proxied.
@@ -48,152 +172,139 @@ impl Body<()> {
/// with the task ids from the task queues of the remotes.
/// - when the request has already been proxied, the custom headers contains information about the remote that created the initial task.
/// This information is copied to the passed task.
pub async fn proxy<T: serde::Serialize>(
///
/// # Returns
///
/// The updated task. The task is read back from the database to avoid erasing concurrent changes.
pub async fn proxy<T, F>(
index_scheduler: &IndexScheduler,
index_uid: &str,
index_uid: Option<&str>,
req: &HttpRequest,
mut task_network: DbTaskNetwork,
network: meilisearch_types::enterprise_edition::network::Network,
body: Body<T>,
body: Body<T, F>,
task: &meilisearch_types::tasks::Task,
) -> Result<(), MeilisearchHttpError> {
match origin_from_req(req)? {
Some(origin) => {
index_scheduler.set_task_network(task.uid, TaskNetwork::Origin { origin })?
) -> Result<Task, MeilisearchHttpError>
where
T: serde::Serialize,
F: FnMut(&str, &Remote, &mut T),
{
if let DbTaskNetwork::Remotes { remote_tasks, network_version: _ } = &mut task_network {
let this = network
.local
.as_deref()
.expect("inconsistent `network.sharding` and `network.self`")
.to_owned();
let content_type = match &body {
// for file bodies, force x-ndjson
Body::NdJsonPayload(_) => Some(b"application/x-ndjson".as_slice()),
// otherwise get content type from request
_ => req.headers().get(CONTENT_TYPE).map(|h| h.as_bytes()),
};
let mut in_flight_remote_queries = BTreeMap::new();
let client = reqwest::ClientBuilder::new()
.connect_timeout(std::time::Duration::from_secs(3))
.build()
.unwrap();
let method = from_old_http_method(req.method());
// send payload to all remotes
for (body, (node_name, node)) in body
.into_bytes_iter(network.remotes.into_iter().filter(|(name, _)| name.as_str() != this))
.map_err(|err| {
MeilisearchHttpError::from_milli(err, index_uid.map(ToOwned::to_owned))
})?
{
let client = client.clone();
let api_key = node.write_api_key;
let this = this.clone();
let method = method.clone();
let path_and_query = req.uri().path_and_query().map(|paq| paq.as_str()).unwrap_or("/");
in_flight_remote_queries.insert(
node_name,
tokio::spawn({
let url = format!("{}{}", node.url, path_and_query);
let url_encoded_this = urlencoding::encode(&this).into_owned();
let url_encoded_task_uid = task.uid.to_string(); // it's url encoded i promize
let content_type = content_type.map(|b| b.to_owned());
let backoff = backoff::ExponentialBackoffBuilder::new()
.with_max_elapsed_time(Some(std::time::Duration::from_secs(25)))
.build();
backoff::future::retry(backoff, move || {
let url = url.clone();
let client = client.clone();
let url_encoded_this = url_encoded_this.clone();
let url_encoded_task_uid = url_encoded_task_uid.clone();
let content_type = content_type.clone();
let body = body.clone();
let api_key = api_key.clone();
let method = method.clone();
async move {
try_proxy(
method,
&url,
content_type.as_deref(),
api_key.as_deref(),
&client,
&url_encoded_this,
&url_encoded_task_uid,
body,
)
.await
}
})
}),
);
}
None => {
let this = network
.local
.as_deref()
.expect("inconsistent `network.sharding` and `network.self`")
.to_owned();
let content_type = match &body {
// for file bodies, force x-ndjson
Body::NdJsonPayload(_) => Some(b"application/x-ndjson".as_slice()),
// otherwise get content type from request
_ => req.headers().get(CONTENT_TYPE).map(|h| h.as_bytes()),
};
// wait for all in-flight queries to finish and collect their results
for (node_name, handle) in in_flight_remote_queries {
match handle.await {
Ok(Ok(res)) => {
let task_uid = res.task_uid;
let body = match body {
Body::NdJsonPayload(file) => Some(Bytes::from_owner(unsafe {
memmap2::Mmap::map(&file).map_err(|err| {
MeilisearchHttpError::from_milli(err.into(), Some(index_uid.to_owned()))
})?
})),
Body::Inline(payload) => {
Some(Bytes::copy_from_slice(&serde_json::to_vec(&payload).unwrap()))
remote_tasks.insert(node_name, Ok(task_uid).into());
}
Body::None => None,
};
let mut in_flight_remote_queries = BTreeMap::new();
let client = reqwest::ClientBuilder::new()
.connect_timeout(std::time::Duration::from_secs(3))
.build()
.unwrap();
let method = from_old_http_method(req.method());
// send payload to all remotes
for (node_name, node) in
network.remotes.into_iter().filter(|(name, _)| name.as_str() != this)
{
let body = body.clone();
let client = client.clone();
let api_key = node.write_api_key;
let this = this.clone();
let method = method.clone();
let path_and_query =
req.uri().path_and_query().map(|paq| paq.as_str()).unwrap_or("/");
in_flight_remote_queries.insert(
node_name,
tokio::spawn({
let url = format!("{}{}", node.url, path_and_query);
let url_encoded_this = urlencoding::encode(&this).into_owned();
let url_encoded_task_uid = task.uid.to_string(); // it's url encoded i promize
let content_type = content_type.map(|b| b.to_owned());
let backoff = backoff::ExponentialBackoffBuilder::new()
.with_max_elapsed_time(Some(std::time::Duration::from_secs(25)))
.build();
backoff::future::retry(backoff, move || {
let url = url.clone();
let client = client.clone();
let url_encoded_this = url_encoded_this.clone();
let url_encoded_task_uid = url_encoded_task_uid.clone();
let content_type = content_type.clone();
let body = body.clone();
let api_key = api_key.clone();
let method = method.clone();
async move {
try_proxy(
method,
&url,
content_type.as_deref(),
api_key.as_deref(),
&client,
&url_encoded_this,
&url_encoded_task_uid,
body,
)
.await
}
})
}),
);
}
// wait for all in-flight queries to finish and collect their results
let mut remote_tasks: BTreeMap<String, RemoteTask> = BTreeMap::new();
for (node_name, handle) in in_flight_remote_queries {
match handle.await {
Ok(Ok(res)) => {
let task_uid = res.task_uid;
remote_tasks.insert(node_name, Ok(task_uid).into());
}
Ok(Err(error)) => {
remote_tasks.insert(node_name, Err(error.as_response_error()).into());
}
Err(panic) => match panic.try_into_panic() {
Ok(panic) => {
let msg = match panic.downcast_ref::<&'static str>() {
Some(s) => *s,
None => match panic.downcast_ref::<String>() {
Some(s) => &s[..],
None => "Box<dyn Any>",
},
};
remote_tasks.insert(
node_name,
Err(ResponseError::from_msg(
msg.to_string(),
meilisearch_types::error::Code::Internal,
))
.into(),
);
}
Err(_) => {
tracing::error!("proxy task was unexpectedly cancelled")
}
},
Ok(Err(error)) => {
remote_tasks.insert(node_name, Err(error.as_response_error()).into());
}
Err(panic) => match panic.try_into_panic() {
Ok(panic) => {
let msg = match panic.downcast_ref::<&'static str>() {
Some(s) => *s,
None => match panic.downcast_ref::<String>() {
Some(s) => &s[..],
None => "Box<dyn Any>",
},
};
remote_tasks.insert(
node_name,
Err(ResponseError::from_msg(
msg.to_string(),
meilisearch_types::error::Code::Internal,
))
.into(),
);
}
Err(_) => {
tracing::error!("proxy task was unexpectedly cancelled")
}
},
}
// edit details to contain the return values from the remotes
index_scheduler.set_task_network(task.uid, TaskNetwork::Remotes { remote_tasks })?;
}
}
Ok(())
Ok(index_scheduler.set_task_network(task.uid, task_network)?)
}
fn from_old_http_method(method: &actix_http::Method) -> reqwest::Method {
@@ -375,25 +486,23 @@ mod error {
}
}
pub const PROXY_ORIGIN_REMOTE_HEADER: &str = "Meili-Proxy-Origin-Remote";
pub const PROXY_ORIGIN_TASK_UID_HEADER: &str = "Meili-Proxy-Origin-TaskUid";
pub fn origin_from_req(req: &HttpRequest) -> Result<Option<Origin>, MeilisearchHttpError> {
let (remote_name, task_uid) = match (
let (remote_name, task_uid, network_version) = match (
req.headers().get(PROXY_ORIGIN_REMOTE_HEADER),
req.headers().get(PROXY_ORIGIN_TASK_UID_HEADER),
req.headers().get(PROXY_ORIGIN_NETWORK_VERSION_HEADER),
) {
(None, None) => return Ok(None),
(None, Some(_)) => {
(None, None, _) => return Ok(None),
(None, Some(_), _) => {
return Err(MeilisearchHttpError::InconsistentOriginHeaders { is_remote_missing: true })
}
(Some(_), None) => {
(Some(_), None, _) => {
return Err(MeilisearchHttpError::InconsistentOriginHeaders {
is_remote_missing: false,
})
}
(Some(remote_name), Some(task_uid)) => (
urlencoding::decode(remote_name.to_str().map_err(|err| {
(Some(remote_name), Some(task_uid), network_version) => {
let remote_name = urlencoding::decode(remote_name.to_str().map_err(|err| {
MeilisearchHttpError::InvalidHeaderValue {
header_name: PROXY_ORIGIN_REMOTE_HEADER,
msg: format!("while parsing remote name as UTF-8: {err}"),
@@ -402,8 +511,8 @@ pub fn origin_from_req(req: &HttpRequest) -> Result<Option<Origin>, MeilisearchH
.map_err(|err| MeilisearchHttpError::InvalidHeaderValue {
header_name: PROXY_ORIGIN_REMOTE_HEADER,
msg: format!("while URL-decoding remote name: {err}"),
})?,
urlencoding::decode(task_uid.to_str().map_err(|err| {
})?;
let task_uid = urlencoding::decode(task_uid.to_str().map_err(|err| {
MeilisearchHttpError::InvalidHeaderValue {
header_name: PROXY_ORIGIN_TASK_UID_HEADER,
msg: format!("while parsing task UID as UTF-8: {err}"),
@@ -412,15 +521,182 @@ pub fn origin_from_req(req: &HttpRequest) -> Result<Option<Origin>, MeilisearchH
.map_err(|err| MeilisearchHttpError::InvalidHeaderValue {
header_name: PROXY_ORIGIN_TASK_UID_HEADER,
msg: format!("while URL-decoding task UID: {err}"),
})?,
),
})?;
let network_version = match network_version {
Some(network_version) => {
urlencoding::decode(network_version.to_str().map_err(|err| {
MeilisearchHttpError::InvalidHeaderValue {
header_name: PROXY_ORIGIN_NETWORK_VERSION_HEADER,
msg: format!("while parsing network version as UTF-8: {err}"),
}
})?)
.map_err(|err| {
MeilisearchHttpError::InvalidHeaderValue {
header_name: PROXY_ORIGIN_NETWORK_VERSION_HEADER,
msg: format!("while URL-decoding network version: {err}"),
}
})?
}
None => Cow::Borrowed("0"),
};
(remote_name, task_uid, network_version)
}
};
let task_uid: usize =
let task_uid: u32 =
task_uid.parse().map_err(|err| MeilisearchHttpError::InvalidHeaderValue {
header_name: PROXY_ORIGIN_TASK_UID_HEADER,
msg: format!("while parsing the task UID as an integer: {err}"),
})?;
Ok(Some(Origin { remote_name: remote_name.into_owned(), task_uid }))
let network_version: u128 =
network_version.parse().map_err(|err| MeilisearchHttpError::InvalidHeaderValue {
header_name: PROXY_ORIGIN_NETWORK_VERSION_HEADER,
msg: format!("while parsing the network version as a u128: {err}"),
})?;
let network_version = uuid::Uuid::from_u128(network_version);
Ok(Some(Origin { remote_name: remote_name.into_owned(), task_uid, network_version }))
}
pub fn import_data_from_req(req: &HttpRequest) -> Result<Option<ImportData>, MeilisearchHttpError> {
let (remote_name, index_name, documents) = match (
req.headers().get(PROXY_IMPORT_REMOTE_HEADER),
req.headers().get(PROXY_IMPORT_INDEX_HEADER),
req.headers().get(PROXY_IMPORT_DOCS_HEADER),
) {
(None, None, None) => return Ok(None),
(Some(remote_name), Some(index_name), Some(documents)) => {
let remote_name = urlencoding::decode(remote_name.to_str().map_err(|err| {
MeilisearchHttpError::InvalidHeaderValue {
header_name: PROXY_IMPORT_REMOTE_HEADER,
msg: format!("while parsing import remote name as UTF-8: {err}"),
}
})?)
.map_err(|err| MeilisearchHttpError::InvalidHeaderValue {
header_name: PROXY_IMPORT_REMOTE_HEADER,
msg: format!("while URL-decoding import remote name: {err}"),
})?;
let index_name = urlencoding::decode(index_name.to_str().map_err(|err| {
MeilisearchHttpError::InvalidHeaderValue {
header_name: PROXY_IMPORT_INDEX_HEADER,
msg: format!("while parsing import index name as UTF-8: {err}"),
}
})?)
.map_err(|err| MeilisearchHttpError::InvalidHeaderValue {
header_name: PROXY_IMPORT_INDEX_HEADER,
msg: format!("while URL-decoding import index name: {err}"),
})?;
let documents = urlencoding::decode(documents.to_str().map_err(|err| {
MeilisearchHttpError::InvalidHeaderValue {
header_name: PROXY_IMPORT_DOCS_HEADER,
msg: format!("while parsing documents as UTF-8: {err}"),
}
})?)
.map_err(|err| MeilisearchHttpError::InvalidHeaderValue {
header_name: PROXY_IMPORT_DOCS_HEADER,
msg: format!("while URL-decoding documents: {err}"),
})?;
(remote_name, index_name, documents)
}
// catch-all pattern that has to contain an inconsistency since we already matched (None, None, None) and (Some, Some, Some)
(remote_name, index_name, documents) => {
return Err(MeilisearchHttpError::InconsistentImportHeaders {
is_remote_missing: remote_name.is_none(),
is_index_missing: index_name.is_none(),
is_docs_missing: documents.is_none(),
})
}
};
let document_count: u64 =
documents.parse().map_err(|err| MeilisearchHttpError::InvalidHeaderValue {
header_name: PROXY_IMPORT_DOCS_HEADER,
msg: format!("while parsing the documents as an integer: {err}"),
})?;
Ok(Some(ImportData {
remote_name: remote_name.to_string(),
index_name: index_name.to_string(),
document_count,
}))
}
pub fn import_metadata_from_req(
req: &HttpRequest,
) -> Result<Option<ImportMetadata>, MeilisearchHttpError> {
let (index_count, task_key, total_index_documents) = match (
req.headers().get(PROXY_IMPORT_INDEX_COUNT_HEADER),
req.headers().get(PROXY_IMPORT_TASK_KEY_HEADER),
req.headers().get(PROXY_IMPORT_TOTAL_INDEX_DOCS_HEADER),
) {
(None, None, None) => return Ok(None),
(Some(index_count), Some(task_key), Some(total_index_documents)) => {
let index_count = urlencoding::decode(index_count.to_str().map_err(|err| {
MeilisearchHttpError::InvalidHeaderValue {
header_name: PROXY_IMPORT_REMOTE_HEADER,
msg: format!("while parsing import index count as UTF-8: {err}"),
}
})?)
.map_err(|err| MeilisearchHttpError::InvalidHeaderValue {
header_name: PROXY_IMPORT_INDEX_COUNT_HEADER,
msg: format!("while URL-decoding import index count: {err}"),
})?;
let task_key = urlencoding::decode(task_key.to_str().map_err(|err| {
MeilisearchHttpError::InvalidHeaderValue {
header_name: PROXY_IMPORT_TASK_KEY_HEADER,
msg: format!("while parsing import task key as UTF-8: {err}"),
}
})?)
.map_err(|err| MeilisearchHttpError::InvalidHeaderValue {
header_name: PROXY_IMPORT_TASK_KEY_HEADER,
msg: format!("while URL-decoding import task key: {err}"),
})?;
let total_index_documents =
urlencoding::decode(total_index_documents.to_str().map_err(|err| {
MeilisearchHttpError::InvalidHeaderValue {
header_name: PROXY_IMPORT_TOTAL_INDEX_DOCS_HEADER,
msg: format!("while parsing total index documents as UTF-8: {err}"),
}
})?)
.map_err(|err| MeilisearchHttpError::InvalidHeaderValue {
header_name: PROXY_IMPORT_TOTAL_INDEX_DOCS_HEADER,
msg: format!("while URL-decoding total index documents: {err}"),
})?;
(index_count, task_key, total_index_documents)
}
// catch-all pattern that has to contain an inconsistency since we already matched (None, None, None) and (Some, Some, Some)
(index_count, task_key, total_index_documents) => {
return Err(MeilisearchHttpError::InconsistentImportMetadataHeaders {
is_index_count_missing: index_count.is_none(),
is_task_key_missing: task_key.is_none(),
is_total_index_documents_missing: total_index_documents.is_none(),
})
}
};
let index_count: u64 =
index_count.parse().map_err(|err| MeilisearchHttpError::InvalidHeaderValue {
header_name: PROXY_IMPORT_INDEX_COUNT_HEADER,
msg: format!("while parsing the index count as an integer: {err}"),
})?;
let task_key: DocumentId =
task_key.parse().map_err(|err| MeilisearchHttpError::InvalidHeaderValue {
header_name: PROXY_IMPORT_TASK_KEY_HEADER,
msg: format!("while parsing import task key as an integer: {err}"),
})?;
let total_index_documents: u64 =
total_index_documents.parse().map_err(|err| MeilisearchHttpError::InvalidHeaderValue {
header_name: PROXY_IMPORT_TOTAL_INDEX_DOCS_HEADER,
msg: format!("while parsing the total index documents as an integer: {err}"),
})?;
Ok(Some(ImportMetadata { index_count, task_key, total_index_documents }))
}

View File

@@ -343,6 +343,7 @@ impl From<FacetSearchQuery> for SearchQuery {
hybrid,
ranking_score_threshold,
locales,
personalize: None,
}
}
}

View File

@@ -28,8 +28,9 @@ use crate::extractors::sequential_extractor::SeqHandler;
use crate::routes::is_dry_run;
use crate::Opt;
pub mod compact;
pub mod documents;
mod enterprise_edition;
pub mod enterprise_edition;
pub mod facet_search;
pub mod search;
mod search_analytics;
@@ -40,7 +41,9 @@ mod settings_analytics;
pub mod similar;
mod similar_analytics;
pub use enterprise_edition::proxy::{PROXY_ORIGIN_REMOTE_HEADER, PROXY_ORIGIN_TASK_UID_HEADER};
pub use meilisearch_types::tasks::enterprise_edition::network::headers::{
PROXY_ORIGIN_REMOTE_HEADER, PROXY_ORIGIN_TASK_UID_HEADER,
};
#[derive(OpenApi)]
#[openapi(
@@ -49,8 +52,9 @@ pub use enterprise_edition::proxy::{PROXY_ORIGIN_REMOTE_HEADER, PROXY_ORIGIN_TAS
(path = "/", api = facet_search::FacetSearchApi),
(path = "/", api = similar::SimilarApi),
(path = "/", api = settings::SettingsApi),
(path = "/", api = compact::CompactApi),
),
paths(list_indexes, create_index, get_index, update_index, delete_index, get_index_stats),
paths(list_indexes, create_index, get_index, update_index, delete_index, get_index_stats, compact::compact),
tags(
(
name = "Indexes",
@@ -80,7 +84,8 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
.service(web::scope("/search").configure(search::configure))
.service(web::scope("/facet-search").configure(facet_search::configure))
.service(web::scope("/similar").configure(similar::configure))
.service(web::scope("/settings").configure(settings::configure)),
.service(web::scope("/settings").configure(settings::configure))
.service(web::scope("/compact").configure(compact::configure)),
);
}

View File

@@ -13,6 +13,7 @@ use meilisearch_types::serde_cs::vec::CS;
use serde_json::Value;
use tracing::debug;
use utoipa::{IntoParams, OpenApi};
use uuid::Uuid;
use crate::analytics::Analytics;
use crate::error::MeilisearchHttpError;
@@ -21,11 +22,12 @@ use crate::extractors::authentication::GuardedData;
use crate::extractors::sequential_extractor::SeqHandler;
use crate::metrics::MEILISEARCH_DEGRADED_SEARCH_REQUESTS;
use crate::routes::indexes::search_analytics::{SearchAggregator, SearchGET, SearchPOST};
use crate::routes::parse_include_metadata_header;
use crate::search::{
add_search_rules, perform_search, HybridQuery, MatchingStrategy, RankingScoreThreshold,
RetrieveVectors, SearchKind, SearchQuery, SearchResult, SemanticRatio, DEFAULT_CROP_LENGTH,
DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG,
DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET, DEFAULT_SEMANTIC_RATIO,
add_search_rules, perform_search, HybridQuery, MatchingStrategy, Personalize,
RankingScoreThreshold, RetrieveVectors, SearchKind, SearchParams, SearchQuery, SearchResult,
SemanticRatio, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG,
DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET, DEFAULT_SEMANTIC_RATIO,
};
use crate::search_queue::SearchQueue;
@@ -132,6 +134,8 @@ pub struct SearchQueryGet {
#[deserr(default, error = DeserrQueryParamError<InvalidSearchLocales>)]
#[param(value_type = Vec<Locale>, explode = false)]
pub locales: Option<CS<Locale>>,
#[deserr(default, error = DeserrQueryParamError<InvalidSearchPersonalizeUserContext>)]
pub personalize_user_context: Option<String>,
}
#[derive(Debug, Clone, Copy, PartialEq, deserr::Deserr)]
@@ -203,6 +207,9 @@ impl TryFrom<SearchQueryGet> for SearchQuery {
));
}
let personalize =
other.personalize_user_context.map(|user_context| Personalize { user_context });
Ok(Self {
q: other.q,
// `media` not supported for `GET`
@@ -232,6 +239,7 @@ impl TryFrom<SearchQueryGet> for SearchQuery {
hybrid,
ranking_score_threshold: other.ranking_score_threshold.map(|o| o.0),
locales: other.locales.map(|o| o.into_iter().collect()),
personalize,
})
}
}
@@ -320,12 +328,14 @@ pub fn fix_sort_query_parameters(sort_query: &str) -> Vec<String> {
pub async fn search_with_url_query(
index_scheduler: GuardedData<ActionPolicy<{ actions::SEARCH }>, Data<IndexScheduler>>,
search_queue: web::Data<SearchQueue>,
personalization_service: web::Data<crate::personalization::PersonalizationService>,
index_uid: web::Path<String>,
params: AwebQueryParameter<SearchQueryGet, DeserrQueryParamError>,
req: HttpRequest,
analytics: web::Data<Analytics>,
) -> Result<HttpResponse, ResponseError> {
debug!(parameters = ?params, "Search get");
let request_uid = Uuid::now_v7();
debug!(request_uid = ?request_uid, parameters = ?params, "Search get");
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let mut query: SearchQuery = params.into_inner().try_into()?;
@@ -339,31 +349,56 @@ pub async fn search_with_url_query(
let index = index_scheduler.index(&index_uid)?;
// Extract personalization and query string before moving query
let personalize = query.personalize.take();
let search_kind =
search_kind(&query, index_scheduler.get_ref(), index_uid.to_string(), &index)?;
let retrieve_vector = RetrieveVectors::new(query.retrieve_vectors);
// Save the query string for personalization if requested
let personalize_query = personalize.is_some().then(|| query.q.clone()).flatten();
let permit = search_queue.try_get_search_permit().await?;
let include_metadata = parse_include_metadata_header(&req);
let search_result = tokio::task::spawn_blocking(move || {
perform_search(
index_uid.to_string(),
SearchParams {
index_uid: index_uid.to_string(),
query,
search_kind,
retrieve_vectors: retrieve_vector,
features: index_scheduler.features(),
request_uid,
include_metadata,
},
&index,
query,
search_kind,
retrieve_vector,
index_scheduler.features(),
)
})
.await;
permit.drop().await;
let search_result = search_result?;
if let Ok(ref search_result) = search_result {
if let Ok((search_result, _)) = search_result.as_ref() {
aggregate.succeed(search_result);
}
analytics.publish(aggregate, &req);
let search_result = search_result?;
let (mut search_result, time_budget) = search_result?;
debug!(returns = ?search_result, "Search get");
// Apply personalization if requested
if let Some(personalize) = personalize.as_ref() {
search_result = personalization_service
.rerank_search_results(
search_result,
personalize,
personalize_query.as_deref(),
time_budget,
)
.await?;
}
debug!(request_uid = ?request_uid, returns = ?search_result, "Search get");
Ok(HttpResponse::Ok().json(search_result))
}
@@ -426,15 +461,17 @@ pub async fn search_with_url_query(
pub async fn search_with_post(
index_scheduler: GuardedData<ActionPolicy<{ actions::SEARCH }>, Data<IndexScheduler>>,
search_queue: web::Data<SearchQueue>,
personalization_service: web::Data<crate::personalization::PersonalizationService>,
index_uid: web::Path<String>,
params: AwebJson<SearchQuery, DeserrJsonError>,
req: HttpRequest,
analytics: web::Data<Analytics>,
) -> Result<HttpResponse, ResponseError> {
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let request_uid = Uuid::now_v7();
let mut query = params.into_inner();
debug!(parameters = ?query, "Search post");
debug!(request_uid = ?request_uid, parameters = ?query, "Search post");
// Tenant token search_rules.
if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid) {
@@ -445,25 +482,37 @@ pub async fn search_with_post(
let index = index_scheduler.index(&index_uid)?;
// Extract personalization and query string before moving query
let personalize = query.personalize.take();
let search_kind =
search_kind(&query, index_scheduler.get_ref(), index_uid.to_string(), &index)?;
let retrieve_vectors = RetrieveVectors::new(query.retrieve_vectors);
let include_metadata = parse_include_metadata_header(&req);
// Save the query string for personalization if requested
let personalize_query = personalize.is_some().then(|| query.q.clone()).flatten();
let permit = search_queue.try_get_search_permit().await?;
let search_result = tokio::task::spawn_blocking(move || {
perform_search(
index_uid.to_string(),
SearchParams {
index_uid: index_uid.to_string(),
query,
search_kind,
retrieve_vectors,
features: index_scheduler.features(),
request_uid,
include_metadata,
},
&index,
query,
search_kind,
retrieve_vectors,
index_scheduler.features(),
)
})
.await;
permit.drop().await;
let search_result = search_result?;
if let Ok(ref search_result) = search_result {
if let Ok((ref search_result, _)) = search_result {
aggregate.succeed(search_result);
if search_result.degraded {
MEILISEARCH_DEGRADED_SEARCH_REQUESTS.inc();
@@ -471,9 +520,21 @@ pub async fn search_with_post(
}
analytics.publish(aggregate, &req);
let search_result = search_result?;
let (mut search_result, time_budget) = search_result?;
debug!(returns = ?search_result, "Search post");
// Apply personalization if requested
if let Some(personalize) = personalize.as_ref() {
search_result = personalization_service
.rerank_search_results(
search_result,
personalize,
personalize_query.as_deref(),
time_budget,
)
.await?;
}
debug!(request_uid = ?request_uid, returns = ?search_result, "Search post");
Ok(HttpResponse::Ok().json(search_result))
}

View File

@@ -7,6 +7,7 @@ use serde_json::{json, Value};
use crate::aggregate_methods;
use crate::analytics::{Aggregate, AggregateMethod};
use crate::metrics::MEILISEARCH_PERSONALIZED_SEARCH_REQUESTS;
use crate::search::{
SearchQuery, SearchResult, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER,
DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT,
@@ -95,6 +96,9 @@ pub struct SearchAggregator<Method: AggregateMethod> {
show_ranking_score_details: bool,
ranking_score_threshold: bool,
// personalization
total_personalized: usize,
marker: std::marker::PhantomData<Method>,
}
@@ -129,6 +133,7 @@ impl<Method: AggregateMethod> SearchAggregator<Method> {
hybrid,
ranking_score_threshold,
locales,
personalize,
} = query;
let mut ret = Self::default();
@@ -204,6 +209,12 @@ impl<Method: AggregateMethod> SearchAggregator<Method> {
ret.locales = locales.iter().copied().collect();
}
// personalization
if personalize.is_some() {
ret.total_personalized = 1;
MEILISEARCH_PERSONALIZED_SEARCH_REQUESTS.inc();
}
ret.highlight_pre_tag = *highlight_pre_tag != DEFAULT_HIGHLIGHT_PRE_TAG();
ret.highlight_post_tag = *highlight_post_tag != DEFAULT_HIGHLIGHT_POST_TAG();
ret.crop_marker = *crop_marker != DEFAULT_CROP_MARKER();
@@ -234,6 +245,8 @@ impl<Method: AggregateMethod> SearchAggregator<Method> {
facet_stats: _,
degraded,
used_negative_operator,
request_uid: _,
metadata: _,
} = result;
self.total_succeeded = self.total_succeeded.saturating_add(1);
@@ -294,6 +307,7 @@ impl<Method: AggregateMethod> Aggregate for SearchAggregator<Method> {
total_used_negative_operator,
ranking_score_threshold,
mut locales,
total_personalized,
marker: _,
} = *new;
@@ -379,6 +393,9 @@ impl<Method: AggregateMethod> Aggregate for SearchAggregator<Method> {
// locales
self.locales.append(&mut locales);
// personalization
self.total_personalized = self.total_personalized.saturating_add(total_personalized);
self
}
@@ -424,6 +441,7 @@ impl<Method: AggregateMethod> Aggregate for SearchAggregator<Method> {
total_used_negative_operator,
ranking_score_threshold,
locales,
total_personalized,
marker: _,
} = *self;
@@ -497,6 +515,9 @@ impl<Method: AggregateMethod> Aggregate for SearchAggregator<Method> {
"show_ranking_score_details": show_ranking_score_details,
"ranking_score_threshold": ranking_score_threshold,
},
"personalization": {
"total_personalized": total_personalized,
},
})
}
}

View File

@@ -180,12 +180,6 @@ pub async fn get_metrics(
let response = String::from_utf8(buffer).expect("Failed to convert bytes to string");
// We cannot specify the version with ContentType(TEXT_PLAIN_UTF_8) so we have to write everything by hand :(
// see the following for what should be returned: https://prometheus.io/docs/instrumenting/content_negotiation/#content-type-response
let content_type = ("content-type", "text/plain; version=0.0.4; charset=utf-8");
Ok(HttpResponse::Ok()
// .insert_header(header::ContentType(mime::TEXT_PLAIN_UTF_8))
.insert_header(content_type)
.body(response))
let content_type = ("content-type", prometheus::TEXT_FORMAT);
Ok(HttpResponse::Ok().insert_header(content_type).body(response))
}

View File

@@ -41,10 +41,13 @@ use crate::routes::indexes::IndexView;
use crate::routes::multi_search::SearchResults;
use crate::routes::network::{Network, Remote};
use crate::routes::swap_indexes::SwapIndexesPayload;
use crate::routes::webhooks::{WebhookResults, WebhookSettings, WebhookWithMetadata};
use crate::routes::webhooks::{
WebhookResults, WebhookSettings, WebhookWithMetadataRedactedAuthorization,
};
use crate::search::{
FederatedSearch, FederatedSearchResult, Federation, FederationOptions, MergeFacets,
SearchQueryWithIndex, SearchResultWithIndex, SimilarQuery, SimilarResult,
INCLUDE_METADATA_HEADER,
};
use crate::search_queue::SearchQueue;
use crate::Opt;
@@ -102,7 +105,7 @@ mod webhooks;
url = "/",
description = "Local server",
)),
components(schemas(PaginationView<KeyView>, PaginationView<IndexView>, IndexView, DocumentDeletionByFilter, AllBatches, BatchStats, ProgressStepView, ProgressView, BatchView, RuntimeTogglableFeatures, SwapIndexesPayload, DocumentEditionByFunction, MergeFacets, FederationOptions, SearchQueryWithIndex, Federation, FederatedSearch, FederatedSearchResult, SearchResults, SearchResultWithIndex, SimilarQuery, SimilarResult, PaginationView<serde_json::Value>, BrowseQuery, UpdateIndexRequest, IndexUid, IndexCreateRequest, KeyView, Action, CreateApiKey, UpdateStderrLogs, LogMode, GetLogs, IndexStats, Stats, HealthStatus, HealthResponse, VersionResponse, Code, ErrorType, AllTasks, TaskView, Status, DetailsView, ResponseError, Settings<Unchecked>, Settings<Checked>, TypoSettings, MinWordSizeTyposSetting, FacetingSettings, PaginationSettings, SummarizedTaskView, Kind, Network, Remote, FilterableAttributesRule, FilterableAttributesPatterns, AttributePatterns, FilterableAttributesFeatures, FilterFeatures, Export, WebhookSettings, WebhookResults, WebhookWithMetadata, meilisearch_types::milli::vector::VectorStoreBackend))
components(schemas(PaginationView<KeyView>, PaginationView<IndexView>, IndexView, DocumentDeletionByFilter, AllBatches, BatchStats, ProgressStepView, ProgressView, BatchView, RuntimeTogglableFeatures, SwapIndexesPayload, DocumentEditionByFunction, MergeFacets, FederationOptions, SearchQueryWithIndex, Federation, FederatedSearch, FederatedSearchResult, SearchResults, SearchResultWithIndex, SimilarQuery, SimilarResult, PaginationView<serde_json::Value>, BrowseQuery, UpdateIndexRequest, IndexUid, IndexCreateRequest, KeyView, Action, CreateApiKey, UpdateStderrLogs, LogMode, GetLogs, IndexStats, Stats, HealthStatus, HealthResponse, VersionResponse, Code, ErrorType, AllTasks, TaskView, Status, DetailsView, ResponseError, Settings<Unchecked>, Settings<Checked>, TypoSettings, MinWordSizeTyposSetting, FacetingSettings, PaginationSettings, SummarizedTaskView, Kind, Network, Remote, FilterableAttributesRule, FilterableAttributesPatterns, AttributePatterns, FilterableAttributesFeatures, FilterFeatures, Export, WebhookSettings, WebhookResults, WebhookWithMetadataRedactedAuthorization, meilisearch_types::milli::vector::VectorStoreBackend))
)]
pub struct MeilisearchApi;
@@ -184,6 +187,18 @@ pub fn is_dry_run(req: &HttpRequest, opt: &Opt) -> Result<bool, ResponseError> {
.is_some_and(|s| s.to_lowercase() == "true"))
}
/// Parse the `Meili-Include-Metadata` header from an HTTP request.
///
/// Returns `true` if the header is present and set to "true" or "1" (case-insensitive).
/// Returns `false` if the header is not present or has any other value.
pub fn parse_include_metadata_header(req: &HttpRequest) -> bool {
req.headers()
.get(INCLUDE_METADATA_HEADER)
.and_then(|h| h.to_str().ok())
.map(|v| matches!(v.to_lowercase().as_str(), "true" | "1"))
.unwrap_or(false)
}
#[derive(Debug, Serialize, Deserialize, ToSchema)]
#[serde(rename_all = "camelCase")]
pub struct SummarizedTaskView {
@@ -203,6 +218,8 @@ pub struct SummarizedTaskView {
deserialize_with = "time::serde::rfc3339::deserialize"
)]
enqueued_at: OffsetDateTime,
#[serde(default, skip_serializing_if = "Option::is_none")]
custom_metadata: Option<String>,
}
impl From<Task> for SummarizedTaskView {
@@ -213,6 +230,7 @@ impl From<Task> for SummarizedTaskView {
status: task.status,
kind: task.kind.as_kind(),
enqueued_at: task.enqueued_at,
custom_metadata: task.custom_metadata,
}
}
}

View File

@@ -9,6 +9,7 @@ use meilisearch_types::keys::actions;
use serde::Serialize;
use tracing::debug;
use utoipa::{OpenApi, ToSchema};
use uuid::Uuid;
use super::multi_search_analytics::MultiSearchAggregator;
use crate::analytics::Analytics;
@@ -17,10 +18,11 @@ use crate::extractors::authentication::policies::ActionPolicy;
use crate::extractors::authentication::{AuthenticationError, GuardedData};
use crate::extractors::sequential_extractor::SeqHandler;
use crate::routes::indexes::search::search_kind;
use crate::routes::parse_include_metadata_header;
use crate::search::{
add_search_rules, perform_federated_search, perform_search, FederatedSearch,
FederatedSearchResult, RetrieveVectors, SearchQueryWithIndex, SearchResultWithIndex,
PROXY_SEARCH_HEADER, PROXY_SEARCH_HEADER_VALUE,
FederatedSearchResult, RetrieveVectors, SearchParams, SearchQueryWithIndex,
SearchResultWithIndex, PROXY_SEARCH_HEADER, PROXY_SEARCH_HEADER_VALUE,
};
use crate::search_queue::SearchQueue;
@@ -144,6 +146,7 @@ pub struct SearchResults {
pub async fn multi_search_with_post(
index_scheduler: GuardedData<ActionPolicy<{ actions::SEARCH }>, Data<IndexScheduler>>,
search_queue: Data<SearchQueue>,
personalization_service: web::Data<crate::personalization::PersonalizationService>,
params: AwebJson<FederatedSearch, DeserrJsonError>,
req: HttpRequest,
analytics: web::Data<Analytics>,
@@ -151,6 +154,7 @@ pub async fn multi_search_with_post(
// Since we don't want to process half of the search requests and then get a permit refused
// we're going to get one permit for the whole duration of the multi-search request.
let permit = search_queue.try_get_search_permit().await?;
let request_uid = Uuid::now_v7();
let federated_search = params.into_inner();
@@ -186,16 +190,31 @@ pub async fn multi_search_with_post(
err
})?;
let include_metadata = parse_include_metadata_header(&req);
let response = match federation {
Some(federation) => {
debug!(
request_uid = ?request_uid,
federation = ?federation,
parameters = ?queries,
"Federated-search"
);
// check remote header
let is_proxy = req
.headers()
.get(PROXY_SEARCH_HEADER)
.is_some_and(|value| value.as_bytes() == PROXY_SEARCH_HEADER_VALUE.as_bytes());
let search_result =
perform_federated_search(&index_scheduler, queries, federation, features, is_proxy)
.await;
let search_result = perform_federated_search(
&index_scheduler,
queries,
federation,
features,
is_proxy,
request_uid,
include_metadata,
)
.await;
permit.drop().await;
if search_result.is_ok() {
@@ -203,6 +222,13 @@ pub async fn multi_search_with_post(
}
analytics.publish(multi_aggregate, &req);
debug!(
request_uid = ?request_uid,
returns = ?search_result,
"Federated-search"
);
HttpResponse::Ok().json(search_result?)
}
None => {
@@ -211,12 +237,17 @@ pub async fn multi_search_with_post(
// changes.
let search_results: Result<_, (ResponseError, usize)> = async {
let mut search_results = Vec::with_capacity(queries.len());
for (query_index, (index_uid, query, federation_options)) in queries
for (query_index, (index_uid, mut query, federation_options)) in queries
.into_iter()
.map(SearchQueryWithIndex::into_index_query_federation)
.enumerate()
{
debug!(on_index = query_index, parameters = ?query, "Multi-search");
debug!(
request_uid = ?request_uid,
on_index = query_index,
parameters = ?query,
"Multi-search"
);
if federation_options.is_some() {
return Err((
@@ -239,6 +270,13 @@ pub async fn multi_search_with_post(
})
.with_index(query_index)?;
// Extract personalization and query string before moving query
let personalize = query.personalize.take();
// Save the query string for personalization if requested
let personalize_query =
personalize.is_some().then(|| query.q.clone()).flatten();
let index_uid_str = index_uid.to_string();
let search_kind = search_kind(
@@ -250,22 +288,40 @@ pub async fn multi_search_with_post(
.with_index(query_index)?;
let retrieve_vector = RetrieveVectors::new(query.retrieve_vectors);
let search_result = tokio::task::spawn_blocking(move || {
let (mut search_result, time_budget) = tokio::task::spawn_blocking(move || {
perform_search(
index_uid_str.clone(),
SearchParams {
index_uid: index_uid_str.clone(),
query,
search_kind,
retrieve_vectors: retrieve_vector,
features,
request_uid,
include_metadata,
},
&index,
query,
search_kind,
retrieve_vector,
features,
)
})
.await
.with_index(query_index)?
.with_index(query_index)?;
// Apply personalization if requested
if let Some(personalize) = personalize.as_ref() {
search_result = personalization_service
.rerank_search_results(
search_result,
personalize,
personalize_query.as_deref(),
time_budget,
)
.await
.with_index(query_index)?;
}
search_results.push(SearchResultWithIndex {
index_uid: index_uid.into_inner(),
result: search_result.with_index(query_index)?,
result: search_result,
});
}
Ok(search_results)
@@ -286,7 +342,11 @@ pub async fn multi_search_with_post(
err
})?;
debug!(returns = ?search_results, "Multi-search");
debug!(
request_uid = ?request_uid,
returns = ?search_results,
"Multi-search"
);
HttpResponse::Ok().json(SearchResults { results: search_results })
}

View File

@@ -67,6 +67,7 @@ impl MultiSearchAggregator {
hybrid: _,
ranking_score_threshold: _,
locales: _,
personalize: _,
} in &federated_search.queries
{
if let Some(federation_options) = federation_options {

View File

@@ -9,20 +9,27 @@ use itertools::{EitherOrBoth, Itertools};
use meilisearch_types::deserr::DeserrJsonError;
use meilisearch_types::enterprise_edition::network::{Network as DbNetwork, Remote as DbRemote};
use meilisearch_types::error::deserr_codes::{
InvalidNetworkRemotes, InvalidNetworkSearchApiKey, InvalidNetworkSelf, InvalidNetworkSharding,
InvalidNetworkLeader, InvalidNetworkRemotes, InvalidNetworkSearchApiKey, InvalidNetworkSelf,
InvalidNetworkUrl, InvalidNetworkWriteApiKey,
};
use meilisearch_types::error::ResponseError;
use meilisearch_types::keys::actions;
use meilisearch_types::milli::update::Setting;
use meilisearch_types::tasks::enterprise_edition::network::{
NetworkTopologyChange, Origin, DbTaskNetwork,
};
use meilisearch_types::tasks::KindWithContent;
use serde::Serialize;
use tracing::debug;
use utoipa::{OpenApi, ToSchema};
use crate::analytics::{Aggregate, Analytics};
use crate::error::MeilisearchHttpError;
use crate::extractors::authentication::policies::ActionPolicy;
use crate::extractors::authentication::GuardedData;
use crate::extractors::sequential_extractor::SeqHandler;
use crate::routes::indexes::enterprise_edition::proxy::{proxy, Body};
use crate::routes::SummarizedTaskView;
#[derive(OpenApi)]
#[openapi(
@@ -83,7 +90,7 @@ async fn get_network(
Ok(HttpResponse::Ok().json(network))
}
#[derive(Debug, Deserr, ToSchema, Serialize)]
#[derive(Clone, Debug, Deserr, ToSchema, Serialize)]
#[deserr(error = DeserrJsonError<InvalidNetworkRemotes>, rename_all = camelCase, deny_unknown_fields)]
#[serde(rename_all = "camelCase")]
#[schema(rename_all = "camelCase")]
@@ -106,12 +113,19 @@ pub struct Remote {
pub write_api_key: Setting<String>,
}
#[derive(Debug, Deserr, ToSchema, Serialize)]
#[derive(Clone, Debug, Deserr, ToSchema, Serialize)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
#[serde(rename_all = "camelCase")]
#[schema(rename_all = "camelCase")]
pub struct Network {
#[schema(value_type = Option<BTreeMap<String, Remote>>, example = json!("http://localhost:7700"))]
#[schema(value_type = Option<BTreeMap<String, Remote>>, example = json!({
"ms-00": {
"url": "http://localhost:7700"
},
"ms-01": {
"url": "http://localhost:7701"
}
}))]
#[deserr(default, error = DeserrJsonError<InvalidNetworkRemotes>)]
#[serde(default)]
pub remotes: Setting<BTreeMap<String, Option<Remote>>>,
@@ -119,10 +133,21 @@ pub struct Network {
#[serde(default, rename = "self")]
#[deserr(default, rename = "self", error = DeserrJsonError<InvalidNetworkSelf>)]
pub local: Setting<String>,
#[schema(value_type = Option<bool>, example = json!(true))]
#[schema(value_type = Option<String>, example = json!("ms-00"))]
#[serde(default)]
#[deserr(default, error = DeserrJsonError<InvalidNetworkSharding>)]
pub sharding: Setting<bool>,
#[deserr(default, error = DeserrJsonError<InvalidNetworkLeader>)]
pub leader: Setting<String>,
#[schema(value_type = Option<BTreeMap<String, Remote>>, example = json!({
"ms-00": {
"url": "http://localhost:7700"
},
"ms-01": {
"url": "http://localhost:7701"
}
}))]
#[deserr(default, error = DeserrJsonError<InvalidNetworkRemotes>)]
#[serde(default)]
pub previous_remotes: Setting<BTreeMap<String, Option<Remote>>>,
}
impl Remote {
@@ -207,29 +232,203 @@ async fn patch_network(
) -> Result<HttpResponse, ResponseError> {
index_scheduler.features().check_network("Using the /network route")?;
match crate::routes::indexes::enterprise_edition::proxy::origin_from_req(&req)? {
Some(origin) => {
patch_network_with_origin(index_scheduler, new_network, req, origin, analytics).await
}
None => patch_network_without_origin(index_scheduler, new_network, req, analytics).await,
}
}
async fn patch_network_without_origin(
index_scheduler: GuardedData<ActionPolicy<{ actions::NETWORK_UPDATE }>, Data<IndexScheduler>>,
new_network: AwebJson<Network, DeserrJsonError>,
req: HttpRequest,
analytics: Data<Analytics>,
) -> Result<HttpResponse, ResponseError> {
let new_network = new_network.0;
let old_network = index_scheduler.network();
debug!(parameters = ?new_network, "Patch network");
if !matches!(new_network.previous_remotes, Setting::NotSet) {
return Err(MeilisearchHttpError::UnexpectedNetworkPreviousRemotes.into());
}
let merged_network = merge_networks(old_network.clone(), new_network)?;
index_scheduler.put_network(merged_network.clone())?;
analytics.publish(
PatchNetworkAnalytics {
network_size: merged_network.remotes.len(),
network_has_self: merged_network.local.is_some(),
},
&req,
);
/// TODO: spawn task only if necessary
let network_topology_change =
NetworkTopologyChange::new(old_network.clone(), merged_network.clone());
let task = KindWithContent::NetworkTopologyChange(network_topology_change);
let task = {
let index_scheduler = index_scheduler.clone();
tokio::task::spawn_blocking(move || index_scheduler.register(task, None, false)).await??
};
let mut proxied_network = Network {
remotes: Setting::Set(to_settings_remotes(&merged_network.remotes)),
local: Setting::NotSet,
leader: Setting::some_or_not_set(merged_network.leader.clone()),
previous_remotes: Setting::Set(to_settings_remotes(&old_network.remotes)),
};
let mut deleted_network = old_network;
let deleted_remotes = &mut deleted_network.remotes;
deleted_remotes.retain(|node, _| !merged_network.remotes.contains_key(node));
// proxy network change to the remaining remotes.
let updated_task = proxy(
&index_scheduler,
None,
&req,
DbTaskNetwork::Remotes {
remote_tasks: Default::default(),
network_version: merged_network.version,
},
merged_network,
Body::generated(proxied_network.clone(), |name, _remote, network| {
network.local = Setting::Set(name.to_string());
}),
&task,
)
.await?;
// unwrap: network was set by `proxy`
let task_network = updated_task.network.unwrap();
proxied_network.previous_remotes = Setting::NotSet;
// proxy network change to the deleted remotes
proxy(
&index_scheduler,
None,
&req,
task_network,
deleted_network,
Body::generated(proxied_network.clone(), |_name, _remote, network| {
network.local = Setting::Reset;
}),
&task,
)
.await?;
let task: SummarizedTaskView = task.into();
debug!("returns: {:?}", task);
Ok(HttpResponse::Accepted().json(task))
}
async fn patch_network_with_origin(
index_scheduler: GuardedData<ActionPolicy<{ actions::NETWORK_UPDATE }>, Data<IndexScheduler>>,
merged_network: AwebJson<Network, DeserrJsonError>,
req: HttpRequest,
origin: Origin,
analytics: Data<Analytics>,
) -> Result<HttpResponse, ResponseError> {
let merged_network = merged_network.into_inner();
debug!(parameters = ?merged_network, ?origin, "Patch network");
let mut remotes = BTreeMap::new();
let mut old_network = index_scheduler.network();
for (name, remote) in merged_network.remotes.set().into_iter().flat_map(|x| x.into_iter()) {
let Some(remote) = remote else { continue };
let remote = remote.try_into_db_node(&name)?;
remotes.insert(name, remote);
}
let mut previous_remotes = BTreeMap::new();
for (name, remote) in
merged_network.previous_remotes.set().into_iter().flat_map(|x| x.into_iter())
{
let Some(remote) = remote else {
continue;
};
let remote = remote.try_into_db_node(&name)?;
previous_remotes.insert(name, remote);
}
old_network.remotes = previous_remotes;
let new_network = DbNetwork {
local: merged_network.local.set(),
remotes,
leader: merged_network.leader.set(),
version: origin.network_version,
};
index_scheduler.put_network(new_network.clone())?;
analytics.publish(
PatchNetworkAnalytics {
network_size: new_network.remotes.len(),
network_has_self: new_network.local.is_some(),
},
&req,
);
/// TODO: spawn task only if necessary
let network_topology_change = NetworkTopologyChange::new(old_network, new_network);
let task = KindWithContent::NetworkTopologyChange(network_topology_change);
let task = {
let index_scheduler = index_scheduler.clone();
tokio::task::spawn_blocking(move || index_scheduler.register(task, None, false)).await??
};
index_scheduler.set_task_network(task.uid, DbTaskNetwork::Origin { origin })?;
let task: SummarizedTaskView = task.into();
debug!("returns: {:?}", task);
Ok(HttpResponse::Accepted().json(task))
}
fn to_settings_remotes(
db_remotes: &BTreeMap<String, DbRemote>,
) -> BTreeMap<String, Option<Remote>> {
db_remotes
.iter()
.map(|(name, remote)| {
(
name.clone(),
Some(Remote {
url: Setting::Set(remote.url.clone()),
search_api_key: Setting::some_or_not_set(remote.search_api_key.clone()),
write_api_key: Setting::some_or_not_set(remote.write_api_key.clone()),
}),
)
})
.collect()
}
fn merge_networks(
old_network: DbNetwork,
new_network: Network,
) -> Result<DbNetwork, ResponseError> {
let merged_self = match new_network.local {
Setting::Set(new_self) => Some(new_self),
Setting::Reset => None,
Setting::NotSet => old_network.local,
};
let merged_sharding = match new_network.sharding {
Setting::Set(new_sharding) => new_sharding,
Setting::Reset => false,
Setting::NotSet => old_network.sharding,
let merged_leader = match new_network.leader {
Setting::Set(new_leader) => Some(new_leader),
Setting::Reset => None,
Setting::NotSet => old_network.leader,
};
if merged_sharding && merged_self.is_none() {
return Err(ResponseError::from_msg(
"`.sharding`: enabling the sharding requires `.self` to be set\n - Hint: Disable `sharding` or set `self` to a value.".into(),
meilisearch_types::error::Code::InvalidNetworkSharding,
));
match (merged_leader.as_deref(), merged_self.as_deref()) {
// 1. Always allowed if there is no leader
(None, _) => (),
// 2. Allowed if the leader is self
(Some(leader), Some(this)) if leader == this => (),
// 3. Any other change is disallowed
(Some(leader), _) => {
return Err(MeilisearchHttpError::NotLeader { leader: leader.to_string() }.into())
}
}
let new_version = uuid::Uuid::now_v7();
let merged_remotes = match new_network.remotes {
Setting::Set(new_remotes) => {
let mut merged_remotes = BTreeMap::new();
@@ -301,18 +500,11 @@ async fn patch_network(
Setting::Reset => BTreeMap::new(),
Setting::NotSet => old_network.remotes,
};
analytics.publish(
PatchNetworkAnalytics {
network_size: merged_remotes.len(),
network_has_self: merged_self.is_some(),
},
&req,
);
let merged_network =
DbNetwork { local: merged_self, remotes: merged_remotes, sharding: merged_sharding };
index_scheduler.put_network(merged_network.clone())?;
debug!(returns = ?merged_network, "Patch network");
Ok(HttpResponse::Ok().json(merged_network))
let merged_network = DbNetwork {
local: merged_self,
remotes: merged_remotes,
leader: merged_leader,
version: new_version,
};
Ok(merged_network)
}

View File

@@ -226,14 +226,14 @@ mod tests {
{
let params = "types=createIndex";
let err = deserr_query_params::<TaskDeletionOrCancelationQuery>(params).unwrap_err();
snapshot!(meili_snap::json_string!(err), @r#"
snapshot!(meili_snap::json_string!(err), @r###"
{
"message": "Invalid value in parameter `types`: `createIndex` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentEdition`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`, `export`, `upgradeDatabase`.",
"message": "Invalid value in parameter `types`: `createIndex` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentEdition`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`, `export`, `upgradeDatabase`, `indexCompaction`.",
"code": "invalid_task_types",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_task_types"
}
"#);
"###);
}
}
#[test]

View File

@@ -1,3 +1,4 @@
use core::convert::Infallible;
use std::collections::BTreeMap;
use std::str::FromStr;
@@ -7,7 +8,6 @@ use actix_http::header::{
};
use actix_web::web::{self, Data, Path};
use actix_web::{HttpRequest, HttpResponse};
use core::convert::Infallible;
use deserr::actix_web::AwebJson;
use deserr::{DeserializeError, Deserr, ValuePointerRef};
use index_scheduler::IndexScheduler;
@@ -24,12 +24,12 @@ use tracing::debug;
use url::Url;
use utoipa::{OpenApi, ToSchema};
use uuid::Uuid;
use WebhooksError::*;
use crate::analytics::{Aggregate, Analytics};
use crate::extractors::authentication::policies::ActionPolicy;
use crate::extractors::authentication::GuardedData;
use crate::extractors::sequential_extractor::SeqHandler;
use WebhooksError::*;
#[derive(OpenApi)]
#[openapi(
@@ -90,7 +90,7 @@ fn deny_immutable_fields_webhook(
#[derive(Debug, Serialize, ToSchema)]
#[serde(rename_all = "camelCase")]
#[schema(rename_all = "camelCase")]
pub(super) struct WebhookWithMetadata {
pub(super) struct WebhookWithMetadataRedactedAuthorization {
uuid: Uuid,
is_editable: bool,
#[schema(value_type = WebhookSettings)]
@@ -98,8 +98,9 @@ pub(super) struct WebhookWithMetadata {
webhook: Webhook,
}
impl WebhookWithMetadata {
pub fn from(uuid: Uuid, webhook: Webhook) -> Self {
impl WebhookWithMetadataRedactedAuthorization {
pub fn from(uuid: Uuid, mut webhook: Webhook) -> Self {
webhook.redact_authorization_header();
Self { uuid, is_editable: uuid != Uuid::nil(), webhook }
}
}
@@ -107,7 +108,7 @@ impl WebhookWithMetadata {
#[derive(Debug, Serialize, ToSchema)]
#[serde(rename_all = "camelCase")]
pub(super) struct WebhookResults {
results: Vec<WebhookWithMetadata>,
results: Vec<WebhookWithMetadataRedactedAuthorization>,
}
#[utoipa::path(
@@ -150,7 +151,7 @@ async fn get_webhooks(
let results = webhooks
.webhooks
.into_iter()
.map(|(uuid, webhook)| WebhookWithMetadata::from(uuid, webhook))
.map(|(uuid, webhook)| WebhookWithMetadataRedactedAuthorization::from(uuid, webhook))
.collect::<Vec<_>>();
let results = WebhookResults { results };
@@ -301,7 +302,7 @@ fn check_changed(uuid: Uuid, webhook: &Webhook) -> Result<(), WebhooksError> {
tag = "Webhooks",
security(("Bearer" = ["webhooks.get", "webhooks.*", "*.get", "*"])),
responses(
(status = 200, description = "Webhook found", body = WebhookWithMetadata, content_type = "application/json", example = json!({
(status = 200, description = "Webhook found", body = WebhookWithMetadataRedactedAuthorization, content_type = "application/json", example = json!({
"uuid": "550e8400-e29b-41d4-a716-446655440000",
"url": "https://your.site/on-tasks-completed",
"headers": {
@@ -324,7 +325,7 @@ async fn get_webhook(
let mut webhooks = index_scheduler.webhooks_view();
let webhook = webhooks.webhooks.remove(&uuid).ok_or(WebhookNotFound(uuid))?;
let webhook = WebhookWithMetadata::from(uuid, webhook);
let webhook = WebhookWithMetadataRedactedAuthorization::from(uuid, webhook);
debug!(returns = ?webhook, "Get webhook");
Ok(HttpResponse::Ok().json(webhook))
@@ -337,7 +338,7 @@ async fn get_webhook(
request_body = WebhookSettings,
security(("Bearer" = ["webhooks.create", "webhooks.*", "*"])),
responses(
(status = 201, description = "Webhook created successfully", body = WebhookWithMetadata, content_type = "application/json", example = json!({
(status = 201, description = "Webhook created successfully", body = WebhookWithMetadataRedactedAuthorization, content_type = "application/json", example = json!({
"uuid": "550e8400-e29b-41d4-a716-446655440000",
"url": "https://your.site/on-tasks-completed",
"headers": {
@@ -383,7 +384,7 @@ async fn post_webhook(
analytics.publish(PostWebhooksAnalytics, &req);
let response = WebhookWithMetadata::from(uuid, webhook);
let response = WebhookWithMetadataRedactedAuthorization::from(uuid, webhook);
debug!(returns = ?response, "Post webhook");
Ok(HttpResponse::Created().json(response))
}
@@ -395,7 +396,7 @@ async fn post_webhook(
request_body = WebhookSettings,
security(("Bearer" = ["webhooks.update", "webhooks.*", "*"])),
responses(
(status = 200, description = "Webhook updated successfully", body = WebhookWithMetadata, content_type = "application/json", example = json!({
(status = 200, description = "Webhook updated successfully", body = WebhookWithMetadataRedactedAuthorization, content_type = "application/json", example = json!({
"uuid": "550e8400-e29b-41d4-a716-446655440000",
"url": "https://your.site/on-tasks-completed",
"headers": {
@@ -435,7 +436,7 @@ async fn patch_webhook(
analytics.publish(PatchWebhooksAnalytics, &req);
let response = WebhookWithMetadata::from(uuid, webhook);
let response = WebhookWithMetadataRedactedAuthorization::from(uuid, webhook);
debug!(returns = ?response, "Patch webhook");
Ok(HttpResponse::Ok().json(response))
}

View File

@@ -17,11 +17,13 @@ use meilisearch_types::milli::vector::Embedding;
use meilisearch_types::milli::{self, DocumentId, OrderBy, TimeBudget, DEFAULT_VALUES_PER_FACET};
use roaring::RoaringBitmap;
use tokio::task::JoinHandle;
use uuid::Uuid;
use super::super::ranking_rules::{self, RankingRules};
use super::super::{
compute_facet_distribution_stats, prepare_search, AttributesFormat, ComputedFacets, HitMaker,
HitsInfo, RetrieveVectors, SearchHit, SearchKind, SearchQuery, SearchQueryWithIndex,
HitsInfo, RetrieveVectors, SearchHit, SearchKind, SearchMetadata, SearchQuery,
SearchQueryWithIndex,
};
use super::proxy::{proxy_search, ProxySearchError, ProxySearchParams};
use super::types::{
@@ -39,32 +41,58 @@ pub async fn perform_federated_search(
federation: Federation,
features: RoFeatures,
is_proxy: bool,
request_uid: Uuid,
include_metadata: bool,
) -> Result<FederatedSearchResult, ResponseError> {
if is_proxy {
features.check_network("Performing a remote federated search")?;
}
let before_search = std::time::Instant::now();
let deadline = before_search + std::time::Duration::from_secs(9);
let timeout = std::env::var("MEILI_EXPERIMENTAL_REMOTE_SEARCH_TIMEOUT_SECONDS")
.ok()
.map(|p| p.parse().unwrap())
.unwrap_or(25);
let deadline = before_search + std::time::Duration::from_secs(timeout);
let required_hit_count = federation.limit + federation.offset;
let retrieve_vectors = queries.iter().any(|q| q.retrieve_vectors);
let network = index_scheduler.network();
// Preconstruct metadata keeping the original queries order for later metadata building
let precomputed_query_metadata: Option<Vec<_>> = include_metadata.then(|| {
queries
.iter()
.map(|q| {
(
q.index_uid.to_string(),
q.federation_options.as_ref().and_then(|o| o.remote.clone()),
)
})
.collect()
});
// this implementation partition the queries by index to guarantee an important property:
// - all the queries to a particular index use the same read transaction.
// This is an important property, otherwise we cannot guarantee the self-consistency of the results.
// 1. partition queries by host and index
let mut partitioned_queries = PartitionedQueries::new();
for (query_index, federated_query) in queries.into_iter().enumerate() {
partitioned_queries.partition(federated_query, query_index, &network, features)?
}
// 2. perform queries, merge and make hits index by index
// 2.1. start remote queries
let remote_search =
RemoteSearch::start(partitioned_queries.remote_queries_by_host, &federation, deadline);
let remote_search = RemoteSearch::start(
partitioned_queries.remote_queries_by_host,
&federation,
deadline,
include_metadata,
);
// 2.2. concurrently execute local queries
let params = SearchByIndexParams {
@@ -106,11 +134,25 @@ pub async fn perform_federated_search(
let after_waiting_remote_results = std::time::Instant::now();
// 3. merge hits and metadata across indexes and hosts
// 3.1. merge metadata
// 3.1. Build metadata in the same order as the original queries
let query_metadata = precomputed_query_metadata.map(|precomputed_query_metadata| {
// If a remote is present, set the local remote name
let local_remote_name = network.local.clone().filter(|_| partitioned_queries.has_remote);
build_query_metadata(
precomputed_query_metadata,
local_remote_name,
&remote_results,
&results_by_index,
)
});
// 3.2. merge federation metadata
let (estimated_total_hits, degraded, used_negative_operator, facets, max_remote_duration) =
merge_metadata(&mut results_by_index, &remote_results);
// 3.2. merge hits
// 3.3. merge hits
let merged_hits: Vec<_> = merge_index_global_results(results_by_index, &mut remote_results)
.skip(federation.offset)
.take(federation.limit)
@@ -125,7 +167,7 @@ pub async fn perform_federated_search(
.map(|hit| hit.hit())
.collect();
// 3.3. merge query vectors
// 3.4. merge query vectors
let query_vectors = if retrieve_vectors {
for remote_results in remote_results.iter_mut() {
if let Some(remote_vectors) = remote_results.query_vectors.take() {
@@ -144,7 +186,7 @@ pub async fn perform_federated_search(
None
};
// 3.4. merge facets
// 3.5. merge facets
let (facet_distribution, facet_stats, facets_by_index) =
facet_order.merge(federation.merge_facets, remote_results, facets);
@@ -170,6 +212,8 @@ pub async fn perform_federated_search(
facet_stats,
facets_by_index,
remote_errors: partitioned_queries.has_remote.then_some(remote_errors),
request_uid: Some(request_uid),
metadata: query_metadata,
})
}
@@ -219,7 +263,7 @@ struct SearchResultByQueryIterItem<'a> {
fn merge_index_local_results(
results_by_query: Vec<SearchResultByQuery<'_>>,
) -> impl Iterator<Item = SearchResultByQueryIterItem> + '_ {
) -> impl Iterator<Item = SearchResultByQueryIterItem<'_>> + '_ {
itertools::kmerge_by(
results_by_query.into_iter().map(SearchResultByQueryIter::new),
|left: &SearchResultByQueryIterItem, right: &SearchResultByQueryIterItem| {
@@ -393,6 +437,7 @@ struct SearchHitByIndex {
struct SearchResultByIndex {
index: String,
primary_key: Option<String>,
hits: Vec<SearchHitByIndex>,
estimated_total_hits: usize,
degraded: bool,
@@ -400,6 +445,61 @@ struct SearchResultByIndex {
facets: Option<ComputedFacets>,
}
/// Builds query metadata for federated search results.
///
/// This function creates metadata for each query in the same order as the original queries,
/// combining information from both local and remote search results. It handles the mapping
/// of primary keys to their respective indexes and remotes to prevent collisions when
/// multiple remotes have the same index_uid but different primary keys.
fn build_query_metadata(
precomputed_query_metadata: Vec<(String, Option<String>)>,
local_remote_name: Option<String>,
remote_results: &[FederatedSearchResult],
results_by_index: &[SearchResultByIndex],
) -> Vec<SearchMetadata> {
// Create a map of (remote, index_uid) -> primary_key for quick lookup
// This prevents collisions when multiple remotes have the same index_uid but different primary keys
let mut primary_key_per_index = std::collections::HashMap::new();
// Build metadata for remote results
for remote_result in remote_results {
if let Some(remote_metadata) = &remote_result.metadata {
for remote_meta in remote_metadata {
if let SearchMetadata {
remote: Some(remote_name),
index_uid,
primary_key: Some(primary_key),
..
} = remote_meta
{
let key = (Some(remote_name), index_uid);
primary_key_per_index.insert(key, primary_key);
}
}
}
}
// Build metadata for local results
for local_meta in results_by_index {
if let SearchResultByIndex { index, primary_key: Some(primary_key), .. } = local_meta {
let key = (None, index);
primary_key_per_index.insert(key, primary_key);
}
}
// Build metadata in the same order as the original queries
let mut query_metadata = Vec::new();
for (index_uid, remote) in precomputed_query_metadata {
let primary_key =
primary_key_per_index.get(&(remote.as_ref(), &index_uid)).map(|pk| pk.to_string());
let query_uid = Uuid::now_v7();
// if the remote is not set, use the local remote name
let remote = remote.or_else(|| local_remote_name.clone());
query_metadata.push(SearchMetadata { query_uid, primary_key, index_uid, remote });
}
query_metadata
}
fn merge_metadata(
results_by_index: &mut Vec<SearchResultByIndex>,
remote_results: &Vec<FederatedSearchResult>,
@@ -411,6 +511,7 @@ fn merge_metadata(
let mut max_remote_duration = Duration::ZERO;
for SearchResultByIndex {
index,
primary_key: _,
hits: _,
estimated_total_hits: estimated_total_hits_by_index,
facets: facets_by_index,
@@ -439,6 +540,8 @@ fn merge_metadata(
degraded: degraded_for_host,
used_negative_operator: host_used_negative_operator,
remote_errors: _,
metadata: _,
request_uid: _,
} in remote_results
{
let this_remote_duration = Duration::from_millis(*processing_time_ms as u64);
@@ -498,6 +601,10 @@ impl PartitionedQueries {
.into());
}
if federated_query.has_personalize() {
return Err(MeilisearchHttpError::PersonalizationInFederatedQuery(query_index).into());
}
let (index_uid, query, federation_options) = federated_query.into_index_query_federation();
let federation_options = federation_options.unwrap_or_default();
@@ -566,7 +673,12 @@ struct RemoteSearch {
}
impl RemoteSearch {
fn start(queries: RemoteQueriesByHost, federation: &Federation, deadline: Instant) -> Self {
fn start(
queries: RemoteQueriesByHost,
federation: &Federation,
deadline: Instant,
include_metadata: bool,
) -> Self {
let mut in_flight_remote_queries = BTreeMap::new();
let client = reqwest::ClientBuilder::new()
.connect_timeout(std::time::Duration::from_millis(200))
@@ -586,7 +698,10 @@ impl RemoteSearch {
// never merge distant facets
proxy_federation.merge_facets = None;
let params = params.clone();
async move { proxy_search(&node, queries, proxy_federation, &params).await }
async move {
proxy_search(&node, queries, proxy_federation, &params, include_metadata)
.await
}
}),
);
}
@@ -630,6 +745,13 @@ impl RemoteSearch {
continue 'remote_queries;
}
// Add remote name to metadata
if let Some(metadata) = res.metadata.as_mut() {
for meta in metadata {
meta.remote = Some(node_name.clone());
}
}
federation.insert(
FEDERATION_REMOTE.to_string(),
serde_json::Value::String(node_name.clone()),
@@ -725,6 +847,7 @@ impl SearchByIndex {
}
};
let rtxn = index.read_txn()?;
let primary_key = index.primary_key(&rtxn)?.map(|pk| pk.to_string());
let criteria = index.criteria(&rtxn)?;
let dictionary = index.dictionary(&rtxn)?;
let dictionary: Option<Vec<_>> =
@@ -751,6 +874,12 @@ impl SearchByIndex {
return Err(error);
}
let mut results_by_query = Vec::with_capacity(queries.len());
// all queries for an index share the same budget
let time_budget = match cutoff {
Some(cutoff) => TimeBudget::new(Duration::from_millis(cutoff)),
None => TimeBudget::default(),
};
for QueryByIndex { query, weight, query_index } in queries {
// use an immediately invoked lambda to capture the result without returning from the function
@@ -820,17 +949,13 @@ impl SearchByIndex {
let retrieve_vectors = RetrieveVectors::new(query.retrieve_vectors);
let time_budget = match cutoff {
Some(cutoff) => TimeBudget::new(Duration::from_millis(cutoff)),
None => TimeBudget::default(),
};
let (mut search, _is_finite_pagination, _max_total_hits, _offset) = prepare_search(
&index,
&rtxn,
&query,
&search_kind,
time_budget,
// clones of `TimeBudget` share the budget rather than restart it
time_budget.clone(),
params.features,
)?;
@@ -977,6 +1102,7 @@ impl SearchByIndex {
})?;
self.results_by_index.push(SearchResultByIndex {
index: index_uid,
primary_key,
hits: merged_result,
estimated_total_hits,
degraded,

View File

@@ -7,7 +7,7 @@ use serde::de::DeserializeOwned;
use serde_json::Value;
use super::types::{FederatedSearch, FederatedSearchResult, Federation};
use crate::search::SearchQueryWithIndex;
use crate::search::{SearchQueryWithIndex, INCLUDE_METADATA_HEADER};
pub const PROXY_SEARCH_HEADER: &str = "Meili-Proxy-Search";
pub const PROXY_SEARCH_HEADER_VALUE: &str = "true";
@@ -98,6 +98,7 @@ pub async fn proxy_search(
queries: Vec<SearchQueryWithIndex>,
federation: Federation,
params: &ProxySearchParams,
include_metadata: bool,
) -> Result<FederatedSearchResult, ProxySearchError> {
let url = format!("{}/multi-search", node.url);
@@ -105,7 +106,12 @@ pub async fn proxy_search(
let search_api_key = node.search_api_key.as_deref();
let max_deadline = std::time::Instant::now() + std::time::Duration::from_secs(5);
let timeout = std::env::var("MEILI_EXPERIMENTAL_REMOTE_SEARCH_TIMEOUT_SECONDS")
.ok()
.map(|p| p.parse().unwrap())
.unwrap_or(25);
let max_deadline = std::time::Instant::now() + std::time::Duration::from_secs(timeout);
let deadline = if let Some(deadline) = params.deadline {
std::time::Instant::min(deadline, max_deadline)
@@ -114,7 +120,16 @@ pub async fn proxy_search(
};
for i in 0..params.try_count {
match try_proxy_search(&url, search_api_key, &federated, &params.client, deadline).await {
match try_proxy_search(
&url,
search_api_key,
&federated,
&params.client,
deadline,
include_metadata,
)
.await
{
Ok(response) => return Ok(response),
Err(retry) => {
let duration = retry.into_duration(i)?;
@@ -122,7 +137,7 @@ pub async fn proxy_search(
}
}
}
try_proxy_search(&url, search_api_key, &federated, &params.client, deadline)
try_proxy_search(&url, search_api_key, &federated, &params.client, deadline, include_metadata)
.await
.map_err(Retry::into_error)
}
@@ -133,6 +148,7 @@ async fn try_proxy_search(
federated: &FederatedSearch,
client: &Client,
deadline: std::time::Instant,
include_metadata: bool,
) -> Result<FederatedSearchResult, Retry> {
let timeout = deadline.saturating_duration_since(std::time::Instant::now());
@@ -143,6 +159,8 @@ async fn try_proxy_search(
request
};
let request = request.header(PROXY_SEARCH_HEADER, PROXY_SEARCH_HEADER_VALUE);
let request =
if include_metadata { request.header(INCLUDE_METADATA_HEADER, "true") } else { request };
let response = request.send().await;
let response = match response {

View File

@@ -16,6 +16,9 @@ use meilisearch_types::milli::order_by_map::OrderByMap;
use meilisearch_types::milli::OrderBy;
use serde::{Deserialize, Serialize};
use utoipa::ToSchema;
use uuid::Uuid;
use crate::search::SearchMetadata;
use super::super::{ComputedFacets, FacetStats, HitsInfo, SearchHit, SearchQueryWithIndex};
use crate::milli::vector::Embedding;
@@ -131,6 +134,10 @@ pub struct FederatedSearchResult {
pub facet_stats: Option<BTreeMap<String, FacetStats>>,
#[serde(default, skip_serializing_if = "FederatedFacets::is_empty")]
pub facets_by_index: FederatedFacets,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub request_uid: Option<Uuid>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub metadata: Option<Vec<SearchMetadata>>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub remote_errors: Option<BTreeMap<String, ResponseError>>,
@@ -156,6 +163,8 @@ impl fmt::Debug for FederatedSearchResult {
facet_stats,
facets_by_index,
remote_errors,
request_uid,
metadata,
} = self;
let mut debug = f.debug_struct("SearchResult");
@@ -188,6 +197,12 @@ impl fmt::Debug for FederatedSearchResult {
if let Some(remote_errors) = remote_errors {
debug.field("remote_errors", &remote_errors);
}
if let Some(request_uid) = request_uid {
debug.field("request_uid", &request_uid);
}
if let Some(metadata) = metadata {
debug.field("metadata", &metadata);
}
debug.finish()
}

View File

@@ -36,6 +36,7 @@ use serde_json::{json, Value};
#[cfg(test)]
mod mod_test;
use utoipa::ToSchema;
use uuid::Uuid;
use crate::error::MeilisearchHttpError;
@@ -56,6 +57,14 @@ pub const DEFAULT_CROP_MARKER: fn() -> String = || "…".to_string();
pub const DEFAULT_HIGHLIGHT_PRE_TAG: fn() -> String = || "<em>".to_string();
pub const DEFAULT_HIGHLIGHT_POST_TAG: fn() -> String = || "</em>".to_string();
pub const DEFAULT_SEMANTIC_RATIO: fn() -> SemanticRatio = || SemanticRatio(0.5);
pub const INCLUDE_METADATA_HEADER: &str = "Meili-Include-Metadata";
#[derive(Clone, Default, PartialEq, Deserr, ToSchema, Debug)]
#[deserr(error = DeserrJsonError<InvalidSearchPersonalize>, rename_all = camelCase, deny_unknown_fields)]
pub struct Personalize {
#[deserr(error = DeserrJsonError<InvalidSearchPersonalizeUserContext>)]
pub user_context: String,
}
#[derive(Clone, Default, PartialEq, Deserr, ToSchema)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
@@ -120,6 +129,8 @@ pub struct SearchQuery {
pub ranking_score_threshold: Option<RankingScoreThreshold>,
#[deserr(default, error = DeserrJsonError<InvalidSearchLocales>)]
pub locales: Option<Vec<Locale>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchPersonalize>, default)]
pub personalize: Option<Personalize>,
}
impl From<SearchParameters> for SearchQuery {
@@ -167,6 +178,7 @@ impl From<SearchParameters> for SearchQuery {
highlight_post_tag: DEFAULT_HIGHLIGHT_POST_TAG(),
crop_marker: DEFAULT_CROP_MARKER(),
locales: None,
personalize: None,
}
}
}
@@ -248,6 +260,7 @@ impl fmt::Debug for SearchQuery {
attributes_to_search_on,
ranking_score_threshold,
locales,
personalize,
} = self;
let mut debug = f.debug_struct("SearchQuery");
@@ -336,6 +349,10 @@ impl fmt::Debug for SearchQuery {
debug.field("locales", &locales);
}
if let Some(personalize) = personalize {
debug.field("personalize", &personalize);
}
debug.finish()
}
}
@@ -541,6 +558,9 @@ pub struct SearchQueryWithIndex {
pub ranking_score_threshold: Option<RankingScoreThreshold>,
#[deserr(default, error = DeserrJsonError<InvalidSearchLocales>, default)]
pub locales: Option<Vec<Locale>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchPersonalize>, default)]
#[serde(skip)]
pub personalize: Option<Personalize>,
#[deserr(default)]
pub federation_options: Option<FederationOptions>,
@@ -565,6 +585,10 @@ impl SearchQueryWithIndex {
self.facets.as_deref().filter(|v| !v.is_empty())
}
pub fn has_personalize(&self) -> bool {
self.personalize.is_some()
}
pub fn from_index_query_federation(
index_uid: IndexUid,
query: SearchQuery,
@@ -598,6 +622,7 @@ impl SearchQueryWithIndex {
attributes_to_search_on,
ranking_score_threshold,
locales,
personalize,
} = query;
SearchQueryWithIndex {
@@ -629,6 +654,7 @@ impl SearchQueryWithIndex {
attributes_to_search_on,
ranking_score_threshold,
locales,
personalize,
federation_options,
}
}
@@ -664,6 +690,7 @@ impl SearchQueryWithIndex {
hybrid,
ranking_score_threshold,
locales,
personalize,
} = self;
(
index_uid,
@@ -695,6 +722,7 @@ impl SearchQueryWithIndex {
hybrid,
ranking_score_threshold,
locales,
personalize,
// do not use ..Default::default() here,
// rather add any missing field from `SearchQuery` to `SearchQueryWithIndex`
},
@@ -835,6 +863,18 @@ pub struct SearchHit {
pub ranking_score_details: Option<serde_json::Map<String, serde_json::Value>>,
}
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, ToSchema)]
#[serde(rename_all = "camelCase")]
#[schema(rename_all = "camelCase")]
pub struct SearchMetadata {
pub query_uid: Uuid,
pub index_uid: String,
#[serde(skip_serializing_if = "Option::is_none")]
pub primary_key: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub remote: Option<String>,
}
#[derive(Serialize, Clone, PartialEq, ToSchema)]
#[serde(rename_all = "camelCase")]
#[schema(rename_all = "camelCase")]
@@ -851,6 +891,10 @@ pub struct SearchResult {
pub facet_distribution: Option<BTreeMap<String, IndexMap<String, u64>>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub facet_stats: Option<BTreeMap<String, FacetStats>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub request_uid: Option<Uuid>,
#[serde(skip_serializing_if = "Option::is_none")]
pub metadata: Option<SearchMetadata>,
#[serde(skip_serializing_if = "Option::is_none")]
pub semantic_hit_count: Option<u32>,
@@ -872,6 +916,8 @@ impl fmt::Debug for SearchResult {
hits_info,
facet_distribution,
facet_stats,
request_uid,
metadata,
semantic_hit_count,
degraded,
used_negative_operator,
@@ -901,6 +947,12 @@ impl fmt::Debug for SearchResult {
if let Some(semantic_hit_count) = semantic_hit_count {
debug.field("semantic_hit_count", &semantic_hit_count);
}
if let Some(request_uid) = request_uid {
debug.field("request_uid", &request_uid);
}
if let Some(metadata) = metadata {
debug.field("metadata", &metadata);
}
debug.finish()
}
@@ -1113,15 +1165,31 @@ pub fn prepare_search<'t>(
Ok((search, is_finite_pagination, max_total_hits, offset))
}
pub struct SearchParams {
pub index_uid: String,
pub query: SearchQuery,
pub search_kind: SearchKind,
pub retrieve_vectors: RetrieveVectors,
pub features: RoFeatures,
pub request_uid: Uuid,
pub include_metadata: bool,
}
pub fn perform_search(
index_uid: String,
params: SearchParams,
index: &Index,
query: SearchQuery,
search_kind: SearchKind,
retrieve_vectors: RetrieveVectors,
features: RoFeatures,
) -> Result<SearchResult, ResponseError> {
) -> Result<(SearchResult, TimeBudget), ResponseError> {
let SearchParams {
index_uid,
query,
search_kind,
retrieve_vectors,
features,
request_uid,
include_metadata,
} = params;
let before_search = Instant::now();
let index_uid_for_metadata = index_uid.clone();
let rtxn = index.read_txn()?;
let time_budget = match index.search_cutoff(&rtxn)? {
Some(cutoff) => TimeBudget::new(Duration::from_millis(cutoff)),
@@ -1129,7 +1197,7 @@ pub fn perform_search(
};
let (search, is_finite_pagination, max_total_hits, offset) =
prepare_search(index, &rtxn, &query, &search_kind, time_budget, features)?;
prepare_search(index, &rtxn, &query, &search_kind, time_budget.clone(), features)?;
let (
milli::SearchResult {
@@ -1142,7 +1210,20 @@ pub fn perform_search(
query_vector,
},
semantic_hit_count,
) = search_from_kind(index_uid, search_kind, search)?;
) = search_from_kind(index_uid.clone(), search_kind, search)?;
let metadata = if include_metadata {
let query_uid = Uuid::now_v7();
let primary_key = index.primary_key(&rtxn)?.map(|pk| pk.to_string());
Some(SearchMetadata {
query_uid,
index_uid: index_uid_for_metadata,
primary_key,
remote: None, // Local searches don't have a remote
})
} else {
None
};
let SearchQuery {
q,
@@ -1174,6 +1255,7 @@ pub fn perform_search(
attributes_to_search_on: _,
filter: _,
distinct: _,
personalize: _,
} = query;
let format = AttributesFormat {
@@ -1225,7 +1307,6 @@ pub fn perform_search(
.transpose()?
.map(|ComputedFacets { distribution, stats }| (distribution, stats))
.unzip();
let result = SearchResult {
hits: documents,
hits_info,
@@ -1237,8 +1318,10 @@ pub fn perform_search(
degraded,
used_negative_operator,
semantic_hit_count,
request_uid: Some(request_uid),
metadata,
};
Ok(result)
Ok((result, time_budget))
}
#[derive(Debug, Clone, Default, Serialize, Deserialize, ToSchema)]
@@ -2080,7 +2163,7 @@ pub(crate) fn parse_filter(
facets: &Value,
filter_parsing_error_code: Code,
features: RoFeatures,
) -> Result<Option<Filter>, ResponseError> {
) -> Result<Option<Filter<'_>>, ResponseError> {
let filter = match facets {
Value::String(expr) => Filter::from_str(expr).map_err(|e| e.into()),
Value::Array(arr) => parse_filter_array(arr).map_err(|e| e.into()),
@@ -2117,7 +2200,7 @@ pub(crate) fn parse_filter(
Ok(filter)
}
fn parse_filter_array(arr: &[Value]) -> Result<Option<Filter>, MeilisearchHttpError> {
fn parse_filter_array(arr: &'_ [Value]) -> Result<Option<Filter<'_>>, MeilisearchHttpError> {
let mut ands = Vec::new();
for value in arr {
match value {

View File

@@ -13,9 +13,9 @@
//! What is going to happen at this point is that you're going to send a oneshot::Sender over an async mpsc channel.
//! Then, the queue/scheduler is going to either:
//! - Drop your oneshot channel => that means there are too many searches going on, and yours won't be executed.
//! You should exit and free all the RAM you use ASAP.
//! You should exit and free all the RAM you use ASAP.
//! - Sends you a Permit => that will unlock the method, and you will be able to process your search.
//! And should drop the Permit only once you have freed all the RAM consumed by the method.
//! And should drop the Permit only once you have freed all the RAM consumed by the method.
use std::num::NonZeroUsize;
use std::sync::atomic::{AtomicUsize, Ordering};

Some files were not shown because too many files have changed in this diff Show More